def step(self, player_str, action_idx, next_strategy=None):
    """
    Advance the game by one action and report the outcome.

    The action index is turned into its string form through the encoder
    and resolved against the current state. If the state answers with a
    RuntimeError the action is treated as illegal: the episode is ended
    with a reward of -100 and neither the state nor the match is changed.

    Args:
        player_str: Key used to read the reward from self.current_rewards
        action_idx: Index into self.game_def.encoder.all_actions
        next_strategy: Passed to get_next as strategy_path

    Returns:
        tuple: (observation, reward, done, info); info is always an
        empty dictionary
    """
    finished_template = "••••••• EPISODE FINISHED Reward:{} •••••••"
    log.debug(
        paint("\n----------- Performing GAME step -----------", bcolors.REF))
    encoded_action = self.game_def.encoder.all_actions[action_idx]

    try:
        chosen = self.current_state.get_legal_action_from_str(encoded_action)
        log.debug(chosen)
    except RuntimeError:
        # Accounts for an illegal action: finish the episode with a penalty
        log.debug(paint("\tSelected non legal action", bcolors.FAIL))
        penalty = -100
        log.debug(paint_bool(finished_template.format(penalty), penalty > 0))
        return self.current_observation, penalty, True, {}

    # Record the move in the match before moving to the next state
    recorded = Step(self.current_state, chosen, len(self.match.steps))
    self.match.add_step(recorded)
    self.current_state = self.current_state.get_next(
        chosen, strategy_path=next_strategy)

    # Read the outcome from the state just reached
    episode_over = self.current_state.is_terminal
    reward = self.current_rewards[player_str]
    if episode_over:
        log.debug(paint_bool(finished_template.format(reward), reward > 0))
    return self.current_observation, reward, episode_over, {}
def random_reset(self):
    """
    Restart the game from a randomly drawn initial state.

    One entry of self.possible_initial_states is picked uniformly with
    np.random.randint and stored as the game definition's initial value.
    The current state and the match are then rebuilt from the state the
    game definition returns.
    """
    drawn = np.random.randint(len(self.possible_initial_states))
    self.game_def.initial = self.possible_initial_states[drawn]
    fresh_state = self.game_def.get_initial_state()
    self.current_state = fresh_state
    # The new match starts with a single step that has no action yet
    opening = Step(fresh_state, None, 0)
    self.match = Match([opening])
def run_episode(game_def, net, expl=0.3):
    """
    Runs one episode to generate training examples.

    For every visited state a fresh tree is built with that state as root
    and an MCTS simulation is run on it. The action probabilities computed
    for the root are stored as one example and the next action is sampled
    from them. The process is repeated from the sampled child until a
    terminal state is reached, so one example is produced per state in
    the match (including the terminal one).

    Args:
        game_def: The game definition; provides the initial state and
            the encoder
        net: The network handed to the tree; net.args.n_mcts_simulations
            gives the number of simulations per step
        expl: Exploration value forwarded to run_mcts

    Returns:
        list: One [masked_state, pi, v] list per visited state. The value
        v of the last entry is the goal of the player in control of the
        terminal state and its sign alternates going backwards.

    Raises:
        IndexError: If no child of the root matches the sampled action
    """
    encoder = game_def.encoder
    examples = []
    current_state = game_def.get_initial_state()
    while True:
        root = TreeZero.node_class(Step(current_state, None, 0), "a")
        tree = TreeZero(root, game_def, net)
        tree.run_mcts(net.args.n_mcts_simulations, expl=expl)

        final_state = root.step.state
        if final_state.is_terminal:
            # The terminal state has no policy target, so it is all zeros
            examples.append([encoder.mask_state(current_state),
                             np.zeros(encoder.action_size),
                             None])
            v = final_state.goals[final_state.control]
            # Walk back through the episode flipping the sign at each step
            for example in reversed(examples):
                example[2] = v
                v = -v
            return examples

        pi = root.pis(game_def)
        examples.append([encoder.mask_state(current_state), pi, None])

        sampled = str(np.random.choice(encoder.all_actions, p=pi))
        matching = [child for child in root.children
                    if str(child.step.action.action) == sampled]
        if not matching:
            raise IndexError(
                "No child node found for sampled action {}".format(sampled))
        current_state = matching[0].step.next_state()
def build(game_def, args):
    """
    Runs the computation required to build an MCTS player.

    A tree is grown from the initial state of the game with args.iter
    iterations. Depending on which file names are set in args, the tree
    image, the tree values and the training data are written to disk.

    Args:
        game_def (GameDef): The game definition used for the creation
        args (NameSpace): A name space with all the attributes defined
            in add_parser_build_args. The attribute first_build is added
            on the first call so that later calls append to the
            training file instead of overwriting it.

    Returns:
        dict: 'number_of_nodes' with the size of the tree and 'save_time'
        with the milliseconds spent after the search finished
    """
    if 'first_build' in args:
        log.debug("Appending to existent files")
        write_mode = 'a'
    else:
        log.debug("Creating new files")
        write_mode = 'w'
        args.first_build = False

    log.debug("Computing mcts for tree")
    initial_state = game_def.get_initial_state()
    root_node = TreeMCTS.node_class(Step(initial_state, None, 0),
                                    args.main_player)
    tree = TreeMCTS(root_node, game_def, args.main_player)
    tree.run_mcts(args.iter)

    # Everything from here on counts towards the reported save time
    started = time.time()

    if args.tree_image_file_name is not None:
        image_path = '{}/{}'.format(game_def.name, args.tree_image_file_name)
        tree.print_in_file(file_name=image_path)
        log.debug("Tree image saved in {}".format(image_path))

    n_nodes = tree.get_number_of_nodes()

    if args.tree_name is not None:
        values_path = "./approaches/mcts/trees/{}/{}".format(
            game_def.name, args.tree_name)
        tree.save_values_in_file(values_path)
        log.debug("Tree saved in {}".format(values_path))

    if args.train_file is not None:
        train_path = "./approaches/mcts/train/{}/{}".format(
            game_def.name, args.train_file)
        train_rows = tree.get_train_list()
        os.makedirs(os.path.dirname(train_path), exist_ok=True)
        training_data_to_csv(train_path,
                             train_rows,
                             game_def,
                             write_mode,
                             extra_array=['p', 'n'])
        log.debug("Training data saved in {}".format(train_path))

    elapsed_ms = round((time.time() - started) * 1000, 3)
    return {'number_of_nodes': n_nodes, 'save_time': elapsed_ms}
def expand_root(tree, main_player="a"):
    """
    Expands a tree downwards, level by level, until no leaf can grow.

    Every node holds a step made of a state and an action to take in it.
    A leaf is expanded by computing the state its action leads to and
    hanging one child per legal action of that state. When that state is
    terminal the leaf receives the goal of the main player as score.

    Args:
        tree (anytree.Node): the root node to expand; modified in place
        main_player (str): the player whose goals are used as scores
    """

    def attach_children(parent, state, depth):
        # Hangs one child per legal action; tells whether any was added
        added = False
        for move in state.legal_actions:
            TreeMinmax.node_class(Step(state, move, depth),
                                  main_player,
                                  parent=parent)
            added = True
        return added

    hide_progress = log.is_disabled_for('debug')

    # First level: actions available in the state of the root itself
    attach_children(tree, tree.step.state, 1)

    depth = 2
    grew = True
    while grew:
        grew = False
        log.debug("Depth: %s" % (depth))
        for leaf in tqdm(tree.leaves, disable=hide_progress):
            leaf_state = leaf.step.state
            if leaf_state.is_terminal:
                continue
            successor = leaf_state.get_next(leaf.step.action)
            if attach_children(leaf, successor, depth):
                grew = True
            if successor.is_terminal:
                leaf.score = successor.goals[main_player]
        depth += 1
def minmax_from_game_def(game_def, initial_state=None, main_player="a"):
    """
    Builds a minimax tree starting from a game definition.

    A root node is created for the initial state, expanded downwards
    with expand_root and then handed to build_minimax. The node that
    build_minimax returns becomes the root of the resulting tree.

    Args:
        game_def (GameDef*): game definition class
        initial_state: state to start from; when None it is created from
            game_def.initial
        main_player (str): the player the tree is built for

    Returns:
        TreeMinmax: a tree wrapping the node returned by build_minimax
    """
    start_state = initial_state
    if start_state is None:
        start_state = StateExpanded.from_game_def(game_def, game_def.initial)

    root = TreeMinmax(main_player=main_player).create_node(
        Step(start_state, None, 0))
    expand_root(tree=root, main_player=main_player)
    solved_root = build_minimax(root, main_player=main_player)
    return TreeMinmax(solved_root, main_player)
def match_from_time_model(model, game_def, main_player = None):
    """
    Given a stable model for the full time representation of the game,
    creates a match with one step per time point.

    The atoms goal, holds and does are grouped by the time point found in
    their arguments. Time points are read consecutively starting at 0 and
    the state of the last step is flagged as terminal.

    Args:
        model: Stable model from the full time representation
        game_def: The game definition
        main_player: Not used by this function

    Returns:
        Match: The match built from the steps of the model
    """
    per_time = defaultdict(
        lambda: {'fluents':[],'goals':[], 'action':None})
    for atom in model.symbols(atoms=True):
        kind = atom.name
        if kind == "holds":
            # holds(Fluent, Time)
            per_time[atom.arguments[1].number]['fluents'].append(
                atom.arguments[0])
        elif kind == "goal":
            # The time point is the third argument
            per_time[atom.arguments[2].number]['goals'].append(atom)
        elif kind == "does":
            # The time point is the third argument
            per_time[atom.arguments[2].number]['action'] = atom
    # A plain dict from here on, so a missing time point raises KeyError
    per_time = dict(per_time)

    steps = []
    for idx in range(len(per_time)):
        entry = per_time[idx]
        state = State(entry['fluents'], entry['goals'], game_def)
        does_atom = entry['action']
        if does_atom:
            action = Action(does_atom.arguments[0].name,
                            does_atom.arguments[1])
        else:
            action = None
        steps.append(Step(state, action, idx))

    steps[-1].state.is_terminal = True
    return Match(steps)
def choose_action(self, state):
    """
    The player chooses an action for the given state using MCTS.

    A new tree rooted in the state is searched with 10000 iterations. If
    a TimeoutError interrupts the search, the best action found so far
    is used.

    Args:
        state (State): The current state

    Returns:
        action (Action): The element of state.legal_actions that is
        equal to the best action of the tree
    """
    root_node = TreeMCTS.node_class(Step(state, None, 0), self.main_player)
    search_tree = TreeMCTS(root_node, self.game_def, self.main_player)
    try:
        search_tree.run_mcts(10000)
    except TimeoutError:
        log.debug("Reached timeout error for mcts, computation will stop")

    best = search_tree.get_best_action(search_tree.root)
    # Hand back the instance owned by the state rather than the tree's copy
    candidates = [legal for legal in state.legal_actions if legal == best]
    return candidates[0]
def simulate(game_def, players, depth=None, ran_init=False, signal_on=True,
             time_out_sec=3):
    """
    Simulates a match between two players on the given game definition.

    The players alternate, starting with players[0]. When signal_on is
    set, every call to choose_action is guarded by a SIGALRM alarm; a
    player that runs out of time gets a random legal action instead.
    The alarm is always cancelled and the SIGALRM handler that was
    installed before the call is put back when the function exits, even
    if a player raises an unexpected exception.

    Args:
        game_def: The game definition
        players (Player,Player): A tuple of the players
        depth:
            - n: Generate until depth n or terminal state reached
            - None: Generate until a terminal state is reached
        ran_init: True to start from game_def.get_random_initial()
            instead of game_def.initial
        signal_on: True to limit the time of each move with SIGALRM
        time_out_sec: Whole seconds each player has to make a move;
            only used when signal_on is set

    Returns:
        tuple: The match and a dictionary with the mean response time in
        milliseconds of each player ('a' and 'b'); 0.0 for a player that
        never moved
    """

    def handler(signum, frame):
        raise TimeoutError("Action time out")

    previous_handler = None
    if signal_on:
        previous_handler = signal.signal(signal.SIGALRM, handler)

    try:
        if ran_init:
            initial = game_def.get_random_initial()
        else:
            initial = game_def.initial
        state = StateExpanded.from_game_def(game_def,
                                            initial,
                                            strategy=players[0].strategy)
        match = Match([])
        time_step = 0
        log.debug("\n--------------- Simulating match ----------------")
        log.debug("\na: {}\nb: {}\n".format(players[0].name, players[1].name))
        letters = ['a', 'b']
        response_times = {'a': [], 'b': []}

        while not state.is_terminal and (depth is None or time_step < depth):
            turn = time_step % 2
            if signal_on:
                signal.alarm(time_out_sec)
            t0 = time.time()
            try:
                selected_action = players[turn].choose_action(state)
            except TimeoutError:
                log.info(
                    "Time out for player {}, choosing random action".format(
                        letters[turn]))
                index = randint(0, len(state.legal_actions) - 1)
                selected_action = state.legal_actions[index]
            finally:
                # Never leave an alarm pending, whatever the player raised
                if signal_on:
                    signal.alarm(0)
            t1 = time.time()
            response_times[letters[turn]].append(round((t1 - t0) * 1000, 3))

            match.add_step(Step(state, selected_action, time_step))
            time_step += 1
            # The strategy of the player moving next is used for the state
            state = state.get_next(
                selected_action,
                strategy_path=players[time_step % 2].strategy)

        match.add_step(Step(state, None, time_step))
        log.debug(match)
        return match, {
            k: round(sum(lst) / len(lst), 3) if lst else 0.0
            for k, lst in response_times.items()
        }
    finally:
        if signal_on:
            signal.alarm(0)
            # None means the old handler was not installed from Python
            # and cannot be passed back to signal.signal
            if previous_handler is not None:
                signal.signal(signal.SIGALRM, previous_handler)
def generate_from(cls,game_def,net,state):
    """
    Generates a tree with the predictions of the network.

    The tree is grown level by level from the given state. Each expanded
    node gets as children the legal actions whose probability reaches the
    general threshold, plus the illegal actions whose probability exceeds
    that of the preferred legal action by more than the illegal
    threshold. Only legal, non terminal nodes are expanded further, and a
    step whose hash was already seen is not added again.

    Args:
        game_def: The game definition; provides the encoder
        net: The network; predict_pi_v(state) gives probabilities and value
        state: The state used for the root of the tree

    Returns:
        TreeNet: The generated tree
    """
    # Only illegal actions exceeding the best legal one by more than this
    # amount are kept
    illegal_print_th = 0.001
    # Only actions with at least this probability are kept
    general_print_th = 0.001

    log.debug("Generating net tree...")
    root = TreeNet.node_class(Step(state,None,0),"a",
                              dic={"is_legal":1,"p":1,"v":0})
    tree = TreeNet(root,game_def,net)
    seen_hashes = set()
    frontier = [root]
    while frontier:
        next_frontier = []
        for node in frontier:
            # Terminal nodes and illegal moves are never expanded
            if node.step.state.is_terminal or not node.is_legal:
                continue

            # The root carries no action, so its own state is evaluated
            if node.step.action is None:
                reached = node.step.state
            else:
                reached = node.step.next_state()

            pi, v = net.predict_pi_v(reached)
            node.v = v
            if reached.is_terminal:
                continue

            legal_mask = game_def.encoder.mask_legal_actions(reached)
            max_prob = pi[np.argmax(legal_mask*pi)]+illegal_print_th
            for idx, prob in enumerate(pi):
                if prob<general_print_th:
                    continue
                is_illegal = legal_mask[idx]==0
                if is_illegal and prob<=max_prob:
                    continue

                action_str = str(game_def.encoder.all_actions[idx])
                if is_illegal:
                    action = Action.from_facts(
                        "does({},{}).".format(reached.control,action_str),
                        game_def)
                else:
                    action = reached.get_legal_action_from_str(action_str)

                child_step = Step(reached,action,node.step.time_step+1)
                child_hash = child_step.__hash__()
                if child_hash in seen_hashes:
                    continue
                child = TreeNet.node_class(
                    child_step,"a",parent=node,
                    dic={"is_legal":legal_mask[idx]==1,"p":prob,"v":0})
                next_frontier.append(child)
                seen_hashes.add(child_hash)
        frontier = next_frontier
    return tree
def simulate(game_def, players, depth=None, time_out_sec=None,
             penalize_illegal=False):
    """
    Simulates a match between two players on the given game definition.

    The players alternate, starting with players[0]. When time_out_sec
    is given, every call to choose_action is guarded by a SIGALRM alarm;
    a player that runs out of time gets a random legal action instead.
    A player raising IllegalActionError loses the match: the current
    state is flagged as terminal with goals -1 for that player and +1
    for the other, and the details are stored in match.illegal_lost.
    The alarm is always cancelled and the SIGALRM handler that was
    installed before the call is put back when the function exits, even
    if a player raises an unexpected exception.

    Args:
        game_def: The game definition
        players (Player,Player): A tuple of the players
        depth: Generate until depth or terminal state reached
        time_out_sec: The number of seconds the player will have to make
            a move; None disables the time limit
        penalize_illegal: True if a selection of an illegal action should
            be highly penalized by the player; forwarded to choose_action

    Returns:
        tuple: The match and a dictionary with the mean response time in
        milliseconds of each player ('a' and 'b'); 0.0 for a player that
        never moved
    """
    signal_on = time_out_sec is not None

    def handler(signum, frame):
        raise TimeoutError("Action time out")

    previous_handler = None
    if signal_on:
        previous_handler = signal.signal(signal.SIGALRM, handler)

    try:
        initial = game_def.initial
        state = StateExpanded.from_game_def(game_def,
                                            initial,
                                            strategy=players[0].strategy)
        match = Match([])
        time_step = 0
        log.debug("\n--------------- Simulating match ----------------")
        log.debug("\na: {}\nb: {}\n".format(players[0].name, players[1].name))
        letters = ['a', 'b']
        response_times = {'a': [], 'b': []}

        while not state.is_terminal and (depth is None or time_step < depth):
            turn = time_step % 2
            current_control = letters[turn]
            if signal_on:
                signal.alarm(time_out_sec)
            t0 = time.time()
            try:
                selected_action = players[turn].choose_action(
                    state,
                    time_step=time_step,
                    penalize_illegal=penalize_illegal)
            except TimeoutError:
                log.debug(
                    "Time out for player {}, choosing random action".format(
                        current_control))
                index = randint(0, len(state.legal_actions) - 1)
                selected_action = state.legal_actions[index]
            except IllegalActionError as ex:
                log.debug(
                    "Player {}, choosing illegal action {} in step {} -> Match lost"
                    .format(players[turn].name, str(ex.action), time_step))
                # The offending player loses and the match ends here
                state.is_terminal = True
                state.goals = {
                    current_control: -1,
                    letters[(time_step + 1) % 2]: +1,
                }
                selected_action = None
                match.illegal_lost = {
                    "player": current_control,
                    "time_step": time_step
                }
            finally:
                # Never leave an alarm pending, whatever the player raised
                if signal_on:
                    signal.alarm(0)
            t1 = time.time()
            response_times[current_control].append(round((t1 - t0) * 1000, 3))

            match.add_step(Step(state, selected_action, time_step))
            time_step += 1
            if selected_action is not None:
                # The strategy of the player moving next is used
                state = state.get_next(
                    selected_action,
                    strategy_path=players[time_step % 2].strategy)

        match.add_step(Step(state, None, time_step))
        log.debug(match)
        return match, {
            k: round(sum(lst) / len(lst), 3) if lst else 0.0
            for k, lst in response_times.items()
        }
    finally:
        if signal_on:
            signal.alarm(0)
            # None means the old handler was not installed from Python
            # and cannot be passed back to signal.signal
            if previous_handler is not None:
                signal.signal(signal.SIGALRM, previous_handler)