def step(self, player_str, action_idx, next_strategy=None):
    log.debug(
        paint("\n----------- Performing GAME step -----------", bcolors.REF))
    action_str = self.game_def.encoder.all_actions[action_idx]
    try:
        legal_action = self.current_state.get_legal_action_from_str(
            action_str)
        log.debug(legal_action)
    except RuntimeError as err:
        # Accounts for an illegal action: penalize and end the episode
        log.debug(paint("\tSelected non legal action", bcolors.FAIL))
        player_reward = -100
        log.debug(
            paint_bool(
                "••••••• EPISODE FINISHED Reward:{} •••••••".format(
                    player_reward), player_reward > 0))
        return self.current_observation, player_reward, True, {}
    # Construct next state
    self.match.add_step(
        Step(self.current_state, legal_action, len(self.match.steps)))
    next_state = self.current_state.get_next(legal_action,
                                             strategy_path=next_strategy)
    self.current_state = next_state
    # Get information from next state
    done = self.current_state.is_terminal
    goals_dic = self.current_rewards
    player_reward = goals_dic[player_str]
    if done:
        log.debug(
            paint_bool(
                "••••••• EPISODE FINISHED Reward:{} •••••••".format(
                    player_reward), player_reward > 0))
    return self.current_observation, player_reward, done, {}
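# Usage sketch (hypothetical wrapper): step above follows the OpenAI-gym
# convention of returning (observation, reward, done, info), with a -100
# reward and immediate termination on an illegal action. The `env` object
# and its `reset` method below are assumptions for illustration:
#
#     obs = env.reset()
#     done = False
#     while not done:
#         action_idx = randint(0, len(env.game_def.encoder.all_actions) - 1)
#         obs, reward, done, info = env.step("a", action_idx)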
def choose_action(self, state):
    """
    The player chooses an action given a current state.

    Args:
        state (State): The current state

    Returns:
        action (Action): The selected action. Should be one from the list
            of state.legal_actions
    """
    state_facts = state.to_facts()
    if state_facts in self.tree_scores:
        opt = self.tree_scores[state_facts].items()
        if self.scores_main_player == self.main_player:
            best = max(opt, key=lambda i: i[1])
        else:
            best = min(opt, key=lambda i: i[1])
        action = Action.from_facts(best[0], self.game_def)
    else:
        log.debug(
            "Minmax has no information in tree for current step, choosing random"
        )
        index = randint(0, len(state.legal_actions) - 1)
        return state.legal_actions[index]
    action = [l_a for l_a in state.legal_actions if l_a == action][0]
    return action
def choose_action(self, state):
    """
    The player chooses an action given a current state.

    Args:
        state (State): The current state

    Returns:
        action (Action): The selected action. Should be one from the list
            of state.legal_actions
    """
    if self.style == "tree":
        # Using tree
        state_facts = state.to_facts()
        if state_facts in self.tree_scores:
            opt = self.tree_scores[state_facts].items()
            if self.scores_main_player == self.main_player:
                best = max(opt, key=lambda i: i[1])
            else:
                best = min(opt, key=lambda i: i[1])
            action = Action.from_facts(best[0], self.game_def)
        else:
            log.debug(
                "Minmax has no information in tree for current step, choosing random"
            )
            index = randint(0, len(state.legal_actions) - 1)
            return state.legal_actions[index]
        action = [l_a for l_a in state.legal_actions if l_a == action][0]
        return action
    elif self.style == "rule":
        return state.legal_actions[-1]
    else:
        log.debug("Learning")
        # Using rules learned during play
        initial = fluents_to_asp_syntax(state.fluents, 0)
        match, tree, ex, ls, tl = get_minmax_init(self.game_def,
                                                  self.main_player,
                                                  initial,
                                                  extra_fixed="\n".join(
                                                      self.learned),
                                                  learning_rules=True)
        self.learned.extend(ls)
        if len(ls) > 0:
            log.info("{} learned new rules during game play".format(
                self.name))
        if match is None:
            raise TimeoutError
        action = match.steps[0].action.action
        action_name = str(action)
        action = [
            l_a for l_a in state.legal_actions
            if str(l_a.action) == action_name
        ][0]
        return action
def from_name(cls, name, initial=None, constants={}):
    """
    Creates a game definition of the subclass whose folder matches
    the argument 'name'.

    Args:
        name: The name of the folder containing the class definition
        initial: An optional initial state
        constants: The constants to be passed to clingo for this game
    """
    games = game_def_sub_classes()
    if name in games:
        log.debug("Creating game definition from class {}".format(
            games[name].__name__))
        return games[name](name, initial, constants)
    raise RuntimeError(
        "No folder inside game_definitions matched {}".format(name))
def from_name_style(cls, game_def, name_style, main_player):
    """
    Creates a player by finding the Player's subclass matching the
    name_style.

    Args:
        game_def (GameDef): The game definition used
        name_style (str): String to match with the function
            match_name_style of the subclass
        main_player (str): String with the name of the main player
    """
    approaches = player_approaches_sub_classes()
    for n, c in approaches.items():
        if c.match_name_style(name_style):
            log.debug("Creating player from class {}".format(c.__name__))
            return c(game_def, name_style, main_player)
    raise RuntimeError(
        "No subclass of Player matched {}".format(name_style))
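# Usage sketch (hypothetical names): the two factory methods above are
# meant to be chained. "tic_tac_toe" and "random" stand in for an actual
# folder under game_definitions and a registered Player name_style:
#
#     game_def = GameDef.from_name("tic_tac_toe")
#     player = Player.from_name_style(game_def, "random", main_player="a")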
def build(game_def, args):
    """
    Runs the required computation to build a player. For instance,
    creating a tree or training a model. The computed information
    should be stored so it can be accessed later on using the name_style.

    Args:
        game_def (GameDef): The game definition used for the creation
        args (NameSpace): A namespace with all the attributes defined
            in add_parser_build_args
    """
    log.debug("Computing normal minmax for tree")
    tree = minmax_from_game_def(game_def, main_player=args.main_player)
    t0 = time.time()
    if args.tree_image_file_name is not None:
        file_name = '{}/{}'.format(game_def.name, args.tree_image_file_name)
        tree.print_in_file(file_name=file_name)
        log.debug("Tree image saved in {}".format(file_name))
    n_nodes = tree.get_number_of_nodes()
    if args.tree_name is not None:
        file_path = "./approaches/minmax/trees/{}/{}".format(
            game_def.name, args.tree_name)
        tree.save_scores_in_file(file_path)
        log.debug("Tree saved in {}".format(file_path))
    t1 = time.time()
    save_time = round((t1 - t0) * 1000, 3)
    return {'number_of_nodes': n_nodes, 'save_time': save_time}
def choose_action(self, state):
    """
    The player chooses an action given a current state.

    Args:
        state (State): The current state

    Returns:
        action (Action): The selected action. Should be one from the list
            of state.legal_actions
    """
    step = Step(state, None, 0)
    tree = TreeMCTS(TreeMCTS.node_class(step, self.main_player),
                    self.game_def, self.main_player)
    try:
        tree.run_mcts(10000)
    except TimeoutError:
        log.debug("Reached timeout error for mcts, computation will stop")
    action = tree.get_best_action(tree.root)
    action_ex = [l_a for l_a in state.legal_actions if l_a == action][0]
    return action_ex
def build_minimax(tree, main_player="a"):
    """
    Function to review and annotate tree with minimax scores

    Args:
        tree (anytree.Node): a tree with scores on its leaves
        main_player (str): The player to maximize

    Returns:
        tree (anytree.Node): minimax-annotated version of tree
    """
    disable_tqdm = log.is_disabled_for('debug')
    log.debug("tracking minimax scores recursively")
    # Work backwards from the deepest level to fill in missing scores
    for node in tqdm(list(
            reversed(
                list(LevelOrderIter(tree.root,
                                    maxlevel=tree.root.height)))),
                     disable=disable_tqdm):
        scores = [child.score for child in node.children if child != ()]
        if node.score is None:
            if node.children[0].step.state.control == main_player:
                node.score = max(scores)
            else:
                node.score = min(scores)
    return tree
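# Minimal standalone sketch of the back-up rule in build_minimax, assuming
# anytree is installed. Control is stored directly on each node instead of
# inside Step/State objects; the node in control picks among its children.
from anytree import LevelOrderIter, Node

root = Node("root", score=None, control="a")
left = Node("left", parent=root, score=None, control="b")
right = Node("right", parent=root, score=None, control="b")
Node("ll", parent=left, score=1, control="a")
Node("lr", parent=left, score=-1, control="a")
Node("rl", parent=right, score=0, control="a")
Node("rr", parent=right, score=1, control="a")

# Fill scores bottom-up: maximize where "a" controls, minimize where "b" does
for node in reversed(list(LevelOrderIter(root))):
    if node.score is None and node.children:
        scores = [child.score for child in node.children]
        node.score = max(scores) if node.control == "a" else min(scores)

print(root.score)  # 0: "a" goes right, where "b"'s best reply still yields 0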
def expand_root(tree, main_player="a"):
    """
    Function to expand a tree downwards, in place, until all leaves are
    terminal states.

    Args:
        tree (anytree.Node): a tree to expand till its terminal leaves
    """
    disable_tqdm = log.is_disabled_for('debug')
    valid_moves = tree.step.state.legal_actions
    for legal_action in valid_moves:
        step = Step(tree.step.state, legal_action, 1)
        TreeMinmax.node_class(step, main_player, parent=tree)
    expand_further = True
    time_step = 2
    while expand_further:
        # Set back to True only if some leaf was expanded this iteration
        expand_further = False
        log.debug("Depth: %s" % (time_step))
        for leaf in tqdm(tree.leaves, disable=disable_tqdm):
            current_state = leaf.step.state
            if current_state.is_terminal:
                continue
            next_state = current_state.get_next(leaf.step.action)
            valid_moves = next_state.legal_actions
            for legal_action in valid_moves:
                step = Step(next_state, legal_action, time_step)
                TreeMinmax.node_class(step, main_player, parent=leaf)
                expand_further = True
            if next_state.is_terminal:
                goals = next_state.goals
                leaf.score = goals[main_player]
        time_step += 1
def build(game_def, args):
    """
    Runs the required computation to build a player. For instance,
    creating a tree or training a model. The computed information
    should be stored so it can be accessed later on using the name_style.

    Args:
        game_def (GameDef): The game definition used for the creation
        args (NameSpace): A namespace with all the attributes defined
            in add_parser_build_args
    """
    if 'first_build' not in args:
        log.debug("Creating new files")
        new_files = 'w'
        args.first_build = False
    else:
        log.debug("Appending to existent files")
        new_files = 'a'
    log.debug("Computing mcts for tree")
    state = game_def.get_initial_state()
    root = TreeMCTS.node_class(Step(state, None, 0), args.main_player)
    tree = TreeMCTS(root, game_def, args.main_player)
    tree.run_mcts(args.iter)
    t0 = time.time()
    if args.tree_image_file_name is not None:
        file_name = '{}/{}'.format(game_def.name, args.tree_image_file_name)
        tree.print_in_file(file_name=file_name)
        log.debug("Tree image saved in {}".format(file_name))
    n_nodes = tree.get_number_of_nodes()
    if args.tree_name is not None:
        file_path = "./approaches/mcts/trees/{}/{}".format(
            game_def.name, args.tree_name)
        tree.save_values_in_file(file_path)
        log.debug("Tree saved in {}".format(file_path))
    if args.train_file is not None:
        file_path = "./approaches/mcts/train/{}/{}".format(
            game_def.name, args.train_file)
        train_list = tree.get_train_list()
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        training_data_to_csv(file_path,
                             train_list,
                             game_def,
                             new_files,
                             extra_array=['p', 'n'])
        log.debug("Training data saved in {}".format(file_path))
    t1 = time.time()
    save_time = round((t1 - t0) * 1000, 3)
    return {'number_of_nodes': n_nodes, 'save_time': save_time}
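# Usage sketch (hypothetical flag values; attribute names taken from the
# code above): this build step is normally driven by an argparse namespace.
#
#     from argparse import Namespace
#     args = Namespace(main_player="a", iter=1000, tree_name="mcts_tree",
#                      tree_image_file_name=None, train_file=None)
#     stats = build(game_def, args)  # {'number_of_nodes': ..., 'save_time': ...}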
def build(game_def, args):
    """
    Runs the required computation to build a player. For instance,
    creating a tree or training a model. The computed information
    should be stored so it can be accessed later on using the name_style.

    Args:
        game_def (GameDef): The game definition used for the creation
        args (NameSpace): A namespace with all the attributes defined
            in add_parser_build_args
    """
    if 'first_build' not in args:
        log.debug("Creating new files")
        new_files = 'w'
        args.first_build = False
    else:
        log.debug("Appending to existent files")
        new_files = 'a'
    learn_examples = args.ilasp_examples_file_name is not None
    learn_rules = args.rules_file_name is not None
    generate_train = args.train_file_name is not None
    log.debug("Computing asp minmax for tree")
    log.debug("Initial state: \n{}".format(
        game_def.get_initial_state().ascii))
    initial = game_def.get_initial_time()
    minmax_match, min_max_tree, examples, learned_rules, training_list = get_minmax_init(
        game_def,
        args.main_player,
        initial,
        generating_training=generate_train,
        learning_rules=learn_rules,
        learning_examples=learn_examples)
    log.debug(minmax_match)
    t0 = time.time()
    if learn_examples:
        ilasp_examples_file_name = './approaches/ilasp/{}/examples/{}'.format(
            args.game_name, args.ilasp_examples_file_name)
        os.makedirs(os.path.dirname(ilasp_examples_file_name), exist_ok=True)
        with open(ilasp_examples_file_name, new_files) as text_file:
            text_file.write("\n".join(examples))
        log.debug("ILASP Examples saved in " + ilasp_examples_file_name)
    if learn_rules:
        rules_file = './approaches/pruned_minmax/rules/{}/{}'.format(
            args.game_name, args.rules_file_name)
        os.makedirs(os.path.dirname(rules_file), exist_ok=True)
        with open(rules_file, new_files) as text_file:
            text_file.write("\n".join(learned_rules))
        rules_file_to_gdl(rules_file)
        log.debug("Rules saved in " + rules_file)
    if generate_train:
        train_file = './approaches/ml_agent/train/{}/{}'.format(
            args.game_name, args.train_file_name)
        os.makedirs(os.path.dirname(train_file), exist_ok=True)
        training_data_to_csv(train_file, training_list, game_def, new_files)
        log.debug("Training data saved in " + train_file)
        remove_duplicates_training(train_file)
    if args.tree_image_file_name is not None:
        image_file_name = '{}/{}'.format(args.game_name,
                                         args.tree_image_file_name)
        min_max_tree.print_in_file(file_name=image_file_name)
    n_nodes = min_max_tree.get_number_of_nodes()
    if args.tree_name is not None:
        file_path = "./approaches/pruned_minmax/trees/{}/{}".format(
            game_def.name, args.tree_name)
        min_max_tree.save_scores_in_file(file_path)
        log.debug("Tree saved in {}".format(file_path))
    t1 = time.time()
    save_time = round((t1 - t0) * 1000, 3)
    return {'number_of_nodes': n_nodes, 'save_time': save_time}
def simulate(game_def, players, depth=None, ran_init=False, signal_on=True):
    """
    Simulates a match between two players.

    Args:
        game_def (GameDef): The game definition used for the match
        players (Player, Player): A tuple of the players
        depth (int): Generate until depth n or a terminal state is reached
        ran_init (bool): True to start from a random initial state
        signal_on (bool): True to time out each move after 3 seconds
    """
    def handler(signum, frame):
        raise TimeoutError("Action time out")

    if signal_on:
        signal.signal(signal.SIGALRM, handler)
    if ran_init:
        initial = game_def.get_random_initial()
    else:
        initial = game_def.initial
    state = StateExpanded.from_game_def(game_def,
                                        initial,
                                        strategy=players[0].strategy)
    match = Match([])
    time_step = 0
    continue_depth = True if depth is None else time_step < depth
    log.debug("\n--------------- Simulating match ----------------")
    log.debug("\na: {}\nb: {}\n".format(players[0].name, players[1].name))
    letters = ['a', 'b']
    response_times = {'a': [], 'b': []}
    while not state.is_terminal and continue_depth:
        if signal_on:
            signal.alarm(3)
        t0 = time.time()
        try:
            selected_action = players[time_step % 2].choose_action(state)
        except TimeoutError as ex:
            log.info("Time out for player {}, choosing random action".format(
                letters[time_step % 2]))
            index = randint(0, len(state.legal_actions) - 1)
            selected_action = state.legal_actions[index]
        if signal_on:
            signal.alarm(0)
        t1 = time.time()
        response_times[letters[time_step % 2]].append(
            round((t1 - t0) * 1000, 3))
        step = Step(state, selected_action, time_step)
        match.add_step(step)
        time_step += 1
        continue_depth = True if depth is None else time_step < depth
        state = state.get_next(selected_action,
                               strategy_path=players[time_step % 2].strategy)
    match.add_step(Step(state, None, time_step))
    log.debug(match)
    return match, {
        k: round(sum(lst) / (len(lst) if len(lst) > 0 else 1), 3)
        for k, lst in response_times.items()
    }
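# Minimal sketch of the SIGALRM timeout pattern used above: the handler
# raises TimeoutError, the alarm is armed before the slow call and disarmed
# after. This only works on Unix and in the main thread.
import signal
import time

def handler(signum, frame):
    raise TimeoutError("Action time out")

signal.signal(signal.SIGALRM, handler)
signal.alarm(2)  # deliver SIGALRM after 2 seconds
try:
    time.sleep(5)  # stand-in for a slow choose_action call
except TimeoutError:
    print("timed out, falling back to a random action")
finally:
    signal.alarm(0)  # always disarm the alarm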
def build(game_def, args):
    """
    Runs the required computation to build a player. For instance,
    creating a tree or training a model. The computed information
    should be stored so it can be accessed later on using the name_style.

    Args:
        game_def (GameDef): The game definition used for the creation
        args (NameSpace): A namespace with all the attributes defined
            in add_parser_build_args
    """
    args.rules_file_name = None
    args.tree_image_file_name = None
    args.train_file_name = None
    args.tree_name = None
    if args.ilasp_examples_file_name is None:
        log.debug("Generating examples using min_max_asp algorithm")
        args.ilasp_examples_file_name = 'temp_examples.las'
        PrunedMinmaxPlayer.build(game_def, args)
    base_path = './approaches/ilasp/{}/'.format(game_def.name)
    lines = []
    with open(args.background_path, 'r') as background_file:
        lines.extend(background_file.readlines())
    with open('{}languages/{}'.format(base_path, args.language_bias_name),
              'r') as language_bias_file:
        language_bias_lines = language_bias_file.readlines()
        lines.extend(language_bias_lines)
    with open(
            '{}examples/{}'.format(base_path, args.ilasp_examples_file_name),
            'r') as examples_file:
        lines.extend(examples_file.readlines())
    with open('{}temporal.las'.format(base_path), 'w') as complete_file:
        complete_file.write("".join(lines))
    if args.ilasp_arg is not None:
        ilasp_args = ["--" + a for a in args.ilasp_arg]
    else:
        ilasp_args = []
    command = [
        "ILASP", "--clingo5", "--version=2i",
        '{}temporal.las'.format(base_path), "--multi-wc", "--simple",
        "--max-rule-length=6", "--max-wc-length=5", "-ml=5", "-q"
    ]
    command.extend(ilasp_args)
    string_command = " ".join(command)
    log.info("Running ilasp command: \n{}".format(string_command))
    result = subprocess.check_output(string_command,
                                     shell=True).decode("utf-8")
    log.debug("Found strategy: \n{}".format(result))
    t0 = time.time()
    strategy_file_path = '{}/strategies/{}'.format(base_path,
                                                   args.strategy_name)
    os.makedirs(os.path.dirname(strategy_file_path), exist_ok=True)
    language_bias_predicates = [
        l for l in language_bias_lines if l[0] != "#"
    ]
    result = result + "".join(language_bias_predicates)
    with open(strategy_file_path, 'w') as strategy_file:
        strategy_file.write(result)
    log.debug("Strategy saved in {}".format(strategy_file_path))
    t1 = time.time()
    save_time = round((t1 - t0) * 1000, 3)
    return {'save_time': save_time}
def generate_from(cls, game_def, net, state):
    """
    Generates a tree with the predictions of the network. Adds as children
    all the legal actions, as well as any illegal actions with a higher
    probability than the best legal one. Only legal actions are expanded
    further.
    """
    log.debug("Generating net tree...")
    root = TreeNet.node_class(Step(state, None, 0), "a",
                              dic={"is_legal": 1, "p": 1, "v": 0})
    tree = TreeNet(root, game_def, net)
    set_visited = set()
    current_nodes = [root]
    it = 0
    while len(current_nodes) > 0:
        it += 1
        new_nodes = []
        for n in current_nodes:
            s = n.step.state
            if s.is_terminal:
                # Don't expand terminal nodes
                continue
            if not n.is_legal:
                # Don't expand illegal moves
                continue
            if n.step.action is None:
                # Case for the root node
                state = n.step.state
            else:
                state = n.step.next_state()
            if state.is_terminal:
                pi, v = net.predict_pi_v(state)
                n.v = v
                continue
            pi, v = net.predict_pi_v(state)
            n.v = v
            legal_actions_masked = game_def.encoder.mask_legal_actions(state)
            # Only illegal actions exceeding the best legal probability by
            # this margin will be printed
            illegal_print_th = 0.001
            # Only actions with at least this probability will be printed
            general_print_th = 0.001
            max_prob = pi[np.argmax(
                legal_actions_masked * pi)] + illegal_print_th
            for i, p in enumerate(pi):
                if p < general_print_th:
                    continue
                if p <= max_prob and legal_actions_masked[i] == 0:
                    continue
                action_str = str(game_def.encoder.all_actions[i])
                if legal_actions_masked[i] == 0:
                    action = Action.from_facts(
                        "does({},{}).".format(state.control, action_str),
                        game_def)
                else:
                    action = state.get_legal_action_from_str(action_str)
                step = Step(state, action, n.step.time_step + 1)
                step_hash = hash(step)
                if step_hash in set_visited:
                    continue
                node = TreeNet.node_class(
                    step, "a", parent=n,
                    dic={"is_legal": legal_actions_masked[i] == 1,
                         "p": p, "v": 0})
                new_nodes.append(node)
                set_visited.add(step_hash)
        current_nodes = new_nodes
    return tree
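# Tiny numpy sketch of the masking trick used above: multiplying the policy
# vector by a 0/1 legality mask zeroes out illegal actions, so the argmax of
# the product is the most probable *legal* action. Values are hypothetical.
import numpy as np

pi = np.array([0.5, 0.3, 0.2])      # network policy over all actions
legal = np.array([0, 1, 1])         # 1 where the action is legal
best_legal = np.argmax(legal * pi)  # index 1, even though index 0 has higher pi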
def build(game_def, args):
    """
    Runs the required computation to build a player. For instance,
    creating a tree or training a model. The computed information
    should be stored so it can be accessed later on using the name_style.

    Args:
        game_def (GameDef): The game definition used for the creation
        args (NameSpace): A namespace with all the attributes defined
            in add_parser_build_args
    """
    best_net = NetAlpha(game_def, args.model_name, model=None, args=args)
    best_net.load_model_from_args()
    game_def.get_random_initial()
    using_random = args.train_rand is not None
    if using_random:
        log.info("Using random seed {} for initial states in training".format(
            args.train_rand))
        game_def.get_random_initial()
        initial_states = game_def.random_init
        random.Random(args.train_rand).shuffle(initial_states)
    else:
        log.info("Using default initial state in training {} ".format(
            game_def.initial))
        initial_states = [game_def.initial]
    number_initial = len(
        initial_states) if args.n_vs > len(initial_states) else args.n_vs
    for i in range(args.n_train):
        log.info("------- Iteration {} --------".format(i))
        training_examples = []
        for e in range(args.n_episodes):
            log.debug("\t\tEpisode {}...".format(e))
            new_examples = TreeZero.run_episode(game_def, best_net)
            training_examples += new_examples
        game_def.initial = initial_states[i % len(initial_states)]
        new_net = best_net.copy()
        # Training new net
        log.info("Training net with {} examples".format(
            len(training_examples)))
        new_net.train(training_examples)
        # Comparing nets
        log.info("Comparing networks...")
        p_old = AlphaZero(game_def, "training_old", "a", best_net)
        p_new = AlphaZero(game_def, "training_new", "a", new_net)
        benchmarks = Match.vs(game_def,
                              args.n_vs, [[p_old, p_new], [p_new, p_old]],
                              initial_states, ["old_net", "new_net"],
                              penalize_illegal=args.penalize_illegal)
        log.info(benchmarks)
        new_wins = benchmarks["b"]["wins"]
        old_wins = benchmarks["a"]["wins"]
        log.info(
            "New network: Won {} Lost by illegal {}\nOld network: Won {} Lost by illegal {}"
            .format(new_wins, benchmarks["b"]["matches_lost_by_illegal"],
                    old_wins, benchmarks["a"]["matches_lost_by_illegal"]))
        # Updating best net
        if new_wins > old_wins:
            log.info(
                "{}--------------- New network is better {}vs{}------------------{}"
                .format(bcolors.FAIL, new_wins, old_wins, bcolors.ENDC))
            best_net = new_net
            best_net.save_model(
                model_name="{}/{}".format(best_net.model_name, i))
        if args.vis_tree:
            # Visualizing tree of best net
            game_def.initial = initial_states[0]
            state = game_def.get_initial_state()
            p_new.visualize_net(
                state, "train-{}-iter-{}-new".format(new_net.model_name, i))
    log.info("Saving model")
    best_net.save_model()
def simulate(game_def,
             players,
             depth=None,
             time_out_sec=None,
             penalize_illegal=False):
    """
    Simulates a match between two players.

    Args:
        game_def (GameDef): The game definition used for the match
        players (Player, Player): A tuple of the players
        depth (int): Generate until depth or a terminal state is reached
        time_out_sec (int): The number of seconds a player has to make a move
        penalize_illegal (bool): True if selecting an illegal action should
            be highly penalized by the player
    """
    signal_on = time_out_sec is not None

    def handler(signum, frame):
        raise TimeoutError("Action time out")

    if signal_on:
        signal.signal(signal.SIGALRM, handler)
    initial = game_def.initial
    state = StateExpanded.from_game_def(game_def,
                                        initial,
                                        strategy=players[0].strategy)
    match = Match([])
    time_step = 0
    continue_depth = True if depth is None else time_step < depth
    log.debug("\n--------------- Simulating match ----------------")
    log.debug("\na: {}\nb: {}\n".format(players[0].name, players[1].name))
    letters = ['a', 'b']
    response_times = {'a': [], 'b': []}
    while not state.is_terminal and continue_depth:
        current_control = letters[time_step % 2]
        if signal_on:
            signal.alarm(time_out_sec)
        t0 = time.time()
        try:
            selected_action = players[time_step % 2].choose_action(
                state,
                time_step=time_step,
                penalize_illegal=penalize_illegal)
        except TimeoutError as ex:
            log.debug("Time out for player {}, choosing random action".format(
                current_control))
            index = randint(0, len(state.legal_actions) - 1)
            selected_action = state.legal_actions[index]
        except IllegalActionError as ex:
            log.debug(
                "Player {}, choosing illegal action {} in step {} -> Match lost"
                .format(players[time_step % 2].name, str(ex.action),
                        time_step))
            state.is_terminal = True
            state.goals = {
                current_control: -1,
                letters[(time_step + 1) % 2]: +1,
            }
            selected_action = None
            match.illegal_lost = {
                "player": current_control,
                "time_step": time_step
            }
        if signal_on:
            signal.alarm(0)
        t1 = time.time()
        response_times[current_control].append(round((t1 - t0) * 1000, 3))
        step = Step(state, selected_action, time_step)
        match.add_step(step)
        time_step += 1
        continue_depth = True if depth is None else time_step < depth
        if selected_action is not None:
            state = state.get_next(
                selected_action,
                strategy_path=players[time_step % 2].strategy)
    match.add_step(Step(state, None, time_step))
    log.debug(match)
    return match, {
        k: round(sum(lst) / (len(lst) if len(lst) > 0 else 1), 3)
        for k, lst in response_times.items()
    }
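# Usage sketch (hypothetical player objects): same call shape as the earlier
# simulate, but with a per-move time budget and illegal actions ending the
# match immediately; match.illegal_lost records who lost and at which step.
#
#     match, avg_ms = simulate(game_def, (p1, p2), time_out_sec=3,
#                              penalize_illegal=True)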