def main():
    env = Environment(config2)
    state_size = (config2.vision_range * 2 + 1)**2 * config2.color_num_dims + config2.scent_num_dims
    agent = RLAgent(env, state_size=state_size)
    agent._load("outputs/models/NELQ_190000")  # ._load("NELQ.model")
    # optimizer = optim.Adam(agent.policy.parameters())
    # print list(agent.policy.parameters())
    test(agent, env)

def main(args, prt):
    config = tf.ConfigProto()  # TensorFlow session configuration object
    config.gpu_options.allow_growth = True  # allocate GPU memory on demand instead of reserving it all up front
    sess = tf.Session(config=config)

    # Load the task-specific classes
    DataGenerator, Env, reward_func, AttentionActor, AttentionCritic = \
        load_task_specific_components(args['task_name'])

    dataGen = DataGenerator(args)
    dataGen.reset()
    env = Env(args)
    # Create an RL (reinforcement learning) agent
    agent = RLAgent(args,
                    prt,
                    env,
                    dataGen,
                    reward_func,
                    AttentionActor,
                    AttentionCritic,
                    is_train=args['is_train'])
    agent.Initialize(sess)

    # Train or evaluate
    start_time = time.time()  # record the start time
    if args['is_train']:  # training mode
        prt.print_out('Training started')
        train_time_beg = time.time()  # training start time
        for step in range(args['n_train']):
            summary = agent.run_train_step()
            _, _, actor_loss_val, critic_loss_val, actor_gra_and_var_val, critic_gra_and_var_val, \
                R_val, v_val, logprobs_val, probs_val, actions_val, idxs_val = summary

            if step % args['save_interval'] == 0:
                # sess is the TensorFlow session initialized above
                agent.saver.save(sess, args['model_dir'] + '/model.ckpt', global_step=step)

            if step % args['log_interval'] == 0:
                train_time_end = time.time() - train_time_beg
                prt.print_out('Train step: {} -- time: {} -- train reward: {} -- value: {}'
                              .format(step,
                                      time.strftime("%H:%M:%S", time.gmtime(train_time_end)),
                                      np.mean(R_val), np.mean(v_val)))
                prt.print_out('    actor loss: {} -- critic loss: {}'
                              .format(np.mean(actor_loss_val), np.mean(critic_loss_val)))
                train_time_beg = time.time()

            if step % args['test_interval'] == 0:
                agent.inference(args['infer_type'])
    else:
        # Otherwise run inference only
        prt.print_out('Evaluation started')
        agent.inference(args['infer_type'])

    # Print the total running time at the end
    prt.print_out('Total time: {}'.format(
        time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time))))

def getAgentsAndAdjudicator():
    agents = [None, None]
    idx = np.random.randint(2)
    agents[idx] = RLAgent(idx + 1, None)
    agents[1 - idx], adjudicator = getOpponentRandomly(2 - idx)
    # print("RL Agent: (%d, %d) Opponent: (%d, %d)" % (idx, idx + 1, 1 - idx, 2 - idx))
    return agents, adjudicator, idx + 1

def main():
    env = Environment(config2)
    from agent import actions
    state_size = (config2.vision_range * 2 + 1)**2 * config2.color_num_dims + config2.scent_num_dims + len(actions)
    agent = RLAgent(env, state_size=state_size)
    optimizer = optim.Adam(agent.policy.parameters(), lr=agent_config['learning_rate'])
    setup_output_dir()
    train(agent, env, [0, 1, 2, 3], optimizer)

def delete_temp(self, old_model, temp_model):
    """
    Delete temporary files in case of 2 versions of the same model
    (with different amounts of training). Keep the best model and
    delete the rest.

    Parameters
    ----------
    old_model: string
        Model filename before training.
    temp_model: string
        Temporary model filename (after training).

    Return
    ------
    use_training: boolean
        False only if the old model wins against the new temp model.
        True otherwise.
    """
    use_training = True
    # Several versions of the same model
    if (old_model is not None) and (temp_model == old_model + '_temp'):
        # Confront them
        agent1 = RLAgent()
        agent1.load_model(old_model)
        agent2 = RLAgent()
        agent2.load_model(temp_model)
        results = compare_agents(agent1, agent2, n_games=10,
                                 time_limit=100, verbose=False)
        # Keep the best
        if results[3] >= results[2]:
            # The more trained agent is the best
            os.remove('Models/' + old_model + '.csv')
            os.remove('Models/data/count_' + old_model + '.csv')
            os.rename(r'Models/' + temp_model + '.csv',
                      r'Models/' + old_model + '.csv')
            os.rename(r'Models/data/count_' + temp_model + '.csv',
                      r'Models/data/count_' + old_model + '.csv')
        else:
            # The less trained agent is the best
            os.remove('Models/' + temp_model + '.csv')
            os.remove('Models/data/count_' + temp_model + '.csv')
            use_training = False
    return use_training

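# --- Added usage sketch (not part of the original example) ---
# Assuming delete_temp() is a method of the same model-manager class that owns
# the tournament() routine shown further below; `manager` and the model names
# are hypothetical and only follow the Models/ file layout the method expects.
use_training = manager.delete_temp(old_model='greedy0_6_vsSelf',
                                   temp_model='greedy0_6_vsSelf_temp')
if not use_training:
    print('The old model beat the retrained version; temporary files were removed.')
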
import torch
import random

from game import Game
from agent import RLAgent
from stupid_ai import StupidAI

game = Game()
agent = RLAgent()
stupid_ai = StupidAI()
agent.state_values = torch.load('./model.pth')


def check_board():
    win_or_tie = True
    if game.who_wins() == -1:  # Human win
        print('==================> You win!')
        game.clear()
    elif game.who_wins() == 1:  # AI win
        print('==================> You lose..')
        game.clear()
    elif game.who_wins() == 2:  # Tie
        print('==================> Tie!')
        game.clear()
    else:
        win_or_tie = False
    return win_or_tie


while True:
    x, y = stupid_ai.human_move()
    if game.board[y, x] != 0:

def tournament(self, change_opp=False):
    """
    Method to rank the different models obtained after any training.

    For the values of the factor epsilon of an RL Agent, declared in
    the init method, this method creates a tournament between the
    corresponding models. Each model plays 10 games against all the
    others and the scores of each model against another are stored in
    a CSV file. A TXT file is also generated from the CSV file: it
    displays the ranking of each model, alongside its total score
    against all other models.

    Parameter
    ---------
    change_opp: boolean
        Set to True to let agents trained with mixed opponents
        participate in the tournament.

    Outputs
    -------
    Tournament report: CSV file
        Located at: 'Models/results/(self.tournament_name).csv'.
        File storing the results of each confrontation between 2 agents.
    Tournament ranking: TXT file
        Located at: 'Models/results/(self.tournament_name).txt'.
        File ranking the agents using the results of the tournament,
        with the total score of each agent displayed.
    """
    n_players = len(self.epsilon_values) * (
        (int(self.random_training) + int(self.self_training)) *
        (1 + int(change_opp)))
    print('-----------------------------')
    print('TOURNAMENT with {} agents'.format(n_players))
    print('-----------------------------\n')

    # Initialization of scores: some rows and columns are
    # only used for saving configurations of models
    # (epsilon, opponents, change of opponent).
    scores = -np.ones((n_players + 3, n_players + 3))

    # List of opponent kinds
    training_ways = []
    if self.random_training:
        training_ways.append('Random')
        if change_opp:
            training_ways.append('RandomvsSelf')
    if self.self_training:
        training_ways.append('Self')
        if change_opp:
            training_ways.append('SelfvsRandom')

    # List of players
    players = [[epsilon, training_way]
               for epsilon in self.epsilon_values
               for training_way in training_ways]

    for idx1, player1 in enumerate(players):
        epsilon1 = player1[0]
        training_way1 = player1[1]
        filename = ('greedy' + str(epsilon1)[0] + '_' +
                    str(epsilon1)[2:] + '_vs' + training_way1)
        # Load first agent
        agent1 = RLAgent()
        agent1.load_model(filename)
        # Save config of agent1
        scores[idx1 + 3, 0] = epsilon1
        # 0: RANDOM | 1: SELF
        scores[idx1 + 3, 1] = (int(training_way1 == 'Self') +
                               int(training_way1 == 'SelfvsRandom'))
        # -1: nothing | 0: Random vs Self | 1: Self vs Random
        scores[idx1 + 3, 2] = -1 + (2 * int(training_way1 == 'SelfvsRandom') +
                                    int(training_way1 == 'RandomvsSelf'))

        for idx2, player2 in enumerate(players):
            epsilon2 = player2[0]
            training_way2 = player2[1]
            filename = ('greedy' + str(epsilon2)[0] + '_' +
                        str(epsilon2)[2:] + '_vs' + training_way2)
            # Load second agent
            agent2 = RLAgent()
            agent2.load_model(filename)
            # Save config of agent2
            scores[0, idx2 + 3] = epsilon2
            scores[1, idx2 + 3] = (int(training_way2 == 'Self') +
                                   int(training_way2 == 'SelfvsRandom'))
            scores[2, idx2 + 3] = -1 + (2 * int(training_way2 == 'SelfvsRandom') +
                                        int(training_way2 == 'RandomvsSelf'))

            print('Current match:')
            print('Player1: epsilon = {}, trained vs {}'.format(
                epsilon1, training_way1))
            print('Player2: epsilon = {}, trained vs {}'.format(
                epsilon2, training_way2))

            results = compare_agents(agent1, agent2, n_games=10,
                                     time_limit=100, verbose=False)
            # Score of agent1
            scores[idx1 + 3, idx2 + 3] = results[2]
            # Score of agent2
            scores[idx2 + 3, idx1 + 3] = results[3]
            print('------')

    # Update tournament file name
    name = self.tournament_name[:-1]
    nbr = int(self.tournament_name[-1])
    nbr += 1
    self.tournament_name = name + str(nbr)

    # Save tournament
    np.savetxt(str('Models/results/' + self.tournament_name + '.csv'),
               scores, delimiter=',')

    # Rank players
    self.tournament_ranking(self.tournament_name, self.tournament_name)
    print('Results of tournament are stored in {}.csv and {}.txt\n'.format(
        self.tournament_name, self.tournament_name))

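# --- Added usage sketch (not part of the original example) ---
# Assuming tournament() is a method of a manager class holding the attributes
# it reads (epsilon_values, random_training, self_training, tournament_name);
# the class name `Trainer` and its constructor arguments are hypothetical.
trainer = Trainer(epsilon_values=[0.2, 0.6],
                  random_training=True,
                  self_training=True)
trainer.tournament_name = 'tournament_0'  # must end in a digit: tournament() increments it
trainer.tournament(change_opp=True)
# Results would then be written to Models/results/tournament_1.csv and .txt.
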
                    rl_agent.win()
                else:
                    # print("Draw!")
                    rl_agent.lose()
                break
            board.print()
            action = int(input()) - 1
            board.update_state(player="O", action=action)
            done = board.check()
            if done != 2:
                # if done == -1:
                #     print("Game Over!")
                # elif done == 0:
                #     print("Draw!")
                rl_agent.lose()
                break
            board.print()
        board.print()
    print("V: ", rl_agent.wins, ", L: ", rl_agent.loses, " D: ",
          experiments - rl_agent.loses - rl_agent.wins)


board = Board(print_map=False)
rl_agent = RLAgent(_id="X")
opponent = Agent(_id="O")

train(board, rl_agent, opponent)
rl_agent.another_opponent()
test(board, rl_agent)

def train(config):
    # train env
    print('Setting up TextWorld environment...')
    batch_size = config['training']['scheduling']['batch_size']
    env_id = gym_textworld.make_batch(env_id=config['general']['env_id'],
                                      batch_size=batch_size,
                                      parallel=True)
    env = gym.make(env_id)
    env.seed(config['general']['random_seed'])

    # valid and test env
    run_test = config['general']['run_test']
    if run_test:
        test_batch_size = config['training']['scheduling']['test_batch_size']
        # valid
        valid_env_name = config['general']['valid_env_id']
        valid_env_id = gym_textworld.make_batch(env_id=valid_env_name,
                                                batch_size=test_batch_size,
                                                parallel=True)
        valid_env = gym.make(valid_env_id)
        valid_env.seed(config['general']['random_seed'])
        # test
        test_env_name_list = config['general']['test_env_id']
        assert isinstance(test_env_name_list, list)
        test_env_id_list = [
            gym_textworld.make_batch(env_id=item,
                                     batch_size=test_batch_size,
                                     parallel=True)
            for item in test_env_name_list
        ]
        test_env_list = [
            gym.make(test_env_id) for test_env_id in test_env_id_list
        ]
        for i in range(len(test_env_list)):
            test_env_list[i].seed(config['general']['random_seed'])
    print('Done.')

    # Set the random seed manually for reproducibility.
    np.random.seed(config['general']['random_seed'])
    torch.manual_seed(config['general']['random_seed'])
    if torch.cuda.is_available():
        if not config['general']['use_cuda']:
            logger.warning(
                "WARNING: CUDA device detected but 'use_cuda: false' found in config.yaml")
        else:
            torch.backends.cudnn.deterministic = True
            torch.cuda.manual_seed(config['general']['random_seed'])
    else:
        config['general']['use_cuda'] = False  # Disable CUDA.

    revisit_counting = config['general']['revisit_counting']
    replay_batch_size = config['general']['replay_batch_size']
    replay_memory_capacity = config['general']['replay_memory_capacity']
    replay_memory_priority_fraction = config['general'][
        'replay_memory_priority_fraction']

    word_vocab = dict2list(env.observation_space.id2w)
    word2id = {}
    for i, w in enumerate(word_vocab):
        word2id[w] = i

    # collect all verbs and nouns
    verb_list = ["go", "take"]
    object_name_list = ["east", "west", "north", "south", "coin"]
    verb_map = [word2id[w] for w in verb_list if w in word2id]
    noun_map = [word2id[w] for w in object_name_list if w in word2id]

    agent = RLAgent(
        config,
        word_vocab,
        verb_map,
        noun_map,
        replay_memory_capacity=replay_memory_capacity,
        replay_memory_priority_fraction=replay_memory_priority_fraction)

    init_learning_rate = config['training']['optimizer']['learning_rate']
    exp_dir = get_experiment_dir(config)
    summary = SummaryWriter(exp_dir)

    parameters = filter(lambda p: p.requires_grad, agent.model.parameters())
    if config['training']['optimizer']['step_rule'] == 'sgd':
        optimizer = torch.optim.SGD(parameters, lr=init_learning_rate)
    elif config['training']['optimizer']['step_rule'] == 'adam':
        optimizer = torch.optim.Adam(parameters, lr=init_learning_rate)

    log_every = 100
    reward_avg = SlidingAverage('reward avg', steps=log_every)
    step_avg = SlidingAverage('step avg', steps=log_every)
    loss_avg = SlidingAverage('loss avg', steps=log_every)

    # save & reload checkpoint only in 0th agent
    best_avg_reward = -10000
    best_avg_step = 10000

    # step penalty
    discount_gamma = config['general']['discount_gamma']
    provide_prev_action = config['general']['provide_prev_action']

    # epsilon greedy
    epsilon_anneal_epochs = config['general']['epsilon_anneal_epochs']
    epsilon_anneal_from = config['general']['epsilon_anneal_from']
    epsilon_anneal_to = config['general']['epsilon_anneal_to']

    # counting reward
    revisit_counting_lambda_anneal_epochs = config['general'][
        'revisit_counting_lambda_anneal_epochs']
    revisit_counting_lambda_anneal_from = config['general'][
        'revisit_counting_lambda_anneal_from']
    revisit_counting_lambda_anneal_to = config['general'][
        'revisit_counting_lambda_anneal_to']

    epsilon = epsilon_anneal_from
    revisit_counting_lambda = revisit_counting_lambda_anneal_from

    for epoch in range(config['training']['scheduling']['epoch']):
        agent.model.train()
        obs, infos = env.reset()
        agent.reset(infos)

        print_command_string, print_rewards = [[] for _ in infos], [[] for _ in infos]
        print_interm_rewards = [[] for _ in infos]
        print_rc_rewards = [[] for _ in infos]

        dones = [False] * batch_size
        rewards = None
        avg_loss_in_this_game = []

        new_observation_strings = agent.get_observation_strings(infos)
        if revisit_counting:
            agent.reset_binarized_counter(batch_size)
            revisit_counting_rewards = agent.get_binarized_count(
                new_observation_strings)

        current_game_step = 0
        prev_actions = ["" for _ in range(batch_size)] if provide_prev_action else None
        input_description, description_id_list = agent.get_game_step_info(
            obs, infos, prev_actions)

        while not all(dones):
            v_idx, n_idx, chosen_strings, state_representation = agent.generate_one_command(
                input_description, epsilon=epsilon)
            obs, rewards, dones, infos = env.step(chosen_strings)
            new_observation_strings = agent.get_observation_strings(infos)
            if provide_prev_action:
                prev_actions = chosen_strings

            # counting
            if revisit_counting:
                revisit_counting_rewards = agent.get_binarized_count(
                    new_observation_strings, update=True)
            else:
                revisit_counting_rewards = [0.0 for _ in range(batch_size)]
            agent.revisit_counting_rewards.append(revisit_counting_rewards)
            revisit_counting_rewards = [
                float(format(item, ".3f")) for item in revisit_counting_rewards
            ]

            for i in range(len(infos)):
                print_command_string[i].append(chosen_strings[i])
                print_rewards[i].append(rewards[i])
                print_interm_rewards[i].append(infos[i]["intermediate_reward"])
                print_rc_rewards[i].append(revisit_counting_rewards[i])

            if type(dones) is bool:
                dones = [dones] * batch_size
            agent.rewards.append(rewards)
            agent.dones.append(dones)
            agent.intermediate_rewards.append(
                [info["intermediate_reward"] for info in infos])

            # compute rewards, and push into replay memory
            rewards_np, rewards, mask_np, mask = agent.compute_reward(
                revisit_counting_lambda=revisit_counting_lambda,
                revisit_counting=revisit_counting)

            curr_description_id_list = description_id_list
            input_description, description_id_list = agent.get_game_step_info(
                obs, infos, prev_actions)

            for b in range(batch_size):
                if mask_np[b] == 0:
                    continue
                if replay_memory_priority_fraction == 0.0:
                    # vanilla replay memory
                    agent.replay_memory.push(curr_description_id_list[b],
                                             v_idx[b], n_idx[b], rewards[b],
                                             mask[b], dones[b],
                                             description_id_list[b],
                                             new_observation_strings[b])
                else:
                    # prioritized replay memory
                    is_prior = rewards_np[b] > 0.0
                    agent.replay_memory.push(is_prior,
                                             curr_description_id_list[b],
                                             v_idx[b], n_idx[b], rewards[b],
                                             mask[b], dones[b],
                                             description_id_list[b],
                                             new_observation_strings[b])

            if current_game_step > 0 and current_game_step % config["general"][
                    "update_per_k_game_steps"] == 0:
                policy_loss = agent.update(replay_batch_size,
                                           discount_gamma=discount_gamma)
                if policy_loss is None:
                    continue
                loss = policy_loss
                # Backpropagate
                optimizer.zero_grad()
                loss.backward(retain_graph=True)
                # `clip_grad_norm` helps prevent the exploding gradient problem
                # in RNNs / LSTMs. (Newer PyTorch versions rename this call to
                # `clip_grad_norm_`.)
                torch.nn.utils.clip_grad_norm(
                    agent.model.parameters(),
                    config['training']['optimizer']['clip_grad_norm'])
                optimizer.step()  # apply gradients
                avg_loss_in_this_game.append(to_np(policy_loss))
            current_game_step += 1

        agent.finish()
        avg_loss_in_this_game = np.mean(avg_loss_in_this_game)
        reward_avg.add(agent.final_rewards.mean())
        step_avg.add(agent.step_used_before_done.mean())
        loss_avg.add(avg_loss_in_this_game)

        # annealing
        if epoch < epsilon_anneal_epochs:
            epsilon -= (epsilon_anneal_from - epsilon_anneal_to) / float(epsilon_anneal_epochs)
        if epoch < revisit_counting_lambda_anneal_epochs:
            revisit_counting_lambda -= (
                revisit_counting_lambda_anneal_from -
                revisit_counting_lambda_anneal_to
            ) / float(revisit_counting_lambda_anneal_epochs)

        # Tensorboard logging
        # (1) Log some numbers
        if (epoch + 1) % config["training"]["scheduling"]["logging_frequency"] == 0:
            summary.add_scalar('avg_reward', reward_avg.value, epoch + 1)
            summary.add_scalar('curr_reward', agent.final_rewards.mean(), epoch + 1)
            summary.add_scalar('curr_interm_reward',
                               agent.final_intermediate_rewards.mean(), epoch + 1)
            summary.add_scalar('curr_counting_reward',
                               agent.final_counting_rewards.mean(), epoch + 1)
            summary.add_scalar('avg_step', step_avg.value, epoch + 1)
            summary.add_scalar('curr_step',
                               agent.step_used_before_done.mean(), epoch + 1)
            summary.add_scalar('loss_avg', loss_avg.value, epoch + 1)
            summary.add_scalar('curr_loss', avg_loss_in_this_game, epoch + 1)

        msg = 'E#{:03d}, R={:.3f}/{:.3f}/IR{:.3f}/CR{:.3f}, S={:.3f}/{:.3f}, L={:.3f}/{:.3f}, epsilon={:.4f}, lambda_counting={:.4f}'
        msg = msg.format(epoch,
                         np.mean(reward_avg.value),
                         agent.final_rewards.mean(),
                         agent.final_intermediate_rewards.mean(),
                         agent.final_counting_rewards.mean(),
                         np.mean(step_avg.value),
                         agent.step_used_before_done.mean(),
                         np.mean(loss_avg.value),
                         avg_loss_in_this_game,
                         epsilon,
                         revisit_counting_lambda)

        if (epoch + 1) % config["training"]["scheduling"]["logging_frequency"] == 0:
            print("=========================================================")
            for prt_cmd, prt_rew, prt_int_rew, prt_rc_rew in zip(
                    print_command_string, print_rewards,
                    print_interm_rewards, print_rc_rewards):
                print("------------------------------")
                print(prt_cmd)
                print(prt_rew)
                print(prt_int_rew)
                print(prt_rc_rew)
            print(msg)

        # test on a different set of games
        if run_test and (epoch + 1) % config["training"]["scheduling"][
                "logging_frequency"] == 0:
            valid_R, valid_IR, valid_S = test(config, valid_env, agent,
                                              test_batch_size, word2id)
            summary.add_scalar('valid_reward', valid_R, epoch + 1)
            summary.add_scalar('valid_interm_reward', valid_IR, epoch + 1)
            summary.add_scalar('valid_step', valid_S, epoch + 1)

            # save & reload checkpoint by best valid performance
            model_checkpoint_path = config['training']['scheduling'][
                'model_checkpoint_path']
            if valid_R > best_avg_reward or (valid_R == best_avg_reward and
                                             valid_S < best_avg_step):
                best_avg_reward = valid_R
                best_avg_step = valid_S
                torch.save(agent.model.state_dict(), model_checkpoint_path)
                print("========= saved checkpoint =========")

            for test_id in range(len(test_env_list)):
                R, IR, S = test(config, test_env_list[test_id], agent,
                                test_batch_size, word2id)
                summary.add_scalar('test_reward_' + str(test_id), R, epoch + 1)
                summary.add_scalar('test_interm_reward_' + str(test_id), IR, epoch + 1)
                summary.add_scalar('test_step_' + str(test_id), S, epoch + 1)

def train(n_epochs, epsilon, gamma, load_model, filename, random_opponent,
          n_games_test, freq_test, n_skip_games=int(0), verbose=False):
    """
    Train 2 agents by making them play and learn together. Save the
    learned Q-function into a CSV file. It is possible to confront 1 of
    the agents (against either the user or a Random Agent) during
    training, as often as one wants. It is also possible to train an
    already trained model.

    Parameters
    ----------
    n_epochs: int
        Number of games used for training.
    epsilon: float (in [0,1])
        Fraction of greedy decisions during training of the 2 RL Agents.
    gamma: float (in [0,1])
        Factor of significance of first actions over last ones for the
        2 RL Agents.
    load_model: string
        CSV filename in which is stored the learned Q-function of an
        agent. If load_model = 'model', the function loads the model
        './Models/model.csv'. If load_model is not None, the previous
        parameters epsilon and gamma are used for a second training.
    filename: string
        Name of the CSV file that will store the learned Q-function of
        one of the agents. The path to the CSV file is then
        ./Models/filename.csv. The counter of state-action pairs is
        also stored at ./Models/data/count_filename.csv for future
        training.
    random_opponent: boolean
        If set to True, the function trains 1 RL Agent by making it
        play against a Random Agent. Otherwise, the RL Agent is trained
        by playing against another version of itself.
    n_games_test: int
        Number of games one of the RL Agents plays against a Random
        Agent for testing. If set to 0, the RL Agents will not be
        tested by a Random Agent.
    freq_test: int
        Number of epochs after which one of the RL Agents plays
        n_games_test games against a Random Agent. If set to 1000, one
        of the RL Agents is tested against a Random Agent every 1000
        epochs of training. If set to 0, the test occurs at the last
        epoch of training only. If set to -1, none of the agents is
        tested during training.
    n_skip_games: int
        Number of epochs after which the user can choose to play
        against one of the learning agents. If set to 1000, the user
        can choose to play against one agent every 1000 games. If set
        to 0, the user can choose to play against one agent at the last
        epoch only. If set to -1, no choice is offered and the user
        cannot test any agent.
    verbose: boolean
        If set to True, each game action during training has a written
        explanation.

    Return
    ------
    learning_results: list
        Only significant with n_games_test > 0 (otherwise, empty list
        by default). List of each n_epochs // freq_test epoch test
        results against a Random Agent. Each test result is a list:
        [current epoch, score of RL Agent, number of finished games,
        n_games_test].
    """
    # Learning agent
    agent1 = RLAgent(epsilon, gamma)
    if load_model is not None:
        agent1.load_model(load_model)

    # Choose opponent
    if random_opponent:
        agent2 = RandomAgent()
        time_limit = None
        print('Training vs Random')
    else:
        agent2 = RLAgent(epsilon, gamma)
        if load_model is not None:
            agent2.load_model(load_model)
        time_limit = None
        print('Training vs Self')

    start_idx = 0
    scores = [0, 0]

    # If the user only confronts the agent at the last epoch
    # or if no confrontation
    if n_skip_games in [-1, 0]:
        n_skip_games = n_epochs - n_skip_games

    # Boolean for a game between the user and agent1 preceding a game
    # between agent1 and agent2
    play_checkpoint_usr = False

    # If there is a test of agent1 at the last epoch only or no test
    if freq_test in [-1, 0]:
        freq_test = n_epochs - freq_test

    # Number of games between agent1 and a Random Agent for testing
    n_games_test_mem = n_games_test
    learning_results = []

    # Start training
    print('Training epoch:')
    for epoch in range(1, n_epochs + 1):
        if epoch % (n_epochs // 10) == 0:
            print(epoch, '/', n_epochs)

        # Update boolean for playing with user
        play_checkpoint_usr = bool(epoch % n_skip_games == 0)
        if play_checkpoint_usr:
            # Print training status
            print('Number of games: ', epoch)
            print('Scores: ', scores)
            # Ask user to play
            play = int(input('Play ? (1 Yes | 0 No)\n'))
            play_checkpoint_usr = bool(play)

        # Update boolean for test
        n_games_test = int(epoch % freq_test == 0) * n_games_test_mem

        # Start game
        game_over, winner, test_results = game_2Agents(
            agent1,
            agent2,
            start_idx=start_idx,
            train=True,
            time_limit=time_limit,
            n_games_test=n_games_test,
            play_checkpoint_usr=play_checkpoint_usr,
            verbose=verbose)
        assert game_over, str('Game not over but new game' +
                              ' beginning during training')
        if winner in [0, 1]:
            scores[winner] += 1

        # Save test games of agent1 against a Random Agent
        if bool(n_games_test):
            assert len(test_results) != 0, \
                'Agent1 has been tested but there is no result of that.'
            learning_results.append(
                [epoch, test_results[2], test_results[0], test_results[1]])

        # Next round
        start_idx = 1 - start_idx

    # Save Q-function of agent1
    np.savetxt(str('Models/' + filename + '.csv'), agent1.Q, delimiter=',')
    # Save stats for learning rate of agent1
    np.savetxt(str('Models/data/count_' + filename + '.csv'),
               agent1.count_state_action, delimiter=',')

    return learning_results

if __name__ == "__main__":

    train(n_epochs=5000,
          epsilon=0.6,
          gamma=1.0,
          load_model=None,
          filename='greedy0_6_vsSelf_test',
          random_opponent=False,
          n_games_test=0,
          freq_test=-1,
          n_skip_games=-1,
          verbose=False)

    agent1 = RLAgent()
    agent1.load_model('greedy0_2_vsRandomvsSelf')
    agent2 = RLAgent()
    agent2.load_model('greedy0_6_vsSelf_test')
    results = compare_agents(agent1, agent2, n_games=10,
                             time_limit=None, verbose=False)
    print(results)

import random

import torch

from game import Game
from agent import RLAgent
from moves import Moves

game = Game()
agent = RLAgent()
moves = Moves()

num_win = 0   # number of wins by the human
num_lose = 0  # number of wins by the AI (losses by the human)
num_tie = 0

random.seed(1000)


def check_board_and_may_update_state_values():
    global num_win, num_lose, num_tie
    win_or_tie = True
    if game.who_wins() == -1:  # human win
        print("YOU WIN!!")
        agent.update_state_values(0)
        num_win += 1
    elif game.who_wins() == 1:  # AI win
        print("YOU LOSE!!")
        agent.update_state_values(1)

import torch
import random

from game import Game
from agent import RLAgent
from moves import Moves

game = Game()
agent = RLAgent()
moves = Moves()
agent.state_values = torch.load('./tic_tac_toe.pth')


def check_board():
    win_or_tie = True
    if game.who_wins() == -1:  # human win
        print("YOU WIN!!")
        game.clear()
    elif game.who_wins() == 1:  # AI win
        print("YOU LOSE!!")
        game.clear()
    elif game.who_wins() == 2:  # tie
        print("TIE!!")
        game.clear()
    else:
        win_or_tie = False
    return win_or_tie

def game_mngr():
    """
    Game manager, used for navigation among the different choices
    offered to the user.
    """
    # Options
    command = options('PLAY', 'RULES', 'Tap 1 to play or 2 to read the rules')

    # Rules page
    if int(command) == 2:
        print_rules()
        # Go back
        print('Tap 1 to come back to the main menu\n')
        comeback = tap_valid_digits([1])
        if int(comeback):
            game_mngr()

    # Game page
    if int(command) == 1:
        # Options
        players = options('PLAYER', 'PLAYERS', 'How many players ?',
                          comeback=True)
        # Go back
        if int(players) == 0:
            game_mngr()

        # 2 players
        if int(players) == 2:
            # Ask players' names
            player1, player2 = input_names(n_players=2)
            # Init scores
            scores = [0, 0]
            # Games
            tapnswap = TapnSwap()
            over = False
            while not over:
                game_over, winner = game_1vs1(tapnswap, player1, player2)
                scores[winner] += 1
                if game_over:
                    # Display scores
                    restart = display_endgame(scores, player1, player2)
                    # Go back
                    if not restart:
                        over = True
                        game_mngr()

        # 1 player
        if int(players) == 1:
            # Options
            level = options('EASY', 'DIFFICULT', 'Which level ?',
                            comeback=True)
            # Go back
            if int(level) == 0:
                game_mngr()
            # Define agent
            elif int(level) == 1:
                agent = RandomAgent()  # easy
            else:
                # Load agent
                agent = RLAgent()
                agent.load_model('greedy0_2_vsRandomvsSelf')  # difficult
            # Ask player's name
            player = input_names(n_players=1)
            # Init scores
            scores = [0, 0]
            # Games
            tapnswap = TapnSwap()
            over = False
            while not over:
                game_over, winner = game_1vsAgent(tapnswap, player, agent,
                                                  greedy=False)
                scores[winner] += 1
                if game_over:
                    # Display scores
                    restart = display_endgame(scores, player, 'Computer')
                    # Go back
                    if not restart:
                        over = True
                        game_mngr()

args = parser.parse_args()

# create world
world = World(args.config_file, thread_num=args.thread)

# create agents
agents = []
for i in world.intersections:
    action_space = gym.spaces.Discrete(len(i.phases))
    agents.append(
        RLAgent(
            action_space,
            LaneVehicleGenerator(world, i, ["lane_count"],
                                 in_only=True, average="road"),
            LaneVehicleGenerator(world, i, ["lane_waiting_count"],
                                 in_only=True, average="all", negative=True)))

# create metric
metric = TravelTimeMetric(world)

# create env
env = TSCEnv(world, agents, metric)

# simulate
obs = env.reset()
for i in range(args.steps):