def run_model(game_count=1):
    """ run model for game_count games """
    # Make environment
    env = WhaleEnv(
        config={
            'active_player': 0,
            'seed': datetime.utcnow().microsecond,
            'env_num': 1,
            'num_players': 5
        })
    # Set up agents
    action_num = 3
    agent = DqnAgent(action_num=action_num, player_num=5)
    agent_0 = RandomAgent(action_num=action_num)
    agent_1 = RandomAgent(action_num=action_num)
    agent_2 = RandomAgent(action_num=action_num)
    agent_3 = RandomAgent(action_num=action_num)
    agents = [agent, agent_0, agent_1, agent_2, agent_3]
    env.set_agents(agents)
    agent.load_pretrained()
    for game in range(game_count):
        # Generate data from the environment
        trajectories = env.run(is_training=False)
        # Print out the trajectories
        print('\nEpisode {}'.format(game))
        for i, trajectory in enumerate(trajectories):
            print('\tPlayer {}'.format(i))
            p(trajectory[-1])  # p() is presumably a printing helper defined elsewhere
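# For reference, a minimal sketch of the RandomAgent that these Whale examples
# construct with RandomAgent(action_num=...). The step()/eval_step() split
# mirrors common RLCard-style agents; the state format (a dict with a
# 'legal_actions' key) is an assumption, not the project's confirmed interface.
import numpy as np


class RandomAgent:
    def __init__(self, action_num):
        self.action_num = action_num

    def step(self, state):
        # Pick uniformly among the legal actions if the state lists them,
        # otherwise among all action indices.
        legal_actions = state.get('legal_actions', list(range(self.action_num)))
        return np.random.choice(legal_actions)

    def eval_step(self, state):
        # A random policy behaves the same during evaluation.
        return self.step(state)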
def test():
    board = Board.from_config_file('board.cfg')
    pile = Pile.from_config_file('pile.cfg')
    players = [
        Player('Bot_1', RandomAgent()),
        Player('Bot_2', RandomAgent())
    ]
    game_count = 10000
    starter_winner_count = 0
    total_move_count = 0
    for i in range(game_count):
        new_board = copy.deepcopy(board)
        new_pile = copy.deepcopy(pile)
        new_players = copy.deepcopy(players)
        state = State(new_board, new_players, new_pile, False, False)
        game = Game(state)
        game.execute_setup()
        game.play_game()
        print(f'At game #{i:04}')
        total_move_count += game.move_count
        if game.starter == game.winner:
            starter_winner_count += 1
    print(f'Percentage of games won by starter: {starter_winner_count/game_count*100}%')
    print(f'Average number of total moves in a game: {total_move_count/game_count}')
def main():
    NUM_OF_GAMES = 100
    ai = RandomAgent()
    s = Sim()
    all_records = []
    for i in tqdm(range(NUM_OF_GAMES)):
        records = []
        # Prepare the simulator; with no arguments the board starts empty.
        s.reset_s()
        while True:
            # Save the result before ban is overwritten?
            reshape_self, reshape_opp, reshape_ban, kou = s.get_s()
            if reshape_ban != 2:
                act_num = ai.act(reshape_self, reshape_opp, reshape_ban, kou)
                s.act(act_num)
            else:
                # Black win: +1, black loss: -1
                outcome = 2 * is_black_win(s) - 1
                break
            records.append(
                [reshape_self[1:], reshape_opp[1:], reshape_ban, act_num])
        # Append the outcome to every record, alternating sign by turn
        for j in range(len(records)):
            records[j].append((1. - 2. * (j % 2)) * outcome)
        all_records.append(records)
    # Write out the stacked records
    w_fnc(all_records, 'all_records.csv')
    rec = [records[0][-1] for records in all_records]
    kuro_win = NUM_OF_GAMES / 2 + sum(rec) / 2
    siro_win = NUM_OF_GAMES - kuro_win
    print('Black wins: {0}, White wins: {1}'.format(kuro_win, siro_win))
def main():
    import random  # hoisted out of the game loop below
    # Environment options:
    #   rollout_video=True: Generate an .mp4 video of the battle at the end
    #   verbose=False: Print debug statements
    unique_id = int(time.time())
    video_filename = 'video_{}.mp4'.format(unique_id)
    env = fog_of_war.FogOfWarMultiplayerEnvironment(
        video_filename=video_filename)
    state = env.reset()
    done = False
    # Two agents play each other: the learner (blue) and the adversary (red)
    blue_agent = RandomAgent(env.action_space())
    red_agent = RandomAgent(env.action_space())
    while not done:
        # The buildable units are named Rock, Paper, and Scissors
        #   1: Build paper in reserves
        #   2: Build paper in front
        #   3: Build rock in reserves
        #   4: Build rock in front
        #   5: Build scissors in reserves
        #   6: Build scissors in front
        #   7: Scout to reveal the enemy's army
        blue_action = random.choice([1, 4, 7])
        red_action = random.choice([5, 6, 7])
        # Take an action and simulate the game for one time step (~10 seconds)
        state, reward, done, info = env.step(blue_action, red_action)
        # The state is a tuple of:
        #   features_minimap: np array of features from the minimap view
        #   features_screen: np array of features from the camera view
        #   rgb_minimap: np array of an RGB pixel view of the minimap
        #   rgb_screen: np array of an RGB pixel rendered frame of StarCraft II
        features_minimap, features_screen, rgb_minimap, rgb_screen = state
        # Example code for visualizing the state
        filename = "output_frame_{}_{:05d}.jpg".format(unique_id, env.steps)
        blue_caption = fog_of_war.action_to_name[blue_action]
        red_caption = fog_of_war.action_to_name[red_action]
        caption = 't={} R={} Left: {} Right: {}'.format(
            env.steps, reward, blue_caption, red_caption)
        top = imutil.show(rgb_minimap, resize_to=(800, 480),
                          return_pixels=True, display=False)
        bottom = imutil.show(rgb_screen, resize_to=(800, 480),
                             return_pixels=True, display=False)
        imutil.show(np.concatenate([top, bottom], axis=0),
                    filename=filename, caption=caption)
    print('Finished game')
def setup_algorithm(self, algorithm):
    """ Initialize agent that uses the given algorithm """
    if algorithm == "random":
        from random_agent import RandomAgent
        self.agent = RandomAgent(self.actions, self.grid_shape)
    # Add new agents here
    else:
        raise ValueError(
            "No algorithm implemented for '{}'".format(algorithm))
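# A minimal sketch of the random_agent.RandomAgent imported above; the
# (actions, grid_shape) constructor and the act() method are inferred from
# this call site and are assumptions about the real module.
import random


class RandomAgent:
    def __init__(self, actions, grid_shape):
        self.actions = list(actions)  # legal actions for the grid world
        self.grid_shape = grid_shape  # kept only for interface parity

    def act(self, observation=None):
        # Ignore the observation and choose uniformly at random.
        return random.choice(self.actions)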
def main():
    board = Board.from_config_file('board.cfg')
    pile = Pile.from_config_file('pile.cfg')
    players = [
        Player('Bot_1', RandomAgent()),
        Player('Bot_2', RandomAgent())
    ]
    state = State(board, players, pile, True, True)
    game = Game(state)
    game.execute_setup()
    game.play_game()
def main():
    script_args = parse_args()
    if script_args.use_retro:
        env = retro.make(script_args.env_name)
    else:
        env = gym.make(script_args.env_name)
    random_agent = RandomAgent(env)
    random_agent.train(script_args.num_episodes, script_args.render_train)
    play_reward = random_agent.play()
    print(f'Reward after playing: {play_reward}')
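# A minimal sketch of a Gym-compatible RandomAgent with the train()/play()
# methods called in main() above; the method names come from that call site,
# and the classic 4-tuple env.step() API is assumed.
class RandomAgent:
    def __init__(self, env):
        self.env = env

    def train(self, num_episodes, render=False):
        # Nothing to learn; just roll out random episodes.
        for _ in range(num_episodes):
            self.env.reset()
            done = False
            while not done:
                if render:
                    self.env.render()
                _, _, done, _ = self.env.step(self.env.action_space.sample())

    def play(self):
        # Play a single episode and return the accumulated reward.
        self.env.reset()
        done, total_reward = False, 0.0
        while not done:
            _, reward, done, _ = self.env.step(self.env.action_space.sample())
            total_reward += reward
        return total_reward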
def main():
    # Preparation of pygame
    pygame.init()  # initialize
    AIis = 2  # 1 is black, 2 is white
    ai = RandomAgent()
    bp = [115, 15]
    g = 40
    mar = 20
    x, y = 0, 0
    pixels = [bp, g, mar]
    screen = pygame.display.set_mode((600, 400))  # set the window size
    pygame.display.set_caption("GoSimulator")  # set the window title bar text
    sysfont = pygame.font.SysFont(None, 40)
    # Prepare the simulator; with no arguments the board starts empty.
    s = Sim()
    s.reset_s()
    while True:
        screen.fill((0, 100, 0,))  # set the background color
        # state, ban = s.get_s()
        # Reshape the state for the GUI
        reshape_self, reshape_opp, reshape_ban, kou = s.get_s()
        ban = 2 - int(reshape_ban)
        # Handling for game over, i.e. when ban == 0
        if ban != 0:
            state = ban * reshape_self + (3. - ban) * reshape_opp
        bl, wh = s.get_eval()  # TODO: for debugging
        draw(pygame, screen, sysfont, pixels, state, ban, x, y, bl, wh)
        pygame.display.update()  # refresh the display
        if ban == AIis:
            s.act(ai.act(reshape_self, reshape_opp, reshape_ban, kou))
        else:
            for event in pygame.event.get():
                # Quit handling
                if event.type == QUIT:
                    pygame.quit()
                    sys.exit()
                if event.type == MOUSEBUTTONDOWN and event.button == 1:
                    x, y = event.pos
                    num = convert_to_num(pixels, x, y)
                    if num == -1:
                        s.reset_s()
                    else:
                        if num in s.regal_acts() and ban != 0:
                            s.act(num)
                            bl, wh = s.get_eval()
                            if len(bl) != len(set(bl)):
                                print('[error] get_eval has overlap')
def train_model(max_episodes=1000):
    """
    Trains a DQN agent to play the Whale game by trial and error
    :return: None
    """
    # buffer = ReplayBuffer()
    # Make environment
    env = WhaleEnv(
        config={
            'allow_step_back': False,
            'allow_raw_data': False,
            'single_agent_mode': False,
            'active_player': 0,
            'record_action': False,
            'seed': 0,
            'env_num': 1,
            'num_players': 5
        })
    # Set a global seed using time
    set_global_seed(datetime.utcnow().microsecond)
    # Set up agents
    action_num = 3
    agent = DqnAgent(dim=1, action_num=action_num)
    agent_0 = RandomAgent(action_num=action_num)
    agent_1 = RandomAgent(action_num=action_num)
    agent_2 = RandomAgent(action_num=action_num)
    agent_3 = RandomAgent(action_num=action_num)
    agents = [agent, agent_0, agent_1, agent_2, agent_3]
    env.set_agents(agents)
    agent.load_pretrained()
    UPDATE_TARGET_RATE = 20
    GAME_COUNT_PER_EPISODE = 2
    min_perf, max_perf = 1.0, 0.0
    for episode_cnt in range(1, max_episodes + 1):
        loss = agent.train(
            collect_gameplay_experiences(env, agents, GAME_COUNT_PER_EPISODE))
        avg_reward = evaluate_training_result(env, agent)
        target_update = episode_cnt % UPDATE_TARGET_RATE == 0
        if avg_reward > max_perf:
            max_perf = avg_reward
            agent.save_weight()
        if avg_reward < min_perf:
            min_perf = avg_reward
        print('{0:03d}/{1} perf:{2:.2f}(min:{3} max:{4}) '
              'up:{5:1d} loss:{6}'.format(episode_cnt, max_episodes,
                                          avg_reward, min_perf, max_perf,
                                          target_update, loss))
        if target_update:
            agent.update_target_network()
    # env.close()
    print('training end')
def run():
    tf.reset_default_graph()
    sess = tf.Session()
    with sess:
        with tf.device("/cpu:0"):
            gym_env = gym.make(FLAGS.game)
            if FLAGS.monitor:
                gym_env = gym.wrappers.Monitor(
                    gym_env, FLAGS.experiments_dir + '/baseline', force=True)
            agent = RandomAgent(gym_env)
            agent.play()
def test_die_encodings():
    """Test the dice are encoded correctly in the attack state."""
    agent = RandomAgent()
    attacker = ship.Ship(name="Attacker",
                         template=ship_templates["Attacker"],
                         upgrades=[], player_number=1)
    template_front_dice = 0
    for color in ['Red', 'Blue', 'Black']:
        if 0 < len(ship_templates["Attacker"][f"Armament Front {color}"]):
            template_front_dice += int(
                ship_templates["Attacker"][f"Armament Front {color}"])
    dice_begin = ArmadaTypes.hull_zones.index('front') * len(
        ArmadaDice.die_colors)
    dice_end = dice_begin + len(ArmadaDice.die_colors)
    front_dice = int(attacker.get_range('dice')[dice_begin:dice_end].sum())
    # The ship encoding should have the same dice as the template
    assert front_dice == template_front_dice
    defender = ship.Ship(name="Defender",
                         template=ship_templates["All Defense Tokens"],
                         upgrades=[], player_number=2)
    encoding, world_state = make_encoding(attacker, defender, "short", agent)
    attack_state_encoding = Encodings.encodeAttackState(world_state)
    die_offset = Encodings.getAttackDiceOffset()
    # The attack state should have a roll with as many dice as the ship has.
    dice_encoding = attack_state_encoding[die_offset:die_offset +
                                          Encodings.dieEncodingSize()]
    assert int(dice_encoding.sum().item()) == front_dice
def run_random_agent_graphics(self):
    # Runs the random agent with graphics
    agent = RandomAgent(self._board)
    self.event = Event()
    Thread(target=agent.play_graphics, args=[self.event]).start()
    brd_view = BoardView(brd=self._board)
    brd_view.start(self)
def main(algorithm):
    env_names = ['Deterministic-4x4-FrozenLake-v0',
                 'Deterministic-8x8-FrozenLake-v0',
                 'Stochastic-8x8-FrozenLake-v0',
                 'Stochastic-4x4-FrozenLake-v0',
                 'Blackjack-v0', 'Roulette-v0', 'NChain-v0']
    rng = np.random.RandomState(25)
    repeat_environments = False
    compare_random = False
    save_results = True
    if repeat_environments:
        env_names = rng.choice(env_names, size=20, replace=True)
    envs = [gym.make(name) for name in env_names]
    random_algorithm = RandomAgent()
    horizon = 100000
    num_trials = 10
    print(f'Running {len(env_names)} environments for {horizon} timesteps '
          f'over {num_trials} trials...')
    start = time.time()
    scores = simulation.simulate_multiple_environment(
        envs, algorithm, T=horizon, num_trials=num_trials, discount=1)
    print(f'{algorithm.algorithm} took {time.time()-start} seconds')
    # print('Your score is', scores)
    mean_scores = np.mean(scores, axis=1)
    std_scores = np.std(scores, axis=1)
    if compare_random:
        random_scores = simulation.simulate_multiple_environment(
            envs, random_algorithm, T=horizon, num_trials=num_trials,
            discount=1)
        mean_random = np.mean(random_scores, axis=1)
        std_random = np.std(random_scores, axis=1)
        for i, (score, rand_score, score_std, rand_std) in enumerate(
                zip(mean_scores, mean_random, std_scores, std_random)):
            print(f'Environment: "{env_names[i]}"')
            print(f'-- Mean reward: {score} -- Std: {score_std}')
            print(f'-- Random reward: {rand_score} -- Std: {rand_std}')
    else:
        for i, (score, score_std) in enumerate(zip(mean_scores, std_scores)):
            print(f'Environment: "{env_names[i]}"')
            print(f'-- Mean reward: {score} -- Std: {score_std}')
    if save_results:
        with open('results.csv', 'w') as file:
            file.write('environment, runs, trials, mean_score, std_deviation\n')
            for env, mean, sigma in zip(env_names, mean_scores, std_scores):
                file.write('{}, {}, {}, {}, {}\n'.format(
                    env, horizon, num_trials, mean, sigma))
    return env_names, scores
def main():
    agent_dict = []
    random_dict = []
    random_agent = RandomAgent(0)
    wall1, wall2 = MovingCube(1), MovingCube(2)
    rnd_ag_list = [random_agent, wall1]
    print('began running at %s' %
          datetime.datetime.now().strftime("%a, %d %B %Y %H:%M:%S"))
    for i in range(5):
        d = activate_agent(10000, render=True, print_info=False,
                           reset_env=True, number_of_agents=2,
                           get_avg_errors=False, get_values_field=False,
                           number_of_error_agents=1)
        agent_dict.append(get_agent_dict(d))
        print('finished running #%i at %s' %
              (i + 1,
               datetime.datetime.now().strftime("%a, %d %B %Y %H:%M:%S")))
    means_curious = []
    for i in agent_dict:
        means_curious.append(i['total_errors'])
    std_agent = np.array(means_curious).std(axis=0)
    means_random = []
    for i in random_dict:
        means_random.append(i['total_errors'])
    std_random = np.array(means_random).std(axis=0)
    agent_dict = join_dict_list(agent_dict)
    # draw_plots(agent_dict)
    random_dict = join_dict_list(random_dict)
    # draw_plots(random_dict)
    # fig, ax, q = plot_field(*agent_dict['fields'], title='Agent Value Field',
    #                         color=agent_dict['fields_colors'])
    errors_rate_curious = agent_dict['total_errors']
    errors_rate_random = random_dict['total_errors']
    fig1, ax1 = plot_together(
        agent_dict['timesteps'],
        [errors_rate_curious, {'label': 'curious', 'color': 'blue'}],
        [errors_rate_random, {'label': 'random', 'color': 'red'}],
        title='Total Errors STD', std=[std_agent, std_random],
        axis_labels=['steps', 'total error'])
    fig2, ax2 = plot_together(
        agent_dict['timesteps'],
        [errors_rate_curious, {'label': 'curious', 'color': 'blue'}],
        [errors_rate_random, {'label': 'random', 'color': 'red'}],
        title='Total Errors Means', means=[means_curious, means_random],
        axis_labels=['steps', 'total error'])
    fig3, ax3 = plot_together(
        agent_dict['timesteps'][:-1],
        [stats.derivative(errors_rate_curious),
         {'label': 'curious', 'color': 'blue'}],
        [stats.derivative(errors_rate_random),
         {'label': 'random', 'color': 'red'}],
        title='Total Errors Derivative',
        axis_labels=['steps', 'total error'])
    fig1.savefig('./plots/std.png')
    fig2.savefig('./plots/means.png')
    plt.show()
    from IPython import embed
    embed()
def create_agent(self, agent_name, environment, sess):
    # agent = None
    from random_agent import RandomAgent
    agent = RandomAgent(action_space_size=self.action_space_size)
    # from deep_rl_agent import DeepRLAgent
    # agent = DeepRLAgent(agent_name=agent_name,
    #                     observation_space_size=self.observation_space_size,
    #                     action_space_size=self.action_space_size,
    #                     sess=sess)
    return agent
def evaluate_random(train_spec, current_network):
    logger.info("evaluating against random agent...")
    agent_a = RandomAgent()
    current_prediction_network = train_spec.prediction_network(current_network)
    agent_b = AlphaZeroAgent(current_prediction_network,
                             train_spec.game_engine(),
                             num_simulations=train_spec.num_simulations)
    evaluation = Evaluation(train_spec.game_engine(), agent_a, agent_b,
                            competitive=True)
    scores = evaluation.play_n_games(train_spec.num_random_evaluation_games)
    logger.info(f"Eval scores vs random agent {scores}")
def run_experiment():
    # Specify hyper-parameters
    num_runs = 1
    max_episodes = 1000000
    max_steps_per_episode = 100
    num_states = 181
    num_actions = 2
    alpha = 0.01
    eps = 0.1
    Q1 = 0
    results = np.zeros(max_episodes)
    results_run = 0
    agent = RandomAgent(num_states, num_actions, alpha, eps, Q1)
    environment = BlackJack()
    rlglue = RLGlue(environment, agent)
    print("\nPrinting one dot for every run: {0} total runs to complete".format(
        num_runs))
    for run in range(num_runs):
        np.random.seed(run)
        results_run = 0.0
        rlglue.rl_init()
        for e in range(1, max_episodes + 1):
            rlglue.rl_start()
            for s in range(max_steps_per_episode):
                r, _, _, terminal = rlglue.rl_step()
                results_run += r
                results[e - 1] += r
                if terminal:
                    break
            if e % 10000 == 0:
                print("\nEpisode {}: average return so far is {}, and the policy is".format(
                    e, results_run / e))
                print(rlglue.rl_agent_message("printPolicy"))
        print(".")
    print("Average return over experiment: {}".format(
        (results / num_runs).mean()))
    # Save the final policy to file -- change the file name as necessary
    with open("policy.txt", 'w') as f:
        f.write(rlglue.rl_agent_message("printPolicy"))
    # Save all the experiment data for analysis -- change the file name as necessary
    save_results(results / num_runs, max_episodes, "RL_EXP_OUT.dat")
def render_GET(self, request):
    if request.args and len(request.args.get('u')):
        try:
            kwargs = {'proxyhost': cfg.get('proxy', 'host'),
                      'proxyport': cfg.get('proxy', 'port')}
        except Exception:
            kwargs = {}
        return unshorten(request.args.get('u')[0],
                         ua=RandomAgent(cfg.get('resolver', 'db')).get_agent,
                         cache=self.cache, **kwargs)
    return "Error: try appending ?u=<URL>"
def main():
    max_steps = 100  # max number of steps in an episode
    num_runs = 10    # number of repetitions of the experiment

    # Create and pass agent and environment objects to RLGlue
    agent = RandomAgent()
    environment = OneStateEnvironment()
    rlglue = RLGlue(environment, agent)
    del agent, environment  # don't use these anymore

    result = experiment2(rlglue, num_runs, max_steps)
    print("experiment2 average reward: {}\n".format(result))
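# A minimal sketch of an RL-Glue style RandomAgent that would satisfy the
# RLGlue loops in these experiments. The agent_* method names follow the
# common RL-Glue agent interface; the constructor arguments vary across the
# snippets above, so the single action-count argument here is an assumption.
import numpy as np


class RandomAgent:
    def __init__(self, num_actions=2):
        self.num_actions = num_actions

    def agent_init(self):
        pass  # a random policy has no parameters to initialize

    def agent_start(self, observation):
        return np.random.randint(self.num_actions)

    def agent_step(self, reward, observation):
        # The reward is ignored; the policy stays uniformly random.
        return np.random.randint(self.num_actions)

    def agent_end(self, reward):
        pass

    def agent_message(self, message):
        return "random agent: nothing to report"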
def evaluate(self, best_agent):
    # Play 100 games against 1-3 random agents; count wins for seat 0.
    win_counts = []
    for num_players in (2, 3, 4):
        agents = [best_agent] + [RandomAgent() for _ in range(num_players - 1)]
        wins = 0
        for _ in range(100):
            score_buf = new_game(agents)
            winner = 0
            for i in range(num_players):
                if score_buf[winner] < score_buf[i]:
                    winner = i
            if winner == 0:
                wins += 1
        win_counts.append(wins)
    two_player_wr, three_player_wr, four_player_wr = win_counts
    return two_player_wr, three_player_wr, four_player_wr
def test():
    seed = 2
    game = Geister2()
    tdagent = MCAgent(game, seed)
    tdagent.w = np.array([
        0.9, 0, 0, 0, 0, 0,
        0.8, 0, 0, 0, 0, 0,
        0.7, 0, 0, 0, 0, 0,
        0.6, 0, 0, 0, 0, 0,
        0.5, 0, 0, 0, 0, 0,
        0.1, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 1,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0
    ])
    rndagent = RandomAgent(game, seed)
    agents = (tdagent, rndagent)
    arr0, arr1 = (agent.init_red() for agent in agents)
    game.setRed(arr0)
    game.changeSide()
    game.setRed(arr1)
    game.changeSide()
    game.printBoard()
    player = 0
    while not game.is_ended():
        agent = agents[player]
        states = game.after_states()
        i_act = agent.get_act_afterstates(states)
        game.on_action_number_received(i_act)
        if player == 0:
            game.printBoard()
        game.changeSide()
        player = (player + 1) % 2
def __init__(self, size, ships, nb_samples=1000,
             player1="human", player2="random"):
    self.board_player1 = Board(size)
    self.board_player2 = Board(size)
    self.size = size
    self.ships = ships
    if player1 == "human":
        self.player1 = HumanAgent()
    elif player1 in ("MC", "MC2"):  # MC2 currently uses the same agent as MC
        self.player1 = MCAgent(ships=ships, size=size, nb_samples=nb_samples)
    else:
        self.player1 = RandomAgent(size=size)
    if player2 == "human":
        self.player2 = HumanAgent()
    elif player2 in ("MC", "MC2"):  # MC2 currently uses the same agent as MC
        self.player2 = MCAgent(ships=ships.copy(), size=size,
                               nb_samples=nb_samples)
    else:
        self.player2 = RandomAgent(size=size)
def main():
    # agents = [RandomAgent(), MCTSAgent(timeLimit=1000)]
    agents = [
        RandomAgent(),
        MCTSAgent(timeLimit=1000),
        MCTSAgent(timeLimit=1000)
    ]
    # currentPlayer is either 1 or -1, so index 0 is ignored; the agent at
    # index 1 plays first.
    game = Game()
    done = False
    while not done:
        next_action = agents[game.currentPlayer].choose_action(game)
        next_state, value, done, info = game.step(next_action)
        game.gameState.get_visual_state()
    # Whoever's turn it is when the game loop terminates has lost.
    print('Reward: {}'.format(game.currentPlayer * -1))
def learn():
    file_name = "td_9"
    seed = 91
    game = Geister2()
    mcagent = MCAgent(game, seed)
    opponent = RandomAgent(game, seed + 1)
    env = VsEnv(opponent, game, seed)
    mcagent.learn(env, seed)
    # for k in range(6*7*3):
    #     for i in range(3):
    #         for j in range(7):
    #             print((mcagent.w[j+i*(6*7)+k*(6*7*3):6+j+i*(6*7)+k*(6*7*3)]
    #                    * 1000).round()*(1/1000))
    #         print("-----------")
    #     print("-------------------")
    np.save(file_name, mcagent.w)
    w_td = np.load(file_name + '.npy')
    print(w_td.shape)
def battle_vsrandom():
    seed = 29
    bttl_num = 1
    game = Geister2()
    agents = [[MCAgent(game, seed + i) for i in range(8)],
              [MCAgent(game, seed + i + 8) for i in range(8)]]
    agents_str = ["weights/td_" + str(i) + ".npy" for i in range(1, 9)]
    for agent, string in zip(agents[0], agents_str):
        agent.w = load(string)
    agents_str = ["weights_2/td_learned2_" + str(i) + ".npy"
                  for i in range(1, 9)]
    for agent, string in zip(agents[1], agents_str):
        agent.w = load(string)
    rndagent = RandomAgent(game, seed - 1)
    means = np.zeros((2, 8))
    for i in range(2):
        for j in range(len(agents[i])):
            r_list = np.zeros(bttl_num)
            for t in range(bttl_num):
                agent_s = (agents[i][j], rndagent)
                arr0, arr1 = (agent.init_red() for agent in agent_s)
                game.__init__()
                game.setRed(arr0)
                game.changeSide()
                game.setRed(arr1)
                game.changeSide()
                player = 0
                while not game.is_ended():
                    agent = agent_s[player]
                    states = game.after_states()
                    i_act = agent.get_act_afterstates(states)
                    game.on_action_number_received(i_act)
                    game.changeSide()
                    player = (player + 1) % 2
                if player == 1:
                    game.changeSide()
                result = game.checkResult()
                r = (1 if (result > 0) else (-1 if (result < 0) else 0))
                r_list[t] = r
            means[i][j] = r_list.mean()
    print(means)
    print(means.mean(axis=1))
def __init__(self, subphase, num_samples, batch_size=32, deterministic=False):
    """Dataset for random actions.

    Arguments:
        subphase (str): Attack subphase to collect. TODO FIXME Cover more than just attacks
        num_samples (int): Approximate number of samples to gather.
        batch_size (int): Number of samples per batch.
        deterministic (bool): Make each iteration produce the same results.
    """
    super(RandomActionDataset, self).__init__()
    self.subphase = subphase
    self.num_samples = num_samples
    self.batch_size = batch_size
    self.deterministic = deterministic
    # Variables for data generation
    self.randagent = RandomAgent()
    keys, ship_templates = parseShips('data/test_ships.csv')
    training_ships = ["All Defense Tokens", "All Defense Tokens",
                      "Imperial II-class Star Destroyer",
                      "MC80 Command Cruiser",
                      "Assault Frigate Mark II A",
                      "No Shield Ship", "One Shield Ship", "Mega Die Ship"]
    self.defenders = []
    self.attackers = []
    for name in training_ships:
        self.attackers.append(
            Ship(name=name, template=ship_templates[name], upgrades=[],
                 player_number=1, device='cpu'))
    for name in training_ships:
        self.defenders.append(
            Ship(name=name, template=ship_templates[name], upgrades=[],
                 player_number=2, device='cpu'))
def main():
    # Setup
    interactive = 0
    size = 4
    brd = Board(size, graphics=0)
    rand_ag = RandomAgent()
    if interactive == 1:
        brd.start_interactive()
    agent = Agent.create(agent='tensorforce', environment=Board, update=64,
                         objective='policy_gradient',
                         reward_estimation=dict(horizon=20))
    runner = Runner(agent=agent, environment=Board, max_episode_timesteps=500)
    runner.run(num_episodes=200)
def create(name, env, max_schedule_time=20, verbose=False):
    """Static method to create an agent by name."""
    if name not in AgentFactory.available_agents():
        raise Exception(f'Unsupported agent: {name}')
    if name == 'baseline':
        from random_agent import RandomAgent
        return RandomAgent(env.action_space)
    if name == 'qlearning':
        from qlearning_td_agent import QLearningTDAgent
        return QLearningTDAgent(jobs_data=jobs_data, epsilon=.4,
                                max_schedule_time=max_schedule_time,
                                verbose=verbose)
    if name == 'dqn':
        from dqn_agent import DQNAgent
        return DQNAgent(env.observation_space, env.action_space,
                        verbose=verbose)
    return None
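# Hypothetical usage of AgentFactory.create(); the CartPole environment and
# the agent.act() call are illustrative assumptions about the returned
# agent's interface, not part of the factory itself.
import gym

env = gym.make('CartPole-v1')
agent = AgentFactory.create('baseline', env)
observation = env.reset()
done = False
while not done:
    # Step the environment with whatever action the agent picks.
    observation, reward, done, info = env.step(agent.act(observation))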
def harvest_experience(episodes):
    opponent_policy = RandomAgent("Player 2 Random")
    opponent_policy.seed(12345)
    env = PentagoEnv(SIZE, opponent_policy, agent_starts=AGENT_STARTS,
                     to_win=SIZE)
    env.seed(67890)
    exploration_policy = RandomAgent("Player 1 Random")
    exploration_policy.seed(24680)
    exploring_agent = TabularQAgent(env, AGENT_TAG, exploration_policy,
                                    load_model=True,
                                    userconfig={"eps": 0.3})
    exploring_agent.seed(13579)
    experience = []
    for e in range(episodes):
        experience += simulate_episode(env, exploring_agent)
    return experience
def learn():
    file_name = "weights/rfvsrnd6"
    seed = 103
    game = Geister2()
    agent = REINFORCEAgent(game, seed)
    agent.w = np.random.randn(agent.W_SIZE) * agent.alpha * 0.0001
    agent.theta = np.random.randn(agent.T_SIZE) * agent.beta * 0.0001
    opponent = RandomAgent(game, seed + 1)
    env = VsEnv(opponent, game, seed)
    # Set up profiling
    pr = cProfile.Profile()
    pr.enable()
    # Start the measured run
    agent.learn(env, seed)
    # Stop profiling and output the results
    pr.disable()
    stats = pstats.Stats(pr)
    stats.sort_stats('cumtime')
    stats.print_stats()
    pr.dump_stats('profile.stats')
    # Post-processing: save the learned weights
    np.save(file_name + "_w", agent.w)
    np.save(file_name + "_theta", agent.theta)
def a_vs_b(ship_a, ship_b, trials, attack_range):
    """This uses a random agent to choose actions during attacks from ship_a to ship_b.

    Args:
        ship_a (Ship): Attacker.
        ship_b (Ship): Defender.
        trials (int): Number of trials in average calculation.
        attack_range (str): Attack range.
    Returns:
        state_log (List[List[("state" or "action", (WorldState or action tuple))]])
    """
    agent = RandomAgent()
    state_log = []
    for trial in range(trials):
        # Reset ship b for each trial
        ship_b.reset()
        world_state = WorldState()
        world_state.addShip(ship_a, 0)
        world_state.addShip(ship_b, 1)
        num_rolls = 0
        while ship_b.damage_cards() < ship_b.hull():
            num_rolls += 1
            # Handle the attack and receive the updated world state
            try:
                world_state = handleAttack(world_state=world_state,
                                           attacker=(ship_a, "front"),
                                           defender=(ship_b, "front"),
                                           attack_range=attack_range,
                                           offensive_agent=agent,
                                           defensive_agent=agent,
                                           state_log=state_log)
            except RuntimeError:
                # This is fine, the random agent will do illegal things
                # plenty of times
                pass
    return state_log
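# A hypothetical driver for a_vs_b(), reusing the parseShips/Ship helpers that
# appear in the surrounding snippets; the CSV path and template names are
# assumptions carried over from those examples.
keys, ship_templates = parseShips('data/test_ships.csv')
ship_a = Ship(name="Attacker", template=ship_templates["Attacker"],
              upgrades=[], player_number=1)
ship_b = Ship(name="All Defense Tokens",
              template=ship_templates["All Defense Tokens"],
              upgrades=[], player_number=2)
state_log = a_vs_b(ship_a, ship_b, trials=100, attack_range="short")
print('Logged {} state/action entries'.format(len(state_log)))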
# Global variables
board_size = 8
number_of_games = 100
player_1_wins = 0
player_2_wins = 0
draw_games = 0

total_start_time = time.time()
for i in range(number_of_games):
    othello = Othello(board_size)
    player = 1
    start_time = time.time()
    while True:
        move = RandomAgent.pick_move(othello, player)
        if move != -1:
            othello.put(move, player)
        game_over = othello.is_game_over(player)
        player *= -1
        if game_over is not None:
            if game_over == 0:
                draw_games += 1
            elif game_over == 1:
                player_1_wins += 1
            else:
                player_2_wins += 1
            break
    end_time = time.time()
    # print('Time taken for game {} is {:.2f} seconds.'.format(i, end_time - start_time))