Example #1
def run_model(game_count=1):
    """
    run model for game_count games
    """

    # Make environment
    env = WhaleEnv(
        config={
            'active_player': 0,
            'seed': datetime.utcnow().microsecond,
            'env_num': 1,
            'num_players': 5
        })
    # Set up agents
    action_num = 3
    agent = DqnAgent(action_num=action_num, player_num=5)
    agent_0 = RandomAgent(action_num=action_num)
    agent_1 = RandomAgent(action_num=action_num)
    agent_2 = RandomAgent(action_num=action_num)
    agent_3 = RandomAgent(action_num=action_num)
    agents = [agent, agent_0, agent_1, agent_2, agent_3]
    env.set_agents(agents)
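    # Load the DQN agent's pretrained model before running evaluation games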
    agent.load_pretrained()
    for game in range(game_count):

        # Generate data from the environment
        trajectories = env.run(is_training=False)

        # Print out the trajectories
        print('\nEpisode {}'.format(game))
        for i, trajectory in enumerate(trajectories):
            print('\tPlayer {}'.format(i))
            print(trajectory[-1])
Example #2
def test():
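    # Play many bot-vs-bot games from fresh copies of the board, pile and players,
    # then report how often the starting player wins and the average number of moves.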
    board = Board.from_config_file('board.cfg')
    pile = Pile.from_config_file('pile.cfg')
    players = [
        Player('Bot_1', RandomAgent()),
        Player('Bot_2', RandomAgent())
    ]
    game_count = 10000
    starter_winner_count = 0
    total_move_count = 0

    for i in range(game_count):
        new_board = copy.deepcopy(board)
        new_pile = copy.deepcopy(pile)
        new_players = copy.deepcopy(players)
        state = State(new_board, new_players, new_pile, False, False)
        game = Game(state)
        game.execute_setup()
        game.play_game()
        print(f'At game #{i:04}')
        total_move_count += game.move_count
        if game.starter == game.winner:
            starter_winner_count += 1

    print(f'Percentage of games won by starter: {starter_winner_count/game_count*100}%')
    print(f'Average number of total moves in a game: {total_move_count/game_count}')
Example #3
def main():
    NUM_OF_GAMES = 100
    ai = RandomAgent()
    s = Sim()
    all_records = []
    for i in tqdm(range(NUM_OF_GAMES)):
        records = []
        # preparation of simulator
        s.reset_s()  # with no arguments the board starts out empty
        while True:
            # save the result before ban is overwritten?
            reshape_self, reshape_opp, reshape_ban, kou = s.get_s()

            if reshape_ban != 2:
                act_num = ai.act(reshape_self, reshape_opp, reshape_ban, kou)
                s.act(act_num)
            else:
                # +1 if black wins, -1 if black loses
                outcome = 2 * is_black_win(s) - 1
                break
            records.append(
                [reshape_self[1:], reshape_opp[1:], reshape_ban, act_num])
        # append the outcome to each record
        for j in range(len(records)):
            records[j].append((1. - 2. * (j % 2)) * outcome)
        all_records.append(records)
    # write out the stacked records
    w_fnc(all_records, 'all_records.csv')
    rec = [records[0][-1] for records in all_records]
    kuro_win = NUM_OF_GAMES / 2 + sum(rec) / 2
    siro_win = NUM_OF_GAMES - kuro_win
    print('Black wins: {0}, White wins: {1}'.format(kuro_win, siro_win))
Example #4
def main():
    # Environment options:
    #   rollout_video=True: Generate .mp4 video of the battle at the end
    #   verbose=False: Print debug statements
    import random  # used to sample scripted actions in the loop below

    unique_id = int(time.time())
    video_filename = 'video_{}.mp4'.format(unique_id)
    env = fog_of_war.FogOfWarMultiplayerEnvironment(
        video_filename=video_filename)
    state = env.reset()
    done = False

    # Two agents play each other: the learner (blue) and the adversary (red)
    blue_agent = RandomAgent(env.action_space())
    red_agent = RandomAgent(env.action_space())

    while not done:
        # The buildable units are named Rock, Paper, and Scissors
        # 1: Build paper in reserves
        # 2: Build paper in front
        # 3: Build rock in reserves
        # 4: Build rock in front
        # 5: Build scissors in reserves
        # 6: Build scissors in front
        # 7: Scout to reveal the enemy's army
        blue_action = random.choice([1, 4, 7])
        red_action = random.choice([5, 6, 7])

        # Take an action and simulate the game for one time step (~10 seconds)
        state, reward, done, info = env.step(blue_action, red_action)

        # The state is a tuple of:
        #   features_minimap: np array of features from the minimap view
        #   features_screen: np array of features from the camera view
        #   rgb_minimap: np array of an RGB pixel view of the minimap
        #   rgb_screen: np array of RGB pixel rendered frame of Starcraft II
        features_minimap, features_screen, rgb_minimap, rgb_screen = state

        # Example code for visualizing the state
        filename = "output_frame_{}_{:05d}.jpg".format(unique_id, env.steps)
        blue_caption = fog_of_war.action_to_name[blue_action]
        red_caption = fog_of_war.action_to_name[red_action]
        caption = 't={}  R={}  Left: {}  Right: {}'.format(
            env.steps, reward, blue_caption, red_caption)
        top = imutil.show(rgb_minimap,
                          resize_to=(800, 480),
                          return_pixels=True,
                          display=False)
        bottom = imutil.show(rgb_screen,
                             resize_to=(800, 480),
                             return_pixels=True,
                             display=False)
        imutil.show(np.concatenate([top, bottom], axis=0),
                    filename=filename,
                    caption=caption)
    print('Finished game')
Example #5
 def setup_algorithm(self, algorithm):
     """
     Initialize agent that uses the given algorithm
     """
     if algorithm == "random":
         from random_agent import RandomAgent
         self.agent = RandomAgent(self.actions, self.grid_shape)
     # Add agent here
     else:
         raise ValueError("No algorithm implemented for '"
                          "{}'".format(algorithm))
Example #6
def main():
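    # Play a single game between two RandomAgent-controlled players, with the board
    # and pile loaded from their config files.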
    board = Board.from_config_file('board.cfg')
    pile = Pile.from_config_file('pile.cfg')
    players = [
        Player('Bot_1', RandomAgent()),
        Player('Bot_2', RandomAgent())
    ]
    state = State(board, players, pile, True, True)
    game = Game(state)
    game.execute_setup()
    game.play_game()
Example #7
def main():
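    # Build either a Gym or a Retro environment, run the RandomAgent's training loop
    # for the requested number of episodes, then play one episode and report its reward.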
    script_args = parse_args()
    if script_args.use_retro:
        env = retro.make(script_args.env_name)
    else:
        env = gym.make(script_args.env_name)

    random_agent = RandomAgent(env)
    random_agent.train(script_args.num_episodes, script_args.render_train)

    play_reward = random_agent.play()
    print(f'Reward after playing: {play_reward}')
Example #8
def main():
    # preparation of pygame
    pygame.init()  # initialize pygame
    AIis = 2  # 1 is black, 2 is white
    ai = RandomAgent()
    bp = [115, 15]
    g = 40
    mar = 20
    x, y = 0, 0
    pixels = [bp, g, mar]
    screen = pygame.display.set_mode((600, 400))  # set the window size
    pygame.display.set_caption("GoSimulator")  # set the window title
    sysfont = pygame.font.SysFont(None, 40)

    # preparation of simulator
    s = Sim()
    s.reset_s()  # with no arguments the board starts out empty

    while True:
        screen.fill((0, 100, 0))  # set the background color
        # state,ban = s.get_s()
        # reshape the state for the GUI
        reshape_self, reshape_opp, reshape_ban, kou = s.get_s()
        ban = 2 - int(reshape_ban)
        # ban == 0 means the game is over
        if ban != 0:
            state = ban * reshape_self + (3. - ban) * reshape_opp
        bl, wh = s.get_eval()
        # TODO for debug
        draw(pygame, screen, sysfont, pixels, state, ban, x, y, bl, wh)
        pygame.display.update()  # refresh the display
        if ban == AIis:
            s.act(ai.act(reshape_self, reshape_opp, reshape_ban, kou))
        else:
            for event in pygame.event.get():  # handle quit / input events
                if event.type == QUIT:
                    pygame.quit()
                    sys.exit()
                if event.type == MOUSEBUTTONDOWN and event.button == 1:
                    x, y = event.pos
                    num = convert_to_num(pixels, x, y)
                    if num == -1:
                        s.reset_s()
                    else:
                        if num in s.regal_acts() and ban != 0:
                            s.act(num)
                            bl, wh = s.get_eval()
                            if len(bl) != len(set(bl)):
                                print('[error] get_eval has overlap')
Example #9
def train_model(max_episodes=1000):
    """
    Trains a DQN agent to play the Whale game by trial and error

    :return: None
    """

    # buffer = ReplayBuffer()
    # Make environment
    env = WhaleEnv(
        config={
            'allow_step_back': False,
            'allow_raw_data': False,
            'single_agent_mode': False,
            'active_player': 0,
            'record_action': False,
            'seed': 0,
            'env_num': 1,
            'num_players': 5
        })
    # Set a global seed using time
    set_global_seed(datetime.utcnow().microsecond)
    # Set up agents
    action_num = 3
    agent = DqnAgent(dim=1, action_num=action_num)
    agent_0 = RandomAgent(action_num=action_num)
    agent_1 = RandomAgent(action_num=action_num)
    agent_2 = RandomAgent(action_num=action_num)
    agent_3 = RandomAgent(action_num=action_num)
    agents = [agent, agent_0, agent_1, agent_2, agent_3]
    env.set_agents(agents)
    agent.load_pretrained()
    UPDATE_TARGET_RATE = 20
    GAME_COUNT_PER_EPISODE = 2
    min_perf, max_perf = 1.0, 0.0
    for episode_cnt in range(1, max_episodes + 1):
        loss = agent.train(
            collect_gameplay_experiences(env, agents, GAME_COUNT_PER_EPISODE))
        avg_reward = evaluate_training_result(env, agent)
        target_update = episode_cnt % UPDATE_TARGET_RATE == 0
        if avg_reward > max_perf:
            max_perf = avg_reward
            agent.save_weight()
        if avg_reward < min_perf:
            min_perf = avg_reward
        print('{0:03d}/{1} perf:{2:.2f}(min:{3} max:{4})'
              ' up:{5:1d} loss:{6}'.format(episode_cnt, max_episodes,
                                          avg_reward, min_perf, max_perf,
                                          target_update, loss))
        if target_update:
            agent.update_target_network()
    # env.close()
    print('training end')
Example #10
def run():
    tf.reset_default_graph()

    sess = tf.Session()
    with sess:
        with tf.device("/cpu:0"):
            gym_env = gym.make(FLAGS.game)
            if FLAGS.monitor:
                gym_env = gym.wrappers.Monitor(gym_env, FLAGS.experiments_dir + '/baseline', force=True)

            agent = RandomAgent(gym_env)

            agent.play()
Example #11
def run():
    tf.reset_default_graph()

    sess = tf.Session()
    with sess:
        with tf.device("/cpu:0"):
            gym_env = gym.make(FLAGS.game)
            if FLAGS.monitor:
                gym_env = gym.wrappers.Monitor(gym_env,
                                               FLAGS.experiments_dir +
                                               '/baseline',
                                               force=True)

            agent = RandomAgent(gym_env)

            agent.play()
Example #12
def test_die_encodings():
    """Test the dice are encoded correctly in the attack state."""

    agent = RandomAgent()
    attacker = ship.Ship(name="Attacker",
                         template=ship_templates["Attacker"],
                         upgrades=[],
                         player_number=1)
    template_front_dice = 0
    for color in ['Red', 'Blue', 'Black']:
        if 0 < len(ship_templates["Attacker"][f"Armament Front {color}"]):
            template_front_dice += int(
                ship_templates["Attacker"][f"Armament Front {color}"])

    dice_begin = ArmadaTypes.hull_zones.index('front') * len(
        ArmadaDice.die_colors)
    dice_end = dice_begin + len(ArmadaDice.die_colors)
    front_dice = int(attacker.get_range('dice')[dice_begin:dice_end].sum())
    # The ship encoding should have the same dice as the template
    assert front_dice == template_front_dice

    defender = ship.Ship(name="Defender",
                         template=ship_templates["All Defense Tokens"],
                         upgrades=[],
                         player_number=2)

    encoding, world_state = make_encoding(attacker, defender, "short", agent)

    attack_state_encoding = Encodings.encodeAttackState(world_state)
    die_offset = Encodings.getAttackDiceOffset()
    # The attack state should have a roll with as many dice as the ship has.
    dice_encoding = attack_state_encoding[die_offset:die_offset +
                                          Encodings.dieEncodingSize()]
    assert int(dice_encoding.sum().item()) == front_dice
Example #13
 def run_random_agent_graphics(self):
     # Runs the random agent with graphics
     agent = RandomAgent(self._board)
     self.event = Event()
     Thread(target=agent.play_graphics, args=[self.event]).start()
     brd_view = BoardView(brd=self._board)
     brd_view.start(self)
Example #14
def main(algorithm):
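    # Benchmark the given algorithm across several small Gym environments and, optionally,
    # compare it against a RandomAgent baseline and write the summary statistics to a CSV file.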
    env_names = ['Deterministic-4x4-FrozenLake-v0',
                 'Deterministic-8x8-FrozenLake-v0',
                 'Stochastic-8x8-FrozenLake-v0',
                 'Stochastic-4x4-FrozenLake-v0',
                 'Blackjack-v0',
                 'Roulette-v0',
                 'NChain-v0']

    rng = np.random.RandomState(25)

    repeat_environments = False
    compare_random = False

    save_results = True

    if repeat_environments:
        env_names = rng.choice(env_names, size=20, replace=True)

    envs = [gym.make(name) for name in env_names]


    random_algorithm = RandomAgent()

    horizon = 100000
    num_trials = 10

    print(f'Running {len(env_names)} environments for {horizon} timesteps over {num_trials} trials...')
    start = time.time()
    scores = simulation.simulate_multiple_environment(envs, algorithm, T=horizon, num_trials=num_trials, discount=1)
    print(f'{algorithm.algorithm} took {time.time()-start} seconds')
    # print('Your score is', scores)
    mean_scores = np.mean(scores, axis=1)
    std_scores = np.std(scores, axis=1)

    if compare_random:
        random_scores = simulation.simulate_multiple_environment(envs, random_algorithm, T=horizon,
                                                                 num_trials=num_trials,
                                                                 discount=1)
        mean_random = np.mean(random_scores, axis=1)
        std_random = np.std(random_scores, axis=1)
        for i, (score, rand_score, score_std, rand_std) in enumerate(
                zip(mean_scores, mean_random, std_scores, std_random)):
            print(f'Environment: "{env_names[i]}"')
            print(f'-- Mean reward: {score} -- Std: {score_std}')
            print(f'-- Random reward: {rand_score} -- Std: {rand_std}')

    else:
        for i, (score, score_std) in enumerate(zip(mean_scores, std_scores)):
            print(f'Environment: "{env_names[i]}"')
            print(f'-- Mean reward: {score} -- Std: {score_std}')

    if save_results:
        with open('results.csv', 'w') as file:
            file.write('environment, runs, trials, mean_score, std_deviation\n')
            for env, mean, sigma in zip(env_names, mean_scores, std_scores):
                file.write('{}, {}, {}, {}, {}\n'.format(env, horizon, num_trials, mean, sigma))

    return env_names, scores
Example #15
def main():
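    # Run several curiosity-driven simulations, aggregate their error statistics, and plot
    # them against a random-agent baseline (note: random_dict is never populated in this snippet).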
    agent_dict = []
    random_dict = []
    random_agent = RandomAgent(0)
    wall1, wall2 = MovingCube(1), MovingCube(2)
    rnd_ag_list = [random_agent, wall1]
    print('began running at %s' % datetime.datetime.now().strftime("%a, %d %B %Y %H:%M:%S"))
    for i in range(5):
        d = activate_agent(10000, render=True, print_info=False, reset_env=True, number_of_agents=2, get_avg_errors=False,
                           get_values_field=False, number_of_error_agents=1)
        agent_dict.append(get_agent_dict(d))

        print('finished running #%i at %s' % (i + 1, datetime.datetime.now().strftime("%a, %d %B %Y %H:%M:%S")))

    means_curious = []
    for i in agent_dict:
        means_curious.append(i['total_errors'])
    std_agent = np.array(means_curious).std(axis=0)

    means_random = []
    for i in random_dict:
        means_random.append(i['total_errors'])
    std_random = np.array(means_random).std(axis=0)


    agent_dict = join_dict_list(agent_dict)
    #draw_plots(agent_dict)
    random_dict = join_dict_list(random_dict)
    #draw_plots(random_dict)

    #fig, ax ,q = plot_field(*agent_dict['fields'], title='Agent Value Field', color=agent_dict['fields_colors'])

    errors_rate_curious = agent_dict['total_errors']
    errors_rate_random = random_dict['total_errors']


    fig1, ax1 = plot_together(agent_dict['timesteps'], [errors_rate_curious, {'label':'curious', 'color':'blue'}],
                  [errors_rate_random, {'label':'random', 'color':'red'}], title='Total Errors STD',
                  std=[std_agent, std_random], axis_labels=['steps', 'total error'])

    fig2, ax2 = plot_together(agent_dict['timesteps'], [errors_rate_curious, {'label': 'curious', 'color': 'blue'}],
                  [errors_rate_random, {'label': 'random', 'color': 'red'}], title='Total Errors Means',
                  means=[means_curious, means_random], axis_labels=['steps', 'total error'])

    fig3, ax3 = plot_together(agent_dict['timesteps'][:-1], [stats.derivative(errors_rate_curious), {'label': 'curious', 'color': 'blue'}],
                  [stats.derivative(errors_rate_random), {'label': 'random', 'color': 'red'}], title='Total Errors Derivative',
                axis_labels=['steps', 'total error'])

    fig1.savefig('./plots/std.png')
    fig2.savefig('./plots/means.png')
    plt.show()

    from IPython import embed
    embed()
Example #16
 def create_agent(self, agent_name, environment, sess):
     # agent = None
     from random_agent import RandomAgent
     agent = RandomAgent(action_space_size=self.action_space_size)
     # from deep_rl_agent import DeepRLAgent
     # agent = DeepRLAgent(agent_name=agent_name,
     #                     observation_space_size=self.observation_space_size,
     #                     action_space_size=self.action_space_size,
     #                     sess=sess)
     return agent
Example #17
def evaluate_random(train_spec, current_network):
    logger.info("evaluating against random agent...")
    agent_a = RandomAgent()
    current_prediction_network = train_spec.prediction_network(current_network)
    agent_b = AlphaZeroAgent(current_prediction_network, train_spec.game_engine(),
                             num_simulations=train_spec.num_simulations)
    evaluation = Evaluation(train_spec.game_engine(), agent_a, agent_b, competitive=True)
    scores = evaluation.play_n_games(train_spec.num_random_evaluation_games)

    logger.info(f"Eval scores vs random agent {scores}")
Example #18
def run_experiment():
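    # Run a RandomAgent on the BlackJack environment through RL-Glue, accumulating per-episode
    # returns and periodically printing the running average return and the current policy.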

    #specify hyper-parameters
    num_runs = 1
    max_episodes = 1000000
    max_steps_per_episode = 100
    num_states = 181
    num_actions = 2
    alpha = 0.01
    eps = 0.1
    Q1 = 0

    results = np.zeros(max_episodes)
    results_run = 0

    agent = RandomAgent(num_states, num_actions, alpha, eps, Q1)
    environment = BlackJack()
    rlglue = RLGlue(environment, agent)

    print(
        "\nPrinting one dot for every run: {0} total runs to complete".format(
            num_runs))

    for run in range(num_runs):
        np.random.seed(run)
        results_run = 0.0

        rlglue.rl_init()
        for e in range(1, max_episodes + 1):
            rlglue.rl_start()
            for s in range(max_steps_per_episode):
                r, _, _, terminal = rlglue.rl_step()
                results_run += r
                results[e - 1] += r

                if terminal:
                    break

            if e % 10000 == 0:
                print(
                    "\nEpisode {}: average return till episode is {}, and policy is"
                    .format(e, results_run / e))
                print(rlglue.rl_agent_message("printPolicy"))
        print(".")

    print("Average return over experiment: {}".format(
        (results / num_runs).mean()))

    #save final policy to file -- change file name as necessary
    with open("policy.txt", 'w') as f:
        f.write(rlglue.rl_agent_message("printPolicy"))

    #save all the experiment data for analysis -- change file name as necessary
    save_results(results / num_runs, max_episodes, "RL_EXP_OUT.dat")
Example #19
 def render_GET(self, request):
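     # Unshorten the URL passed as ?u=<URL>, using a random User-Agent string from the
     # resolver database and, when configured, an HTTP proxy.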
     if request.args and len(request.args.get('u')):
         try:
             kwargs={'proxyhost': cfg.get('proxy','host'),
                     'proxyport': cfg.get('proxy','port')}
         except Exception:
             kwargs={}
         return unshorten(request.args.get('u')[0],
                          ua=RandomAgent(cfg.get('resolver','db')).get_agent,
                          cache=self.cache,
                          **kwargs)
     return "Error: try appending ?u=<URL>"
Example #20
def main():
    max_steps = 100  # max number of steps in an episode
    num_runs = 10  # number of repetitions of the experiment

    # Create and pass agent and environment objects to RLGlue
    agent = RandomAgent()
    environment = OneStateEnvironment()
    rlglue = RLGlue(environment, agent)
    del agent, environment  # don't use these anymore

    result = experiment2(rlglue, num_runs, max_steps)
    print("experiment2 average reward: {}\n".format(result))
Example #21
	def evaluate(self, best_agent):
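		# Play 100 games against one, two, and three RandomAgent opponents and count how
		# many games the candidate agent (seat 0) wins in each setting.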
		two_player_wr = 0
		three_player_wr = 0
		four_player_wr = 0
		agents = [best_agent, RandomAgent()]
		for _ in range(100):
			score_buf = new_game(agents)
			winner = 0
			for i in range(2):
				if score_buf[winner] < score_buf[i]:
					winner = i
			if winner == 0:
				two_player_wr += 1
		agents = [best_agent, RandomAgent(), RandomAgent()]
		for _ in range(100):
			score_buf = new_game(agents)
			winner = 0
			for i in range(3):
				if score_buf[winner] < score_buf[i]:
					winner = i
			if winner == 0:
				three_player_wr += 1
		agents = [best_agent, RandomAgent(), RandomAgent(), RandomAgent()]
		for _ in range(100):
			score_buf = new_game(agents)
			winner = 0
			for i in range(4):
				if score_buf[winner] < score_buf[i]:
					winner = i
			if winner == 0:
				four_player_wr += 1
		return two_player_wr, three_player_wr, four_player_wr
Example #22
def test():
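    # Pit an MC agent with hand-set weights against a RandomAgent on Geister and print
    # the board after each of the MC agent's moves.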
    seed = 2
    game = Geister2()

    tdagent = MCAgent(game, seed)
    tdagent.w = np.array([
        0.9, 0, 0, 0, 0, 0,
        0.8, 0, 0, 0, 0, 0,
        0.7, 0, 0, 0, 0, 0,
        0.6, 0, 0, 0, 0, 0,
        0.5, 0, 0, 0, 0, 0,
        0.1, 0, 0, 0, 0, 0,
        0,   0, 0, 0, 0, 1,

        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,

        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0
    ])

    rndagent = RandomAgent(game, seed)
    agents = (tdagent, rndagent)
    arr0, arr1 = (agent.init_red() for agent in agents)
    game.setRed(arr0)
    game.changeSide()
    game.setRed(arr1)
    game.changeSide()
    game.printBoard()
    player = 0
    while not game.is_ended():
        agent = agents[player]
        states = game.after_states()
        i_act = agent.get_act_afterstates(states)
        game.on_action_number_received(i_act)
        if player == 0:
            game.printBoard()
        game.changeSide()

        player = (player+1) % 2
Example #23
    def __init__(self,
                 size,
                 ships,
                 nb_samples=1000,
                 player1="human",
                 player2="random"):
        self.board_player1 = Board(size)
        self.board_player2 = Board(size)
        self.size = size
        self.ships = ships

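        # Pick an agent implementation for each player from its config string; anything
        # other than "human", "MC", or "MC2" falls back to a RandomAgent.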
        if player1 == "human":
            self.player1 = HumanAgent()
        elif player1 == "MC":
            self.player1 = MCAgent(ships=ships,
                                   size=size,
                                   nb_samples=nb_samples)
        elif player1 == "MC2":
            self.player1 = MCAgent(ships=ships,
                                   size=size,
                                   nb_samples=nb_samples)
        else:
            self.player1 = RandomAgent(size=size)

        if player2 == "human":
            self.player2 = HumanAgent()
        elif player2 == "MC":
            self.player2 = MCAgent(ships=ships.copy(),
                                   size=size,
                                   nb_samples=nb_samples)
        elif player2 == "MC2":
            self.player2 = MCAgent(ships=ships.copy(),
                                   size=size,
                                   nb_samples=nb_samples)
        else:
            self.player2 = RandomAgent(size=size)
Example #24
def main():
    # agents = [RandomAgent(), MCTSAgent(timeLimit=1000)]
    agents = [
        RandomAgent(),
        MCTSAgent(timeLimit=1000),
        MCTSAgent(timeLimit=1000)
    ]  #curr player is either 1 or -1, so index 0 is ignored. Player at index 1 plays first
    game = Game()

    done = False
    while not done:
        next_action = agents[game.currentPlayer].choose_action(game)
        next_state, value, done, info = game.step(next_action)
        game.gameState.get_visual_state()
    print('Reward: {}'.format(
        game.currentPlayer *
        -1))  # whoever's turn it is when the game loop terminates has lost
Example #25
def learn():
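    # Train an MC agent against a RandomAgent opponent on Geister, save the learned
    # weight vector, and reload it as a quick sanity check.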
    file_name = "td_9"
    seed = 91
    game = Geister2()
    mcagent = MCAgent(game, seed)
    opponent = RandomAgent(game, seed+1)
    env = VsEnv(opponent, game, seed)
    mcagent.learn(env, seed)
    # for k in range(6*7*3):
    #     for i in range(3):
    #         for j in range(7):
    #             print((mcagent.w[j+i*(6*7)+k*(6*7*3):6+j+i*(6*7)+k*(6*7*3)]
    #                   * 1000).round()*(1/1000))
    #         print("-----------")
    #     print("-------------------")
    np.save(file_name, mcagent.w)
    w_td = np.load(file_name+'.npy')
    print(w_td.shape)
Example #26
def battle_vsrandom():
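    # Evaluate two groups of eight pre-trained MC agents against a RandomAgent, recording
    # +1/-1/0 per game and printing the mean result for each agent and each group.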
    seed = 29
    bttl_num = 1
    game = Geister2()
    agents = [[MCAgent(game, seed + i) for i in range(8)],
              [MCAgent(game, seed + i + 8) for i in range(8)]]
    agents_str = ["weights/td_" + str(i) + ".npy" for i in range(1, 9)]
    for agent, string in zip(agents[0], agents_str):
        agent.w = load(string)
    agents_str = [
        "weights_2/td_learned2_" + str(i) + ".npy" for i in range(1, 9)
    ]
    for agent, string in zip(agents[1], agents_str):
        agent.w = load(string)
    rndagent = RandomAgent(game, seed - 1)
    means = np.zeros((2, 8))
    for i in range(2):
        for j in range(len(agents[i])):
            r_list = np.zeros(bttl_num)
            for t in range(bttl_num):
                agent_s = (agents[i][j], rndagent)
                arr0, arr1 = (agent.init_red() for agent in agent_s)
                game.__init__()
                game.setRed(arr0)
                game.changeSide()
                game.setRed(arr1)
                game.changeSide()
                player = 0
                while not game.is_ended():
                    agent = agent_s[player]
                    states = game.after_states()
                    i_act = agent.get_act_afterstates(states)
                    game.on_action_number_received(i_act)
                    game.changeSide()

                    player = (player + 1) % 2
                if player == 1:
                    game.changeSide()
                result = game.checkResult()
                r = (1 if (result > 0) else (-1 if (result < 0) else 0))
                r_list[t] = r
            means[i][j] = r_list.mean()
    print(means)
    print(means.mean(axis=1))
Example #27
    def __init__(self,
                 subphase,
                 num_samples,
                 batch_size=32,
                 deterministic=False):
        """Dataset for random actions.

        Arguments:
            subphase      (str): Attack subphase to collect. TODO FIXME Cover more than just attacks
            num_samples   (int): Approximate number of samples to gather.
            batch_size    (int): Number of samples per batch.
            deterministic(bool): Make each iteration produce the same results.
        """
        super(RandomActionDataset, self).__init__()
        self.subphase = subphase
        self.num_samples = num_samples
        self.batch_size = batch_size
        self.deterministic = deterministic

        # Variables for data generation
        self.randagent = RandomAgent()
        keys, ship_templates = parseShips('data/test_ships.csv')

        training_ships = [
            "All Defense Tokens", "All Defense Tokens",
            "Imperial II-class Star Destroyer", "MC80 Command Cruiser",
            "Assault Frigate Mark II A", "No Shield Ship", "One Shield Ship",
            "Mega Die Ship"
        ]
        self.defenders = []
        self.attackers = []
        for name in training_ships:
            self.attackers.append(
                Ship(name=name,
                     template=ship_templates[name],
                     upgrades=[],
                     player_number=1,
                     device='cpu'))
        for name in training_ships:
            self.defenders.append(
                Ship(name=name,
                     template=ship_templates[name],
                     upgrades=[],
                     player_number=2,
                     device='cpu'))
Example #28
def main():
    # Setup
    interactive = 0
    size = 4
    brd = Board(size, graphics=0)
    rand_ag = RandomAgent()

    if interactive == 1:
        brd.start_interactive()

    agent = Agent.create(agent='tensorforce',
                         environment=Board,
                         update=64,
                         objective='policy_gradient',
                         reward_estimation=dict(horizon=20))

    runner = Runner(agent=agent, environment=Board, max_episode_timesteps=500)

    runner.run(num_episodes=200)
Example #29
    def create(name, env, max_schedule_time=20, verbose=False):
        """Static method to create an agents by name"""
        if name not in AgentFactory.available_agents():
            raise Exception(f'Unsupported agent: {name}')

        if name == 'baseline':
            from random_agent import RandomAgent
            return RandomAgent(env.action_space)

        if name == 'qlearning':
            from qlearning_td_agent import QLearningTDAgent
            return QLearningTDAgent(jobs_data=jobs_data, epsilon=.4,
                max_schedule_time=max_schedule_time, verbose=verbose)

        if name == 'dqn':
            from dqn_agent import DQNAgent
            return DQNAgent(env.observation_space, env.action_space,
                verbose=verbose)
        return None
Example #30
def harvest_experience(episodes):
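    # Build a Pentago environment whose opponent plays randomly, then roll out the requested
    # number of episodes with a tabular Q-learning agent and collect the generated experience.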
    opponent_policy = RandomAgent("Player 2 Random")
    opponent_policy.seed(12345)
    env = PentagoEnv(SIZE, opponent_policy, agent_starts=AGENT_STARTS, to_win=SIZE)
    env.seed(67890)

    exploration_policy = RandomAgent("Player 1 Random")
    exploration_policy.seed(24680)
    exploring_agent = TabularQAgent(env, AGENT_TAG, exploration_policy, load_model=True, userconfig={ "eps" : 0.3 })
    exploring_agent.seed(13579)

    experience = []
    for e in range(episodes):
        experience += simulate_episode(env, exploring_agent)
    return experience
Example #31
def learn():
    file_name = "weights/rfvsrnd6"
    seed = 103
    game = Geister2()
    agent = REINFORCEAgent(game, seed)
    agent.w = np.random.randn(agent.W_SIZE) * agent.alpha * 0.0001
    agent.theta = np.random.randn(agent.T_SIZE) * agent.beta * 0.0001
    opponent = RandomAgent(game, seed + 1)
    env = VsEnv(opponent, game, seed)
    # set up profiling
    pr = cProfile.Profile()
    pr.enable()
    # start profiling the learning run
    agent.learn(env, seed)
    # stop profiling and print the results
    pr.disable()
    stats = pstats.Stats(pr)
    stats.sort_stats('cumtime')
    stats.print_stats()
    pr.dump_stats('profile.stats')
    # post-processing: save the learned parameters

    np.save(file_name + "_w", agent.w)
    np.save(file_name + "_theta", agent.theta)
Example #32
def a_vs_b(ship_a, ship_b, trials, attack_range):
    """This uses a random agent to choose actions during attacks from ship_a to ship_b.

    Args:
      ship_a ((Ship, str)): Attacker and hull zone tuple.
      ship_b ((Ship, str)): Defender and hull zone tuple.
      trials (int): Number of trials in average calculation.
      attack_range (str): Attack range.
    Returns:
      state_log (List[List[("state" or "action", (WorldState or action tuple))]])
    
    """
    agent = RandomAgent()
    state_log = []
    for trial in range(trials):
        # Reset ship b for each trial
        ship_b.reset()
        world_state = WorldState()
        world_state.addShip(ship_a, 0)
        world_state.addShip(ship_b, 1)
        num_rolls = 0
        while ship_b.damage_cards() < ship_b.hull():
            num_rolls += 1
            # Handle the attack and receive the updated world state
            try:
                world_state = handleAttack(world_state=world_state,
                                           attacker=(ship_a, "front"),
                                           defender=(ship_b, "front"),
                                           attack_range=attack_range,
                                           offensive_agent=agent,
                                           defensive_agent=agent,
                                           state_log=state_log)
            except RuntimeError:
                # This is fine, the random agent will do illegal things plenty of times
                pass
    return state_log
Example #33
# Global Variables
board_size = 8
number_of_games = 100

player_1_wins = 0
player_2_wins = 0
draw_games = 0

total_start_time = time.time()

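# Play a fixed number of Othello games in which both sides choose moves via
# RandomAgent.pick_move, tallying wins for each player and draws.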
for i in range(number_of_games):
    othello = Othello(board_size)
    player = 1
    start_time = time.time()
    while True:
        move = RandomAgent.pick_move(othello, player)
        if move != -1:
            othello.put(move, player)
        game_over = othello.is_game_over(player)
        player *= -1
        if game_over is not None:
            if game_over == 0:
                draw_games += 1
            else:
                if game_over == 1:
                    player_1_wins += 1
                else:
                    player_2_wins += 1
            break
    end_time = time.time()
    # print 'Time taken for game ' + str(i) + ' is %.2f seconds.' % (end_time - start_time)