Example #1
    def test_add_point(self):
        logger = Logger(xlabel="x", ylabel="y", legend="test", csv_path="./newtest/test_csv.csv")
        logger.add_point(x=1, y=1)
        self.assertEqual(logger.xs[0], 1)
        self.assertEqual(logger.ys[0], 1)
        with self.assertRaises(ValueError):
            logger.add_point(None, None)
Example #2
    def test_add_point(self):
        logger = Logger(xlabel="x",
                        ylabel="y",
                        legend="test",
                        csv_path="./newtest/test_csv.csv")
        logger.add_point(x=1, y=1)
        self.assertEqual(logger.xs[0], 1)
        self.assertEqual(logger.ys[0], 1)
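Examples #1 and #2 pin down the small surface these tests rely on: the constructor accepts `xlabel`, `ylabel`, `legend`, and an optional `csv_path`, while `add_point` appends to the `xs` and `ys` lists and raises `ValueError` when either coordinate is `None`. A minimal sketch of a class that would pass these tests, written only as an illustration and not as the actual rlcard `Logger` implementation, could look like this:

    class Logger:
        """Illustrative stand-in: records (x, y) points for later plotting/CSV export."""

        def __init__(self, xlabel='', ylabel='', legend='', csv_path=None):
            self.xlabel = xlabel
            self.ylabel = ylabel
            self.legend = legend
            self.csv_path = csv_path
            self.xs = []
            self.ys = []

        def add_point(self, x=None, y=None):
            # Reject missing coordinates, matching the ValueError check in Example #1.
            if x is None or y is None:
                raise ValueError('x and y must not be None')
            self.xs.append(x)
            self.ys.append(y)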
Example #3
    def test_make_plot(self):
        logger = Logger(xlabel="x", ylabel="y", legend="test")
        for x in range(10):
            logger.add_point(x=x, y=x * x)
        self.assertEqual(9 * 9, logger.ys[9])
        save_path = './newtest/test.png'
        save_dir = os.path.dirname(save_path)
        if os.path.exists(save_dir):
            shutil.rmtree(save_dir)

        logger.make_plot(save_path=save_path)
        shutil.rmtree(save_dir)
Example #4
            reward = 0
            reward_list = []
            for eval_episode in range(evaluate_num):
                print('\rEPISODE {} - Eval {} over {} - Number of game played {} - {}'.format(episode, eval_episode,
                                                                                              evaluate_num,
                                                                                              total_game_played,
                                                                                              time_difference_good_format(
                                                                                                  seconds,
                                                                                                  time.time())),
                      end='')
                _, payoffs = eval_env.run(is_training=False)
                total_game_played += 1
                reward_list.append(payoffs[0])
                reward += payoffs[0]

            logger.log('\n########## Evaluation - Episode {} ##########'.format(episode))
            logger.log('Timestep: {} Average reward is {}'.format(env.timestep, float(reward) / evaluate_num))

            # Add point to logger
            logger.add_point(x=env.timestep, y=float(reward) / evaluate_num)

        # Make plot
        if episode % save_plot_every == 0 and episode > 0:
            logger.make_plot(save_path=figure_path + str(episode) + '.png')
            logger.make_plot_hist(save_path_1=figure_path + str(episode) + '_hist.png',
                                  save_path_2=figure_path + str(episode) + '_freq.png', reward_list=reward_list)

    # Make the final plot
    logger.make_plot(save_path=figure_path + 'final_' + str(episode) + '.png')
    logger.make_plot_hist(save_path_1=figure_path + str(episode) + '_hist.png',
                          save_path_2=figure_path + str(episode) + '_freq.png', reward_list=reward_list)
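Example #4, like the later training-loop examples, follows a common periodic-evaluation pattern: every `evaluate_every` episodes, play `evaluate_num` evaluation games, average the first player's payoff, log it, and record it with `add_point`; every `save_plot_every` episodes, write the curve to disk. Stripped of the progress printing, the skeleton is roughly the following (names such as `eval_env`, `figure_path`, and `evaluate_num` come from the examples; the loop bound `episode_num` is a hypothetical name, and the condensed loop is a paraphrase, not code from any single repository):

    for episode in range(episode_num):
        # ... one training step per episode omitted ...

        if episode % evaluate_every == 0:
            reward = 0
            for _ in range(evaluate_num):
                _, payoffs = eval_env.run(is_training=False)
                reward += payoffs[0]
            avg_reward = float(reward) / evaluate_num
            logger.log('Timestep: {} Average reward is {}'.format(env.timestep, avg_reward))
            logger.add_point(x=env.timestep, y=avg_reward)

        if episode % save_plot_every == 0 and episode > 0:
            logger.make_plot(save_path=figure_path + str(episode) + '.png')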
Example #5
                _, payoffs = eval_env.run(is_training=False)
                total_game_played += 1
                reward_random_list.append(payoffs[0])
                reward_random += payoffs[0]
                taking_list.append(eval_env.game.players[0].taking)

            logger_random.log(
                '\n########## Evaluation Against Random - Episode {} ##########'
                .format(episode))
            logger_random.log(
                'Timestep: {} Average reward against random is {}'.format(
                    env.timestep,
                    float(reward_random) / evaluate_num))

            # Add point to logger
            logger_random.add_point(x=env.timestep,
                                    y=float(reward_random) / evaluate_num)

            # Make plot
            logger_random.make_plot(save_path=figure_path_random +
                                    str(episode) + '.png')
            logger_random.make_plot_hist(
                save_path_1=figure_path_random + str(episode) + '_hist.png',
                save_path_2=figure_path_random + str(episode) + '_freq.png',
                reward_list=reward_random_list,
                taking_list=taking_list)

            # Eval against last agent
            reward_opponent = 0
            reward_opponent_list = []
            taking_list = []
            eval_env.set_agents([agent] + [opponent_agent] *
Example #6
                _, payoffs = eval_env.run(is_training=False)
                total_game_played += 1
                reward_random_list.append(payoffs[0])
                reward_random += payoffs[0]
                taking_list.append(eval_env.game.players[0].taking)

            logger_random.log('\n########## Evaluation Against Random - Episode {} ##########'.format(episode))
            logger_random.log(
                'Timestep: {} Average reward against random is {}'.format(env.timestep,
                                                                          float(reward_random) / evaluate_num))

            # Add point to logger
            logger_random.add_point(x=episode, y=float(reward_random) / evaluate_num)

            # Make plot
            logger_random.make_plot(save_path=figure_path_random + str(episode) + '.png')
            logger_random.make_plot_hist(save_path_1=figure_path_random + str(episode) + '_hist.png',
                                         save_path_2=figure_path_random + str(episode) + '_freq.png',
                                         reward_list=reward_random_list, taking_list=taking_list)

        print('\rEPISODE {} - Number of game played {} - {}'.format(episode, total_game_played,
                                                                    time_difference_good_format(seconds,
                                                                                                time.time())),
              end='')

        # Generate data from the environment
        trajectories, _ = env.run(is_training=True)
        total_game_played += 1
Example #7
                    rl_loss = agents[i].train_rl()
                    sl_loss = agents[i].train_sl()
                    print(
                        '\rINFO - Agent {}, step {}, rl-loss: {}, sl-loss: {}'.
                        format(i, step_counters[i], rl_loss, sl_loss),
                        end='')

        # Evaluate the performance. Play with random agents.
        if episode % evaluate_every == 0:
            reward = 0
            eval_episode = 0
            for eval_episode in range(evaluate_num):
                _, payoffs = eval_env.run(is_training=False)
                reward += payoffs[0]

            logger.log('\n########## Evaluation ##########')
            logger.log('episode: {} Average reward is {}'.format(
                episode / evaluate_every,
                float(reward) / evaluate_num))

            # Add point to logger
            logger.add_point(x=episode / evaluate_every,
                             y=float(reward) / evaluate_num)

        # Make plot
        if episode % save_plot_every == 0 and episode > 0:
            logger.make_plot(save_path=figure_path + str(episode) + '.png')

    # Make the final plot
    logger.make_plot(save_path=figure_path + 'final_' + str(episode) + '.png')
Example #8
    sess.run(tf.compat.v1.global_variables_initializer())

    # STATS ON TAKING BID FOR FIRST PLAYER TO SPEAK
    print('\n------------------------')
    print('---- Stats on Bids -----')
    print('------------------------')
    for i in range(num_tests):
        if i * 100 % num_tests == 0:
            print('\rProgress Bids: {}%'.format(int(i * 100 / num_tests)),
                  end='')
        state, player_id = env.init_game()
        points_in_hand = get_hand_value(env.game.players[player_id].hand)
        bouts_in_hand = get_nb_bouts(env.game.players[player_id].hand)
        action = env.decode_action(agent.eval_step(state))
        logger_taking.add_point(x=points_in_hand,
                                y=bouts_in_hand,
                                z=action.get_bid_order())

    # Show the usual results for this agent playing against itself
    print('\n------------------------')
    print('---- Stats on Games ----')
    print('------------------------')
    # Make environment
    env = rlcard.make('tarot')
    global_step = tf.Variable(0, name='global_step', trainable=False)
    sess.run(tf.compat.v1.global_variables_initializer())
    for i in range(num_games):
        hand_value = dict()
        nb_bouts = dict()
        initial_hand = dict()
        # PRINTS HERE TO FORCE THE CODE TO CONTINUE (WEIRD PROBLEM IF NOTHING IS PRINTED HERE)
Example #9
    def test_make_plot(self):
        logger = Logger(xlabel="x", ylabel="y", legend="test")
        for x in range(10):
            logger.add_point(x=x, y=x * x)
        self.assertEqual(9 * 9, logger.ys[9])
        logger.make_plot(save_path='./newtest/test.png')
Example #10
        print('\rIteration {}'.format(episode), end='\n')
    # Evaluate the performance. Play with NFSP agents.
    if episode % evaluate_every == 0:
        #agent.save() # Save model
        reward = 0
        for eval_episode in range(evaluate_num):
            his, payoffs = eval_env.run(is_training=False)
            reward += payoffs[0]

        logger_reward.log('\n########## Evaluation ##########')
        logger_reward.log('Iteration: {} Average reward is {}'.format(
            episode,
            float(reward) / evaluate_num))

        # Add point to logger
        logger_reward.add_point(x=episode, y=float(reward) / evaluate_num)
        import time
        start = time.perf_counter()
        exploitability = agent.compute_exploitability(evaluate_num)
        end = time.perf_counter()
        logger.log('episode: {} cost {:10}s ,exploitability is {}'.format(
            episode, end - start, exploitability))
        logger.add_point(x=episode, y=exploitability)
        print("\n")
    # Make plot
    if episode % save_plot_every == 0 and episode > 0:
        logger.make_plot(save_path=figure_path + str(episode) + '.png')
        logger_reward.make_plot(save_path=figure_path + str(episode) +
                                'reward' + '.png')

# Make the final plot
Example #11
                    is_training=False)

                bet_reward += bet_reward_sum
                change_reward += change_reward_sum

            bet_logger.log('\n########## Evaluation ##########')
            bet_logger.log(
                'Timestep: {} Average bet reward is {}. Average change reward is {}'
                .format(env.timestep,
                        float(bet_reward) / evaluate_num,
                        float(change_reward) / evaluate_num))

            # send_slack('Episode: {} Average bet reward is {}. Average change reward is {}'.format(episode, float(bet_reward)/evaluate_num, float(change_reward)/evaluate_num))

            # Add point to logger
            bet_logger.add_point(x=env.timestep,
                                 y=float(bet_reward) / evaluate_num)
            change_logger.add_point(x=env.timestep,
                                    y=float(change_reward) / evaluate_num)

        # Make plot
        if episode % save_plot_every == 0 and episode > 0:
            bet_logger.make_plot(save_path=figure_path + 'bet/' +
                                 str(episode) + '.png')
            change_logger.make_plot(save_path=figure_path + 'change/' +
                                    str(episode) + '.png')

        if episode % checkpoint_every == 0 and episode > 0:
            bet_path, change_path = agent.save(checkpoint_path, episode)
            print('Saved to {}, {}'.format(bet_path, change_path))

    # Make the final plot