Example #1
def args_evaluate(args):
    model_agent0 = args.model_agent0
    model_agent1 = args.model_agent1
    model_type = args.type
    hidden_units_agent0 = args.hidden_units_agent0
    hidden_units_agent1 = args.hidden_units_agent1
    n_episodes = args.episodes

    if path_exists(model_agent0) and path_exists(model_agent1):

        if model_type == 'nn':
            net0 = TDGammon(hidden_units=hidden_units_agent0, lr=0.1, lamda=None, init_weights=False)
            net1 = TDGammon(hidden_units=hidden_units_agent1, lr=0.1, lamda=None, init_weights=False)
            env = gym.make('gym_backgammon:backgammon-v0')
        else:
            net0 = TDGammonCNN(lr=0.0001)
            net1 = TDGammonCNN(lr=0.0001)
            env = gym.make('gym_backgammon:backgammon-pixel-v0')

        net0.load(checkpoint_path=model_agent0, optimizer=None, eligibility_traces=False)
        net1.load(checkpoint_path=model_agent1, optimizer=None, eligibility_traces=False)

        agents = {WHITE: TDAgent(WHITE, net=net1), BLACK: TDAgent(BLACK, net=net0)}

        evaluate_agents(agents, env, n_episodes)
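A minimal driver for args_evaluate might look like the sketch below. The flag names simply mirror the attributes the function reads (model_agent0, model_agent1, type, hidden_units_agent0, hidden_units_agent1, episodes); the 'cnn' choice string and the default values are assumptions, not necessarily the repository's actual CLI.

import argparse

# Hypothetical CLI wrapper around args_evaluate(); defaults are illustrative only.
parser = argparse.ArgumentParser()
parser.add_argument('--model_agent0', required=True, help='checkpoint path for agent 0')
parser.add_argument('--model_agent1', required=True, help='checkpoint path for agent 1')
parser.add_argument('--type', default='nn', choices=['nn', 'cnn'])
parser.add_argument('--hidden_units_agent0', type=int, default=40)
parser.add_argument('--hidden_units_agent1', type=int, default=40)
parser.add_argument('--episodes', type=int, default=100)

args_evaluate(parser.parse_args())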
Example #2
def main(args):
    # environment
    env = GridWorld()
    # agent
    agent = TDAgent(
        env, epsilon=args.epsilon, gamma=args.discount, alpha=0.05, lamda=0.7)
    agent.control(method=args.algorithm)
Example #3
def evaluate(existing_model_path,
             num_episodes=100,
             num_hidden_units=(40,),
             starting_alpha=0.1, starting_lamda=0.9,
             min_alpha=0.1, min_lamda=0.7,
             alpha_decay=1, lamda_decay=0.96,
             alpha_decay_interval=1, lamda_decay_interval=3e4,
             hidden_activation=nn.Sigmoid(), num_inputs=198,
             opponent="pubeval"):
    """
    Evaluate a saved model against an opponent and print the model's win rate.

    :param existing_model_path: String. Path of the saved model.
    :param num_episodes: Integer. Number of games to play per model.
    :param num_hidden_units: See EvaluationModel class. 
    :param starting_alpha: See EvaluationModel class.
    :param starting_lamda: See EvaluationModel class.
    :param min_alpha: See EvaluationModel class.
    :param min_lamda: See EvaluationModel class.
    :param alpha_decay: See EvaluationModel class.
    :param lamda_decay: See EvaluationModel class.
    :param alpha_decay_interval: See EvaluationModel class.
    :param lamda_decay_interval: See EvaluationModel class.
    :param hidden_activation: See EvaluationModel class.
    :param num_inputs: See EvaluationModel class.
    :param opponent: "pubeval" or "random".
    """

    model = EvaluationModel(num_inputs=num_inputs, num_hidden_units=num_hidden_units,
                            starting_alpha=starting_alpha, starting_lamda=starting_lamda,
                            min_alpha=min_alpha, min_lamda=min_lamda,
                            alpha_decay=alpha_decay, lamda_decay=lamda_decay,
                            alpha_decay_interval=alpha_decay_interval, lamda_decay_interval=lamda_decay_interval,
                            hidden_activation=hidden_activation)

    model.load(checkpoint_path=existing_model_path)

    if opponent == "pubeval":
        opponent_agent = PubevalAgent(0)
    else:
        opponent_agent = RandomAgent(0)
    agents = [opponent_agent, TDAgent(1, model)]
    wins = [0, 0]
    for i in range(num_episodes):
        game = Game(agents)
        wins[game.play()] += 1

    print("\n{}: \t{}".format(existing_model_path, float(wins[1]) / float(sum(wins))))
Example #4
def args_gui(args):
    if path_exists(args.model):

        if args.type == 'nn':
            net = TDGammon(hidden_units=args.hidden_units, lr=0.1, lamda=None, init_weights=False)
            env = gym.make('gym_backgammon:backgammon-v0')
        else:
            net = TDGammonCNN(lr=0.0001)
            env = gym.make('gym_backgammon:backgammon-pixel-v0')

        net.load(checkpoint_path=args.model, optimizer=None, eligibility_traces=False)

        agents = {BLACK: TDAgent(BLACK, net=net), WHITE: HumanAgent(WHITE)}
        gui = GUI(env=env, host=args.host, port=args.port, agents=agents)
        gui.run()
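A hypothetical way to drive args_gui without a full CLI is to build a Namespace with the attributes the function reads (model, type, hidden_units, host, port); the values below are placeholders.

from argparse import Namespace

# Hypothetical call; the checkpoint path, host and port are placeholders.
args_gui(Namespace(model='saved_models/model.tar', type='nn',
                   hidden_units=40, host='localhost', port=8002))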
Example #5
def main(args):
    env = GridWorld()

    agent = TDAgent(env, epsilon=args.epsilon, gamma=args.discount, alpha=args.lr)
    agent.control(method=args.algorithm)
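A minimal argparse wrapper for this main(), assuming the flags match the attributes accessed above (epsilon, discount, lr, algorithm); the defaults and the algorithm name are illustrative only.

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--epsilon', type=float, default=0.1)
    parser.add_argument('--discount', type=float, default=0.99)
    parser.add_argument('--lr', type=float, default=0.05)
    parser.add_argument('--algorithm', default='sarsa')  # assumed value; the agent may accept other methods
    main(parser.parse_args())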
Example #6
def args_plot(args, parser):
    '''
    This method is used to plot the number of times an agent wins when it plays against an opponent.
    Instead of evaluating the agent during training (it can require some time and slow down the training), I decided to plot the wins separately, loading the different
    models saved during training.
    For example, suppose I run the training for 100 games and save my model every 10 games.
    Later I will load these 10 models, and for each of them, I will compute how many times the agent would win against an opponent.
    :return: None
    '''

    src = args.save_path
    hidden_units = args.hidden_units
    n_episodes = args.episodes
    opponents = args.opponent.split(',')
    host = args.host
    port = args.port
    difficulties = args.difficulty.split(',')
    model_type = args.type

    if path_exists(src):

        for d in difficulties:
            if d not in [
                    'beginner', 'intermediate', 'advanced', 'world_class'
            ]:
                parser.error(
                    "--difficulty should be (one or more of) 'beginner', 'intermediate', 'advanced', 'world_class'"
                )

        dst = args.dst

        if 'gnubg' in opponents and (not host or not port):
            parser.error(
                "--host and --port are required when 'gnubg' is specified in --opponent"
            )

        for root, dirs, files in os.walk(src):
            global_step = 0
            files = sorted(files)

            writer = SummaryWriter(dst)

            for file in files:
                if ".tar" in file:
                    print("\nLoad {}".format(os.path.join(root, file)))

                    if model_type == 'nn':
                        net = TDGammon(hidden_units=hidden_units,
                                       lr=0.1,
                                       lamda=None,
                                       init_weights=False)
                        env = gym.make('gym_backgammon:backgammon-v0')
                    else:
                        net = TDGammonCNN(lr=0.0001)
                        env = gym.make('gym_backgammon:backgammon-pixel-v0')

                    net.load(checkpoint_path=os.path.join(root, file),
                             optimizer=None,
                             eligibility_traces=False)

                    if 'gnubg' in opponents:
                        tag_scalar_dict = {}

                        gnubg_interface = GnubgInterface(host=host, port=port)

                        for difficulty in difficulties:
                            gnubg_env = GnubgEnv(gnubg_interface,
                                                 difficulty=difficulty,
                                                 model_type=model_type)
                            wins = evaluate_vs_gnubg(
                                agent=TDAgentGNU(WHITE, net=net, gnubg_interface=gnubg_interface),
                                env=gnubg_env,
                                n_episodes=n_episodes)
                            tag_scalar_dict[difficulty] = wins[WHITE]

                        writer.add_scalars('wins_vs_gnubg/', tag_scalar_dict,
                                           global_step)

                        with open(root + '/results.txt', 'a') as f:
                            print("{};".format(file) + str(tag_scalar_dict),
                                  file=f)

                    if 'random' in opponents:
                        tag_scalar_dict = {}
                        agents = {
                            WHITE: TDAgent(WHITE, net=net),
                            BLACK: RandomAgent(BLACK)
                        }
                        wins = evaluate_agents(agents, env, n_episodes)

                        tag_scalar_dict['random'] = wins[WHITE]

                        writer.add_scalars('wins_vs_random/', tag_scalar_dict,
                                           global_step)

                    global_step += 1

            # close the writer only after every checkpoint in this directory has been evaluated
            writer.close()
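An illustrative way to call args_plot, again using a Namespace whose attribute names mirror what the function reads (save_path, hidden_units, episodes, opponent, host, port, difficulty, dst, type); every value is a placeholder.

from argparse import ArgumentParser, Namespace

# Hypothetical call; paths, host and port are placeholders.
parser = ArgumentParser()
args = Namespace(save_path='saved_models/', hidden_units=40, episodes=50,
                 opponent='gnubg,random', host='localhost', port=8001,
                 difficulty='beginner,advanced', dst='runs/plot', type='nn')
args_plot(args, parser)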
Example #7
    def train_agent(self,
                    env,
                    n_episodes,
                    save_path=None,
                    eligibility=False,
                    save_step=0,
                    name_experiment=''):
        start_episode = self.start_episode
        n_episodes += start_episode

        wins = {WHITE: 0, BLACK: 0}
        network = self

        agents = {
            WHITE: TDAgent(WHITE, net=network),
            BLACK: TDAgent(BLACK, net=network)
        }

        durations = []
        steps = 0
        start_training = time.time()

        for episode in range(start_episode, n_episodes):

            if eligibility:
                self.init_eligibility_traces()

            agent_color, first_roll, observation = env.reset()
            agent = agents[agent_color]

            t = time.time()

            for i in count():
                if first_roll:
                    roll = first_roll
                    first_roll = None
                else:
                    roll = agent.roll_dice()

                # value estimate of the current position, before the move
                p = self(observation)

                actions = env.get_valid_actions(roll)
                action = agent.choose_best_action(actions, env)
                observation_next, reward, done, winner = env.step(action)
                # value estimate of the resulting position
                p_next = self(observation_next)

                if done:
                    if winner is not None:
                        # terminal step: move the prediction toward the actual game outcome
                        loss = self.update_weights(p, reward)

                        wins[agent.color] += 1

                    tot = sum(wins.values())
                    tot = tot if tot > 0 else 1

                    print(
                        "Game={:<6d} | Winner={} | after {:<4} plays || Wins: {}={:<6}({:<5.1f}%) | {}={:<6}({:<5.1f}%) | Duration={:<.3f} sec"
                        .format(episode + 1, winner, i, agents[WHITE].name,
                                wins[WHITE], (wins[WHITE] / tot) * 100,
                                agents[BLACK].name, wins[BLACK],
                                (wins[BLACK] / tot) * 100,
                                time.time() - t))

                    durations.append(time.time() - t)
                    steps += i
                    break
                else:
                    # non-terminal step: TD update toward the next position's estimate
                    loss = self.update_weights(p, p_next)

                agent_color = env.get_opponent_agent()
                agent = agents[agent_color]

                observation = observation_next

            if save_path and save_step > 0 and episode > 0 and (
                    episode + 1) % save_step == 0:
                self.checkpoint(checkpoint_path=save_path,
                                step=episode,
                                name_experiment=name_experiment)
                agents_to_evaluate = {
                    WHITE: TDAgent(WHITE, net=network),
                    BLACK: RandomAgent(BLACK)
                }
                evaluate_agents(agents_to_evaluate, env, n_episodes=20)
                print()

        print("\nAverage duration per game: {} seconds".format(
            round(sum(durations) / n_episodes, 3)))
        print("Average game length: {} plays | Total Duration: {}".format(
            round(steps / n_episodes, 2),
            datetime.timedelta(seconds=int(time.time() - start_training))))

        if save_path:
            self.checkpoint(checkpoint_path=save_path,
                            step=n_episodes - 1,
                            name_experiment=name_experiment)

            with open('{}/comments.txt'.format(save_path), 'a') as file:
                file.write("Average duration per game: {} seconds".format(
                    round(sum(durations) / n_episodes, 3)))
                file.write(
                    "\nAverage game length: {} plays | Total Duration: {}".
                    format(
                        round(steps / n_episodes, 2),
                        datetime.timedelta(seconds=int(time.time() -
                                                       start_training))))

        env.close()
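A sketch of how train_agent might be invoked on a freshly initialized network, assuming the method lives on the TDGammon class (as suggested by network = self) and using the gym environment id from the earlier examples; the hyperparameters, episode counts, and paths are illustrative only.

# Hypothetical training run; hidden_units, lamda, n_episodes, and paths are assumptions.
env = gym.make('gym_backgammon:backgammon-v0')
net = TDGammon(hidden_units=40, lr=0.1, lamda=0.7, init_weights=True)
net.train_agent(env, n_episodes=100000, save_path='saved_models',
                eligibility=True, save_step=10000, name_experiment='td_gammon')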
Example #8
    easy_inputs = {"e", "easy"}
    medium_inputs = {"m", "med", "medium"}
    hard_inputs = {"h", "hard"}
    difficulty_input = ""  # initialize so the prompt loop below runs at least once
    while difficulty_input not in easy_inputs.union(medium_inputs).union(
            hard_inputs):
        difficulty_input = input(
            "Select difficulty level: EASY (E), MEDIUM (M), HARD (H)").lower()
        if difficulty_input in easy_inputs:
            difficulty = Difficulty.EASY
        elif difficulty_input in medium_inputs:
            difficulty = Difficulty.MEDIUM
        elif difficulty_input in hard_inputs:
            difficulty = Difficulty.HARD

    v = vision.Vision()
    td_agent = TDAgent(WHITE, model, v, difficulty)
    human_agent = HumanAgent(BLACK, v)
    agents_list = [td_agent, human_agent]
    game = Game(agents_list)

    set_start_state = False
    if set_start_state:
        start_points = [
            [3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 3, 3, 2, 2, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        ]
        start_bar = [0, 2]