Example #1
def train_agent(hype_space):

    print("----------------------")
    print("Evaluating model: ", hype_space)

    logger = BriscolaLogger(BriscolaLogger.LoggerLevels.TEST)
    game = brisc.BriscolaGame(2, logger)

    tf.reset_default_graph()

    # Initialize agents
    agents = []
    agent = QAgent(
        0,
        hype_space['epsilon_increment'],
        hype_space['epsilon_max'],
        hype_space['discount'],
        NETWORK,
        hype_space['layers'],
        hype_space['learning_rate'],
        hype_space['replace_target_iter'])

    agents.append(agent)
    agents.append(RandomAgent())

    best_total_wins = train(game, agents, NUM_EPOCHS, EVALUATE_EVERY, EVALUATE_FOR, MODEL_DIR)

    print ("Best total wins ----->", best_total_wins)
    best_total_loses = EVALUATE_FOR - best_total_wins
    return best_total_loses
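Since train_agent() takes a hype_space dict and returns a loss to minimize, it reads like the objective of a hyperparameter search, e.g. with hyperopt. A minimal sketch of how it could be wired up, assuming hyperopt is installed; the search ranges below are illustrative and not taken from the original code.

from hyperopt import fmin, tpe, hp

search_space = {
    'epsilon_increment': hp.uniform('epsilon_increment', 1e-5, 1e-3),
    'epsilon_max': hp.uniform('epsilon_max', 0.8, 0.99),
    'discount': hp.uniform('discount', 0.8, 0.99),
    'layers': hp.choice('layers', [[128], [256, 128]]),
    'learning_rate': hp.loguniform('learning_rate', -9, -4),
    'replace_target_iter': hp.choice('replace_target_iter', [500, 1000, 2000]),
}

# Each trial calls train_agent once; hyperopt minimizes the returned losses.
best = fmin(fn=train_agent, space=search_space, algo=tpe.suggest, max_evals=50)
print("Best hyperparameters:", best)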
Example #2
def main(argv=None):
    '''Evaluate agent performance against RandomAgent and AIAgent.'''

    logger = BriscolaLogger(BriscolaLogger.LoggerLevels.TEST)
    game = brisc.BriscolaGame(2, logger)

    # agent to be evaluated is RandomAgent or QAgent if a model is provided
    if FLAGS.model_dir:
        eval_agent = QAgent(network=FLAGS.network)
        eval_agent.load_model(FLAGS.model_dir)
        eval_agent.make_greedy()
    else:
        eval_agent = RandomAgent()

    # test agent against RandomAgent
    agents = [eval_agent, RandomAgent()]

    total_wins, points_history = evaluate(game, agents, FLAGS.num_evaluations)
    stats_plotter(agents, points_history, total_wins)

    # test agent against AIAgent
    agents = [eval_agent, AIAgent()]

    total_wins, points_history = evaluate(game, agents, FLAGS.num_evaluations)
    stats_plotter(agents, points_history, total_wins)
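The FLAGS object used by these examples is defined elsewhere; given the TensorFlow 1.x calls in Example #1 (tf.reset_default_graph()), it is presumably built with tf.app.flags. A minimal sketch covering only the flags this snippet reads; the defaults are assumptions, not values from the original project.

import tensorflow as tf

tf.app.flags.DEFINE_string('model_dir', '', 'Directory of a saved QAgent model; empty evaluates a RandomAgent')
tf.app.flags.DEFINE_string('network', 'dqn', 'Network type passed to QAgent')
tf.app.flags.DEFINE_integer('num_evaluations', 500, 'Number of evaluation games to play')
FLAGS = tf.app.flags.FLAGS

if __name__ == '__main__':
    tf.app.run()  # parses the flags and calls main(argv)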
Example #3
def main(argv=None):

    # Initializing the environment
    logger = BriscolaLogger(BriscolaLogger.LoggerLevels.TRAIN)
    game = brisc.BriscolaGame(2, logger)

    # Initialize agents
    agents = []
    agent = QAgent(FLAGS.epsilon, FLAGS.epsilon_increment, FLAGS.epsilon_max,
                   FLAGS.discount, FLAGS.network, FLAGS.layers,
                   FLAGS.learning_rate, FLAGS.replace_target_iter,
                   FLAGS.batch_size)
    agents.append(agent)
    agent = RandomAgent()
    agents.append(agent)

    train(game, agents, FLAGS.num_epochs, FLAGS.evaluate_every,
          FLAGS.num_evaluations, FLAGS.model_dir)
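The epsilon, epsilon_increment and epsilon_max constructor arguments suggest an annealed epsilon-greedy policy; in DQN-style code of this kind, epsilon is often the probability of acting greedily and is pushed toward epsilon_max after each learning step. The agent's actual implementation is not shown here, so the snippet below is only a sketch of that mechanism with hypothetical names.

import numpy as np

def select_action(q_values, epsilon):
    # Act greedily with probability epsilon, explore uniformly otherwise.
    if np.random.rand() < epsilon:
        return int(np.argmax(q_values))
    return np.random.randint(len(q_values))

def update_epsilon(epsilon, epsilon_increment, epsilon_max):
    # Gradually shift from exploration toward exploitation.
    return min(epsilon + epsilon_increment, epsilon_max)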
Example #4
def initGridWorld(width, height, obstacles, initLocs, numRobots):
    gridworld = Grid.Grid(width, height, obstacles)
    env = Environment(height, width, gridworld)
    agents = [
        QAgent(j, -1, -1, env.state_n, env.action_n) for j in range(numRobots)
    ]

    for i, initLoc in enumerate(initLocs):
        agents[i].setLocation(initLoc[0], initLoc[1])
        gridworld.cells[initLoc[0]][initLoc[1]].occupied = True
        gridworld.cells[initLoc[0]][initLoc[1]].visited = True

    env.agents = agents
    env.updateFrontiers()

    return env
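A minimal usage sketch for initGridWorld; the exact obstacle and start-location formats are assumptions (pairs of grid coordinates), since the Grid and Environment classes are not shown in this snippet.

# Hypothetical 10x10 world with two obstacle cells and two robots.
obstacles = [(3, 3), (6, 7)]
initLocs = [(0, 0), (9, 9)]
env = initGridWorld(width=10, height=10, obstacles=obstacles,
                    initLocs=initLocs, numRobots=2)
print(len(env.agents), "agents placed")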
Example #5
def main(argv=None):

    # Initializing the environment
    logger = BriscolaLogger(BriscolaLogger.LoggerLevels.PVP)
    game = brisc.BriscolaGame(2, logger)

    # Initialize agents
    agents = []
    agents.append(HumanAgent())

    if FLAGS.model_dir:
        agent = QAgent(network=FLAGS.network)
        agent.load_model(FLAGS.model_dir)
        agent.make_greedy()
        agents.append(agent)
    else:
        agent = AIAgent()
        agents.append(agent)

    brisc.play_episode(game, agents, train=False)
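make_greedy() is called here (and in Example #2) before a trained model is evaluated or played against, so it presumably switches exploration off. Its real implementation is not shown; the class below is only a sketch of what such a method could do, under the epsilon convention described after Example #3.

class GreedySwitchSketch:
    # Hypothetical mixin illustrating make_greedy(); not the project's actual QAgent code.
    def make_greedy(self):
        self._training_epsilon = self.epsilon
        self.epsilon = 1.0  # assuming epsilon is the probability of the greedy action

    def restore_epsilon(self):
        # hypothetical counterpart for resuming exploration during training
        self.epsilon = self._training_epsilon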
Example #6
def main(argv):
    if len(argv) > 1:
        env_name = argv[1]
    else:
        env_name = "Taxi-v3"

    env = gym.make(env_name)

    start_alpha = 0.1
    start_gamma = 0.6
    start_epsilon = 0.1
    decay_rate = 0.0001

    if load_from_file:
        # load a previously trained Q-table (the file is closed automatically)
        with open('q_table', 'rb') as f:
            loaded_q_table = pickle.load(f)
        q_agent = QAgent(env,
                         alpha=start_alpha,
                         gamma=start_gamma,
                         epsilon=start_epsilon,
                         q_table=loaded_q_table)
    else:
        q_agent = QAgent(env,
                         alpha=start_alpha,
                         gamma=start_gamma,
                         epsilon=start_epsilon)

    if not load_from_file:
        # metrics
        total_rewards = []  # every 100 epochs
        total_steps = []  # every 100 epochs

        epochs = 15000
        for i in range(epochs):
            steps, cum_reward = q_agent.q_learning()
            if i % 100 == 0:
                steps, total_reward = q_agent.validation()
                total_rewards.append(cum_reward)
                total_steps.append(steps)
                print(i, epochs, cum_reward)
                if decay:
                    # decay alpha and epsilon; gamma is increased linearly below
                    # https://www.youtube.com/watch?v=QzulmoOg2JE
                    new_alpha = 1 / (1 + decay_rate * i) * start_alpha
                    # linear increase of gamma
                    new_gamma = (1 - start_gamma) / epochs * i + start_gamma
                    q_agent.gamma = new_gamma
                    new_epsilon = 1 / (1 + decay_rate * i) * start_epsilon
                    q_agent.alpha = new_alpha
                    q_agent.epsilon = new_epsilon
                    print(new_alpha, new_gamma, new_epsilon)

        # pickle the learned Q-table so it can be reloaded later
        with open('q_table', 'wb') as f:
            pickle.dump(q_agent.q_table, f)

        # show graphs
        x = np.arange(0, epochs, 100)
        utils.plot(x,
                   y=total_rewards,
                   x_label='epochs of training',
                   y_label='cumulative reward')
        utils.plot(x,
                   y=total_steps,
                   x_label='epochs of training',
                   y_label='steps needed')

    # end of the "not load_from_file" branch
    # after training, run the agent 5 times to show its behaviour
    input("Press Enter to continue...")
    os.system('clear')
    print(q_agent.q_table)
    for _ in range(5):
        steps, cum_reward = q_agent.validation(sleep_timer=0.25)
        print("needed steps", steps, "cum_reward", cum_reward)
        time.sleep(1)
    print('finished')
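q_agent.q_learning() is not shown in this snippet; for a tabular agent on Taxi-v3 it presumably plays one episode and applies the standard Q-learning update. A minimal sketch of that update under the 4-tuple Gym step API used above, with a hypothetical function name; the project's QAgent may differ in detail.

import numpy as np

def q_learning_episode(env, q_table, alpha, gamma, epsilon):
    # One training episode of tabular Q-learning; returns (steps, cumulative reward).
    state = env.reset()
    done, steps, cum_reward = False, 0, 0.0
    while not done:
        # epsilon-greedy action selection (here epsilon is the exploration probability)
        if np.random.rand() < epsilon:
            action = env.action_space.sample()
        else:
            action = int(np.argmax(q_table[state]))
        next_state, reward, done, _ = env.step(action)
        # Q(s,a) <- Q(s,a) + alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
        td_target = reward + gamma * np.max(q_table[next_state])
        q_table[state, action] += alpha * (td_target - q_table[state, action])
        state = next_state
        steps += 1
        cum_reward += reward
    return steps, cum_reward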
Example #7
from collections import defaultdict

import game
from agents.one_look_ahead_agent import OneLookAheadAgent
import numpy as np
import keras as ks

from agents.q_agent import QAgent
from agents.till_next_turn_agent import TillNextTurnAgent

agents = [
    QAgent(0,
           4,
           ks.models.load_model("win_loss_model.h5"),
           gamma=0.99,
           use_win_rewards=True),
    QAgent(1,
           4,
           ks.models.load_model("big_net.h5"),
           gamma=0.99,
           use_win_rewards=False),
    # TillNextTurnAgent(2, 4, 15, deeper_top_n=2),
    # OneLookAheadAgent(3, 4)
]

game.play_game(agents, True, shuffle_agents=True)

scores = []
wins = defaultdict(int)
# switched = False
for i in range(100):
Example #8
        if self.board[0] != -1 and self.board[0] == self.board[4] and self.board[4] == self.board[8]:
            return self.board[0]

        if self.board[2] != -1 and self.board[2] == self.board[4] and self.board[4] == self.board[6]:
            return self.board[2]
        return -1


if __name__ == "__main__":
    from server import Server
    import math
    import threading

    server_name = ('localhost', 1337)

    s = Server(server_name, "ttt_rankings", TTTEnvironment())
    clients = [
        RandomAgent(server_name, "Random1", TTTEnvironment()),
        RandomAgent(server_name, "Random2", TTTEnvironment()),
        #QAgent(server_name, "QAgent2", TTTEnvironment(), 0.9, 0.1),
        QAgent(server_name, "ShortSightedQAgent", TTTEnvironment(), 0.1, 0.1),
        QAgent(server_name, "HighGammaQAgent", TTTEnvironment(), 0.999, 0.1),
        QAgent(server_name, "SlowQAgent", TTTEnvironment(), 0.9, 0.01),
        QAgent(server_name, "FastQAgent", TTTEnvironment(), 0.9, 0.3),
    ] + [QAgent(server_name, "QAgent%d" % i, TTTEnvironment(), 0.9, 0.1) for i in range(10)] \
        + [SMaxQAgent(server_name, "SMaxQAgent%d" % i, TTTEnvironment(), 0.9, 0.1, temp=math.exp(i - 5)) for i in range(10)]

    for c in clients:
        threading.Thread(target=c.run).start()

    s.run()
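SMaxQAgent and its temp argument (which here ranges from roughly e^-5 to e^4) point to a softmax/Boltzmann action-selection variant of the Q-agent. Its implementation is not part of this snippet; the function below is only a sketch of temperature-scaled softmax selection over Q-values.

import numpy as np

def softmax_action(q_values, temp):
    # Boltzmann exploration: low temperatures approach greedy selection,
    # high temperatures approach uniform random selection.
    prefs = np.asarray(q_values, dtype=float) / temp
    prefs -= prefs.max()  # subtract the max for numerical stability
    probs = np.exp(prefs) / np.exp(prefs).sum()
    return int(np.random.choice(len(q_values), p=probs))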
Example #9
def main(argv=None):

    global victory_history_1v2
    victory_history_1v2 = []

    global victory_history_1vR
    victory_history_1vR = []

    global victory_history_2vR
    victory_history_2vR = []

    global points_history_1v2
    points_history_1v2 = []

    global points_history_1vR
    points_history_1vR = []

    global points_history_2vR
    points_history_2vR = []

    # Initializing the environment
    logger = BriscolaLogger(BriscolaLogger.LoggerLevels.TRAIN)
    game = brisc.BriscolaGame(2, logger)

    # Initialize agent
    global agent1
    agent1 = QAgent(FLAGS.epsilon, FLAGS.epsilon_increment, FLAGS.epsilon_max,
                    FLAGS.discount, FLAGS.network, FLAGS.layers,
                    FLAGS.learning_rate, FLAGS.replace_target_iter,
                    FLAGS.batch_size)
    global agent2
    agent2 = QAgent(FLAGS.epsilon, FLAGS.epsilon_increment, FLAGS.epsilon_max,
                    FLAGS.discount, FLAGS.network, FLAGS.layers,
                    FLAGS.learning_rate, FLAGS.replace_target_iter,
                    FLAGS.batch_size)

    # Training
    start_time = time.time()
    best_total_wins = self_train(game, agent1, agent2, FLAGS.num_epochs,
                                 FLAGS.evaluate_every, FLAGS.num_evaluations,
                                 FLAGS.copy_every, FLAGS.model_dir)
    print('Best winning ratio : {:.2%}'.format(best_total_wins /
                                               FLAGS.num_evaluations))
    print(time.time() - start_time)

    # Summary graphs
    x = [
        FLAGS.evaluate_every * i
        for i in range(1, 1 + len(victory_history_1v2))
    ]

    # 1v2
    vict_hist = victory_history_1v2
    point_hist = points_history_1v2
    labels = [agent1.name + '1', agent2.name + '2']
    gv.training_summary(x, vict_hist, point_hist, labels, FLAGS,
                        "evaluation_dir/1v2")

    # 1vRandom
    vict_hist = victory_history_1vR
    point_hist = points_history_1vR
    labels = [agent1.name + '1', RandomAgent().name]
    gv.training_summary(x, vict_hist, point_hist, labels, FLAGS,
                        "evaluation_dir/1vR")

    # 2vRandom
    vict_hist = victory_history_2vR
    point_hist = points_history_2vR
    labels = [agent2.name + '2', RandomAgent().name]
    gv.training_summary(x, vict_hist, point_hist, labels, FLAGS,
                        "evaluation_dir/2vR")

    # Evaluation against ai agent
    agents = [agent1, AIAgent()]
    winners, points = evaluate(game, agents, FLAGS.num_evaluations)
    gv.evaluate_summary(
        winners, points, agents,
        "evaluation_dir/" + agents[0].name + "1 vs " + agents[1].name)

    agents = [agent2, AIAgent()]
    winners, points = evaluate(game, agents, FLAGS.num_evaluations)
    gv.evaluate_summary(
        winners, points, agents,
        "evaluation_dir/" + agents[0].name + "2 vs " + agents[1].name)