Example #1
def args_gui(args):
    if path_exists(args.model):
        # assert os.path.exists(args.model), "The path {} doesn't exist".format(args.model)

        if args.type == 'nn':
            net = TDGammon(hidden_units=args.hidden_units, lr=0.1, lamda=None, init_weights=False)
            env = gym.make('gym_backgammon:backgammon-v0')
        else:
            net = TDGammonCNN(lr=0.0001)
            env = gym.make('gym_backgammon:backgammon-pixel-v0')

        net.load(checkpoint_path=args.model, optimizer=None, eligibility_traces=False)

        agents = {BLACK: TDAgent(BLACK, net=net), WHITE: HumanAgent(WHITE)}
        gui = GUI(env=env, host=args.host, port=args.port, agents=agents)
        gui.run()
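
# args_gui only reads args.model, args.type, args.hidden_units, args.host and
# args.port, so a hypothetical command-line wiring could look like this sketch
# (flag names and defaults are assumptions, not taken from the original repo):
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--model', required=True)
parser.add_argument('--type', choices=['nn', 'cnn'], default='nn')
parser.add_argument('--hidden_units', type=int, default=40)
parser.add_argument('--host', default='localhost')
parser.add_argument('--port', type=int, default=8002)
args_gui(parser.parse_args())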
Example #2
def play():
    rounds = 30
    white_wins = 0
    agent1 = HumanAgent()
    nnet = MiniShogiNNetWrapper()
    # nnet.nnet.model.summary()
    plot_model(nnet.nnet.model,
               to_file='model_plot.png',
               show_shapes=True,
               show_layer_names=True)
    agent2 = NNetMCTSAgent(nnet, comp=False)
    print('Preparing neural net')
    # agent2.train_neural_net()
    agent2.comp = True
    agent2.nnet.load_checkpoint(filename='best.h5')

    print('Preparation complete')
    for i in range(1, rounds + 1):
        begin = time.time()
        print('Game {0}/{1}'.format(i, rounds))
        g = MiniShogiGame()
        while True:
            current_agent = agent1 if g.game_state.colour == 'W' else agent2
            current_agent.act(g)
            # print(g.game_state.print())
            logging.info(
                g.game_state.print_state(flip=g.game_state.colour == 'B'))
            if g.game_state.game_ended():
                if g.game_state.colour == 'B':
                    white_wins += 1
                msg = ('Stats: {0} wins {1} ({2}%), {3} wins {4} ({5}%) | time: {6}'
                       .format(agent1.__class__.__name__, white_wins,
                               white_wins / i * 100, agent2.__class__.__name__,
                               i - white_wins, (i - white_wins) / i * 100,
                               time.time() - begin))
                print(msg)
                logging.info(msg)
                break
            if g.game_state.move_count > 300:  # stop very long games
                print('Game too long, terminating')
                break
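
# The two players above are interchangeable because the loop only ever calls
# current_agent.act(g). A hypothetical minimal console agent honouring that
# contract (apply_move() is an assumed method, not the real HumanAgent API):
class ConsoleAgent:
    def act(self, game):
        # Show the board from the side to move, then apply one chosen move.
        print(game.game_state.print_state(flip=game.game_state.colour == 'B'))
        move = input('Your move: ')
        game.apply_move(move)  # hypothetical apply-move method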
Example #3
def play():
    first_move = random.randint(1, 100)

    env = TicTacToeEnv(False)
    human = HumanAgent("X")
    machine = BaseAgent("O")
    agents = [human, machine]
    start_mark = "O" if first_move % 2 == 0 else "X"

    while True:
        env.set_start_mark(start_mark)
        state = env.reset()
        board, mark = state
        done = False
        env.render()

        while not done:
            agent = agent_by_mark(agents, mark)
            is_human = isinstance(agent, HumanAgent)  # avoid shadowing the human agent above
            env.show_turn(True, mark)
            available_actions = env.available_actions()
            if is_human:
                action = agent.act(available_actions)
                if action is None:
                    sys.exit()
            else:
                action = agent.act(board, state, available_actions)

            state, reward, done, info = env.step(action)

            env.render(mode="human")
            if done:
                env.show_result(True, mark, reward)
                break
            else:
                board, mark = state

        start_mark = next_mark(start_mark)
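
# The loop above only assumes HumanAgent.act(available_actions) returns a
# chosen action, or None to quit (hence the sys.exit() branch). A hypothetical
# minimal implementation consistent with that contract:
class SimpleHumanAgent:
    def __init__(self, mark):
        self.mark = mark

    def act(self, available_actions):
        while True:
            raw = input('Enter a cell index, or q to quit: ')
            if raw.lower() == 'q':
                return None
            if raw.isdigit() and int(raw) in available_actions:
                return int(raw)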
Example #4
    parser = argparse.ArgumentParser()
    parser.add_argument('p1', choices=['r', 'h', 'c'])
    parser.add_argument('p2', choices=['r', 'h', 'c'])
    parser.add_argument('nrows', type=int)
    parser.add_argument('ncols', type=int)
    parser.add_argument('--prune', action='store_true')
    parser.add_argument('--depth', type=int)
    args = parser.parse_args()
    # print("args:", args)

    players = []
    for p in [args.p1, args.p2]:
        if p == 'r':
            player = RandomAgent()
        elif p == 'h':
            player = HumanAgent()
        elif p == 'c':
            if args.depth is None:
                player = MinimaxAgent()
            elif args.prune:
                player = MinimaxHeuristicPruneAgent(args.depth)
            else:
                player = MinimaxHeuristicAgent(args.depth)
        players.append(player)

    start_state = GameState(args.nrows, args.ncols)

    results = []
    w1 = 0
    w2 = 0
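
    # The snippet is truncated here; a plausible continuation (the GameState
    # and agent methods below are assumptions for illustration, not the
    # original code) would alternate moves until a terminal state:
    state = start_state
    turn = 0
    while state.winner() is None:                # hypothetical terminal test
        state = players[turn % 2].move(state)    # hypothetical agent API
        turn += 1
    results.append(state.winner())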
Example #5
paddleB = Paddle(WHITE, 10, 100)
paddleB.rect.x = 670
paddleB.rect.y = 200

ball = Ball(WHITE, 10, 10)
ball.rect.x = 345
ball.rect.y = 195

all_sprite_list = pygame.sprite.Group()
all_sprite_list.add(paddleA)
all_sprite_list.add(paddleB)
all_sprite_list.add(ball)

# Player agents
player_random = RandomAgent(3)
player_human = HumanAgent()

# Main-loop flag
gameOn = True

# Clock controls how fast the screen updates
clock = pygame.time.Clock()

# Player scores
scoreA = 0
scoreB = 0

# Main game loop
while gameOn:

    # Terminate if the user quits
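    # (A standard pygame quit check, as the comment above announces; a minimal
    # sketch, not taken from the original source.)
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            gameOn = False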
Example #6
import gym
from agents import A2CAgent, RandomAgent, HumanAgent, MCTSAgent

N_PLAYERS = 2

env = gym.make("gym_azul:azul-v0", n_players=N_PLAYERS)

# define some agents
human = HumanAgent()
mcts = MCTSAgent()  # beware, only supports 2 players
random_agent = RandomAgent()  # renamed so it doesn't shadow the random module
a2c = A2CAgent(env, hidden_dim=256)
a2c_path = 'checkpoints/12999.pt'
a2c.learning = False
if a2c_path:
    a2c.load(a2c_path)

# which agents do you want to see playing
agents = [mcts, random_agent]

# playing loop
state = env.reset()
done = False
while not done:
    for agent_id, agent in enumerate(agents):
        if done: break
        state, done = agent.play(state, env, agent_id)

winner, score = env.get_winner()
print('Agent {} won with score {}!'.format(winner, score))
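
# To take one of the two seats yourself instead of spectating, swap the
# HumanAgent into the lineup before the playing loop (same setup as above):
agents = [human, mcts]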
Example #7
    medium_inputs = {"m", "med", "medium"}
    hard_inputs = {"h", "hard"}
    while difficulty_input not in easy_inputs.union(medium_inputs).union(
            hard_inputs):
        difficulty_input = input(
            "Select difficulty level: EASY (E), MEDIUM (M), HARD (H): ").lower()
        if difficulty_input in easy_inputs:
            difficulty = Difficulty.EASY
        elif difficulty_input in medium_inputs:
            difficulty = Difficulty.MEDIUM
        elif difficulty_input in hard_inputs:
            difficulty = Difficulty.HARD

    v = vision.Vision()
    td_agent = TDAgent(WHITE, model, v, difficulty)
    human_agent = HumanAgent(BLACK, v)
    agents_list = [td_agent, human_agent]
    game = Game(agents_list)

    set_start_state = False
    if set_start_state:
        start_points = [
            [3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0,
             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 3, 3, 2, 2, 0, 3, 0, 0, 0, 0,
             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        ]
        start_bar = [0, 2]
        start_roll = [3, 1]
Example #8
    if load_from:
        agent.load(load_from)
    agents.append(agent)
    agents.append(MCTSAgent())

if PLAYING_MODE == 'manual':
    # 1 agent and 1 human
    assert N_PLAYERS == 2
    actor_optim = optim.Adam
    critic_optim = optim.Adam
    agent = A2CAgent(env, HIDDEN_DIM, actor_optim, critic_optim, ACTOR_LR,
                     CRITIC_LR, GAMMA)
    if load_from:
        agent.load(load_from)
    agents.append(agent)
    agents.append(HumanAgent())

# ==================== ACTUAL TRAINING ==========================================

for ep in range(N_EPISODES):
    state = env.reset()
    done = False
    counter = 0
    print('Game {}/{}'.format(ep + 1, N_EPISODES))
    while not done:
        update = not ((counter + 1) % UPDATE_EVERY)  # true once every UPDATE_EVERY turns
        counter += 1
        for agent_id, agent in enumerate(agents):
            if done: break
            state, done = agent.play(state, env, agent_id)
            if update and TRAINING: