Ejemplo n.º 1
0
def do_self_play(board_size, agent1_filename, agent2_filename, num_games,
                 experience_filename, gpu_frac):
    """Simulate self-play games between two saved agents and record
    agent 1's experience to an HDF5 file.

    Agent 1 alternates colors each game and earns reward +1 on a win,
    -1 otherwise.  Only agent 1's moves are collected.
    """
    kerasutil.set_gpu_memory_target(gpu_frac)

    # Per-process seeding so parallel self-play workers diverge.
    random.seed(int(time.time()) + os.getpid())
    np.random.seed(int(time.time()) + os.getpid())

    agent1 = load_agent(agent1_filename)
    agent2 = load_agent(agent2_filename)

    collector = rl.ExperienceCollector()

    agent1_color = Player.black
    for game_num in range(num_games):
        print('Simulating game %d/%d...' % (game_num + 1, num_games))
        collector.begin_episode()
        agent1.set_collector(collector)

        if agent1_color == Player.black:
            black_player, white_player = agent1, agent2
        else:
            black_player, white_player = agent2, agent1
        game_record = simulate_game(black_player, white_player, board_size)
        agent1_won = game_record.winner == agent1_color
        if agent1_won:
            print('Agent 1 wins.')
        else:
            print('Agent 2 wins.')
        collector.complete_episode(reward=1 if agent1_won else -1)
        agent1_color = agent1_color.other

    experience = rl.combine_experience([collector])
    print('Saving experience buffer to %s\n' % experience_filename)
    with h5py.File(experience_filename, 'w') as experience_outf:
        experience.serialize(experience_outf)
Ejemplo n.º 2
0
def play_games(args):
    """Worker entry point: play a series of evaluation games between two
    saved policy agents.

    *args* is a tuple ``(agent1_fname, agent2_fname, num_games,
    board_size, gpu_frac)``.  Agent 1 alternates colors each game.
    Returns ``(wins, losses)`` from agent 1's point of view.
    """
    agent1_fname, agent2_fname, num_games, board_size, gpu_frac = args

    kerasutil.set_gpu_memory_target(gpu_frac)

    # Per-process seeding so parallel workers diverge.
    random.seed(int(time.time()) + os.getpid())
    np.random.seed(int(time.time()) + os.getpid())

    with h5py.File(agent1_fname, 'r') as agent1f:
        agent1 = agent.load_policy_agent(agent1f)
    with h5py.File(agent2_fname, 'r') as agent2f:
        agent2 = agent.load_policy_agent(agent2f)

    wins = 0
    losses = 0
    agent1_color = Player.black
    for game_num in range(num_games):
        print('Simulating game %d/%d...' % (game_num + 1, num_games))
        if agent1_color == Player.black:
            black_player, white_player = agent1, agent2
        else:
            black_player, white_player = agent2, agent1
        game_record = simulate_game(black_player, white_player, board_size)
        if game_record.winner == agent1_color:
            print('Agent 1 wins')
            wins += 1
        else:
            print('Agent 2 wins')
            losses += 1
        print('Agent 1 record: %d/%d' % (wins, wins + losses))
        agent1_color = agent1_color.other
    return wins, losses
Ejemplo n.º 3
0
def play_games(args):
    """Worker entry point: evaluate two saved agents against each other
    using an eps-greedy policy at the given temperature.

    *args* is a tuple ``(agent1_fname, agent2_fname, num_games,
    board_size, gpu_frac, temperature)``.  Agent 1 alternates colors.
    Returns ``(wins, losses)`` from agent 1's point of view.
    """
    (agent1_fname, agent2_fname, num_games,
     board_size, gpu_frac, temperature) = args

    kerasutil.set_gpu_memory_target(gpu_frac)

    # Per-process seeding so parallel workers diverge.
    random.seed(int(time.time()) + os.getpid())
    np.random.seed(int(time.time()) + os.getpid())

    # Both agents get the same exploration settings.
    players = []
    for fname in (agent1_fname, agent2_fname):
        loaded = load_agent(fname)
        loaded.set_temperature(temperature)
        loaded.set_policy('eps-greedy')
        players.append(loaded)
    agent1, agent2 = players

    wins = 0
    losses = 0
    agent1_color = Player.black
    for game_num in range(num_games):
        print('Simulating game %d/%d...' % (game_num + 1, num_games))
        if agent1_color == Player.black:
            black_player, white_player = agent1, agent2
        else:
            black_player, white_player = agent2, agent1
        game_record = simulate_game(black_player, white_player, board_size)
        if game_record.winner == agent1_color:
            print('Agent 1 wins')
            wins += 1
        else:
            print('Agent 2 wins')
            losses += 1
        print('Agent 1 record: %d/%d' % (wins, wins + losses))
        agent1_color = agent1_color.other
    return (wins, losses)
Ejemplo n.º 4
0
def play_games(agent1_fname, agent2_fname,
               num_games=480, board_size=19,
               gpu_frac=0.95, temperature=0.0):
    """Evaluate two saved agents head to head, logging a timestamp for
    each result.

    Agent 1 alternates colors each game.  Returns ``(wins, losses)``
    from agent 1's point of view.
    """
    kerasutil.set_gpu_memory_target(gpu_frac)

    # Both agents get identical exploration settings.
    players = []
    for fname in (agent1_fname, agent2_fname):
        loaded = load_agent(fname)
        loaded.set_temperature(temperature)
        loaded.set_policy('eps-greedy')
        players.append(loaded)
    agent1, agent2 = players

    wins = 0
    losses = 0
    agent1_color = Player.black
    for game_num in range(num_games):
        print('Simulating game %d/%d...' % (game_num + 1, num_games))
        if agent1_color == Player.black:
            black_player, white_player = agent1, agent2
        else:
            black_player, white_player = agent2, agent1
        game_record = simulate_game(black_player, white_player, board_size)
        if game_record.winner == agent1_color:
            print('Agent 1 wins , time is %s' % (datetime.datetime.now()))
            wins += 1
        else:
            print('Agent 2 wins, time is %s' % (datetime.datetime.now()))
            losses += 1
        print('Agent 1 record: %d/%d' % (wins, wins + losses))
        agent1_color = agent1_color.other
    return wins, losses
Ejemplo n.º 5
0
def do_self_play(board_size,
                 agent1_filename,
                 agent2_filename,
                 num_games,
                 temperature,
                 experience_filename,
                 chunk=100,
                 gpu_frac=0.95):
    """Run self-play in chunks, saving one experience file per chunk.

    Each chunk of ``chunk`` games is collected into its own buffer and
    written to ``<experience_filename>_<games_so_far>.h5``.  Agent 1
    alternates colors and earns reward +1 on a win, -1 otherwise; only
    agent 1's moves are collected.

    NOTE(review): any remainder games (``num_games % chunk``) are
    silently not played — preserved as-is to keep behavior unchanged.
    """
    kerasutil.set_gpu_memory_target(gpu_frac)

    # Per-process seeding so parallel self-play workers diverge.
    random.seed(int(time.time()) + os.getpid())
    np.random.seed(int(time.time()) + os.getpid())

    agent1 = load_agent(agent1_filename)
    agent1.set_temperature(temperature)
    agent1.set_policy('eps-greedy')
    agent2 = load_agent(agent2_filename)
    agent2.set_temperature(temperature)
    agent2.set_policy('eps-greedy')

    color1 = Player.black
    times = num_games // chunk

    for current_chunk in range(times):
        print('Текущая порция %d' % current_chunk)
        collector1 = rl.ExperienceCollector()
        for i in range(chunk):
            print('Симуляция игры %d/%d...' % (i + 1, chunk))
            collector1.begin_episode()
            agent1.set_collector(collector1)

            if color1 == Player.black:
                black_player, white_player = agent1, agent2
            else:
                white_player, black_player = agent1, agent2
            game_record = simulate_game(black_player, white_player, board_size)
            cnt_moves = len(game_record.moves)
            if game_record.winner == color1:
                print('Агент 1 выигрывает, время: %s' %
                      (datetime.datetime.now()))
                collector1.complete_episode(reward=1)
            else:
                print('Агент 2 выигрывает, время: %s' %
                      (datetime.datetime.now()))
                collector1.complete_episode(reward=-1)
            print('Количество ходов в игре = ', cnt_moves)
            color1 = color1.other

        # Build the output name once so the log message and the file
        # actually written can never disagree.  (Bug fix: the old log
        # message omitted the '_' separator that the real filename has.)
        out_filename = (experience_filename + '_' +
                        str((current_chunk + 1) * chunk) + '.h5')
        experience = rl.combine_experience([collector1])
        print('Saving experience buffer to %s\n' % out_filename)
        with h5py.File(out_filename, 'w') as experience_outf:
            experience.serialize(experience_outf)
Ejemplo n.º 6
0
def do_self_play(args):
    """Worker entry point: margin-rewarded self-play, collecting both
    agents' experience.

    *args* is a tuple ``(work_dir, board_size, agent1_num, agent2_num,
    num_games, experience_filename, gpu_frac, load_args)``.  Rewards are
    ``+/- game_record.margin`` (0 on a draw).  Both collectors are
    combined and serialized to *experience_filename*.  Returns
    ``(ag1b, ag2b)``: black-perspective scores for the games where
    agent 1 (resp. agent 2) held black.
    """
    (work_dir, board_size, agent1_num, agent2_num,
     num_games, experience_filename, gpu_frac, load_args) = args
    kerasutil.set_gpu_memory_target(gpu_frac)
    # Per-process seeding so parallel workers diverge.
    random.seed(int(time.time()) + os.getpid())
    np.random.seed(int(time.time()) + os.getpid())
    agent1 = load_agent(agent1_num, work_dir, load_args)
    agent2 = load_agent(agent2_num, work_dir, load_args)
    collector1 = ExperienceCollector()
    collector2 = ExperienceCollector()

    ag1b = []
    ag2b = []

    color1 = Player.black
    for game_num in range(num_games):
        print('Simulating game %d/%d...' % (game_num + 1, num_games))
        collector1.begin_episode()
        agent1.set_collector(collector1)
        collector2.begin_episode()
        agent2.set_collector(collector2)

        if color1 == Player.black:
            black_player, white_player = agent1, agent2
        else:
            black_player, white_player = agent2, agent1
        game_record = simulate_game(black_player, white_player, board_size)

        # Reward from agent 1's point of view; agent 2 gets the negation.
        if game_record.winner == color1:
            print('Agent 1 wins.')
            reward1 = game_record.margin
        elif game_record.winner == color1.other:
            print('Agent 2 wins.')
            reward1 = -game_record.margin
        else:
            print('Agents play a draw.')
            reward1 = 0
        collector1.complete_episode(reward=reward1)
        collector2.complete_episode(reward=-reward1)

        # Score from black's perspective, logged under whichever agent
        # held black this game.
        if game_record.winner == Player.black:
            black_score = game_record.margin
        elif game_record.winner == Player.white:
            black_score = -game_record.margin
        else:
            black_score = 0
        if color1 == Player.black:
            ag1b.append(black_score)
        else:
            ag2b.append(black_score)
        color1 = color1.other

    experience = combine_experience([collector1, collector2])
    print('Saving experience buffer to %s\n' % experience_filename)
    with h5py.File(experience_filename, 'w') as experience_outf:
        experience.serialize(experience_outf)
    return (ag1b, ag2b)
Ejemplo n.º 7
0
def play_games(args):
    """Worker entry point: play evaluation games and record
    black-perspective margins.

    *args* is a tuple ``(num_games, board_size, gpu_frac, load_args)``;
    the agent numbers and directories come from *load_args*.  Returns
    ``(ag1b, ag2b)``: black-perspective scores for the games where
    agent 1 (resp. agent 2) held black.
    """
    num_games, board_size, gpu_frac, load_args = args

    kerasutil.set_gpu_memory_target(gpu_frac)

    # Per-process seeding so parallel workers diverge.
    random.seed(int(time.time()) + os.getpid())
    np.random.seed(int(time.time()) + os.getpid())

    agent1 = load_agent(1, load_args.agent1_num, load_args.agent1_dir,
                        load_args)
    agent2 = load_agent(2, load_args.agent2_num, load_args.agent2_dir,
                        load_args)

    ag1b = []
    ag2b = []

    color1 = Player.black
    for game_num in range(num_games):
        print('Simulating game %d/%d...' % (game_num + 1, num_games))

        if color1 == Player.black:
            black_player, white_player = agent1, agent2
        else:
            black_player, white_player = agent2, agent1
        game_record = simulate_game(black_player, white_player, board_size)

        if game_record.winner == color1:
            print('Agent 1 wins.')
        elif game_record.winner == color1.other:
            print('Agent 2 wins.')
        else:
            print('Agents play a draw.')

        # Score from black's perspective, logged under whichever agent
        # held black this game.
        if game_record.winner == Player.black:
            black_score = game_record.margin
        elif game_record.winner == Player.white:
            black_score = -game_record.margin
        else:
            black_score = 0
        if color1 == Player.black:
            ag1b.append(black_score)
        else:
            ag2b.append(black_score)
        color1 = color1.other

    return (ag1b, ag2b)