Example #1
def experience_simulation(num_games, agent1, agent2):
    collector1 = rl.ExperienceCollector()
    collector2 = rl.ExperienceCollector()

    color1 = Player.black
    for i in range(num_games):
        collector1.begin_episode()
        agent1.set_collector(collector1)
        collector2.begin_episode()
        agent2.set_collector(collector2)

        if color1 == Player.black:
            black_player, white_player = agent1, agent2
        else:
            white_player, black_player = agent1, agent2
        game_record = simulate_game(black_player, white_player)
        if game_record.winner == color1:
            collector1.complete_episode(reward=1)
            collector2.complete_episode(reward=-1)
        else:
            collector2.complete_episode(reward=1)
            collector1.complete_episode(reward=-1)
        color1 = color1.other

    return rl.combine_experience([collector1, collector2])
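These snippets appear to be self-play scripts built on the dlgo library from the book Deep Learning and the Game of Go. As a minimal sketch of the usual next step, assuming the book-style rl.load_experience and agent.train(...) APIs (and a hypothetical experience.h5 path), the buffer returned by experience_simulation is serialized and later fed back into a training step:

# Minimal sketch, assuming dlgo-style APIs. rl.load_experience and
# agent1.train(...) are assumptions from the book's codebase;
# 'experience.h5' is a hypothetical path, and agent1/agent2 are
# assumed to be loaded agents as in the example above.
import h5py
from dlgo import rl  # assumed import, as in the book's scripts

experience = experience_simulation(100, agent1, agent2)
with h5py.File('experience.h5', 'w') as experience_outf:
    experience.serialize(experience_outf)

with h5py.File('experience.h5', 'r') as experience_inf:
    exp_buffer = rl.load_experience(experience_inf)
agent1.train(exp_buffer, lr=0.01, batch_size=1024)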
Example #2
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--board-size', type=int, default=19)
    parser.add_argument('--learning-agent', required=True)
    parser.add_argument('--num-games', '-n', type=int, default=10)
    # parser.add_argument('--game-log-out', required=True)
    parser.add_argument('--experience-out', required=True)
    parser.add_argument('--temperature', type=float, default=0.0)

    args = parser.parse_args()
    global BOARD_SIZE
    BOARD_SIZE = args.board_size
    agent1 = rl.load_ac_agent(h5py.File(args.learning_agent, 'r'))
    agent2 = rl.load_ac_agent(h5py.File(args.learning_agent, 'r'))
    agent1.set_temperature(args.temperature)
    agent2.set_temperature(args.temperature)

    collector1 = rl.ExperienceCollector()
    collector2 = rl.ExperienceCollector()

    color1 = Player.black
    # logf = open(args.game_log_out, 'a')
    # logf.write('Begin training at %s\n' % (datetime.datetime.now().strftime('%Y-%m-%d %H:%M'),))
    for i in range(args.num_games):
        print('Simulating game %d/%d...' % (i + 1, args.num_games))
        collector1.begin_episode()
        agent1.set_collector(collector1)
        collector2.begin_episode()
        agent2.set_collector(collector2)

        if color1 == Player.black:
            black_player, white_player = agent1, agent2
        else:
            white_player, black_player = agent1, agent2
        game_record = simulate_game(black_player, white_player)
        if game_record.winner == color1:
            print('Agent 1 wins.')
            collector1.complete_episode(reward=1)
            collector2.complete_episode(reward=-1)
        else:
            print('Agent 2 wins.')
            collector2.complete_episode(reward=1)
            collector1.complete_episode(reward=-1)
        color1 = color1.other

    experience = rl.combine_experience([collector1, collector2])
    # logf.write('Saving experience buffer to %s\n' % args.experience_out)
    with h5py.File(args.experience_out, 'w') as experience_outf:
        experience.serialize(experience_outf)
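All of these examples rely on a simulate_game helper that is not shown. The sketch below is consistent with the dlgo self-play scripts these examples derive from; the GameRecord namedtuple and the goboard_fast/scoring modules are assumptions from that codebase:

# Hedged sketch of the simulate_game helper, assuming dlgo-style modules
# and a GameRecord namedtuple with moves, winner, and margin fields.
from collections import namedtuple

from dlgo import scoring
from dlgo.goboard_fast import GameState
from dlgo.gotypes import Player

GameRecord = namedtuple('GameRecord', 'moves winner margin')

def simulate_game(black_player, white_player, board_size=19):
    moves = []
    game = GameState.new_game(board_size)
    agents = {Player.black: black_player, Player.white: white_player}
    while not game.is_over():
        # Each agent picks a move for the color it was assigned.
        next_move = agents[game.next_player].select_move(game)
        moves.append(next_move)
        game = game.apply_move(next_move)
    game_result = scoring.compute_game_result(game)
    return GameRecord(moves=moves, winner=game_result.winner,
                      margin=game_result.winning_margin)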
Example #3
def do_self_play(board_size, agent1_filename, agent2_filename, num_games,
                 experience_filename, gpu_frac):
    kerasutil.set_gpu_memory_target(gpu_frac)

    random.seed(int(time.time()) + os.getpid())
    np.random.seed(int(time.time()) + os.getpid())

    agent1 = load_agent(agent1_filename)
    agent2 = load_agent(agent2_filename)

    collector1 = rl.ExperienceCollector()

    color1 = Player.black
    for i in range(num_games):
        print('Simulating game %d/%d...' % (i + 1, num_games))
        collector1.begin_episode()
        agent1.set_collector(collector1)

        if color1 == Player.black:
            black_player, white_player = agent1, agent2
        else:
            white_player, black_player = agent1, agent2
        game_record = simulate_game(black_player, white_player, board_size)
        if game_record.winner == color1:
            print('Agent 1 wins.')
            collector1.complete_episode(reward=1)
        else:
            print('Agent 2 wins.')
            collector1.complete_episode(reward=-1)
        color1 = color1.other

    experience = rl.combine_experience([collector1])
    print('Saving experience buffer to %s\n' % experience_filename)
    with h5py.File(experience_filename, 'w') as experience_outf:
        experience.serialize(experience_outf)
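Example #3 is written to run as one of several parallel self-play workers: seeding random and np.random with time plus os.getpid() gives each worker process its own RNG stream, and kerasutil.set_gpu_memory_target(gpu_frac) caps the process's share of GPU memory so multiple workers can coexist on one GPU.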
Example #4
def do_self_play(board_size,
                 agent1_filename,
                 agent2_filename,
                 num_games,
                 temperature,
                 experience_filename,
                 chunk=100,
                 gpu_frac=0.95):
    kerasutil.set_gpu_memory_target(gpu_frac)

    random.seed(int(time.time()) + os.getpid())
    np.random.seed(int(time.time()) + os.getpid())

    agent1 = load_agent(agent1_filename)
    agent1.set_temperature(temperature)
    agent1.set_policy('eps-greedy')
    agent2 = load_agent(agent2_filename)
    agent2.set_temperature(temperature)
    agent2.set_policy('eps-greedy')

    color1 = Player.black
    times = num_games // chunk

    for current_chunk in range(times):
        print('Current chunk %d' % current_chunk)
        collector1 = rl.ExperienceCollector()
        for i in range(chunk):
            print('Simulating game %d/%d...' % (i + 1, chunk))
            collector1.begin_episode()
            agent1.set_collector(collector1)

            if color1 == Player.black:
                black_player, white_player = agent1, agent2
            else:
                white_player, black_player = agent1, agent2
            game_record = simulate_game(black_player, white_player, board_size)
            cnt_moves = len(game_record.moves)
            if game_record.winner == color1:
                print('Agent 1 wins, time: %s' %
                      (datetime.datetime.now()))
                collector1.complete_episode(reward=1)
            else:
                print('Agent 2 wins, time: %s' %
                      (datetime.datetime.now()))
                collector1.complete_episode(reward=-1)
            print('Number of moves in the game = ', cnt_moves)
            color1 = color1.other

        experience = rl.combine_experience([collector1])
        print('Saving experience buffer to %s\n' % (experience_filename + '_' +
              str((current_chunk + 1) * chunk) + '.h5'))
        with h5py.File(
                experience_filename + '_' + str(
                    (current_chunk + 1) * chunk) + '.h5',
                'w') as experience_outf:
            experience.serialize(experience_outf)
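The variant above (Example #4) flushes experience to a separate HDF5 file every chunk games, keeping the in-memory buffer bounded on long runs. Below is a hedged sketch of merging those chunk files back into one buffer for training; rl.load_experience and the ExperienceBuffer(states, actions, rewards, advantages) constructor are assumptions from the dlgo codebase, and the file-naming pattern follows the example above:

# Hedged sketch: concatenate per-chunk experience files into one buffer.
import h5py
import numpy as np
from dlgo import rl  # assumed import, as in the book's scripts

def merge_chunks(experience_filename, chunk, num_games):
    buffers = []
    for n in range(chunk, num_games + 1, chunk):
        with h5py.File(experience_filename + '_' + str(n) + '.h5', 'r') as f:
            buffers.append(rl.load_experience(f))
    return rl.ExperienceBuffer(
        states=np.concatenate([b.states for b in buffers]),
        actions=np.concatenate([b.actions for b in buffers]),
        rewards=np.concatenate([b.rewards for b in buffers]),
        advantages=np.concatenate([b.advantages for b in buffers]))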
Example #5
def main():
    # parser = argparse.ArgumentParser()
    # parser.add_argument('--learning-agent', required=True)
    # parser.add_argument('--num-games', '-n', type=int, default=10)
    # parser.add_argument('--game-log-out', required=True)
    # parser.add_argument('--experience-out', required=True)
    # parser.add_argument('--temperature', type=float, default=0.0)
    #
    # args = parser.parse_args()

    # 9.19
    # NOTE: agent_filename, num_games, and experience_filename must be defined
    # elsewhere; Example #6 shows the same code with the argparse wiring filled in.
    agent1 = agent.load_policy_agent(h5py.File(agent_filename, 'r'))
    agent2 = agent.load_policy_agent(h5py.File(agent_filename, 'r'))
    collector1 = rl.ExperienceCollector()
    collector2 = rl.ExperienceCollector()
    agent1.set_collector(collector1)
    agent2.set_collector(collector2)


    # color1 = Player.black
    # logf = open(args.game_log_out, 'a')
    # logf.write('Begin training at %s\n' % (datetime.datetime.now().strftime('%Y-%m-%d %H:%M'),))

    # 9.20
    for i in range(num_games):
        collector1.begin_episode()
        collector2.begin_episode()

        game_record = simulate_game(agent1, agent2)
        if game_record.winner == Player.black:
            collector1.complete_episode(reward=1)       # Agent1 won the game, so +reward
            collector2.complete_episode(reward=-1)
        else:
            collector2.complete_episode(reward=1)       # Agent2 won the game
            collector1.complete_episode(reward=-1)


    experience = rl.combine_experience([collector1, collector2])
    # logf.write('Saving experience buffer to %s\n' % args.experience_out)
    with h5py.File(experience_filename, 'w') as experience_outf:
        experience.serialize(experience_outf)
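Note that unlike Examples #1 and #2, agent1 here always plays black: simulate_game(agent1, agent2) never swaps colors, so the reward signal also reflects black's first-move advantage rather than agent strength alone. The listing-number comments (9.19, 9.20) suggest this is the intermediate version from chapter 9 of the book; Example #6 shows the completed script.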
Example #6
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--board-size', type=int, required=True)
    parser.add_argument('--learning-agent', required=True)
    parser.add_argument('--num-games', '-n', type=int, default=10)
    parser.add_argument('--experience-out', required=True)

    args = parser.parse_args()
    agent_filename = args.learning_agent
    experience_filename = args.experience_out
    num_games = args.num_games
    global BOARD_SIZE
    BOARD_SIZE = args.board_size

    agent1 = agent.load_policy_agent(h5py.File(agent_filename, 'r'))
    agent2 = agent.load_policy_agent(h5py.File(agent_filename, 'r'))
    collector1 = rl.ExperienceCollector()
    collector2 = rl.ExperienceCollector()
    agent1.set_collector(collector1)
    agent2.set_collector(collector2)

    for i in range(num_games):
        collector1.begin_episode()
        collector2.begin_episode()

        game_record = simulate_game(agent1, agent2)
        if game_record.winner == Player.black:
            collector1.complete_episode(reward=1)
            collector2.complete_episode(reward=-1)
        else:
            collector2.complete_episode(reward=1)
            collector1.complete_episode(reward=-1)

    experience = rl.combine_experience([collector1, collector2])
    with h5py.File(experience_filename, 'w') as experience_outf:
        experience.serialize(experience_outf)
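For reference, a script like Example #6 would be invoked along these lines (the script name is hypothetical): python self_play.py --board-size 9 --learning-agent bot.h5 --experience-out experience.h5 -n 100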
Example #7
def do_self_play(board_size, agent1_filename, agent2_filename, num_games,
                 temperature, experience_filename, gpu_frac):

    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        # Restrict TensorFlow to only use the first GPU
        try:
            tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
            tf.config.experimental.set_memory_growth(gpus[0], True)
        except RuntimeError as e:
            print(e)

    random.seed(int(time.time()) + os.getpid())
    np.random.seed(int(time.time()) + os.getpid())

    with h5py.File(agent1_filename, 'r') as agent1f:
        agent1 = agent.load_policy_agent(agent1f)
    agent1.set_temperature(temperature)
    with h5py.File(agent2_filename, 'r') as agent2f:
        agent2 = agent.load_policy_agent(agent2f)

    collector1 = rl.ExperienceCollector()

    color1 = Player.black
    for i in range(num_games):
        print('Simulating game %d/%d...' % (i + 1, num_games))
        collector1.begin_episode()
        agent1.set_collector(collector1)

        if color1 == Player.black:
            black_player, white_player = agent1, agent2
        else:
            white_player, black_player = agent1, agent2
        game_record = simulate_game(black_player, white_player, board_size)
        if game_record.winner == color1:
            print('Agent 1 wins.')
            collector1.complete_episode(reward=1)
        else:
            print('Agent 2 wins.')
            collector1.complete_episode(reward=-1)
        color1 = color1.other

    experience = rl.combine_experience([collector1])
    print('Saving experience buffer to %s\n' % experience_filename)
    with h5py.File(experience_filename, 'w') as experience_outf:
        experience.serialize(experience_outf)
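Example #7 achieves the same GPU sharing as Examples #3 and #8, but through the TensorFlow 2 API: set_visible_devices pins the process to the first GPU and set_memory_growth lets it allocate memory incrementally instead of reserving a fixed fraction up front. It also loads the agents inside with blocks so the HDF5 files are closed promptly, and only agent1, the learning agent, has its exploration temperature set.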
Example #8
def main():

    learning_agent = input("Bot: ")
    temperature = float(input('Temperature = '))
    game_log = input('game_log: ')
    experience_out = input('experience_out: ')
    num_games = int(input('Number of games = '))
    try:
        chunk_size = int(input('Number of games per chunk = '))
    except ValueError:
        chunk_size = 100

    pth = '//media//nail//SSD_Disk//Experience//'
    learning_agent = '//media//nail//SSD_Disk//Models//' + learning_agent + '.h5'
    game_log = pth + game_log + '_' + str(num_games)
    experience_out = pth + experience_out + '_' + str(num_games) + '_'  #+'.h5'

    #args = parser.parse_args()
    # ==================================================
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    import tensorflow as tf
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.95
    config.gpu_options.allow_growth = True
    config.log_device_placement = True
    sess = tf.compat.v1.Session(config=config)
    tf.compat.v1.keras.backend.set_session(sess)
    # ==================================================

    agent1 = agent.load_policy_agent(h5py.File(learning_agent, "r"))
    agent2 = agent.load_policy_agent(h5py.File(learning_agent, "r"))
    agent1.set_temperature(temperature)
    agent2.set_temperature(temperature)

    k = 0
    j = 0
    for i in range(num_games):
        if j == 0:
            game_log_out = game_log + '_' + str((k + 1) * chunk_size) + ".txt"
            logf = open(game_log_out, 'a')
            logf.write('Games started at %s\n' %
                       (datetime.datetime.now().strftime('%Y-%m-%d %H:%M'), ))
            logf.write(
                str((k + 1) * chunk_size) + ' of total games: ' +
                str(num_games) + '\n')
            print('Simulating game %d/%d...' % (i + 1, num_games))
            collector1 = rl.ExperienceCollector()
            collector2 = rl.ExperienceCollector()

            color1 = Player.black
        j += 1
        collector1.begin_episode()
        agent1.set_collector(collector1)
        collector2.begin_episode()
        agent2.set_collector(collector2)

        if color1 == Player.black:
            black_player, white_player = agent1, agent2
        else:
            white_player, black_player = agent1, agent2
        game_record = simulate_game(black_player, white_player)
        print(" № игры : ", i + 1)
        if game_record.winner == color1:
            print('Агент 1 выигрывает.')
            collector1.complete_episode(reward=1)
            collector2.complete_episode(reward=-1)
        else:
            print('Агент 2 выигрывает.')
            collector2.complete_episode(reward=1)
            collector1.complete_episode(reward=-1)
        color1 = color1.other

        if j == chunk_size:

            experience = rl.combine_experience([collector1, collector2])
            experience_out_file = experience_out + str(
                (k + 1) * chunk_size) + ".h5"
            logf.write('Saving buffer to file %s\n' % experience_out_file)
            logf.write('Games finished at %s\n' %
                       (datetime.datetime.now().strftime('%Y-%m-%d %H:%M'), ))
            logf.close()
            with h5py.File(experience_out_file, 'w') as experience_outf:
                experience.serialize(experience_outf)
            print('Games recorded: ', (k + 1) * chunk_size, ' of ', num_games,
                  ' games.')
            k += 1
            j = 0
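Example #8 combines the chunked output of Example #4 with per-chunk text logs: each chunk writes a paired .txt log and .h5 experience file, both named by the cumulative game count (k + 1) * chunk_size, so a long run leaves a trail of fixed-size experience files.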