Example 1
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--agent1', required=True)
    parser.add_argument('--agent2', required=True)
    parser.add_argument('--num-games', '-n', type=int, default=10)

    args = parser.parse_args()
    agent1 = agent.load_policy_agent(h5py.File(args.agent1, 'r'))
    agent2 = agent.load_policy_agent(h5py.File(args.agent2, 'r'))
    num_games = args.num_games

    wins = 0
    losses = 0
    color1 = Player.black
    for i in range(num_games):
        print('Simulating game %d/%d...' % (i + 1, num_games))
        if color1 == Player.black:
            black_player, white_player = agent1, agent2
        else:
            white_player, black_player = agent1, agent2
        game_record = simulate_game(black_player, white_player)
        if game_record.winner == color1:
            wins += 1
        else:
            losses += 1
        color1 = color1.other
    print('Agent 1 record: %d/%d' % (wins, wins + losses))
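Every evaluation and self-play script in this listing calls a simulate_game helper that is defined elsewhere in its source file. A minimal sketch, assuming the standard dlgo API (GameState from dlgo.goboard_fast, scoring.compute_game_result) and a module-level BOARD_SIZE; the exact signature varies, and some scripts pass board_size explicitly (Examples 2, 9, 11, 14, 16):

from dlgo import scoring
from dlgo.goboard_fast import GameState
from dlgo.gotypes import Player

BOARD_SIZE = 19  # assumption: some scripts pass board_size as a parameter instead

def simulate_game(black_player, white_player):
    # Alternate between the two agents until the game is over,
    # then score the final position.
    game = GameState.new_game(BOARD_SIZE)
    agents = {
        Player.black: black_player,
        Player.white: white_player,
    }
    while not game.is_over():
        next_move = agents[game.next_player].select_move(game)
        game = game.apply_move(next_move)
    return scoring.compute_game_result(game)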
Example 2
def play_games(args):
    agent1_fname, agent2_fname, num_games, board_size, gpu_frac = args

    kerasutil.set_gpu_memory_target(gpu_frac)

    random.seed(int(time.time()) + os.getpid())
    np.random.seed(int(time.time()) + os.getpid())

    with h5py.File(agent1_fname, 'r') as agent1f:
        agent1 = agent.load_policy_agent(agent1f)
    with h5py.File(agent2_fname, 'r') as agent2f:
        agent2 = agent.load_policy_agent(agent2f)

    wins, losses = 0, 0
    color1 = Player.black
    for i in range(num_games):
        print('Simulating game %d/%d...' % (i + 1, num_games))
        if color1 == Player.black:
            black_player, white_player = agent1, agent2
        else:
            white_player, black_player = agent1, agent2
        game_record = simulate_game(black_player, white_player, board_size)
        if game_record.winner == color1:
            print('Agent 1 wins')
            wins += 1
        else:
            print('Agent 2 wins')
            losses += 1
        print('Agent 1 record: %d/%d' % (wins, wins + losses))
        color1 = color1.other
    return wins, losses
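play_games packs all of its parameters into a single args tuple, which is the signature multiprocessing.Pool.map expects. A hypothetical driver built on the play_games above (the function name evaluate_parallel and the 0.95 GPU memory budget are illustrative, not from the source):

import multiprocessing

def evaluate_parallel(agent1_fname, agent2_fname, num_games=100,
                      board_size=19, num_workers=4):
    # Split the games across workers; each worker claims an equal
    # slice of the GPU memory budget.
    games_per_worker = num_games // num_workers
    gpu_frac = 0.95 / float(num_workers)
    worker_args = [
        (agent1_fname, agent2_fname, games_per_worker, board_size, gpu_frac)
        for _ in range(num_workers)
    ]
    pool = multiprocessing.Pool(num_workers)
    results = pool.map(play_games, worker_args)
    pool.close()
    pool.join()
    wins = sum(w for w, _ in results)
    losses = sum(l for l, _ in results)
    print('Agent 1 record: %d/%d' % (wins, wins + losses))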
Example 3
def main():  # 10.8
    # PLACEHOLDER VARIABLES (CHANGE BEFORE RUNNING SCRIPT)
    agent1filepath = ""
    agent2filepath = ""
    agent1 = agent.load_policy_agent(h5py.File(agent1filepath, 'r'))
    agent2 = agent.load_policy_agent(h5py.File(agent2filepath, 'r'))
    num_games = 50
    #

    wins = 0  # This script tracks wins and losses from the point of view of agent1
    losses = 0
    color1 = Player.black  # color1 = black, color2 = white
    for i in range(num_games):
        print('Simulating game %d/%d...' % (i + 1, num_games))
        if color1 == Player.black:
            black_player, white_player = agent1, agent2
        else:
            white_player, black_player = agent1, agent2
        game_record = simulate_game(black_player, white_player)
        if game_record.winner == color1:
            wins += 1
        else:
            losses += 1
        color1 = color1.other  # swap colors after each game, in case either agent plays better depending on color
    print('Agent 1 record: %d/%d' % (wins, wins + losses))
Example 4
def main():
    # parser = argparse.ArgumentParser()
    # parser.add_argument('--agent1', required=True)
    # parser.add_argument('--agent2', required=True)
    # parser.add_argument('--num-games', '-n', type=int, default=10)
    #
    # args = parser.parse_args()
    #
    # agent1 = agent.load_policy_agent(h5py.File(args.agent1))
    # agent2 = agent.load_policy_agent(h5py.File(args.agent2))

    # ==================================================
    import os
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    import tensorflow as tf
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.95
    config.gpu_options.allow_growth = True
    config.log_device_placement = True
    sess = tf.compat.v1.Session(config=config)
    tf.compat.v1.keras.backend.set_session(sess)
    # ==================================================
    pth = '//home//nail//Code_Go//checkpoints//'
    num_games = int(input("Количество игр :"))
    agent1 = input('Игрок(агент) №1:')
    agent2 = input('Игрок(агент) №2:')
    agent1 = pth + agent1 + ".h5"
    agent2 = pth + agent2 + ".h5"
    agent1 = agent.load_policy_agent(h5py.File(agent1, "r"))
    agent2 = agent.load_policy_agent(h5py.File(agent2, "r"))

    wins = 0
    losses = 0
    color1 = Player.black

    for i in range(num_games):
        print('Simulating game %d/%d...' % (i + 1, num_games))
        if color1 == Player.black:
            black_player, white_player = agent1, agent2
            print('Agent 1 plays Black, Agent 2 plays White')
        else:
            white_player, black_player = agent1, agent2
            print('Agent 1 plays White, Agent 2 plays Black')
        game_record = simulate_game(black_player, white_player)
        if game_record.winner == color1:
            wins += 1
        else:
            losses += 1
        color1 = color1.other
    print('Agent 1 record: %d/%d' % (wins, wins + losses))
Example 5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--learning-agent', required=True)
    parser.add_argument('--num-games', '-n', type=int, default=10)
    parser.add_argument('--game-log-out', required=True)
    parser.add_argument('--experience-out', required=True)
    parser.add_argument('--temperature', type=float, default=0.0)

    args = parser.parse_args()

    agent1 = agent.load_policy_agent(h5py.File(args.learning_agent, 'r'))
    agent2 = agent.load_policy_agent(h5py.File(args.learning_agent, 'r'))
    agent1.set_temperature(args.temperature)
    agent2.set_temperature(args.temperature)

    collector1 = rl.ExperienceCollector()
    collector2 = rl.ExperienceCollector()

    color1 = Player.black
    logf = open(args.game_log_out, 'a')
    logf.write('Begin training at %s\n' %
               (datetime.datetime.now().strftime('%Y-%m-%d %H:%M'), ))
    for i in range(args.num_games):
        print('Simulating game %d/%d...' % (i + 1, args.num_games))
        collector1.begin_episode()
        agent1.set_collector(collector1)
        collector2.begin_episode()
        agent2.set_collector(collector2)

        if color1 == Player.black:
            black_player, white_player = agent1, agent2
        else:
            white_player, black_player = agent1, agent2  # agent1 always plays color1
        game_record = simulate_game(black_player, white_player)
        if game_record.winner == color1:
            print('Agent 1 wins.')
            collector1.complete_episode(reward=1)
            collector2.complete_episode(reward=-1)
        else:
            print('Agent 2 wins.')
            collector2.complete_episode(reward=1)
            collector1.complete_episode(reward=-1)
        color1 = color1.other

    experience = rl.combine_experience([collector1, collector2])
    logf.write('Saving experience buffer to %s\n' % args.experience_out)
    with h5py.File(args.experience_out, 'w') as experience_outf:
        experience.serialize(experience_outf)
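The begin_episode/complete_episode calls above follow the experience-collection protocol from dlgo.rl: during a game the agent reports each decision to its collector, and when the game ends the final reward is stamped onto every decision of that episode. A minimal sketch of the protocol (a simplification; the real dlgo.rl.ExperienceCollector in later chapters also records value estimates for advantage calculation):

class ExperienceCollector:
    def __init__(self):
        self.states = []
        self.actions = []
        self.rewards = []
        self._current_episode_states = []
        self._current_episode_actions = []

    def begin_episode(self):
        self._current_episode_states = []
        self._current_episode_actions = []

    def record_decision(self, state, action):
        # Called by the agent on every move it selects.
        self._current_episode_states.append(state)
        self._current_episode_actions.append(action)

    def complete_episode(self, reward):
        # Assign the final game outcome to every decision in the episode.
        num_states = len(self._current_episode_states)
        self.states += self._current_episode_states
        self.actions += self._current_episode_actions
        self.rewards += [reward for _ in range(num_states)]
        self.begin_episode()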
Example 6
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--bind-address', default='127.0.0.1')
    parser.add_argument('--port', '-p', type=int, default=5000)
    parser.add_argument('--pg-agent')
    parser.add_argument('--predict-agent')
    parser.add_argument('--q-agent')
    parser.add_argument('--ac-agent')

    args = parser.parse_args()

    bots = {'mcts': mcts.MCTSAgent(800, temperature=0.7)}
    if args.pg_agent:
        bots['pg'] = agent.load_policy_agent(h5py.File(args.pg_agent, 'r'))
    if args.predict_agent:
        bots['predict'] = agent.load_prediction_agent(
            h5py.File(args.predict_agent, 'r'))
    if args.q_agent:
        q_bot = rl.load_q_agent(h5py.File(args.q_agent, 'r'))
        q_bot.set_temperature(0.01)
        bots['q'] = q_bot
    if args.ac_agent:
        ac_bot = rl.load_ac_agent(h5py.File(args.ac_agent, 'r'))
        ac_bot.set_temperature(0.05)
        bots['ac'] = ac_bot

    web_app = httpfrontend.get_web_app(bots)
    web_app.run(host=args.bind_address, port=args.port, threaded=False)
Example 7
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--learning-agent', required=True)
    parser.add_argument('--agent-out', required=True)
    parser.add_argument('--lr', type=float, default=0.0001)
    parser.add_argument('--clipnorm', type=float, default=1.0)
    parser.add_argument('--bs', type=int, default=512)
    parser.add_argument('experience', nargs='+')

    args = parser.parse_args()
    learning_agent_filename = args.learning_agent
    experience_files = args.experience
    updated_agent_filename = args.agent_out
    learning_rate = args.lr
    clipnorm = args.clipnorm
    batch_size = args.bs

    learning_agent = agent.load_policy_agent(
        h5py.File(learning_agent_filename, 'r'))
    for exp_filename in experience_files:
        exp_buffer = rl.load_experience(h5py.File(exp_filename, 'r'))
        learning_agent.train(
            exp_buffer,
            lr=learning_rate,
            clipnorm=clipnorm,
            batch_size=batch_size)

    with h5py.File(updated_agent_filename, 'w') as updated_agent_outf:
        learning_agent.serialize(updated_agent_outf)
Example 8
    def test_4_alphago_mcts(self):

        print("TEST 4\n=====================================================")
        gpus = tf.config.experimental.list_physical_devices('GPU')
        if gpus:
            # Restrict TensorFlow to only use the first GPU
            try:
                tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
                tf.config.experimental.set_memory_growth(gpus[0], True)
                tf.config.set_soft_device_placement(True)
            except RuntimeError as e:
                print(e)

        fast_policy = load_prediction_agent(
            h5py.File('test_alphago_sl_policy.h5', 'r'))
        strong_policy = load_policy_agent(
            h5py.File('test_alphago_rl_policy.h5', 'r'))
        value = load_value_agent(h5py.File('test_alphago_value.h5', 'r'))

        alphago = AlphaGoMCTS(strong_policy,
                              fast_policy,
                              value,
                              num_simulations=20,
                              depth=5,
                              rollout_limit=10)
        start = GameState.new_game(19)
        alphago.select_move(start)
Example 9
def do_self_play(board_size, agent1_filename, agent2_filename, num_games,
                 temperature, experience_filename, gpu_frac):

    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        # Restrict TensorFlow to only use the first GPU
        try:
            tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
            tf.config.experimental.set_memory_growth(gpus[0], True)
        except RuntimeError as e:
            print(e)

    random.seed(int(time.time()) + os.getpid())
    np.random.seed(int(time.time()) + os.getpid())

    with h5py.File(agent1_filename, 'r') as agent1f:
        agent1 = agent.load_policy_agent(agent1f)
    agent1.set_temperature(temperature)
    with h5py.File(agent2_filename, 'r') as agent2f:
        agent2 = agent.load_policy_agent(agent2f)

    collector1 = rl.ExperienceCollector()

    color1 = Player.black
    for i in range(num_games):
        print('Simulating game %d/%d...' % (i + 1, num_games))
        collector1.begin_episode()
        agent1.set_collector(collector1)

        if color1 == Player.black:
            black_player, white_player = agent1, agent2
        else:
            white_player, black_player = agent1, agent2
        game_record = simulate_game(black_player, white_player, board_size)
        if game_record.winner == color1:
            print('Agent 1 wins.')
            collector1.complete_episode(reward=1)
        else:
            print('Agent 2 wins.')
            collector1.complete_episode(reward=-1)
        color1 = color1.other

    experience = rl.combine_experience([collector1])
    print('Saving experience buffer to %s\n' % experience_filename)
    with h5py.File(experience_filename, 'w') as experience_outf:
        experience.serialize(experience_outf)
Example 10
def train_worker(learning_agent, output_file, experience_file, lr, batch_size):
    with h5py.File(learning_agent, 'r') as learning_agentf:
        learning_agent = agent.load_policy_agent(learning_agentf)
    with h5py.File(experience_file, 'r') as expf:
        exp_buffer = rl.load_experience(expf)
    learning_agent.train(exp_buffer, lr=lr, batch_size=batch_size)

    with h5py.File(output_file, 'w') as updated_agent_outf:
        learning_agent.serialize(updated_agent_outf)
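The name train_worker suggests this function runs in a child process, which is the usual way to force TensorFlow to release GPU memory between training rounds: the memory is only reclaimed when the owning process exits. A hypothetical launcher under that assumption:

import multiprocessing

def train_on_experience_files(learning_agent, output_file, experience_files,
                              lr=0.0001, batch_size=512):
    current_agent = learning_agent
    for experience_file in experience_files:
        # A fresh process per round, so GPU memory is freed on exit.
        worker = multiprocessing.Process(
            target=train_worker,
            args=(current_agent, output_file, experience_file,
                  lr, batch_size))
        worker.start()
        worker.join()
        # Continue from the freshly written weights.
        current_agent = output_file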
Example 11
def do_self_play(board_size, agent_filename,
                 num_games, temperature,
                 experience_filename,
                 gpu_frac):
    kerasutil.set_gpu_memory_target(gpu_frac)

    random.seed(int(time.time()) + os.getpid())
    np.random.seed(int(time.time()) + os.getpid())

    agent1 = agent.load_policy_agent(h5py.File(agent_filename, 'r'))
    agent1.set_temperature(temperature)
    agent2 = agent.load_policy_agent(h5py.File(agent_filename, 'r'))
    agent2.set_temperature(temperature)

    collector1 = rl.ExperienceCollector()
    collector2 = rl.ExperienceCollector()

    color1 = Player.black
    for i in range(num_games):
        print('Simulating game %d/%d...' % (i + 1, num_games))
        collector1.begin_episode()
        agent1.set_collector(collector1)
        collector2.begin_episode()
        agent2.set_collector(collector2)

        if color1 == Player.black:
            black_player, white_player = agent1, agent2
        else:
            white_player, black_player = agent1, agent2
        game_record = simulate_game(black_player, white_player, board_size)
        if game_record.winner == color1:
            print('Agent 1 wins.')
            collector1.complete_episode(reward=1)
            collector2.complete_episode(reward=-1)
        else:
            print('Agent 2 wins.')
            collector2.complete_episode(reward=1)
            collector1.complete_episode(reward=-1)
        color1 = color1.other

    experience = rl.combine_experience([collector1, collector2])
    print('Saving experience buffer to %s\n' % experience_filename)
    with h5py.File(experience_filename, 'w') as experience_outf:
        experience.serialize(experience_outf)
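kerasutil.set_gpu_memory_target is not shown in this listing, but Examples 4 and 12 do the equivalent setup inline, so a plausible sketch looks like the following (an assumption about dlgo.kerasutil, not its actual source):

import tensorflow as tf

def set_gpu_memory_target(frac):
    # Cap this process's share of GPU memory so that several
    # self-play or training workers can share one GPU.
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = frac
    config.gpu_options.allow_growth = True
    sess = tf.compat.v1.Session(config=config)
    tf.compat.v1.keras.backend.set_session(sess)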
Example 12
def main():
    pth = '//home//nail//Code_Go//checkpoints//'
    pth_experience = '//home//nail//Experience//'
    experience = []
    os.chdir(pth_experience)
    lst_files = os.listdir(pth_experience)
    pattern = input('Pattern for selecting training files: ')
    if len(pattern) == 0:
        pattern = "exp*.h5"

    for entry in lst_files:
        if fnmatch.fnmatch(entry, pattern):
            experience.append(entry)

    experience.sort()
    learning_agent = input('learning_agent: ')
    learning_agent = pth + learning_agent + '.h5'
    print('learning_agent: ', learning_agent)
    agent_out = input('agent_out: ')
    agent_out = pth + agent_out + '.h5'
    print('agent_out: ', agent_out)
    try:
        lr = float(input('lr = '))
    except ValueError:
        lr = 0.000001
    try:
        bs = int(input('bs = '))
    except ValueError:
        bs = 1024

    # ==================================================
    import tensorflow as tf
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.98
    config.gpu_options.allow_growth = True
    config.log_device_placement = True
    sess = tf.compat.v1.Session(config=config)
    tf.compat.v1.keras.backend.set_session(sess)
    # ==================================================


    learning_agent = agent.load_policy_agent(h5py.File(learning_agent, "r"))

    i = 1
    num_files = len(experience)
    for exp_filename in experience:
        print(50 * '=')
        print('Training file: %s...' % exp_filename)
        print(50 * '=')
        exp_buffer = rl.load_experience(h5py.File(exp_filename, "r"))
        learning_agent.train(exp_buffer, lr=lr, batch_size=bs)
        print('Processed files:', i, 'of', num_files)
        i += 1

    with h5py.File(agent_out, 'w') as updated_agent_outf:
        learning_agent.serialize(updated_agent_outf)
Example 13
def main():
    # parser = argparse.ArgumentParser()
    # parser.add_argument('--learning-agent', required=True)
    # parser.add_argument('--num-games', '-n', type=int, default=10)
    # parser.add_argument('--game-log-out', required=True)
    # parser.add_argument('--experience-out', required=True)
    # parser.add_argument('--temperature', type=float, default=0.0)
    #
    # args = parser.parse_args()

    # PLACEHOLDER VALUES (set before running; the commented-out argparse
    # block above originally supplied agent_filename, num_games, and
    # experience_filename)
    agent_filename = ''
    experience_filename = ''
    num_games = 10

    # 9.19
    agent1 = agent.load_policy_agent(h5py.File(agent_filename, 'r'))
    agent2 = agent.load_policy_agent(h5py.File(agent_filename, 'r'))
    collector1 = rl.ExperienceCollector()
    collector2 = rl.ExperienceCollector()
    agent1.set_collector(collector1)
    agent2.set_collector(collector2)


    # color1 = Player.black
    # logf = open(args.game_log_out, 'a')
    # logf.write('Begin training at %s\n' % (datetime.datetime.now().strftime('%Y-%m-%d %H:%M'),))

    # 9.20
    for i in range(num_games):
        collector1.begin_episode()
        collector2.begin_episode()

        game_record = simulate_game(agent1, agent2)
        if game_record.winner == Player.black:
            collector1.complete_episode(reward=1)       # Agent1 won the game, so +reward
            collector2.complete_episode(reward=-1)
        else:
            collector2.complete_episode(reward=1)       # Agent2 won the game
            collector1.complete_episode(reward=-1)


    experience = rl.combine_experience([collector1, collector2])
    # logf.write('Saving experience buffer to %s\n' % args.experience_out)
    with h5py.File(experience_filename, 'w') as experience_outf:
        experience.serialize(experience_outf)
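rl.combine_experience merges the two collectors into one buffer before it is written to HDF5. A minimal sketch of that step, matching the collector protocol sketched after Example 5 (the real dlgo.rl.ExperienceBuffer also implements the serialize() method used here, which is omitted):

import numpy as np

class ExperienceBuffer:
    def __init__(self, states, actions, rewards):
        self.states = states
        self.actions = actions
        self.rewards = rewards

def combine_experience(collectors):
    # Stack every collector's per-decision arrays into flat arrays.
    combined_states = np.concatenate([np.array(c.states) for c in collectors])
    combined_actions = np.concatenate([np.array(c.actions) for c in collectors])
    combined_rewards = np.concatenate([np.array(c.rewards) for c in collectors])
    return ExperienceBuffer(combined_states, combined_actions, combined_rewards)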
Example 14
def play_games(args):

    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        # Restrict TensorFlow to only use the first GPU
        try:
            tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
            tf.config.experimental.set_memory_growth(gpus[0], True)
        except RuntimeError as e:
            print(e)

    agent1_fname, agent2_fname, num_games, board_size, gpu_frac = args

    random.seed(int(time.time()) + os.getpid())
    np.random.seed(int(time.time()) + os.getpid())

    with h5py.File(agent1_fname, 'r') as agent1f:
        agent1 = agent.load_policy_agent(agent1f)
    with h5py.File(agent2_fname, 'r') as agent2f:
        agent2 = agent.load_policy_agent(agent2f)

    wins, losses = 0, 0
    color1 = Player.black
    for i in range(num_games):
        print('Simulating game %d/%d...' % (i + 1, num_games))
        if color1 == Player.black:
            black_player, white_player = agent1, agent2
        else:
            white_player, black_player = agent1, agent2
        game_record = simulate_game(black_player, white_player, board_size)
        if game_record.winner == color1:
            print('Agent 1 wins')
            wins += 1
        else:
            print('Agent 2 wins')
            losses += 1
        print('Agent 1 record: %d/%d' % (wins, wins + losses))
        color1 = color1.other
    return wins, losses
Example 15
    def test_4_alphago_mcts(self):
        fast_policy = load_prediction_agent(
            h5py.File('test_alphago_sl_policy.h5', 'r'))
        strong_policy = load_policy_agent(
            h5py.File('test_alphago_rl_policy.h5', 'r'))
        value = load_value_agent(h5py.File('test_alphago_value.h5', 'r'))

        alphago = AlphaGoMCTS(strong_policy,
                              fast_policy,
                              value,
                              num_simulations=20,
                              depth=5,
                              rollout_limit=10)
        start = GameState.new_game(19)
        alphago.select_move(start)
Example 16
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--board-size', type=int, required=True)
    parser.add_argument('--learning-agent', required=True)
    parser.add_argument('--num-games', '-n', type=int, default=10)
    parser.add_argument('--experience-out', required=True)

    args = parser.parse_args()
    agent_filename = args.learning_agent
    experience_filename = args.experience_out
    num_games = args.num_games
    global BOARD_SIZE
    BOARD_SIZE = args.board_size

    agent1 = agent.load_policy_agent(h5py.File(agent_filename, 'r'))
    agent2 = agent.load_policy_agent(h5py.File(agent_filename, 'r'))
    collector1 = rl.ExperienceCollector()
    collector2 = rl.ExperienceCollector()
    agent1.set_collector(collector1)
    agent2.set_collector(collector2)

    for i in range(num_games):
        collector1.begin_episode()
        collector2.begin_episode()

        game_record = simulate_game(agent1, agent2)
        if game_record.winner == Player.black:
            collector1.complete_episode(reward=1)
            collector2.complete_episode(reward=-1)
        else:
            collector2.complete_episode(reward=1)
            collector1.complete_episode(reward=-1)

    experience = rl.combine_experience([collector1, collector2])
    with h5py.File(experience_filename, 'w') as experience_outf:
        experience.serialize(experience_outf)
Example 17
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--learning-agent', required=True)
    parser.add_argument('--agent-out', required=True)
    parser.add_argument('--lr', type=float, default=0.0001)
    parser.add_argument('--bs', type=int, default=512)
    parser.add_argument('experience', nargs='+')

    args = parser.parse_args()

    learning_agent = agent.load_policy_agent(
        h5py.File(args.learning_agent, 'r'))
    for exp_filename in args.experience:
        print('Training with %s...' % exp_filename)
        exp_buffer = rl.load_experience(h5py.File(exp_filename, 'r'))
        learning_agent.train(exp_buffer, lr=args.lr, batch_size=args.bs)

    with h5py.File(args.agent_out, 'w') as updated_agent_outf:
        learning_agent.serialize(updated_agent_outf)
Example 18
def train_worker(learning_agent, output_file, experience_file, lr, batch_size):

    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        # Restrict TensorFlow to only use the first GPU
        try:
            tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
            tf.config.experimental.set_memory_growth(gpus[0], True)
        except RuntimeError as e:
            print(e)

    with h5py.File(learning_agent, 'r') as learning_agentf:
        learning_agent = agent.load_policy_agent(learning_agentf)
    with h5py.File(experience_file, 'r') as expf:
        exp_buffer = rl.load_experience(expf)
    learning_agent.train(exp_buffer, lr=lr, batch_size=batch_size)

    with h5py.File(output_file, 'w') as updated_agent_outf:
        learning_agent.serialize(updated_agent_outf)
Example 19
def main():
    # PLACEHOLDER VARIABLES (CHANGE BEFORE TRAINING MODEL)
    learning_agent_filename = ""  # a file path string, not an h5py.File
    experience_files = ["", ""]
    learning_rate = 0.0001
    clipnorm = 0.5
    batchsize = 1024
    updated_agent_filename = ""
    #
    learning_agent = agent.load_policy_agent(
        h5py.File(learning_agent_filename, 'r'))  # 10.7
    for exp_filename in experience_files:
        exp_buffer = rl.load_experience(h5py.File(exp_filename, 'r'))
        learning_agent.train(exp_buffer,
                             lr=learning_rate,
                             clipnorm=clipnorm,
                             batch_size=batchsize)
    with h5py.File(updated_agent_filename, 'w') as updated_agent_outf:
        learning_agent.serialize(updated_agent_outf)
Example 20
def main():
    workdir = '//home/nail//Code_Go//checkpoints//'
    os.chdir(workdir)
    bind_address = '127.0.0.1'
    port = 5000
    predict_agent, pg_agent, q_agent, ac_agent = '', '', '', ''
    agent_type = input('Agent type (pg/predict/q/ac) = ').lower()
    if agent_type == 'pg':
        pg_agent = input(
            'Enter the model file name for the policy-gradient bot = ')
        pg_agent = workdir + pg_agent + '.h5'
    if agent_type == 'predict':
        predict_agent = input(
            'Enter the model file name for the move-prediction bot = ')
        predict_agent = workdir + predict_agent + '.h5'
    if agent_type == 'q':
        q_agent = input(
            'Enter the model file name for the action-value bot = ')
        q_agent = workdir + q_agent + '.h5'
    if agent_type == 'ac':
        ac_agent = input('Enter the model file name for the actor-critic bot = ')
        ac_agent = workdir + ac_agent + '.h5'

    bots = {'mcts': mcts.MCTSAgent(800, temperature=0.7)}
    if agent_type == 'pg':
        bots['pg'] = agent.load_policy_agent(h5py.File(pg_agent, 'r'))
    if agent_type == 'predict':
        bots['predict'] = agent.load_prediction_agent(
            h5py.File(predict_agent, 'r'))
    if agent_type == 'q':
        q_bot = rl.load_q_agent(h5py.File(q_agent, 'r'))
        q_bot.set_temperature(0.01)
        bots['q'] = q_bot
    if agent_type == 'ac':
        ac_bot = rl.load_ac_agent(h5py.File(ac_agent, 'r'))
        ac_bot.set_temperature(0.05)
        bots['ac'] = ac_bot

    web_app = httpfrontend.get_web_app(bots)
    web_app.run(host=bind_address, port=port, threaded=False)
Example 21
import time

import h5py

from dlgo import rl
from dlgo import scoring
from dlgo import goboard_fast as goboard
from dlgo.goboard_fast import Move
from dlgo.gotypes import Player, Point
from dlgo.utils import print_board, print_move

from dlgo import agent
from dlgo.agent.predict import DeepLearningAgent
from dlgo.agent import load_prediction_agent, load_policy_agent, AlphaGoMCTS
from dlgo.rl import load_value_agent

# Load the fast policy, the strong policy, and the value agent
fast_policy = load_prediction_agent(
    h5py.File('models/AlphaGo/alphago_policyv0-0-0.h5', 'r'))
strong_policy = load_policy_agent(
    h5py.File('models/AlphaGo/alphago_policyv0-0-0.h5', 'r'))
value = load_value_agent(
    h5py.File('models/AlphaGo/alphago_valuev1-0-1.h5', 'r'))

# Create AlphaGo MCTS agent based on the policy agent and the value agent
alphago = AlphaGoMCTS(strong_policy,
                      fast_policy,
                      value,
                      depth=10,
                      rollout_limit=50,
                      num_simulations=100)

# Time how long move selection takes
game_state = goboard.GameState.new_game(19)
start_time = time.time()
next_move = alphago.select_move(game_state)
print('select_move took %.1f seconds' % (time.time() - start_time))
Example 22
# tag::run_alphago[]
from dlgo.agent import load_prediction_agent, load_policy_agent, AlphaGoMCTS
from dlgo.rl import load_value_agent
import h5py

fast_policy = load_prediction_agent(h5py.File('alphago_sl_policy.h5', 'r'))
strong_policy = load_policy_agent(h5py.File('alphago_rl_policy.h5', 'r'))
value = load_value_agent(h5py.File('alphago_value.h5', 'r'))

alphago = AlphaGoMCTS(strong_policy, fast_policy, value)
# end::run_alphago[]

# TODO: register in frontend
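One way to resolve the TODO, reusing the server pattern from Examples 6 and 20 (whether the bundled front end ships a page for a bot registered as 'alphago' is an assumption):

from dlgo import httpfrontend

web_app = httpfrontend.get_web_app({'alphago': alphago})
web_app.run(host='127.0.0.1', port=5000, threaded=False)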
Example 23
def main():

    learning_agent = input("Бот: ")
    temperature = float(input('Температура = '))
    game_log = input('game_log: ')
    experience_out = input('experience_out: ')
    num_games = int(input('Количество игр = '))
    try:
        chunk_size = int(input('Количество игр в "порции" ='))
    except:
        chunk_size = 100

    pth = '//media//nail//SSD_Disk//Experience//'
    learning_agent = '//media//nail//SSD_Disk//Models//' + learning_agent + '.h5'
    game_log = pth + game_log + '_' + str(num_games)
    experience_out = pth + experience_out + '_' + str(num_games) + '_'  #+'.h5'

    #args = parser.parse_args()
    # ==================================================
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    import tensorflow as tf
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.95
    config.gpu_options.allow_growth = True
    config.log_device_placement = True
    sess = tf.compat.v1.Session(config=config)
    tf.compat.v1.keras.backend.set_session(sess)
    # ==================================================

    agent1 = agent.load_policy_agent(h5py.File(learning_agent, "r"))
    agent2 = agent.load_policy_agent(h5py.File(learning_agent, "r"))
    agent1.set_temperature(temperature)
    agent2.set_temperature(temperature)

    k = 0
    j = 0
    for i in range(num_games + 1):
        if j == 0:
            game_log_out = game_log + '_' + str((k + 1) * chunk_size) + ".txt"
            logf = open(game_log_out, 'a')
            logf.write('Games started at %s\n' %
                       (datetime.datetime.now().strftime('%Y-%m-%d %H:%M'), ))
            logf.write(
                str((k + 1) * chunk_size) + ' of total games: ' +
                str(num_games) + '\n')
            print('Simulating game %d/%d...' % (i + 1, num_games))
            collector1 = rl.ExperienceCollector()
            collector2 = rl.ExperienceCollector()

            color1 = Player.black
        j += 1
        collector1.begin_episode()
        agent1.set_collector(collector1)
        collector2.begin_episode()
        agent2.set_collector(collector2)

        if color1 == Player.black:
            black_player, white_player = agent1, agent2
        else:
            white_player, black_player = agent1, agent2  # agent1 always plays color1
        game_record = simulate_game(black_player, white_player)
        print(" № игры : ", i + 1)
        if game_record.winner == color1:
            print('Агент 1 выигрывает.')
            collector1.complete_episode(reward=1)
            collector2.complete_episode(reward=-1)
        else:
            print('Агент 2 выигрывает.')
            collector2.complete_episode(reward=1)
            collector1.complete_episode(reward=-1)
        color1 = color1.other

        if i >= chunk_size and i % chunk_size == 0:

            experience = rl.combine_experience([collector1, collector2])
            experience_out_file = experience_out + str(
                (k + 1) * chunk_size) + ".h5"
            logf.write('Saving experience buffer to %s\n' % experience_out_file)
            logf.write('Games finished at %s\n' %
                       (datetime.datetime.now().strftime('%Y-%m-%d %H:%M'), ))
            logf.close()
            with h5py.File(experience_out_file, 'w') as experience_outf:
                experience.serialize(experience_outf)
            print('Games saved:', (k + 1) * chunk_size, 'of', num_games,
                  'games.')
            k += 1
            j = 0
Example 24
from dlgo.agent import load_prediction_agent, load_policy_agent, AlphaGoMCTS
from dlgo.rl import load_value_agent
import h5py

fast_policy = load_prediction_agent(h5py.File('agents/GHGHbot1_sl_policy.h5', 'r'))
strong_policy = load_policy_agent(h5py.File('agents/GHGHbot1_rl_policy.h5', 'r'))
value = load_value_agent(h5py.File('agents/GHGHbot1_value.h5', 'r'))

alphago = AlphaGoMCTS(strong_policy, fast_policy, value)


# TODO: register in frontend