def generate_game(board_size, rounds, max_moves, temperature):
    boards, moves = [], []  

    encoder = get_encoder_by_name('oneplane', board_size)  

    game = goboard.GameState.new_game(board_size)  

    bot = mcts.MCTSAgent(rounds, temperature)  

    num_moves = 0
    while not game.is_over():
        print_board(game.board)
        move = bot.select_move(game)  
        if move.is_play:
            boards.append(encoder.encode(game))  

            move_one_hot = np.zeros(encoder.num_points())
            move_one_hot[encoder.encode_point(move.point)] = 1
            moves.append(move_one_hot)  

        print_move(game.next_player, move)
        game = game.apply_move(move)  
        num_moves += 1
        if num_moves > max_moves:  
            break

    return np.array(boards), np.array(moves)
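
A minimal sketch (not part of the original listing) of how the two arrays returned by generate_game could feed a supervised policy network. The 9x9 board size, MCTS settings, and layer sizes here are illustrative assumptions.

from keras.models import Sequential
from keras.layers import Dense, Flatten

# Generate one self-play game worth of training data (parameters are assumptions).
X, y = generate_game(board_size=9, rounds=500, max_moves=60, temperature=0.8)

# X has shape (num_positions,) + encoder.shape(); y is one-hot over board points.
model = Sequential()
model.add(Flatten(input_shape=X.shape[1:]))
model.add(Dense(256, activation='relu'))
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='sgd')
model.fit(X, y, batch_size=64, epochs=5)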
Example #2
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--board-size', type=int, default=19)
    parser.add_argument('--network', default='large')
    parser.add_argument('--hidden-size', type=int, default=512)
    parser.add_argument('output_file')
    args = parser.parse_args()

    encoder = encoders.get_encoder_by_name('simple', args.board_size)
    board_input = Input(shape=encoder.shape(), name='board_input')

    processed_board = board_input
    network = getattr(dlgo.networks, args.network)
    for layer in network.layers(encoder.shape()):
        processed_board = layer(processed_board)

    policy_hidden_layer = Dense(args.hidden_size,
                                activation='relu')(processed_board)
    policy_output = Dense(encoder.num_points(),
                          activation='softmax')(policy_hidden_layer)

    value_hidden_layer = Dense(args.hidden_size,
                               activation='relu')(processed_board)
    value_output = Dense(1, activation='tanh')(value_hidden_layer)

    model = Model(inputs=[board_input], outputs=[policy_output, value_output])

    new_agent = rl.ACAgent(model, encoder)
    with h5py.File(args.output_file, 'w') as outf:
        new_agent.serialize(outf)
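
A short usage sketch for the serialized agent, assuming the dlgo package provides rl.load_ac_agent as the counterpart of ACAgent.serialize (as in the book's repository); the file name is illustrative.

import h5py
from dlgo import rl

with h5py.File('ac_agent.h5', 'r') as h5file:
    ac_agent = rl.load_ac_agent(h5file)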
Example #3
def main():

    workdir = r'/media/nail/SSD_Disk/Models/'

    board_size = 19
    network = 'large'
    hidden_size = 512
    output_file = workdir + 'ac_agent.h5'

    encoder = encoders.get_encoder_by_name('simple', board_size)
    board_input = Input(shape=encoder.shape(), name='board_input')

    processed_board = board_input
    network = getattr(dlgo.networks, network)
    for layer in network.layers(encoder.shape()):
        processed_board = layer(processed_board)

    policy_hidden_layer = Dense(hidden_size,
                                activation='relu')(processed_board)
    policy_output = Dense(encoder.num_points(),
                          activation='softmax')(policy_hidden_layer)

    value_hidden_layer = Dense(hidden_size, activation='relu')(processed_board)
    value_output = Dense(1, activation='tanh')(value_hidden_layer)

    model = Model(inputs=[board_input], outputs=[policy_output, value_output])

    new_agent = rl.ACAgent(model, encoder)
    with h5py.File(output_file, 'w') as outf:
        new_agent.serialize(outf)
Example #4
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--board-size', type=int, default=19)
    parser.add_argument('--network', default='large')
    parser.add_argument('--hidden-size', type=int, default=512)
    parser.add_argument('--output_file', '-o', type=str, default='./agents/q_agent_0.h5')
    args = parser.parse_args()

    encoder = encoders.get_encoder_by_name('simple', args.board_size)
    board_input = Input(shape=encoder.shape(), name='board_input')
    action_input = Input(shape=(encoder.num_points(),), name='action_input')

    processed_board = board_input
    network = getattr(dlgo.networks, args.network)
    for layer in network.layers(encoder.shape()):
        processed_board = layer(processed_board)

    board_plus_action = concatenate([action_input, processed_board])
    hidden_layer = Dense(args.hidden_size, activation='relu')(board_plus_action)
    value_output = Dense(1, activation='sigmoid')(hidden_layer)

    model = Model(inputs=[board_input, action_input], outputs=value_output)

    new_agent = rl.QAgent(model, encoder)
    with h5py.File(args.output_file, 'w') as outf:
        new_agent.serialize(outf)
Example #5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--board-size', type=int, default=19)
    parser.add_argument('--output-file', required=True)
    args = parser.parse_args()

    board_size = args.board_size
    output_file = args.output_file

    encoder = encoders.get_encoder_by_name('simple', board_size)

    board_input = Input(shape=encoder.shape(), name='board_input')
    action_input = Input(shape=(encoder.num_points(), ), name='action_input')

    conv1a = ZeroPadding2D((2, 2))(board_input)
    conv1b = Conv2D(64, (5, 5), activation='relu')(conv1a)

    conv2a = ZeroPadding2D((1, 1))(conv1b)
    conv2b = Conv2D(64, (3, 3), activation='relu')(conv2a)

    flat = Flatten()(conv2b)
    processed_board = Dense(512)(flat)

    board_and_action = concatenate([action_input, processed_board])
    hidden_layer = Dense(256, activation='relu')(board_and_action)
    value_output = Dense(1, activation='tanh')(hidden_layer)

    model = Model(inputs=[board_input, action_input], outputs=value_output)

    new_agent = rl.QAgent(model, encoder)
    with h5py.File(output_file, 'w') as outf:
        new_agent.serialize(outf)
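
For orientation, a hedged sketch (not in the original script) of how this two-input model is queried: one encoded board plus a one-hot action vector go in, a single tanh-scaled value comes out. It reuses the encoder and model built above; the move index is made up.

import numpy as np

board_tensor = np.zeros((1,) + encoder.shape())      # batch of one encoded board
action_tensor = np.zeros((1, encoder.num_points()))
action_tensor[0, 42] = 1                              # hypothetical move index

value = model.predict([board_tensor, action_tensor])[0][0]  # in [-1, 1] (tanh output)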
Example #6
def generate_games(board_size, rounds, max_moves, temperature):
    boards, moves = [], []  # In boards you store encoded board states; moves is for encoded moves

    # Initialize a OnePlaneEncoder by name with the given board size
    encoder = get_encoder_by_name('oneplane', board_size)

    # A new game of size board_size is instantiated
    game = goboard.GameState.new_game(board_size)

    bot = mcts.MCTSAgent(rounds, temperature)

    num_moves = 0
    while not game.is_over():
        print_board(game.board)
        move = bot.select_move(game)
        if move.is_play:
            boards.append(encoder.encode(game))

            move_one_hot = np.zeros(encoder.num_points())
            move_one_hot[encoder.encode_point(move.point)] = 1
            moves.append(move_one_hot)

        print_move(game.next_player, move)
        game = game.apply_move(move)
        num_moves += 1
        if num_moves > max_moves:
            break

    return np.array(boards), np.array(moves)
Example #7
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--board-size', type=int, default=19)
    parser.add_argument('--output-file')
    args = parser.parse_args()

    board_size = args.board_size
    output_file = args.output_file

    encoder = encoders.get_encoder_by_name('simple', board_size)

    board_input = Input(shape=encoder.shape(), name='board_input')

    conv1 = Conv2D(64, (3, 3), padding='same', activation='relu')(board_input)
    conv2 = Conv2D(64, (3, 3), padding='same', activation='relu')(conv1)
    conv3 = Conv2D(64, (3, 3), padding='same', activation='relu')(conv2)

    flat = Flatten()(conv3)
    processed_board = Dense(512)(flat)

    policy_hidden_layer = Dense(512, activation='relu')(processed_board)
    policy_output = Dense(encoder.num_points(),
                          activation='softmax')(policy_hidden_layer)

    value_hidden_layer = Dense(512, activation='relu')(processed_board)
    value_output = Dense(1, activation='tanh')(value_hidden_layer)

    model = Model(inputs=board_input, outputs=[policy_output, value_output])

    new_agent = rl.ACAgent(model, encoder)
    with h5py.File(output_file, 'w') as outf:
        new_agent.serialize(outf)
Example #8
def create_v_model(pth="//home//nail//Code_Go//checkpoints//",
                   board_size=19,
                   network='large',
                   hidden_size=512,
                   lr=0.01):
    output_file = pth + 'v_model' + '.h5'

    encoder = encoders.get_encoder_by_name('simple', board_size)
    board_input = Input(shape=encoder.shape(), name='board_input')
    action_input = Input(shape=(encoder.num_points(), ), name='action_input')

    processed_board = board_input
    network = getattr(dlgo.networks, network)
    for layer in network.layers(encoder.shape()):
        processed_board = layer(processed_board)

    board_plus_action = concatenate([action_input, processed_board])
    hidden_layer = Dense(hidden_size, activation='relu')(board_plus_action)
    value_output = Dense(1, activation='sigmoid')(hidden_layer)

    model = Model(inputs=[board_input, action_input], outputs=value_output)
    opt = SGD(lr=lr)
    model.compile(loss='mse', optimizer=opt)
    new_agent = rl.ValueAgent(model, encoder)
    with h5py.File(output_file, 'w') as outf:
        new_agent.serialize(outf)
    return new_agent
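
Illustrative call only; the checkpoint directory is an assumption, and the returned rl.ValueAgent wraps the freshly compiled two-input model.

value_agent = create_v_model(pth='./checkpoints/', board_size=19,
                             network='large', hidden_size=512, lr=0.01)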
Example #9
def load_policy_agent(h5file):
    model = kerasutil.load_model_from_hdf5_group(h5file['model'])
    encoder_name = h5file['encoder'].attrs['name']
    board_width = h5file['encoder'].attrs['board_width']
    board_height = h5file['encoder'].attrs['board_height']
    encoder = encoders.get_encoder_by_name(encoder_name,
                                           (board_width, board_height))
    return PolicyAgent(model, encoder)
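
A minimal usage sketch, assuming select_move is the agent interface used throughout dlgo (as in the book's repository); the file name is an assumption.

import h5py
from dlgo import goboard

with h5py.File('policy_agent.h5', 'r') as h5file:
    agent = load_policy_agent(h5file)

game = goboard.GameState.new_game(19)
move = agent.select_move(game)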
Example #10
def load_prediction_agent(h5file):
    model = kerasutil.load_model_from_hdf5_group(h5file['model'])
    encoder_name = h5file['encoder'].attrs['name']
    if not isinstance(encoder_name, str):
        encoder_name = encoder_name.decode('ascii')
    board_width = h5file['encoder'].attrs['board_width']
    board_height = h5file['encoder'].attrs['board_height']
    encoder = encoders.get_encoder_by_name(encoder_name, (board_width, board_height))
    return DeepLearningAgent(model, encoder)
Example #11
def load_policy_agent(h5file):
    model = kerasutil.load_model_from_hdf5_group(
        h5file['model'],
        custom_objects={'policy_gradient_loss': policy_gradient_loss})
    encoder_name = h5file['encoder'].attrs['name']
    if not isinstance(encoder_name, str):
        encoder_name = encoder_name.decode('ascii')
    board_width = h5file['encoder'].attrs['board_width']
    board_height = h5file['encoder'].attrs['board_height']
    encoder = encoders.get_encoder_by_name(encoder_name,
                                           (board_width, board_height))
    return PolicyAgent(model, encoder)
Example #12
def load_policy_agent(h5file):
    # Uses built-in Keras functions to load the model structure and weights
    model = kerasutil.load_model_from_hdf5_group(h5file['model'])
    # Recovers the board encoder
    encoder_name = h5file['encoder'].attrs['name']
    board_width = h5file['encoder'].attrs['board_width']
    board_height = h5file['encoder'].attrs['board_height']
    if type(encoder_name) == bytes:
        encoder_name = encoder_name.decode()
    encoder = encoders.get_encoder_by_name(encoder_name,
                                           (board_width, board_height))
    return PolicyAgent(model, encoder)  # Reconstructs the agent
Example #13
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--board-size', type=int, default=19)
    #     parser.add_argument('--network', default='large')
    #     parser.add_argument('--hidden-size', type=int, default=512)
    parser.add_argument('output_file')
    args = parser.parse_args()

    encoder = encoders.get_encoder_by_name('zero', args.board_size)
    model = networks.dual_residual_network(input_shape=encoder.shape(),
                                           blocks=8)
    model.summary()

    new_agent = zero.ZeroAgent(model, encoder, rounds_per_move=1000, c=2.0)
    with h5py.File(args.output_file, 'w') as outf:
        new_agent.serialize(outf)
Example #14
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--board-size', type=int, default=19)
    parser.add_argument('output_file')
    args = parser.parse_args()

    encoder = encoders.get_encoder_by_name('simple', args.board_size)
    model = Sequential()
    for layer in dlgo.networks.leaky.layers(encoder.shape()):
        model.add(layer)
    model.add(Dense(encoder.num_points()))
    model.add(Activation('softmax'))
    opt = SGD(lr=0.02)
    model.compile(loss=agent.policy_gradient_loss, optimizer=opt)

    new_agent = agent.PolicyAgent(model, encoder)
    with h5py.File(args.output_file, 'w') as outf:
        new_agent.serialize(outf)
Example #15
def generate_game(board_size, rounds, max_moves, temperature):
    # boards stores the encoded board states; moves stores the encoded moves
    boards, moves = [], []  # <1>

    # Initialize a OnePlaneEncoder by name with the specified board size
    encoder = get_encoder_by_name('oneplane', board_size)  # <2>

    # A new game of size board_size is instantiated
    game = goboard.GameState.new_game(board_size)  # <3>

    # A Monte Carlo tree search agent with the specified rounds and temperature becomes the bot
    bot = mcts.MCTSAgent(rounds, temperature)  # <4>

    num_moves = 0
    while not game.is_over():
        print_board(game.board)

        # The next move is selected by the bot
        move = bot.select_move(game)  # <5>
        if move.is_play:
            # The encoded board state is appended to boards
            boards.append(encoder.encode(game))  # <6>

            move_one_hot = np.zeros(encoder.num_points())
            move_one_hot[encoder.encode_point(move.point)] = 1

            # The one-hot-encoded next move is appended to moves
            moves.append(move_one_hot)  # <7>

        print_move(game.next_player, move)

        # Afterward, the bot's move is applied to the board
        game = game.apply_move(move)  # <8>
        num_moves += 1

        # Continue with the next move, as long as the maximum number of moves has not been reached
        if num_moves > max_moves:  # <9>
            break

    return np.array(boards), np.array(moves)  # <10>
Example #16
def generate_game(board_size, rounds, max_moves, temperature):
    # initialize encoded board state and encoded moves
    boards, moves = [], []

    # initialize a OnePlaneEncoder by name with given board size
    encoder = get_encoder_by_name('oneplane', board_size)

    # Instantiate a new game with board_size
    game = goboard.GameState.new_game(board_size)

    # MCTS agent bot with specified rounds and temp
    bot = mcts.MCTSAgent(rounds, temperature)

    num_moves = 0
    while not game.is_over():
        print_board(game.board)

        # bot picks next move
        move = bot.select_move(game)
        if move.is_play:
            # append encoded board to boards
            boards.append(encoder.encode(game))

            # The one-hot-encoded next move is appended to moves
            move_one_hot = np.zeros(encoder.num_points())
            move_one_hot[encoder.encode_point(move.point)] = 1
            moves.append(move_one_hot)

        # apply bots move to the board
        print_move(game.next_player, move)
        game = game.apply_move(move)
        num_moves += 1

        # keep going until max number of moves is reached.
        if num_moves > max_moves:
            break

    return np.array(boards), np.array(moves)
Example #17
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--board-size', type=int, default=19)
    parser.add_argument('--network', default='large')
    parser.add_argument('output_file')
    args = parser.parse_args()

    encoder = encoders.get_encoder_by_name('simple', args.board_size)
    board_input = Input(shape=encoder.shape(), name='board_input')
    action_input = Input(shape=(encoder.num_points(), ), name='action_input')

    processed_board = board_input
    network = getattr(dlgo.networks, args.network)
    for layer in network.layers(encoder.shape()):
        processed_board = layer(processed_board)

    value_output = Dense(1, activation='sigmoid')(processed_board)

    model = Model(inputs=board_input, outputs=value_output)

    new_agent = rl.ValueAgent(model, encoder)
    with h5py.File(args.output_file, 'w') as outf:
        new_agent.serialize(outf)
Example #18
def main():
    pth = "//home//nail//Code_Go//checkpoints//"
    pth_experience = '//home//nail//Experience//'
    board_size = 19
    network = 'small'
    hidden_size = 512
    learning_agent = input('Agent to train the action-value model from: ')
    num_games = int(input('Number of games for building the training data = '))
    delta_games = int(input('Increment in the number of games = '))
    learning_agent = pth + learning_agent + '.h5'  # Either a policy-gradient agent (chapter 10) or one from chapter 7
    output_file = pth + 'new_value_model.h5'  # This will be the agent with two inputs for the action value
    current_agent = pth + 'current_model.h5'  # The agent currently being trained
    lr = 0.0001
    temp_decay = 0.98
    min_temp = 0.00001
    try:
        temperature = float(input('Temperature = '))
    except:
        temperature = min_temp
    try:
        batch_size = int(input("batch_size = "))
    except:
        batch_size = 512
    try:
        epochs = int(input('Epochs = '))
    except:
        epochs = 1

    log_file = input('Processing log: ')
    log_file = pth_experience + log_file + '.txt'

    logf = open(log_file, 'a')
    logf.write('----------------------\n')
    logf.write('Started training agent %s at %s\n' %
               (learning_agent, datetime.datetime.now()))

    # Build the model for training the action value:
    # two inputs, one output.
    # Compile the model if there is no action-value model yet;
    # otherwise load the already existing model.

    if 'value_model' in learning_agent and os.path.isfile(learning_agent):
        New_QAgent = False  # The model already exists; training should continue
        encoder = ''  # Avoids warnings about a possible error in the code below
        model = ''
    else:
        # The training model still has to be created:
        # there is no two-input model yet.
        New_QAgent = True
        encoder = encoders.get_encoder_by_name('simple', board_size)
        board_input = Input(shape=encoder.shape(), name='board_input')
        action_input = Input(shape=(encoder.num_points(), ),
                             name='action_input')

        processed_board = board_input
        network = getattr(dlgo.networks, network)
        for layer in network.layers(encoder.shape()):
            processed_board = layer(processed_board)

        board_plus_action = concatenate([action_input, processed_board])
        hidden_layer = Dense(hidden_size, activation='relu')(board_plus_action)
        value_output = Dense(1, activation='sigmoid')(hidden_layer)

        model = Model(inputs=[board_input, action_input], outputs=value_output)
        opt = SGD(lr=lr)
        model.compile(loss='mse', optimizer=opt)


# "Заполнение" данными модели обучения из игр

    experience = []
    os.chdir(pth_experience)
    lst_files = os.listdir(pth_experience)
    pattern = input('Паттерн для выборки файлов для обучения: ')
    if len(pattern) == 0:
        pattern = "exp*.h5"
    #==============================================================
    # Формируем список файлов с экспериментальными игровыми данными
    for entry in lst_files:
        if fnmatch.fnmatch(entry, pattern):
            experience.append(entry)
    # Получили список файлов игр для обучения
    exp_filename = ''
    if len(experience) > 0:
        experience.sort()
        exp_filename = experience[0]  # Нужен только один файл
    else:
        print(' Нет файлов в папке для обучения!!!!')
        exit(2)

    #==============================================================
    # callback_list = [ModelCheckpoint(pth, monitor='val_accuracy',
    #                                  save_best_only=True)]

    total_work = 0  # Counter of training "passes"
    exp_buffer = 'empty'  # Buffer with game data
    while True:  # Training can always be interrupted and resumed later.
        if New_QAgent == False:
            # The model currently being trained already exists: load it
            q_agent = load_agent(learning_agent)
            model = q_agent.model
            encoder = q_agent.encoder
            #temperature = q_agent.temperature

        logf.write('Pass = %d\n' % total_work)
        print(50 * '=')
        print('File with games for training: %s...' % exp_filename)
        print(50 * '=')
        if exp_buffer == 'empty':
            exp_buffer = rl.load_experience(h5py.File(exp_filename, "r"))
        # Feed the compiled model with training data from the loaded game buffer.
        n = exp_buffer.states.shape[0]
        num_moves = encoder.num_points()
        y = np.zeros((n, ))
        actions = np.zeros((n, num_moves))
        for i in range(n):
            action = exp_buffer.actions[i]
            reward = exp_buffer.rewards[i]
            actions[i][action] = 1
            y[i] = 1 if reward > 0 else -1  # was 0
        # Train the model
        model.fit([exp_buffer.states, actions],
                  y,
                  batch_size=batch_size,
                  epochs=epochs)

        if total_work == 0:  # There is no newly trained agent to compare against yet.
            print('Updating the agent!!!!! This is the first updated agent.')
            logf.write('First initial update of the trained agent\n')
            # Save the trained agent
            #output_file = output_file + '_' + str(total_work) + '.h5'
            new_agent = rl.QAgent(model, encoder)
            with h5py.File(current_agent, 'w') as outf:
                new_agent.serialize(outf)

            # os.chdir(pth_experience)
            #
            # lst_files = os.listdir(pth_experience)
            # next_filename = 'exp_q_' + str(total_work) + '.h5'
            # for entry in lst_files:
            #     if fnmatch.fnmatch(entry, "exp*"):
            #         shutil.move(exp_filename, pth_experience + 'Exp_Save//' + next_filename)
            #         #os.remove('//home//nail//Experience//'+entry)  # Clear the folder with the "old" agent's game data
            # # Generate new game data with the new agent.
            # exp_filename = pth_experience+next_filename
            # do_self_play(19, output_file, output_file, num_games=num_games,
            #              temperature=temperature, experience_filename=exp_filename)
            # total_work += 1
            if New_QAgent == True:  # There was no two-input agent yet.
                total_work += 1
                New_QAgent = False  # Now a two-input model has been saved.
                learning_agent = current_agent  # From now on train the newly created two-input agent.
                new_agent = rl.QAgent(model, encoder)
                # Save the agent as the current one
                with h5py.File(current_agent, 'w') as outf:
                    new_agent.serialize(outf)
                continue  # Nothing to compare against yet; keep the old game data.

        new_agent = rl.QAgent(model, encoder)
        # Save the agent as the current one
        with h5py.File(current_agent, 'w') as outf:
            new_agent.serialize(outf)

        # Compare the playing results of the new current agent against the "old" agent.
        wins = eval(current_agent, learning_agent, num_games=num_games)
        print('Won %d / %s games (%.3f)' %
              (wins, str(num_games), float(wins) / float(num_games)))
        logf.write('Won %d / %s games (%.3f)\n' %
                   (wins, str(num_games), float(wins) / float(num_games)))
        bt = binom_test(wins, num_games, 0.5) * 100
        print('Binomial test = ', bt, '%')
        logf.write('Binomial test = %f\n' % bt)
        if bt <= 5 and wins > num_games / 2 + num_games / 10:  # At least 95% confidence that the new bot plays better than the previous one
            print('Updating the agent!!!!!')
            # Save the trained agent
            new_agent = rl.QAgent(model, encoder)
            with h5py.File(output_file, 'w') as outf:
                new_agent.serialize(outf)

            logf.write(
                'Agent updated after successful training pass %d at %s\n'
                % (total_work, datetime.datetime.now()))
            logf.write('New agent: %s\n' % output_file)

            #os.remove('//home//nail//Experience//*')  # Clear the folder with the "old" agent's game data
            next_filename = 'exp_q_' + str(total_work) + '.h5'
            shutil.move(exp_filename,
                        pth_experience + 'Exp_Save//' + next_filename)
            # Generate new game data with the new agent.
            exp_filename = pth_experience + next_filename
            temperature = max(min_temp, temp_decay * temperature)
            do_self_play(19,
                         output_file,
                         output_file,
                         num_games=num_games,
                         temperature=temperature,
                         experience_filename=exp_filename)

            logf.write('New "temperature" = %f\n' % temperature)
        else:
            print('Keeping the agent unchanged; the game data also stays the same\n')

        total_work += 1
        print('Number of completed passes = ', total_work)
        logf.write('Completed pass %d at %s\n' %
                   (total_work, datetime.datetime.now()))
        # New generation of training data.
        # num_games += delta_games  # Increase the number of games for training.
        # #temperature = max(min_temp, temp_decay * temperature)
        # next_filename = 'exp_q_' + str(total_work) + '.h5'
        # shutil.move(exp_filename, pth_experience + 'Exp_Save//' + next_filename)
        # exp_filename = pth_experience + next_filename
        # do_self_play(19, current_agent, current_agent, num_games=num_games,
        #              temperature=0, experience_filename=exp_filename)
        #
        #
        # exp_buffer = rl.load_experience(h5py.File(exp_filename, "r"))  # Load the new games file into the buffer.
        learning_agent = current_agent  # Update the "previous" training agent
        logf.flush()
Example #19
def main():

    board_size = 19
    hidden_size = 1024
    workdir = '//media//nail//SSD_Disk//Models//'
    output_file = workdir + 'q_agent.h5'
    lr = 0.01
    batch_size = 512

    pth = "//media//nail//SSD_Disk//Models//"
    pth_experience = '//media//nail//SSD_Disk//Experience//'
    experience_filename = pth_experience+'exp'

    only_form_games = input('Only generate new game data with the existing model? (Y/N) ').lower()

    # "Fill" the training model with data from games
    experience = []
    os.chdir(pth_experience)
    lst_files = os.listdir(pth_experience)
    pattern = input('Pattern for selecting training files: ')
    if len(pattern) == 0:
        pattern = "exp*.h5"

    if only_form_games != 'y':
        # ==============================================================
        # Build the list of files with experience game data
        new_form_games = input('Generate new game data with the newly created model? (Y/N) ').lower()

        count_exp = int(input('How many files to take for initial training? '))
        len_lst_files = len(lst_files)
        if count_exp <= len_lst_files:
            for entry in lst_files[:count_exp]:
                if fnmatch.fnmatch(entry, pattern):
                    experience.append(entry)
        else:
            for entry in lst_files:
                if fnmatch.fnmatch(entry, pattern):
                    experience.append(entry)
        # Now we have the list of game files for training;
        # sort it for convenience.
        if len(experience) > 0:
            experience.sort()
        else:
            print(' No files in the training folder!!!!')
            exit(2)

        encoder = encoders.get_encoder_by_name('simple', board_size)
        board_input = Input(shape=encoder.shape(), name='board_input')
        action_input = Input(shape=(encoder.num_points(),), name='action_input')

        # =============================================================
        # The network is partly the same as the original "large" network from the book's authors
        # =============================================================

        conv_0a = Conv2D(64, (7, 7), padding='same', activation='relu')(board_input)
       # BatchNormalization()

        conv_1a = Conv2D(64, (5, 5), padding='same', activation='relu')(conv_0a)
        #BatchNormalization()

        conv_2a = Conv2D(48, (5, 5), padding='same', activation='relu')(conv_1a)
        #BatchNormalization()

        conv_3a = Conv2D(48, (5, 5), padding='same', activation='relu')(conv_2a)
        #BatchNormalization()

        conv_4a = Conv2D(32, (5, 5), padding='same',activation='relu')(conv_3a)
        #BatchNormalization()

        flat = Flatten()(conv_4a)

        processed_board = Dense(1024)(flat)


        board_plus_action = concatenate([action_input, processed_board])
        hidden_layer = Dense(hidden_size, activation='relu')(board_plus_action)
        value_output = Dense(1, activation='sigmoid')(hidden_layer)

        model = Model(inputs=[board_input, action_input], outputs=value_output)
        opt = SGD(lr=lr)
        model.compile(loss='mse', optimizer=opt)

        # Train the model with fit_generator
        model.fit_generator(
                generator=generator_q(experience=experience, num_moves=361, batch_size=batch_size),
                steps_per_epoch=get_num_samples(experience=experience, num_moves=361, batch_size=batch_size) / batch_size,
                verbose=1,
                epochs=1,
                initial_epoch=0)
        # All files have been processed

        new_agent = rl.QAgent(model, encoder)
        with h5py.File(output_file, 'w') as outf:
            new_agent.serialize(outf)

        if new_form_games == 'y':
            # Build the list of experience game data files with the newly trained two-input agent.
            num_games = int(input('Number of games to generate = '))
            chunk = int(input('Number of games per chunk file = '))
            experience = []
            os.chdir(pth_experience)
            lst_files = os.listdir(pth_experience)
            for entry in lst_files:
                #if fnmatch.fnmatch(entry, 'exp*'):  # logs go to the save folder too; clear everything.
                if os.path.isfile(entry) == True:
                    experience.append(entry)
            for filename in experience:
                shutil.move(filename, pth_experience + 'Exp_Save//' + filename)

            do_self_play(19,output_file,output_file,num_games=num_games,temperature=0,
                         experience_filename=experience_filename,chunk=chunk)
    else:
        model_file = input('File with the existing model = ')
        num_games = int(input('Number of games to generate = '))
        chunk = int(input('Number of games per chunk file = '))
        model_file = workdir + model_file + '.h5'
        do_self_play(19, model_file, model_file, num_games=num_games, temperature=0,
                         experience_filename=experience_filename, chunk=chunk)
Example #20
from keras.models import Model
from keras.layers import Conv2D, Dense, Flatten, Input

from dlgo import encoders

board_size = 19
encoder = encoders.get_encoder_by_name('simple', board_size)
board_input = Input(shape=encoder.shape(), name='board_input')

conv1 = Conv2D(64, (3, 3), padding='same', activation='relu')(board_input)
conv2 = Conv2D(64, (3, 3), padding='same', activation='relu')(conv1)
conv3 = Conv2D(64, (3, 3), padding='same', activation='relu')(conv2)

flat = Flatten()(conv3)
processed_board = Dense(512)(flat)

policy_hidden_layer = Dense(512, activation='relu')(processed_board)
policy_output = Dense(encoder.num_points(), activation='softmax')(policy_hidden_layer)

value_hidden_layer = Dense(512, activation='relu')(processed_board)
value_output = Dense(1, activation='tanh')(value_hidden_layer)
model = Model(inputs=board_input, outputs=[policy_output, value_output])
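
Not in the original listing: a hedged sketch of compiling the two-headed model above, with one loss per output (policy head first, then value head). The optimizer, losses, and loss weights are illustrative choices, not prescribed by the source.

model.compile(optimizer='sgd',
              loss=['categorical_crossentropy', 'mse'],
              loss_weights=[1.0, 0.5])
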
def main():
    """
    board_input = Input(shape=encoder.shape(), name='board_input')

    # Add as many convolutional layers as you like
    conv1 = Conv2D(64, (3, 3),
                   padding='same',
                   activation='relu')(board_input)
    conv2 = Conv2D(64, (3, 3),
                   padding='same',
                   activation='relu')(conv1)
    conv3 = Conv2D(64, (3, 3),
                   padding='same',
                   activation='relu')(conv2)

    flat = Flatten()(conv3)
    # This example uses hidden layers of size 512.
    # Experiment to find the best size.
    # The three hidden layers don't need to be the same size
    processed_board = Dense(512)(flat)

    # This output yields the policy function
    policy_hidden_layer = Dense(512, activation='relu')(processed_board)
    policy_output = Dense(encoder.num_points(), activation='softmax')(policy_hidden_layer)

    # This output yields the value function
    value_hidden_layer = Dense(512, activation='relu')(processed_board)
    value_output = Dense(1, activation='tanh')(value_hidden_layer)

    model = Model(inputs=board_input,
                  outputs=[policy_output, value_output])
    """
    # added from gh repo
    parser = argparse.ArgumentParser()
    parser.add_argument('--board-size', type=int, default=19)
    parser.add_argument('--network', default='large')
    parser.add_argument('--hidden-size', type=int, default=512)
    parser.add_argument('output_file')
    args = parser.parse_args()

    encoder = encoders.get_encoder_by_name('sevenplane', args.board_size)
    board_input = Input(shape=encoder.shape(), name='board_input')

    processed_board = board_input
    network = getattr(dlgo.networks, args.network)
    for layer in network.layers(encoder.shape()):
        processed_board = layer(processed_board)

    policy_hidden_layer = Dense(args.hidden_size,
                                activation='relu')(processed_board)
    policy_output = Dense(encoder.num_points(),
                          activation='softmax')(policy_hidden_layer)

    value_hidden_layer = Dense(args.hidden_size,
                               activation='relu')(processed_board)
    value_output = Dense(1, activation='tanh')(value_hidden_layer)

    model = Model(inputs=[board_input], outputs=[policy_output, value_output])

    new_agent = rl.ACAgent(model, encoder)
    with h5py.File(args.output_file, 'w') as outf:
        new_agent.serialize(outf)