# Beispiel #1
# 0
    def test_4_alphago_mcts(self):
        """Smoke-test AlphaGo-style MCTS: load the three networks, build the
        agent, and select one move on an empty 19x19 board."""
        print("TEST 4\n=====================================================")
        # Restrict TF to the first GPU and let its memory grow on demand,
        # so the test does not reserve the whole card up front.
        physical_gpus = tf.config.experimental.list_physical_devices('GPU')
        if physical_gpus:
            first_gpu = physical_gpus[0]
            try:
                tf.config.experimental.set_visible_devices(first_gpu, 'GPU')
                tf.config.experimental.set_memory_growth(first_gpu, True)
                tf.config.set_soft_device_placement(True)
            except RuntimeError as err:
                # Device configuration must happen before TF initializes;
                # if it already has, just report and continue.
                print(err)

        # The three trained networks driving the search: a fast rollout
        # policy, a strong tree policy, and a position-value network.
        fast_policy = load_prediction_agent(
            h5py.File('test_alphago_sl_policy.h5', 'r'))
        strong_policy = load_policy_agent(
            h5py.File('test_alphago_rl_policy.h5', 'r'))
        value = load_value_agent(h5py.File('test_alphago_value.h5', 'r'))

        bot = AlphaGoMCTS(strong_policy, fast_policy, value,
                          num_simulations=20, depth=5, rollout_limit=10)
        bot.select_move(GameState.new_game(19))
# Beispiel #2
# 0
    def test_4_alphago_mcts(self):
        """Build an AlphaGoMCTS agent from saved networks and pick one
        opening move on a fresh 19x19 game."""
        # Open the three saved network files (read-only).
        sl_policy_file = h5py.File('test_alphago_sl_policy.h5', 'r')
        rl_policy_file = h5py.File('test_alphago_rl_policy.h5', 'r')
        value_file = h5py.File('test_alphago_value.h5', 'r')

        fast_policy = load_prediction_agent(sl_policy_file)
        strong_policy = load_policy_agent(rl_policy_file)
        value = load_value_agent(value_file)

        # Small search budget: this is a smoke test, not a strength test.
        alphago = AlphaGoMCTS(
            strong_policy, fast_policy, value,
            num_simulations=20, depth=5, rollout_limit=10)
        start = GameState.new_game(19)
        alphago.select_move(start)
# Beispiel #3
# 0
def load_agent(filename):
    """Load a value agent from the HDF5 file at *filename*.

    The file is always closed, even if deserialization fails.
    """
    h5file = h5py.File(filename, 'r')
    try:
        return rl.load_value_agent(h5file)
    finally:
        h5file.close()
# tag::run_alphago[]
from dlgo.agent import load_prediction_agent, load_policy_agent, AlphaGoMCTS
from dlgo.rl import load_value_agent
import h5py

# Open the three saved networks: the fast (SL) rollout policy, the strong
# (RL) tree policy, and the position-value network.
sl_policy_file = h5py.File('alphago_sl_policy.h5', 'r')
rl_policy_file = h5py.File('alphago_rl_policy.h5', 'r')
value_file = h5py.File('alphago_value.h5', 'r')

fast_policy = load_prediction_agent(sl_policy_file)
strong_policy = load_policy_agent(rl_policy_file)
value = load_value_agent(value_file)

# Combine the three networks into an AlphaGo-style MCTS agent.
alphago = AlphaGoMCTS(strong_policy, fast_policy, value)
# end::run_alphago[]

# TODO: register in frontend
# Beispiel #5
# 0
def main():
    """Train (or continue training) a value network from experience files.

    Interactively asks for a filename pattern, the checkpoint to continue
    from, the output agent name, and optionally a learning rate and batch
    size; then runs ``my_train_v`` over every matching experience file and
    serializes the resulting ``ValueAgent`` to ``agent_out``.
    """
    pth = '//home//nail//Code_Go//checkpoints//'
    pth_experience = '//home//nail//Experience//'
    os.chdir(pth_experience)

    pattern = input('Паттерн для выборки файлов для обучения: ')
    if not pattern:
        pattern = "exp*.h5"
    # Collect matching experience files in a stable (sorted) order.
    experience = sorted(
        entry for entry in os.listdir(pth_experience)
        if fnmatch.fnmatch(entry, pattern))

    learning_agent = input('learning_agent:')
    learning_agent = pth + learning_agent + '.h5'
    print('learning_agent: ', learning_agent)
    agent_out = input('agent_out:')
    agent_out = pth + agent_out + '.h5'
    board_size = 19
    print('agent_out: ', agent_out)
    # Fall back to defaults on empty/invalid input (was a bare except).
    try:
        lr = float(input('lr = '))
    except ValueError:
        lr = 0.000001
    try:
        bs = int(input('bs = '))
    except ValueError:
        bs = 1024

    # ==================================================
    # Let TF grow GPU memory on demand instead of reserving it all.
    import tensorflow as tf
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.98
    config.gpu_options.allow_growth = True
    config.log_device_placement = True
    sess = tf.compat.v1.Session(config=config)
    tf.compat.v1.keras.backend.set_session(sess)
    # ==================================================
    encoder = SimpleEncoder((board_size, board_size))

    # Resume from the existing checkpoint when it can be loaded; otherwise
    # start a fresh value model.  BUGFIX: the original fallback assigned
    # create_v_model(...) to `learning_agent` instead of `model_v`, so the
    # training loop below raised NameError whenever loading failed.
    try:
        with h5py.File(learning_agent, "r") as h5file:
            model_v = kerasutil.load_model_from_hdf5_group(h5file['model'])
    except (OSError, KeyError):
        model_v = create_v_model(lr=lr)

    num_files = len(experience)
    for i, exp_filename in enumerate(experience, start=1):
        print(50 * '=')
        print('Файл для обучения: %s...' % exp_filename)
        print(50 * '=')
        # load_experience is assumed to copy the data into memory, so the
        # file can be closed before training — TODO confirm.
        with h5py.File(exp_filename, "r") as exp_file:
            exp_buffer = rl.load_experience(exp_file)
        model_v = my_train_v(model_v,
                             encoder,
                             exp_buffer,
                             lr=lr,
                             batch_size=bs)

        print('Обработано файлов: ', i, ' из ', num_files)

    learning_agent = rl.ValueAgent(model_v, encoder)
    with h5py.File(agent_out, 'w') as updated_agent_outf:
        learning_agent.serialize(updated_agent_outf)
# Beispiel #6
# 0
from dlgo.agent import load_prediction_agent, load_policy_agent, AlphaGoMCTS
from dlgo.rl import load_value_agent
import h5py

# Open the saved GHGHbot1 networks: fast (SL) rollout policy, strong (RL)
# tree policy, and the position-value network.
sl_policy_file = h5py.File('agents/GHGHbot1_sl_policy.h5', 'r')
rl_policy_file = h5py.File('agents/GHGHbot1_rl_policy.h5', 'r')
value_file = h5py.File('agents/GHGHbot1_value.h5', 'r')

fast_policy = load_prediction_agent(sl_policy_file)
strong_policy = load_policy_agent(rl_policy_file)
value = load_value_agent(value_file)

# Assemble the AlphaGo-style MCTS agent from the three networks.
alphago = AlphaGoMCTS(strong_policy, fast_policy, value)


# TODO: register in frontend
# Beispiel #7
# 0
# BUGFIX: `h5py` and `time` were used below but never imported,
# so this script failed with NameError at runtime.
import time

import h5py

from dlgo import goboard_fast as goboard
from dlgo.goboard_fast import Move
from dlgo.gotypes import Player, Point
from dlgo.utils import print_board, print_move

from dlgo import agent
from dlgo.agent.predict import DeepLearningAgent, load_prediction_agent
from dlgo.agent import load_prediction_agent, load_policy_agent, AlphaGoMCTS
from dlgo.rl import load_value_agent

# Load policy agent and value agent.
# NOTE(review): fast_policy and strong_policy load from the SAME file —
# presumably the SL and RL policies should be different checkpoints;
# confirm the intended filenames.
fast_policy = load_prediction_agent(
    h5py.File('models/AlphaGo/alphago_policyv0-0-0.h5', 'r'))
strong_policy = load_policy_agent(
    h5py.File('models/AlphaGo/alphago_policyv0-0-0.h5', 'r'))
value = load_value_agent(
    h5py.File('models/AlphaGo/alphago_valuev1-0-1.h5', 'r'))

# Create AlphaGo MCTS agent based on the policy agent and the value agent.
alphago = AlphaGoMCTS(strong_policy,
                      fast_policy,
                      value,
                      depth=10,
                      rollout_limit=50,
                      num_simulations=100)

# Time how long the agent takes to select one move on an empty 19x19
# board (100 simulations of MCTS can take a while).
game_state = goboard.GameState.new_game(19)
start_time = time.time()
next_move = alphago.select_move(game_state)
exec_time = time.time() - start_time
print(exec_time)