def test_4_alphago_mcts(self):
    """End-to-end smoke test: build an AlphaGo MCTS agent and select one move.

    Configures TensorFlow to use only the first GPU (with memory growth) when
    one is present, then loads the fast SL policy, strong RL policy, and value
    network from their test HDF5 files.
    """
    print("TEST 4\n=====================================================")
    gpu_devices = tf.config.experimental.list_physical_devices('GPU')
    if gpu_devices:
        # Restrict TensorFlow to the first GPU and let memory grow on demand.
        try:
            first_gpu = gpu_devices[0]
            tf.config.experimental.set_visible_devices(first_gpu, 'GPU')
            tf.config.experimental.set_memory_growth(first_gpu, True)
            tf.config.set_soft_device_placement(True)
        except RuntimeError as err:
            # Device configuration must happen before TF initializes; report and move on.
            print(err)
    fast_policy = load_prediction_agent(
        h5py.File('test_alphago_sl_policy.h5', 'r'))
    strong_policy = load_policy_agent(
        h5py.File('test_alphago_rl_policy.h5', 'r'))
    value = load_value_agent(h5py.File('test_alphago_value.h5', 'r'))
    alphago = AlphaGoMCTS(strong_policy, fast_policy, value,
                          num_simulations=20, depth=5, rollout_limit=10)
    start = GameState.new_game(19)
    # Only checks that move selection runs without raising.
    alphago.select_move(start)
def test_4_alphago_mcts(self):
    """Smoke test: assemble an AlphaGo MCTS agent from saved networks and pick a move."""
    sl_policy_file = h5py.File('test_alphago_sl_policy.h5', 'r')
    rl_policy_file = h5py.File('test_alphago_rl_policy.h5', 'r')
    value_net_file = h5py.File('test_alphago_value.h5', 'r')
    fast_policy = load_prediction_agent(sl_policy_file)
    strong_policy = load_policy_agent(rl_policy_file)
    value = load_value_agent(value_net_file)
    # Small search budget so the test stays quick.
    alphago = AlphaGoMCTS(strong_policy, fast_policy, value,
                          num_simulations=20, depth=5, rollout_limit=10)
    start = GameState.new_game(19)
    # Only checks that move selection runs without raising.
    alphago.select_move(start)
def load_agent(filename):
    """Deserialize a value agent from the HDF5 file at *filename*.

    The file handle is closed before the agent is returned.
    """
    with h5py.File(filename, 'r') as serialized:
        agent = rl.load_value_agent(serialized)
    return agent
# tag::run_alphago[]
from dlgo.agent import load_prediction_agent, load_policy_agent, AlphaGoMCTS
from dlgo.rl import load_value_agent
import h5py

# The three networks behind AlphaGo's tree search: a fast SL policy for
# rollouts, a strong RL policy for move priors, and a value network for
# position evaluation.
sl_policy_file = h5py.File('alphago_sl_policy.h5', 'r')
rl_policy_file = h5py.File('alphago_rl_policy.h5', 'r')
value_file = h5py.File('alphago_value.h5', 'r')

fast_policy = load_prediction_agent(sl_policy_file)
strong_policy = load_policy_agent(rl_policy_file)
value = load_value_agent(value_file)

alphago = AlphaGoMCTS(strong_policy, fast_policy, value)
# end::run_alphago[]

# TODO: register in frontend
def main():
    """Interactively train a value network on saved experience files.

    Prompts for a filename pattern, input/output agent names, learning rate,
    and batch size; trains on every matching experience file in sequence and
    serializes the resulting value agent to the output path.

    Resumes from an existing serialized agent when one is found at the input
    path, otherwise starts from a freshly created model.
    """
    pth = '//home//nail//Code_Go//checkpoints//'
    pth_experience = '//home//nail//Experience//'
    experience = []
    os.chdir(pth_experience)
    lst_files = os.listdir(pth_experience)
    pattern = input('Паттерн для выборки файлов для обучения: ')
    if len(pattern) == 0:
        pattern = "exp*.h5"
    for entry in lst_files:
        if fnmatch.fnmatch(entry, pattern):
            experience.append(entry)
    # Train in deterministic (sorted) file order.
    experience.sort()
    learning_agent = input('learning_agent:')
    learning_agent = pth + learning_agent + '.h5'
    print('learning_agent: ', learning_agent)
    agent_out = input('agent_out:')
    agent_out = pth + agent_out + '.h5'
    board_size = 19
    print('agent_out: ', agent_out)
    # Narrow excepts: only a malformed number falls back to the default
    # (the original bare `except:` also swallowed KeyboardInterrupt etc.).
    try:
        lr = float(input('lr = '))
    except ValueError:
        lr = 0.000001
    try:
        bs = int(input('bs = '))
    except ValueError:
        bs = 1024
    # ==================================================
    # TF1-style session config: cap GPU memory use and allow growth.
    import tensorflow as tf
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.98
    config.gpu_options.allow_growth = True
    config.log_device_placement = True
    sess = tf.compat.v1.Session(config=config)
    tf.compat.v1.keras.backend.set_session(sess)
    # ==================================================
    encoder = SimpleEncoder((board_size, board_size))
    # Resume from an existing agent if possible; otherwise build a new model.
    try:
        with h5py.File(learning_agent, "r") as h5file:
            learning_agent = rl.load_value_agent(h5file)
            model_v = kerasutil.load_model_from_hdf5_group(h5file['model'])
    except (OSError, KeyError):
        # BUG FIX: the original assigned the fresh model to `learning_agent`,
        # leaving `model_v` undefined and crashing in my_train_v below.
        model_v = create_v_model(lr=lr)
    num_files = len(experience)
    for i, exp_filename in enumerate(experience, start=1):
        print(50 * '=')
        print('Файл для обучения: %s...' % exp_filename)
        print(50 * '=')
        # Close each experience file once its buffer is loaded
        # (the original leaked every handle).
        with h5py.File(exp_filename, "r") as exp_file:
            exp_buffer = rl.load_experience(exp_file)
        model_v = my_train_v(model_v, encoder, exp_buffer, lr=lr, batch_size=bs)
        print('Обработано файлов: ', i, ' из ', num_files)
    learning_agent = rl.ValueAgent(model_v, encoder)
    with h5py.File(agent_out, 'w') as updated_agent_outf:
        learning_agent.serialize(updated_agent_outf)
from dlgo.agent import load_prediction_agent, load_policy_agent, AlphaGoMCTS
from dlgo.rl import load_value_agent
import h5py

# Assemble an AlphaGo-style MCTS agent from the three serialized GHGHbot1
# networks: fast SL policy (rollouts), strong RL policy (priors), and the
# value network (position evaluation).
sl_file = h5py.File('agents/GHGHbot1_sl_policy.h5', 'r')
rl_file = h5py.File('agents/GHGHbot1_rl_policy.h5', 'r')
value_file = h5py.File('agents/GHGHbot1_value.h5', 'r')

fast_policy = load_prediction_agent(sl_file)
strong_policy = load_policy_agent(rl_file)
value = load_value_agent(value_file)

alphago = AlphaGoMCTS(strong_policy, fast_policy, value)

# TODO: register in frontend
# BUG FIX: the script called time.time() and h5py.File() without importing
# either module, so it raised NameError given the visible import list.
import time

import h5py

from dlgo import goboard_fast as goboard
from dlgo.goboard_fast import Move
from dlgo.gotypes import Player, Point
from dlgo.utils import print_board, print_move
from dlgo import agent
from dlgo.agent.predict import DeepLearningAgent, load_prediction_agent
# NOTE(review): load_prediction_agent is imported twice (also above from
# dlgo.agent.predict); the re-import below is kept for compatibility.
from dlgo.agent import load_prediction_agent, load_policy_agent, AlphaGoMCTS
from dlgo.rl import load_value_agent

# Load policy agent and value agent
fast_policy = load_prediction_agent(
    h5py.File('models/AlphaGo/alphago_policyv0-0-0.h5', 'r'))
strong_policy = load_policy_agent(
    h5py.File('models/AlphaGo/alphago_policyv0-0-0.h5', 'r'))
value = load_value_agent(
    h5py.File('models/AlphaGo/alphago_valuev1-0-1.h5', 'r'))

# Create AlphaGo MCTS agent based on the policy agent and the value agent
alphago = AlphaGoMCTS(strong_policy, fast_policy, value,
                      depth=10, rollout_limit=50, num_simulations=100)

# Test duration for selecting a move
game_state = goboard.GameState.new_game(19)
start_time = time.time()
next_move = alphago.select_move(game_state)
exec_time = time.time() - start_time
print(exec_time)