def main():
    """Play a match between two saved policy agents and print agent 1's record.

    Command-line arguments:
        --agent1: path to the HDF5 file of the first agent (required).
        --agent2: path to the HDF5 file of the second agent (required).
        --num-games / -n: number of games to simulate (default 10).

    The agents swap colors after every game so that neither benefits from
    always playing the same side.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--agent1', required=True)
    parser.add_argument('--agent2', required=True)
    parser.add_argument('--num-games', '-n', type=int, default=10)
    args = parser.parse_args()

    # Open the checkpoints read-only and release the handles as soon as the
    # agents are built instead of leaking them for the whole run.
    with h5py.File(args.agent1, 'r') as agent1f:
        agent1 = agent.load_policy_agent(agent1f)
    with h5py.File(args.agent2, 'r') as agent2f:
        agent2 = agent.load_policy_agent(agent2f)

    num_games = args.num_games
    wins = 0
    losses = 0
    color1 = Player.black  # color currently played by agent 1
    for i in range(num_games):
        print('Simulating game %d/%d...' % (i + 1, num_games))
        if color1 == Player.black:
            black_player, white_player = agent1, agent2
        else:
            white_player, black_player = agent1, agent2
        game_record = simulate_game(black_player, white_player)
        if game_record.winner == color1:
            wins += 1
        else:
            losses += 1
        color1 = color1.other
    print('Agent 1 record: %d/%d' % (wins, wins + losses))
def play_games(args):
    """Run a batch of evaluation games between two saved policy agents.

    ``args`` is a 5-item sequence:
    ``(agent1_fname, agent2_fname, num_games, board_size, gpu_frac)``.

    Returns a ``(wins, losses)`` tuple counted from agent 1's point of view.
    """
    first_path, second_path, num_games, board_size, gpu_frac = args

    kerasutil.set_gpu_memory_target(gpu_frac)

    # Mix the PID into the seed so that processes started within the same
    # second do not share a seed.
    random.seed(int(time.time()) + os.getpid())
    np.random.seed(int(time.time()) + os.getpid())

    with h5py.File(first_path, 'r') as first_h5:
        first_bot = agent.load_policy_agent(first_h5)
    with h5py.File(second_path, 'r') as second_h5:
        second_bot = agent.load_policy_agent(second_h5)

    wins = 0
    losses = 0
    first_bot_color = Player.black
    for game_no in range(1, num_games + 1):
        print('Simulating game %d/%d...' % (game_no, num_games))
        if first_bot_color == Player.black:
            black_player = first_bot
            white_player = second_bot
        else:
            black_player = second_bot
            white_player = first_bot
        record = simulate_game(black_player, white_player, board_size)
        first_bot_won = record.winner == first_bot_color
        if first_bot_won:
            print('Agent 1 wins')
            wins += 1
        else:
            print('Agent 2 wins')
            losses += 1
        print('Agent 1 record: %d/%d' % (wins, wins + losses))
        # Alternate sides for the next game.
        first_bot_color = first_bot_color.other
    return wins, losses
def main():
    """Play a fixed-length match between two policy agents (listing 10.8).

    The agent paths are placeholders that must be edited before the script is
    run. Wins and losses are tracked from the point of view of agent 1, and
    the final record is printed as ``wins/total``.
    """
    # PLACEHOLDER VARIABLES (CHANGE BEFORE RUNNING SCRIPT)
    agent1filepath = ""
    agent2filepath = ""
    num_games = 50

    # Load each agent read-only and close the checkpoint immediately; the
    # original left both HDF5 handles open for the lifetime of the process.
    with h5py.File(agent1filepath, 'r') as agent1f:
        agent1 = agent.load_policy_agent(agent1f)
    with h5py.File(agent2filepath, 'r') as agent2f:
        agent2 = agent.load_policy_agent(agent2f)

    # This script tracks wins and losses from the point of view of agent1.
    wins = 0
    losses = 0
    color1 = Player.black  # color1 = black, color2 = white
    for i in range(num_games):
        print('Simulating game %d/%d...' % (i + 1, num_games))
        if color1 == Player.black:
            black_player, white_player = agent1, agent2
        else:
            white_player, black_player = agent1, agent2
        game_record = simulate_game(black_player, white_player)
        if game_record.winner == color1:
            wins += 1
        else:
            losses += 1
        # Swap colors after each game, in case either agent plays better
        # depending on color.
        color1 = color1.other
    print('Agent 1 record: %d/%d' % (wins, wins + losses))
def main():
    """Interactively play a match between two checkpointed policy agents.

    Prompts for the number of games and for two checkpoint names (without the
    ``.h5`` suffix) located in the hard-coded checkpoint directory, configures
    a TensorFlow v1-compat session, plays the games with colors swapped after
    each one, and prints agent 1's record as ``wins/total``.
    """
    # ==================================================
    import os
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    import tensorflow as tf
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.95
    config.gpu_options.allow_growth = True
    config.log_device_placement = True
    sess = tf.compat.v1.Session(config=config)
    tf.compat.v1.keras.backend.set_session(sess)
    # ==================================================

    pth = '//home//nail//Code_Go//checkpoints//'
    num_games = int(input("Количество игр :"))
    agent1_name = input('Игрок(агент) №1:')
    agent2_name = input('Игрок(агент) №2:')
    agent1_path = pth + agent1_name + ".h5"
    agent2_path = pth + agent2_name + ".h5"

    # Close each checkpoint as soon as its agent is built; the original kept
    # both HDF5 handles open for the whole match.
    with h5py.File(agent1_path, "r") as agent1f:
        agent1 = agent.load_policy_agent(agent1f)
    with h5py.File(agent2_path, "r") as agent2f:
        agent2 = agent.load_policy_agent(agent2f)

    wins = 0
    losses = 0
    color1 = Player.black  # color currently played by agent 1
    for i in range(num_games):
        print('Симуляция игры %d/%d...' % (i + 1, num_games))
        if color1 == Player.black:
            black_player, white_player = agent1, agent2
            print('Агент №1 - играет Черными, Агент №2 - играет Белыми')
        else:
            white_player, black_player = agent1, agent2
            print('Агент №1 - играет Белыми, Агент №2 - играет Черными')
        game_record = simulate_game(black_player, white_player)
        if game_record.winner == color1:
            wins += 1
        else:
            losses += 1
        color1 = color1.other
    print('Agent 1 record: %d/%d' % (wins, wins + losses))
def main():
    """Generate self-play experience for a policy agent and save it to HDF5.

    Command-line arguments:
        --learning-agent: HDF5 checkpoint loaded twice, once per player.
        --num-games / -n: number of self-play games (default 10).
        --game-log-out: text log file, opened in append mode.
        --experience-out: HDF5 file that receives the combined experience.
        --temperature: value passed to ``set_temperature`` on both agents.

    After each game the winner's collector is completed with reward +1 and
    the loser's with reward -1. The agents swap colors after every game.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--learning-agent', required=True)
    parser.add_argument('--num-games', '-n', type=int, default=10)
    parser.add_argument('--game-log-out', required=True)
    parser.add_argument('--experience-out', required=True)
    parser.add_argument('--temperature', type=float, default=0.0)
    args = parser.parse_args()

    # Two independent copies of the same checkpoint; each handle is closed
    # right after loading instead of being leaked.
    with h5py.File(args.learning_agent, 'r') as agentf:
        agent1 = agent.load_policy_agent(agentf)
    with h5py.File(args.learning_agent, 'r') as agentf:
        agent2 = agent.load_policy_agent(agentf)
    agent1.set_temperature(args.temperature)
    agent2.set_temperature(args.temperature)

    collector1 = rl.ExperienceCollector()
    collector2 = rl.ExperienceCollector()

    color1 = Player.black  # color currently played by agent 1
    # The context manager guarantees the log is flushed and closed even if a
    # game raises.
    with open(args.game_log_out, 'a') as logf:
        logf.write('Begin training at %s\n' %
                   (datetime.datetime.now().strftime('%Y-%m-%d %H:%M'),))
        for i in range(args.num_games):
            print('Simulating game %d/%d...' % (i + 1, args.num_games))
            collector1.begin_episode()
            agent1.set_collector(collector1)
            collector2.begin_episode()
            agent2.set_collector(collector2)

            if color1 == Player.black:
                black_player, white_player = agent1, agent2
            else:
                # Agent 1 plays white here. The original assigned
                # (agent2, agent1), which left agent 1 on black while color1
                # said white, so rewards were credited to the wrong agent
                # on every second game.
                white_player, black_player = agent1, agent2
            game_record = simulate_game(black_player, white_player)
            if game_record.winner == color1:
                print('Agent 1 wins.')
                collector1.complete_episode(reward=1)
                collector2.complete_episode(reward=-1)
            else:
                print('Agent 2 wins.')
                collector2.complete_episode(reward=1)
                collector1.complete_episode(reward=-1)
            color1 = color1.other

        experience = rl.combine_experience([collector1, collector2])
        logf.write('Saving experience buffer to %s\n' % args.experience_out)

    with h5py.File(args.experience_out, 'w') as experience_outf:
        experience.serialize(experience_outf)
def main():
    """Start the HTTP frontend with an MCTS bot plus any requested agents.

    Command-line arguments:
        --bind-address: host to bind to (default 127.0.0.1).
        --port / -p: port to listen on (default 5000).
        --pg-agent: optional HDF5 file of a policy-gradient agent.
        --predict-agent: optional HDF5 file of a move-prediction agent.
        --q-agent: optional HDF5 file of a Q-learning agent.
        --ac-agent: optional HDF5 file of an actor-critic agent.

    Each loaded agent is registered under a short key ('pg', 'predict', 'q',
    'ac') next to the always-present 'mcts' bot.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--bind-address', default='127.0.0.1')
    parser.add_argument('--port', '-p', type=int, default=5000)
    parser.add_argument('--pg-agent')
    parser.add_argument('--predict-agent')
    parser.add_argument('--q-agent')
    parser.add_argument('--ac-agent')
    args = parser.parse_args()

    bots = {'mcts': mcts.MCTSAgent(800, temperature=0.7)}

    # Every checkpoint is opened read-only and closed right after loading so
    # the long-running web server does not hold HDF5 handles open.
    # NOTE(review): assumes the loaders read everything eagerly, as
    # load_policy_agent is used elsewhere in this file - confirm for the
    # prediction, Q and actor-critic loaders.
    if args.pg_agent:
        with h5py.File(args.pg_agent, 'r') as pg_file:
            bots['pg'] = agent.load_policy_agent(pg_file)
    if args.predict_agent:
        with h5py.File(args.predict_agent, 'r') as predict_file:
            bots['predict'] = agent.load_prediction_agent(predict_file)
    if args.q_agent:
        with h5py.File(args.q_agent, 'r') as q_file:
            q_bot = rl.load_q_agent(q_file)
        q_bot.set_temperature(0.01)
        bots['q'] = q_bot
    if args.ac_agent:
        with h5py.File(args.ac_agent, 'r') as ac_file:
            ac_bot = rl.load_ac_agent(ac_file)
        ac_bot.set_temperature(0.05)
        bots['ac'] = ac_bot

    web_app = httpfrontend.get_web_app(bots)
    web_app.run(host=args.bind_address, port=args.port, threaded=False)
def main():
    """Train a policy agent on one or more experience files and save it.

    Command-line arguments:
        --learning-agent: HDF5 checkpoint of the agent to train (required).
        --agent-out: HDF5 file that receives the updated agent (required).
        --lr: learning rate (default 0.0001).
        --clipnorm: gradient clipping norm (default 1.0).
        --bs: batch size (default 512).
        experience: one or more HDF5 experience files, trained on in order.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--learning-agent', required=True)
    parser.add_argument('--agent-out', required=True)
    parser.add_argument('--lr', type=float, default=0.0001)
    parser.add_argument('--clipnorm', type=float, default=1.0)
    parser.add_argument('--bs', type=int, default=512)
    parser.add_argument('experience', nargs='+')
    args = parser.parse_args()

    learning_agent_filename = args.learning_agent
    experience_files = args.experience
    updated_agent_filename = args.agent_out
    learning_rate = args.lr
    clipnorm = args.clipnorm
    batch_size = args.bs

    with h5py.File(learning_agent_filename, 'r') as learning_agentf:
        learning_agent = agent.load_policy_agent(learning_agentf)

    for exp_filename in experience_files:
        # Close each experience file once its buffer is in memory; otherwise
        # one handle per file stays open until the process exits.
        with h5py.File(exp_filename, 'r') as expf:
            exp_buffer = rl.load_experience(expf)
        learning_agent.train(
            exp_buffer,
            lr=learning_rate,
            clipnorm=clipnorm,
            batch_size=batch_size)

    with h5py.File(updated_agent_filename, 'w') as updated_agent_outf:
        learning_agent.serialize(updated_agent_outf)
def test_4_alphago_mcts(self):
    """Smoke-test AlphaGoMCTS: build it from three saved agents and pick a move.

    The test passes if constructing the agent and calling ``select_move`` on
    a fresh 19x19 game raises nothing; it makes no assertion about the move.
    """
    print("TEST 4\n=====================================================")
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        # Restrict TensorFlow to only use the first GPU
        try:
            tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
            tf.config.experimental.set_memory_growth(gpus[0], True)
            tf.config.set_soft_device_placement(True)
        except RuntimeError as e:
            # Device settings cannot be changed after initialisation; report
            # the error and carry on with the current configuration.
            print(e)

    # Close each fixture file after loading so the test leaves no open HDF5
    # handles behind.
    # NOTE(review): assumes the loaders read the file eagerly - confirm.
    with h5py.File('test_alphago_sl_policy.h5', 'r') as sl_file:
        fast_policy = load_prediction_agent(sl_file)
    with h5py.File('test_alphago_rl_policy.h5', 'r') as rl_file:
        strong_policy = load_policy_agent(rl_file)
    with h5py.File('test_alphago_value.h5', 'r') as value_file:
        value = load_value_agent(value_file)

    alphago = AlphaGoMCTS(strong_policy, fast_policy, value,
                          num_simulations=20, depth=5, rollout_limit=10)
    start = GameState.new_game(19)
    alphago.select_move(start)
def do_self_play(board_size, agent1_filename, agent2_filename, num_games,
                 temperature, experience_filename, gpu_frac):
    """Play agent 1 against agent 2 and save agent 1's experience.

    Only agent 1 gets the temperature and an experience collector; agent 2
    acts purely as the opponent. The agents alternate colors every game and
    the collected experience is written to ``experience_filename``.
    ``gpu_frac`` is accepted but not used by this function.
    """
    gpu_devices = tf.config.experimental.list_physical_devices('GPU')
    if gpu_devices:
        # Restrict TensorFlow to only use the first GPU
        try:
            tf.config.experimental.set_visible_devices(gpu_devices[0], 'GPU')
            tf.config.experimental.set_memory_growth(gpu_devices[0], True)
        except RuntimeError as err:
            print(err)

    # Mix the PID into the seed so that processes started within the same
    # second do not share a seed.
    random.seed(int(time.time()) + os.getpid())
    np.random.seed(int(time.time()) + os.getpid())

    with h5py.File(agent1_filename, 'r') as learner_h5:
        learner = agent.load_policy_agent(learner_h5)
    learner.set_temperature(temperature)
    with h5py.File(agent2_filename, 'r') as opponent_h5:
        opponent = agent.load_policy_agent(opponent_h5)

    collector = rl.ExperienceCollector()
    learner_color = Player.black
    for game_no in range(1, num_games + 1):
        print('Simulating game %d/%d...' % (game_no, num_games))
        collector.begin_episode()
        learner.set_collector(collector)

        if learner_color == Player.black:
            black_player = learner
            white_player = opponent
        else:
            black_player = opponent
            white_player = learner
        record = simulate_game(black_player, white_player, board_size)

        learner_won = record.winner == learner_color
        if learner_won:
            print('Agent 1 wins.')
            collector.complete_episode(reward=1)
        else:
            print('Agent 2 wins.')
            collector.complete_episode(reward=-1)
        learner_color = learner_color.other

    combined = rl.combine_experience([collector])
    print('Saving experience buffer to %s\n' % experience_filename)
    with h5py.File(experience_filename, 'w') as out_h5:
        combined.serialize(out_h5)
def train_worker(learning_agent, output_file, experience_file, lr, batch_size):
    """Train a saved policy agent on one experience file and write the result.

    Args:
        learning_agent: path of the HDF5 checkpoint to load.
        output_file: path of the HDF5 file that receives the trained agent.
        experience_file: path of the HDF5 experience buffer.
        lr: learning rate forwarded to ``train``.
        batch_size: batch size forwarded to ``train``.
    """
    with h5py.File(learning_agent, 'r') as checkpoint:
        policy_bot = agent.load_policy_agent(checkpoint)

    with h5py.File(experience_file, 'r') as experience_h5:
        buffer = rl.load_experience(experience_h5)

    policy_bot.train(buffer, lr=lr, batch_size=batch_size)

    with h5py.File(output_file, 'w') as out_h5:
        policy_bot.serialize(out_h5)
def do_self_play(board_size, agent_filename, num_games, temperature,
                 experience_filename, gpu_frac):
    """Play a policy agent against a copy of itself and save the experience.

    Args:
        board_size: board size forwarded to ``simulate_game``.
        agent_filename: HDF5 checkpoint loaded twice, once per player.
        num_games: number of self-play games.
        temperature: value passed to ``set_temperature`` on both agents.
        experience_filename: HDF5 file that receives the combined experience.
        gpu_frac: value passed to ``kerasutil.set_gpu_memory_target``.
    """
    kerasutil.set_gpu_memory_target(gpu_frac)

    # Mix the PID into the seed so that processes started within the same
    # second do not share a seed.
    random.seed(int(time.time()) + os.getpid())
    np.random.seed(int(time.time()) + os.getpid())

    # Read-only, closed right after loading; the original leaked two handles
    # per call.
    with h5py.File(agent_filename, 'r') as agentf:
        agent1 = agent.load_policy_agent(agentf)
    agent1.set_temperature(temperature)
    with h5py.File(agent_filename, 'r') as agentf:
        agent2 = agent.load_policy_agent(agentf)
    agent2.set_temperature(temperature)

    collector1 = rl.ExperienceCollector()
    collector2 = rl.ExperienceCollector()

    color1 = Player.black  # color currently played by agent 1
    for i in range(num_games):
        print('Simulating game %d/%d...' % (i + 1, num_games))
        collector1.begin_episode()
        agent1.set_collector(collector1)
        collector2.begin_episode()
        agent2.set_collector(collector2)

        if color1 == Player.black:
            black_player, white_player = agent1, agent2
        else:
            white_player, black_player = agent1, agent2
        game_record = simulate_game(black_player, white_player, board_size)
        if game_record.winner == color1:
            print('Agent 1 wins.')
            collector1.complete_episode(reward=1)
            collector2.complete_episode(reward=-1)
        else:
            print('Agent 2 wins.')
            collector2.complete_episode(reward=1)
            collector1.complete_episode(reward=-1)
        color1 = color1.other

    experience = rl.combine_experience([collector1, collector2])
    print('Saving experience buffer to %s\n' % experience_filename)
    with h5py.File(experience_filename, 'w') as experience_outf:
        experience.serialize(experience_outf)
def main():
    """Interactively train a policy agent on experience files matching a pattern.

    Prompts for a filename pattern (default ``exp*.h5``) that selects files in
    the hard-coded experience directory, the input and output checkpoint
    names (without ``.h5``), the learning rate and the batch size. The
    selected files are trained on in sorted order and the updated agent is
    written to the output checkpoint.
    """
    pth = '//home//nail//Code_Go//checkpoints//'
    pth_experience = '//home//nail//Experience//'

    # The experience files are later opened by bare name, so make their
    # directory the working directory.
    os.chdir(pth_experience)
    lst_files = os.listdir(pth_experience)
    pattern = input('Паттерн для выборки файлов для обучения: ')
    if not pattern:
        pattern = "exp*.h5"
    experience = sorted(
        entry for entry in lst_files if fnmatch.fnmatch(entry, pattern))

    learning_agent_path = pth + input('learning_agent:') + '.h5'
    print('learning_agent: ', learning_agent_path)
    agent_out = pth + input('agent_out:') + '.h5'
    print('agent_out: ', agent_out)

    # Fall back to the defaults only on unparsable input. A bare ``except``
    # would also swallow Ctrl-C.
    try:
        lr = float(input('lr = '))
    except ValueError:
        lr = 0.000001
    try:
        bs = int(input('bs = '))
    except ValueError:
        bs = 1024

    # ==================================================
    import tensorflow as tf
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.98
    config.gpu_options.allow_growth = True
    config.log_device_placement = True
    sess = tf.compat.v1.Session(config=config)
    tf.compat.v1.keras.backend.set_session(sess)
    # ==================================================

    with h5py.File(learning_agent_path, "r") as learning_agentf:
        learning_agent = agent.load_policy_agent(learning_agentf)

    num_files = len(experience)
    for i, exp_filename in enumerate(experience, start=1):
        print(50 * '=')
        print('Файл для обучения: %s...' % exp_filename)
        print(50 * '=')
        # Close each experience file once its buffer has been loaded so that
        # handles do not accumulate across many files.
        with h5py.File(exp_filename, "r") as expf:
            exp_buffer = rl.load_experience(expf)
        learning_agent.train(exp_buffer, lr=lr, batch_size=bs)
        print('Обработано файлов: ', i, ' из ', num_files)

    with h5py.File(agent_out, 'w') as updated_agent_outf:
        learning_agent.serialize(updated_agent_outf)
def main():
    """Generate self-play experience for a policy agent (listings 9.19-9.20).

    Reads ``agent_filename``, ``num_games`` and ``experience_filename`` from
    the enclosing scope; they are not defined in this function and must exist
    as module-level names before it is called.

    Agent 1 always plays black and agent 2 always plays white. After each
    game the winner's collector is completed with reward +1 and the loser's
    with reward -1.
    """
    # 9.19
    # Two copies of the same checkpoint; each handle is closed right after
    # loading instead of being leaked.
    with h5py.File(agent_filename, 'r') as agentf:
        agent1 = agent.load_policy_agent(agentf)
    with h5py.File(agent_filename, 'r') as agentf:
        agent2 = agent.load_policy_agent(agentf)
    collector1 = rl.ExperienceCollector()
    collector2 = rl.ExperienceCollector()
    agent1.set_collector(collector1)
    agent2.set_collector(collector2)

    # 9.20
    for i in range(num_games):
        collector1.begin_episode()
        collector2.begin_episode()

        game_record = simulate_game(agent1, agent2)
        if game_record.winner == Player.black:
            collector1.complete_episode(reward=1)  # Agent1 won the game, so +reward
            collector2.complete_episode(reward=-1)
        else:
            collector2.complete_episode(reward=1)  # Agent2 won the game
            collector1.complete_episode(reward=-1)

    experience = rl.combine_experience([collector1, collector2])
    with h5py.File(experience_filename, 'w') as experience_outf:
        experience.serialize(experience_outf)
def play_games(args):
    """Run a batch of evaluation games between two saved policy agents.

    ``args`` is a 5-item sequence:
    ``(agent1_fname, agent2_fname, num_games, board_size, gpu_frac)``.
    ``gpu_frac`` is unpacked but not used by this variant, which configures
    the GPU through ``tf.config`` instead.

    Returns a ``(wins, losses)`` tuple counted from agent 1's point of view.
    """
    gpu_devices = tf.config.experimental.list_physical_devices('GPU')
    if gpu_devices:
        # Restrict TensorFlow to only use the first GPU
        try:
            tf.config.experimental.set_visible_devices(gpu_devices[0], 'GPU')
            tf.config.experimental.set_memory_growth(gpu_devices[0], True)
        except RuntimeError as err:
            print(err)

    first_path, second_path, num_games, board_size, gpu_frac = args

    # Mix the PID into the seed so that processes started within the same
    # second do not share a seed.
    random.seed(int(time.time()) + os.getpid())
    np.random.seed(int(time.time()) + os.getpid())

    with h5py.File(first_path, 'r') as first_h5:
        first_bot = agent.load_policy_agent(first_h5)
    with h5py.File(second_path, 'r') as second_h5:
        second_bot = agent.load_policy_agent(second_h5)

    wins = 0
    losses = 0
    first_bot_color = Player.black
    for game_no in range(1, num_games + 1):
        print('Simulating game %d/%d...' % (game_no, num_games))
        if first_bot_color == Player.black:
            black_player = first_bot
            white_player = second_bot
        else:
            black_player = second_bot
            white_player = first_bot
        record = simulate_game(black_player, white_player, board_size)
        first_bot_won = record.winner == first_bot_color
        if first_bot_won:
            print('Agent 1 wins')
            wins += 1
        else:
            print('Agent 2 wins')
            losses += 1
        print('Agent 1 record: %d/%d' % (wins, wins + losses))
        # Alternate sides for the next game.
        first_bot_color = first_bot_color.other
    return wins, losses
def test_4_alphago_mcts(self):
    """Smoke-test AlphaGoMCTS: build it from three saved agents and pick a move.

    The test passes if constructing the agent and calling ``select_move`` on
    a fresh 19x19 game raises nothing; it makes no assertion about the move.
    """
    # Close each fixture file after loading so the test leaves no open HDF5
    # handles behind.
    # NOTE(review): assumes the loaders read the file eagerly - confirm.
    with h5py.File('test_alphago_sl_policy.h5', 'r') as sl_file:
        fast_policy = load_prediction_agent(sl_file)
    with h5py.File('test_alphago_rl_policy.h5', 'r') as rl_file:
        strong_policy = load_policy_agent(rl_file)
    with h5py.File('test_alphago_value.h5', 'r') as value_file:
        value = load_value_agent(value_file)

    alphago = AlphaGoMCTS(strong_policy, fast_policy, value,
                          num_simulations=20, depth=5, rollout_limit=10)
    start = GameState.new_game(19)
    alphago.select_move(start)
def main():
    """Generate self-play experience for a policy agent and save it to HDF5.

    Command-line arguments:
        --board-size: stored in the module-level ``BOARD_SIZE`` (required).
        --learning-agent: HDF5 checkpoint loaded twice, once per player.
        --num-games / -n: number of self-play games (default 10).
        --experience-out: HDF5 file that receives the combined experience.

    Agent 1 always plays black and agent 2 always plays white. After each
    game the winner's collector is completed with reward +1 and the loser's
    with reward -1.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--board-size', type=int, required=True)
    parser.add_argument('--learning-agent', required=True)
    parser.add_argument('--num-games', '-n', type=int, default=10)
    parser.add_argument('--experience-out', required=True)
    args = parser.parse_args()

    agent_filename = args.learning_agent
    experience_filename = args.experience_out
    num_games = args.num_games
    # Published as a module-level name for code outside this function.
    global BOARD_SIZE
    BOARD_SIZE = args.board_size

    # Read-only, closed right after loading; the original leaked both handles.
    with h5py.File(agent_filename, 'r') as agentf:
        agent1 = agent.load_policy_agent(agentf)
    with h5py.File(agent_filename, 'r') as agentf:
        agent2 = agent.load_policy_agent(agentf)
    collector1 = rl.ExperienceCollector()
    collector2 = rl.ExperienceCollector()
    agent1.set_collector(collector1)
    agent2.set_collector(collector2)

    for i in range(num_games):
        collector1.begin_episode()
        collector2.begin_episode()

        game_record = simulate_game(agent1, agent2)
        if game_record.winner == Player.black:
            collector1.complete_episode(reward=1)
            collector2.complete_episode(reward=-1)
        else:
            collector2.complete_episode(reward=1)
            collector1.complete_episode(reward=-1)

    experience = rl.combine_experience([collector1, collector2])
    with h5py.File(experience_filename, 'w') as experience_outf:
        experience.serialize(experience_outf)
def main():
    """Train a policy agent on one or more experience files and save it.

    Command-line arguments:
        --learning-agent: HDF5 checkpoint of the agent to train (required).
        --agent-out: HDF5 file that receives the updated agent (required).
        --lr: learning rate (default 0.0001).
        --bs: batch size (default 512).
        experience: one or more HDF5 experience files, trained on in order.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--learning-agent', required=True)
    parser.add_argument('--agent-out', required=True)
    parser.add_argument('--lr', type=float, default=0.0001)
    parser.add_argument('--bs', type=int, default=512)
    parser.add_argument('experience', nargs='+')
    args = parser.parse_args()

    with h5py.File(args.learning_agent, 'r') as learning_agentf:
        learning_agent = agent.load_policy_agent(learning_agentf)

    for exp_filename in args.experience:
        print('Training with %s...' % exp_filename)
        # Close each experience file once its buffer is in memory; otherwise
        # one handle per file stays open until the process exits.
        with h5py.File(exp_filename, 'r') as expf:
            exp_buffer = rl.load_experience(expf)
        learning_agent.train(exp_buffer, lr=args.lr, batch_size=args.bs)

    with h5py.File(args.agent_out, 'w') as updated_agent_outf:
        learning_agent.serialize(updated_agent_outf)
def train_worker(learning_agent, output_file, experience_file, lr, batch_size):
    """Train a saved policy agent on one experience file and write the result.

    Before loading anything, TensorFlow is restricted to the first GPU (if
    any) with memory growth enabled.

    Args:
        learning_agent: path of the HDF5 checkpoint to load.
        output_file: path of the HDF5 file that receives the trained agent.
        experience_file: path of the HDF5 experience buffer.
        lr: learning rate forwarded to ``train``.
        batch_size: batch size forwarded to ``train``.
    """
    gpu_devices = tf.config.experimental.list_physical_devices('GPU')
    if gpu_devices:
        # Restrict TensorFlow to only use the first GPU
        try:
            tf.config.experimental.set_visible_devices(gpu_devices[0], 'GPU')
            tf.config.experimental.set_memory_growth(gpu_devices[0], True)
        except RuntimeError as err:
            print(err)

    with h5py.File(learning_agent, 'r') as checkpoint:
        policy_bot = agent.load_policy_agent(checkpoint)

    with h5py.File(experience_file, 'r') as experience_h5:
        buffer = rl.load_experience(experience_h5)

    policy_bot.train(buffer, lr=lr, batch_size=batch_size)

    with h5py.File(output_file, 'w') as out_h5:
        policy_bot.serialize(out_h5)
def main():
    """Train a policy agent on a list of experience files (listing 10.7).

    All inputs are placeholders that must be edited before the script is
    run. The agent is trained on each experience file in order and then
    written to ``updated_agent_filename``.
    """
    # PLACEHOLDER VARIABLES (CHANGE BEFORE TRAINING MODEL)
    # A plain path string. The original stored an already opened h5py.File
    # here and then passed that object to h5py.File a second time.
    learning_agent_filename = ""
    experience_files = ["", ""]
    learning_rate = 0.0001
    clipnorm = 0.5
    batchsize = 1024
    updated_agent_filename = "something"

    with h5py.File(learning_agent_filename, 'r') as learning_agentf:
        learning_agent = agent.load_policy_agent(learning_agentf)

    # 10.7
    for exp_filename in experience_files:
        # Close each experience file once its buffer is in memory.
        with h5py.File(exp_filename, 'r') as expf:
            exp_buffer = rl.load_experience(expf)
        learning_agent.train(exp_buffer,
                             lr=learning_rate,
                             clipnorm=clipnorm,
                             batch_size=batchsize)

    with h5py.File(updated_agent_filename, 'w') as updated_agent_outf:
        learning_agent.serialize(updated_agent_outf)
def main():
    """Interactively pick one agent and serve it through the HTTP frontend.

    Prompts for an agent type (``pg``, ``predict``, ``q`` or ``ac``) and, for
    a recognised type, the checkpoint name (without ``.h5``) inside the
    hard-coded checkpoint directory. The chosen agent is registered next to
    the always-present 'mcts' bot; any other answer serves the MCTS bot only.
    The web app listens on 127.0.0.1:5000.
    """
    workdir = '//home/nail//Code_Go//checkpoints//'
    os.chdir(workdir)
    bind_address = '127.0.0.1'
    port = 5000

    # One filename prompt per supported agent type.
    filename_prompts = {
        'pg': 'Введите имя файла для игры с ботом политика градиентов =',
        'predict': 'Введите имя файла для игры с ботом предсказания хода =',
        'q': 'Введите имя файла для игры с ботом ценность действия =',
        'ac': 'Введите имя файла для игры с ботом актор-критик =',
    }

    agent_type = input('Агент(pg/predict/q/ac = ').lower()
    agent_path = ''
    if agent_type in filename_prompts:
        agent_path = workdir + input(filename_prompts[agent_type]) + '.h5'

    bots = {'mcts': mcts.MCTSAgent(800, temperature=0.7)}

    # The checkpoint is closed right after loading so the long-running web
    # server does not hold an HDF5 handle open.
    # NOTE(review): assumes the loaders read everything eagerly, as
    # load_policy_agent is used elsewhere in this file - confirm for the
    # prediction, Q and actor-critic loaders.
    if agent_type == 'pg':
        with h5py.File(agent_path, 'r') as agent_file:
            bots['pg'] = agent.load_policy_agent(agent_file)
    elif agent_type == 'predict':
        with h5py.File(agent_path, 'r') as agent_file:
            bots['predict'] = agent.load_prediction_agent(agent_file)
    elif agent_type == 'q':
        with h5py.File(agent_path, 'r') as agent_file:
            q_bot = rl.load_q_agent(agent_file)
        q_bot.set_temperature(0.01)
        bots['q'] = q_bot
    elif agent_type == 'ac':
        with h5py.File(agent_path, 'r') as agent_file:
            ac_bot = rl.load_ac_agent(agent_file)
        ac_bot.set_temperature(0.05)
        bots['ac'] = ac_bot

    web_app = httpfrontend.get_web_app(bots)
    web_app.run(host=bind_address, port=port, threaded=False)
from dlgo import rl
from dlgo import scoring
from dlgo import goboard_fast as goboard
from dlgo.goboard_fast import Move
from dlgo.gotypes import Player, Point
from dlgo.utils import print_board, print_move
from dlgo import agent
from dlgo.agent.predict import DeepLearningAgent, load_prediction_agent
from dlgo.agent import load_prediction_agent, load_policy_agent, AlphaGoMCTS
from dlgo.rl import load_value_agent

# Load policy agent and value agent.
# Each checkpoint is opened read-only and closed right after loading so that
# no HDF5 handle stays open while the search runs.
# NOTE(review): assumes the loaders read the file eagerly - confirm.
# NOTE(review): the fast and strong policies load the same file - confirm
# that this is intended.
with h5py.File('models/AlphaGo/alphago_policyv0-0-0.h5', 'r') as fast_file:
    fast_policy = load_prediction_agent(fast_file)
with h5py.File('models/AlphaGo/alphago_policyv0-0-0.h5', 'r') as strong_file:
    strong_policy = load_policy_agent(strong_file)
with h5py.File('models/AlphaGo/alphago_valuev1-0-1.h5', 'r') as value_file:
    value = load_value_agent(value_file)

# Create AlphaGo MCTS agent based on the policy agent and the value agent
alphago = AlphaGoMCTS(strong_policy, fast_policy, value,
                      depth=10, rollout_limit=50, num_simulations=100)

# Test duration for selecting a move
game_state = goboard.GameState.new_game(19)
start_time = time.time()
next_move = alphago.select_move(game_state)
# tag::run_alphago[]
from dlgo.agent import load_prediction_agent, load_policy_agent, AlphaGoMCTS
from dlgo.rl import load_value_agent
import h5py

# Build the three networks AlphaGoMCTS needs. Each checkpoint is opened
# read-only and closed right after loading instead of being left open.
# NOTE(review): assumes the loaders read the file eagerly - confirm.
with h5py.File('alphago_sl_policy.h5', 'r') as sl_file:
    fast_policy = load_prediction_agent(sl_file)
with h5py.File('alphago_rl_policy.h5', 'r') as rl_file:
    strong_policy = load_policy_agent(rl_file)
with h5py.File('alphago_value.h5', 'r') as value_file:
    value = load_value_agent(value_file)

alphago = AlphaGoMCTS(strong_policy, fast_policy, value)
# end::run_alphago[]

# TODO: register in frontend
def main():
    """Interactively generate self-play experience, saved in chunks.

    Prompts for the checkpoint name, temperature, log and experience file
    prefixes, the total number of games and the chunk size (default 100).
    Games are played in chunks of ``chunk_size``. Each chunk gets its own log
    file and its own HDF5 experience file, both named after the cumulative
    number of games played when the chunk ends.

    Differences from the original implementation:
      * exactly ``num_games`` games are played (the original looped over
        ``num_games + 1`` and put one extra game into the first chunk);
      * agent 1 really plays white when ``color1`` is white (the original
        kept it on black, so rewards went to the wrong agent on every second
        game);
      * a trailing partial chunk is saved instead of being discarded with its
        log file left open;
      * only unparsable chunk-size input falls back to the default, and a
        non-positive chunk size is replaced by the default instead of
        causing a division by zero.
    """
    default_chunk_size = 100

    learning_agent = input("Бот: ")
    temperature = float(input('Температура = '))
    game_log = input('game_log: ')
    experience_out = input('experience_out: ')
    num_games = int(input('Количество игр = '))
    try:
        chunk_size = int(input('Количество игр в "порции" ='))
    except ValueError:
        chunk_size = default_chunk_size
    if chunk_size <= 0:
        chunk_size = default_chunk_size

    pth = '//media//nail//SSD_Disk//Experience//'
    learning_agent = '//media//nail//SSD_Disk//Models//' + learning_agent + '.h5'
    game_log = pth + game_log + '_' + str(num_games)
    experience_out = pth + experience_out + '_' + str(num_games) + '_'

    # ==================================================
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    import tensorflow as tf
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.95
    config.gpu_options.allow_growth = True
    config.log_device_placement = True
    sess = tf.compat.v1.Session(config=config)
    tf.compat.v1.keras.backend.set_session(sess)
    # ==================================================

    # Two independent copies of the same checkpoint; each handle is closed
    # right after loading.
    with h5py.File(learning_agent, "r") as agentf:
        agent1 = agent.load_policy_agent(agentf)
    with h5py.File(learning_agent, "r") as agentf:
        agent2 = agent.load_policy_agent(agentf)
    agent1.set_temperature(temperature)
    agent2.set_temperature(temperature)

    for chunk_start in range(0, num_games, chunk_size):
        # Cumulative number of games played once this chunk is finished; the
        # last chunk may be shorter than chunk_size.
        games_done = min(chunk_start + chunk_size, num_games)
        game_log_out = game_log + '_' + str(games_done) + ".txt"
        experience_out_file = experience_out + str(games_done) + ".h5"

        # Fresh collectors per chunk so each file holds only its own games.
        collector1 = rl.ExperienceCollector()
        collector2 = rl.ExperienceCollector()
        agent1.set_collector(collector1)
        agent2.set_collector(collector2)
        color1 = Player.black  # color currently played by agent 1

        # The context manager closes the log even if a game raises.
        with open(game_log_out, 'a') as logf:
            logf.write('Начало игр в %s\n' %
                       (datetime.datetime.now().strftime('%Y-%m-%d %H:%M'),))
            logf.write(str(games_done) + ' из количества игр: ' +
                       str(num_games) + '\n')
            print('Моделируемая игра %d/%d...' % (chunk_start + 1, num_games))

            for i in range(chunk_start, games_done):
                collector1.begin_episode()
                collector2.begin_episode()

                if color1 == Player.black:
                    black_player, white_player = agent1, agent2
                else:
                    white_player, black_player = agent1, agent2
                game_record = simulate_game(black_player, white_player)
                print(" № игры : ", i + 1)
                if game_record.winner == color1:
                    print('Агент 1 выигрывает.')
                    collector1.complete_episode(reward=1)
                    collector2.complete_episode(reward=-1)
                else:
                    print('Агент 2 выигрывает.')
                    collector2.complete_episode(reward=1)
                    collector1.complete_episode(reward=-1)
                color1 = color1.other

            experience = rl.combine_experience([collector1, collector2])
            logf.write('Сохранение буфера в файл %s\n' % experience_out_file)
            logf.write('Завершение игр %s\n' %
                       (datetime.datetime.now().strftime('%Y-%m-%d %H:%M'),))

        with h5py.File(experience_out_file, 'w') as experience_outf:
            experience.serialize(experience_outf)
        print('Записано игр: ', games_done, ' из ', num_games, ' игр.')
from dlgo.agent import load_prediction_agent, load_policy_agent, AlphaGoMCTS
from dlgo.rl import load_value_agent
import h5py

# Build the three networks AlphaGoMCTS needs. Each checkpoint is opened
# read-only and closed right after loading instead of being left open.
# NOTE(review): assumes the loaders read the file eagerly - confirm.
with h5py.File('agents/GHGHbot1_sl_policy.h5', 'r') as sl_file:
    fast_policy = load_prediction_agent(sl_file)
with h5py.File('agents/GHGHbot1_rl_policy.h5', 'r') as rl_file:
    strong_policy = load_policy_agent(rl_file)
with h5py.File('agents/GHGHbot1_value.h5', 'r') as value_file:
    value = load_value_agent(value_file)

alphago = AlphaGoMCTS(strong_policy, fast_policy, value)

# TODO: register in frontend