def main():
    """Launch the Go-bot HTTP front end, serving the agents selected on the CLI.

    Flags:
        --bind-address / --port: where the web app listens.
        --pg-agent / --predict-agent / --q-agent / --ac-agent: optional paths
            to saved HDF5 models; each one present adds a bot to the app.

    An MCTS bot is always available since it needs no model file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--bind-address', default='127.0.0.1')
    parser.add_argument('--port', '-p', type=int, default=5000)
    parser.add_argument('--pg-agent')
    parser.add_argument('--predict-agent')
    parser.add_argument('--q-agent')
    parser.add_argument('--ac-agent')
    args = parser.parse_args()

    bots = {'mcts': mcts.MCTSAgent(800, temperature=0.7)}
    # Open model files read-only: h5py's implicit default mode is deprecated,
    # and the companion launcher elsewhere in this file already uses 'r'.
    if args.pg_agent:
        bots['pg'] = agent.load_policy_agent(h5py.File(args.pg_agent, 'r'))
    if args.predict_agent:
        bots['predict'] = agent.load_prediction_agent(
            h5py.File(args.predict_agent, 'r'))
    if args.q_agent:
        q_bot = rl.load_q_agent(h5py.File(args.q_agent, 'r'))
        # Near-greedy play for serving (low exploration temperature).
        q_bot.set_temperature(0.01)
        bots['q'] = q_bot
    if args.ac_agent:
        ac_bot = rl.load_ac_agent(h5py.File(args.ac_agent, 'r'))
        ac_bot.set_temperature(0.05)
        bots['ac'] = ac_bot

    web_app = httpfrontend.get_web_app(bots)
    web_app.run(host=args.bind_address, port=args.port, threaded=False)
def main():
    """Rate a set of saved Q agents against each other with Elo.

    Positional args are HDF5 model filenames; --num-games and --board-size
    control the round-robin used by elo.calculate_ratings. Prints
    "<filename> <rating>" per agent.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--num-games', '-g', type=int)
    parser.add_argument('--board-size', '-b', type=int)
    parser.add_argument('agents', nargs='+')
    args = parser.parse_args()

    # Open read-only: h5py's implicit default mode is deprecated.
    agents = [
        rl.load_q_agent(h5py.File(filename, 'r'))
        for filename in args.agents
    ]
    # Low temperature: near-deterministic play so ratings reflect strength.
    for a in agents:
        a.set_temperature(0.02)

    ratings = elo.calculate_ratings(agents, args.num_games, args.board_size)
    for filename, rating in zip(args.agents, ratings):
        print("%s %d" % (filename, rating))
def main():
    """Interactively launch the Go-bot HTTP front end.

    Prompts (in Russian) for an agent type and a checkpoint file name,
    loads that single bot from the checkpoints directory, and serves it
    together with the always-available MCTS bot on 127.0.0.1:5000.
    """
    workdir = '//home/nail//Code_Go//checkpoints//'
    os.chdir(workdir)
    bind_address = '127.0.0.1'
    port = 5000

    # MCTS needs no saved model, so it is always served.
    bots = {'mcts': mcts.MCTSAgent(800, temperature=0.7)}

    agent_type = input('Агент(pg/predict/q/ac = ').lower()
    # Each branch asks for the file name, builds the full path, and loads
    # the bot in one place (previously split across two duplicated chains).
    if agent_type == 'pg':
        pg_agent = input(
            'Введите имя файла для игры с ботом политика градиентов =')
        pg_agent = workdir + pg_agent + '.h5'
        bots['pg'] = agent.load_policy_agent(h5py.File(pg_agent, 'r'))
    if agent_type == 'predict':
        predict_agent = input(
            'Введите имя файла для игры с ботом предсказания хода =')
        predict_agent = workdir + predict_agent + '.h5'
        bots['predict'] = agent.load_prediction_agent(
            h5py.File(predict_agent, 'r'))
    if agent_type == 'q':
        q_agent = input(
            'Введите имя файла для игры с ботом ценность действия =')
        q_agent = workdir + q_agent + '.h5'
        q_bot = rl.load_q_agent(h5py.File(q_agent, 'r'))
        # Near-greedy play for serving.
        q_bot.set_temperature(0.01)
        bots['q'] = q_bot
    if agent_type == 'ac':
        ac_agent = input('Введите имя файла для игры с ботом актор-критик =')
        ac_agent = workdir + ac_agent + '.h5'
        ac_bot = rl.load_ac_agent(h5py.File(ac_agent, 'r'))
        ac_bot.set_temperature(0.05)
        bots['ac'] = ac_bot

    web_app = httpfrontend.get_web_app(bots)
    web_app.run(host=bind_address, port=port, threaded=False)
def load_agent(filename):
    """Load a Q agent from the HDF5 file at *filename* (opened read-only)."""
    h5file = h5py.File(filename, 'r')
    try:
        return rl.load_q_agent(h5file)
    finally:
        h5file.close()
def main():
    """Generate self-play experience for a saved Q agent.

    Interactively prompts for the agent checkpoint, playing temperature,
    log/output file stems, total game count, and chunk size. Two copies of
    the same agent play each other; after every chunk of games the collected
    experience is serialized to an HDF5 file and a text log is written.
    """
    learning_agent = input("Бот: ")
    temperature = float(input('Температура = '))
    game_log = input('game_log: ')
    experience_out = input('experience_out: ')
    num_games = int(input('Количество игр = '))
    board_size = 19
    try:
        chunk_size = int(input('Количество игр в "порции" ='))
    except ValueError:
        # Bad/empty input -> sensible default. (Was a bare except, which
        # also swallowed KeyboardInterrupt.)
        chunk_size = 100

    pth = "//home//nail//Experience//"
    learning_agent = ('//home//nail//Code_Go//checkpoints//'
                     + learning_agent + '.h5')
    game_log = pth + game_log + '_' + str(num_games)
    experience_out = pth + experience_out + '_' + str(num_games) + '_'

    # ==================================================
    # GPU/session configuration; must run before any model is loaded.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    import tensorflow as tf
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.95
    config.gpu_options.allow_growth = True
    config.log_device_placement = True
    sess = tf.compat.v1.Session(config=config)
    tf.compat.v1.keras.backend.set_session(sess)
    # ==================================================

    agent1 = rl.load_q_agent(h5py.File(learning_agent, "r"))
    agent2 = rl.load_q_agent(h5py.File(learning_agent, "r"))
    agent1.set_temperature(temperature)
    agent2.set_temperature(temperature)

    k = 0  # number of completed chunks
    j = 0  # games played in the current chunk
    # Alternate colors across games so each agent plays both sides.
    # (Previously reset to black every iteration, which made the
    # end-of-loop alternation dead code.)
    color1 = Player.black
    logf = None
    collector1 = None
    collector2 = None

    for i in range(num_games + 1):
        if j == 0:
            # Start of a new chunk: fresh log file and fresh collectors.
            # (Previously the collectors were re-created every game, so each
            # chunk's saved experience contained only the last game.)
            game_log_out = game_log + '_' + str((k + 1) * chunk_size) + ".txt"
            logf = open(game_log_out, 'a')
            logf.write('Начало игр в %s\n' %
                       (datetime.datetime.now().strftime('%Y-%m-%d %H:%M'), ))
            logf.write(
                str((k + 1) * chunk_size) + ' из количества игр: ' +
                str(num_games) + '\n')
            collector1 = rl.ExperienceCollector()
            collector2 = rl.ExperienceCollector()

        print('Моделируемая игра %d/%d...' % (i + 1, num_games))
        j += 1

        collector1.begin_episode()
        agent1.set_collector(collector1)
        collector2.begin_episode()
        agent2.set_collector(collector2)

        if color1 == Player.black:
            black_player, white_player = agent1, agent2
        else:
            white_player, black_player = agent2, agent1
        game_record = simulate_game_q(black_player, white_player, board_size)
        print(" № игры : ", i + 1)

        if game_record.winner == color1:
            print('Агент 1 выигрывает.')
            collector1.complete_episode(reward=1)
            collector2.complete_episode(reward=-1)
        else:
            print('Агент 2 выигрывает.')
            collector2.complete_episode(reward=1)
            collector1.complete_episode(reward=-1)
        color1 = color1.other

        if i >= chunk_size and i % chunk_size == 0:
            # Chunk complete: serialize accumulated experience and close log.
            experience = rl.combine_experience([collector1, collector2])
            experience_out_file = (experience_out +
                                   str((k + 1) * chunk_size) + ".h5")
            logf.write('Сохранение буфера в файл %s\n' % experience_out_file)
            logf.write('Завершение игр %s\n' %
                       (datetime.datetime.now().strftime('%Y-%m-%d %H:%M'), ))
            logf.close()
            with h5py.File(experience_out_file, 'w') as experience_outf:
                experience.serialize(experience_outf)
            print('Записано игр: ', (k + 1) * chunk_size, ' из ', num_games,
                  ' игр.')
            k += 1
            j = 0