def main():
    """Set up the game, network and coach, then start training.

    Uses the ``args`` object from the enclosing scope: when
    ``args.load_model`` is truthy, the checkpoint named by the first two
    entries of ``args.load_folder_file`` is loaded into the network and the
    coach is asked to load its stored training examples before learning.
    """
    log.info('Loading %s...', Game.__name__)
    game = Game(4, 9, 4)

    log.info('Loading %s...', nn.__name__)
    network = nn(game)

    if not args.load_model:
        log.warning('Not loading a checkpoint!')
    else:
        # load_folder_file is indexed as (folder, file name).
        folder = args.load_folder_file[0]
        filename = args.load_folder_file[1]
        log.info('Loading checkpoint "%s/%s"...', folder, filename)
        network.load_checkpoint(folder, filename)

    log.info('Loading the Coach...')
    coach = Coach(game, network, args)

    # Training examples are only restored when a checkpoint was requested.
    if args.load_model:
        log.info("Loading 'trainExamples' from file...")
        coach.loadTrainExamples()

    log.info('Starting the learning process 🎉')
    coach.learn()
nnet=dqn, lr=LR) num_improvements = 0 for _ in range(NUM_EPOCHS): ## Saving a copy of the current network weights to pnet torch.save(coach.nnet.state_dict(), PATH) coach.pnet.load_state_dict(torch.load(PATH)) for _ in tqdm(range(NUM_TRAIN_GAMES), desc="Self play"): data = coach.execute_episode( epsilon=TRAIN_EPSILON, gamma=GAMMA) data = torch.stack(data) coach.learn(data) arena = Arena( player1=lambda x: torch.argmax(coach.nnet(x)), player2=lambda x: torch.argmax(coach.pnet(x)), game=game) ## Introduce small randomness into evaluation games ## Both agents take random moves with small probability p1_won = 0 p2_won = 0 for _ in tqdm(range(HALF), desc="Evaluation 1"): looser = arena.play_game(epsilon=EVAL_EPSILON) if looser == 'red': p2_won += 1 else:
dest='load_model', action='store_true') parser.add_argument('-loadf', '--load_folder_file', dest='load_folder_file', type=str) parser.add_argument('-iterexamp', '--num_iters_example', dest='numItersForTrainExamplesHistory', type=int, default=20) args = parser.parse_args() fh = open(os.path.join("..", "data", "puzzle1.txt")) fcontent = fh.read() fh.close() sys.setrecursionlimit(10000) g = game(fcontent) nnet = nn(g, args) if args.load_model: nnet.load_checkpoint(args.load_folder_file) c = Coach(g, nnet, args) if args.load_model: print("Load trainExamples from file") c.loadTrainExamples() c.learn()
# Visualization style per transport type: (edge color, edge width).
_EDGE_STYLES = {
    'taxi': ((255 / 255, 205 / 255, 66 / 255), 10.0),
    'bus': ((23 / 255, 160 / 255, 93 / 255), 6.0),
    'metro': ((221 / 255, 80 / 255, 68 / 255), 3.0),
    'ferry': ('black', 0.5),
}

# Style applied to any transport type that is not listed in _EDGE_STYLES.
_DEFAULT_EDGE_STYLE = ('black', 10.0)


def _apply_edge_styles(edges):
    """Add "color" and "width" entries to every edge's attribute dict, in place.

    Each edge is expected to be a ``(node, node, attributes)`` triple whose
    ``attributes`` dict has a "type" key; a missing "type" raises ``KeyError``.
    """
    for edge in edges:
        edge_attributes = edge[2]
        edge_color, edge_width = _EDGE_STYLES.get(
            edge_attributes["type"], _DEFAULT_EDGE_STYLE)
        edge_attributes["color"] = edge_color
        edge_attributes["width"] = edge_width


def main():
    """Build a five-node game, attach neural nets to the players and train.

    Mister X gets a network of his own; all detectives share a single
    network instance. Both networks and the ``args`` object from the
    enclosing scope are handed to the ``Coach``, whose ``learn()`` is then
    called.
    """
    log.info('Loading Game')
    number_of_nodes = 5

    # Initialize edges
    edges = [
        (0, 1, {"type": "taxi"}),
        (1, 2, {"type": "taxi"}),
        (2, 0, {"type": "taxi"}),
        (3, 4, {"type": "bus"}),
        (3, 4, {"type": "taxi"}),
        (3, 4, {"type": "metro"}),
        (3, 2, {"type": "ferry"}),
        (0, 4, {"type": "taxi"}),
        (1, 3, {"type": "taxi"}),
        (2, 4, {"type": "taxi"}),
    ]

    # Assign colors and widths to edges for visualization
    _apply_edge_styles(edges)

    # Initialize players
    number_of_detectives = 2

    # Set the colors for the different roles (for visualization purposes)
    detective_colors = ['green', 'red']
    mister_x_color = 'grey'

    game = Game(number_of_nodes, edges, number_of_detectives,
                mister_x_color, detective_colors)

    neural_net_mister_x = NNet(game, player=game.mister_x)
    game.mister_x.neural_net = neural_net_mister_x

    # One network instance is shared by every detective.
    neural_net_detective = NNet(game, game.detectives[0])
    for detective in game.detectives:
        detective.neural_net = neural_net_detective

    # NOTE: this function does not load a checkpoint or previously saved
    # training examples; it only constructs the networks above.

    log.info('Loading the Coach...')
    c = Coach(game, neural_net_mister_x, neural_net_detective, args)

    log.info('Starting the learning process 🎉')
    c.learn()
network_architecture=configs.network_architecture) coach = Coach(game=game, nnet=nnet, pnet=pnet, num_iters=configs.num_iters, root_noise=configs.root_noise, board_size=configs.board_size) if configs.load_model: logging.info("Loading training examples") coach.loadTrainExamples() if configs.web_server: web = WebServer(game=game, nnet=nnet, checkpoint_folder=configs.checkpoint_dir, c_puct=configs.c_puct, num_mcst_sims=configs.num_mcts_sims) web.start_web_server() exit(0) coach.learn(num_train_episodes=configs.num_episodes, num_training_examples_to_keep=configs.maxlenOfQueue, checkpoint_folder=configs.checkpoint_dir, arena_tournament_size=configs.arena_size, model_update__win_threshold=configs.update_threshold, num_mcst_sims=configs.num_mcts_sims, c_puct=configs.c_puct, know_nothing_training_iters=configs.tempThreshold, max_cpus=num_threads)