Beispiel #1
0
def main():
    """Build the game, network and coach, then start the training loop.

    Uses ``log``, ``args``, ``Game``, ``nn`` and ``Coach`` from the enclosing
    scope. When ``args.load_model`` is truthy, the checkpoint identified by the
    first two entries of ``args.load_folder_file`` is loaded into the network,
    and the coach loads its stored training examples before learning begins.
    """
    log.info('Loading %s...', Game.__name__)
    game = Game(4, 9, 4)

    log.info('Loading %s...', nn.__name__)
    network = nn(game)

    if not args.load_model:
        log.warning('Not loading a checkpoint!')
    else:
        # Entry 0 and entry 1 are passed to load_checkpoint in that order.
        checkpoint_folder = args.load_folder_file[0]
        checkpoint_file = args.load_folder_file[1]
        log.info('Loading checkpoint "%s/%s"...', checkpoint_folder,
                 checkpoint_file)
        network.load_checkpoint(checkpoint_folder, checkpoint_file)

    log.info('Loading the Coach...')
    coach = Coach(game, network, args)

    # Training examples are restored only after the coach exists.
    if args.load_model:
        log.info("Loading 'trainExamples' from file...")
        coach.loadTrainExamples()

    log.info('Starting the learning process 🎉')
    coach.learn()
Beispiel #2
0
	nnet=dqn,
	lr=LR)

num_improvements = 0
for _ in range(NUM_EPOCHS):
	## Saving a copy of the current network weights to pnet
	torch.save(coach.nnet.state_dict(), PATH)
	coach.pnet.load_state_dict(torch.load(PATH))

	for _ in tqdm(range(NUM_TRAIN_GAMES), desc="Self play"):
		data = coach.execute_episode(
			epsilon=TRAIN_EPSILON,
			gamma=GAMMA)

		data = torch.stack(data)
		coach.learn(data)

	arena = Arena(
		player1=lambda x: torch.argmax(coach.nnet(x)),
		player2=lambda x: torch.argmax(coach.pnet(x)),
		game=game)

	## Introduce small randomness into evaluation games
	## Both agents take random moves with small probability
	p1_won = 0
	p2_won = 0
	for _ in tqdm(range(HALF), desc="Evaluation 1"):
		looser = arena.play_game(epsilon=EVAL_EPSILON)
		if looser == 'red':
			p2_won += 1
		else:
Beispiel #3
0
                        dest='load_model',
                        action='store_true')
    parser.add_argument('-loadf',
                        '--load_folder_file',
                        dest='load_folder_file',
                        type=str)
    parser.add_argument('-iterexamp',
                        '--num_iters_example',
                        dest='numItersForTrainExamplesHistory',
                        type=int,
                        default=20)
    args = parser.parse_args()

    fh = open(os.path.join("..", "data", "puzzle1.txt"))
    fcontent = fh.read()
    fh.close()

    sys.setrecursionlimit(10000)
    g = game(fcontent)

    nnet = nn(g, args)

    if args.load_model:
        nnet.load_checkpoint(args.load_folder_file)

    c = Coach(g, nnet, args)
    if args.load_model:
        print("Load trainExamples from file")
        c.loadTrainExamples()
    c.learn()
Beispiel #4
0
def main():
    """Set up a small five-node transport-graph game and start training.

    Builds the edge list, decorates each edge with a display colour and
    width chosen from its ``"type"``, creates the ``Game`` with two
    detectives, attaches one network to Mister X and one shared network to
    every detective, then hands everything to a ``Coach`` and calls
    ``learn()``. Uses ``log``, ``args``, ``Game``, ``NNet`` and ``Coach`` from
    the enclosing scope.
    """
    log.info('Loading Game')
    number_of_nodes = 5

    # (node, node, transport type) triples; each edge gets its own
    # attribute dict so the styling below can mutate it in place.
    connections = (
        (0, 1, "taxi"),
        (1, 2, "taxi"),
        (2, 0, "taxi"),
        (3, 4, "bus"),
        (3, 4, "taxi"),
        (3, 4, "metro"),
        (3, 2, "ferry"),
        (0, 4, "taxi"),
        (1, 3, "taxi"),
        (2, 4, "taxi"),
    )
    edges = [(start, end, {"type": kind}) for start, end, kind in connections]

    # Visualization style per transport type: (colour, line width).
    # Unknown types fall back to a wide black line.
    fallback_style = ('black', 10.0)
    style_by_type = {
        'taxi': ((255 / 255, 205 / 255, 66 / 255), 10.0),
        'bus': ((23 / 255, 160 / 255, 93 / 255), 6.0),
        'metro': ((221 / 255, 80 / 255, 68 / 255), 3.0),
        'ferry': ('black', 0.5),
    }
    for _, _, attributes in edges:
        color, width = style_by_type.get(attributes["type"], fallback_style)
        attributes["color"] = color
        attributes["width"] = width

    # Player setup; the colours are only used for visualization.
    number_of_detectives = 2
    detective_colors = ['green', 'red']
    mister_x_color = 'grey'
    game = Game(number_of_nodes, edges, number_of_detectives, mister_x_color, detective_colors)

    # One network for Mister X ...
    mister_x_net = NNet(game, player=game.mister_x)
    game.mister_x.neural_net = mister_x_net
    # ... and a single network instance shared by all detectives.
    detective_net = NNet(game, game.detectives[0])
    for detective in game.detectives:
        detective.neural_net = detective_net

    # Disabled checkpoint loading, kept as a bare string literal (a no-op).
    """if args['load_model']:
        log.info('Loading checkpoint "%s/%s"...', args['load_folder_file'])
        nnet.load_checkpoint(args['load_folder_file'][0], args['load_folder_file'][1])
    else:
        log.warning('Not loading a checkpoint!')
    """

    log.info('Loading the Coach...')
    coach = Coach(game, mister_x_net, detective_net, args)

    # Disabled loading of training examples, likewise a no-op string literal.
    """if args['load_model']:
        log.info("Loading 'trainExamples' from file...")
        c.loadTrainExamples()
    """

    log.info('Starting the learning process 🎉')
    coach.learn()
Beispiel #5
0
                network_architecture=configs.network_architecture)

    coach = Coach(game=game,
                  nnet=nnet,
                  pnet=pnet,
                  num_iters=configs.num_iters,
                  root_noise=configs.root_noise,
                  board_size=configs.board_size)
    if configs.load_model:
        logging.info("Loading training examples")
        coach.loadTrainExamples()

    if configs.web_server:
        web = WebServer(game=game,
                        nnet=nnet,
                        checkpoint_folder=configs.checkpoint_dir,
                        c_puct=configs.c_puct,
                        num_mcst_sims=configs.num_mcts_sims)
        web.start_web_server()
        exit(0)

    coach.learn(num_train_episodes=configs.num_episodes,
                num_training_examples_to_keep=configs.maxlenOfQueue,
                checkpoint_folder=configs.checkpoint_dir,
                arena_tournament_size=configs.arena_size,
                model_update__win_threshold=configs.update_threshold,
                num_mcst_sims=configs.num_mcts_sims,
                c_puct=configs.c_puct,
                know_nothing_training_iters=configs.tempThreshold,
                max_cpus=num_threads)