Example #1
def __init__(self, args, init_train_info=None, sub_dir=None):
    self.args = args
    misc.ensure_dir(args.logdir)
    # resume an existing run directory if requested, otherwise start a new
    # timestamped one under args.logdir
    sub_dir = args.continue_from or sub_dir or misc.datetimestr()
    self.logdir = os.path.join(args.logdir, sub_dir)
    misc.ensure_dir(self.logdir)
    self._setup_log_file()
    self._create_train_info(args, init_train_info or {})
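The constructor relies on two small helpers from the misc module that the listing does not show. A minimal sketch of what they plausibly do (implementations assumed, not taken from the repository):

import os
import datetime

def ensure_dir(path):
    # create the directory (and any parents) if it does not already exist
    os.makedirs(path, exist_ok=True)

def datetimestr():
    # timestamp string used for run directories and checkpoint file names
    return datetime.datetime.now().strftime('%Y%m%d-%H%M%S')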
Example #2
def exploration(board,
                models,
                gpu_id,
                tau_func=default_tau_func,
                policy_noise_ratio=0,
                resign=None,
                logger=None):
    import misc, json, sys
    from ccboard import action_index
    history = []
    cur_node = Node(board)
    winner = None
    # from tqdm import trange
    game_name = misc.datetimestr()
    for step in range(args().max_game_steps):
        # misc.progress_bar(step, args().max_game_steps, game_name)

        policy, actions = _mcts_policy(cur_node, models[step % 2], gpu_id,
                                       tau_func(step))
        pos, action = _next_action(cur_node.s, policy, actions,
                                   policy_noise_ratio)
        #  history.append( Experience(cur_node.s, policy, cur_node.v) ) # $v$ is here for resignation check later
        history.append(
            Experience(cur_node.s, action_index(pos, action), cur_node.v))
        if resign is not None and cur_node.v < resign:
            winner = (step + 1) % 2  # current player loses
            # sys.stderr.write('\n')
            break

        if logger is not None:
            logger.log_game_action(game_name, pos, board.action_list()[action])

        cur_node = cur_node.next_edges[(pos, action)].next_node
        cur_node.prev_edge = None
        if cur_node.s.is_terminated():
            # history.append( Experience(cur_node.s, null, cur_node.v) )
            # the current step loses if the next step wins
            next_step_wins = cur_node.s.is_winner()
            winner = (step + 1 if next_step_wins else step) % 2
            # sys.stderr.write('\n')
            break

    if logger is not None:
        logger.end_log_game_action(game_name)

    # fill scores
    min_winner_score = 1
    for step in range(len(history)):
        if winner is None:
            history[step].v = 0
        elif step % 2 == winner:
            if min_winner_score > history[step].v:
                min_winner_score = history[step].v
            history[step].v = 1
        else:
            history[step].v = -1
    return history, winner, min_winner_score
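exploration() assumes an Experience record and a temperature schedule default_tau_func defined elsewhere. A minimal sketch consistent with how they are used above; the field and parameter names are assumptions:

from dataclasses import dataclass

@dataclass
class Experience:
    s: object   # board state at this step
    pi: int     # index of the action actually played (see action_index)
    v: float    # value target, overwritten once the game outcome is known

def default_tau_func(step, greedy_after=30):
    # AlphaZero-style temperature schedule: sample proportionally to visit
    # counts early in the game, then play near-greedily; greedy_after is an
    # assumed cut-over point
    return 1.0 if step < greedy_after else 1e-3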
Example #3
def try_reload_model(model_path, best_model, gpu_id, proc_id, iters):
	import os
	from log import Logger
	cur_model_path = Logger.lastest_model_path(args().logdir)
	if best_model is None or model_path != cur_model_path:
		model_path = cur_model_path
		best_model = Logger._load_model(model_path, gpu_id)
		print('Exploration GPU%d-%d it:%d %s model updated' % (gpu_id, proc_id, iters, misc.datetimestr()))
	return model_path, best_model 
Example #4
def compare_models(iters, eval_model, best_model, gpu_id, logger):
	from mcts import exploration
	wins = 0
	has_cuda = torch.cuda.is_available()
	iters2 = 0
	for i in range(args().evaluation_games):
		_, winner, _ = exploration(ccboard.ChessBoard(), [best_model, eval_model], gpu_id, policy_noise_ratio=args().policy_noise_ratio)
		if winner is not None:
			wins += winner
		iters2 += 1
		logger.info("Evaluation %d:%d %s New model %s" % (iters, iters2, misc.datetimestr(), 'wins' if winner == 1 else 'loses'))
	return wins / float(args().evaluation_games)
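Since the model list passed to exploration() is [best_model, eval_model], a returned winner of 1 means the candidate model won that game, so summing winner over all games counts the candidate's wins directly. Drawn games (winner is None) add nothing and therefore count against the new model in the final ratio.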
Example #5
def save_model(self, model, model_path=None):
    # unwrap DataParallel so the underlying module is saved
    if isinstance(model, torch.nn.DataParallel):
        model = model.module
    if model_path is None:
        if 'model_path' in self.train_info and os.path.exists(
                os.path.join(self.logdir, self.train_info['model_path'])):
            print("Removing old model {}".format(
                self.train_info['model_path']))
            os.remove(os.path.join(self.logdir, self.train_info['model_path']))
        model_path = misc.datetimestr() + '.model.pth'
        self.train_info['model_path'] = model_path
    print("Saving model to {}".format(model_path))
    model.save(os.path.join(self.logdir, model_path))
Example #6
def exploration_process_func(gpu_id, proc_id, queue):
	import numpy
	numpy.random.seed(gpu_id * 101 + proc_id)
	from mcts import exploration
	model_path = None
	best_model = None
	has_cuda = torch.cuda.is_available()
	iters = 0
	while True:
		iters += 1
		model_path, best_model = try_reload_model(model_path, best_model, gpu_id, proc_id, iters)
		history, _, _ = exploration(ccboard.ChessBoard(), [best_model, best_model], gpu_id, policy_noise_ratio=args().policy_noise_ratio)
		print('Exploration GPU%d-%d it:%d %s history_size:%d' % (gpu_id, proc_id, iters, misc.datetimestr(), len(history)))
		queue.put(history)
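A sketch of how such a worker could be launched with torch.multiprocessing; the args() fields explore_gpus and procs_per_gpu used here are assumptions for illustration:

import torch.multiprocessing as mp

def start_exploration_workers():
    # one queue shared by all self-play workers; the training loop drains it
    queue = mp.Queue()
    workers = []
    for gpu_id in range(args().explore_gpus):
        for proc_id in range(args().procs_per_gpu):
            p = mp.Process(target=exploration_process_func,
                           args=(gpu_id, proc_id, queue))
            p.daemon = True
            p.start()
            workers.append(p)
    return queue, workers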
Example #7
def evaluation(iters, best_model, model, model_lock, logger):
	has_cuda = torch.cuda.is_available()
	gpu_id = args().train_gpu
	model_lock.acquire()
	eval_model = logger.clone_model(model, gpu_id)
	model_lock.release()
	wins = compare_models(iters, eval_model, best_model, gpu_id, logger)
	logger.info(">>> Evaluation %d %s New model wins %.0f%%" % (iters, misc.datetimestr(), wins * 100))
	if wins < 0.55: return best_model
	global epoch
	best_model = eval_model
	logger.train_info['epoch'] = epoch
	logger.save_train_info()
	logger.save_model(best_model)
	return best_model
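The wins < 0.55 check gates promotion: the candidate only replaces the current best model if it wins at least 55% of the evaluation games, the same evaluator threshold used in AlphaGo Zero-style pipelines.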
Example #8
def train(replay_buffer, model, queue):
    parameters = model.parameters()
    optimizer = torch.optim.SGD(parameters,
                                lr=1e-4,
                                momentum=0.9,
                                nesterov=True)
    max_norm = 400
    loss_history = []
    from tqdm import tqdm
    import matplotlib.pyplot as plt
    eval_iters = 1
    best_model = model
    torch.save(best_model, 'best_model.pth')
    for epoch in tqdm(range(500), desc="Training epoch"):
        if (epoch + 1) % 100 == 0:
            optim_state = optimizer.state_dict()
            optim_state['param_groups'][0][
                'lr'] = optim_state['param_groups'][0]['lr'] / 1.01
            optimizer.load_state_dict(optim_state)

        _history, _, _ = explore(ChessBoard(), [best_model, best_model],
                                 max_steps, 0.25)
        replay_buffer.extend(_history)
        # while not queue.empty():
        #    replay_buffer.extend(queue.get())
        while len(replay_buffer) > 100000:
            replay_buffer.pop()
        loss = train_epoch(replay_buffer, optimizer, model, max_norm)
        import misc
        print('\nTrain epoch: %d, Buffer:%d, Time: %s, Loss: %.5f' %
              (epoch, len(replay_buffer), misc.datetimestr(), loss))
        loss_history.append(loss)
        plt.clf()
        plt.plot(loss_history)
        plt.savefig("loss.png")

        if (epoch + 1) % evaluation_interval == 0:
            eval_iters += 1
            best_model = evaluation(eval_iters, best_model, model)
        with open('loss.his', 'wb') as f:
            import pickle
            pickle.dump(loss_history, f)
Example #9
def save_model(self, model, model_path=None):
    to_remove = None
    if model_path is None:
        # remove old
        if 'model_path' in self.train_info:
            model_path = os.path.join(self.logdir,
                                      self.train_info['model_path'])
            if os.path.exists(model_path):
                to_remove = model_path
                print("Removing old model {}".format(
                    self.train_info['model_path']))
        # new path name
        model_path = misc.datetimestr() + '.model.pth'
        self.train_info['model_path'] = model_path
    print("Saving model to {}".format(model_path))
    package = {
        'config': self._model_config(),
        'state_dict': model.state_dict(),
    }
    torch.save(package, os.path.join(self.logdir, model_path))
    if to_remove is not None:
        os.remove(to_remove)
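Unlike the variant in Example #5, this version only records the old checkpoint in to_remove and deletes it after the new package has been written, so an interrupted save cannot leave the log directory without a model file.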
Example #10
def train(replay_buffer, queue, model, model_lock, logger):
	global epoch
	parameters = model.parameters()
	optimizer = torch.optim.SGD(parameters, lr=args().lr, momentum=args().momentum, nesterov=True)
	has_cuda = torch.cuda.is_available()
	gpu_id = args().train_gpu
	max_norm = args().max_norm
	for ep in range(args().epochs):
		epoch = ep
		if (epoch + 1) % args().anneal_interval == 0:
			anneal_lr(optimizer, logger)
		if epoch < logger.train_info['epoch']: continue
		while not queue.empty():
			replay_buffer.extend(queue.get())
		while len(replay_buffer) > args().replay_buffer_size: replay_buffer.pop()
		loss = train_epoch(replay_buffer, has_cuda, gpu_id, optimizer, model, model_lock, max_norm)
		logger.info('Train epoch: %d, Buffer:%d, Time: %s, Loss: %.5f' % (epoch, len(replay_buffer), misc.datetimestr(), loss))
		logger.train_info['loss'].append(loss)
		if args().plot:
			logger.plot_progress()
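anneal_lr is not shown in this listing; a minimal sketch consistent with its use above, assuming an args().anneal_factor hyperparameter:

def anneal_lr(optimizer, logger):
    # scale the learning rate of every parameter group down by a fixed factor
    for group in optimizer.param_groups:
        group['lr'] /= args().anneal_factor
    logger.info('Annealed learning rate to %g' % optimizer.param_groups[0]['lr'])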