def main():
    """Parse CLI arguments, build the game and network, and run training.

    The checkpoint and load folders on ``args`` are extended with a
    board-size subfolder of the form ``dim<rows>x<columns>/``. When
    ``args.load_model`` equals the string ``"true"``, the network checkpoint
    and the Coach's training examples are loaded before learning starts.
    """
    args = parser.parse_args()
    print(f"Rows: {args.rows}")
    print(f"Columns: {args.columns}")

    g = Game(args.rows, args.columns)
    nnet = nn(g, args)

    # Same suffix is appended to both the save and the load locations.
    dim_suffix = f"dim{args.rows}x{args.columns}/"
    args.checkpoint += dim_suffix
    args.load_folder += dim_suffix

    # The flag is compared against the literal string "true", not a boolean.
    resume = args.load_model == "true"
    if resume:
        nnet.load_checkpoint(args.load_folder, args.load_file)

    c = Coach(g, nnet, args)
    if resume:
        print("Load trainExamples from file")
        c.loadTrainExamples()

    c.learn()
def __init__(self, gateway):
    """Initialise the frame buffers and pick the act/memorize strategy.

    Args:
        gateway: Stored unchanged on the instance as ``self.gateway``.
    """
    self.gateway = gateway
    self.frames = frames
    self.frame_count = 0

    # Keep only a short window of recent data, enough to assemble
    # 4-channel inputs.
    self.capacity = resolution[2] * 5
    self.screens = []
    self.actions = []
    self.hps = []
    self.energy = []
    self.controllable = []
    self.rewards = []

    # ``_on_train`` is expected to be set by the FTGEnv object; reading it
    # with getattr avoids changing this bot's API.
    on_train = getattr(self, '_on_train', False)

    if not on_train:
        # Outside training: act with ``act_with_np`` and never memorize.
        self.act = act_with_np
        self.memorize = lambda s1, a, s2, done, r, energy: None
    else:
        # In training: register with the Coach and delegate acting and
        # memorizing to it.
        Coach().trainees.append(self)
        self.act = functools.partial(Coach().act, eps=1.0)
        self.memorize = Coach().memorize
def selfPlayOnly(args):
    """Run ``args.numIters`` rounds of self-play without training.

    Before every round the network is reloaded from the checkpoint named by
    ``args.load_folder_file`` (folder, filename).

    Args:
        args: Settings object providing ``numIters`` and ``load_folder_file``;
            also handed to the Coach.
    """
    game = nimGame(config)
    net = nn(game)
    coach_0 = Coach(game, net, args)

    for iteration in range(args.numIters):
        print(f"Self-play iteration: {iteration}")
        net.load_checkpoint(args.load_folder_file[0], args.load_folder_file[1])
        coach_0.selfPlay()
def main():
    """Train a network on a 9x9 Gobang board.

    If ``args.load_model`` is truthy, the network is first restored from
    ``args.checkpoint``.
    """
    # 9x9 board; ``nir`` is passed through to GobangGame as 9.
    board = GobangGame(n=9, nir=9)
    net = NNetWrapper(board)

    if args.load_model:
        net.load_checkpoint(args.checkpoint)

    Coach(board, net, args).learn()
def add_team():
    """Create a coach and a team owned by that coach from a JSON POST body.

    The request body must be a JSON object with the keys ``"coach"`` (coach
    name) and ``"name"`` (team name); these key names must match the ones
    sent by the React client.

    Returns:
        ``{"success": True}`` as JSON once both rows are inserted, or a JSON
        error payload with HTTP status 400 when the body is not an object or
        either field is missing or empty.
    """
    data = request.get_json()
    if not isinstance(data, dict):
        return jsonify({"success": False,
                        "error": "Expected a JSON object."}), 400

    coach = data.get("coach")
    name = data.get("name")
    if not coach or not name:
        return jsonify({"success": False,
                        "error": "Both 'coach' and 'name' are required."}), 400

    # The coach must exist first: the team row needs its primary key.
    new_coach = Coach(name=coach)
    new_coach.insert()

    new_team = Team(name=name, coach_id=new_coach.pk)
    new_team.insert()

    return jsonify({"success": True})
def main():
    """Train a network on a Dots-and-Boxes game created with ``n=3``.

    Network weights are restored from ``args.load_folder_file``
    (folder, filename) when ``args.load_model`` is truthy; otherwise a
    warning is logged and training starts from the fresh network.
    """
    log.info('Loading %s...', DotsAndBoxesGame.__name__)
    game = DotsAndBoxesGame(n=3)

    log.info('Loading %s...', nn.__name__)
    net = nn(game)

    if not args.load_model:
        log.warning('Not loading a checkpoint!')
    else:
        net.load_checkpoint(args.load_folder_file[0], args.load_folder_file[1])

    log.info('Loading the Coach...')
    coach = Coach(game, net, args)

    log.info('Starting the learning process 🎉')
    coach.learn()
def generate_data_debug(self, model_file):
    """Play one episode and print how many examples fall in each outcome group.

    Args:
        model_file: Model filename to load into the network. Names of
            length 1 or less are treated as "no model" and the freshly
            constructed network is used instead.
    """
    net = nn(self.game)
    if len(model_file) > 1:
        net.load_model(filename=model_file)
    else:
        print('random state')

    coach = Coach(self.game, net, self.args)
    episode = coach.execute_episode()

    # Reduce each example to (sum of its first element, its third element).
    summaries = [(np.sum(example[0]), example[2]) for example in episode]

    second_hand_win = ((0, -1), (-1, 1))
    first_hand_win = ((0, 1), (-1, -1))
    print(sum(1 for item in summaries if item in second_hand_win))  # second hand win
    print(sum(1 for item in summaries if item in first_hand_win))  # first hand win
def main(config):
    """Build the game, network and coach from ``config`` and start training.

    Args:
        config: Settings object. This function reads ``game``,
            ``tensorboardX``, ``load_model``, ``load_model_file``,
            ``use_multiprocessing`` and ``load_train_examples`` from it and
            passes it on to the network wrapper and the coach.
    """
    game = config.game()

    # Set up the model, optionally restoring it from a checkpoint.
    nnet = NNetWrapper(game, config, tensorboard=config.tensorboardX)
    if config.load_model:
        folder, filename = config.load_model_file[0], config.load_model_file[1]
        nnet.load_checkpoint(folder=folder, filename=filename)

    if not config.use_multiprocessing:
        coach = Coach(game, nnet, config)
    else:
        # Multiprocessing needs the 'spawn' start method; a RuntimeError from
        # setting it is deliberately ignored.
        try:
            mp.set_start_method('spawn', force=True)
        except RuntimeError:
            pass

        import warnings

        # Silence the semaphore warning (bug in pytorch).
        warnings.filterwarnings("ignore", message="semaphore_tracker",
                                category=UserWarning)
        coach = CoachMP(game, nnet, config)

    if config.load_train_examples:
        print("Load trainExamples from file")
        coach.loadTrainExamples()

    coach.learn()
def parse_coaches(
        self,
        path="C:/Users/milop/SciOlyScheduler/SciOlyScheduler/utility/coaches.csv"):
    """Load coaches from a CSV file and attach them to teams and events.

    The first CSV line is treated as a header and skipped. Each following
    row is read as ``team, coach name, event, answer, event, answer, ...``.
    Rows that are blank or have an empty team cell are ignored. After all
    rows are read, every event's coach list is sorted.

    Args:
        path: Location of the coaches CSV. Defaults to the path that was
            previously hard-coded, so existing callers are unaffected.

    Raises:
        KeyError: If a row names a team or event that is not registered in
            ``self.teams`` / ``self.events``.
    """
    with open(path, 'r', newline='') as file:
        reader = csv.reader(file)
        next(reader, None)  # skip the header; tolerate a completely empty file
        i = 1
        # Parse the information line by line and fill in the dictionaries.
        for row in reader:
            # csv.reader yields [] for blank lines, so guard before indexing.
            if not row or row[0] == '':
                continue
            team = self.teams[row[0].lower()]
            coach = Coach(team.get_number(), row[1], i + self.total_events)
            team.add_coach(coach)
            i += 1
            self.coaches.append(coach)
            # Each event name maps to a list of event objects (the original
            # comment describes these as morning and afternoon copies); the
            # coach is offered to every one of them.
            for j in range(2, len(row), 2):
                event_list = self.events[row[j].lower()]
                if row[j + 1].lower() == "no":
                    # j is always even here, so floor division is exact.
                    pair = (j // 2, coach)
                else:
                    pair = (0, coach)
                for event in event_list:
                    event.add_potential_coach(pair)

    for event_list in self.events.values():
        for event in event_list:
            event.sort_coaches()
def generate_data(l):
    """Play one self-play episode and append its examples to a shared file.

    The episode itself runs without holding the lock; only the file append
    is serialised through ``l``.

    Args:
        l: Lock (usable as a context manager) guarding the examples file.
    """
    g = Game(args.goBang_n)
    nnet = nn(g)
    c = Coach(g, nnet, args)
    train_example = c.execute_episode()

    with l:
        folder = args.checkpoint
        os.makedirs(folder, exist_ok=True)
        # Join folder and file name as separate components. The previous
        # single-argument join was plain string concatenation, which put the
        # file outside the folder whenever it had no trailing separator.
        # With a trailing separator the resulting path is unchanged.
        filename = os.path.join(folder, "train_examples_4")
        with open(filename, "ab+") as f:
            pickle.dump(train_example, f)
def save_transaction(self, actions):
    """Turn the buffered frames into one transition and hand it to memorize.

    Once the buffers hold exactly ``self.capacity`` entries, two stacked
    states are built from every 4th buffered screen, a reward is computed
    from the change in hit points, and the sample is passed to
    ``self.memorize`` when the oldest frame was controllable. Any exception
    is logged with its traceback instead of being propagated.

    Args:
        actions: Indexable collection; the most recent stored action is
            looked up in it when reporting to the debug port.
    """
    try:
        # Drop the oldest entry once the window has overflowed.
        # NOTE(review): ``self.controllable`` is not trimmed here — confirm
        # it is kept in step with the other buffers elsewhere.
        if len(self.screens) > self.capacity:
            for history in (self.screens, self.actions, self.hps,
                            self.energy, self.rewards):
                del history[0]

        if len(self.screens) == self.capacity:
            assert (len(self.screens) == len(self.actions)
                    == len(self.hps) == len(self.energy))

            # Frames 1-4 form the current state, frames 2-5 the next state.
            s1 = np.stack([self.screens[k] for k in (0, 4, 8, 12)], axis=2)
            s2 = np.stack([self.screens[k] for k in (4, 8, 12, 16)], axis=2)

            a = self.actions[0]

            # Reward: damage dealt minus damage taken between the two states.
            my_hp_1, opp_hp_1 = self.hps[0]
            my_hp_2, opp_hp_2 = self.hps[4]
            energy = self.energy[0]
            r = (opp_hp_1 - opp_hp_2) - (my_hp_1 - my_hp_2)
            if opp_hp_1 >= my_hp_1:
                r -= 0.1
            self.rewards.append(r)

            if self.controllable[0]:
                # Only memorize samples taken while the character was
                # controllable. Game end is not used on this platform, so
                # ``done`` is always False.
                self.memorize(s1, a, s2, False, r, energy)

            # Used for score calculation.
            # NOTE(review): original indentation was lost; confirm this is
            # meant to run regardless of controllability.
            Coach().add_reward(r)

            # If a debug port (process queue for monitoring code) exists,
            # send the current state and action through it on every 4th call.
            if hasattr(self, 'debug_port'):
                cnt = getattr(self, '_debug_port_cnt', 0)
                if cnt == 0:
                    screens, energy = self._get_recent_state(axis=0)
                    info = {
                        'action': str(actions[self.actions[-1]]),
                        'energy': int(energy * energy_scale),
                        'reward': r,
                    }
                    self.debug_port.put((screens, info))
                self._debug_port_cnt = (cnt + 1) % 4
    except Exception:
        logger.error(traceback.format_exc())
def generate_data(l, model_iter):
    """Play one episode with a saved model and append its examples to a file.

    The model ``model_auto_<model_iter + 1>`` is loaded, one episode is
    played without holding the lock, and the examples are appended to
    ``train_examples_auto_<model_iter + 1>`` inside ``args.checkpoint``.

    Args:
        l: Lock (usable as a context manager) guarding the examples file.
        model_iter: Zero-based model iteration; file names use
            ``model_iter + 1``.
    """
    tag = str(model_iter + 1)

    g = Game(8)
    nnet = nn(g)
    nnet.load_model(filename="model_auto_" + tag)
    c = Coach(g, nnet, args)
    train_example = c.execute_episode()

    with l:
        folder = args.checkpoint
        os.makedirs(folder, exist_ok=True)
        # Join folder and file name as separate components. The previous
        # single-argument join was plain string concatenation, which put the
        # file outside the folder whenever it had no trailing separator.
        # With a trailing separator the resulting path is unchanged.
        filename = os.path.join(folder, "train_examples_auto_" + tag)
        with open(filename, "ab+") as f:
            pickle.dump(train_example, f)
def main():
    """Train on Connect4, optionally resuming from saved state.

    When ``args.load_model`` is truthy, the Coach loads both its model and
    its training examples before learning starts; both log messages report
    ``args.load_folder_file``.
    """
    coach = Coach(Connect4Game(), args)

    if args.load_model:
        logger.info(f'Loading checkpoint {args.load_folder_file}...')
        coach.loadModel()

        logger.info(
            f"Loading 'trainExamples' from file {args.load_folder_file}...")
        coach.loadTrainExamples()

    coach.learn()
def generate_data(self, l, model_file, train_example_filename):
    """Play one episode and append its examples to a shared file.

    The episode runs without holding the lock; only the file append is
    serialised through ``l``.

    Args:
        l: Lock (usable as a context manager) guarding the examples file.
        model_file: Model filename to load. Names of length 2 or less are
            treated as "no model" and the freshly constructed network is
            used instead.
        train_example_filename: Name of the examples file inside
            ``self.args['checkpoint']``.
    """
    nnet = nn(self.game)
    if len(model_file) > 2:
        nnet.load_model(filename=model_file)
    else:
        print('random nn model')

    c = Coach(self.game, nnet, self.args)
    train_example = c.execute_episode()

    with l:
        folder = self.args['checkpoint']
        os.makedirs(folder, exist_ok=True)
        # Join folder and file name as separate components. The previous
        # single-argument join was plain string concatenation, which put the
        # file outside the folder whenever it had no trailing separator.
        # With a trailing separator the resulting path is unchanged.
        filename = os.path.join(folder, train_example_filename)
        with open(filename, "ab+") as f:
            pickle.dump(train_example, f)
def main():
    """Train a network on ``Game(8)`` while tracking the run in wandb.

    When ``args.load_model`` is truthy, network weights are restored from
    ``args.load_folder_file`` (folder, filename) and the Coach reloads its
    training examples before learning starts. The wandb run is finished even
    if training raises.
    """
    log.info('Loading %s...', Game.__name__)
    g = Game(8)

    log.info('Loading %s...', nn.__name__)
    nnet = nn(g)

    if args.load_model:
        folder, filename = args.load_folder_file[0], args.load_folder_file[1]
        # The message has two placeholders, so folder and filename must be
        # passed as two arguments; passing the tuple as one argument made
        # logging fail to format the record.
        log.info('Loading checkpoint "%s/%s"...', folder, filename)
        nnet.load_checkpoint(folder, filename)
    else:
        log.warning('Not loading a checkpoint!')

    log.info('Loading the Coach...')
    c = Coach(g, nnet, args)

    if args.load_model:
        log.info("Loading 'trainExamples' from file...")
        c.loadTrainExamples()

    run = wandb.init(project=args.wandb_project, config=args, reinit=True)
    log.info('Starting the learning process 🎉')
    try:
        c.learn()
    finally:
        # Close the wandb run even when training is interrupted by an error.
        run.finish()
def main():
    """Start the network worker processes and run Coach training.

    One ``NNetWorker`` process is started per ``args.nnet_workers``. When
    ``args.load_model`` is truthy, the network checkpoint and the Coach's
    training examples are loaded before learning starts.
    """
    # Python 3 multiprocessing Queues need OS-level synchronization
    # primitives; without them fall back to a single worker of each kind.
    os_supported = check_platform()
    if os_supported == False:  # noqa: E712 - keep the exact comparison
        args.mcts_workers = 1
        args.nnet_workers = 1

    game = Game()
    manager = NNetManager(args.nnet_workers, os_supported)

    for worker_id in range(args.nnet_workers):
        worker = mp.Process(target=NNetWorker,
                            args=(game, manager.nsync(worker_id), worker_id))
        worker.start()

    if args.load_model:
        manager.load_checkpoint(args.load_folder_file[0],
                                args.load_folder_file[1])

    coach = Coach(game, manager, args)
    if args.load_model:
        print("Load trainExamples from file")
        coach.loadTrainExamples()

    coach.learn()
def main():
    """Build the game and network, optionally resume, and start training.

    When ``args.load_model`` is truthy, network weights are restored from
    ``args.load_folder_file`` (folder, filename) and the Coach reloads its
    training examples before learning starts.
    """
    log.info('Loading %s...', Game.__name__)
    g = Game()

    log.info('Loading %s...', nn.__name__)
    nnet = nn(g)

    if args.load_model:
        folder, filename = args.load_folder_file[0], args.load_folder_file[1]
        # The message has two placeholders, so folder and filename must be
        # passed as two arguments; passing the tuple as one argument made
        # logging fail to format the record.
        log.info('Loading checkpoint "%s/%s"...', folder, filename)
        nnet.load_checkpoint(folder, filename)
    else:
        log.warning('Not loading a checkpoint!')

    log.info('Loading the Coach...')
    c = Coach(g, nnet, args)

    if args.load_model:
        log.info("Loading 'trainExamples' from file...")
        c.loadTrainExamples()

    log.info('Starting the learning process 🎉')
    c.learn()
def main():
    """Seed NumPy, build ``Game(7)`` and its network, and start training.

    When ``args.load_model`` is truthy, network weights are restored from
    ``args.load_folder_file`` (folder, filename) and the Coach reloads its
    training examples before learning starts.
    """
    # Fixed seed for NumPy's global random generator.
    np.random.seed(123)

    log.info('Loading %s...', Game.__name__)
    g = Game(7)

    log.info('Loading %s...', nn.__name__)
    nnet = nn(g)

    if args.load_model:
        folder, filename = args.load_folder_file[0], args.load_folder_file[1]
        # The message has two placeholders, so folder and filename must be
        # passed as two arguments; passing the tuple as one argument made
        # logging fail to format the record.
        log.info('Loading checkpoint "%s/%s"...', folder, filename)
        nnet.load_checkpoint(folder, filename)
    else:
        log.warning('Not loading a checkpoint!')

    log.info('Loading the Coach...')
    c = Coach(g, nnet, args)

    if args.load_model:
        log.info("Loading 'trainExamples' from file...")
        c.loadTrainExamples()

    log.info('Starting the learning process 🎉')
    c.learn()
def main():
    """Build the network and Coach for Inception TicTacToe and start training.

    Progress is reported with ``print``. When ``args['load_model']`` is
    truthy, the network checkpoint named by ``args['load_folder_file']``
    (folder, filename) and the Coach's training examples are loaded first.
    """
    print('Loading Inception TicTacToe')
    net = nn()

    if not args['load_model']:
        print('Not loading a checkpoint')
    else:
        print('Loading checkpoint: ', args['load_folder_file'])
        net.load_checkpoint(args['load_folder_file'][0],
                            args['load_folder_file'][1])

    print('Loading the Coach')
    # NOTE(review): ``g`` is not assigned anywhere in this function, so it
    # must come from an enclosing scope — confirm it is defined, otherwise
    # this line raises NameError.
    coach = Coach(g, net, args)

    if args['load_model']:
        print("Loading 'trainExamples' from file...")
        coach.loadTrainExamples()

    print('Starting the learning process 🎉')
    coach.learn()
def TrainNewNet():
    """Train a network on a size-6 game that is backed by an Othello predictor.

    The predictor is built from the ``pred_othello_073.pth`` model file. When
    ``args.load_model`` is truthy, the network checkpoint and the Coach's
    training examples are loaded before learning starts.
    """
    predictor = OthelloPredictor(
        6, 'trainedModels/othello/pred_othello_073.pth', 100000)
    game = Game(6, predictor=predictor)
    net = nn(game)

    if args.load_model:
        net.load_checkpoint(args.load_folder_file[0], args.load_folder_file[1])

    coach = Coach(game, net, args)
    if args.load_model:
        print("Load trainExamples from file")
        coach.loadTrainExamples()

    coach.learn()
def main():
    """Build ``HexGame(7)`` and its network, optionally resume, and train.

    When ``args.load_model`` is truthy, network weights are restored from
    ``args.load_folder_file`` (folder, filename) and the Coach reloads its
    training examples before learning starts.
    """
    log.info('Loading %s...', HexGame.__name__)
    game = HexGame(7)

    log.info('Loading %s...', NNetWrapper.__name__)
    nnet = NNetWrapper(game)

    if args.load_model:
        folder, filename = args.load_folder_file[0], args.load_folder_file[1]
        # The message has two placeholders, so folder and filename must be
        # passed as two arguments; passing the tuple as one argument made
        # logging fail to format the record.
        log.info('Loading checkpoint "%s/%s"...', folder, filename)
        nnet.load_checkpoint(folder, filename)
    else:
        log.warning('Not loading a checkpoint!')

    log.info('Loading the Coach...')
    c = Coach(game, nnet, args)

    if args.load_model:
        log.info("Loading 'trainExamples' from file...")
        c.loadTrainExamples()

    log.info('Starting the learning process 🎉')
    c.learn()
def main():
    """Optionally create the run's log file, then build everything and train.

    If ``args.create_log_file`` is truthy and the log file does not exist
    yet, its directory is created when needed and a one-line header is
    written. Afterwards ``Game(5, 4)`` and its network are built; when
    ``args.load_model`` is truthy the checkpoint in ``args.load_folder_file``
    (folder, filename) and the Coach's training examples are loaded before
    learning starts.
    """
    if args.create_log_file:
        full_path = os.path.join(args.log_file_location, args.log_file_name)
        if os.path.exists(full_path):
            log.info('Log file %s already exists', full_path)
        else:
            # makedirs also handles nested directories and an already
            # existing directory, replacing the two duplicated branches.
            if args.log_file_location:
                os.makedirs(args.log_file_location, exist_ok=True)
            # The with-block closes the file; no explicit close() is needed.
            with open(full_path, 'w') as fp:
                fp.write(f"Log file initiated for {args.log_run_name} \n")

    log.info('Loading %s...', Game.__name__)
    g = Game(5, 4)

    log.info('Loading %s...', nn.__name__)
    nnet = nn(g, args=nn_args)

    if args.load_model:
        folder, filename = args.load_folder_file[0], args.load_folder_file[1]
        # The message has two placeholders, so folder and filename must be
        # passed as two arguments; passing the tuple as one argument made
        # logging fail to format the record.
        log.info('Loading checkpoint "%s/%s"...', folder, filename)
        nnet.load_checkpoint(folder, filename)
    else:
        log.warning('Not loading a checkpoint!')

    log.info('Loading the Coach...')
    c = Coach(g, nnet, args, nn_args)

    if args.load_model:
        log.info("Loading 'trainExamples' from file...")
        c.loadTrainExamples()

    log.info('Starting the learning process 🎉')
    c.learn()
def main():
    """Build the TicTacToe game and network, optionally resume, and train.

    When ``args.load_model`` is truthy, network weights are restored from
    ``args.load_folder_file`` (folder, filename) and the Coach reloads its
    training examples before learning starts.
    """
    log.info('Loading %s...', TicTacToeGame.__name__)
    game = TicTacToeGame()

    log.info('Loading %s...', nn.__name__)
    net = nn(game)

    if not args.load_model:
        log.warning('Not loading a checkpoint!')
    else:
        log.info(f'Loading checkpoint "{args.load_folder_file}" ...')
        net.load_checkpoint(args.load_folder_file[0], args.load_folder_file[1])

    log.info('Loading the Coach...')
    coach = Coach(game, net, args)

    if args.load_model:
        log.info("Loading 'trainExamples' from file...")
        coach.loadTrainExamples()

    log.info('Starting the learning process 🎉')
    coach.learn()
def main():
    """Report CUDA availability, build the Curling game and network, and train.

    When ``args.load_model`` is truthy, network weights are restored from
    ``args.load_folder_file`` (folder, filename) and the Coach reloads its
    training examples before learning starts.
    """
    log.info('Cuda enabled: %s', torch.cuda.is_available())

    log.info('Loading Curling...')
    game = CurlingGame()

    log.info('Loading nn...')
    net = nn(game)

    if not args.load_model:
        log.warning('Not loading a checkpoint!')
    else:
        log.info('Loading checkpoint...')
        net.load_checkpoint(args.load_folder_file[0], args.load_folder_file[1])

    log.info('Loading Coach...')
    coach = Coach(game, net, args)

    if args.load_model:
        log.info("Load trainExamples from file")
        coach.loadTrainExamples()

    log.info('Learning...')
    coach.learn()
def main():
    """Build a 7x7 Hex game and its network, optionally resume, and train.

    When ``args.load_model`` is truthy, network weights are restored from
    ``args.load_folder_file`` (folder, filename) and the Coach reloads its
    training examples before learning starts.
    """
    log.info('Loading %s...', Game.__name__)
    # Size 7 (previously 6) because Hex is played on a 7x7 grid here.
    g = Game(7)

    log.info('Loading %s...', nn.__name__)
    nnet = nn(g)

    if args.load_model:
        folder, filename = args.load_folder_file[0], args.load_folder_file[1]
        # The message has two placeholders, so folder and filename must be
        # passed as two arguments; passing the tuple as one argument made
        # logging fail to format the record.
        log.info('Loading checkpoint "%s/%s"...', folder, filename)
        nnet.load_checkpoint(folder, filename)
    else:
        log.warning('Not loading a checkpoint!')

    log.info('Loading the Coach...')
    c = Coach(g, nnet, args)

    if args.load_model:
        log.info("Loading 'trainExamples' from file...")
        c.loadTrainExamples()

    log.info('Starting the learning process 🎉')
    c.learn()
def main():
    """Report GPU availability, build ``Game(6)`` and its network, and train.

    When ``args.load_model`` is truthy, network weights are restored from
    ``args.load_folder_file`` (folder, filename) and the Coach reloads its
    training examples before learning starts.
    """
    log.info('GPU availability: %s', torch.cuda.is_available())

    log.info('Loading %s...', Game.__name__)
    g = Game(6)

    log.info('Loading %s...', nn.__name__)
    nnet = nn(g, None)

    if args.load_model:
        folder, filename = args.load_folder_file[0], args.load_folder_file[1]
        # The message has two placeholders, so folder and filename must be
        # passed as two arguments; passing the tuple as one argument made
        # logging fail to format the record.
        log.info('Loading checkpoint "%s/%s"...', folder, filename)
        nnet.load_checkpoint(folder, filename)
    else:
        log.warning('Not loading a checkpoint!')

    log.info('Loading the Coach...')
    c = Coach(g, nnet, args)

    if args.load_model:
        log.info("Loading 'trainExamples' from file...")
        c.loadTrainExamples()

    log.info('Starting the learning process 🎉')
    c.learn()
#SET beta = 1 DURING TESTING SINCE x SHOULD BE UNKNOWN DURING TESTING. 'tempThreshold': 10, #dictates when the MCTS starts returning deterministic polices (vector of 0 and 1's). See Coach.py for more details. 'alpha': 1e-5, #note that reward for a terminal state is -alpha||x||_0 - gamma*||A_S*x-y||_2^2. The smaller alpha is, the more weight the algorithm gives in selecting a sparse solution. 'gamma': 1, #note that reward for a terminal state is -alpha||x||_0 - gamma*||A_S*x-y||_2^2. The smaller gamma is, the more likely algorithm is going to choose stopping action earlier(when ||x||_0 is small). gamma enforces how much we want to enforce Ax is close to y. #choice of gamma is heavily dependent on the distribution of our signal and the distribution of entries of A. gamma should be apx. bigger than m/||A_Sx^* - y||_2^2, where y = Ax, and x^* is the solution to the l2 regression problem. 'epsilon': 1e-5, #If x is the optimal solution to l2, and the residual of l2 regression ||A_Sx-y||_2^2 is less than epsilon, then the state corresponding to indices S is a terminal state in MCTS. } #START ALPHAZERO TRAINING: #Initialize Game_args, nnet, Game, and Alphazero Game_rules = Game_args() Game = CSGame() nnet = NNetWrapper(args) if args['load_nn_model'] == True: filename = 'best' nnet.load_checkpoint(args['network_checkpoint'], filename) Alphazero_train = Coach(Game, nnet, args, Game_rules) if args['load_training'] == True: print('Load trainExamples from file') Alphazero_train.loadTrainExamples() #Start Training Alphazero Alphazero_train.learn()
500, 'arenaTemp': 0.1, 'arenaMCTS': False, 'randomCompareFreq': 1, 'compareWithPast': True, 'pastCompareFreq': 3, 'expertValueWeight': dotdict({ 'start': 0, 'end': 0, 'iterations': 35 }), 'cpuct': 3, 'checkpoint': 'checkpoint', 'data': 'data', }) if __name__ == "__main__": g = Game() nnet = nn(g) c = Coach(g, nnet, args) c.learn()
if serialFlag: if gameChoice == 0: g = Game(6) elif gameChoice == 1: g = TicTacToeGame() elif gameChoice == 2: g = nimGame(nimConfig) nnet = nn(g) if args.load_model: nnet.load_checkpoint(args.load_folder_file[0], args.load_folder_file[1]) c = Coach(g, nnet, args) if args.load_model: print("Load trainExamples from file") c.loadTrainExamples() c.learn() else: def selfPlayOnly(args): g = nimGame(config) nnet = nn(g) coach_0 = Coach(g, nnet, args) for i in range(args.numIters): print("Self-play iteration: " + str(i)) nnet.load_checkpoint(args.load_folder_file[0],
# Training configuration handed to the Coach.
args = dotdict({
    'numIters': 1000,           # number of training rounds
    'numEps': 100,              # number of self-play games in each round
    'tempThreshold': 15,
    'updateThreshold': 0.6,     # the new net replaces the old one only if its win ratio exceeds this
    'maxlenOfQueue': 200000,    # TODO: maximum length of the history kept in memory?
    'numMCTSSims': 25,          # number of MCTS simulation rounds
    'arenaCompare': 40,         # number of games played between the old and the new net
    'cpuct': 1,

    'checkpoint': './temp/',
    'load_model': False,
    'load_folder_file': ('/dev/models/8x100x50', 'best.pth.tar'),
    'numItersForTrainExamplesHistory': 20,
})

if __name__ == "__main__":
    g = Game(6)
    nnet = nn(g)

    # Restore the network weights first; the Coach is created afterwards and
    # then reloads its own training examples.
    if args.load_model:
        nnet.load_checkpoint(*args.load_folder_file[:2])

    c = Coach(g, nnet, args)
    if args.load_model:
        print("Load trainExamples from file")
        c.loadTrainExamples()

    c.learn()
for n in range(args.n_cpu): disable_window = not (args.render == 'on' or args.render == 'single' and n == 0) env = FTGEnv(n, BasicBot, opponent, port=args.port + n, inverted_player=1, disable_window=disable_window, starts_with_energy=args.starts_with_energy, train='train' in args.train_or_test, verbose=False) env.run(block=False, ai_monitor=ai_monitor) envs.append(env) while Coach().memory.size < batch_size: time.sleep(5) pass epoch = 0 start_time = timer() highest_score = -1e-10 while True: Coach().training(epsilon) q_values = np.zeros(step_per_training) losses = np.zeros(step_per_training) for step in trange(step_per_training, desc='Training {}'.format(epoch)): max_q, loss = Coach().learn() q_values[step] = max_q.mean()