def playGames(self, num, verbose=False):
    """
    Plays num games in which player1 starts num/2 games and player2 starts
    num/2 games.

    Only 2 * int(num / 2) games are played, so an odd num is rounded down.
    The players are swapped for the second half and swapped back before
    returning (also on error), so repeated calls on the same arena report
    results for the same players.

    Returns:
        oneWon: games won by player1
        twoWon: games won by player2
        draws:  games won by nobody
    """
    half = int(num / 2)
    total = 2 * half  # games actually played; drives the bar and the counter

    eps_time = AverageMeter()
    bar = Bar('Arena.playGames', max=total)
    end = time.time()
    eps = 0

    oneWon = 0
    twoWon = 0
    draws = 0

    swapped = False
    try:
        # `one_wins` is the playGame() result that is credited to the ORIGINAL
        # player1: 1 while it sits in self.player1, -1 once the seats are
        # swapped.  Any other result value is counted as a draw.
        for one_wins in (1, -1):
            if one_wins == -1:
                self.player1, self.player2 = self.player2, self.player1
                swapped = True

            for _ in range(half):
                gameResult = self.playGame(verbose=verbose)
                if gameResult == one_wins:
                    oneWon += 1
                elif gameResult == -one_wins:
                    twoWon += 1
                else:
                    draws += 1

                # bookkeeping + plot progress
                eps += 1
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps,
                    maxeps=total,
                    et=eps_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td)
                bar.next()
    finally:
        # Leave the arena as we found it.
        if swapped:
            self.player1, self.player2 = self.player2, self.player1

    bar.finish()

    return oneWon, twoWon, draws
def learn(self):
    """
    Performs numIters iterations with numEps episodes of self-play in each
    iteration. After every iteration, it retrains the neural network with
    examples in trainExamples (which has a maximum length of maxlenOfQueue).
    It then pits the new neural network against the old one and accepts it
    only if it wins >= updateThreshold fraction of the decided games.

    An arena run without a single decided game (all draws) gives the new
    network no evidence of improvement, so it is rejected.
    """
    for i in range(1, self.args.numIters + 1):
        # examples of the iteration
        if not self.skipFirstSelfPlay or i > 1:
            iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

            eps_time = AverageMeter()
            bar = Bar('Self Play', max=self.args.numEps)
            end = time.time()

            for eps in range(self.args.numEps):
                self.mcts = MCTS(self.game, self.nnet, self.args)  # reset search tree
                self.gamecount += 1
                iterationTrainExamples += self.executeEpisode()

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps + 1,
                    maxeps=self.args.numEps,
                    et=eps_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td)
                bar.next()
            bar.finish()

            # save the iteration examples to the history
            self.trainExamplesHistory.append(iterationTrainExamples)

        if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
            # drop the oldest iteration's examples
            self.trainExamplesHistory.pop(0)

        # backup history to a file
        # NB! the examples were collected using the model from the previous
        # iteration, so (i-1)
        self.saveTrainExamples(i - 1)

        # shuffle examples before training
        trainExamples = []
        for e in self.trainExamplesHistory:
            trainExamples.extend(e)
        shuffle(trainExamples)

        # training new network, keeping a copy of the old one
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        self.pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        pmcts = MCTS(self.game, self.pnet, self.args)

        print("TOTAL GAMES PLAYED: ", self.gamecount)

        self.nnet.train(trainExamples)
        nmcts = MCTS(self.game, self.nnet, self.args)

        print('PITTING AGAINST PREVIOUS VERSION')
        arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                      lambda x: np.argmax(nmcts.getActionProb(x, temp=0)),
                      self.game)
        pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

        print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
        # `== 0 or` (not `> 0 and`): with no decided game the win ratio is
        # undefined and the new model must not be accepted by default.
        if pwins + nwins == 0 or float(nwins) / (pwins + nwins) < self.args.updateThreshold:
            print('REJECTING NEW MODEL')
            self.nnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        else:
            print('ACCEPTING NEW MODEL')
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename=self.getCheckpointFile(i))
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')
def learn(self):
    """
    Run numIters rounds of self-play, training and evaluation.

    Each round plays numEps self-play episodes, appends their examples to the
    rolling history, retrains the network on the shuffled history and then
    compares one episode driven by the previous network's search with one
    driven by the retrained network's search.  The retrained network is kept
    only if its share of counted wins reaches updateThreshold.

    Returns:
        (value-loss history, policy-loss history) accumulated from every
        call to self.nnet.train().
    """
    value_losses = []
    policy_losses = []

    for i in range(1, self.args.numIters + 1):
        # bookkeeping
        print('------ITER ' + str(i) + '------')

        # self-play (optionally skipped on the very first iteration)
        if i > 1 or not self.skipFirstSelfPlay:
            fresh_examples = deque([], maxlen=self.args.maxlenOfQueue)

            episode_timer = AverageMeter()
            bar = Bar('Self Play', max=self.args.numEps)
            tick = time.time()

            for eps in range(self.args.numEps):
                # every episode starts from an empty search tree
                self.mcts = MCTS(self.env, self.nnet, self.args)
                fresh_examples += self.executeEpisode()

                # timing + progress line
                episode_timer.update(time.time() - tick)
                tick = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps + 1,
                    maxeps=self.args.numEps,
                    et=episode_timer.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td)
                bar.next()
            bar.finish()

            # remember this iteration's examples
            self.trainExamplesHistory.append(fresh_examples)

        if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
            print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)

        # Persist the history.  The examples were produced by the model of
        # the previous iteration, hence the (i - 1) tag.
        self.saveTrainExamples(i - 1)

        # pool and shuffle everything we still remember
        trainExamples = [sample for batch in self.trainExamplesHistory for sample in batch]
        shuffle(trainExamples)

        # snapshot the current weights into pnet before training nnet
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        self.pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        pmcts = MCTS(self.env, self.pnet, self.args)

        ploss, vloss = self.nnet.train(trainExamples)
        policy_losses += ploss
        value_losses += vloss
        nmcts = MCTS(self.env, self.nnet, self.args)

        print('PITTING AGAINST PREVIOUS VERSION')
        example_pmcts = self.executeEpisode(mcts2=pmcts)
        example_nmcts = self.executeEpisode(mcts2=nmcts)

        # A win is an example whose field 0 lies in range(left_agent) and
        # whose field 3 equals 1.
        pwins = sum(1 for x in example_pmcts
                    if x[0] in range(self.args.left_agent) and x[3] == 1)
        nwins = sum(1 for x in example_nmcts
                    if x[0] in range(self.args.left_agent) and x[3] == 1)

        print('NEW/PREV WINS : %d / %d' % (nwins, pwins))
        decided = pwins + nwins
        rejected = decided == 0 or float(nwins) / decided < self.args.updateThreshold
        if rejected:
            print('REJECTING NEW MODEL')
            self.nnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        else:
            print('ACCEPTING NEW MODEL')
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename=self.getCheckpointFile(i))
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')

    return value_losses, policy_losses
def train(self, examples):
    """
    Fit the network on past self-play data.

    examples: list of examples, each of the form (board, pi, v).

    Every epoch runs int(len(examples) / batch_size) steps; each step draws
    batch_size example indices uniformly at random (with replacement).
    """
    for epoch in range(args.epochs):
        print('EPOCH ::: ' + str(epoch + 1))
        data_time = AverageMeter()
        batch_time = AverageMeter()
        pi_losses = AverageMeter()
        v_losses = AverageMeter()
        end = time.time()

        steps = int(len(examples) / args.batch_size)
        bar = Bar('Training Net', max=steps)

        for batch_idx in range(1, steps + 1):
            picked = np.random.randint(len(examples), size=args.batch_size)
            # columns: boards, target policies, target values
            boards, pis, vs = zip(*(examples[k] for k in picked))

            feed = {
                self.nnet.input_boards: boards,
                self.nnet.target_pis: pis,
                self.nnet.target_vs: vs,
                self.nnet.dropout: args.dropout,
                self.nnet.isTraining: True,
            }

            # time spent assembling the batch
            data_time.update(time.time() - end)

            # one optimisation step, then a second run to read the losses
            self.sess.run(self.nnet.train_step, feed_dict=feed)
            pi_loss, v_loss = self.sess.run(
                [self.nnet.loss_pi, self.nnet.loss_v], feed_dict=feed)
            pi_losses.update(pi_loss, len(boards))
            v_losses.update(v_loss, len(boards))

            # time for the whole step
            batch_time.update(time.time() - end)
            end = time.time()

            # progress line
            bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                batch=batch_idx,
                size=steps,
                data=data_time.avg,
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                lpi=pi_losses.avg,
                lv=v_losses.avg,
            )
            bar.next()
        bar.finish()
def main():
    """Train the image auto-encoder (ImageEncoder + ImageDecoder).

    Parses the command line, builds the image transform, loads the vocabulary
    and GloVe embeddings, creates the data loaders, and then runs
    args.num_epochs epochs of MSE reconstruction training.  Sample
    reconstructions are written under args.result_path every
    args.image_save_interval batches and both state dicts are saved to a
    time-stamped directory under args.model_path after every epoch.

    NOTE(review): `mask`, `glove_emb`, `val_loader`, `train_images` and the
    per-batch `captions` / `lengths` are computed but not used anywhere else
    in this function.
    """
    print("Initializing...")
    # global args
    args = parser.parse_args()

    # Timestamp used in the model directory name and checkpoint file names.
    now = datetime.datetime.now()
    current_date = now.strftime("%m-%d-%H-%M")

    # NOTE(review): these checks are asserts, so they vanish under `python -O`.
    assert args.text_criterion in ("MSE", "Cosine", "Hinge"), 'Invalid Loss Function'
    assert args.cm_criterion in ("MSE", "Cosine", "Hinge"), 'Invalid Loss Function'

    mask = args.common_emb_size
    assert mask <= args.hidden_size

    # args.cuda arrives as a string; only the exact value 'true' enables CUDA.
    cuda = args.cuda
    if cuda == 'true':
        cuda = True
    else:
        cuda = False

    # Image preprocessing //ATTENTION
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    result_path = args.result_path
    # String concatenation: args.model_path is expected to end with a
    # path separator.
    model_path = args.model_path + current_date + "/"
    if not os.path.exists(result_path):
        os.makedirs(result_path)
    if not os.path.exists(model_path):
        print("Creating model path on", model_path)
        os.makedirs(model_path)

    # Load vocabulary wrapper.
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # point args.vocab_path at trusted files.
    print("Loading Vocabulary...")
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Load Embeddings
    emb_size = args.embedding_size
    emb_path = args.embedding_path
    # A trailing '/' means a directory was given: append the GloVe file name.
    if args.embedding_path[-1] == '/':
        emb_path += 'glove.6B.' + str(emb_size) + 'd.txt'

    print("Loading Embeddings...")
    emb = load_glove_embeddings(emb_path, vocab.word2idx, emb_size)

    # Copy the loaded vectors into the lookup table and freeze it.
    glove_emb = Embeddings(emb_size, len(vocab.word2idx), vocab.word2idx["<pad>"])
    glove_emb.word_lut.weight.data.copy_(emb)
    glove_emb.word_lut.weight.requires_grad = False

    # glove_emb = nn.Embedding(emb.size(0), emb.size(1))
    # glove_emb = embedding(emb.size(0), emb.size(1))
    # glove_emb.weight = nn.Parameter(emb)

    # Freeze weighs
    # if args.fixed_embeddings == "true":
    #     glove_emb.weight.requires_grad = False

    # Build data loader
    # NOTE(review): the message says "Test Set" but this loader is the one
    # iterated by the training loop below.
    print("Building Data Loader For Test Set...")
    data_loader = get_loader(args.image_dir, args.caption_path, vocab,
                             transform, args.batch_size,
                             shuffle=True, num_workers=args.num_workers)

    print("Building Data Loader For Validation Set...")
    val_loader = get_loader(args.valid_dir, args.valid_caption_path, vocab,
                            transform, args.batch_size,
                            shuffle=True, num_workers=args.num_workers)

    print("Setting up the Networks...")
    encoder_Img = ImageEncoder(img_dimension=args.crop_size, feature_dimension=args.hidden_size)
    decoder_Img = ImageDecoder(img_dimension=args.crop_size, feature_dimension=args.hidden_size)

    if cuda:
        encoder_Img = encoder_Img.cuda()
        decoder_Img = decoder_Img.cuda()

    # Losses and Optimizers
    print("Setting up the Objective Functions...")
    img_criterion = nn.MSELoss()
    # txt_criterion = nn.MSELoss(size_average=True)
    if cuda:
        img_criterion = img_criterion.cuda()
    # txt_criterion = nn.CrossEntropyLoss()

    # gen_params = chain(generator_A.parameters(), generator_B.parameters())
    print("Setting up the Optimizers...")
    # img_params = chain(decoder_Img.parameters(), encoder_Img.parameters())
    # One optimizer over the parameters of both networks.
    img_params = list(decoder_Img.parameters()) + list(encoder_Img.parameters())

    # ATTENTION: Check betas and weight decay
    # ATTENTION: Check why valid_params fails on image networks with out of memory error
    # The learning rate is fixed at 0.001 here; args.learning_rate is only
    # referenced by the commented-out optimizers below.
    img_optim = optim.Adam(img_params, lr=0.001)  # ,betas=(0.5, 0.999), weight_decay=0.00001)
    # img_enc_optim = optim.Adam(encoder_Img.parameters(), lr=args.learning_rate)#betas=(0.5, 0.999), weight_decay=0.00001)
    # img_dec_optim = optim.Adam(decoder_Img.parameters(), lr=args.learning_rate)#betas=(0.5,0.999), weight_decay=0.00001)

    train_images = False  # Reverse 2

    for epoch in range(args.num_epochs):

        # TRAINING TIME
        print('EPOCH ::: TRAINING ::: ' + str(epoch + 1))
        batch_time = AverageMeter()
        img_losses = AverageMeter()
        txt_losses = AverageMeter()
        cm_losses = AverageMeter()
        end = time.time()

        bar = Bar('Training Net', max=len(data_loader))

        # Set training mode
        encoder_Img.train()
        decoder_Img.train()

        train_images = True
        for i, (images, captions, lengths) in enumerate(data_loader):
            # ATTENTION REMOVE
            # Hard cap on batches per epoch, flagged above for removal.
            if i == 6450:
                break

            # Set mini-batch dataset
            images = to_var(images)
            captions = to_var(captions)

            # target = pack_padded_sequence(captions, lengths, batch_first=True)[0]
            # captions, lengths = pad_sequences(captions, lengths)
            # images = torch.FloatTensor(images)

            captions = captions.transpose(0, 1).unsqueeze(2)
            lengths = torch.LongTensor(lengths)
            # print(captions.size())

            # Forward, Backward and Optimize
            # img_optim.zero_grad()
            # img_dec_optim.zero_grad()
            # img_enc_optim.zero_grad()
            encoder_Img.zero_grad()
            decoder_Img.zero_grad()

            # txt_params.zero_grad()
            # txt_dec_optim.zero_grad()
            # txt_enc_optim.zero_grad()

            # Image Auto_Encoder Forward
            img_encoder_outputs, Iz = encoder_Img(images)
            IzI = decoder_Img(img_encoder_outputs)

            # Reconstruction loss between the decoded and the input images.
            img_rc_loss = img_criterion(IzI, images)

            # Text Auto Encoder Forward
            # target = target[:-1] # exclude last target from inputs

            img_loss = img_rc_loss

            # NOTE(review): `.data[0]` presumably targets a pre-0.4 PyTorch
            # where a loss is a 1-element tensor — confirm the torch version.
            img_losses.update(img_rc_loss.data[0], args.batch_size)
            # Text and cross-modal losses are not computed here; zeros keep
            # the progress line's fields populated.
            txt_losses.update(0, args.batch_size)
            cm_losses.update(0, args.batch_size)

            # Image Network Training and Backpropagation
            img_loss.backward()
            img_optim.step()

            # Periodically dump the first three inputs and reconstructions.
            if i % args.image_save_interval == 0:
                # NOTE(review): with Python 3 true division this produces
                # directory names such as '0.0', '1.0' — confirm intended.
                subdir_path = os.path.join(result_path, str(i / args.image_save_interval))

                if os.path.exists(subdir_path):
                    pass
                else:
                    os.makedirs(subdir_path)

                for im_idx in range(3):
                    # CHW -> HWC, then x / 2 + 0.5 scaled to 0..255.
                    # NOTE(review): that rescaling assumes values in [-1, 1],
                    # which does not obviously match the Normalize() mean/std
                    # used in `transform` — TODO confirm.
                    im_or = (images[im_idx].cpu().data.numpy().transpose(1, 2, 0) / 2 + .5) * 255
                    im = (IzI[im_idx].cpu().data.numpy().transpose(1, 2, 0) / 2 + .5) * 255

                    filename_prefix = os.path.join(subdir_path, str(im_idx))
                    # NOTE(review): scipy.misc.imsave is absent from recent
                    # SciPy releases — confirm the pinned SciPy version.
                    scipy.misc.imsave(filename_prefix + '_original.A.jpg', im_or)
                    scipy.misc.imsave(filename_prefix + '.A.jpg', im)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # plot progress
            bar.suffix = '({batch}/{size}) Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_Img: {img_l:.3f}| Loss_Txt: {txt_l:.3f} | Loss_CM: {cm_l:.4f}'.format(
                batch=i,
                size=len(data_loader),
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                img_l=img_losses.avg,
                txt_l=txt_losses.avg,
                cm_l=cm_losses.avg,
            )
            bar.next()
        bar.finish()

        # Save the models
        # One decoder and one encoder checkpoint per epoch, named with the
        # 1-based epoch number and the start timestamp.
        print('\n')
        print('Saving the models in {}...'.format(model_path))
        torch.save(decoder_Img.state_dict(),
                   os.path.join(model_path, 'decoder-img-%d-' % (epoch + 1)) + current_date + ".pkl")
        torch.save(encoder_Img.state_dict(),
                   os.path.join(model_path, 'encoder-img-%d-' % (epoch + 1)) + current_date + ".pkl")
def learn(self):
    """
    Run numIters rounds of self-play, training and arena evaluation for the
    separate white and black networks.

    Each round plays numEps self-play episodes, stores the white and black
    examples in their own rolling histories, retrains either one network
    (alternating, starting with the side chosen by train_black_first) or both
    (train_both), and pits the retrained pair against the previous pair.  The
    new weights are kept only if the overall win ratio reaches updateThreshold
    and neither colour won fewer arena games than before.

    At the end of every round the prune probability, the number of arena
    games and the number of MCTS simulations are increased.
    """
    def pooled(history):
        # Flatten all stored iterations into one list and shuffle it.
        merged = [example for chunk in history for example in chunk]
        shuffle(merged)
        return merged

    self.game.prune_prob = self.args.prune_starting_prob
    train_black = self.args.train_black_first

    for i in range(1, self.args.numIters + 1):
        # bookkeeping
        print('------ITER ' + str(i) + '------')

        # self-play (optionally skipped on the very first iteration)
        if i > 1 or not self.args.skip_first_self_play:
            white_batch = deque([], maxlen=self.args.maxlenOfQueue)
            black_batch = deque([], maxlen=self.args.maxlenOfQueue)

            episode_timer = AverageMeter()
            bar = Bar('Self Play', max=self.args.numEps)
            tick = time.time()

            if self.args.profile_coach:
                prof = cProfile.Profile()
                prof.enable()

            for eps in range(self.args.numEps):
                # every episode starts from an empty search tree
                self.mcts = MCTS(self.game, self.white_nnet, self.black_nnet, self.args)
                white_examples, black_examples = self.executeEpisode()
                white_batch += white_examples
                black_batch += black_examples

                # timing + progress line
                episode_timer.update(time.time() - tick)
                tick = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps + 1,
                    maxeps=self.args.numEps,
                    et=episode_timer.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td)
                bar.next()
            bar.finish()

            if self.args.profile_coach:
                prof.disable()
                prof.print_stats(sort=2)

            # remember this iteration's examples, one history per colour
            self.trainExamplesHistory_white.append(white_batch)
            self.trainExamplesHistory_black.append(black_batch)

        # trim both histories in lock-step, keyed on the white one
        while len(self.trainExamplesHistory_white) > self.args.numItersForTrainExamplesHistory:
            print("len(trainExamplesHistory) =", len(self.trainExamplesHistory_white),
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory_white.pop(0)
            self.trainExamplesHistory_black.pop(0)

        # Persist the history.  The examples were produced by the model of
        # the previous iteration, hence the (i - 1) tag.
        self.saveTrainExamples(i - 1)

        # snapshot the current weights into the "previous" networks
        self.white_nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp_white.pth.tar')
        self.black_nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp_black.pth.tar')
        self.white_pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp_white.pth.tar')
        self.black_pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp_black.pth.tar')
        pmcts = MCTS(self.game, self.white_pnet, self.black_pnet, self.args)

        # Which networks this round touches: both, or only the side whose
        # turn it is.  Black always goes first when both are trained.
        touch_black = self.args.train_both or train_black
        touch_white = self.args.train_both or not train_black

        if touch_black:
            self.black_nnet.train(pooled(self.trainExamplesHistory_black))
        if touch_white:
            self.white_nnet.train(pooled(self.trainExamplesHistory_white))

        nmcts = MCTS(self.game, self.white_nnet, self.black_nnet, self.args)

        print('PITTING AGAINST PREVIOUS VERSION')
        arena = Arena(
            lambda board, turn_player: np.argmax(pmcts.getActionProb(board, turn_player, temp=0)),
            lambda board, turn_player: np.argmax(nmcts.getActionProb(board, turn_player, temp=0)),
            self.game)
        (pwins, nwins, draws,
         pwins_white, pwins_black,
         nwins_white, nwins_black) = arena.playGames(self.args.arenaCompare,
                                                     self.args.profile_arena)

        print('NEW/PREV WINS (white, black) : (%d,%d) / (%d,%d) ; DRAWS : %d'
              % (nwins_white, nwins_black, pwins_white, pwins_black, draws))

        rejected = (pwins + nwins == 0
                    or float(nwins) / (pwins + nwins) < self.args.updateThreshold
                    or nwins_black < pwins_black
                    or nwins_white < pwins_white)

        if rejected:
            print('REJECTING NEW MODEL')
            # roll back exactly the networks that were retrained
            if touch_black:
                self.black_nnet.load_checkpoint(folder=self.args.checkpoint,
                                                filename='temp_black.pth.tar')
            if touch_white:
                self.white_nnet.load_checkpoint(folder=self.args.checkpoint,
                                                filename='temp_white.pth.tar')
        else:
            print('ACCEPTING NEW MODEL')
            if self.args.train_both:
                self.black_nnet.save_checkpoint(folder=self.args.checkpoint,
                                                filename='best_black.pth.tar')
                self.white_nnet.save_checkpoint(folder=self.args.checkpoint,
                                                filename='best_white.pth.tar')
            elif train_black:
                self.black_nnet.save_checkpoint(folder=self.args.checkpoint,
                                                filename='best_black.pth.tar')
                # hand over to the other colour
                print("training white neural net next")
                train_black = False
            else:
                self.white_nnet.save_checkpoint(folder=self.args.checkpoint,
                                                filename='best_white.pth.tar')
                # hand over to the other colour
                print("training black neural net next")
                train_black = True

        # per-iteration schedule: more pruning, more arena games, deeper search
        self.game.prune_prob += self.args.prune_prob_gain_per_iteration
        self.args.arenaCompare = math.floor(self.args.arenaCompare * 1.05)
        self.args.numMCTSSims = math.floor(self.args.numMCTSSims * 1.1)
        print("prune probability: " + str(self.game.prune_prob)
              + ", episodes: " + str(self.args.numEps)
              + ", sims: " + str(self.args.numMCTSSims)
              + ", arena compare: " + str(self.args.arenaCompare))
def learn(self):
    """
    Performs numIters iterations with numEps episodes of self-play in each
    iteration. After every iteration, it retrains the neural network with
    examples in trainExamples (which has a maximum length of maxlenOfQueue).
    It then pits the new neural network against the old one and accepts it
    only if it wins >= updateThreshold fraction of the decided games.

    An arena run without a single decided game (all draws) gives the new
    network no evidence of improvement, so it is rejected.
    """
    for i in range(1, self.args.numIters + 1):  # one pass per training round
        # bookkeeping
        print('------ITER ' + str(i) + '------')

        # examples of the iteration
        if not self.skipFirstSelfPlay or i > 1:
            # fresh, bounded container for this round's examples
            iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

            eps_time = AverageMeter()
            bar = Bar('Self Play', max=self.args.numEps)
            end = time.time()

            for eps in range(self.args.numEps):  # one self-play game each
                self.mcts = MCTS(self.game, self.nnet, self.args)  # reset search tree
                # play one game and collect the examples it produced
                selfPlayResult = self.executeEpisode()
                iterationTrainExamples += selfPlayResult

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps + 1,
                    maxeps=self.args.numEps,
                    et=eps_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td)
                bar.next()
            bar.finish()

            # self-play finished: save the iteration examples to the history
            self.trainExamplesHistory.append(iterationTrainExamples)

        if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
            print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)  # drop the oldest round

        # backup history to a file
        # NB! the examples were collected using the model from the previous
        # iteration, so (i-1)
        self.saveTrainExamples(i - 1)

        # shuffle examples before training
        trainExamples = []
        for e in self.trainExamplesHistory:
            trainExamples.extend(e)
        shuffle(trainExamples)

        # training new network, keeping a copy of the old one
        self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                  filename='temp.pth.tar')  # snapshot current net
        self.pnet.load_checkpoint(folder=self.args.checkpoint,
                                  filename='temp.pth.tar')  # ...into the "previous" net
        pmcts = MCTS(self.game, self.pnet, self.args)  # search for the previous net

        self.nnet.train(trainExamples)  # train on the pooled history
        nmcts = MCTS(self.game, self.nnet, self.args)  # search for the new net

        # old vs new
        print('PITTING AGAINST PREVIOUS VERSION')
        arena = Arena(
            lambda board, turn: np.argmax(pmcts.getActionProb(board, turn, temp=0)),
            lambda board, turn: np.argmax(nmcts.getActionProb(board, turn, temp=0)),
            self.game)
        pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

        print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
        # `== 0 or` (not `> 0 and`): with no decided game the win ratio is
        # undefined and the new model must not be accepted by default.
        if pwins + nwins == 0 or float(nwins) / (pwins + nwins) < self.args.updateThreshold:
            print('REJECTING NEW MODEL')
            # fall back to the snapshot taken before training
            self.nnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        else:
            print('ACCEPTING NEW MODEL')
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename=self.getCheckpointFile(i))
            # the accepted net also becomes the current best
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')
def train(self, examples):
    """
    Fit the network on self-play data without printing any progress.

    examples: list of examples, each of the form (board, pi, v).

    Every epoch runs int(len(examples) / batch_size) steps; each step draws
    batch_size example indices uniformly at random (with replacement).  The
    timing and loss meters are still updated but never reported.
    """
    for epoch in range(args.epochs):
        data_time = AverageMeter()
        batch_time = AverageMeter()
        pi_losses = AverageMeter()
        v_losses = AverageMeter()
        end = time.time()

        for _step in range(int(len(examples) / args.batch_size)):
            picked = np.random.randint(len(examples), size=args.batch_size)
            boards, pis, vs = zip(*(examples[k] for k in picked))

            feed = {
                self.nnet.input_boards: boards,
                self.nnet.target_pis: pis,
                self.nnet.target_vs: vs,
                self.nnet.dropout: args.dropout,
                self.nnet.isTraining: True,
            }

            # time spent assembling the batch
            data_time.update(time.time() - end)

            # one optimisation step, then a second run to read the losses
            self.sess.run(self.nnet.train_step, feed_dict=feed)
            pi_loss, v_loss = self.sess.run(
                [self.nnet.loss_pi, self.nnet.loss_v], feed_dict=feed)
            pi_losses.update(pi_loss, len(boards))
            v_losses.update(v_loss, len(boards))

            # time for the whole step
            batch_time.update(time.time() - end)
            end = time.time()
def train(self, batches, train_steps):
    """
    Run exactly train_steps optimisation steps over `batches`.

    `batches` is iterated repeatedly until train_steps steps have been taken;
    each item must unpack to (boards, target_pis, target_vs).  After the last
    step the scheduler is stepped once with the sum of the average policy and
    value losses.

    Returns:
        (average policy loss, average value loss) over all steps.

    Raises:
        ValueError: if steps are still owed but a pass over `batches` yields
            nothing (empty, or a one-shot iterator that is exhausted).
            Without this check the loop would never terminate.
    """
    self.nnet.train()

    data_time = AverageMeter()
    batch_time = AverageMeter()
    pi_losses = AverageMeter()
    v_losses = AverageMeter()
    end = time()

    bar = Bar('Training Net', max=train_steps)
    current_step = 0
    while current_step < train_steps:
        step_at_pass_start = current_step

        for batch in batches:
            if current_step == train_steps:
                break
            current_step += 1
            boards, target_pis, target_vs = batch

            # move the batch to the GPU when requested
            if args.cuda:
                boards = boards.contiguous().cuda()
                target_pis = target_pis.contiguous().cuda()
                target_vs = target_vs.contiguous().cuda()

            # measure data loading time
            data_time.update(time() - end)

            # compute output
            out_pi, out_v = self.nnet(boards)
            l_pi = self.loss_pi(target_pis, out_pi)
            l_v = self.loss_v(target_vs, out_v)
            total_loss = l_pi + l_v

            # record loss
            pi_losses.update(l_pi.item(), boards.size(0))
            v_losses.update(l_v.item(), boards.size(0))

            # compute gradient and do SGD step
            self.optimizer.zero_grad()
            total_loss.backward()
            self.optimizer.step()

            # measure elapsed time
            batch_time.update(time() - end)
            end = time()

            # plot progress
            bar.suffix = '({step}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                step=current_step,
                size=train_steps,
                data=data_time.avg,
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                lpi=pi_losses.avg,
                lv=v_losses.avg,
            )
            bar.next()

        if current_step == step_at_pass_start:
            # A whole pass produced no batch: looping again cannot help.
            bar.finish()
            raise ValueError(
                'batches yielded no data; completed %d of %d training steps'
                % (current_step, train_steps))

    self.scheduler.step(pi_losses.avg + v_losses.avg)
    bar.finish()
    print()

    return pi_losses.avg, v_losses.avg
def train(self, examples):
    """
    Fit the turn-aware network on past self-play data.

    examples: list of examples, each of the form (board, pi, v, turn).

    Every epoch runs int(len(examples) / batch_size) steps; each step draws
    batch_size example indices uniformly at random (with replacement).
    """
    for epoch in range(args.epochs):
        print('EPOCH ::: ' + str(epoch + 1))
        data_time = AverageMeter()
        batch_time = AverageMeter()
        pi_losses = AverageMeter()
        v_losses = AverageMeter()
        end = time.time()

        steps = int(len(examples) / args.batch_size)
        bar = Bar('Training Net', max=steps)

        for batch_idx in range(1, steps + 1):
            picked = np.random.randint(len(examples), size=args.batch_size)
            # columns: boards, target policies, target values, turn numbers
            boards, pis, vs, turns = zip(*(examples[k] for k in picked))
            # each turn is fed as its own one-element row
            turns = [[t] for t in turns]

            train_input_dict = {
                self.nnet.input_boards: boards,  # network input
                self.nnet.turn: turns,
                self.nnet.target_pis: pis,  # policy target for the loss
                self.nnet.target_vs: vs,  # value target for the loss
                self.nnet.dropout: args.dropout,
                self.nnet.isTraining: True,
            }

            # time spent assembling the batch
            data_time.update(time.time() - end)

            # one optimisation step, then a second run to read the losses
            self.sess.run(self.nnet.train_step, feed_dict=train_input_dict)
            pi_loss, v_loss = self.sess.run(
                [self.nnet.loss_pi, self.nnet.loss_v],
                feed_dict=train_input_dict)
            pi_losses.update(pi_loss, len(boards))
            v_losses.update(v_loss, len(boards))

            # time for the whole step
            batch_time.update(time.time() - end)
            end = time.time()

            # progress line
            bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                batch=batch_idx,
                size=steps,
                data=data_time.avg,
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                lpi=pi_losses.avg,
                lv=v_losses.avg,
            )
            bar.next()
        bar.finish()
def playGames(self, num, verbose=False):
    """
    Plays num games of self-play: player1 against itself for int(num / 2)
    games, then player2 against itself for int(num / 2) games.

    self.player1 and self.player2 are temporarily both set to the player
    under evaluation and are restored before returning (also on error).

    Returns:
        finalScore1: mean playGame() result over the player1-vs-player1 games
        finalScore2: mean playGame() result over the player2-vs-player2 games

    With num < 2 no games are played and both means are 0/0 (NaN from NumPy).
    """
    half = int(num / 2)
    total = 2 * half  # games actually played; drives the bar and the counter

    eps_time = AverageMeter()
    bar = Bar('Arena.playGames', max=total)
    end = time.time()
    eps = 0

    # Keep the real players: overwriting self.player2 during the first half
    # must not lose the second contender.
    first, second = self.player1, self.player2
    gameResults = []
    gameResults2 = []

    try:
        for contender, results in ((first, gameResults), (second, gameResults2)):
            self.player1, self.player2 = contender, contender

            for _ in range(half):
                results.append(self.playGame(verbose=verbose))

                # bookkeeping + plot progress
                eps += 1
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps,
                    maxeps=total,
                    et=eps_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td)
                bar.next()
    finally:
        # Leave the arena as we found it.
        self.player1, self.player2 = first, second

    bar.finish()

    finalScore1 = np.sum(gameResults) / float(len(gameResults))
    finalScore2 = np.sum(gameResults2) / float(len(gameResults2))

    return finalScore1, finalScore2
def train(self, examples):
    """
    Fit the network on self-play data.

    examples: list of examples, each of the form (board, pi, v).

    Every epoch runs int(len(examples) / batch_size) steps; each step draws
    batch_size example indices uniformly at random (with replacement).
    """
    for epoch in range(args.epochs):
        print('EPOCH ::: ' + str(epoch+1))
        data_time = AverageMeter()
        batch_time = AverageMeter()
        pi_losses = AverageMeter()
        v_losses = AverageMeter()
        end = time.time()

        total_batches = int(len(examples) / args.batch_size)
        bar = Bar('Training Net', max=total_batches)

        done = 0
        while done < total_batches:
            chosen = np.random.randint(len(examples), size=args.batch_size)
            batch = [examples[j] for j in chosen]
            boards, pis, vs = zip(*batch)

            # placeholders -> values for this step
            feed = {}
            feed[self.nnet.input_boards] = boards
            feed[self.nnet.target_pis] = pis
            feed[self.nnet.target_vs] = vs
            feed[self.nnet.dropout] = args.dropout
            feed[self.nnet.isTraining] = True

            # time spent assembling the batch
            data_time.update(time.time() - end)

            # one optimisation step, then a second run to read the losses
            self.sess.run(self.nnet.train_step, feed_dict=feed)
            pi_loss, v_loss = self.sess.run(
                [self.nnet.loss_pi, self.nnet.loss_v], feed_dict=feed)
            pi_losses.update(pi_loss, len(boards))
            v_losses.update(v_loss, len(boards))

            # time for the whole step
            batch_time.update(time.time() - end)
            end = time.time()
            done += 1

            # progress line
            bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                batch=done,
                size=total_batches,
                data=data_time.avg,
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                lpi=pi_losses.avg,
                lv=v_losses.avg,
            )
            bar.next()
        bar.finish()
def train(self, examples):
    """
    Trains the neural network with examples obtained from self-play.

    Input:
        examples: a list of training examples, where each example is of
                  form (board, pi, v). pi is the MCTS informed policy
                  vector for the given board, and v is its value. The
                  examples has board in its canonical form.

    One Adam optimizer is created for the whole call. Every epoch performs
    int(len(examples) / args.batch_size) steps on batches sampled with
    replacement.
    """
    optimizer = optim.Adam(self.nnet.parameters())

    for epoch in range(args.epochs):
        self.nnet.train()
        data_time = AverageMeter()
        batch_time = AverageMeter()
        pi_losses = AverageMeter()
        v_losses = AverageMeter()
        end = time.time()

        steps = int(len(examples) / args.batch_size)
        bar = Bar('Training Net', max=steps)

        for batch_idx in range(1, steps + 1):
            chosen = np.random.randint(len(examples), size=args.batch_size)
            boards, pis, vs = list(zip(*[examples[k] for k in chosen]))
            boards = Variable(torch.FloatTensor(np.array(boards).astype(np.float64)))
            target_pis = Variable(torch.FloatTensor(np.array(pis)))
            target_vs = Variable(torch.FloatTensor(np.array(vs).astype(np.float64)))

            data_time.update(time.time() - end)

            out_pi, out_v = self.nnet(boards)
            l_pi = self.loss_pi(target_pis, out_pi)  # note: loss function to be written
            l_v = self.loss_v(target_vs, out_v)  # note: loss function to be written
            total_loss = l_pi + l_v

            batch_len = boards.size(0)
            pi_losses.update(l_pi.data[0], batch_len)
            v_losses.update(l_v.data[0], batch_len)

            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

            batch_time.update(time.time() - end)
            end = time.time()

            bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                batch=batch_idx,
                size=int(len(examples) / args.batch_size),
                data=data_time.avg,
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                lpi=pi_losses.avg,
                lv=v_losses.avg,
            )
            bar.next()
        bar.finish()
def train(model, dm, loss_criterion, optimizer, args):
    """
    Runs args.batches_per_epoch optimisation steps on sampled batches.

    Args:
        model: network to train; called with the sampled embeddings and
            an initial encoder hidden state.
        dm: data source providing sample_train_batch.
        loss_criterion: callable applied to (logits, targets).
        optimizer: optimizer stepping the model parameters.
        args: options read here: batches_per_epoch, batch_size, cuda.

    Returns:
        (average loss, average accuracy) over the processed batches.
    """
    model.train()  # switch to train mode

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    meters = {
        key: AverageMeter()
        for key in ('p_micro', 'r_micro', 'f_micro',
                    'p_macro', 'r_macro', 'f_macro', 's_macro',
                    'mAP_micro', 'mAP_macro', 'acc')
    }

    end = time.time()
    bar = Bar('Processing', max=args.batches_per_epoch)
    batch_idx = 0

    while batch_idx < args.batches_per_epoch:
        # sample batch
        sampled = dm.sample_train_batch(batch_size=args.batch_size,
                                        embed1=model.glove_embed,
                                        embed2=model.other_embed,
                                        use_cuda=args.cuda)
        des, des_unsort, ind, ind_unsort, act, act_unsort, targets = sampled
        encoder_init_hidden = model.encoder.initHidden(
            batch_size=args.batch_size)

        if args.cuda:
            model = model.cuda()
            targets = targets.cuda()
            if len(encoder_init_hidden):
                encoder_init_hidden = [h.cuda() for h in encoder_init_hidden]
            else:
                encoder_init_hidden = encoder_init_hidden.cuda()
            loss_criterion = loss_criterion.cuda()

        # measure data loading time
        data_time.update(time.time() - end)

        # compute output
        logit_output = model(des_embed=des,
                             des_unsort=des_unsort,
                             ind_embed=ind,
                             ind_unsort=ind_unsort,
                             act_embed=act,
                             act_unsort=act_unsort,
                             encoder_init_hidden=encoder_init_hidden,
                             batch_size=args.batch_size)
        loss = loss_criterion(logit_output, targets)

        # measure precision, recall, fscore, support and record loss
        (batch_p_micro, batch_r_micro, batch_f_micro, batch_s_micro,
         batch_p_macro, batch_r_macro, batch_f_macro, batch_s_macro,
         batch_mAP_micro, batch_mAP_macro, batch_acc) = compute_metrics(
             logit=logit_output, target=targets)
        batch_values = (
            ('p_macro', batch_p_macro),
            ('p_micro', batch_p_micro),
            ('r_macro', batch_r_macro),
            ('r_micro', batch_r_micro),
            ('f_macro', batch_f_macro),
            ('f_micro', batch_f_micro),
            ('s_macro', batch_s_macro),
            ('mAP_micro', batch_mAP_micro),
            ('mAP_macro', batch_mAP_macro),
            ('acc', batch_acc),
        )
        for key, value in batch_values:
            meters[key].update(value, args.batch_size)
        losses.update(loss.item(), args.batch_size)

        # compute gradient, then optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        batch_idx += 1

        # plot progress
        template = ('({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s'
                    '| Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | Acc: {acc:.3f} '
                    '| P: {p:.3f}| R: {r:.3f}| F: {f:.3f}| mAP mic: {mAP:.3f}|')
        bar.suffix = template.format(
            batch=batch_idx,
            size=args.batches_per_epoch,
            data=data_time.avg,
            bt=batch_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            acc=meters['acc'].avg,
            p=meters['p_micro'].avg,
            r=meters['r_micro'].avg,
            f=meters['f_micro'].avg,
            mAP=meters['mAP_micro'].avg,
        )
        bar.next()
    bar.finish()
    return losses.avg, meters['acc'].avg
def main():
    """
    Parses the command line, builds the data pipeline and trains the model.

    Steps: validate the arguments, build the image transform, load the
    vocabulary and the GloVe embeddings, create the train/validation
    loaders, create the trainer, then run args.num_epochs epochs. After
    every epoch the average image/text losses and the models are saved
    through the trainer.
    """
    # global args
    args = parser.parse_args()

    # <editor-fold desc="Initialization">
    if args.comment == "test":
        print("WARNING: name is test!!!\n\n")

    assert args.text_criterion in ("MSE", "Cosine", "Hinge", "NLLLoss"), 'Invalid Loss Function'
    assert args.cm_criterion in ("MSE", "Cosine", "Hinge"), 'Invalid Loss Function'
    assert args.common_emb_ratio <= 1.0 and args.common_emb_ratio >= 0
    # </editor-fold>

    # <editor-fold desc="Image Preprocessing">
    # Image preprocessing //ATTENTION
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    # </editor-fold>

    # <editor-fold desc="Creating Embeddings">
    # Load vocabulary wrapper.
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # point args.vocab_path at a trusted vocabulary file.
    print("Loading Vocabulary...")
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Load Embeddings. A path ending in '/' is treated as a directory and
    # the GloVe file name is appended. endswith() also copes with an empty
    # path, where indexing [-1] would raise IndexError.
    emb_size = args.word_embedding_size
    emb_path = args.embedding_path
    if emb_path.endswith('/'):
        emb_path += 'glove.6B.' + str(emb_size) + 'd.txt'

    print("Loading Embeddings...")
    emb = load_glove_embeddings(emb_path, vocab.word2idx, emb_size)

    # NOTE(review): only the shape of `emb` is used here; the loaded vectors
    # are not copied into glove_emb in this function - confirm intended.
    glove_emb = nn.Embedding(emb.size(0), emb.size(1))

    # Freeze weights
    if args.fixed_embeddings == "true":
        glove_emb.weight.requires_grad = False
    # </editor-fold>

    # <editor-fold desc="Data-Loaders">
    # Build data loaders
    print("Building Data Loader For Training Set...")
    data_loader = get_loader(args.image_dir, args.caption_path, vocab,
                             transform, args.batch_size,
                             shuffle=True, num_workers=args.num_workers)

    print("Building Data Loader For Validation Set...")
    val_loader = get_loader(args.valid_dir, args.valid_caption_path, vocab,
                            transform, args.batch_size,
                            shuffle=True, num_workers=args.num_workers)
    # </editor-fold>

    # <editor-fold desc="Network Initialization">
    print("Setting up the trainer...")
    model_trainer = trainer(args, glove_emb, vocab)
    # </editor-fold>

    for epoch in range(args.num_epochs):
        # <editor-fold desc="Epoch Initialization">
        # TRAINING TIME
        print('EPOCH ::: TRAINING ::: ' + str(epoch + 1))
        batch_time = AverageMeter()
        # Per-epoch loss meters; they were used below without ever being
        # created in this function.
        txt_losses = AverageMeter()
        img_losses = AverageMeter()
        end = time.time()

        bar = Bar('Training Net', max=len(data_loader))
        # </editor-fold>

        for i, (images, captions, lengths) in enumerate(data_loader):
            # The final batch of the loader is skipped.
            if i == len(data_loader) - 1:
                break

            images = to_var(images)
            captions = to_var(captions)
            lengths = to_var(torch.LongTensor(lengths))

            img_rc_loss, txt_rc_loss = model_trainer.train(
                images, captions, lengths, not i % args.image_save_interval)

            txt_losses.update(txt_rc_loss.data[0], args.batch_size)
            img_losses.update(img_rc_loss.data[0], args.batch_size)

            batch_time.update(time.time() - end)
            end = time.time()

            # plot progress (must be set on the bar to be displayed)
            bar.suffix = '({batch}/{size}) Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                batch=i,
                size=len(data_loader),
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
            )
            bar.next()

        # <editor-fold desc="Logging">
        bar.finish()
        model_trainer.save_losses(epoch, img_losses.avg, txt_losses.avg)
        model_trainer.save_models(epoch)
        # </editor-fold>
def train(self, examples):
    """
    Trains the wrapped network for args.epochs epochs.

    examples: list of examples, each example is of form (board, pi, v)

    Each epoch performs int(len(examples) / args.batch_size) Adam steps on
    batches sampled with replacement; tensors are moved to the GPU when
    args.cuda is set.
    """
    optimizer = optim.Adam(self.nnet.parameters())

    for epoch_no in range(1, args.epochs + 1):
        print('EPOCH ::: ' + str(epoch_no))
        self.nnet.train()
        data_time = AverageMeter()
        batch_time = AverageMeter()
        pi_losses = AverageMeter()
        v_losses = AverageMeter()
        end = time.time()

        steps = int(len(examples) / args.batch_size)
        bar = Bar('Training Net', max=steps)

        for batch_idx in range(1, steps + 1):
            chosen = np.random.randint(len(examples), size=args.batch_size)
            boards, pis, vs = list(zip(*[examples[k] for k in chosen]))
            boards = torch.FloatTensor(np.array(boards).astype(np.float64))
            target_pis = torch.FloatTensor(np.array(pis))
            target_vs = torch.FloatTensor(np.array(vs).astype(np.float64))

            # move the batch to the GPU if requested, then wrap it
            if args.cuda:
                boards = boards.contiguous().cuda()
                target_pis = target_pis.contiguous().cuda()
                target_vs = target_vs.contiguous().cuda()
            boards = Variable(boards)
            target_pis = Variable(target_pis)
            target_vs = Variable(target_vs)

            # time spent preparing the batch
            data_time.update(time.time() - end)

            # forward pass and losses
            out_pi, out_v = self.nnet(boards)
            l_pi = self.loss_pi(target_pis, out_pi)
            l_v = self.loss_v(target_vs, out_v)
            total_loss = l_pi + l_v

            # record loss
            batch_len = boards.size(0)
            pi_losses.update(l_pi.data[0], batch_len)
            v_losses.update(l_v.data[0], batch_len)

            # gradient step
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

            # time spent on the whole step
            batch_time.update(time.time() - end)
            end = time.time()

            # progress line
            bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                batch=batch_idx,
                size=int(len(examples) / args.batch_size),
                data=data_time.avg,
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                lpi=pi_losses.avg,
                lv=v_losses.avg,
            )
            bar.next()
        bar.finish()
def learn(self):
    """
    Performs numIters iterations with numEps episodes of self-play in each
    iteration. After every iteration, it retrains neural network with
    examples in trainExamples (which has a maximium length of
    maxlenofQueue). It then scores the previous and the new network with
    Arena.playGames and keeps the new network unless the previous one
    obtained a strictly higher score.

    Side effects per iteration: the training examples are saved, the
    scores are appended to "output.txt", and checkpoints are written to
    self.args.checkpoint.
    """
    for i in range(1, self.args.numIters + 1):
        # bookkeeping
        print('------ITER ' + str(i) + '------')

        # examples of the iteration
        if not self.skipFirstSelfPlay or i > 1:
            iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

            eps_time = AverageMeter()
            bar = Bar('Self Play', max=self.args.numEps)
            end = time.time()

            for eps in range(self.args.numEps):
                # sample one sentence from the database (yelp)
                self.mcts = MCTS(self.game, self.nnet, self.args)  # reset search tree
                iterationTrainExamples += self.executeEpisode()

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps + 1,
                    maxeps=self.args.numEps,
                    et=eps_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td)
                bar.next()
            bar.finish()

            # save the iteration examples to the history
            self.trainExamplesHistory.append(iterationTrainExamples)

        if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
            print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)

        # backup history to a file
        # NB! the examples were collected using the model from the previous
        # iteration, so (i-1)
        self.saveTrainExamples(i - 1)

        # shuffle examples before training
        trainExamples = []
        for e in self.trainExamplesHistory:
            trainExamples.extend(e)
        shuffle(trainExamples)

        # training new network, keeping a copy of the old one
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        self.pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        pmcts = MCTS(self.game, self.pnet, self.args)

        print(np.shape(trainExamples))
        self.nnet.train(trainExamples)
        nmcts = MCTS(self.game, self.nnet, self.args)

        print('PITTING AGAINST PREVIOUS VERSION')
        arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                      lambda x: np.argmax(nmcts.getActionProb(x, temp=0)),
                      self.game)
        # finalScore1 belongs to the previous network (first arena player),
        # finalScore2 to the newly trained one (second arena player).
        finalScore1, finalScore2 = arena.playGames(self.args.arenaCompare)

        # Report both scores; the second value used to repeat finalScore1.
        scoreLine = 'Score NN1 : %.2f ; Score NN2 : %.2f' % (finalScore1, finalScore2)
        with open("output.txt", "a") as text_file:
            text_file.write(scoreLine + '\n')
        print(scoreLine)

        if finalScore1 > finalScore2:
            print('REJECTING NEW MODEL')
            self.nnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        else:
            print('ACCEPTING NEW MODEL')
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename=self.getCheckpointFile(i))
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')
def train(self, iteration=None, board=None, numeps=None):
    """
    Runs one self-play / training / arena iteration.

    Args:
        iteration: number of the iteration. It is used for the checkpoint
            names and `iteration - 1` is always evaluated, so a number has
            to be supplied despite the None default.
        board: optional start board; when given it is passed to
            executeEpisode, otherwise executeEpisode is called without
            arguments.
        numeps: number of self-play episodes; defaults to
            self.args.numEps when None.

    The new network is rejected (previous weights reloaded) when its share
    of the decisive arena games is below self.args.updateThreshold;
    otherwise it is saved as the iteration checkpoint and as
    'best.pth.tar'.
    """
    # Honour an explicit episode count; fall back to the configured one.
    if numeps is None:
        numeps = self.args.numEps

    # examples of the iteration
    if not self.skipFirstSelfPlay or iteration > 1:
        iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

        eps_time = AverageMeter()
        bar = Bar('Self Play', max=numeps)
        end = time.time()

        for eps in range(numeps):
            self.mcts = MCTS(self.game, self.nnet, self.args)  # reset search tree
            if board is None:
                iterationTrainExamples += self.executeEpisode()
            else:
                iterationTrainExamples += self.executeEpisode(board)

            # bookkeeping + plot progress
            eps_time.update(time.time() - end)
            end = time.time()
            bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                eps=eps + 1,
                maxeps=numeps,
                et=eps_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td)
            bar.next()
        bar.finish()

        # save the iteration examples to the history
        self.trainExamplesHistory.append(iterationTrainExamples)

    if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
        print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
              " => remove the oldest trainExamples")
        self.trainExamplesHistory.pop(0)

    # backup history to a file
    # NB! the examples were collected using the model from the previous
    # iteration, so (iteration - 1)
    self.saveTrainExamples(iteration - 1)

    # shuffle examples before training
    trainExamples = []
    for e in self.trainExamplesHistory:
        trainExamples.extend(e)
    shuffle(trainExamples)

    # training new network, keeping a copy of the old one
    self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
    self.pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
    pmcts = MCTS(self.game, self.pnet, self.args)

    self.nnet.train(trainExamples)
    nmcts = MCTS(self.game, self.nnet, self.args)

    print('PITTING AGAINST PREVIOUS VERSION')
    arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                  lambda x: np.argmax(nmcts.getActionProb(x, temp=0)),
                  self.game)
    pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

    print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
    if pwins + nwins > 0 and float(nwins) / (pwins + nwins) < self.args.updateThreshold:
        print('REJECTING NEW MODEL')
        self.nnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
    else:
        print('ACCEPTING NEW MODEL')
        self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                  filename=self.getCheckpointFile(iteration))
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')
def learn(self):
    """
    Performs numIters iterations with numEps episodes of self-play in each
    iteration. After every iteration, it retrains neural network with
    examples in trainExamples (which has a maximium length of
    maxlenofQueue). It then pits the new neural network against the old one
    and accepts it only if it wins >= updateThreshold fraction of games.

    An iteration log with one row per iteration (iteration number, path of
    the training log or None, 'A'/'R' for accepted/rejected) is written to
    self.logPath + 'ITER_LOG.csv' after every iteration, so the log of an
    interrupted run is kept up to the last finished iteration.
    """
    iterHistory = {'ITER': [], 'ITER_DETAIL': [], 'PITT_RESTULT': []}

    for i in range(1, self.args.numIters + 1):
        iterHistory['ITER'].append(i)

        # bookkeeping
        print('###########################ITER:{}###########################'
              .format(str(i)))

        # examples of the iteration
        if not self.skipFirstSelfPlay or i > 1:
            iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

            eps_time = AverageMeter()
            if self.display == 1:
                bar = Bar('Self Play', max=self.args.numEps)
            end = time.time()

            for eps in range(self.args.numEps):
                self.mcts = MCTS(self.game, self.nnet, self.args)  # reset search tree
                iterationTrainExamples += self.executeEpisode()

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                if self.display == 1:
                    bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                        eps=eps + 1,
                        maxeps=self.args.numEps,
                        et=eps_time.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td)
                    bar.next()
            if self.display == 1:
                bar.finish()

            # save the iteration examples to the history
            self.trainExamplesHistory.append(iterationTrainExamples)

        if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
            self.trainExamplesHistory.pop(0)

        # backup history to a file
        # NB! the examples were collected using the model from the previous
        # iteration, so (i-1)
        self.saveTrainExamples(i - 1)

        # shuffle examples before training
        trainExamples = []
        for e in self.trainExamplesHistory:
            trainExamples.extend(e)
        shuffle(trainExamples)

        # training new network, keeping a copy of the old one
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        self.pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        pmcts = MCTS(self.game, self.pnet, self.args)

        trainLog = self.nnet.train(trainExamples)
        # Record exactly one ITER_DETAIL entry per iteration. All columns of
        # iterHistory must have the same length for pd.DataFrame, so None is
        # stored when no training log file is written.
        trainLogPath = None
        if self.keepLog:
            trainLogPath = self.logPath + 'ITER_{}_TRAIN_LOG.csv'.format(i)
            trainLog.to_csv(trainLogPath)
        iterHistory['ITER_DETAIL'].append(trainLogPath)

        nmcts = MCTS(self.game, self.nnet, self.args)

        print('PITTING AGAINST PREVIOUS VERSION')
        arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                      lambda x: np.argmax(nmcts.getActionProb(x, temp=0)),
                      self.game)
        pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

        print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
        if pwins + nwins > 0 and float(nwins) / (pwins + nwins) < self.args.updateThreshold:
            print('REJECTING NEW MODEL')
            iterHistory['PITT_RESTULT'].append('R')
            self.nnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        else:
            print('ACCEPTING NEW MODEL')
            iterHistory['PITT_RESTULT'].append('A')
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename=self.getCheckpointFile(i))
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')

        # All three columns now hold one entry per finished iteration.
        pd.DataFrame(data=iterHistory).to_csv(self.logPath + 'ITER_LOG.csv')
def train(trainloader, model, criterion, optimizer, epoch, sample_wts, use_cuda):
    """
    Trains a multi-head model for one pass over trainloader.

    For every batch the loss of each of the args.n_heads heads is weighted
    with that head's sample weights, back-propagated separately
    (gradients accumulate across heads) and followed by a single
    optimizer step.

    Args:
        trainloader: iterable yielding (inputs, targets) batches.
        model: network returning one output per head, indexable by head.
        criterion: loss giving one value per sample (the result is
            weighted per sample and summed here).
        optimizer: optimizer stepping the model parameters.
        epoch: step value passed to wandb.log.
        sample_wts: per-head sample weights; sliced to the batch length.
        use_cuda: move inputs and targets to the GPU when true.

    Returns:
        (average of the head-averaged losses, average of the head-averaged
        top-1 precisions).
    """
    # switch to train mode
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = [AverageMeter() for _ in range(args.n_heads)]
    losses_avg = AverageMeter()
    top1 = [AverageMeter() for _ in range(args.n_heads)]
    top5 = [AverageMeter() for _ in range(args.n_heads)]
    top1_avg = AverageMeter()
    top5_avg = AverageMeter()
    end = time.time()

    # Old torch versions return 1-element tensors that must be indexed.
    legacy_torch = float(torch.__version__[:3]) < 0.5

    bar = Bar('Processing', max=len(trainloader))
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        # measure data loading time
        data_time.update(time.time() - end)

        if use_cuda:
            # `async` is a reserved keyword since Python 3.7; the keyword
            # argument is spelled `non_blocking` in torch >= 0.4.
            inputs, targets = inputs.cuda(), targets.cuda(non_blocking=True)
        inputs, targets = torch.autograd.Variable(inputs), torch.autograd.Variable(targets)

        # compute output
        outputs = model(inputs)
        optimizer.zero_grad()

        for head_idx in range(args.n_heads):
            loss = criterion(outputs[head_idx], targets)
            # normalised weighted sum over the samples of the batch
            head_wts = sample_wts[head_idx][:loss.shape[0]]
            loss = (loss * head_wts / head_wts.sum()).sum()

            # measure accuracy and record loss
            prec1, prec5 = accuracy(outputs[head_idx].data, targets.data, topk=(1, 5))
            if legacy_torch:
                losses[head_idx].update(loss.data[0], inputs.size(0))
                top1[head_idx].update(prec1[0], inputs.size(0))
                top5[head_idx].update(prec5[0], inputs.size(0))
            else:
                losses[head_idx].update(loss.data, inputs.size(0))
                top1[head_idx].update(prec1, inputs.size(0))
                top5[head_idx].update(prec5, inputs.size(0))

            # accumulate this head's gradient; the graph is shared by all
            # heads, so it has to be retained between backward calls
            loss.backward(retain_graph=True)

        losses_avg.update(sum([h.avg for h in losses]) / len(losses), inputs.size(0))
        top1_avg.update(sum([h.avg for h in top1]) / len(top1), inputs.size(0))
        top5_avg.update(sum([h.avg for h in top5]) / len(top5), inputs.size(0))

        # SGD step on the accumulated gradients
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_avg: {loss:.4f} | top1_avg: {top1: .4f} | top5_avg: {top5: .4f}'.format(
            batch=batch_idx + 1,
            size=len(trainloader),
            data=data_time.avg,
            bt=batch_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses_avg.avg,
            top1=top1_avg.avg,
            top5=top5_avg.avg,
        )
        bar.next()
    bar.finish()

    wandb.log(
        {
            "top1": [h.avg for h in top1],
            "top1_avg": top1_avg.avg,
            "top5": [h.avg for h in top5],
            "top5_avg": top5_avg.avg,
            "losses": [h.avg for h in losses],
            "losses_avg": losses_avg.avg
        },
        step=epoch)
    return (losses_avg.avg, top1_avg.avg)
def learn(self):
    """
    Performs numIters iterations with numEps episodes of self-play in each
    iteration. After every iteration, it retrains neural network with
    examples in trainExamples (which has a maximium length of
    maxlenofQueue). It then pits the new neural network against the old one
    and accepts it only if it wins >= updateThreshold fraction of games.

    When the arena produces no decisive game (pwins + nwins == 0) there is
    no win ratio to compare; the new network is accepted in that case. The
    search tree is rebuilt on the network kept at the end of every
    iteration.
    """
    trainExamples = deque([], maxlen=self.args.maxlenOfQueue)

    for i in range(self.args.numIters):
        # bookkeeping
        print('------ITER ' + str(i + 1) + '------')
        eps_time = AverageMeter()
        bar = Bar('Self Play', max=self.args.numEps)
        end = time.time()

        for eps in range(self.args.numEps):
            trainExamples += self.executeEpisode()

            # bookkeeping + plot progress
            eps_time.update(time.time() - end)
            end = time.time()
            bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                eps=eps + 1,
                maxeps=self.args.numEps,
                et=eps_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td)
            bar.next()
        bar.finish()

        # training new network, keeping a copy of the old one
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        pnet = self.nnet.__class__(self.game)
        pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        pmcts = MCTS(self.game, pnet, self.args)

        self.nnet.train(trainExamples)
        nmcts = MCTS(self.game, self.nnet, self.args)

        print('PITTING AGAINST PREVIOUS VERSION')
        arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                      lambda x: np.argmax(nmcts.getActionProb(x, temp=0)),
                      self.game)
        pwins, nwins = arena.playGames(self.args.arenaCompare)

        print('NEW/PREV WINS : ' + str(nwins) + '/' + str(pwins))
        # Guard the ratio: with zero decisive games the division would
        # raise ZeroDivisionError.
        decisive = pwins + nwins
        if decisive > 0 and float(nwins) / decisive < self.args.updateThreshold:
            print('REJECTING NEW MODEL')
            self.nnet = pnet
        else:
            print('ACCEPTING NEW MODEL')
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='checkpoint_' + str(i) + '.pth.tar')
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')

        # reset search tree on whichever network was kept, so self-play
        # never continues with a rejected network
        self.mcts = MCTS(self.game, self.nnet, self.args)
def test(testloader, model, criterion, epoch, use_cuda):
    """
    Evaluates a multi-head model on testloader without gradients.

    The outputs of the first args.n_heads heads are averaged; loss and
    top-1/top-5 precision are computed on that average.

    Args:
        testloader: iterable yielding (inputs, targets) batches.
        model: network returning one output per head, indexable by head.
        criterion: loss applied to (averaged outputs, targets); its result
            is reduced with torch.mean.
        epoch: step value passed to wandb.log.
        use_cuda: move inputs, targets and head outputs to the GPU when
            true; nothing is moved otherwise.

    Returns:
        (average loss, average top-1 precision).
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    # Old torch versions return 1-element tensors that must be indexed.
    legacy_torch = float(torch.__version__[:3]) < 0.5

    end = time.time()
    bar = Bar('Processing', max=len(testloader))
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            # measure data loading time
            data_time.update(time.time() - end)

            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()

            # compute output: average over the heads. The head outputs are
            # only moved to the GPU when use_cuda is set, so evaluation also
            # works on CPU-only runs.
            outputs = model(inputs)
            outputs_sum = outputs[0].cuda() if use_cuda else outputs[0]
            for i in range(1, args.n_heads):
                head_output = outputs[i].cuda() if use_cuda else outputs[i]
                outputs_sum = torch.add(outputs_sum, head_output)
            outputs = outputs_sum / args.n_heads

            loss = criterion(outputs, targets)
            loss = torch.mean(loss)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
            if legacy_torch:
                losses.update(loss.data[0], inputs.size(0))
                top1.update(prec1[0], inputs.size(0))
                top5.update(prec5[0], inputs.size(0))
            else:
                losses.update(loss.data, inputs.size(0))
                top1.update(prec1, inputs.size(0))
                top5.update(prec5, inputs.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # plot progress
            bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
                batch=batch_idx + 1,
                size=len(testloader),
                data=data_time.avg,
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                loss=losses.avg,
                top1=top1.avg,
                top5=top5.avg,
            )
            bar.next()
        bar.finish()

    wandb.log(
        {
            "top1 test": top1.avg,
            "top5 test": top5.avg,
            "losses test": losses.avg
        },
        step=epoch)
    return (losses.avg, top1.avg)
def learn(self):
    """
    Performs numIters iterations with numEps episodes of self-play in each
    iteration. After every iteration, it retrains neural network with
    examples in trainExamples (which has a maximium length of
    maxlenofQueue). With the 'Arena' option enabled it then pits the new
    neural network against the old one and accepts it only if it wins >=
    updateThreshold fraction of games; with the option disabled the
    retrained network is always saved.

    With 'fixed_matrix' enabled a single sensing matrix is used for the
    whole run (loaded from 'fixed_matrix_filepath' or generated and saved
    there) and shared with the arena game arguments; otherwise a new
    matrix is generated for every self-play episode.
    """
    cfg = self.args

    # One sensing matrix for the whole run, shared with the arena.
    if cfg['fixed_matrix'] == True:
        if cfg['load_existing_matrix'] == True:
            matrix_file = cfg['fixed_matrix_filepath'] + '/sensing_matrix.npy'
            self.game_args.sensing_matrix = np.load(matrix_file)
            self.arena_game_args.sensing_matrix = np.load(matrix_file)
        else:
            # no existing matrix requested: generate one of the configured
            # type, hand the same matrix to the arena and save it
            self.game_args.generateSensingMatrix(cfg['m'], cfg['n'],
                                                 cfg['matrix_type'])
            self.arena_game_args.sensing_matrix = self.game_args.sensing_matrix
            self.game_args.save_Matrix(cfg['fixed_matrix_filepath'])

    for iter_no in range(1, cfg['numIters'] + 1):
        print('------ITER ' + str(iter_no) + '------')

        # Self-play is skipped in the first iteration when
        # skipFirstSelfPlay is set.
        if not self.skipFirstSelfPlay or iter_no > 1:
            iterationTrainExamples = deque([], maxlen=cfg['maxlenOfQueue'])

            # bookkeeping objects
            eps_time = AverageMeter()
            bar = Bar('Self Play', max=cfg['numEps'])
            end = time.time()

            # Every self-play game gets a new observed vector y and, unless
            # the matrix is fixed, a new sensing matrix A.
            for eps in range(cfg['numEps']):
                if cfg['fixed_matrix'] == False:
                    self.game_args.generateSensingMatrix(cfg['m'], cfg['n'],
                                                         cfg['matrix_type'])
                # requires a sensing matrix to be present in self.game_args
                self.game_args.generateNewObsVec(cfg['x_type'], cfg['sparsity'])

                # new search tree for each game
                self.mcts = MCTS(self.game, self.nnet, self.args,
                                 self.game_args, self.skip_nnet)

                # play one game and collect its states
                iterationTrainExamples += self.executeEpisode()

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps + 1,
                    maxeps=cfg['numEps'],
                    et=eps_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td)
                bar.next()
            bar.finish()

            # trainExamplesHistory is a list of deques, one per iteration
            self.trainExamplesHistory.append(iterationTrainExamples)

        if len(self.trainExamplesHistory) > cfg['numItersForTrainExamplesHistory']:
            print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)

        # backup history to a file; the examples were collected using the
        # model from the previous iteration, so (iter_no - 1)
        self.saveTrainExamples(iter_no - 1)

        # flatten the per-iteration deques into one list and shuffle it
        trainExamples = [example
                         for batch in self.trainExamplesHistory
                         for example in batch]
        shuffle(trainExamples)

        checkpoint_dir = cfg['network_checkpoint']
        checkpoint_name = 'nnet_checkpoint' + str(iter_no - 1)

        if cfg['Arena'] == True:
            # copy the current network into pnet before training
            self.nnet.save_checkpoint(folder=checkpoint_dir, filename='temp')
            self.pnet.load_checkpoint(folder=checkpoint_dir, filename='temp')

            # convert the examples into the network's input format and train
            trainExamples = self.nnet.constructTraining(trainExamples)
            self.nnet.train(trainExamples[0], trainExamples[1])

            # pit the previous network against the retrained one
            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(self.pnet, self.nnet, self.game, self.args,
                          self.arena_game_args)
            pwins, nwins, draws = arena.playGames()

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
            if pwins + nwins > 0 and float(nwins) / (pwins + nwins) < cfg['updateThreshold']:
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=checkpoint_dir, filename='temp')
            else:
                # saved twice: as the iteration checkpoint and as 'best'
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=checkpoint_dir,
                                          filename=checkpoint_name)
                self.nnet.save_checkpoint(folder=checkpoint_dir, filename='best')
        else:
            # no arena: train, then write the iteration checkpoint and 'best'
            print('TRAINING NEW NEURAL NETWORK...')
            trainExamples = self.nnet.constructTraining(trainExamples)
            self.nnet.train(trainExamples[0], trainExamples[1],
                            folder=checkpoint_dir,
                            filename='trainHistDict' + str(iter_no - 1))
            self.nnet.save_checkpoint(folder=checkpoint_dir,
                                      filename=checkpoint_name)
            self.nnet.save_checkpoint(folder=checkpoint_dir, filename='best')
def learn(self):
    """
    Drive the self-play / train / evaluate cycle for self.args.numIters iterations.

    In every iteration this method:
      * gathers the examples of self.args.numEps self-play episodes (skipped on
        iteration 1 when self.skipFirstSelfPlay is set) and appends them to
        self.trainExamplesHistory;
      * drops the oldest history entry once the history holds more than
        self.args.numItersForTrainExamplesHistory entries, then saves it;
      * trains self.nnet on the shuffled, flattened history;
      * appends the averaged losses to "losses_array.npy" and redraws the curves;
      * pits the retrained net against the pre-training copy held by self.pnet
        and keeps the new weights only when its share of the decisive games
        reaches self.args.updateThreshold.
    """
    for i in range(1, self.args.numIters + 1):
        print('------ITER ' + str(i) + '------')

        # --- self-play ---------------------------------------------------
        if not self.skipFirstSelfPlay or i > 1:
            episode_examples = deque([], maxlen=self.args.maxlenOfQueue)
            episode_timer = AverageMeter()
            progress = Bar('Self Play', max=self.args.numEps)
            tick = time.time()

            for eps in range(self.args.numEps):
                # each episode searches with a fresh tree
                self.mcts = MCTS(self.game, self.nnet, self.args)
                episode_examples += self.executeEpisode()

                episode_timer.update(time.time() - tick)
                tick = time.time()
                progress.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps + 1,
                    maxeps=self.args.numEps,
                    et=episode_timer.avg,
                    total=progress.elapsed_td,
                    eta=progress.eta_td)
                progress.next()
            progress.finish()

            self.trainExamplesHistory.append(episode_examples)

        # --- history bookkeeping -----------------------------------------
        history_len = len(self.trainExamplesHistory)
        if history_len > self.args.numItersForTrainExamplesHistory:
            print("len(trainExamplesHistory) =", history_len,
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)

        # the examples were produced by the previous iteration's model, hence i - 1
        self.saveTrainExamples(i - 1)

        trainExamples = [
            sample for chunk in self.trainExamplesHistory for sample in chunk
        ]
        shuffle(trainExamples)

        # --- training, with the old weights parked in self.pnet ------------
        checkpoint_dir = self.args.checkpoint
        self.nnet.save_checkpoint(folder=checkpoint_dir, filename='temp.pth.tar')
        self.pnet.load_checkpoint(folder=checkpoint_dir, filename='temp.pth.tar')
        pmcts = MCTS(self.game, self.pnet, self.args)

        loss = self.nnet.train(trainExamples)
        print(loss, "loosss")

        # --- loss history: one new column per iteration --------------------
        previous = np.load("losses_array.npy")
        first_total = sum(loss[0])
        second_total = sum(loss[1])
        newest = [
            [first_total / len(loss[0])],
            [second_total / len(loss[1])],
            [(first_total + second_total) / len(loss[0])],
        ]
        self.losses = np.hstack((previous, newest))
        print("================================================")
        print(self.losses)

        for row, line_style in ((2, 'k'), (1, 'k:'), (0, 'k--')):
            plt.plot(self.losses[row], line_style)
        plt.legend(
            ['train_overall_loss', 'train_value_loss', 'train_policy_loss'],
            loc='lower left')
        display.clear_output(wait=True)
        display.display(pl.gcf())
        pl.gcf().clear()
        time.sleep(1.0)
        print('\n')
        np.save("losses_array.npy", self.losses)

        # --- arena: previous net vs. retrained net -------------------------
        nmcts = MCTS(self.game, self.nnet, self.args)

        print('PITTING AGAINST PREVIOUS VERSION')
        arena = Arena(
            lambda board: np.argmax(pmcts.getActionProb(board, temp=0)),
            lambda board: np.argmax(nmcts.getActionProb(board, temp=0)),
            self.game)
        pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

        print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
        decisive = pwins + nwins
        if decisive == 0 or float(nwins) / decisive < self.args.updateThreshold:
            print('REJECTING NEW MODEL')
            self.nnet.load_checkpoint(folder=checkpoint_dir,
                                      filename='temp.pth.tar')
        else:
            print('ACCEPTING NEW MODEL')
            self.nnet.save_checkpoint(folder=checkpoint_dir,
                                      filename='best.pth.tar')
def learn(self):
    """
    Run self.args.numIters iterations of self-play followed by training.

    Each iteration collects the examples of self.args.numEps self-play
    episodes (skipped on iteration 1 when self.skipFirstSelfPlay is set),
    trims and saves the example history, and trains self.nnet on the
    shuffled history.

    Unlike the classic loop, this variant does NOT gate the new network on
    an arena result: it prints the arena performance of a PureNNtPlayer and
    of an NNtBasedMCTSPlayer built on the freshly trained net, and then
    always saves the net as both the per-iteration checkpoint and
    'best.pth.tar'.
    """
    for i in range(1, self.args.numIters + 1):
        print('------ITER ' + str(i) + '------')

        # examples of the iteration
        if not self.skipFirstSelfPlay or i > 1:
            iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

            eps_time = AverageMeter()
            bar = Bar('Self Play', max=self.args.numEps)
            end = time.time()

            for eps in range(self.args.numEps):
                iterationTrainExamples += self.executeEpisode()

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps + 1,
                    maxeps=self.args.numEps,
                    et=eps_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td)
                bar.next()
            bar.finish()

            # save the iteration examples to the history
            self.trainExamplesHistory.append(iterationTrainExamples)

        if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
            print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)

        # backup history to a file
        # NB! the examples were collected using the model from the previous
        # iteration, so (i-1)
        self.saveTrainExamples(i - 1)

        # shuffle examples before training
        trainExamples = []
        for e in self.trainExamplesHistory:
            trainExamples.extend(e)
        shuffle(trainExamples)

        # Snapshot the pre-training weights; nothing in this method reloads
        # them, the file is only kept on disk.
        self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                  filename='temp.pth.tar')
        self.nnet.train(trainExamples)

        print('Following is played by PureNNt:')
        purenntplayer = PureNNtPlayer(self.game, self.nnet, self.args,
                                      temp=0).play
        arena = Arena(purenntplayer, self.game)
        print('PureNNt Real Performance:',
              arena.playGames(self.args.arenaCompare))

        print('Following is played by NNtBasedMCTS:')
        nntbasedmctsplayer = NNtBasedMCTSPlayer(
            self.game, self.nnet, self.args, temp=0,
            percentile=self.r_percentile).play
        arena = Arena(nntbasedmctsplayer, self.game)
        print('NNtBasedMCTS Real Performance:',
              arena.playGames(self.args.arenaCompare))

        print('ACCEPTING NEW MODEL DIRECTLY')
        self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                  filename=self.getCheckpointFile(i))
        self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                  filename='best.pth.tar')
def train(self, examples):
    """
    Fit the TensorFlow network on randomly sampled mini-batches.

    examples: list of examples, each example is of form (board, pi, v)

    For each of args.epochs epochs, int(len(examples) / args.batch_size)
    batches are drawn with replacement, one train_step is run per batch, and
    the policy/value losses are then re-evaluated on that batch for the
    progress display. When self.log_summary is set, merged summaries and
    full-trace run metadata are written for every batch.
    """
    if self.log_summary:
        tf.gfile.MakeDirs(FLAGS_log_dir)
        summary_writer = tf.summary.FileWriter(
            FLAGS_log_dir + ('/train/iter-%02d' % self.global_iter),
            self.nnet.graph)
    # NOTE(review): the flattened source lost its indentation; this counter is
    # assumed to advance on every call, not only when summaries are logged.
    self.global_iter += 1

    for epoch in range(args.epochs):
        print('EPOCH ::: ' + str(epoch + 1))
        data_time = AverageMeter()
        batch_time = AverageMeter()
        pi_losses = AverageMeter()
        v_losses = AverageMeter()
        end = time.time()

        n_batches = int(len(examples) / args.batch_size)
        bar = Bar('Training Net', max=n_batches)

        for batch_idx in range(1, n_batches + 1):
            picked = np.random.randint(len(examples), size=args.batch_size)
            boards, pis, vs = zip(*(examples[k] for k in picked))

            feed = {
                self.nnet.input_boards: boards,
                self.nnet.target_pis: pis,
                self.nnet.target_vs: vs,
                self.nnet.dropout: args.dropout,
                self.nnet.isTraining: True
            }

            # time spent assembling the batch
            data_time.update(time.time() - end)

            # one optimisation step, optionally traced
            if not self.log_summary:
                self.sess.run(self.nnet.train_step, feed_dict=feed)
            else:
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()
                summary_str, _ = self.sess.run(
                    [self.nnet.summary_merged, self.nnet.train_step],
                    feed_dict=feed,
                    options=run_options,
                    run_metadata=run_metadata)
                summary_writer.add_run_metadata(
                    run_metadata, 'step%05d' % self.global_batch_idx)
                summary_writer.add_summary(summary_str,
                                           self.global_batch_idx)

            # losses are read back with a second pass over the same batch
            pi_loss, v_loss = self.sess.run(
                [self.nnet.loss_pi, self.nnet.loss_v], feed_dict=feed)
            batch_len = len(boards)
            pi_losses.update(pi_loss, batch_len)
            v_losses.update(v_loss, batch_len)

            batch_time.update(time.time() - end)
            end = time.time()
            self.global_batch_idx += 1

            bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                batch=batch_idx,
                size=int(len(examples) / args.batch_size),
                data=data_time.avg,
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                lpi=pi_losses.avg,
                lv=v_losses.avg,
            )
            bar.next()
        bar.finish()

    if self.log_summary:
        summary_writer.close()
def train(self, examples):
    """
    Fit self.nnet on randomly sampled mini-batches using Adam.

    examples: list of examples, each example is of form (board, pi, v)

    For each of args.epochs epochs, int(len(examples) / args.batch_size)
    batches of args.batch_size examples are drawn with replacement. Per batch
    the policy and value losses are summed, back-propagated and one optimizer
    step is taken. Running loss averages are shown on the progress bar.
    Nothing is returned.
    """
    optimizer = optim.Adam(self.nnet.parameters())

    # Loop-invariant: the number of batches per epoch.
    n_batches = int(len(examples) / args.batch_size)

    for epoch in range(args.epochs):
        print('EPOCH ::: ' + str(epoch + 1))
        self.nnet.train()
        data_time = AverageMeter()
        batch_time = AverageMeter()
        pi_losses = AverageMeter()
        v_losses = AverageMeter()
        end = time.time()

        bar = Bar('Training Net', max=n_batches)
        batch_idx = 0

        while batch_idx < n_batches:
            sample_ids = np.random.randint(len(examples), size=args.batch_size)
            boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))
            boards = torch.FloatTensor(np.array(boards).astype(np.float64))
            target_pis = torch.FloatTensor(np.array(pis))
            target_vs = torch.FloatTensor(np.array(vs).astype(np.float64))

            # predict
            if args.cuda:
                boards = boards.contiguous().cuda()
                target_pis = target_pis.contiguous().cuda()
                target_vs = target_vs.contiguous().cuda()
            boards, target_pis, target_vs = Variable(boards), Variable(target_pis), Variable(target_vs)

            # measure data loading time
            data_time.update(time.time() - end)

            # compute output
            out_pi, out_v = self.nnet(boards)
            l_pi = self.loss_pi(target_pis, out_pi)
            l_v = self.loss_v(target_vs, out_v)
            total_loss = l_pi + l_v

            # record loss; .item() extracts the Python scalar from a 0-dim
            # tensor (indexing it with [0] raises on current PyTorch)
            pi_losses.update(l_pi.item(), boards.size(0))
            v_losses.update(l_v.item(), boards.size(0))

            # compute gradient and do SGD step
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            batch_idx += 1

            # plot progress
            bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                batch=batch_idx,
                size=n_batches,
                data=data_time.avg,
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                lpi=pi_losses.avg,
                lv=v_losses.avg,
            )
            bar.next()
        bar.finish()
def train(model, optimizer, epoch, di, args, loss_criterion):
    """
    Train `model` for args.batches_per_epoch sampled batches.

    Args:
        model: network to train; called with keyword arguments that depend on
            args.encoder_type ('transformer', 'rnn' or 'decomposable').
        optimizer: optimizer stepped once per batch.
        epoch: not used inside this function.
        di: data source providing sample_train_batch(...).
        args: configuration; reads encoder_type, cuda, batch_size,
            batches_per_epoch and (for 'decomposable') max_norm.
        loss_criterion: callable applied as loss_criterion(outputs, targets).

    Returns:
        (losses.avg, acc.avg) accumulated over the batches.

    Raises:
        ValueError: if args.encoder_type is not one of the supported values.
    """
    model.train()  # switch to train mode

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()
    end = time.time()

    # Moving the modules is loop-invariant, so do it once instead of per batch.
    if args.cuda:
        model = model.cuda()
        loss_criterion = loss_criterion.cuda()

    bar = Bar('Processing', max=args.batches_per_epoch)
    batch_idx = 0

    while batch_idx < args.batches_per_epoch:
        # sample batch
        if args.encoder_type == 'transformer':
            sent1, sent1_posembinput, sent2, sent2_posembinput, targets = \
                di.sample_train_batch(use_cuda=args.cuda)
            unsort1, unsort2 = None, None
            encoder_init_hidden = None
        elif args.encoder_type == 'rnn':
            sent1, sent2, unsort1, unsort2, targets = di.sample_train_batch(
                encoder_embed=model.embed,
                decoder_embed=model.embed,
                use_cuda=args.cuda,
            )
            sent1_posembinput, sent2_posembinput = None, None
            encoder_init_hidden = model.encoder.initHidden(
                batch_size=args.batch_size)
        elif args.encoder_type == 'decomposable':
            sent1, sent2, targets = \
                di.sample_train_batch(use_cuda=args.cuda)
            unsort1, unsort2 = None, None
            encoder_init_hidden = None
        else:
            raise ValueError(
                "{} not supported".format(args.encoder_type))

        if args.cuda:
            # `async` is a reserved word since Python 3.7; PyTorch's
            # replacement keyword is `non_blocking`.
            targets = targets.cuda(non_blocking=True)
            if args.encoder_type == 'transformer':
                sent1 = sent1.cuda()
                sent2 = sent2.cuda()
                sent1_posembinput = sent1_posembinput.cuda()
                sent2_posembinput = sent2_posembinput.cuda()
            elif args.encoder_type == 'decomposable':
                sent1 = sent1.cuda()
                sent2 = sent2.cuda()
            if args.encoder_type == 'rnn':
                # NOTE(review): len() is also truthy for a non-empty tensor;
                # presumably this is meant to detect an (h, c) pair - confirm.
                if len(encoder_init_hidden):
                    encoder_init_hidden = [
                        x.cuda() for x in encoder_init_hidden
                    ]
                else:
                    encoder_init_hidden = encoder_init_hidden.cuda()

        # measure data loading time
        data_time.update(time.time() - end)

        # compute output
        if args.encoder_type == 'decomposable':
            softmax_outputs = model(
                sent1=sent1,
                sent2=sent2,
            )
        else:
            softmax_outputs = model(
                encoder_init_hidden=encoder_init_hidden,
                encoder_input=sent1,
                encoder_pos_emb_input=sent1_posembinput,
                encoder_unsort=unsort1,
                decoder_input=sent2,
                decoder_pos_emb_input=sent2_posembinput,
                decoder_unsort=unsort2,
                batch_size=args.batch_size,
            )
        loss = loss_criterion(softmax_outputs, targets)

        # measure accuracy and record loss
        acc_batch = compute_accuracy(
            outputs=softmax_outputs.data,
            targets=targets.data,
        )
        acc.update(acc_batch, args.batch_size)
        # .item() replaces indexing a 0-dim tensor with [0]
        losses.update(loss.item(), len(sent1))

        # compute gradient
        optimizer.zero_grad()
        loss.backward()

        if args.encoder_type == 'decomposable':
            # Clip the joint gradient norm of all Linear layers to
            # args.max_norm. The previous hand-rolled version discarded the
            # square root (`grad_norm**0.5` was a bare expression) and scaled
            # only the weight gradients although bias gradients were part of
            # the norm.
            linear_params = []
            for m in model.modules():
                if isinstance(m, nn.Linear):
                    linear_params.extend(m.parameters())
            nn.utils.clip_grad_norm_(linear_params, args.max_norm)

        # optimizer step
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        batch_idx += 1

        # plot progress
        bar.suffix = (
            '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s'
            '| Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | Acc: {acc:.3f}'
        ).format(
            batch=batch_idx,
            size=args.batches_per_epoch,
            data=data_time.avg,
            bt=batch_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            acc=acc.avg,
        )
        bar.next()
    bar.finish()
    return losses.avg, acc.avg
def playGames(self, num, verbose=False):
    """
    Plays num games in which player1 starts num/2 games and player2 starts
    num/2 games.

    int(num / 2) games are played per starting side, so an odd `num` plays
    num - 1 games in total. The players are swapped for the second half and
    put back in their original order before returning (also when a game
    raises), so the arena can be reused without wins being mis-attributed.

    Returns:
        oneWon: games won by player1
        twoWon: games won by player2
        draws:  games won by nobody
    """
    games_per_side = int(num / 2)
    # Use the number of games actually played so the bar can reach 100%.
    total_games = 2 * games_per_side

    eps_time = AverageMeter()
    bar = Bar('Arena.playGames', max=total_games)
    end = time.time()
    eps = 0

    oneWon = 0
    twoWon = 0
    draws = 0

    original_one, original_two = self.player1, self.player2
    try:
        for swapped in (False, True):
            if swapped:
                self.player1, self.player2 = self.player2, self.player1
            # After the swap the original player1 sits in seat two, so a
            # result of -1 is a win for it.
            one_wins_on = -1 if swapped else 1

            for _ in range(games_per_side):
                gameResult = self.playGame(verbose=verbose)
                if gameResult == one_wins_on:
                    oneWon += 1
                elif gameResult == -one_wins_on:
                    twoWon += 1
                else:
                    draws += 1

                # bookkeeping + plot progress
                eps += 1
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps,
                    maxeps=total_games,
                    et=eps_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td)
                bar.next()
    finally:
        self.player1, self.player2 = original_one, original_two

    bar.finish()

    return oneWon, twoWon, draws
def train_squad(model, optimizer, epoch, di, args, loss_criterion):
    """
    Train `model` for args.batches_per_epoch sampled batches (RNN encoder only).

    Args:
        model: network to train; must expose `embed` and `encoder.initHidden`.
        optimizer: optimizer stepped once per batch.
        epoch: not used inside this function.
        di: data source providing sample_train_batch(...), which yields two
            answer tensors, a question tensor, their unsort indices and targets.
        args: configuration; reads encoder_type, cuda, batch_size and
            batches_per_epoch.
        loss_criterion: callable applied as loss_criterion(outputs, targets).

    Returns:
        (losses.avg, acc.avg) accumulated over the batches.

    Raises:
        Exception: if args.encoder_type is anything other than 'rnn'.
    """
    model.train()  # switch to train mode

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()
    end = time.time()

    # Moving the modules is loop-invariant, so do it once instead of per batch.
    if args.cuda:
        model = model.cuda()
        loss_criterion = loss_criterion.cuda()

    bar = Bar('Processing', max=args.batches_per_epoch)
    batch_idx = 0

    while batch_idx < args.batches_per_epoch:
        # sample batch
        if args.encoder_type == 'rnn':
            (
                a1_packed_tensor,
                a1_idx_unsort,
                a2_packed_tensor,
                a2_idx_unsort,
                q_packed_tensor,
                q_idx_unsort,
                targets,
            ) = di.sample_train_batch(
                encoder_embed=model.embed,
                decoder_embed=model.embed,
                use_cuda=args.cuda,
            )
            encoder_init_hidden = model.encoder.initHidden(
                batch_size=args.batch_size)
        else:
            raise Exception("{} not supported".format(args.encoder_type))

        if args.cuda:
            # `async` is a reserved word since Python 3.7; PyTorch's
            # replacement keyword is `non_blocking`.
            targets = targets.cuda(non_blocking=True)
            # NOTE(review): len() is also truthy for a non-empty tensor;
            # presumably this is meant to detect an (h, c) pair - confirm.
            if len(encoder_init_hidden):
                encoder_init_hidden = [x.cuda() for x in encoder_init_hidden]
            else:
                encoder_init_hidden = encoder_init_hidden.cuda()

        # measure data loading time
        data_time.update(time.time() - end)

        # compute output
        softmax_outputs = model(
            a1_packed_tensor=a1_packed_tensor,
            a1_idx_unsort=a1_idx_unsort,
            a2_packed_tensor=a2_packed_tensor,
            a2_idx_unsort=a2_idx_unsort,
            q_packed_tensor=q_packed_tensor,
            q_idx_unsort=q_idx_unsort,
            encoder_init_hidden=encoder_init_hidden,
            batch_size=args.batch_size,
        )
        loss = loss_criterion(softmax_outputs, targets)

        # measure accuracy and record loss
        acc_batch = compute_accuracy(
            outputs=softmax_outputs.data,
            targets=targets.data,
        )
        acc.update(acc_batch, args.batch_size)
        # .item() replaces indexing a 0-dim tensor with [0]
        losses.update(loss.item(), args.batch_size)

        # compute gradient
        optimizer.zero_grad()
        loss.backward()

        # optimizer step
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        batch_idx += 1

        # plot progress
        bar.suffix = (
            '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | '
            'Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | '
            'Acc: {acc:.3f}'
        ).format(
            batch=batch_idx,
            size=args.batches_per_epoch,
            data=data_time.avg,
            bt=batch_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            acc=acc.avg,
        )
        bar.next()
    bar.finish()
    return losses.avg, acc.avg
def learn(self):
    """
    Debug pitting run followed by the (currently unreachable) training loop.

    As written, this method loads 'best.pth.tar' into self.pnet, lets two
    samplers driven by the same MCTS (temp=0.2 vs. temp=-0.2) play four
    groups of seven games, and then terminates the process with exit(0).

    The code after the exit is the regular loop: numIters iterations with
    numEps episodes of self-play each, retraining on the (bounded) example
    history, pitting the new network against the old one and accepting it
    only if it wins >= updateThreshold fraction of the decisive games.
    """
    self.pnet.load_checkpoint(folder=self.args.checkpoint,
                              filename='best.pth.tar')
    pmcts = MCTS(self.game, self.pnet, self.args)

    print('PITTING AGAINST PREVIOUS VERSION - RANDOM')
    n_actions = self.game.n * self.game.n + 1
    # NOTE(review): both players sample from the same pmcts tree, and the
    # second one uses a negative temperature - confirm this is intended.
    arena = Arena(
        lambda xt: np.random.choice(
            n_actions, 1, p=pmcts.getActionProb(xt, temp=0.2)),
        lambda xt: np.random.choice(
            n_actions, 1, p=pmcts.getActionProb(xt, temp=-0.2)),
        self.game)
    arena.display = self.game.display

    # four groups of seven games, separated by a blank line
    for group in range(4):
        if group:
            print()
        for _ in range(7):
            arena.playGame(verbose=False, rnd=0)

    # NOTE(review): leftover debugging exit - everything below never runs
    # while this line is present. Remove it to re-enable training.
    exit(0)

    for i in range(1, self.args.numIters + 1):
        # bookkeeping
        print('------ITER ' + str(i) + '------')

        # examples of the iteration
        if not self.skipFirstSelfPlay or i > 1:
            iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

            eps_time = AverageMeter()
            bar = Bar('Self Play', max=self.args.numEps)
            end = time.time()

            for eps in range(self.args.numEps):
                self.mcts = MCTS(self.game, self.nnet,
                                 self.args)  # reset search tree
                iterationTrainExamples += self.executeEpisode()

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps + 1,
                    maxeps=self.args.numEps,
                    et=eps_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td)
                bar.next()
            bar.finish()

            # save the iteration examples to the history
            self.trainExamplesHistory.append(iterationTrainExamples)

        if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
            print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)

        # backup history to a file
        # NB! the examples were collected using the model from the previous
        # iteration, so (i-1)
        self.saveTrainExamples(i - 1)

        # shuffle examples before training
        trainExamples = []
        for e in self.trainExamplesHistory:
            trainExamples.extend(e)
        shuffle(trainExamples)

        # training new network, keeping a copy of the old one
        self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                  filename='temp.pth.tar')
        self.pnet.load_checkpoint(folder=self.args.checkpoint,
                                  filename='temp.pth.tar')
        pmcts = MCTS(self.game, self.pnet, self.args)

        self.nnet.train(trainExamples)
        nmcts = MCTS(self.game, self.nnet, self.args)

        print('PITTING AGAINST PREVIOUS VERSION')
        arena = Arena(
            lambda x: np.argmax(pmcts.getActionProb(x, temp=0.3)),
            lambda x: np.argmax(nmcts.getActionProb(x, temp=0.3)),
            self.game)
        pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

        print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))

        print('PITTING AGAINST PREVIOUS VERSION - RANDOM')
        arena = Arena(
            lambda x: np.argmax(pmcts.getActionProb(x, temp=0.3)),
            lambda x: np.argmax(nmcts.getActionProb(x, temp=0.3)),
            self.game)
        # A dedicated loop variable: reusing `i` here overwrote the iteration
        # counter that names the checkpoint file below.
        for game_idx in range(24):
            arena.playGame(verbose=False, rnd=4 * game_idx)

        if pwins + nwins == 0 or float(nwins) / (
                pwins + nwins) < self.args.updateThreshold:
            print('REJECTING NEW MODEL')
            self.nnet.load_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
        else:
            print('ACCEPTING NEW MODEL')
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename=self.getCheckpointFile(i))
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='best.pth.tar')
def test(model, epoch, di, args, loss_criterion):
    """
    Evaluate `model` on args.test_batches_per_epoch sampled dev batches.

    Args:
        model: network to evaluate; called with keyword arguments that depend
            on args.encoder_type ('transformer', 'rnn' or 'decomposable').
        epoch: not used inside this function.
        di: data source providing sample_dev_batch(...).
        args: configuration; reads encoder_type, cuda, batch_size and
            test_batches_per_epoch.
        loss_criterion: callable applied as loss_criterion(outputs, targets).

    Returns:
        (losses.avg, acc.avg) accumulated over the batches.

    Raises:
        ValueError: if args.encoder_type is not one of the supported values.
    """
    # switch to evaluate mode
    model.eval()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()
    end = time.time()

    # Moving the model is loop-invariant, so do it once instead of per batch.
    if args.cuda:
        model = model.cuda()

    bar = Bar('Processing', max=args.test_batches_per_epoch)
    batch_idx = 0

    while batch_idx < args.test_batches_per_epoch:
        # sample batch
        if args.encoder_type == 'transformer':
            sent1, sent1_posembinput, sent2, sent2_posembinput, targets = \
                di.sample_dev_batch(use_cuda=args.cuda)
            unsort1, unsort2 = None, None
            encoder_init_hidden = None
        elif args.encoder_type == 'rnn':
            sent1, sent2, unsort1, unsort2, targets = di.sample_dev_batch(
                encoder_embed=model.embed,
                decoder_embed=model.embed,
                use_cuda=args.cuda,
            )
            sent1_posembinput, sent2_posembinput = None, None
            encoder_init_hidden = model.encoder.initHidden(
                batch_size=args.batch_size)
        elif args.encoder_type == 'decomposable':
            sent1, sent2, targets = \
                di.sample_dev_batch(use_cuda=args.cuda)
            unsort1, unsort2 = None, None
            encoder_init_hidden = None
        else:
            raise ValueError(
                "{} not supported".format(args.encoder_type))

        if args.cuda:
            # `async` is a reserved word since Python 3.7; PyTorch's
            # replacement keyword is `non_blocking`.
            targets = targets.cuda(non_blocking=True)
            if args.encoder_type == 'transformer':
                sent1 = sent1.cuda()
                sent2 = sent2.cuda()
                sent1_posembinput = sent1_posembinput.cuda()
                sent2_posembinput = sent2_posembinput.cuda()
            elif args.encoder_type == 'decomposable':
                sent1 = sent1.cuda()
                sent2 = sent2.cuda()
            if args.encoder_type == 'rnn':
                # NOTE(review): len() is also truthy for a non-empty tensor;
                # presumably this is meant to detect an (h, c) pair - confirm.
                if len(encoder_init_hidden):
                    encoder_init_hidden = [
                        x.cuda() for x in encoder_init_hidden
                    ]
                else:
                    encoder_init_hidden = encoder_init_hidden.cuda()

        # measure data loading time
        data_time.update(time.time() - end)

        # compute output
        if args.encoder_type == 'decomposable':
            softmax_outputs = model(
                sent1=sent1,
                sent2=sent2,
            )
        else:
            softmax_outputs = model(
                encoder_init_hidden=encoder_init_hidden,
                encoder_input=sent1,
                encoder_pos_emb_input=sent1_posembinput,
                encoder_unsort=unsort1,
                decoder_input=sent2,
                decoder_pos_emb_input=sent2_posembinput,
                decoder_unsort=unsort2,
                batch_size=args.batch_size,
            )
        loss = loss_criterion(softmax_outputs, targets)

        # measure accuracy and record loss
        acc_batch = compute_accuracy(
            outputs=softmax_outputs.data,
            targets=targets.data,
        )
        acc.update(acc_batch, args.batch_size)
        # .item() replaces indexing a 0-dim tensor with [0]
        losses.update(loss.item(), len(sent1))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        batch_idx += 1

        # plot progress
        bar.suffix = (
            '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s'
            '| Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | Acc: {acc:.3f}'
        ).format(
            batch=batch_idx,
            size=args.test_batches_per_epoch,
            data=data_time.avg,
            bt=batch_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            acc=acc.avg,
        )
        bar.next()
    bar.finish()
    return losses.avg, acc.avg
def learn(self):
    """
    Performs numIters iterations with numEps episodes of self-play in each
    iteration. After every iteration, it retrains the neural network with
    the examples in trainExamplesHistory (each entry is bounded by
    maxlenOfQueue).

    When self.arenaEnabled is set, the retrained network is pitted against
    the pre-training copy and rejected (weights rolled back to the temp
    checkpoint) if it won fewer than updateThreshold of the decisive games.
    When the arena is disabled the retrained network is accepted directly.
    An accepted network is saved as the per-iteration checkpoint and as
    'best.pth.tar'.
    """
    tempfile = 'temp.pth.tar'
    bestfile = 'best.pth.tar'

    for i in range(1, self.args.numIters + 1):
        # bookkeeping
        print('------ITER ' + str(i) + '------')
        print(str(self.game.innerN) + "x" + str(self.game.innerM))

        # examples of the iteration
        if not self.skipFirstSelfPlay or i > 1:
            iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

            eps_time = AverageMeter()
            bar = Bar('Self Play', max=self.args.numEps)
            end = time.time()

            for eps in range(self.args.numEps):
                self.mcts = MCTS(self.nnet, self.args)  # reset search tree
                iterationTrainExamples += self.executeEpisode()

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps + 1,
                    maxeps=self.args.numEps,
                    et=eps_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td)
                bar.next()
            bar.finish()

            # save the iteration examples to the history
            self.trainExamplesHistory.append(iterationTrainExamples)

        if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
            print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)

        # backup history to a file
        # NB! the examples were collected using the model from the previous
        # iteration, so (i-1)
        self.saveTrainExamples(i - 1)

        # shuffle examples before training
        trainExamples = []
        for e in self.trainExamplesHistory:
            trainExamples.extend(e)
        shuffle(trainExamples)

        # training new network, keeping a copy of the old one
        self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                  filename=tempfile)
        self.nnet.train(trainExamples)

        # Without an arena there is nothing to veto the new weights, so the
        # default is to accept; previously that path saved no checkpoint.
        accept = True
        if self.arenaEnabled:
            self.pnet.load_checkpoint(folder=self.args.checkpoint,
                                      filename=tempfile)
            pmcts = MCTS(self.pnet, self.args)
            nmcts = MCTS(self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(
                lambda x, y: pmcts.getActionProb(x, y, temp=0),
                lambda x, y: nmcts.getActionProb(x, y, temp=0),
                self.game)
            pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
            decisive = pwins + nwins
            # NOTE(review): with no decisive game the new model is kept, as
            # in the original condition - confirm this is the wanted policy.
            if decisive > 0 and float(nwins) / decisive < self.args.updateThreshold:
                accept = False

        if accept:
            print('ACCEPTING NEW MODEL')
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename=self.getCheckpointFile(i))
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename=bestfile)
        else:
            print('REJECTING NEW MODEL')
            self.nnet.load_checkpoint(folder=self.args.checkpoint,
                                      filename=tempfile)
def learn(self):
    """
    Run the self-play / train / evaluate loop, optionally resuming a run.

    Performs iterations up to ``args.numIters`` with ``args.numEps``
    episodes of self-play in each.  After every iteration the network is
    retrained on the shuffled ``trainExamplesHistory`` (trimmed to at most
    ``args.numItersForTrainExamplesHistory`` entries), pitted against the
    previous network and against the three players returned by
    ``decidePlayers()``, and kept only if its share of decisive games
    against the previous network is strictly greater than
    ``args.updateThreshold``.

    When ``args.load_model`` is set, the win/draw histories, the training
    examples and the iteration to resume from are read back from files
    under ``args.trainExampleCheckpoint``.  Values read back from those
    files are kept as strings, exactly as they appear on disk.

    Returns:
        None.  Results are written through ``writeLogsToFile`` and the
        network checkpoints.
    """
    # NOTE(review): the original indentation was lost.  The search-tree
    # reset is assumed to run once per iteration and the last
    # writeLogsToFile call once after the loop - confirm.

    def read_rows(path):
        # One list of whitespace-separated words per line of the file.
        with open(path, "r") as handle:
            return [line.split() for line in handle]

    epochswin = []         # wins per epoch against the preceding version
    epochdraw = []         # draws per epoch against the preceding version
    epochswingreedy = []   # wins per epoch against greedy
    epochsdrawgreedy = []  # draws per epoch against greedy
    epochswinrandom = []   # wins per epoch against random
    epochsdrawrandom = []  # draws per epoch against random
    epochswinminmax = []   # wins per epoch against minmax
    epochsdrawminmax = []  # draws per epoch against minmax

    # Shared tail of every file name produced by this run configuration.
    settings = (str(self.args.numIters) + ":eps" + str(self.args.numEps)
                + ":dim" + str(self.game.n))
    filenameBest = "best" + settings + ".pth.tar"

    begining = 1
    if self.args.load_model:
        graph_prefix = self.args.trainExampleCheckpoint + "graphwins:iter" + settings

        # Line 0 holds the wins, line 1 the draws; further lines are ignored.
        training_histories = (epochswin, epochdraw)
        for history, row in zip(training_histories, read_rows(graph_prefix + ".txt")):
            history.extend(row)

        # Lines 0-5 hold the benchmark histories in this order.
        benchmark_histories = (epochswingreedy, epochsdrawgreedy,
                               epochswinrandom, epochsdrawrandom,
                               epochswinminmax, epochsdrawminmax)
        for history, row in zip(benchmark_histories,
                                read_rows(graph_prefix + ":greedyrandom.txt")):
            history.extend(row)

        self.loadTrainExamples()
        with open(self.args.trainExampleCheckpoint + "loopinformation", "r") as handle:
            begining = int(handle.readlines()[0])

    for i in range(begining, self.args.numIters + 1):
        # Persist the iteration index; it is read back when load_model is set.
        with open(self.args.trainExampleCheckpoint + "loopinformation", "w") as handle:
            handle.write(str(i))

        # bookkeeping
        print('------ITER ' + str(i) + '------')

        # examples of the iteration
        iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

        eps_time = AverageMeter()
        bar = Bar('Self Play', max=self.args.numEps)
        end = time.time()

        for eps in range(self.args.numEps):
            iterationTrainExamples += self.executeEpisode()

            # bookkeeping + plot progress
            eps_time.update(time.time() - end)
            end = time.time()
            bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                eps=eps + 1, maxeps=self.args.numEps, et=eps_time.avg,
                total=bar.elapsed_td, eta=bar.eta_td)
            bar.next()
        bar.finish()

        # save the iteration examples to the history
        self.trainExamplesHistory.append(iterationTrainExamples)

        if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
            print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)

        # backup history to a file
        # NB! the examples were collected using the model from the previous
        # iteration, so (i-1)
        self.saveTrainExamples(i - 1)

        # shuffle examples before training
        trainExamples = [example
                         for batch in self.trainExamplesHistory
                         for example in batch]
        shuffle(trainExamples)

        # training new network, keeping a copy of the old one
        # ("curent" is kept as-is: it is part of the on-disk file name)
        filename = "curent" + str(i) + "temp:iter" + settings + ".pth.tar"
        print("path with filename " + filename)
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=filename)

        # The best checkpoint lives in the checkpoint folder, so look for it
        # there rather than in the current working directory.
        best_path = os.path.join(self.args.checkpoint, filenameBest)
        previous_name = filenameBest if os.path.isfile(best_path) else filename
        self.pnet.load_checkpoint(folder=self.args.checkpoint, filename=previous_name)
        pmcts = MCTS(self.game, self.pnet, self.args)

        self.nnet.train(trainExamples)

        filenameCurrent = "currentforprocess:temp:iter" + settings + ".pth.tar"
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=filenameCurrent)
        nmcts = MCTS(self.game, self.nnet, self.args)

        print('PITTING AGAINST PREVIOUS VERSION')
        arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                      lambda x: np.argmax(nmcts.getActionProb(x, temp=0)),
                      self.game, nmcts, pmcts, evaluate=True, name=self.args.name)
        pwins, nwins, draws = arena.playGames(self.args.arenaCompare, False)

        pmcts.clear()
        nmcts.clear()
        del pmcts
        del nmcts

        print(' ')
        print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
        if i == 1:
            # Seed the history with the previous network's first result.
            epochswin.append(pwins)
            epochdraw.append(0)
        epochswin.append(nwins)
        epochdraw.append(draws)
        self.writeLogsToFile(epochswin, epochdraw)

        # Get all the players and then pit them against the network.
        # You need to modify here if you implement more players.
        (gp, rp, mp) = self.decidePlayers()

        if self.args.parallel == 0:
            nmcts1 = MCTS(self.game, self.nnet, self.args)
            nmcts2 = MCTS(self.game, self.nnet, self.args)
            nmcts3 = MCTS(self.game, self.nnet, self.args)

            arenagreedy = Arena(lambda x: np.argmax(nmcts1.getActionProb(x, temp=0)), gp,
                                self.game, nmcts1, name=self.args.name)
            arenarandom = Arena(lambda x: np.argmax(nmcts2.getActionProb(x, temp=0)), rp,
                                self.game, nmcts2, name=self.args.name)
            arenaminmax = Arena(lambda x: np.argmax(nmcts3.getActionProb(x, temp=0)), mp,
                                self.game, nmcts3, evaluate=True, name=self.args.name)

            pwinsminmax, nwinsminmax, drawsminmax = arenaminmax.playGames(self.args.arenaCompare)
            print("minmax - " + str(pwinsminmax) + " " + str(nwinsminmax) + " " + str(drawsminmax))
            pwinsgreedy, nwinsgreedy, drawsgreedy = arenagreedy.playGames(self.args.arenaCompare)
            print("greedy - " + str(pwinsgreedy) + " " + str(nwinsgreedy) + " " + str(drawsgreedy))
            pwinsrandom, nwinsrandom, drawsrandom = arenarandom.playGames(self.args.arenaCompare)
            print("random - " + str(pwinsrandom) + " " + str(nwinsrandom) + " " + str(drawsrandom))

            nmcts1.clear()
            nmcts2.clear()
            nmcts3.clear()
            del nmcts1
            del nmcts2
            del nmcts3
        else:
            # Evaluate the network against the benchmarks in a parallel way.
            self.args.update({'index': str(i)})
            p = self.parallel(self.args.arenaCompare)
            (pwinsminmax, nwinsminmax, drawsminmax) = p[0]
            (pwinsgreedy, nwinsgreedy, drawsgreedy) = p[1]
            (pwinsrandom, nwinsrandom, drawsrandom) = p[2]

        epochsdrawgreedy.append(drawsgreedy)
        epochsdrawrandom.append(drawsrandom)
        epochswinrandom.append(pwinsrandom)
        epochswingreedy.append(pwinsgreedy)
        epochswinminmax.append(pwinsminmax)
        epochsdrawminmax.append(drawsminmax)
        self.writeLogsToFile(epochswingreedy, epochsdrawgreedy,
                             epochswinrandom, epochsdrawrandom,
                             epochswinminmax, epochsdrawminmax, training=False)

        if pwins + nwins == 0 or float(nwins) / (pwins + nwins) <= self.args.updateThreshold:
            print('REJECTING NEW MODEL')
            # Roll back to the best checkpoint if one exists, otherwise to
            # the snapshot taken before this iteration's training.
            rollback_name = filenameBest if os.path.isfile(best_path) else filename
            self.nnet.load_checkpoint(folder=self.args.checkpoint, filename=rollback_name)
        else:
            print('ACCEPTING NEW MODEL')
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename=self.getCheckpointFile(i))
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=filenameBest)

        # reset search tree
        self.mcts.clear()
        del self.mcts
        self.mcts = MCTS(self.game, self.nnet, self.args, mcts=True)

    self.writeLogsToFile(epochswin, epochdraw, training=True)