def playGames(self, num, verbose=False):
    """
    Plays num games in which player1 starts num/2 games and player2
    starts num/2 games.

    Returns:
        oneWon: games won by player1
        twoWon: games won by player2
        draws:  games won by nobody
    """
    eps_time = AverageMeter()
    bar = Bar('Arena.playGames', max=num)
    end = time.time()
    eps = 0
    maxeps = int(num)

    num = int(num / 2)
    oneWon = 0
    twoWon = 0
    draws = 0
    for _ in range(num):
        gameResult = self.playGame(verbose=verbose)
        if gameResult == 1:
            oneWon += 1
        elif gameResult == -1:
            twoWon += 1
        else:
            draws += 1
        # bookkeeping + plot progress (eps already counts the finished game,
        # so the suffix uses eps, not eps + 1)
        eps += 1
        eps_time.update(time.time() - end)
        end = time.time()
        bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
            eps=eps, maxeps=maxeps, et=eps_time.avg,
            total=bar.elapsed_td, eta=bar.eta_td)
        bar.next()

    self.player1, self.player2 = self.player2, self.player1

    for _ in range(num):
        gameResult = self.playGame(verbose=verbose)
        if gameResult == -1:
            oneWon += 1
        elif gameResult == 1:
            twoWon += 1
        else:
            draws += 1
        # bookkeeping + plot progress (maxeps stays the full game count,
        # not the halved num)
        eps += 1
        eps_time.update(time.time() - end)
        end = time.time()
        bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
            eps=eps, maxeps=maxeps, et=eps_time.avg,
            total=bar.elapsed_td, eta=bar.eta_td)
        bar.next()

    bar.finish()
    return oneWon, twoWon, draws
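# AverageMeter and Bar above come from pytorch_classification.utils.
# For reference, a minimal sketch of AverageMeter (an assumption based on the
# common implementation of this utility, not this repository's exact source):
# it keeps a running sum and count so .avg can be read at any point.
class AverageMeter(object):
    """Computes and stores the average and current value."""

    def __init__(self):
        self.val = 0.0   # most recent value
        self.avg = 0.0   # running average
        self.sum = 0.0   # weighted running sum
        self.count = 0   # number of samples seen so far

    def update(self, val, n=1):
        # val: new measurement; n: how many samples it represents
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count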
def train(self, examples):
    """
    examples: list of examples, each example is of form (board, pi, v)
    """
    for epoch in range(args.epochs):
        print('EPOCH ::: ' + str(epoch + 1))
        data_time = AverageMeter()
        batch_time = AverageMeter()
        pi_losses = AverageMeter()
        v_losses = AverageMeter()
        end = time.time()

        bar = Bar('Training Net', max=int(len(examples) / args.batch_size))
        batch_idx = 0

        # self.sess.run(tf.local_variables_initializer())
        while batch_idx < int(len(examples) / args.batch_size):
            sample_ids = np.random.randint(len(examples), size=args.batch_size)
            boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))

            # predict and compute gradient and do SGD step
            input_dict = {self.nnet.input_boards: boards,
                          self.nnet.target_pis: pis,
                          self.nnet.target_vs: vs,
                          self.nnet.dropout: args.dropout,
                          self.nnet.isTraining: True}

            # measure data loading time
            data_time.update(time.time() - end)

            # run the training step, then record the losses
            self.sess.run(self.nnet.train_step, feed_dict=input_dict)
            pi_loss, v_loss = self.sess.run([self.nnet.loss_pi, self.nnet.loss_v], feed_dict=input_dict)
            pi_losses.update(pi_loss, len(boards))
            v_losses.update(v_loss, len(boards))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            batch_idx += 1

            # plot progress
            bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                batch=batch_idx,
                size=int(len(examples) / args.batch_size),
                data=data_time.avg,
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                lpi=pi_losses.avg,
                lv=v_losses.avg,
            )
            bar.next()
        bar.finish()
# eps_completed tracks how many items have been processed (the original
# initialized this counter under the name `eps` but never used it under it)
eps_completed = 0
while test_list:
    a = np.random.binomial(1, 0.5)
    b = np.random.binomial(1, 0.5)
    if a == 1 and b == 1:
        test_list.pop()
        eps_completed += 1
        if test_list:  # guard: the list may hold only one remaining item
            test_list.pop()
            eps_completed += 1
        bar.suffix = '({eps}/{maxeps})'.format(eps=eps_completed, maxeps=length_list)
        bar.next()
        time.sleep(3)
    elif a == 0 and b == 0:
        test_list.pop()
        eps_completed += 1
        # bar.suffix only controls the suffix output
        bar.suffix = '({eps}/{maxeps})'.format(eps=eps_completed, maxeps=length_list)
        # bar.next() controls the drawing of the completion bar
        bar.next()
        time.sleep(3)
bar.finish()
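# Setup assumed by the snippet above: test_list, length_list and bar are used
# but never defined there. A sketch with illustrative values only (the Bar
# import path follows the one used later in these snippets):
import time

import numpy as np
from pytorch_classification.utils import Bar

test_list = list(range(10))     # items still to be processed
length_list = len(test_list)    # total count, shown as the bar maximum
bar = Bar('Processing', max=length_list)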
def train(self, examples):
    """
    examples: list of examples, each example is of form (board, pi, v)
    """
    optimizer = optim.Adam(self.nnet.parameters(), lr=self.lr)

    for epoch in range(self.epochs):
        print('EPOCH ::: ' + str(epoch + 1))
        self.nnet.train()
        data_time = AverageMeter()
        batch_time = AverageMeter()
        pi_losses = AverageMeter()
        v_losses = AverageMeter()
        end = time.time()

        bar = Bar('Training Net', max=int(len(examples) / self.batch_size))
        batch_idx = 0

        while batch_idx < int(len(examples) / self.batch_size):
            sample_ids = np.random.randint(len(examples), size=self.batch_size)
            boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))

            # each board is a (front, mid, back, cards) tuple; the original
            # built these lists twice (a loop followed by identical
            # comprehensions), so only one copy is kept here
            fronts = [b[0] for b in boards]
            mids = [b[1] for b in boards]
            backs = [b[2] for b in boards]
            cards = [b[3] for b in boards]

            fronts = torch.FloatTensor(fronts).to(device)
            mids = torch.FloatTensor(mids).to(device)
            backs = torch.FloatTensor(backs).to(device)
            cards = torch.FloatTensor(cards).to(device)
            target_pis = torch.FloatTensor(np.array(pis)).to(device)
            target_vs = torch.FloatTensor(np.array(vs).astype(np.float64)).to(device)

            # measure data loading time
            data_time.update(time.time() - end)

            # compute output
            out_pi, out_v = self.nnet(fronts, mids, backs, cards)
            l_pi = self.loss_pi(target_pis, out_pi)
            l_v = self.loss_v(target_vs, out_v)
            total_loss = l_pi + l_v

            # record loss
            pi_losses.update(l_pi.item(), fronts.size(0))
            v_losses.update(l_v.item(), fronts.size(0))

            # compute gradient and do SGD step
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            batch_idx += 1

            # plot progress
            bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                batch=batch_idx,
                size=int(len(examples) / self.batch_size),
                data=data_time.avg,
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                lpi=pi_losses.avg,
                lv=v_losses.avg,
            )
            bar.next()
        bar.finish()
def train(self, examples):
    """
    examples: list of examples, each example is of form (board, pi, v)
    """
    # both networks' parameters must be registered, otherwise the value
    # network self.vnet would never be updated by optimizer.step()
    optimizer = optim.Adam(list(self.nnet.parameters()) + list(self.vnet.parameters()))

    for epoch in range(args.epochs):
        print('EPOCH ::: ' + str(epoch + 1))
        self.nnet.train()
        data_time = AverageMeter()
        batch_time = AverageMeter()
        pi_losses = AverageMeter()
        v_losses = AverageMeter()
        end = time.time()

        bar = Bar('Training Net', max=int(len(examples) / args.batch_size))
        batch_idx = 0

        while batch_idx < int(len(examples) / args.batch_size):
            sample_ids = np.random.randint(len(examples), size=args.batch_size)
            boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))
            boards = torch.FloatTensor(np.array(boards).astype(np.float64))
            target_pis = torch.FloatTensor(np.array(pis))
            target_vs = torch.FloatTensor(np.array(vs).astype(np.float64))

            # predict
            if args.cuda:
                boards, target_pis, target_vs = boards.contiguous().cuda(), target_pis.contiguous().cuda(), target_vs.contiguous().cuda()
            boards, target_pis, target_vs = Variable(boards), Variable(target_pis), Variable(target_vs)

            # measure data loading time
            data_time.update(time.time() - end)

            # compute output (policy and value come from separate networks here)
            # print(boards.size())
            out_pi = self.nnet(boards)
            out_v = self.vnet(boards)
            l_pi = self.loss_pi(target_pis, out_pi)
            l_v = self.loss_v(target_vs, out_v)

            # record loss
            pi_losses.update(l_pi.data[0], boards.size(0))
            v_losses.update(l_v.data[0], boards.size(0))

            # compute gradient and do an SGD step for each loss separately
            optimizer.zero_grad()
            l_pi.backward()
            optimizer.step()

            optimizer.zero_grad()
            l_v.backward()
            optimizer.step()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            batch_idx += 1

            # plot progress
            bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                batch=batch_idx,
                size=int(len(examples) / args.batch_size),
                data=data_time.avg,
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                lpi=pi_losses.avg,
                lv=v_losses.avg,
            )
            bar.next()
        bar.finish()
def train(self, examples):
    """
    examples: list of examples, each example is of form (board, pi, v)
    """
    for epoch in range(args.epochs):
        print('EPOCH ::: ' + str(epoch + 1))
        data_time = AverageMeter()
        batch_time = AverageMeter()
        pi_losses = AverageMeter()
        v_losses = AverageMeter()
        end = time.time()

        bar = Bar('Training Net', max=int(len(examples) / args.batch_size))
        batch_idx = 0

        # self.sess.run(tf.local_variables_initializer())
        while batch_idx < int(len(examples) / args.batch_size):
            sample_ids = np.random.randint(len(examples), size=args.batch_size)
            boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))

            # predict and compute gradient and do SGD step
            input_dict = {
                self.nnet.input_boards: boards,
                self.nnet.target_pis: pis,
                self.nnet.target_vs: vs,
                self.nnet.dropout: args.dropout,
                self.nnet.isTraining: True
            }

            # measure data loading time
            data_time.update(time.time() - end)

            # print("dimension of boards: ", len(boards[0]))
            # print("pis l: ", len(pis[0]))
            # print("pis t: ", type(pis))

            # record loss
            self.sess.run(self.nnet.train_step, feed_dict=input_dict)
            # print("n1")
            pi_loss, v_loss = self.sess.run([self.nnet.loss_pi, self.nnet.loss_v], feed_dict=input_dict)
            pi_losses.update(pi_loss, len(boards))
            v_losses.update(v_loss, len(boards))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            batch_idx += 1

            # plot progress
            bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                batch=batch_idx,
                size=int(len(examples) / args.batch_size),
                data=data_time.avg,
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                lpi=pi_losses.avg,
                lv=v_losses.avg,
            )
            bar.next()
        bar.finish()
def main():
    # global args
    args = parser.parse_args()

    # <editor-fold desc="Initialization">
    if args.comment == "test":
        print("WARNING: name is test!!!\n\n")

    # now = datetime.datetime.now()
    # current_date = now.strftime("%m-%d-%H-%M")

    assert args.text_criterion in ("MSE", "Cosine", "Hinge", "NLLLoss"), 'Invalid Loss Function'
    assert args.cm_criterion in ("MSE", "Cosine", "Hinge"), 'Invalid Loss Function'
    assert args.common_emb_ratio <= 1.0 and args.common_emb_ratio >= 0

    mask = int(args.common_emb_ratio * args.hidden_size)

    cuda = args.cuda
    if cuda == 'true':
        cuda = True
    else:
        cuda = False

    if args.load_model == "NONE":
        keep_loading = False
        # model_path = args.model_path + current_date + "/"
        model_path = args.model_path + args.comment + "/"
    else:
        keep_loading = True
        model_path = args.model_path + args.load_model + "/"

    result_path = args.result_path
    if result_path == "NONE":
        result_path = model_path + "results/"

    if not os.path.exists(result_path):
        os.makedirs(result_path)
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    # </editor-fold>

    # <editor-fold desc="Image Preprocessing">
    # Image preprocessing //ATTENTION
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # inverse of the normalization above (0.225, not 0.255, to match the std used)
    inv_normalize = transforms.Normalize(
        mean=[-0.485 / 0.229, -0.456 / 0.224, -0.406 / 0.225],
        std=[1 / 0.229, 1 / 0.224, 1 / 0.225])
    # </editor-fold>

    # <editor-fold desc="Creating Embeddings">
    # Load vocabulary wrapper.
    print("Loading Vocabulary...")
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Load Embeddings
    emb_size = args.word_embedding_size
    emb_path = args.embedding_path
    if args.embedding_path[-1] == '/':
        emb_path += 'glove.6B.' + str(emb_size) + 'd.txt'
    print("Loading Embeddings...")
    emb = load_glove_embeddings(emb_path, vocab.word2idx, emb_size)

    # glove_emb = Embeddings(emb_size,len(vocab.word2idx),vocab.word2idx["<pad>"])
    # glove_emb.word_lut.weight.data.copy_(emb)
    # glove_emb.word_lut.weight.requires_grad = False

    glove_emb = nn.Embedding(emb.size(0), emb.size(1))
    # glove_emb = embedding(emb.size(0), emb.size(1))
    # glove_emb.weight = nn.Parameter(emb)

    # Freeze weights
    # if args.fixed_embeddings == "true":
    #     glove_emb.weight.requires_grad = False
    # </editor-fold>

    # <editor-fold desc="Data-Loaders">
    # Build data loader
    print("Building Data Loader For Test Set...")
    data_loader = get_loader(args.image_dir, args.caption_path, vocab,
                             transform, args.batch_size,
                             shuffle=True, num_workers=args.num_workers)

    print("Building Data Loader For Validation Set...")
    val_loader = get_loader(args.valid_dir, args.valid_caption_path, vocab,
                            transform, args.batch_size,
                            shuffle=True, num_workers=args.num_workers)
    # </editor-fold>

    # <editor-fold desc="Network Initialization">
    print("Setting up the Networks...")
    vae_Txt = SentenceVAE(glove_emb, len(vocab), hidden_size=args.hidden_size,
                          latent_size=args.latent_size, batch_size=args.batch_size)
    vae_Img = ImgVAE(img_dimension=args.crop_size, hidden_size=args.hidden_size,
                     latent_size=args.latent_size)

    if cuda:
        vae_Txt = vae_Txt.cuda()
        vae_Img = vae_Img.cuda()
    # </editor-fold>

    # <editor-fold desc="Losses">
    # Losses and Optimizers
    print("Setting up the Objective Functions...")
    img_criterion = nn.MSELoss()
    # txt_criterion = nn.MSELoss(size_average=True)
    if args.text_criterion == 'MSE':
        txt_criterion = nn.MSELoss()
    elif args.text_criterion == "Cosine":
        txt_criterion = nn.CosineEmbeddingLoss(size_average=False)
    elif args.text_criterion == "NLLLoss":
        txt_criterion = nn.NLLLoss()
    else:
        txt_criterion = nn.HingeEmbeddingLoss(size_average=False)

    if args.cm_criterion == 'MSE':
        cm_criterion = nn.MSELoss()
    elif args.cm_criterion == "Cosine":
        cm_criterion = nn.CosineEmbeddingLoss()
    else:
        cm_criterion = nn.HingeEmbeddingLoss()

    if cuda:
        img_criterion = img_criterion.cuda()
        txt_criterion = txt_criterion.cuda()
        cm_criterion = cm_criterion.cuda()
    # txt_criterion = nn.CrossEntropyLoss()
    # </editor-fold>

    # <editor-fold desc="Optimizers">
    print("Setting up the Optimizers...")
    img_optim = optim.Adam(vae_Img.parameters(), lr=args.learning_rate,
                           betas=(0.5, 0.999), weight_decay=0.00001)
    txt_optim = optim.Adam(vae_Txt.parameters(), lr=args.learning_rate,
                           betas=(0.5, 0.999), weight_decay=0.00001)
    # </editor-fold desc="Optimizers">

    train_images = True  # Reverse 2
    step = 0

    for epoch in range(args.num_epochs):
        # <editor-fold desc="Epoch Initialization">
        # TRAINING TIME
        print('EPOCH ::: TRAINING ::: ' + str(epoch + 1))
        batch_time = AverageMeter()
        txt_losses = AverageMeter()
        img_losses = AverageMeter()
        cm_losses = AverageMeter()
        end = time.time()

        bar = Bar('Training Net', max=len(data_loader))

        if keep_loading:
            suffix = "-" + str(epoch) + "-" + args.load_model + ".pkl"
            try:
                vae_Img.load_state_dict(torch.load(os.path.join(args.model_path, 'vae-img' + suffix)))
                vae_Txt.load_state_dict(torch.load(os.path.join(args.model_path, 'vae-txt' + suffix)))
            except FileNotFoundError:
                print("Didn't find any models; switching to training")
                keep_loading = False

        if not keep_loading:
            # Set training mode
            vae_Txt.train()
            vae_Img.train()
        # </editor-fold desc="Epoch Initialization">
        # train_images = not train_images
        for i, (images, captions, lengths) in enumerate(data_loader):
            if i == len(data_loader) - 1:
                break

            # <editor-fold desc="Training Parameters Initialization">
            # Set mini-batch dataset
            images = to_var(images)
            captions = to_var(captions)
            # captions = captions.transpose(0,1).unsqueeze(2)
            lengths = to_var(torch.LongTensor(lengths))
            # print(captions.size())

            # Forward, Backward and Optimize
            img_optim.zero_grad()
            txt_optim.zero_grad()
            # </editor-fold desc="Training Parameters Initialization">

            # <editor-fold desc="Forward passes">
            img_out, img_mu, img_logv, img_z = vae_Img(images)
            txt_out, txt_mu, txt_logv, txt_z = vae_Txt(captions, lengths)

            img_rc_loss = img_vae_loss(img_out, images, img_mu, img_logv) / (args.batch_size * args.crop_size ** 2)

            NLL_loss, KL_loss, KL_weight = seq_vae_loss(txt_out, captions, lengths, txt_mu, txt_logv,
                                                        "logistic", step, 0.0025, 2500)
            txt_rc_loss = (NLL_loss + KL_weight * KL_loss) / torch.sum(lengths).float()

            cm_loss = crossmodal_loss(txt_z, img_z, mask, args.cm_criterion, cm_criterion,
                                      args.negative_samples, epoch)
            # cm_loss += crossmodal_loss(txt_logv, img_logv, mask,
            #                            args.cm_criterion, cm_criterion,
            #                            args.negative_samples, epoch)

            # Computes the loss to be back-propagated
            img_loss = img_rc_loss * (1 - args.cm_loss_weight) + cm_loss * args.cm_loss_weight
            txt_loss = txt_rc_loss * (1 - args.cm_loss_weight) + cm_loss * args.cm_loss_weight
            # txt_loss = txt_rc_loss + cm_loss * args.cm_loss_weight
            # img_loss = img_rc_loss + cm_loss * args.cm_loss_weight

            txt_losses.update(txt_rc_loss.data[0], args.batch_size)
            img_losses.update(img_rc_loss.data[0], args.batch_size)
            cm_losses.update(cm_loss.data[0], args.batch_size)
            # </editor-fold desc="Loss accumulation">

            # <editor-fold desc="Back Propagation">
            # Half of the time we update one pipeline, the other half the other one
            if train_images:
                # Image Network Training and Backpropagation
                img_loss.backward()
                img_optim.step()
            else:
                # Text Network Training & Back Propagation
                txt_loss.backward()
                txt_optim.step()

            step += 1
            # train_images = not train_images
            # </editor-fold desc="Back Propagation">

            # <editor-fold desc="Logging">
            if i % args.image_save_interval == 0:
                # integer division so the subdirectory name is not a float under Python 3
                subdir_path = os.path.join(result_path, str(i // args.image_save_interval))

                if os.path.exists(subdir_path):
                    pass
                else:
                    os.makedirs(subdir_path)

                for im_idx in range(3):
                    # im_or = (inv_normalize([im_idx]).cpu().data.numpy().transpose(1,2,0))*255
                    # im = (inv_normalize([im_idx]).cpu().data.numpy().transpose(1,2,0))*255
                    im_or = (images[im_idx].cpu().data.numpy().transpose(1, 2, 0) / 2 + .5) * 255
                    im = (img_out[im_idx].cpu().data.numpy().transpose(1, 2, 0) / 2 + .5) * 255
                    # im = img_out[im_idx].cpu().data.numpy().transpose(1,2,0)*255

                    filename_prefix = os.path.join(subdir_path, str(im_idx))
                    scipy.misc.imsave(filename_prefix + '_original.A.jpg', im_or)
                    scipy.misc.imsave(filename_prefix + '.A.jpg', im)

                    txt_or = " ".join([vocab.idx2word[c] for c in captions[im_idx].cpu().data.numpy()])
                    _, generated = torch.topk(txt_out[im_idx], 1)
                    txt = " ".join([vocab.idx2word[c] for c in generated[:, 0].cpu().data.numpy()])

                    with open(filename_prefix + "_captions.txt", "w") as text_file:
                        text_file.write("Original: %s\n" % txt_or)
                        text_file.write("Generated: %s" % txt)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # plot progress
            bar.suffix = '({batch}/{size}) Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_Img: {img_l:.3f}| Loss_Txt: {txt_l:.3f} | Loss_CM: {cm_l:.4f}'.format(
                batch=i,
                size=len(data_loader),
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                img_l=img_losses.avg,
                txt_l=txt_losses.avg,
                cm_l=cm_losses.avg,
            )
            bar.next()
            # </editor-fold desc="Logging">

        bar.finish()

        # <editor-fold desc="Saving the models">
        # Save the models
        print('\n')
        print('Saving the models in {}...'.format(model_path))
        torch.save(vae_Img.state_dict(), os.path.join(model_path, 'vae-img-%d-' % (epoch + 1)) + ".pkl")
        torch.save(vae_Txt.state_dict(), os.path.join(model_path, 'vae-txt-%d-' % (epoch + 1)) + ".pkl")
        # </editor-fold desc="Saving the models">

        if args.validate == "true":
            validate(vae_Img, vae_Txt, val_loader, mask, 10)
def train(self, examples):
    """
    examples: list of examples, each example is of form (board, pi, v)
    """
    optimizer = optim.Adam(self.nnet.parameters())

    for epoch in range(args.epochs):
        print('EPOCH ::: ' + str(epoch + 1))
        self.nnet.train()
        data_time = AverageMeter()
        batch_time = AverageMeter()
        pi_losses = AverageMeter()
        v_losses = AverageMeter()
        end = time.time()

        bar = Bar('Training Net', max=int(len(examples) / args.batch_size))
        batch_idx = 0

        while batch_idx < int(len(examples) / args.batch_size):
            sample_ids = np.random.randint(len(examples), size=args.batch_size)
            boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))
            boards = torch.FloatTensor(np.array(boards).astype(np.float64))
            target_pis = torch.FloatTensor(np.array(pis))
            target_vs = torch.FloatTensor(np.array(vs).astype(np.float64))

            # predict
            if args.cuda:
                boards, target_pis, target_vs = boards.contiguous().cuda(), target_pis.contiguous().cuda(), target_vs.contiguous().cuda()
            boards, target_pis, target_vs = Variable(boards), Variable(target_pis), Variable(target_vs)

            # measure data loading time
            data_time.update(time.time() - end)

            # compute output
            out_pi, out_v = self.nnet(boards)
            l_pi = self.loss_pi(target_pis, out_pi)
            l_v = self.loss_v(target_vs, out_v)
            total_loss = l_pi + l_v

            # record loss
            pi_losses.update(l_pi.data[0], boards.size(0))
            v_losses.update(l_v.data[0], boards.size(0))

            # compute gradient and do SGD step
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            batch_idx += 1

            # plot progress
            bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                batch=batch_idx,
                size=int(len(examples) / args.batch_size),
                data=data_time.avg,
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                lpi=pi_losses.avg,
                lv=v_losses.avg,
            )
            bar.next()
        bar.finish()
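# The train() variants above all call self.loss_pi and self.loss_v without
# defining them. A minimal sketch of the usual AlphaZero-style losses (an
# assumption based on the standard formulation, not taken from this code;
# these would live on the same network-wrapper class): loss_pi is the
# cross-entropy between the target policy and the network's log-probabilities,
# and loss_v is the mean squared error on the predicted game outcome.
def loss_pi(self, targets, outputs):
    # outputs are assumed to be log-softmax probabilities over actions
    return -torch.sum(targets * outputs) / targets.size(0)

def loss_v(self, targets, outputs):
    # outputs have shape (batch, 1); targets have shape (batch,)
    return torch.sum((targets - outputs.view(-1)) ** 2) / targets.size(0)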
def main():
    # global args
    args = parser.parse_args()

    # <editor-fold desc="Initialization">
    now = datetime.datetime.now()
    current_date = now.strftime("%m-%d-%H-%M")

    assert args.text_criterion in ("MSE", "Cosine", "Hinge", "NLLLoss"), 'Invalid Loss Function'
    assert args.cm_criterion in ("MSE", "Cosine", "Hinge"), 'Invalid Loss Function'

    mask = int(args.common_emb_percentage * args.hidden_size)
    assert mask <= args.hidden_size

    cuda = args.cuda
    if cuda == 'true':
        cuda = True
    else:
        cuda = False

    if args.load_model == "NONE":
        keep_loading = True
        model_path = args.model_path + current_date + "/"
    else:
        keep_loading = False
        model_path = args.model_path + args.load_model + "/"

    result_path = args.result_path
    if result_path == "NONE":
        result_path = model_path + "results/"

    if not os.path.exists(result_path):
        os.makedirs(result_path)
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    # </editor-fold>

    # <editor-fold desc="Image Preprocessing">
    # Image preprocessing //ATTENTION
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])
    # </editor-fold>

    # <editor-fold desc="Creating Embeddings">
    # Load vocabulary wrapper.
    print("Loading Vocabulary...")
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Load Embeddings
    emb_size = args.embedding_size
    emb_path = args.embedding_path
    if args.embedding_path[-1] == '/':
        emb_path += 'glove.6B.' + str(emb_size) + 'd.txt'

    print("Loading Embeddings...")
    emb = load_glove_embeddings(emb_path, vocab.word2idx, emb_size)

    # glove_emb = Embeddings(emb_size,len(vocab.word2idx),vocab.word2idx["<pad>"])
    # glove_emb.word_lut.weight.data.copy_(emb)
    # glove_emb.word_lut.weight.requires_grad = False

    glove_emb = nn.Embedding(emb.size(0), emb.size(1))
    # glove_emb = embedding(emb.size(0), emb.size(1))
    # glove_emb.weight = nn.Parameter(emb)

    # Freeze weights
    # if args.fixed_embeddings == "true":
    #     glove_emb.weight.requires_grad = False
    # </editor-fold>

    # <editor-fold desc="Data-Loaders">
    # Build data loader
    print("Building Data Loader For Test Set...")
    data_loader = get_loader(args.image_dir, args.caption_path, vocab,
                             transform, args.batch_size,
                             shuffle=True, num_workers=args.num_workers)

    print("Building Data Loader For Validation Set...")
    val_loader = get_loader(args.valid_dir, args.valid_caption_path, vocab,
                            transform, args.batch_size,
                            shuffle=True, num_workers=args.num_workers)
    # </editor-fold>

    # <editor-fold desc="Network Initialization">
    print("Setting up the Networks...")
    encoder_Txt = TextEncoder(glove_emb, num_layers=1, bidirectional=False, hidden_size=args.hidden_size)
    decoder_Txt = TextDecoder(glove_emb, len(vocab), num_layers=1, bidirectional=False, hidden_size=args.hidden_size)
    # decoder_Txt = TextDecoder(encoder_Txt, glove_emb)
    # decoder_Txt = DecoderRNN(glove_emb, hidden_size=args.hidden_size)

    encoder_Img = ImageEncoder(img_dimension=args.crop_size, feature_dimension=args.hidden_size)
    decoder_Img = ImageDecoder(img_dimension=args.crop_size, feature_dimension=args.hidden_size)

    if cuda:
        encoder_Txt = encoder_Txt.cuda()
        decoder_Img = decoder_Img.cuda()
        encoder_Img = encoder_Img.cuda()
        decoder_Txt = decoder_Txt.cuda()
    # </editor-fold>

    # <editor-fold desc="Losses">
    # Losses and Optimizers
    print("Setting up the Objective Functions...")
    img_criterion = nn.MSELoss()
    # txt_criterion = nn.MSELoss(size_average=True)
    if args.text_criterion == 'MSE':
        txt_criterion = nn.MSELoss()
    elif args.text_criterion == "Cosine":
        txt_criterion = nn.CosineEmbeddingLoss(size_average=False)
    elif args.text_criterion == "NLLLoss":
        txt_criterion = nn.NLLLoss()
    else:
        txt_criterion = nn.HingeEmbeddingLoss(size_average=False)

    if args.cm_criterion == 'MSE':
        cm_criterion = nn.MSELoss()
    elif args.cm_criterion == "Cosine":
        cm_criterion = nn.CosineEmbeddingLoss()
    else:
        cm_criterion = nn.HingeEmbeddingLoss()

    if cuda:
        img_criterion = img_criterion.cuda()
        txt_criterion = txt_criterion.cuda()
        cm_criterion = cm_criterion.cuda()
    # txt_criterion = nn.CrossEntropyLoss()
    # </editor-fold>

    # <editor-fold desc="Optimizers">
    # gen_params = chain(generator_A.parameters(), generator_B.parameters())
    print("Setting up the Optimizers...")
    # img_params = chain(decoder_Img.parameters(), encoder_Img.parameters())
    # txt_params = chain(decoder_Txt.decoder.parameters(), encoder_Txt.encoder.parameters())
    # img_params = list(decoder_Img.parameters()) + list(encoder_Img.parameters())
    # txt_params = list(decoder_Txt.decoder.parameters()) + list(encoder_Txt.encoder.parameters())

    # ATTENTION: Check betas and weight decay
    # ATTENTION: Check why valid_params fails on image networks with out of memory error
    # img_optim = optim.Adam(img_params, lr=0.0001, betas=(0.5, 0.999), weight_decay=0.00001)
    # txt_optim = optim.Adam(valid_params(txt_params), lr=0.0001, betas=(0.5, 0.999), weight_decay=0.00001)
    img_enc_optim = optim.Adam(encoder_Img.parameters(), lr=args.learning_rate)  # betas=(0.5, 0.999), weight_decay=0.00001)
    img_dec_optim = optim.Adam(decoder_Img.parameters(), lr=args.learning_rate)  # betas=(0.5, 0.999), weight_decay=0.00001)
    txt_enc_optim = optim.Adam(valid_params(encoder_Txt.parameters()), lr=args.learning_rate)  # betas=(0.5, 0.999), weight_decay=0.00001)
    txt_dec_optim = optim.Adam(valid_params(decoder_Txt.parameters()), lr=args.learning_rate)  # betas=(0.5, 0.999), weight_decay=0.00001)
    # </editor-fold desc="Optimizers">

    train_images = False  # Reverse 2
    for epoch in range(args.num_epochs):
        # <editor-fold desc="Epoch Initialization">
        # TRAINING TIME
        print('EPOCH ::: TRAINING ::: ' + str(epoch + 1))
        batch_time = AverageMeter()
        txt_losses = AverageMeter()
        img_losses = AverageMeter()
        cm_losses = AverageMeter()
        end = time.time()

        bar = Bar('Training Net', max=len(data_loader))

        if keep_loading:
            suffix = "-" + str(epoch) + "-" + args.load_model + ".pkl"
            try:
                encoder_Img.load_state_dict(torch.load(os.path.join(args.model_path, 'encoder-img' + suffix)))
                encoder_Txt.load_state_dict(torch.load(os.path.join(args.model_path, 'encoder-txt' + suffix)))
                decoder_Img.load_state_dict(torch.load(os.path.join(args.model_path, 'decoder-img' + suffix)))
                decoder_Txt.load_state_dict(torch.load(os.path.join(args.model_path, 'decoder-txt' + suffix)))
            except FileNotFoundError:
                print("Didn't find any models; switching to training")
                keep_loading = False

        if not keep_loading:
            # Set training mode
            encoder_Img.train()
            decoder_Img.train()
            encoder_Txt.train()
            decoder_Txt.train()
        # </editor-fold desc="Epoch Initialization">

        train_images = not train_images
        for i, (images, captions, lengths) in enumerate(data_loader):
            if i == len(data_loader) - 1:
                break

            # <editor-fold desc="Training Parameters Initialization">
            # Set mini-batch dataset
            images = to_var(images)
            captions = to_var(captions)
            # target = pack_padded_sequence(captions, lengths, batch_first=True)[0]
            # captions, lengths = pad_sequences(captions, lengths)
            # images = torch.FloatTensor(images)

            captions = captions.transpose(0, 1).unsqueeze(2)
            lengths = to_var(torch.LongTensor(lengths))
            # print(captions.size())

            # Forward, Backward and Optimize
            # img_optim.zero_grad()
            img_dec_optim.zero_grad()
            img_enc_optim.zero_grad()
            # encoder_Img.zero_grad()
            # decoder_Img.zero_grad()

            # txt_params.zero_grad()
            txt_dec_optim.zero_grad()
            txt_enc_optim.zero_grad()
            # encoder_Txt.encoder.zero_grad()
            # decoder_Txt.decoder.zero_grad()
            # </editor-fold desc="Training Parameters Initialization">

            # <editor-fold desc="Image AE">
            # Image Auto_Encoder Forward
            mu, logvar = encoder_Img(images)

            Iz = logvar
            # Iz = reparametrize(mu, logvar)
            IzI = decoder_Img(mu)

            img_rc_loss = img_criterion(IzI, images)
            # </editor-fold desc="Image AE">

            # <editor-fold desc="Seq2Seq AE">
            # Text Auto Encoder Forward
            # target = target[:-1]  # exclude last target from inputs

            teacher_forcing_ratio = 0.5
            encoder_hidden = encoder_Txt.initHidden(args.batch_size)

            input_length = captions.size(0)
            target_length = captions.size(0)

            if cuda:
                encoder_outputs = Variable(torch.zeros(input_length, args.batch_size, args.hidden_size).cuda())
                decoder_outputs = Variable(torch.zeros(input_length, args.batch_size, len(vocab)).cuda())
            else:
                encoder_outputs = Variable(torch.zeros(input_length, args.batch_size, args.hidden_size))
                decoder_outputs = Variable(torch.zeros(input_length, args.batch_size, len(vocab)))

            txt_rc_loss = 0
            for ei in range(input_length):
                encoder_output, encoder_hidden = encoder_Txt(captions[ei, :], encoder_hidden)
                encoder_outputs[ei] = encoder_output

            decoder_input = Variable(torch.LongTensor([vocab.word2idx['<start>']])).cuda()\
                .repeat(args.batch_size, 1)
            decoder_hidden = encoder_hidden

            use_teacher_forcing = True  # if np.random.random() < teacher_forcing_ratio else False

            if use_teacher_forcing:
                # Teacher forcing: Feed the target as the next input
                for di in range(target_length - 1):
                    decoder_output, decoder_hidden = decoder_Txt(decoder_input, decoder_hidden)  # , encoder_outputs)
                    # txt_rc_loss += txt_criterion(decoder_output, captions[di].unsqueeze(1))
                    decoder_outputs[di] = decoder_output
                    decoder_input = captions[di + 1]  # Teacher forcing
            else:
                # Without teacher forcing: use its own predictions as the next input.
                # The original assigned the step result to decoder_outputs here,
                # clobbering the accumulator tensor; the step output belongs in
                # decoder_output.
                for di in range(target_length - 1):
                    decoder_output, decoder_hidden = decoder_Txt(decoder_input, decoder_hidden)
                    decoder_outputs[di] = decoder_output
                    topv, topi = decoder_output.topk(1)
                    decoder_input = topi.squeeze().detach()  # detach from history as input
                    txt_rc_loss += txt_criterion(decoder_output, captions[di])
                    # if decoder_input.item() == ("<end>"):
                    #     break

            # Check start tokens etc
            txt_rc_loss, _, _, _ = masked_cross_entropy(
                decoder_outputs[:target_length - 1].transpose(0, 1).contiguous(),
                captions[1:, :, 0].transpose(0, 1).contiguous(),
                lengths - 1
            )
            # captions = captions[:-1,:,:]
            # lengths = lengths - 1
            # dec_state = None

            # Computes Cross-Modal Loss
            # Tz = encoder_hidden[0]
            Tz = encoder_output[:, 0, :]

            txt = Tz.narrow(1, 0, mask)
            im = Iz.narrow(1, 0, mask)

            if args.cm_criterion == 'MSE':
                # cm_loss = cm_criterion(Tz.narrow(1,0,mask), Iz.narrow(1,0,mask))
                cm_loss = mse_loss(txt, im)
            else:
                cm_loss = cm_criterion(txt, im,
                                       Variable(torch.ones(im.size(0)).cuda()))

            # K - Negative Samples
            k = args.negative_samples
            neg_rate = (20 - epoch) / 20
            for _ in range(k):
                if cuda:
                    perm = torch.randperm(args.batch_size).cuda()
                else:
                    perm = torch.randperm(args.batch_size)
                # if args.criterion == 'MSE':
                #     cm_loss -= mse_loss(txt, im[perm])/k
                # else:
                #     cm_loss -= cm_criterion(txt, im[perm],
                #         Variable(torch.ones(Tz.narrow(1,0,mask).size(0)).cuda()))/k

                # sim = (F.cosine_similarity(txt,txt[perm]) - 0.5)/2
                if args.cm_criterion == 'MSE':
                    sim = (F.cosine_similarity(txt, txt[perm]) - 1) / (2 * k)
                    # cm_loss = cm_criterion(Tz.narrow(1,0,mask), Iz.narrow(1,0,mask))
                    cm_loss += mse_loss(txt, im[perm], sim)
                else:
                    cm_loss += neg_rate * cm_criterion(txt, im[perm],
                                                       Variable(-1 * torch.ones(txt.size(0)).cuda())) / k

            # cm_loss = Variable(torch.max(torch.FloatTensor([-0.100]).cuda(), cm_loss.data))

            # Computes the loss to be back-propagated
            img_loss = img_rc_loss * (1 - args.cm_loss_weight) + cm_loss * args.cm_loss_weight
            txt_loss = txt_rc_loss * (1 - args.cm_loss_weight) + cm_loss * args.cm_loss_weight
            # txt_loss = txt_rc_loss + 0.1 * cm_loss
            # img_loss = img_rc_loss + cm_loss

            txt_losses.update(txt_rc_loss.data[0], args.batch_size)
            img_losses.update(img_rc_loss.data[0], args.batch_size)
            cm_losses.update(cm_loss.data[0], args.batch_size)
            # </editor-fold desc="Loss accumulation">

            # <editor-fold desc="Back Propagation">
            # Half of the time we update one pipeline, the other half the other one
            if train_images:
                # Image Network Training and Backpropagation
                img_loss.backward()
                # img_optim.step()
                img_enc_optim.step()
                img_dec_optim.step()
            else:
                # Text Network Training & Back Propagation
                txt_loss.backward()
                # txt_optim.step()
                txt_enc_optim.step()
                txt_dec_optim.step()

            train_images = not train_images
            # </editor-fold desc="Back Propagation">

            # <editor-fold desc="Logging">
            if i % args.image_save_interval == 0:
                # integer division so the subdirectory name is not a float under Python 3
                subdir_path = os.path.join(result_path, str(i // args.image_save_interval))

                if os.path.exists(subdir_path):
                    pass
                else:
                    os.makedirs(subdir_path)

                for im_idx in range(3):
                    im_or = (images[im_idx].cpu().data.numpy().transpose(1, 2, 0) / 2 + .5) * 255
                    im = (IzI[im_idx].cpu().data.numpy().transpose(1, 2, 0) / 2 + .5) * 255

                    filename_prefix = os.path.join(subdir_path, str(im_idx))
                    scipy.misc.imsave(filename_prefix + '_original.A.jpg', im_or)
                    scipy.misc.imsave(filename_prefix + '.A.jpg', im)

                    txt_or = " ".join([vocab.idx2word[c] for c in list(captions[:, im_idx].view(-1).cpu().data)])
                    txt = " ".join([vocab.idx2word[c] for c in list(decoder_outputs[:, im_idx].view(-1).cpu().data)])
                    print("Original: ", txt_or)
                    print(txt)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # plot progress
            bar.suffix = '({batch}/{size}) Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_Img: {img_l:.3f}| Loss_Txt: {txt_l:.3f} | Loss_CM: {cm_l:.4f}'.format(
                batch=i,
                size=len(data_loader),
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                img_l=img_losses.avg,
                txt_l=txt_losses.avg,
                cm_l=cm_losses.avg,
            )
            bar.next()
            # </editor-fold desc="Logging">

        bar.finish()

        # <editor-fold desc="Saving the models">
        # Save the models
        print('\n')
        print('Saving the models in {}...'.format(model_path))
        torch.save(decoder_Img.state_dict(), os.path.join(model_path, 'decoder-img-%d-' % (epoch + 1)) + current_date + ".pkl")
        torch.save(encoder_Img.state_dict(), os.path.join(model_path, 'encoder-img-%d-' % (epoch + 1)) + current_date + ".pkl")
        torch.save(decoder_Txt.state_dict(), os.path.join(model_path, 'decoder-txt-%d-' % (epoch + 1)) + current_date + ".pkl")
        torch.save(encoder_Txt.state_dict(), os.path.join(model_path, 'encoder-txt-%d-' % (epoch + 1)) + current_date + ".pkl")
        # </editor-fold desc="Saving the models">
        if args.validate == "true":
            validate(encoder_Img, encoder_Txt, val_loader, mask, 10)
def learn(self):
    """
    Performs numIters iterations with numEps episodes of self-play in each
    iteration. After every iteration, it retrains the neural network with
    examples in trainExamples (which has a maximum length of maxlenOfQueue).
    It then pits the new neural network against the old one and accepts it
    only if it wins >= updateThreshold fraction of games.
    """
    for i in range(1, self.args.numIters + 1):
        # bookkeeping
        print('------ITER ' + str(i) + '------')
        std = 999
        # examples of the iteration
        if not self.skipFirstSelfPlay or i > 1:
            iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

            eps_time = AverageMeter()
            bar = Bar('Self Play', max=self.args.numEps)
            end = time.time()

            reward_list = []
            count_list = []
            step_list = []
            for eps in range(self.args.numEps):
                self.mcts = MCTS(self.game, self.nnet, self.args)  # reset search tree
                example, step_count = self.executeEpisode()
                iterationTrainExamples += example
                step_list.append(step_count)
                reward_list.append(iterationTrainExamples[-1][2])
                count_list.append(eps)

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps + 1, maxeps=self.args.numEps, et=eps_time.avg,
                    total=bar.elapsed_td, eta=bar.eta_td)
                bar.next()
            bar.finish()

            plt.scatter(count_list, reward_list, label='rewards_training')
            plt.savefig("fig/" + str(self.round) + "_rewards_" + str(i) + ".png")
            plt.close()
            # plt.scatter(count_list, step_list, label='steps_training')
            # plt.savefig("fig/" + str(self.round) + "_steps_" + str(i) + ".png")
            # plt.close()

            iterationTrainExamples, std, mean = self.normalizeReward(iterationTrainExamples)
            # save the iteration examples to the history
            self.trainExamplesHistory.append(iterationTrainExamples)

        if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
            print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)
        # backup history to a file
        # self.saveTrainExamples(i-1)

        # shuffle examples before training
        trainExamples = []
        for e in self.trainExamplesHistory:
            trainExamples.extend(e)
        shuffle(trainExamples)

        # training new network, keeping a copy of the old one
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        # self.pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        # pmcts = MCTS(self.game, self.pnet, self.args)
        self.nnet.train(trainExamples)
        self.show = True
        # nmcts = MCTS(self.game, self.nnet, self.args)
        """
        print('PITTING AGAINST PREVIOUS VERSION')
        arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                      lambda x: np.argmax(nmcts.getActionProb(x, temp=0)), self.game)
        pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

        print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
        if pwins+nwins > 0 and float(nwins)/(pwins+nwins) < self.args.updateThreshold:
            print('REJECTING NEW MODEL')
            self.nnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        else:
            print('ACCEPTING NEW MODEL')
        """
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')

        # std = 999 short-circuits this check when self-play was skipped
        # (mean is undefined in that case)
        if std < 100 and mean < self.game.lower / 4:
            print("stop training because of identical rewards")
            break
def learn(self):
    """
    Performs numIters iterations with numEps episodes of self-play in each
    iteration. After every iteration, it retrains the neural network with
    examples in trainExamples (which has a maximum length of maxlenOfQueue).
    It then pits the new neural network against the old one and accepts it
    only if it wins >= updateThreshold fraction of games.
    """
    for i in range(1, self.args.numIters + 1):
        # bookkeeping
        print('------ITER ' + str(i) + '------')
        # examples of the iteration
        if not self.skipFirstSelfPlay or i > 1:
            iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

            eps_time = AverageMeter()
            bar = Bar('Self Play', max=self.args.numEps)
            end = time.time()

            for eps in range(self.args.numEps):
                self.mcts = MCTS(self.game, self.nnet, self.args)  # reset search tree
                iterationTrainExamples += self.executeEpisode()

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps + 1, maxeps=self.args.numEps, et=eps_time.avg,
                    total=bar.elapsed_td, eta=bar.eta_td)
                bar.next()
            bar.finish()

            # save the iteration examples to the history
            # (this variant keeps only the latest iteration's examples)
            self.trainExamplesHistory = []
            self.trainExamplesHistory.append(iterationTrainExamples)

        # if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
        #     print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
        #           " => remove the oldest trainExamples")
        #     self.trainExamplesHistory.pop(0)
        # backup history to a file
        # NB! the examples were collected using the model from the previous iteration, so (i-1)
        # self.saveTrainExamples(i-1)

        # shuffle examples before training
        trainExamples = []
        for e in self.trainExamplesHistory:
            trainExamples.extend(e)
        shuffle(trainExamples)

        # training new network, keeping a copy of the old one
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        self.nnet.train(trainExamples)

        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
        self.mcts = MCTS(self.game, self.nnet, self.args)
def validate(encoder_Img, encoder_Txt, loader, mask, limit=1000, metric="cosine"):
    cm_criterion = nn.CosineEmbeddingLoss()

    # VALIDATION TIME
    print('\033[92mEPOCH ::: VALIDATION ::: ')

    # Set Evaluation Mode
    encoder_Img.eval()
    try:
        encoder_Txt.encoder.eval()
    except AttributeError:
        encoder_Txt.eval()

    batch_time = AverageMeter()
    end = time.time()

    bar = Bar('Computing Validation Set Embeddings', max=len(loader))
    cm_losses = AverageMeter()
    for i, (images, captions, lengths) in enumerate(loader):
        if i == limit:
            break

        # Set mini-batch dataset
        images = to_var(images)
        captions = to_var(captions)

        captions = captions.transpose(0, 1).unsqueeze(2)
        lengths = torch.LongTensor(lengths)

        _, img_emb = encoder_Img(images)

        try:
            txt_emb, _ = encoder_Txt(captions, lengths)
            txt_emb = txt_emb[0, :, :mask]
        except:
            encoder_hidden = encoder_Txt.initHidden(len(lengths))
            for ei in range(lengths[0] - 1):
                encoder_output, encoder_hidden = encoder_Txt(captions[ei, :], encoder_hidden)
            # the original indexed txt_emb before assigning it; the embedding
            # comes from the last encoder output
            txt_emb = encoder_output[:, 0, :mask]

        img_emb = img_emb[:, :mask]

        # current_embeddings = torch.cat(
        #     (txt_emb.transpose(0,1).data, img_emb.unsqueeze(1).data), 1)
        current_embeddings = np.concatenate(
            (txt_emb.unsqueeze(0).cpu().data.numpy(),
             img_emb.unsqueeze(0).cpu().data.numpy()), 0)

        # current_embeddings = img_emb.data
        if i:
            # result_embeddings = torch.cat(
            result_embeddings = np.concatenate(
                (result_embeddings, current_embeddings), 1)
        else:
            result_embeddings = current_embeddings

        cm_loss = cm_criterion(txt_emb, img_emb,
                               Variable(torch.ones(img_emb.size(0)).cuda()))
        cm_losses.update(cm_loss.data[0], img_emb.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        bar.suffix = '({batch}/{size}) Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | CM_LOSS: {cm_l:.4f}'.format(
            batch=i,
            size=len(loader),
            bt=batch_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            cm_l=cm_losses.avg,
        )
        bar.next()
    bar.finish()

    # mean squared distance between matching text and image embeddings
    # (iterate over the samples, shape[1], not the two modalities in shape[0])
    a = [((result_embeddings[0][i] - result_embeddings[1][i]) ** 2).mean()
         for i in range(result_embeddings.shape[1])]
    print("Validation MSE: ", np.mean(a))

    print("Computing Nearest Neighbors...")
    i = 0
    topk = []
    kss = [1, 5, 10]
    for k in kss:
        if i:
            print("Normalized ")
            result_embeddings[0] = result_embeddings[0] / result_embeddings[0].sum()
            result_embeddings[1] = result_embeddings[1] / result_embeddings[1].sum()

        # k = 5
        neighbors = NearestNeighbors(k, metric='cosine')
        neigh = neighbors
        neigh.fit(result_embeddings[1])
        kneigh = neigh.kneighbors(result_embeddings[0], return_distance=False)

        ks = set()
        for n in kneigh:
            ks.update(set(n))
        print(len(ks) / result_embeddings.shape[1])

        # a = [((result_embeddings[0][i] - result_embeddings[1][i]) ** 2).mean() for i in range(128)]
        # rs = result_embeddings.sum(2)
        # a = (((result_embeddings[0][0]- result_embeddings[1][0])**2).mean())
        # b = (((result_embeddings[0][0]- result_embeddings[0][34])**2).mean())
        topk.append(np.mean([int(i in nn) for i, nn in enumerate(kneigh)]))

    # report once all three k values have been computed (the original printed
    # inside the loop, indexing topk entries that did not yet exist)
    print("Top-{k:},{k2:},{k3:} accuracy for Image Retrieval:\n\n\t\033[95m {tpk: .3f}% \t {tpk2: .3f}% \t {tpk3: .3f}% \n"
          .format(k=kss[0], k2=kss[1], k3=kss[2],
                  tpk=100 * topk[0], tpk2=100 * topk[1], tpk3=100 * topk[2]))
def learn(self):
    """
    Performs numIters iterations with numEps episodes of self-play in each
    iteration. After every iteration, it retrains the neural network with
    examples in trainExamples (which has a maximum length of maxlenOfQueue).
    It then pits the new neural network against the old one and accepts it
    only if it wins >= updateThreshold fraction of games.
    """
    trainExamples = deque([], maxlen=self.args.maxlenOfQueue)
    for i in range(self.args.numIters):
        # bookkeeping
        print('------ITER ' + str(i + 1) + '------')
        eps_time = AverageMeter()
        bar = Bar('Self Play', max=self.args.numEps)
        end = time.time()

        for eps in range(self.args.numEps):
            trainExamples += self.executeEpisode()

            # bookkeeping + plot progress
            eps_time.update(time.time() - end)
            end = time.time()
            bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                eps=eps + 1, maxeps=self.args.numEps, et=eps_time.avg,
                total=bar.elapsed_td, eta=bar.eta_td)
            bar.next()
        bar.finish()

        # training new network, keeping a copy of the old one
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        pnet = self.nnet.__class__(self.game)
        pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        pmcts = MCTS(self.game, pnet, self.args)
        self.nnet.train(trainExamples)
        nmcts = MCTS(self.game, self.nnet, self.args)

        print('PITTING AGAINST PREVIOUS VERSION')
        arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                      lambda x: np.argmax(nmcts.getActionProb(x, temp=0)), self.game)
        pwins, nwins = arena.playGames(self.args.arenaCompare)

        print('NEW/PREV WINS : ' + str(nwins) + '/' + str(pwins))
        # guard against division by zero when neither player wins a game
        if pwins + nwins == 0 or float(nwins) / (pwins + nwins) < self.args.updateThreshold:
            print('REJECTING NEW MODEL')
            self.nnet = pnet
        else:
            print('ACCEPTING NEW MODEL')
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='checkpoint_' + str(i) + '.pth.tar')
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')
        self.mcts = MCTS(self.game, self.nnet, self.args)  # reset search tree
def learn(self):
    """
    Performs numIters iterations with numEps episodes of self-play in each
    iteration. After every iteration, it retrains the neural network with
    examples in trainExamples (which has a maximum length of maxlenOfQueue).
    It then pits the new neural network against the old one and accepts it
    only if it wins >= updateThreshold fraction of games.
    """
    for i in range(1, self.args.numIters + 1):  # for number of rounds
        # bookkeeping
        print('------ITER ' + str(i) + '------')
        # examples of the iteration
        if not self.skipFirstSelfPlay or i > 1:
            iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)  # remove the previous training examples

            eps_time = AverageMeter()
            bar = Bar('Self Play', max=self.args.numEps)
            end = time.time()

            for eps in range(self.args.numEps):  # for each self-play game of this round
                self.mcts = MCTS(self.game, self.nnet, self.args)  # reset search tree
                # returns [(canonicalBoard, pi, v), ...] where v is the result
                selfPlayResult = self.executeEpisode()  # play one game, adding the game history
                iterationTrainExamples += selfPlayResult

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps + 1, maxeps=self.args.numEps, et=eps_time.avg,
                    total=bar.elapsed_td, eta=bar.eta_td)
                bar.next()
            bar.finish()

            # save the iteration examples to the history
            self.trainExamplesHistory.append(iterationTrainExamples)  # self-play finished, update the move history

        if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
            print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)  # remove the oldest game history
        # backup history to a file
        # NB! the examples were collected using the model from the previous iteration, so (i-1)
        self.saveTrainExamples(i - 1)

        # shuffle examples before training
        trainExamples = []
        for e in self.trainExamplesHistory:
            trainExamples.extend(e)  # adding new move records
        shuffle(trainExamples)

        # training new network, keeping a copy of the old one
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')  # save the previous net
        self.pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')  # read the previous net
        pmcts = MCTS(self.game, self.pnet, self.args)  # reset the previous model's MCTS

        # use the new data to train the new model
        self.nnet.train(trainExamples)  # train the network with the new move records
        nmcts = MCTS(self.game, self.nnet, self.args)  # reset the new model's MCTS

        # OLD VS NEW
        print('PITTING AGAINST PREVIOUS VERSION')
        # rp = RandomPlayer(self.game).play
        # abp2 = AbpPlayer(self.game, 1, abpDepth=2).play
        arena = Arena(
            lambda board, turn: np.argmax(pmcts.getActionProb(board, turn, temp=0)),
            lambda board, turn: np.argmax(nmcts.getActionProb(board, turn, temp=0)),
            self.game)
        # arena = Arena(abp2,
        #               lambda board, turn: np.argmax(nmcts.getActionProb(board, turn, temp=0)), self.game)
        pwins, nwins, draws = arena.playGames(self.args.arenaCompare)  # play the new model against the old one

        print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
        if pwins + nwins > 0 and float(nwins) / (pwins + nwins) < self.args.updateThreshold:  # OLD WINS!
            print('REJECTING NEW MODEL')
            self.nnet.load_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')  # keep the previous model, as it beat the new one
        else:  # NEW WINS!
            print('ACCEPTING NEW MODEL')
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='best.pth.tar')  # save the new model, as it is the best so far
def playGames(self, num, verbose=False):
    """
    Plays num games in which player1 starts num/2 games and player2
    starts num/2 games.

    Returns:
        oneWon: games won by player1
        twoWon: games won by player2
        draws:  games won by nobody
    """
    eps_time = AverageMeter()
    bar = Bar('Arena.playGames', max=num)
    end = time.time()
    eps = 0
    maxeps = int(num)

    num = int(num / 2)
    oneWon = 0
    twoWon = 0
    draws = 0
    for _ in range(num):
        gameResult = self.playGame(verbose=verbose)
        if gameResult == 1:
            oneWon += 1
        elif gameResult == -1:
            twoWon += 1
        else:
            draws += 1
        # bookkeeping + plot progress
        eps += 1
        eps_time.update(time.time() - end)
        end = time.time()
        bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
            eps=eps, maxeps=maxeps, et=eps_time.avg,
            total=bar.elapsed_td, eta=bar.eta_td)
        bar.next()

    self.player1, self.player2 = self.player2, self.player1

    for _ in range(num):
        gameResult = self.playGame(verbose=verbose)
        if gameResult == -1:
            oneWon += 1
        elif gameResult == 1:
            twoWon += 1
        else:
            draws += 1
        # bookkeeping + plot progress
        eps += 1
        eps_time.update(time.time() - end)
        end = time.time()
        bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
            eps=eps, maxeps=maxeps, et=eps_time.avg,
            total=bar.elapsed_td, eta=bar.eta_td)
        bar.next()

    bar.finish()
    return oneWon, twoWon, draws
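# A minimal usage sketch for Arena.playGames. The player setup below is
# hypothetical (Game, RandomPlayer and the Arena(player1, player2, game)
# constructor are assumed from how Arena is used elsewhere in these snippets):
game = Game()                      # assumed game implementation
mcts = MCTS(game, nnet, args)      # search tree wrapping a trained net
player1 = lambda x: np.argmax(mcts.getActionProb(x, temp=0))
player2 = RandomPlayer(game).play  # hypothetical baseline opponent

arena = Arena(player1, player2, game)
oneWon, twoWon, draws = arena.playGames(40, verbose=False)
print('P1/P2 WINS : %d / %d ; DRAWS : %d' % (oneWon, twoWon, draws))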
def learn(self):
    """
    Performs numIters iterations with numEps episodes of self-play in each
    iteration. After every iteration, it retrains the neural network with
    examples in trainExamples (which has a maximum length of maxlenOfQueue).
    It then pits the new neural network against the old one and accepts it
    only if it wins >= updateThreshold fraction of games.
    """
    for i in range(1, self.args.numIters + 1):
        # bookkeeping
        print('------ITER ' + str(i) + '------')
        # examples of the iteration
        if not self.skipFirstSelfPlay or i > 1:
            iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

            eps_time = AverageMeter()
            bar = Bar('Self Play', max=self.args.numEps)
            end = time.time()

            for eps in range(self.args.numEps):
                self.mcts = MCTS(self.game, self.nnet, self.args)  # reset search tree
                iterationTrainExamples += self.executeEpisode()

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps + 1, maxeps=self.args.numEps, et=eps_time.avg,
                    total=bar.elapsed_td, eta=bar.eta_td)
                bar.next()
            bar.finish()

            # save the iteration examples to the history
            self.trainExamplesHistory.append(iterationTrainExamples)

        if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
            print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)
        # backup history to a file
        # NB! the examples were collected using the model from the previous iteration, so (i-1)
        self.saveTrainExamples(i - 1)

        # shuffle examples before training
        trainExamples = []
        for e in self.trainExamplesHistory:
            trainExamples.extend(e)
        shuffle(trainExamples)

        # training new network, keeping a copy of the old one
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        self.pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        pmcts = MCTS(self.game, self.pnet, self.args)

        self.nnet.train(trainExamples)
        nmcts = MCTS(self.game, self.nnet, self.args)

        print('PITTING AGAINST PREVIOUS VERSION')
        arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                      lambda x: np.argmax(nmcts.getActionProb(x, temp=0)), self.game)
        pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

        print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
        if pwins + nwins == 0 or float(nwins) / (pwins + nwins) < self.args.updateThreshold:
            print('REJECTING NEW MODEL')
            self.nnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
        else:
            print('ACCEPTING NEW MODEL')
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')
def learn(self):
    """
    Performs numIters iterations with numEps episodes of self-play in each
    iteration. After every iteration, it retrains the neural network with
    examples in trainExamples (which has a maximum length of maxlenOfQueue).
    It then pits the new neural network against the old one and accepts it
    only if it wins >= updateThreshold fraction of games.
    """
    if self.args.load_model:
        start = self.args.load_folder_file[1] + 1
    else:
        start = 1
    for i in range(start, self.args.numIters + 1):
        # bookkeeping
        print('------ITER ' + str(i) + '------')
        # examples of the iteration
        greedy = i == 1 and not self.args.load_model
        if not self.skipFirstSelfPlay or i > 1:
            iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)
            num_eps = self.args.numEps
            if greedy:
                num_eps = self.args.greedy_eps

            eps_time = AverageMeter()
            bar = Bar('Self Play', max=num_eps)
            end = time.time()

            for eps in range(num_eps):
                if greedy:
                    iterationTrainExamples += self.execute_greedy_episode()
                else:
                    iterationTrainExamples += self.execute_episodes()

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps + 1, maxeps=num_eps, et=eps_time.avg,
                    total=bar.elapsed_td, eta=bar.eta_td)
                bar.next()
            bar.finish()

        # save the iteration examples to the history
        if not greedy:
            self.trainExamplesHistory.append(iterationTrainExamples)

            if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
                      " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file (this variant saves under the current iteration i)
            self.saveTrainExamples(i)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)
        else:
            trainExamples = iterationTrainExamples

        # training new network, keeping a copy of the old one
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.h5')
        self.pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.h5')
        self.nnet.train(trainExamples)

        if not greedy:
            pmcts = MCTSSingle(self.game, self.pnet, self.args)
            nmcts = MCTSSingle(self.game, self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(pmcts, nmcts, self.game, self.args)
            scores = arena.playGames(self.args.arenaCompare)
            if scores[1] == 0 or float(scores[1]) / sum(scores) < self.args.updateThreshold:
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.h5')
            else:
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.h5')
        else:
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='checkpoint_1.h5')
def _train_custom_loop(self, examples):
    """
    examples: list of examples, each example is of form (board, pi, v)
    """
    from pytorch_classification.utils import Bar, AverageMeter

    optimizer = optimizers.Adam(alpha=args.lr)
    optimizer.setup(self.nnet)

    for epoch in range(args.epochs):
        print('EPOCH ::: ' + str(epoch + 1))
        # self.nnet.train()
        data_time = AverageMeter()
        batch_time = AverageMeter()
        pi_losses = AverageMeter()
        v_losses = AverageMeter()
        end = time.time()

        bar = Bar('Training Net', max=int(len(examples) / args.batch_size))
        batch_idx = 0

        while batch_idx < int(len(examples) / args.batch_size):
            sample_ids = np.random.randint(len(examples), size=args.batch_size)
            boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))
            xp = self.nnet.xp
            boards = xp.array(boards, dtype=xp.float32)
            target_pis = xp.array(pis, dtype=xp.float32)
            target_vs = xp.array(vs, dtype=xp.float32)

            # measure data loading time
            data_time.update(time.time() - end)

            # compute output
            out_pi, out_v = self.nnet(boards)
            l_pi = self.loss_pi(target_pis, out_pi)
            l_v = self.loss_v(target_vs, out_v)
            total_loss = l_pi + l_v

            # record loss
            pi_loss = l_pi.data
            v_loss = l_v.data
            pi_losses.update(cuda.to_cpu(pi_loss), boards.shape[0])
            v_losses.update(cuda.to_cpu(v_loss), boards.shape[0])

            # compute gradient and do SGD step
            self.nnet.cleargrads()
            total_loss.backward()
            optimizer.update()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            batch_idx += 1

            # plot progress
            bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} ' \
                         '| Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                batch=batch_idx,
                size=int(len(examples) / args.batch_size),
                data=data_time.avg,
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                lpi=pi_losses.avg,
                lv=v_losses.avg,
            )
            bar.next()
        bar.finish()
def learn(self):
    """
    Performs numIters iterations with numEps episodes of self-play in each
    iteration. After every iteration, it retrains the neural network with
    examples in trainExamples (which has a maximum length of maxlenOfQueue).
    It then pits the new neural network against the old one and accepts it
    only if it wins >= updateThreshold fraction of games.
    """
    # Generate a fixed sensing matrix if the option is toggled to True.
    # 1) A is fixed. Also set arena_game_args.sensing_matrix equal to that of
    #    coach.game_args so the arena uses the same sensing matrix.
    # 2) The folder which saves the fixed sensing matrix starts out empty.
    if self.args['fixed_matrix']:
        if self.args['load_existing_matrix']:
            self.game_args.sensing_matrix = np.load(
                self.args['fixed_matrix_filepath'] + '/sensing_matrix.npy')
            self.arena_game_args.sensing_matrix = np.load(
                self.args['fixed_matrix_filepath'] + '/sensing_matrix.npy')
        else:
            # Not loading an existing matrix from self.args['fixed_matrix_filepath'],
            # so generate a new sensing matrix of the given type self.args['matrix_type'].
            self.game_args.generateSensingMatrix(self.args['m'], self.args['n'],
                                                 self.args['matrix_type'])
            self.arena_game_args.sensing_matrix = self.game_args.sensing_matrix
            # Save the fixed matrix
            self.game_args.save_Matrix(self.args['fixed_matrix_filepath'])

    for i in range(1, self.args['numIters'] + 1):
        print('------ITER ' + str(i) + '------')
        # self.skipFirstSelfPlay defaults to False. If loading training from file,
        # it is set to True, which lets us resume from the latest nn_model together
        # with the latest set of trainExamples.
        if not self.skipFirstSelfPlay or i > 1:
            iterationTrainExamples = deque([], maxlen=self.args['maxlenOfQueue'])

            # bookkeeping objects contained in pytorch_classification.utils
            eps_time = AverageMeter()
            bar = Bar('Self Play', max=self.args['numEps'])
            end = time.time()

            # IMPORTANT: generate a new A and a new y here. Each self-play game
            # has a different A and y.
            for eps in range(self.args['numEps']):
                # Initialize a new game by setting A, x, y, then execute a single
                # game of self-play with self.executeEpisode().
                if not self.args['fixed_matrix']:
                    # Repeatedly generate sensing matrices when the matrix is not fixed.
                    self.game_args.generateSensingMatrix(self.args['m'], self.args['n'],
                                                         self.args['matrix_type'])
                # Generate a new observed vector y. This assumes a matrix has
                # already been loaded into self.game_args!
                self.game_args.generateNewObsVec(self.args['x_type'], self.args['sparsity'])
                # Create a new search tree for each game we play.
                self.mcts = MCTS(self.game, self.nnet, self.args, self.game_args)
                # Play a new game with the newly generated y; iterationTrainExamples
                # is a deque collecting the states from each generated self-play game.
                iterationTrainExamples += self.executeEpisode()

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps + 1, maxeps=self.args['numEps'], et=eps_time.avg,
                    total=bar.elapsed_td, eta=bar.eta_td)
                bar.next()
            bar.finish()

            # Save the iteration examples to the history. self.trainExamplesHistory
            # is a list of deques, where each deque contains all the states from
            # numEps self-play games.
            self.trainExamplesHistory.append(iterationTrainExamples)

        # We jump straight to here on the first iteration if an existing file was
        # loaded into self.trainExamplesHistory by the loadTrainExamples method below.
        if len(self.trainExamplesHistory) > self.args['numItersForTrainExamplesHistory']:
            print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)
        # Backup history to a file by calling the saveTrainExamples method.
        # The examples were collected using the model from the previous iteration, so (i-1).
        self.saveTrainExamples(i - 1)  # saved to the self.args['checkpoint'] folder under iteration name i-1

        # Shuffle examples before training. trainExamples is the flattened form of
        # trainExamplesHistory: it discards the deque structure and puts all
        # training samples into a single shuffled list.
        trainExamples = []
        for e in self.trainExamplesHistory:  # each e is a deque
            trainExamples.extend(e)
        shuffle(trainExamples)

        # The Arena --------------------------------------------------------
        if self.args['Arena']:
            # Keep a copy of the current network as the previous network self.pnet.
            self.nnet.save_checkpoint(folder=self.args['network_checkpoint'], filename='temp')
            self.pnet.load_checkpoint(folder=self.args['network_checkpoint'], filename='temp')

            # Convert trainExamples into a format recognizable by the neural
            # network, then train self.nnet. The weights are now updated.
            trainExamples = self.nnet.constructTraining(trainExamples)
            self.nnet.train(trainExamples[0], trainExamples[1])

            # Pit the two neural networks self.pnet and self.nnet in the arena.
            # Arena pits pnet against nnet while the Game_args A and y change
            # constantly. Next iteration, arena refers to a new object, so the old
            # one is garbage collected once no other references to it remain.
            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(self.pnet, self.nnet, self.game, self.args, self.arena_game_args)
            pwins, nwins, draws = arena.playGames()

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
            if pwins + nwins > 0 and float(nwins) / (pwins + nwins) < self.args['updateThreshold']:
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=self.args['network_checkpoint'], filename='temp')
            else:
                # Saves the weights (.h5) and model (.json) twice: creates
                # nnet_checkpoint(i-1)_model.json and nnet_checkpoint(i-1)_weights.h5,
                # and rewrites best_model.json and best_weights.h5.
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args['network_checkpoint'],
                                          filename='nnet_checkpoint' + str(i - 1))
                self.nnet.save_checkpoint(folder=self.args['network_checkpoint'], filename='best')
        # ------------------------------------------------------------------
        else:
            # If the Arena is not activated, just train the network, rewrite 'best',
            # and write a new 'nnet_checkpoint' + str(i-1) file.
            print('TRAINING NEW NEURAL NETWORK...')
            trainExamples = self.nnet.constructTraining(trainExamples)
            self.nnet.train(trainExamples[0], trainExamples[1],
                            folder=self.args['network_checkpoint'],
                            filename='trainHistDict' + str(i - 1))
            self.nnet.save_checkpoint(folder=self.args['network_checkpoint'],
                                      filename='nnet_checkpoint' + str(i - 1))
            self.nnet.save_checkpoint(folder=self.args['network_checkpoint'], filename='best')
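The Game_args helpers used above (generateSensingMatrix, generateNewObsVec) are not shown in this excerpt. As a rough illustration only, and not the repo's implementation, a compressed-sensing problem instance of this shape can be generated with NumPy as follows; the Gaussian matrix type, column normalization, and sparse-signal model here are assumptions:

import numpy as np

def generate_sensing_problem(m, n, sparsity, rng=np.random):
    # m x n Gaussian sensing matrix with unit-norm columns (a common CS choice)
    A = rng.randn(m, n)
    A /= np.linalg.norm(A, axis=0)
    # sparse signal x with `sparsity` nonzero Gaussian entries on a random support
    x = np.zeros(n)
    support = rng.choice(n, size=sparsity, replace=False)
    x[support] = rng.randn(sparsity)
    # observed vector y = A x
    y = A @ x
    return A, x, y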
def learn(self):
    """
    Performs numIters iterations with numEps episodes of self-play in each
    iteration. After every iteration, it retrains the neural network with
    examples in trainExamples (which has a maximum length of maxlenOfQueue).
    It then pits the new neural network against the old one and accepts it
    only if it wins >= updateThreshold fraction of games.
    """
    for i in range(1, self.args.numIters + 1):
        # bookkeeping
        print('------ITER ' + str(i) + '------')
        print(str(self.game.innerN) + "x" + str(self.game.innerM))
        # examples of the iteration
        if not self.skipFirstSelfPlay or i > 1:
            iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

            eps_time = AverageMeter()
            bar = Bar('Self Play', max=self.args.numEps)
            end = time.time()

            for eps in range(self.args.numEps):
                self.mcts = MCTS(self.nnet, self.args)  # reset search tree
                iterationTrainExamples += self.executeEpisode()

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps + 1, maxeps=self.args.numEps, et=eps_time.avg,
                    total=bar.elapsed_td, eta=bar.eta_td)
                bar.next()
            bar.finish()

            # save the iteration examples to the history
            self.trainExamplesHistory.append(iterationTrainExamples)

        if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
            print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)
        # backup history to a file
        # NB! the examples were collected using the model from the previous iteration, so (i-1)
        self.saveTrainExamples(i - 1)

        # shuffle examples before training
        trainExamples = []
        for e in self.trainExamplesHistory:
            trainExamples.extend(e)
        shuffle(trainExamples)

        tempfile = 'temp.pth.tar'
        bestfile = 'best.pth.tar'

        # training new network, keeping a copy of the old one
        self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=tempfile)
        self.nnet.train(trainExamples)

        if self.arenaEnabled:
            self.pnet.load_checkpoint(folder=self.args.checkpoint, filename=tempfile)

            pmcts = MCTS(self.pnet, self.args)
            nmcts = MCTS(self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(lambda x, y: pmcts.getActionProb(x, y, temp=0),
                          lambda x, y: nmcts.getActionProb(x, y, temp=0),
                          self.game)
            pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
            if pwins + nwins > 0 and float(nwins) / (pwins + nwins) < self.args.updateThreshold:
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=self.args.checkpoint, filename=tempfile)
            else:
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=bestfile)
def train(self, nReDataGeneration=1, nTrainingEpochs=20, batch_size=100):
    self.network.train()
    elapsed_time = 0.

    # train my model
    print('Learning Started!')
    start_time = time.perf_counter()

    self.data.getBlockImages(blockH=self.featureH, blockW=self.featureW,
                             nOKperClass=40, nNGperClass=40,
                             classNoList=self.args.classNoList,
                             label_type='index', isTrain=False)

    current_accuracy = 0
    max_accuracy = self.args.optimalAccuracyThreshold

    for i in range(nReDataGeneration):
        eps_time = AverageMeter()
        bar = Bar('Training ' + str(i), max=self.args.nTrainingEpochs)
        end = time.time()

        # regenerate the training set for this outer iteration
        self.data.getBlockImages(blockH=self.featureH, blockW=self.featureW,
                                 nOKperClass=160, nNGperClass=160,
                                 classNoList=self.args.classNoList,
                                 label_type='index', isTrain=True)
        Xnp = self.data.train.images
        Ynp = self.data.train.labels
        x = torch.from_numpy(Xnp.reshape([-1, self.featureC, self.featureW, self.featureH]))
        y = torch.from_numpy(Ynp)
        # pre-0.4 PyTorch API (TensorDataset keyword arguments, Variable below)
        dataset = TensorDataset(data_tensor=x, target_tensor=y)
        self.train_loader = DataLoader(dataset, batch_size=self.args.batch_size, shuffle=True)

        for epoch in range(1, nTrainingEpochs + 1):
            for image, label in self.train_loader:
                image = Variable(image)
                label = Variable(label)
                if self.args.isGPU:
                    image = image.cuda()
                    label = label.cuda()

                self.optimizer.zero_grad()
                output = self.network(image)
                output = self.softmax(output)
                loss = self.loss_func(output, label)
                loss.backward()
                self.optimizer.step()

            # bookkeeping + plot progress
            eps_time.update(time.time() - end)
            end = time.time()
            bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                eps=epoch, maxeps=self.args.nTrainingEpochs, et=eps_time.avg,
                total=bar.elapsed_td, eta=bar.eta_td)
            bar.next()

            if epoch % 2 == 0:
                print('--------------------------------------------------------------------')
                torch.save(self.network, self.saveModelPath)
                # saving only the model parameters is the recommended form
                torch.save(self.network.state_dict(), self.saveParamsPath)
                torch.save(self.network, self.backupModelPath)
                torch.save(self.network.state_dict(), self.backupParamsPath)

                current_accuracy = commander.getCurrentAccuracy(batch_size=256,
                                                                numIter=1,
                                                                isTrainData=False)
                if self.args.isGPU:
                    loss = loss.cpu()
                self.network.train()

                print('|=====================================================================|')
                print('|===== Epoch : %04d' % (i * nTrainingEpochs + epoch), "======================|")
                print('|===== Loss : ', loss.data.numpy()[0], "========================|")
                print("|===== Current accuracy : %.1f" % (current_accuracy * 100.), "% =====|")
                print('|=====================================================================|')

                if current_accuracy >= max_accuracy:
                    max_accuracy = current_accuracy
                    torch.save(self.network, self.optimalModelPath)
                    torch.save(self.network.state_dict(), self.optimalParamsPath)
                    break

        if current_accuracy >= max_accuracy:
            break

        bar.finish()

    elapsed_time = time.perf_counter() - start_time
    print('=====================================================================')
    print('Elapsed %.3f seconds.' % elapsed_time)
    print('%.0f h' % (elapsed_time / 3600),
          '%.0f m' % ((elapsed_time % 3600) / 60),
          '%.0f s' % (elapsed_time % 60))
    print('Learning Finished!')
    print('=====================================================================')
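commander.getCurrentAccuracy is called above but not defined in this excerpt. A hypothetical sketch of such a helper, under the assumption that it simply measures top-1 accuracy over a held-out loader; the name, signature, and logic here are illustrative, not the project's API:

import torch

def get_current_accuracy(network, loader, use_gpu=False):
    network.eval()  # disable dropout / batch-norm updates during evaluation
    correct, total = 0, 0
    for image, label in loader:
        if use_gpu:
            image, label = image.cuda(), label.cuda()
        output = network(image)
        _, predicted = torch.max(output.data, 1)  # top-1 class per sample
        total += label.size(0)
        correct += int((predicted == label).sum())
    network.train()  # restore training mode, as the training loop above expects
    return float(correct) / total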