class MeanStd(object):
    """Plot the mean and std of a layer's output, captured via a forward hook."""
    def __init__(self, layer, env, logger_name, caption):
        self.layer = layer
        # hook_forward (defined elsewhere) stores the forward output on the layer as `out_`
        self.layer.register_forward_hook(hook_forward)
        self.mean_logger = VisdomPlotLogger('line',
                                            env=env,
                                            opts={
                                                'title': 'mean_' + caption + logger_name,
                                                'caption': caption
                                            })
        self.std_logger = VisdomPlotLogger('line',
                                           env=env,
                                           opts={
                                               'title': 'std_' + caption + logger_name,
                                               'caption': caption
                                           })
        self.iter_n = 0

    def plot(self):
        mean = torch.mean(self.layer.out_)
        std = torch.std(self.layer.out_)
        # free the cached activation once it has been reduced
        del self.layer.out_
        self.mean_logger.log(self.iter_n, mean)
        self.std_logger.log(self.iter_n, std)
        self.iter_n += 1
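# `hook_forward` is defined elsewhere in this module; the only contract MeanStd
# relies on is that it stashes the module output on the module itself as `out_`.
# A minimal sketch of such a hook (an assumption, not the project's actual code):
def hook_forward(module, input, output):
    # keep a detached copy so plotting never holds on to the autograd graph
    module.out_ = output.detach()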
def train(
    args,
    model,
    train_loader,
    decreasing_lr,
    wd=0.0001,      # wd and momentum are accepted for API compatibility
    momentum=0.9,   # but unused with the Adam optimizer below
):
    if args.seed is not None:
        torch.manual_seed(args.seed)
    vis = visdom.Visdom()
    vis.close(env=args.model)
    test_acc_logger = VisdomPlotLogger('line', env=args.model,
                                       opts={'title': 'Test Accuracy'})
    test_loss_logger = VisdomPlotLogger('line', env=args.model,
                                        opts={'title': 'Test Loss'})
    train_acc_logger = VisdomPlotLogger('line', env=args.model,
                                        opts={'title': 'Train Accuracy'})
    train_loss_logger = VisdomPlotLogger('line', env=args.model,
                                         opts={'title': 'Train Loss'})
    lr_logger = VisdomPlotLogger('line', env=args.model,
                                 opts={'title': 'Learning Rate'})
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=decreasing_lr,
                                                     gamma=0.1)
    best_train_loss = float('inf')
    for epoch in range(args.nepoch):
        _, train_loss, train_acc = train_epoch(
            model=model,
            loader=train_loader,
            optimizer=optimizer,
            epoch=epoch,
            n_epochs=args.nepoch,
        )
        # Note: as written, the "test" metrics are evaluated on the training
        # loader; no separate test loader is passed to this function.
        _, test_loss, test_acc = test_epoch(
            loader=train_loader,
            model=model,
            epoch=epoch,
            n_epochs=args.nepoch,
        )
        # step the scheduler after the epoch's optimizer updates
        scheduler.step()
        if best_train_loss > train_loss:
            best_train_loss = train_loss
            print('best_loss: ' + str(best_train_loss))
            torch.save(model.state_dict(), args.params_name)
        print(train_loss)
        train_loss_logger.log(epoch, float(train_loss))
        train_acc_logger.log(epoch, float(train_acc))
        test_acc_logger.log(epoch, float(test_acc))
        test_loss_logger.log(epoch, float(test_loss))
        lr_logger.log(epoch, optimizer.param_groups[0]['lr'])
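# Hypothetical invocation of train(); the attribute names mirror exactly how
# `args` is used inside (seed, model, lr, nepoch, params_name). It assumes the
# surrounding module provides `model`, `train_loader`, train_epoch/test_epoch,
# and a running Visdom server — none of these values come from the original code.
from argparse import Namespace

args = Namespace(seed=42, model='resnet', lr=1e-3, nepoch=90,
                 params_name='best_model.pth')
train(args, model, train_loader, decreasing_lr=[30, 60])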
class Visualizer():
    """Visualization: plot the logs during the training process"""
    def __init__(self, num_classes=10):
        port = 8097
        self.loss_logger = VisdomPlotLogger('line', port=port, win="Loss",
                                            opts={'title': 'Loss Logger'})
        self.acc_logger = VisdomPlotLogger('line', port=port, win="acc",
                                           opts={'title': 'Accuracy Logger'})
        self.confusion_logger = VisdomLogger('heatmap', port=port, win="confusion",
                                             opts={
                                                 'title': 'Confusion matrix',
                                                 'columnnames': list(range(num_classes)),
                                                 'rownames': list(range(num_classes))
                                             })

    def plot(self, train_acc, train_err, val_acc, val_err, confusion, epoch):
        self.loss_logger.log(epoch, train_err, name="train")
        self.acc_logger.log(epoch, train_acc, name="train")
        self.loss_logger.log(epoch, val_err, name="val")
        self.acc_logger.log(epoch, val_acc, name="val")
        self.confusion_logger.log(confusion)
        # relies on a module-level `args`
        print("epoch: [%d/%d]" % (epoch, args.n_epoches))
        print('Training loss: %.4f, accuracy: %.2f%%' % (train_err, train_acc))
        print('Validation loss: %.4f, accuracy: %.2f%%' % (val_err, val_acc))
class Norm(object):
    """Plot the mean squared value of a parameter (a proxy for its norm)."""
    def __init__(self, para, env, logger_name, caption=''):
        self.para = para
        self.logger = VisdomPlotLogger('line',
                                       env=env,
                                       opts={
                                           'title': 'norm_' + caption + logger_name,
                                           'caption': caption
                                       })
        self.iter_n = 0

    def plot(self):
        self.logger.log(self.iter_n, torch.mean(self.para.data**2))
        self.iter_n += 1
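# A minimal, self-contained sketch of how Norm might be wired up: one monitor
# per parameter, plotted after every optimizer step. The model, data, and env
# name here are placeholders, not part of the original code; a Visdom server
# must be running for the logging to succeed.
import torch
import torch.nn as nn

model = nn.Linear(4, 2)
monitors = [Norm(p, env='main', logger_name=name)
            for name, p in model.named_parameters()]
opt = torch.optim.SGD(model.parameters(), lr=0.1)
for _ in range(10):
    loss = model(torch.randn(8, 4)).pow(2).mean()
    opt.zero_grad()
    loss.backward()
    opt.step()
    for m in monitors:
        m.plot()  # logs mean(p**2) for each parameter to Visdom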
class statJoints(statBase):
    def __init__(self, args, scale=(1., 1., 1.)):
        super(statJoints, self).__init__(args)
        self.jointErrAvg = tnt.meter.AverageValueMeter()
        self.joint_logger = VisdomPlotLogger('line', opts={'title': 'Joint error'},
                                             env='PoseCapsules')
        self.scale = scale

    def reset(self):
        super(statJoints, self).reset()
        self.jointErrAvg.reset()

    def log(self, pbar, output, labels, stat=None):
        # reshape predictions and labels to (batch, capsules, joints, xyz)
        shp = (labels.shape[0], labels.shape[1], -1, 3)
        labels_abs = labels.view(shp)
        output_abs = output.data[..., :-1].view(shp)
        err = output_abs - labels_abs
        # per-axis scaling; self.scale is a plain Python sequence, so build the
        # tensor with torch.tensor (torch.from_numpy only accepts numpy arrays)
        sc = torch.tensor(self.scale, device=err.device).float()[None, None, None, :].expand(err.shape)
        err = err * sc
        # weighted average of the 20 non-root joint errors and the root-joint error
        mean = err[:, :, 1:, :].norm(dim=3).mean().item()
        mean1 = err[:, 0, 0, :].norm(dim=1).mean().item()
        mean = (20 * mean + mean1) / 21
        self.jointErrAvg.add(mean)
        metrics = OrderedDict()
        metrics['jointErr'] = self.jointErrAvg.value()[0]
        super(statJoints, self).log(pbar, output, labels, metrics, stat)

    def endTrainLog(self, epoch, groundtruth_image=None, recon_image=None):
        super(statJoints, self).endTrainLog(epoch, groundtruth_image, recon_image)
        self.joint_logger.log(epoch, self.jointErrAvg.value()[0], name='train')

    def endTestLog(self, epoch):
        super(statJoints, self).endTestLog(epoch)
        self.joint_logger.log(epoch, self.jointErrAvg.value()[0], name='test')
class BatchLRVisdom(object):
    def __init__(self, title='TBD'):
        self._lr = VisdomPlotLogger(
            'line', opts={'title': '{:s} lr Curve'.format(title)})
        check_visdom_server(self._lr.viz)

    def log(self, idx, lr, train=None):
        assert train is not None,\
            'train should be True or False, not {}'.format(train)
        name = 'train' if train else 'test'
        try:
            self._lr.log(idx, lr, name=name)
        except BaseException as e:
            check_visdom_server(self._lr.viz)
            print(e)
            print("***Retry BatchLRVisdom")
            self.log(idx, lr, train)
class AccuracyVisdom(object):
    '''Plot train and test accuracy curves together in a VisdomPlotLogger
    '''
    def __init__(self, title='TBD'):
        self._acc = VisdomPlotLogger(
            'line', opts={'title': '{:s} Accuracy Curve'.format(title)})
        check_visdom_server(self._acc.viz)

    def log(self, epoch, accuracy, train=None):
        assert train is not None,\
            'train should be True or False, not {}'.format(train)
        name = 'train' if train else 'test'
        try:
            self._acc.log(epoch, accuracy, name=name)
        except BaseException as e:
            check_visdom_server(self._acc.viz)
            print(e)
            print("***Retry AccuracyVisdom")
            self.log(epoch, accuracy, train)
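# `check_visdom_server` is imported from elsewhere; both BatchLRVisdom and
# AccuracyVisdom call it once at construction and again after a failed log.
# A plausible sketch, assuming it simply blocks until the server answers
# (visdom's Visdom.check_connection() returns True when the server is reachable):
import time

def check_visdom_server(viz, retry_interval=2.0):
    """Block until the visdom server behind `viz` responds."""
    while not viz.check_connection():
        print('visdom server not reachable, retrying ...')
        time.sleep(retry_interval)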
class statNothing():
    def __init__(self):
        self.lossAvg = tnt.meter.AverageValueMeter()
        self.train_loss_logger = VisdomPlotLogger('line', opts={'title': 'Train Loss'},
                                                  env='PoseCapsules')
        self.test_loss_logger = VisdomPlotLogger('line', opts={'title': 'Test Loss'},
                                                 env='PoseCapsules')

    def reset(self):
        self.lossAvg.reset()

    def log(self, pbar, output, labels, metrics=None, stat=None):
        # avoid a mutable default argument (and shadowing the `dict` builtin)
        if metrics is None:
            metrics = OrderedDict()
        metrics['loss'] = self.lossAvg.value()[0]
        pbar.set_postfix(metrics, refresh=False)

    def endTrainLog(self, epoch, groundtruth_image=None, recon_image=None):
        self.train_loss_logger.log(epoch, self.lossAvg.value()[0], name='loss')

    def endTestLog(self, epoch):
        self.test_loss_logger.log(epoch, self.lossAvg.value()[0], name='loss')
class WeightRatio(object):
    """Plot the gradient-to-weight norm ratio of a parameter."""
    def __init__(self, para, env, logger_name, caption=''):
        self.para = para
        self.logger = VisdomPlotLogger('line',
                                       env=env,
                                       opts={
                                           'title': caption + '\n' + logger_name,
                                           'caption': caption
                                       })
        self.iter_n = 0

    def plot(self):
        # ratio of the L2 norm of the gradient to the L2 norm of the weights;
        # requires a preceding backward() so that .grad is populated
        ratio = torch.norm(self.para.grad.data, 2) / torch.norm(
            self.para.data, 2)
        self.logger.log(self.iter_n, ratio)
        self.iter_n += 1
class statClassification(statBase):
    def __init__(self, args):
        super(statClassification, self).__init__(args)
        self.meter_accuracy = tnt.meter.ClassErrorMeter(accuracy=True)
        self.accuracy_logger = VisdomPlotLogger('line', opts={'title': 'accuracy'},
                                                env='PoseCapsules')

    def reset(self):
        super(statClassification, self).reset()
        self.meter_accuracy.reset()

    def log(self, pbar, output, labels, stat=None):
        # take the activation component (last element along the final axis)
        self.meter_accuracy.add(output.squeeze()[:, :, -1:].squeeze().data, labels.data)
        metrics = OrderedDict()
        metrics['acc'] = self.meter_accuracy.value()[0]
        super(statClassification, self).log(pbar, output, labels, metrics, stat)

    def endTrainLog(self, epoch, groundtruth_image=None, recon_image=None):
        super(statClassification, self).endTrainLog(epoch, groundtruth_image, recon_image)
        self.accuracy_logger.log(epoch, self.meter_accuracy.value()[0], name='train')

    def endTestLog(self, epoch):
        super(statClassification, self).endTestLog(epoch)
        self.accuracy_logger.log(epoch, self.meter_accuracy.value()[0], name='test')
        print("Test accuracy:", self.meter_accuracy.value()[0])
def train_correction_network(network, config, data):
    """Train the ShallowCorrectionNet on the same training data used to train
    the SVM classifier, in order to correct its predictions.

    Parameters
    ----------
    network : neural network
        A pytorch network.
    config : yaml
        The configuration file.
    data : dict
        A dictionary comprising the training, validation, and testing data.
    """
    # Device to train the model on: cpu or gpu
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print('Computation device being used:', device)

    # An instance of the model
    model = network(config).to(device)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    # criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config['LEARNING_RATE'])

    # visual loggers
    visual_logger1 = visual_log('Task type classification using self-correction')
    visual_logger2 = VisdomPlotLogger('line',
                                      opts=dict(xlabel='Epochs',
                                                ylabel='Error',
                                                title='Error plot'))
    accuracy_log = []
    for epoch in range(config['NUM_EPOCHS']):
        for x_batch, y_batch in data['training']:
            x_batch = x_batch.to(device)
            # y_batch = y_batch.to(device)  # use this with MSELoss(); otherwise use below
            # convert labels from one-hot encoding to class indices
            y_batch = (torch.max(y_batch, dim=1)[1]).to(device)
            # Forward propagation
            net_out = model(x_batch)
            loss = criterion(net_out, y_batch)
            # Back propagation
            optimizer.zero_grad()  # for batch gradient optimisation
            loss.backward()
            optimizer.step()
        accuracy = classification_accuracy(model, data)
        accuracy_log.append(accuracy)
        # log the accuracies
        visual_logger1.log(epoch, [accuracy[0], accuracy[1], accuracy[2]])
        # log the errors
        visual_logger2.log(epoch, loss.item())

    # Add loss function info to the model info.
    model_info = create_model_info(config, str(criterion), np.array(accuracy_log))

    return model, model_info
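# `classification_accuracy` and `visual_log` come from elsewhere in this
# project. Judging by how the return value is indexed ([0], [1], [2] logged as
# one curve each), a plausible sketch of the accuracy helper is below; the
# split key names other than 'training' are assumptions inferred from the docstring.
import torch

def classification_accuracy(model, data):
    """Return (train, validation, test) accuracy of `model` — an assumed
    contract, inferred from the call site above."""
    accuracies = []
    model.eval()
    with torch.no_grad():
        for split in ('training', 'validation', 'testing'):
            correct = total = 0
            for x, y in data[split]:
                pred = model(x).argmax(dim=1)
                correct += (pred == y.argmax(dim=1)).sum().item()
                total += y.shape[0]
            accuracies.append(correct / total)
    model.train()
    return accuracies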
class Model(object):
    def train(self,
              architecture,
              fold,
              lr,
              batch_size,
              epochs,
              epoch_size,
              validation_size,
              iter_size,
              patience=4,
              optim="adam",
              ignore_prev_best_loss=False):
        print("Start training with the following params:",
              f"architecture = {architecture}",
              f"fold = {fold}",
              f"lr = {lr}",
              f"batch_size = {batch_size}",
              f"epochs = {epochs}",
              f"epoch_size = {epoch_size}",
              f"validation_size = {validation_size}",
              f"iter_size = {iter_size}",
              f"optim = {optim}",
              f"patience = {patience}")

        train_loader, valid_loader, num_classes = get_loaders(
            batch_size,
            train_transform=train_augm(),
            valid_transform=valid_augm(),
            n_fold=fold)
        model = get_model(num_classes, architecture)
        # sum the per-sample losses; normalized by batch_size in _train
        # (size_average=False in older PyTorch versions)
        criterion = CrossEntropyLoss(reduction='sum')
        self.ignore_prev_best_loss = ignore_prev_best_loss
        self.lr = lr
        self.model = model
        self.root = Path(f"../results/{architecture}")
        self.fold = fold
        self.optim = optim
        self.train_loss_logger = VisdomPlotLogger('line', opts={'title': 'Train Loss'})
        self.lr_logger = VisdomPlotLogger('line', opts={'title': 'Train Learning Rate'})
        self.test_loss_logger = VisdomPlotLogger('line', opts={'title': 'Test Loss'})
        self.test_accuracy_logger = VisdomPlotLogger('line', opts={'title': 'Test Accuracy'})
        train_kwargs = dict(args=dict(iter_size=iter_size,
                                      n_epochs=epochs,
                                      batch_size=batch_size,
                                      epoch_size=epoch_size),
                            model=model,
                            criterion=criterion,
                            train_loader=train_loader,
                            valid_loader=valid_loader,
                            validation_size=validation_size,
                            patience=patience)
        self._train(**train_kwargs)

    def _init_optimizer(self):
        if self.optim == "adam":
            return Adam(self.model.parameters(), lr=self.lr)
        elif self.optim == "sgd":
            return SGD(self.model.parameters(), lr=self.lr, momentum=0.9)
        else:
            raise ValueError(f"Unknown optimizer {self.optim}")

    def _init_files(self):
        if not self.root.exists():
            self.root.mkdir()
        self.log = self.root.joinpath('train_{}.log'.format(self.fold)).open(
            'at', encoding='utf8')
        self.model_path = self.root / 'model_{}.pt'.format(self.fold)
        self.best_model_path = self.root / 'best-model_{}.pt'.format(self.fold)

    def _init_model(self):
        if self.model_path.exists():
            state = torch.load(str(self.model_path))
            self.epoch = state['epoch']
            self.step = state['step']
            if self.ignore_prev_best_loss:
                self.best_valid_loss = float('inf')
            else:
                self.best_valid_loss = state['best_valid_loss']
            self.model.load_state_dict(state['model'])
            print('Restored model, epoch {}, step {:,}'.format(self.epoch, self.step))
        else:
            self.epoch = 1
            self.step = 0
            self.best_valid_loss = float('inf')

    def _save_model(self, epoch):
        torch.save(
            {
                'model': self.model.state_dict(),
                'epoch': epoch,
                'step': self.step,
                'best_valid_loss': self.best_valid_loss
            }, str(self.model_path))

    def _train(self,
               args,
               model: nn.Module,
               criterion,
               *,
               train_loader,
               valid_loader,
               validation_size,
               patience=2):
        lr = self.lr
        n_epochs = args['n_epochs']
        optimizer = self._init_optimizer()
        self._init_files()
        self._init_model()
        report_each = 10
        valid_losses = []
        lr_reset_epoch = self.epoch
        batch_size = args['batch_size']
        iter_size = args['iter_size']
        for epoch in range(self.epoch, n_epochs + 1):
            model.train()
            random.seed()
            tq = tqdm.tqdm(
                total=(args['epoch_size'] or len(train_loader) * batch_size))
            tq.set_description('Epoch {}, lr {}'.format(epoch, lr))
            losses = []
            tl = train_loader
            epoch_loss = 0
            if args['epoch_size']:
                tl = islice(tl, args['epoch_size'] // batch_size)
            try:
                mean_loss = 0
                batches_count = 0
                for i, (inputs, targets) in enumerate(tl):
                    batches_count += 1
                    inputs, targets = variable(inputs), variable(targets)
                    targets = long_tensor(targets)
                    # split the batch into iter_size chunks to accumulate gradients
                    inputs_chunks = inputs.chunk(iter_size)
                    targets_chunks = targets.chunk(iter_size)
                    optimizer.zero_grad()
                    iter_loss = 0
                    for input, target in zip(inputs_chunks, targets_chunks):
                        outputs = model(input)
                        loss = criterion(outputs, target)
                        loss /= batch_size
                        iter_loss += loss.item()  # loss.data[0] in pre-0.4 PyTorch
                        loss.backward()
                    optimizer.step()
                    self.step += 1
                    tq.update(batch_size)
                    epoch_loss += iter_loss
                    losses.append(iter_loss)
                    mean_loss = np.mean(losses[-report_each:])
                    tq.set_postfix(loss='{:.3f}'.format(mean_loss))
                    if i and i % report_each == 0:
                        self._write_event(loss=mean_loss)
                epoch_loss /= batches_count
                self._write_event(loss=mean_loss)
                tq.close()
                self._save_model(epoch + 1)
                valid_metrics = validate(model, criterion, valid_loader,
                                         validation_size, batch_size, iter_size)
                self._write_event(**valid_metrics)
                self.lr_logger.log(epoch, lr)
                self.train_loss_logger.log(epoch, epoch_loss)
                self.test_loss_logger.log(epoch, valid_metrics['valid_loss'])
                self.test_accuracy_logger.log(epoch, valid_metrics['valid_acc'])
                valid_loss = valid_metrics['valid_loss']
                valid_losses.append(valid_loss)
                if valid_loss < self.best_valid_loss:
                    print("Best validation loss improved from {} to {}".format(
                        self.best_valid_loss, valid_loss))
                    self.best_valid_loss = valid_loss
                    shutil.copy(str(self.model_path), str(self.best_model_path))
                elif patience and epoch - lr_reset_epoch > patience and min(
                        valid_losses[-patience:]) > self.best_valid_loss:
                    # no recent improvement: halve the learning rate
                    lr /= 2
                    lr_reset_epoch = epoch
                    optimizer = self._init_optimizer()
            except KeyboardInterrupt:
                tq.close()
                print('Ctrl+C, saving snapshot')
                self._save_model(epoch)
                print('done.')
                break
        return

    def _write_event(self, **data):
        data['step'] = self.step
        data['dt'] = datetime.now().isoformat()
        self.log.write(json.dumps(data, sort_keys=True))
        self.log.write('\n')
        self.log.flush()

    def predict(self, architecture, fold, tta, batch_size, name="sub"):
        print("Start predicting with the following params:",
              f"architecture = {architecture}",
              f"fold = {fold}",
              f"tta = {tta}")
        n_classes = 5270
        model = get_model(num_classes=n_classes, architecture=architecture)
        state = torch.load(f"../results/{architecture}/best-model_{fold}.pt")
        model.load_state_dict(state['model'])
        model.eval()  # disable dropout/batchnorm updates for inference
        test_augm = valid_augm()
        label_map = pd.read_csv("../data/labels_map.csv")
        label_map.index = label_map['label_id']
        test_dataset = TestDataset(transform=test_augm)
        with open(f"../results/{architecture}/{name}_{fold}.csv", "w") as f:
            f.write("_id,category_id\n")
            for idx in tqdm.tqdm(range(len(test_dataset))):
                images = torch.stack(
                    [test_dataset[idx][0] for i in range(tta)])
                images = variable(images)
                pred = model(images).data.cpu().numpy()
                pred = sum(pred)
                product_id = test_dataset[idx][1]
                label = np.argmax(pred, 0)
                cat_id = label_map.loc[label]['category_id']  # .ix is removed in modern pandas
                f.write(f"{product_id},{cat_id}\n")

    def predict_validation(self, architecture, fold, tta, batch_size):
        n_classes = 5270
        model = get_model(num_classes=n_classes, architecture=architecture)
        state = torch.load(f"../results/{architecture}/best-model_{fold}.pt")
        model.load_state_dict(state['model'])
        model.eval()
        test_augm = valid_augm()
        label_map = pd.read_csv("../data/labels_map.csv")
        label_map.index = label_map['label_id']
        loader = get_valid_loader(fold, batch_size, test_augm)
        with open(f"../results/{architecture}/validation_{fold}.csv", "w") as f:
            f.write("_id,category_id\n")
            for images, product_ids in tqdm.tqdm(loader):
                images = variable(images)
                preds = model(images).data.cpu().numpy()
                for pred, product_id in zip(preds, product_ids):
                    label = np.argmax(pred, 0)
                    cat_id = label_map.loc[label]['category_id']
                    f.write(f"{product_id},{cat_id}\n")
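# Hypothetical driver for the Model class above; the architecture name and
# hyperparameter values are placeholders, not values from the original project.
if __name__ == '__main__':
    trainer = Model()
    trainer.train(architecture='resnet50', fold=0, lr=1e-4, batch_size=64,
                  epochs=30, epoch_size=None, validation_size=5000,
                  iter_size=1, patience=4, optim='adam')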
    loss.backward()
    optimizer.step()
    scheduler.step()
    loss_.append(loss.item())
    del loss
    if opt.local_rank == 0:
        print('[Epoch:{}/{}][batch:{}/{}][loss:{}][learning rate:{}]'.format(
            epoch, opt.n_epochs, batch, b_r, loss_[-1],
            optimizer.state_dict()['param_groups'][0]['lr']))
    # pick one random band per sample for visualization
    a = np.random.randint(0, 31, fout.shape[0])
    a_ = np.array(range(fout.shape[0]))
    output = fout.detach().cpu()[a_, a, :, :] * 255
    output = output[:, None, :, :]
    output = torch.cat([output, output, output], 1)
    output_ = make_grid(output, nrow=5).numpy()
    if opt.local_rank == 0:
        train_logger.log(output_)
Loss_logger.log(epoch, np.mean(np.array(loss_)))
psnr_g = []
with torch.no_grad():
    for hsi, hsi_g, hsi_resize, msi in Hyper_test:
        spenet.eval()
        hsi_g = hsi_g.cuda().float()
        hsi_resize = hsi_resize.cuda().float()
        hsi_resize = torch.nn.functional.interpolate(hsi_resize,
                                                     scale_factor=(8, 8),
                                                     mode='bilinear')
        # hsi = renet(hsi_resize)
        hsi = hsi_resize
        # hsi = [a.cuda().float() for a in hsi]
        msi = [a.cuda().float() for a in msi]
        hsi_spe, scale, refined = spenet(hsi, msi[-1])
        # hsi_e = get_spe_gt(hsi_g)
        # hsi_2 = hsi_spe[-1][:,:31,:,:]
        fout = refined
# train model model.train() optimizer.zero_grad() classes = model(data) loss = focal_loss(classes, focal_label) + margin_loss( classes, margin_label) loss.backward() optimizer.step() # save the metrics meter_loss.add(loss.detach().cpu().item()) meter_accuracy.add(classes.detach().cpu(), target) meter_confusion.add(classes.detach().cpu(), target) if current_step % NUM_STEPS == 0: # print the information about train train_loss_logger.log(current_step // NUM_STEPS, meter_loss.value()[0]) train_accuracy_logger.log(current_step // NUM_STEPS, meter_accuracy.value()[0]) train_confusion_logger.log(meter_confusion.value()) results['train_loss'].append(meter_loss.value()[0]) results['train_accuracy'].append(meter_accuracy.value()[0]) print('[Step %d] Training Loss: %.4f Accuracy: %.2f%%' % (current_step // NUM_STEPS, meter_loss.value()[0], meter_accuracy.value()[0])) reset_meters() # test model periodically model.eval() with torch.no_grad(): for data, target in test_iterator: focal_label, margin_label = target, torch.eye(
model.train() optimizer.zero_grad() classes = model(data) loss = sum( [criterion(classes, label) for criterion in loss_criterion]) loss.backward() optimizer.step() # save the metrics meter_loss.add(loss.detach().cpu().item()) meter_accuracy.add(classes.detach().cpu(), target) meter_confusion.add(classes.detach().cpu(), target) if current_step % NUM_STEPS == 0: # print the information about train loss_logger.log(current_step // NUM_STEPS, meter_loss.value()[0], name='train') accuracy_logger.log(current_step // NUM_STEPS, meter_accuracy.value()[0], name='train') train_confusion_logger.log(meter_confusion.value()) results['train_loss'].append(meter_loss.value()[0]) results['train_accuracy'].append(meter_accuracy.value()[0]) print('[Step %d] Training Loss: %.4f Accuracy: %.2f%%' % (current_step // NUM_STEPS, meter_loss.value()[0], meter_accuracy.value()[0])) reset_meters() # test model periodically model.eval() with torch.no_grad():
""" meter_loss = tnt.meter.AverageValueMeter() meter_loss_dae = tnt.meter.AverageValueMeter() setting_logger = VisdomLogger('text', opts={'title': 'Settings'}, env=args.env_name) train_loss_logger = VisdomPlotLogger('line', opts={'title': 'Train Loss'}, env=args.env_name) epoch_offset = 0 if args.load_loss: if os.path.isfile('loss.log'): with open("loss.log", "r") as lossfile: loss_list = [] for loss in lossfile: loss_list.append(loss) while len(loss_list) > args.load_loss: loss_list.pop(0) for loss in loss_list: train_loss_logger.log(epoch_offset, float(loss)) epoch_offset += 1 ground_truth_logger_left = VisdomLogger('image', opts={'title': 'Ground Truth, left'}, env=args.env_name) ground_truth_logger_right = VisdomLogger('image', opts={'title': 'Ground Truth, right'}, env=args.env_name) reconstruction_logger_left = VisdomLogger('image', opts={'title': 'Reconstruction, left'}, env=args.env_name) reconstruction_logger_right = VisdomLogger('image', opts={'title': 'Reconstruction, right'}, env=args.env_name) setting_logger.log(str(args)) """ Load training data """ train_dataset = util.MyImageFolder(root='../../data/dumps/', transform=transforms.ToTensor(), target_transform=transforms.ToTensor()) train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=args.batch_size, num_workers=args.num_workers, shuffle=True, drop_last=True) steps = len(train_dataset) // args.batch_size
def train(self): # vis = visdom.Visdom(env='temp_log') train_data, train_label = self._preprocess('train') train_iter = [[train_data[i], train_label[i]] for i in range(len(train_data))] test_data, test_label = self._preprocess('test') val_iter = [[test_data[i], test_label[i]] for i in range(len(test_data))] self.feature_size = train_data[0].shape[2] encoder = Encoder(self.feature_size, self.hidden_size, self.en_cnn_k_s, self.strides, n_layers=1, dropout=0.5) decoder = Decoder(self.hidden_size, 1, n_layers=1, dropout=0.3) seq2seq = Seq2Seq(encoder, decoder).cuda() # seq2seq = torch.load('./model/newest_seq2seq') seq2seq.teacher_forcing_ratio = 0.3 optimizer = optim.Adam(seq2seq.parameters(), lr=self.lr) # optimizer = optim.SparseAdam(seq2seq,lr=self.lr) # optimizer = optim.Adamax(seq2seq.parameters(), lr=self.lr) # optimizer = optim.SGD(seq2seq.parameters(), lr=self.lr) # optimizer = optim.ASGD(seq2seq.parameters(), lr=self.lr) # optimizer = optim.RMSprop(seq2seq.parameters(), lr=self.lr) log = OrderedDict() log['train_loss'] = [] log['val_loss'] = [] log['test_loss'] = [] log['teacher_ratio'] = [] log['mean_er'] = [] log['mean_abs_er'] = [] log['score'] = [] score_logger = VisdomPlotLogger('line', opts={'title': 'score logger'}) loss_logger = VisdomPlotLogger('line', opts={'title': 'loss logger'}) count = 0 count2 = 0 count3 = 0 e0 = 120 best_loss = 1 for e in range(1, self.epochs + 1): train_loss = self._fit(e, seq2seq, optimizer, train_iter, grad_clip=5.0) val_loss = self._evaluate(seq2seq, train_iter) test_loss, er = self._evaluate(seq2seq, val_iter, cal_er=True) score = self._cal_score(er) print( "[Epoch:%d][train_loss:%.4e][val_loss:%.4e][test_loss:%.4e][mean_er:%.4e][mean_abs_er:%.4e][score:%.4f]" % (e, train_loss, val_loss, test_loss, np.mean(er), np.mean(np.abs(er)), np.mean(score))) score_logger.log(e, np.mean(score)) loss_logger.log(e, [train_loss, val_loss, test_loss]) log['train_loss'].append(float(train_loss)) log['val_loss'].append(float(val_loss)) log['test_loss'].append(float(test_loss)) log['teacher_ratio'].append(seq2seq.teacher_forcing_ratio) log['mean_er'].append(float(np.mean(er))) log['mean_abs_er'].append(float(np.mean(np.abs(er)))) log['score'].append(float(np.mean(score))) pd.DataFrame(log).to_csv('./model/log.csv', index=False) if float(val_loss) == min(log['val_loss']): torch.save(seq2seq, './model/seq2seq') if (float(test_loss) * 11 + float(val_loss) * 6) / 17 <= best_loss: torch.save(seq2seq, './model/best_seq2seq') best_loss = (float(test_loss) * 11 + float(val_loss) * 6) / 17 # if float(np.mean(np.abs(er))) == min(log['mean_abs_er']): # torch.save(seq2seq,'./model/lowest_test_seq2seq') if float(np.mean(score)) == max(log['score']): torch.save(seq2seq, './model/best_score_seq2seq') torch.save(seq2seq, './model/newest_seq2seq') count2 += 1 if float(train_loss) <= float(val_loss) * 0.2: count += 1 else: count = 0 if count >= 3 or count2 >= 100: seq2seq.teacher_forcing_ratio *= self.gama count -= 1 count2 = 0
lambda_ = 0.9 train_loss_logger = VisdomPlotLogger('line', opts={'title': 'Train Loss'}) loss_logger_loss = 0 loss_logger_count = 0 epoch_offset = 0 if args.load_loss: if os.path.isfile('loss.log'): with open("loss.log", "r") as lossfile: loss_list = [] for loss in lossfile: loss_list.append(loss) while len(loss_list) > args.load_loss: loss_list.pop(0) for loss in loss_list: train_loss_logger.log(epoch_offset * args.print_freq, float(loss)) epoch_offset += 1 epoch_offset -= 1 with torch.cuda.device(args.gpu): if use_cuda: print("activating cuda") model.cuda() for epoch in range(args.num_epochs): # Train average_loss = 0 size = 0
class statBase():
    def __init__(self, args):
        self.args = args
        self.lossAvg = tnt.meter.AverageValueMeter()
        self.train_loss_logger = VisdomPlotLogger('line', opts={'title': 'Train Loss'},
                                                  env='PoseCapsules')
        self.test_loss_logger = VisdomPlotLogger('line', opts={'title': 'Test Loss'},
                                                 env='PoseCapsules')
        self.recon_sum = 0
        self.rout_id = 1
        if not self.args.disable_recon:
            self.reconLossAvg = tnt.meter.AverageValueMeter()
            self.ground_truth_logger_left = VisdomLogger('image',
                                                         opts={'title': 'Ground Truth, left'},
                                                         env='PoseCapsules')
            self.reconstruction_logger_left = VisdomLogger('image',
                                                           opts={'title': 'Reconstruction, left'},
                                                           env='PoseCapsules')
        if self.args.regularize:
            self.regularizeLossAvg = tnt.meter.AverageValueMeter()
            self.logsigAvg = tnt.meter.AverageValueMeter()
            self.costmeanAvg = tnt.meter.AverageValueMeter()
            self.costAvg = tnt.meter.AverageValueMeter()
            self.aAvg = tnt.meter.AverageValueMeter()

    def reset(self):
        self.lossAvg.reset()
        if not self.args.disable_recon:
            self.reconLossAvg.reset()
        if self.args.regularize:
            self.regularizeLossAvg.reset()
            self.logsigAvg.reset()
            self.costmeanAvg.reset()
            self.costAvg.reset()
            self.aAvg.reset()

    def log(self, pbar, output, labels, metrics=None, stat=None):
        # avoid the mutable-default-argument pitfall (was: dict=OrderedDict())
        if metrics is None:
            metrics = OrderedDict()
        if not self.args.disable_loss:
            metrics['loss'] = self.lossAvg.value()[0]
        if stat is not None:
            self.logsigAvg.add(stat[-self.rout_id * 4 + 0])
            self.costmeanAvg.add(stat[-self.rout_id * 4 + 1])
            self.costAvg.add(stat[-self.rout_id * 4 + 2])
            self.aAvg.add(stat[-self.rout_id * 4 + 3])
            stat.clear()
            metrics['logsig'] = self.logsigAvg.value()[0]
            metrics['costmean'] = self.costmeanAvg.value()[0]
            metrics['cost'] = self.costAvg.value()[0]
            metrics['a'] = self.aAvg.value()[0]
        if not self.args.disable_recon:
            metrics['reconloss'] = self.reconLossAvg.value()[0]
            metrics['rsum'] = self.recon_sum
        if self.args.regularize:
            metrics['reguloss'] = self.regularizeLossAvg.value()[0]
        pbar.set_postfix(metrics, refresh=False)

    def endTrainLog(self, epoch, groundtruth_image=None, recon_image=None):
        if not self.args.disable_loss:
            self.train_loss_logger.log(epoch, self.lossAvg.value()[0], name='loss')
            with open("train.log", "a") as myfile:
                myfile.write(str(self.lossAvg.value()[0]) + '\n')
        if not self.args.disable_recon:
            if groundtruth_image is not None:
                self.ground_truth_logger_left.log(
                    make_grid(groundtruth_image,
                              nrow=int(self.args.batch_size ** 0.5),
                              normalize=True,
                              range=(0, 1)).cpu().numpy())
            if recon_image is not None:
                self.reconstruction_logger_left.log(
                    make_grid(recon_image.data,
                              nrow=int(self.args.batch_size ** 0.5),
                              normalize=True,
                              range=(0, 1)).cpu().numpy())
            self.train_loss_logger.log(epoch, self.reconLossAvg.value()[0], name='recon')

    def endTestLog(self, epoch):
        if not self.args.disable_loss:
            self.test_loss_logger.log(epoch, self.lossAvg.value()[0], name='loss')
            with open("test.log", "a") as myfile:
                myfile.write(str(self.lossAvg.value()[0]) + '\n')
        if not self.args.disable_recon:
            self.test_loss_logger.log(epoch, self.reconLossAvg.value()[0], name='recon')

    def load_loss(self, history_count):
        # replay the last `history_count` entries of train.log / test.log into Visdom
        if os.path.isfile('train.log'):
            with open("train.log", "r") as lossfile:
                loss_list = [line for line in lossfile]
            loss_list = loss_list[-history_count:]
            epoch = -len(loss_list)
            for loss in loss_list:
                self.train_loss_logger.log(epoch, float(loss), name='loss')
                epoch += 1
        if os.path.isfile('test.log'):
            with open("test.log", "r") as lossfile:
                loss_list = [line for line in lossfile]
            loss_list = loss_list[-history_count:]
            epoch = -len(loss_list)
            for loss in loss_list:
                self.test_loss_logger.log(epoch, float(loss), name='loss')
                epoch += 1
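# statBase expects an `args` object exposing (at least) the attributes used
# above. A hypothetical minimal construction for quick experiments; the
# attribute values are placeholders, not project defaults:
from argparse import Namespace

stat = statBase(Namespace(disable_recon=True, regularize=False,
                          disable_loss=False, batch_size=32))
stat.load_loss(history_count=50)  # replay the last 50 logged losses into Visdom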
def trainval_classifier(model, pretrained, modelName, train_loader, valid_loader,
                        exp_name='experiment', lr=0.001, epochs=50, momentum=0.99):
    if pretrained:
        if os.path.isfile('checkpoint\\' + modelName + '_checkpoint.pth'):
            print('Using the previously trained model')
            model.load_state_dict(
                torch.load('checkpoint\\' + modelName + '_checkpoint.pth')['state_dict'])
    # Loss function
    criterion = nn.CrossEntropyLoss()
    # Stochastic gradient descent
    optimizer = SGD(model.parameters(), lr, momentum=momentum)
    # Evaluation metrics
    loss_meter = AverageValueMeter()
    acc_meter = AverageValueMeter()
    # Visdom plots
    loss_logger = VisdomPlotLogger('line', env=exp_name,
                                   opts={'title': 'Loss',
                                         'legend': ['train', 'valid']})
    acc_logger = VisdomPlotLogger('line', env=exp_name,
                                  opts={'title': 'Accuracy',
                                        'legend': ['train', 'valid']})
    visdom_saver = VisdomSaver(envs=[exp_name])
    # Use CUDA if available
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    # define a dictionary containing the training and validation loaders
    loader = {'train': train_loader, 'valid': valid_loader}

    # Helper to save a checkpoint
    def save_checkpoint(model, epoch):
        if not os.path.exists('checkpoint'):
            os.makedirs('checkpoint')
        torch.save({'state_dict': model.state_dict(), 'epoch': epoch},
                   "{}{}_{}.pth".format('checkpoint\\', exp_name, 'checkpoint'))

    # Main training loop
    for e in range(epochs):
        # Alternate between two modes: train and validation
        for mode in ['train', 'valid']:
            loss_meter.reset()
            acc_meter.reset()
            model.train() if mode == 'train' else model.eval()
            # enable gradients only during training
            with torch.set_grad_enabled(mode == 'train'):
                for i, batch in enumerate(loader[mode]):
                    # load sample x and label y, one batch at a time
                    x = batch[0].to(device)
                    y = batch[1].to(device)
                    # model output for x
                    output = model(x)
                    # compute the loss between the model output and y
                    l = criterion(output, y)
                    if mode == 'train':
                        # compute the gradients
                        l.backward()
                        # take one gradient descent step
                        optimizer.step()
                        # zero the gradients so they do not accumulate
                        optimizer.zero_grad()
                    acc = accuracy_score(y.to('cpu'), output.to('cpu').max(1)[1])
                    # number of elements in the batch
                    n = batch[0].shape[0]
                    loss_meter.add(l.item() * n, n)
                    acc_meter.add(acc * n, n)
                    if mode == 'train':
                        loss_logger.log(e + (i + 1) / len(loader[mode]),
                                        loss_meter.value()[0], name=mode)
                        acc_logger.log(e + (i + 1) / len(loader[mode]),
                                       acc_meter.value()[0], name=mode)
            # log loss and accuracy to visdom
            loss_logger.log(e + 1, loss_meter.value()[0], name=mode)
            acc_logger.log(e + 1, acc_meter.value()[0], name=mode)
        # save only the current checkpoint, overwriting the previous one
        print('Epoch no.', e)
        save_checkpoint(model, e)
    return model
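# Hypothetical call, e.g. fine-tuning a ResNet-18 on a 10-class dataset;
# the loaders and every value below are placeholders, not project code.
from torchvision.models import resnet18

model = resnet18(num_classes=10)
model = trainval_classifier(model, pretrained=False, modelName='resnet18',
                            train_loader=train_loader,
                            valid_loader=valid_loader,
                            exp_name='resnet18_experiment',
                            lr=0.001, epochs=50)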
opts={'title': 'Settings'}, env=args.env_name) train_loss_logger = VisdomPlotLogger('line', opts={'title': 'Train Loss'}, env=args.env_name) epoch_offset = 0 if args.load_loss: if os.path.isfile('loss.log'): with open("loss.log", "r") as lossfile: loss_list = [] for loss in lossfile: loss_list.append(loss) while len(loss_list) > args.load_loss: loss_list.pop(0) for loss in loss_list: train_loss_logger.log(epoch_offset, float(loss)) epoch_offset += 1 ground_truth_logger_left = VisdomLogger( 'image', opts={'title': 'Ground Truth, left'}, env=args.env_name) ground_truth_logger_right = VisdomLogger( 'image', opts={'title': 'Ground Truth, right'}, env=args.env_name) reconstruction_logger_left = VisdomLogger( 'image', opts={'title': 'Reconstruction, left'}, env=args.env_name) reconstruction_logger_right = VisdomLogger( 'image', opts={'title': 'Reconstruction, right'}, env=args.env_name) setting_logger.log(str(args)) """ Load training data """ train_dataset = util.MyImageFolder(root='../../data/dumps/',
        add_loss = args.recon_factor * recon_loss(recon, imgs) / args.batch_size
        loss += add_loss
        test_loss_recon += add_loss.data.cpu().item()
    test_loss /= steps_test
    test_loss_recon /= steps_test
    print("Test loss: {}, Test loss recon: {}".format(test_loss, test_loss_recon))
"""
All train data processed: Do logging
"""
loss = meter_loss.value()[0]
loss_recon /= steps
train_loss_logger.log(epoch + epoch_offset, loss - loss_recon, name='loss')
test_loss_logger.log(epoch + epoch_offset, test_loss, name='loss')
"""
loss_relation = loss_recon/(loss-loss_recon)
if loss_relation > 0.25 and epoch>15:
    fac = 0.25/loss_relation
    print("Loss relation = {}. Recon-factor reduced from {} to {}".format(loss_relation, args.recon_factor, args.recon_factor*fac))
    args.recon_factor *= fac
"""
if not args.disable_recon:
    ground_truth_logger_left.log(
        make_grid(imgs,
                  nrow=int(args.batch_size**0.5),
                  normalize=True,
test_loss_logger = VisdomPlotLogger( 'line', port=port, opts={'title': """%s Test Loss""" % basename}) if args.model_loadname is not None: model_loadpath = os.path.abspath( os.path.join(default_base_savedir, args.model_loadname)) if os.path.exists(model_loadpath): model_dict = torch.load(model_loadpath) pcnn_model.load_state_dict(model_dict['state_dict']) opt.load_state_dict(model_dict['optimizer']) epochs.extend(model_dict['epochs']) train_loss_list.extend(model_dict['train_losses']) test_loss_list.extend(model_dict['test_losses']) for e, tr, te in zip(epochs, train_loss_list, test_loss_list): train_loss_logger.log(e, np.sum(tr)) test_loss_logger.log(e, np.sum(te)) epoch = epochs[-1] print('loaded checkpoint at epoch: {} from {}'.format( epoch, model_loadpath)) epoch = epochs[-1] + 1 else: print('could not find checkpoint at {}'.format(model_loadpath)) embed() else: print('created new model') nr_logistic_mix = 10 vmodel = AutoEncoder(nr_logistic_mix=nr_logistic_mix, num_clusters=num_clusters, encoder_output_size=args.num_z).to(DEVICE)
print("starting training") for e in range(rnn_epoch + 1, rnn_epoch + args.num_epochs): ep_cnt, train_l = train(e, train_data_loader, args.window_size, do_use_cuda=use_cuda) total_passes += ep_cnt test_l = test(e, test_data_loader, args.window_size, do_use_cuda=use_cuda) train_loss.append(np.mean(train_l)) test_loss.append(np.mean(test_l)) train_loss_logger.log(e, train_loss[-1]) test_loss_logger.log(e, test_loss[-1]) print('epoch {} train loss mean {} test loss mean {}'.format( e, train_loss[-1], test_loss[-1])) if ((not e % args.save_every) or (e == rnn_epoch + args.num_epochs)): state = { 'epoch': e, 'train_loss': train_loss, 'test_loss': test_loss, 'state_dict': rnn.state_dict(), 'optimizer': optim.state_dict(), 'total_passes': total_passes, } filename = os.path.join(default_base_savedir, basename + "e%05d.pkl" % e)
    recon = recon.view_as(imgs)
    loss = capsule_loss(imgs, out_labels, labels, m, recon)
    loss.backward()
    optimizer.step()
    meter_accuracy.add(out_labels.data, labels.data)
    meter_loss.add(loss.item())  # loss.data[0] in pre-0.4 PyTorch
    pbar.set_postfix(loss=meter_loss.value()[0], acc=meter_accuracy.value()[0])
    pbar.update()
loss = meter_loss.value()[0]
acc = meter_accuracy.value()[0]
train_loss_logger.log(epoch, loss)
train_error_logger.log(epoch, acc)
print("Epoch {} Train acc: {:.4f}, loss: {:.4f}".format(epoch, acc, loss))
scheduler.step(acc)
torch.save(model.state_dict(), "./weights/em_capsules/model_{}.pth".format(epoch))
reset_meters()
# Test
print('Testing...')
correct = 0
for i, data in enumerate(test_loader):
    imgs, labels = data  # b,1,28,28; b
    imgs, labels = Variable(imgs, volatile=True), Variable(
def main(opt):
    cuda = opt.cuda
    visualize = opt.visualize
    print(f"cuda = {cuda}, visualize = {opt.visualize}")
    if visualize:
        if PRE_EPOCH_GEN > 0:
            pretrain_G_score_logger = VisdomPlotLogger(
                'line', opts={'title': 'Pre-train G Goodness Score'})
        if PRE_EPOCH_DIS > 0:
            pretrain_D_loss_logger = VisdomPlotLogger(
                'line', opts={'title': 'Pre-train D Loss'})
        adversarial_G_score_logger = VisdomPlotLogger(
            'line',
            opts={
                'title': f'Adversarial G {GD} Goodness Score',
                'Y': '{0, 13}',
                'X': '{0, TOTAL_BATCH}'
            })
        if CHECK_VARIANCE:
            G_variance_logger = VisdomPlotLogger(
                'line', opts={'title': f'Adversarial G {GD} Variance'})
        G_text_logger = VisdomTextLogger(update_type='APPEND')
        adversarial_D_loss_logger = VisdomPlotLogger(
            'line', opts={'title': 'Adversarial Batch D Loss'})

    # Define Networks
    generator = Generator(VOCAB_SIZE, g_emb_dim, g_hidden_dim, cuda)
    n_gen = Variable(torch.Tensor([get_n_params(generator)]))
    use_cuda = False
    if cuda:
        n_gen = n_gen.cuda()
        use_cuda = True
    print('Number of parameters in the generator: {}'.format(n_gen))
    discriminator = LSTMDiscriminator(d_num_class, VOCAB_SIZE,
                                      d_lstm_hidden_dim, use_cuda)
    c_phi_hat = AnnexNetwork(d_num_class, VOCAB_SIZE, d_emb_dim,
                             c_filter_sizes, c_num_filters, d_dropout,
                             BATCH_SIZE, g_sequence_len)
    if cuda:
        generator = generator.cuda()
        discriminator = discriminator.cuda()
        c_phi_hat = c_phi_hat.cuda()

    # Generate toy data using target lstm
    print('Generating data ...')
    # Load data from file
    gen_data_iter = DataLoader(POSITIVE_FILE, BATCH_SIZE)
    gen_criterion = nn.NLLLoss(size_average=False)
    gen_optimizer = optim.Adam(generator.parameters())
    if cuda:
        gen_criterion = gen_criterion.cuda()

    # Pretrain Generator using MLE
    pre_train_scores = []
    if MLE:
        print('Pretrain with MLE ...')
        for epoch in range(int(np.ceil(PRE_EPOCH_GEN))):
            loss = train_epoch(generator, gen_data_iter, gen_criterion,
                               gen_optimizer, PRE_EPOCH_GEN, epoch, cuda)
            print('Epoch [%d] Model Loss: %f' % (epoch, loss))
            samples = generate_samples(generator, BATCH_SIZE, GENERATED_NUM,
                                       EVAL_FILE)
            eval_iter = DataLoader(EVAL_FILE, BATCH_SIZE)
            generated_string = eval_iter.convert_to_char(samples)
            print(generated_string)
            eval_score = get_data_goodness_score(generated_string, SPACES)
            kl_score = get_data_freq(generated_string) if not SPACES else -1
            freq_score = get_char_freq(generated_string, SPACES)
            pre_train_scores.append(eval_score)
            print('Epoch [%d] Generation Score: %f' % (epoch, eval_score))
            print('Epoch [%d] KL Score: %f' % (epoch, kl_score))
            print('Epoch [{}] Character distribution: {}'.format(
                epoch, list(freq_score)))
            torch.save(
                generator.state_dict(),
                f"checkpoints/MLE_space_{SPACES}_length_{SEQ_LEN}_preTrainG_epoch_{epoch}.pth"
            )
            if visualize:
                pretrain_G_score_logger.log(epoch, eval_score)
    else:
        generator.load_state_dict(torch.load(weights_path))

    # Finish training with MLE
    if GD == "MLE":
        for epoch in range(3 * int(GENERATED_NUM / BATCH_SIZE)):
            loss = train_epoch_batch(generator, gen_data_iter, gen_criterion,
                                     gen_optimizer, PRE_EPOCH_GEN, epoch,
                                     int(GENERATED_NUM / BATCH_SIZE), cuda)
            print('Epoch [%d] Model Loss: %f' % (epoch, loss))
            samples = generate_samples(generator, BATCH_SIZE, GENERATED_NUM,
                                       EVAL_FILE)
            eval_iter = DataLoader(EVAL_FILE, BATCH_SIZE)
            generated_string = eval_iter.convert_to_char(samples)
            print(generated_string)
            eval_score = get_data_goodness_score(generated_string, SPACES)
            kl_score = get_data_freq(generated_string) if not SPACES else -1
            freq_score = get_char_freq(generated_string, SPACES)
            pre_train_scores.append(eval_score)
            print('Epoch [%d] Generation Score: %f' % (epoch, eval_score))
            print('Epoch [%d] KL Score: %f' % (epoch, kl_score))
            print('Epoch [{}] Character distribution: {}'.format(
                epoch, list(freq_score)))
            torch.save(
                generator.state_dict(),
                f"checkpoints/MLE_space_{SPACES}_length_{SEQ_LEN}_preTrainG_epoch_{epoch}.pth"
            )
            if visualize:
                pretrain_G_score_logger.log(epoch, eval_score)

    # Pretrain Discriminator
    dis_criterion = nn.NLLLoss(size_average=False)
    dis_optimizer = optim.Adam(discriminator.parameters())
    if opt.cuda:
        dis_criterion = dis_criterion.cuda()
    print('Pretrain Discriminator ...')
    for epoch in range(PRE_EPOCH_DIS):
        samples = generate_samples(generator, BATCH_SIZE, GENERATED_NUM,
                                   NEGATIVE_FILE)
        dis_data_iter = DisDataIter(POSITIVE_FILE, NEGATIVE_FILE, BATCH_SIZE,
                                    SEQ_LEN)
        for _ in range(PRE_ITER_DIS):
            loss = train_epoch(discriminator, dis_data_iter, dis_criterion,
                               dis_optimizer, 1, 1, cuda)
            print('Epoch [%d], loss: %f' % (epoch, loss))
            if visualize:
                pretrain_D_loss_logger.log(epoch, loss)

    # Adversarial Training
    rollout = Rollout(generator, UPDATE_RATE)
    print('#####################################################')
    print('Start Adversarial Training...\n')
    gen_gan_loss = GANLoss()
    gen_gan_optm = optim.Adam(generator.parameters())
    if cuda:
        gen_gan_loss = gen_gan_loss.cuda()
    gen_criterion = nn.NLLLoss(size_average=False)
    if cuda:
        gen_criterion = gen_criterion.cuda()
    dis_criterion = nn.NLLLoss(size_average=False)
    dis_criterion_bce = nn.BCELoss()
    dis_optimizer = optim.Adam(discriminator.parameters())
    if cuda:
        dis_criterion = dis_criterion.cuda()
    c_phi_hat_loss = VarianceLoss()
    if cuda:
        c_phi_hat_loss = c_phi_hat_loss.cuda()
    c_phi_hat_optm = optim.Adam(c_phi_hat.parameters())

    gen_scores = pre_train_scores

    for total_batch in range(TOTAL_BATCH):
        # Train the generator for one step
        for it in range(G_STEPS):
            samples = generator.sample(BATCH_SIZE, g_sequence_len)
            # samples has size (BS, sequence_len)
            # Construct the input to the generator: prepend zeros and drop the last column
            zeros = torch.zeros((BATCH_SIZE, 1)).type(torch.LongTensor)
            if samples.is_cuda:
                zeros = zeros.cuda()
            inputs = Variable(
                torch.cat([zeros, samples.data], dim=1)[:, :-1].contiguous())
            targets = Variable(samples.data).contiguous().view((-1, ))
            if opt.cuda:
                inputs = inputs.cuda()
                targets = targets.cuda()
            # Calculate the reward
            rewards = rollout.get_reward(samples, discriminator, VOCAB_SIZE, cuda)
            rewards = Variable(torch.Tensor(rewards))
            if cuda:
                rewards = rewards.cuda()
            # exponentiate once and flatten; rewards has size (BS,)
            # (the original applied torch.exp twice on the CUDA path)
            rewards = torch.exp(rewards).contiguous().view((-1, ))
            prob = generator.forward(inputs)
            # prob has size (BS*sequence_len, VOCAB_SIZE)
            # 3.a
            theta_prime = g_output_prob(prob)
            # theta_prime has size (BS*sequence_len, VOCAB_SIZE)
            # 3.e and f
            c_phi_z_ori, c_phi_z_tilde_ori = c_phi_out(
                GD,
                c_phi_hat,
                theta_prime,
                discriminator,
                temperature=DEFAULT_TEMPERATURE,
                eta=DEFAULT_ETA,
                cuda=cuda)
            c_phi_z_ori = torch.exp(c_phi_z_ori)
            c_phi_z_tilde_ori = torch.exp(c_phi_z_tilde_ori)
            c_phi_z = torch.sum(c_phi_z_ori[:, 1]) / BATCH_SIZE
            c_phi_z_tilde = -torch.sum(c_phi_z_tilde_ori[:, 1]) / BATCH_SIZE
            if opt.cuda:
                c_phi_z = c_phi_z.cuda()
                c_phi_z_tilde = c_phi_z_tilde.cuda()
                c_phi_hat = c_phi_hat.cuda()
            # 3.i
            grads = []
            first_term_grads = []
            # 3.h optimization step
            # first, empty the gradient buffers
            gen_gan_optm.zero_grad()
            # then rearrange prob
            new_prob = prob.view((BATCH_SIZE, g_sequence_len, VOCAB_SIZE))
            # 3.g new gradient loss for RELAX
            batch_i_grads_1 = gen_gan_loss.forward_reward_grads(
                samples, new_prob, rewards, generator, BATCH_SIZE,
                g_sequence_len, VOCAB_SIZE, cuda)
            batch_i_grads_2 = gen_gan_loss.forward_reward_grads(
                samples, new_prob, c_phi_z_tilde_ori[:, 1], generator,
                BATCH_SIZE, g_sequence_len, VOCAB_SIZE, cuda)
            # batch_i_grads_1 and batch_i_grads_2 should each hold BATCH_SIZE
            # lists containing all the gradients
            # 3.i
            batch_grads = batch_i_grads_1
            if GD != "REINFORCE":
                for i in range(len(batch_i_grads_1)):
                    for j in range(len(batch_i_grads_1[i])):
                        batch_grads[i][j] = torch.add(batch_grads[i][j],
                                                      (-1) * batch_i_grads_2[i][j])
            # batch_grads should be of length BATCH_SIZE
            grads.append(batch_grads)
            # now train the generator
            generator.zero_grad()
            for i in range(g_sequence_len):
                # 3.g new gradient loss for RELAX
                cond_prob = gen_gan_loss.forward_reward(i, samples, new_prob,
                                                        rewards, BATCH_SIZE,
                                                        g_sequence_len,
                                                        VOCAB_SIZE, cuda)
                c_term = gen_gan_loss.forward_reward(i, samples, new_prob,
                                                     c_phi_z_tilde_ori[:, 1],
                                                     BATCH_SIZE,
                                                     g_sequence_len,
                                                     VOCAB_SIZE, cuda)
                if GD != "REINFORCE":
                    cond_prob = torch.add(cond_prob, (-1) * c_term)
                new_prob[:, i, :].backward(cond_prob, retain_graph=True)
            # 3.h - still training the generator, with the last two terms of the RELAX equation
            if GD != "REINFORCE":
                c_phi_z.backward(retain_graph=True)
                c_phi_z_tilde.backward(retain_graph=True)
            gen_gan_optm.step()
            # 3.i
            if CHECK_VARIANCE:
                # c_phi_z term
                partial_grads = []
                for j in range(BATCH_SIZE):
                    generator.zero_grad()
                    c_phi_z_ori[j, 1].backward(retain_graph=True)
                    j_grads = []
                    for p in generator.parameters():
                        j_grads.append(p.grad.clone())
                    partial_grads.append(j_grads)
                grads.append(partial_grads)
                # c_phi_z_tilde term
                partial_grads = []
                for j in range(BATCH_SIZE):
                    generator.zero_grad()
                    c_phi_z_tilde_ori[j, 1].backward(retain_graph=True)
                    j_grads = []
                    for p in generator.parameters():
                        j_grads.append(-1 * p.grad.clone())
                    partial_grads.append(j_grads)
                grads.append(partial_grads)
                # Uncomment the code below to inspect the gradients
                """
                print('1st contribution to the gradient')
                print(grads[0][0][6])
                print('2nd contribution to the gradient')
                print(grads[1][0][6])
                print('3rd contribution to the gradient')
                print(grads[2][0][6])
                """
                # grads should be of length 3
                # grads[0] should be of length BATCH_SIZE
                # 3.j
                all_grads = grads[0]
                if GD != "REINFORCE":
                    for i in range(len(grads[0])):
                        for j in range(len(grads[0][i])):
                            all_grads[i][j] = torch.add(
                                torch.add(all_grads[i][j], grads[1][i][j]),
                                grads[2][i][j])
                # all_grads should be of length BATCH_SIZE
                c_phi_hat_optm.zero_grad()
                var_loss = c_phi_hat_loss.forward(all_grads, cuda)  # /n_gen
                true_variance = c_phi_hat_loss.forward_variance(all_grads, cuda)
                var_loss.backward()
                c_phi_hat_optm.step()
                print('Batch [{}] Estimate of the variance of the gradient at step {}: {}'
                      .format(total_batch, it, true_variance[0]))
                if visualize:
                    G_variance_logger.log((total_batch + it), true_variance[0])

        # Evaluate the quality of the Generator outputs
        if total_batch % 1 == 0 or total_batch == TOTAL_BATCH - 1:
            samples = generate_samples(generator, BATCH_SIZE, GENERATED_NUM,
                                       EVAL_FILE)
            eval_iter = DataLoader(EVAL_FILE, BATCH_SIZE)
            generated_string = eval_iter.convert_to_char(samples)
            print(generated_string)
            eval_score = get_data_goodness_score(generated_string, SPACES)
            kl_score = get_data_freq(generated_string) if not SPACES else -1
            freq_score = get_char_freq(generated_string, SPACES)
            gen_scores.append(eval_score)
            print('Batch [%d] Generation Score: %f' % (total_batch, eval_score))
            print('Batch [%d] KL Score: %f' % (total_batch, kl_score))
            print('Epoch [{}] Character distribution: {}'.format(
                total_batch, list(freq_score)))

        # Checkpoint & visualize
        if total_batch % 10 == 0 or total_batch == TOTAL_BATCH - 1:
            torch.save(
                generator.state_dict(),
                f'checkpoints/{GD}_G_space_{SPACES}_pretrain_{PRE_EPOCH_GEN}_batch_{total_batch}.pth'
            )
            if visualize:
                [G_text_logger.log(line) for line in generated_string]
                adversarial_G_score_logger.log(total_batch, eval_score)

        # Train the discriminator
        batch_G_loss = 0.0
        for b in range(D_EPOCHS):
            for data, _ in gen_data_iter:
                data = Variable(data)
                real_data = convert_to_one_hot(data, VOCAB_SIZE, cuda)
                real_target = Variable(torch.ones((data.size(0), 1)))
                samples = generator.sample(data.size(0), g_sequence_len)  # bs x seq_len
                fake_data = convert_to_one_hot(samples, VOCAB_SIZE, cuda)  # bs x seq_len x vocab_size
                fake_target = Variable(torch.zeros((data.size(0), 1)))
                if cuda:
                    real_target = real_target.cuda()
                    fake_target = fake_target.cuda()
                    real_data = real_data.cuda()
                    fake_data = fake_data.cuda()
                real_pred = torch.exp(discriminator(real_data)[:, 1])
                fake_pred = torch.exp(discriminator(fake_data)[:, 1])
                D_real_loss = dis_criterion_bce(real_pred, real_target)
                D_fake_loss = dis_criterion_bce(fake_pred, fake_target)
                D_loss = D_real_loss + D_fake_loss
                dis_optimizer.zero_grad()
                D_loss.backward()
                dis_optimizer.step()
            gen_data_iter.reset()
            print('Batch [{}] Discriminator loss at epoch {}: {}'.format(
                total_batch, b, D_loss.data[0]))
        if visualize:
            adversarial_D_loss_logger.log(total_batch, D_loss.data[0])

    if not visualize:
        plt.plot(gen_scores)
        plt.ylim((0, 13))
        plt.title('{}_after_{}_epochs_of_pretraining'.format(GD, PRE_EPOCH_GEN))
        plt.show()
                               class_criterion_B, train_loader, test_loader, opt)
    if opt.pretrained:
        trained_model = load_weight(fullModel, opt.pretrained, verbose=True)
    # -- Evaluation
    nTestImages = reid_set.test_inds  # [2 ** (n+1) for n in range(5)]
    cmc, simMat, _, avgSame, avgDiff = compute_cmc(reid_set, nTestImages,
                                                   trained_model, 128)
    print(cmc)
    print(simMat)
    print(avgSame, avgDiff)
    sim_logger = VisdomLogger('heatmap',
                              port=8097,
                              opts={
                                  'title': 'simMat',
                                  'columnnames': list(range(len(simMat[0]))),
                                  'rownames': list(range(len(simMat)))
                              })
    cmc_logger = VisdomPlotLogger("line", win="cmc_curve")
    for i, v in enumerate(cmc):
        cmc_logger.log(i, v, name="cmc_curve")
    sim_logger.log(simMat)
    log.info("Saving results...")
    # pickle requires binary mode
    with open("cmc.pkl", 'wb') as f:
        pickle.dump(cmc, f)
    with open("simMat.pkl", 'wb') as f:
        pickle.dump(simMat, f)
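# Counterpart for reading the pickles back; the mode must likewise be binary:
with open("cmc.pkl", 'rb') as f:
    cmc = pickle.load(f)
with open("simMat.pkl", 'rb') as f:
    simMat = pickle.load(f)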
                      range=(0, 1)).numpy())
        fake_B_im_logger.log(
            make_grid(fake_B.detach().cpu(),
                      nrow=int(opt.batchSize**0.5),
                      normalize=True,
                      range=(0, 1)).numpy())
    ###################################
    # Progress report (http://localhost:8097)
    """
    logger.log({'loss_G': loss_G,
                'loss_G_identity': (loss_identity_A + loss_identity_B),
                'loss_G_GAN': (loss_GAN_A2B + loss_GAN_B2A),
                'loss_G_cycle': (loss_cycle_ABA + loss_cycle_BAB),
                'loss_D': (loss_D_A + loss_D_B)},
               images={'real_A': real_A, 'real_B': real_B,
                       'fake_A': fake_A, 'fake_B': fake_B})
    """
    # Log the epoch-averaged losses, then reset the meters
    for loss_name, meter in loss_meters.items():
        loss_logger.log(epoch, meter.value()[0], name=loss_name)
        meter.reset()

    # Update learning rates
    lr_scheduler_G.step()
    lr_scheduler_D_A.step()
    lr_scheduler_D_B.step()

    # Save models checkpoints
    torch.save(netG_A2B.state_dict(), 'output/netG_A2B.pth')
    torch.save(netG_B2A.state_dict(), 'output/netG_B2A.pth')
    torch.save(netD_A.state_dict(), 'output/netD_A.pth')
    torch.save(netD_B.state_dict(), 'output/netD_B.pth')
    # ##################################
def train_valid_loop(train_loader,
                     dev_loader,
                     test_loader,
                     args,
                     model,
                     fold=None):
    # --------------------------------------------------------------------------
    # TRAIN/VALID LOOP
    logger.info('-' * 100)
    stats = {
        'timer': utils.Timer(),
        'epoch': 0,
        'best_valid': 0,
        'best_epoch': 0,
        'fold': fold
    }
    start_epoch = 0
    if args.visdom:
        # add visdom logger code
        port = args.visdom_port
        train_loss_logger = VisdomPlotLogger(
            'line', port=port, opts={'title': f'{args.model_name} Train Loss'})
        train_metric_logger = VisdomPlotLogger(
            'line',
            port=port,
            opts={'title': f'{args.model_name} Train Class Accuracy'})
        idx2label = {i: label for label, i in model.label_dict.items()}
        label_names = [idx2label[i] for i in range(model.args.label_size)]
        train_confusion_logger = VisdomLogger(
            'heatmap',
            port=port,
            opts={
                'title': f'{args.model_name} Train Confusion Matrix',
                'columnnames': label_names,
                'rownames': label_names
            })
        valid_metric_logger = VisdomPlotLogger(
            'line',
            port=port,
            opts={'title': f'{args.model_name} Valid Class Accuracy'})
        valid_confusion_logger = VisdomLogger(
            'heatmap',
            port=port,
            opts={
                'title': f'{args.model_name} Valid Confusion Matrix',
                'columnnames': label_names,
                'rownames': label_names
            })
        train_confusion_meter = tnt.meter.ConfusionMeter(model.args.label_size,
                                                         normalized=True)
        valid_confusion_meter = tnt.meter.ConfusionMeter(model.args.label_size,
                                                         normalized=True)
    else:
        train_confusion_meter = None
        valid_confusion_meter = None

    # defined up front so the post-training code below can always use it
    fold_info = f'.fold_{fold}' if fold is not None else ''
    try:
        for epoch in range(start_epoch, args.num_epochs):
            stats['epoch'] = epoch
            # Train
            loss = train(args, train_loader, model, stats)
            stats['train_loss'] = loss
            # Validate train
            train_res, train_cfm = validate(args,
                                            train_loader,
                                            model,
                                            stats,
                                            mode='train',
                                            confusion_meter=train_confusion_meter)
            for m in train_res:
                stats['train_' + m] = train_res[m]
            # Validate dev
            val_res, valid_cfm = validate(args,
                                          dev_loader,
                                          model,
                                          stats,
                                          mode='dev',
                                          confusion_meter=valid_confusion_meter)
            # iterate over the dev results (was: train_res, which only works
            # when both dicts happen to share the same keys)
            for m in val_res:
                stats['dev_' + m] = val_res[m]
            if args.visdom:
                train_loss_logger.log(epoch, loss)
                train_metric_logger.log(epoch, train_res[args.valid_metric])
                train_confusion_logger.log(train_cfm)
                valid_metric_logger.log(epoch, val_res[args.valid_metric])
                valid_confusion_logger.log(valid_cfm)
                train_confusion_meter.reset()
                valid_confusion_meter.reset()
            # Save best valid
            if val_res[args.valid_metric] > stats['best_valid']:
                logger.info(
                    colored(
                        f'Best valid: {args.valid_metric} = {val_res[args.valid_metric]*100:.2f}% ',
                        'yellow') +
                    colored(f'(epoch {stats["epoch"]}, {model.updates} updates)',
                            'yellow'))
                model.save(args.model_file + fold_info)
                stats['best_valid'] = val_res[args.valid_metric]
                stats['best_epoch'] = epoch
                logger.info('-' * 100)
            if args.stats_file:
                with open(args.stats_file, 'w') as f:
                    out_stats = stats.copy()
                    out_stats['timer'] = out_stats['timer'].time()
                    if fold is None:
                        del out_stats['fold']
                    f.write(json.dumps(out_stats) + '\n')
            if epoch - stats['best_epoch'] >= args.early_stopping:
                logger.info(
                    colored(
                        f'No improvement for {args.early_stopping} epochs, stop training.',
                        'red'))
                break
    except KeyboardInterrupt:
        logger.info(colored('User ended training. Stopping.', 'red'))

    logger.info('Load best model...')
    model = EntityClassifier.load(args.model_file + fold_info, args)
    # device = torch.device(f"cuda:{args.gpu}" if args.cuda else "cpu")
    # model.to(device)
    model.cuda()
    stats['epoch'] = stats['best_epoch']
    if fold is not None:
        mode = f'fold {fold} test'
    else:
        mode = 'test'
    test_result, _ = validate(args, test_loader, model, stats, mode=mode)
    return test_result
    # embedding_loss.backward(retain_graph=True)
    print("Epoch:{}\tIterations: {}/{}\t discriminator loss:{} \t generator loss:{}".format(
        epoch,
        iteration + 1,
        num_batch,
        D_train_loss.cpu().item(),  # .data.numpy()[0] in pre-0.4 PyTorch
        G_train_loss.cpu().item(),
    ))
    train_dis_meter_loss.add(D_train_loss.cpu().item())
    train_gen_meter_loss.add(G_train_loss.cpu().item())
    if iteration == 0 and args.visdom:
        viz.images(255. * img_batch.cpu().data.numpy(), win=generated)
        viz.images(255. * g_res.cpu().data.numpy(), win=gt)
    if args.visdom and (iteration + 1) % 20 == 0:
        train_gen_loss_logger.log(total_iter, train_gen_meter_loss.value()[0])
        train_dis_loss_logger.log(total_iter, train_dis_meter_loss.value()[0])
        total_iter += 1
        train_dis_meter_loss.reset()
        train_gen_meter_loss.reset()
state = {
    'gen_state_dict': gen.state_dict(),
}
filename = "params.pth"
torch.save(state, filename)