class NonLocalTrainer(object):
    """Train and validate an MLP regressor, logging metrics per epoch.

    Relies on module-level project names (``MLP``, ``Logger``) and the
    ``torch``/``numpy``/``tqdm`` imports of this file.
    """

    def __init__(self, args, trainLoader, testLoader):
        """Build model, optimizer, loaders and output/log directories.

        Args:
            args: parsed CLI namespace (out, sigma, beta, nClass, lr,
                weight_decay, epochs, interval, batch_size, featureNums).
            trainLoader / testLoader: iterables yielding
                ``(data, target, _, _, _, _)`` batches.
        """
        # Prefer GPU when available; model and batches are moved here.
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.out_path = args.out
        self.sigma = args.sigma
        self.beta = args.beta
        self.nClass = args.nClass
        self.model = MLP().to(self.device)
        self.optim = torch.optim.Adam(self.model.parameters(),
                                      lr=args.lr,
                                      weight_decay=args.weight_decay)
        self.criterion = nn.MSELoss()
        self.trainLoader = trainLoader
        self.testLoader = testLoader
        self.run_datetime = datetime.datetime.now()
        if not os.path.exists(self.out_path):
            os.makedirs(self.out_path)
        self.logger = Logger(self.out_path)
        # Persist the run configuration next to the outputs for reproducibility.
        with open(os.path.join(self.out_path, "para.json"), "w") as f:
            json.dump(args.__dict__, f)
        self.epoch = 0
        self.iteration = 0
        self.test_step = 0
        self.max_epoch = args.epochs
        self.val_interval = args.interval
        self.res = 0
        self.best_error = 1e7  # lowest validation MAE seen so far
        self.best_res_epoch = 0
        # Noise distribution parameters (mean 0, std 1e-3); the 17x17 spatial
        # size presumably matches the feature-map resolution — TODO confirm.
        # Not referenced elsewhere in this class.
        self.noiseMean = torch.zeros(args.batch_size, args.featureNums, 17, 17)
        self.noiseStd = torch.div(
            torch.ones(args.batch_size, args.featureNums, 17, 17), 1e3)

    @staticmethod
    def _safe_ratio(numerator, denominator, digits=5):
        """Return ``round(numerator / denominator, digits)``, or 0.0 when the
        denominator is zero (avoids ZeroDivisionError on empty categories)."""
        return round(numerator / denominator, digits) if denominator else 0.0

    def validate_one_epoch(self):
        """Evaluate on the test loader and print regression metrics.

        Metrics: summed MSE loss, TS (threat) scores at several thresholds,
        mean absolute error overall and on positive (>0.01) targets, and
        ratio/MAE of "large" (>5) errors.
        """
        self.model.eval()
        self.test_step += 1
        tsthreas = [0.1, 1, 10]
        tp = [0] * len(tsthreas)  # true positives per threshold
        tn = [0] * len(tsthreas)  # true negatives per threshold
        fp = [0] * len(tsthreas)  # false positives per threshold
        fn = [0] * len(tsthreas)  # false negatives per threshold
        ts = [0] * len(tsthreas)
        totalRegressionLoss = []
        total_error = 0
        total_count = 0
        p_error = 0  # absolute error accumulated on positive targets only
        p_count = 0
        largeGapCount = 0
        largeGap = 0
        for batch_idx, (data, target, _, _, _, _) in tqdm.tqdm(
                enumerate(self.testLoader), total=len(self.testLoader),
                desc='Valid :', ncols=80, leave=False):
            gt_micaps = target.numpy()
            data, target = data.to(device=self.device), target.to(device=self.device)
            with torch.no_grad():
                predictValues = self.model(data)
                regressionLoss = self.criterion(predictValues, target)
            predictNumpy = predictValues.cpu().numpy()
            totalRegressionLoss.append(regressionLoss.item())
            gapValues = np.abs(predictNumpy - gt_micaps)
            total_error += np.sum(gapValues)
            total_count += gt_micaps.shape[0]
            p_error += np.sum((gt_micaps > 0.01) * gapValues)
            p_count += np.sum(gt_micaps > 0.01)
            largeGap += np.sum((gapValues > 5) * gapValues)
            largeGapCount += np.sum(gapValues > 5)
            for i, threas in enumerate(tsthreas):
                tp[i] += np.sum((gt_micaps >= threas) * (predictNumpy >= threas))
                tn[i] += np.sum((gt_micaps < threas) * (predictNumpy < threas))
                fp[i] += np.sum((gt_micaps < threas) * (predictNumpy >= threas))
                fn[i] += np.sum((gt_micaps >= threas) * (predictNumpy < threas))
        # FIX: every ratio below could raise ZeroDivisionError (e.g. no sample
        # exceeded a threshold, or no gap > 5). _safe_ratio reports 0.0 for an
        # empty category instead of crashing the whole validation pass.
        for i, _ in enumerate(tsthreas):
            ts[i] += self._safe_ratio(tp[i], tp[i] + fp[i] + fn[i])
        totalAverageError = self._safe_ratio(total_error, total_count)
        pAverageError = self._safe_ratio(p_error, p_count)
        totalLoss = np.sum(totalRegressionLoss)
        largeGapRatio = self._safe_ratio(largeGapCount, total_count)
        largeGapMae = self._safe_ratio(largeGap, largeGapCount)
        info = {"test_regression_loss": totalLoss,
                "ts_score": ts,
                "aver_gap": totalAverageError,
                "aver_p_gap": pAverageError,
                "large_gap_ratio": largeGapRatio,
                "large_gap_mae": largeGapMae
                }
        print("========================== Epoch {} Test Result Show ==========================".format(self.epoch + 1))
        print(info)
        # NOTE(review): best-checkpoint saving (compare totalAverageError to
        # self.best_error and torch.save the state dicts) was commented out in
        # the original; re-enable deliberately if checkpoints are wanted.

    def train_one_epoch(self):
        """Run one optimization pass over the training loader."""
        self.model.train()
        for batch_idx, (data, target, _, _, _, _) in tqdm.tqdm(
                enumerate(self.trainLoader), total=len(self.trainLoader),
                desc='Train epoch=%d' % self.epoch, ncols=80, leave=False):
            # Global iteration index across epochs, used as the logging step.
            iter_idx = batch_idx + self.epoch * len(self.trainLoader)
            self.iteration = iter_idx
            assert self.model.training
            self.optim.zero_grad()
            data = data.to(device=self.device)
            target = target.to(device=self.device)
            predictValues = self.model(data)
            regressionLoss = self.criterion(predictValues, target)
            regressionLoss.backward()
            self.optim.step()
            regressionLossCpu = regressionLoss.item()
            self.logger.scalar_summary("train_regression_loss",
                                       regressionLossCpu, self.iteration + 1)
        # Histograms are logged with an epoch-based step, so emit them once per
        # epoch (after the last batch) instead of redundantly every batch.
        for tag, value in self.model.named_parameters():
            self.logger.histo_summary(tag, value.data.cpu().numpy(), self.epoch + 1)
            self.logger.histo_summary(tag + '/grad', value.grad.data.cpu().numpy(),
                                      self.epoch + 1)

    def run(self):
        """Main loop: train every epoch, validate every ``val_interval`` epochs."""
        for epoch in range(self.max_epoch):
            self.epoch = epoch
            self.train_one_epoch()
            if (self.epoch + 1) % self.val_interval == 0:
                self.validate_one_epoch()
# Update of the network parameters train_loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=False) step = 0 # Number of batches seen net.train() for epoch in tqdm(np.arange(args.n_epochs), disable=not args.verbose): experiment.log_current_epoch(epoch) for batch_idx, (data, target) in enumerate(train_loader): data, target = data.cpu(), target.cpu() optimizer.zero_grad() output = net(data) mse_loss = criterion(output, target) anchoring_loss = criterion_anchoring_loss_full(net.named_parameters(), init_net.named_parameters(), fac_norm, args.batch_size) loss = mse_loss + args.lambda_anchoring * anchoring_loss loss.backward() optimizer.step() step += 1 experiment.log_metric('train_loss', loss.item(), step=step) # Save the model if not Path.exists(savepath / 'models'): os.makedirs(savepath / 'models') model_path = savepath / 'models' / '{}_{}epochs.pt'.format(model_name, epoch + 1)
# Sanity-check each data loader: draw two batches and print the class
# histogram of the targets.
loaders = [train_loader, valid_loader, test_loader, trainA_loader,
           trainB_loader, validA_loader, validB_loader]
names = ['train_loader', 'valid_loader', 'test_loader', "trainA_loader",
         "trainB_loader", "validA_loader", "validB_loader"]
for loader, name in zip(loaders, names):
    train_iter = iter(loader)
    for _ in range(2):
        # FIX: iterator .next() was removed in Python 3 (renamed __next__);
        # use the built-in next() instead.
        _, target = next(train_iter)
        print(f'{name}', ': Classes {}, counts: {}'.format(
            *np.unique(target.numpy(), return_counts=True)))

#############################
######### Base Line ##########
##############################
model = MLP()
model = model.to(device)
# Warn about any parameter that did not make it onto the GPU.
for name, param in model.named_parameters():
    if param.device.type != 'cuda':
        print('param {}, not on GPU'.format(name))
optimizer = optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
wandb.init(
    project='Seq Boost2',
    config=config,
    name="Baseline p={} mu={} eta={}".format(P, M, E))
model, train_loss, valid_loss = train(model, train_loader, valid_loader,
                                      batch_size=BATCH_SIZE,
                                      wandb_log=True,
                                      consolidate=False,
                                      patience=EARLY_STOPPING,
                                      n_epochs=config['epoch'])
evaluate(model, test_loader, batch_size=BATCH_SIZE)
def train(lr=args.lr,
          n_hidden=args.n_hidden,
          batch_size=args.batch_size,
          dropout=args.dropout,
          valid_freq=3000,
          disp_freq=1000,
          save_freq=100000,
          max_epochs=args.n_epoch,
          patience=15,
          save_name=args.save_name,
          save_dir=args.save_dir,
          device=args.device):
    """Train an MLP text classifier with early stopping and LR annealing.

    Loads pickled train/valid data plus English and French test sets, splits
    train/valid via StratifiedShuffleSplit, trains with the configured
    optimizer, periodically validates (every ``valid_freq`` updates), keeps the
    best checkpoint, anneals the LR when validation worsens, and early-stops
    after ``patience`` consecutive non-improvements. On KeyboardInterrupt it
    saves the final model and evaluates the best one.

    NOTE(review): formatting reconstructed from a collapsed source; the
    nesting of the annealing/early-stop branch is the most plausible reading.
    """
    # Load train and valid dataset.
    # Each pickle file stores labels first, then features, in two loads.
    print('loading train')
    with open(args.train_path, 'rb') as f:
        train_val_y = pickle.load(f)
        train_val_x = pickle.load(f)
    print('loading english test')
    with open(args.en_test_path, 'rb') as f:
        en_test_y = pickle.load(f)
        en_test_x = pickle.load(f)
    print('loading french test')
    with open(args.fr_test_path, 'rb') as f:
        fr_test_y = pickle.load(f)
        fr_test_x = pickle.load(f)
    # Single stratified 80/20 split; the loop body runs exactly once
    # because n_splits=1.
    sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=1125)
    for train_index, test_index in sss.split(train_val_x, train_val_y):
        train_y = train_val_y[train_index]
        train_x = train_val_x[train_index]
        valid_y = train_val_y[test_index]
        valid_x = train_val_x[test_index]
    print('Number of training sample: %d' % train_x.shape[0])
    print('Number of validation sample: %d' % valid_x.shape[0])
    print('Number of english testing sample: %d' % en_test_x.shape[0])
    print('Number of french testing sample: %d' % fr_test_x.shape[0])
    print('-' * 100)
    kf_valid = get_minibatches_idx(len(valid_y), batch_size)
    kf_en_test = get_minibatches_idx(len(en_test_y), batch_size)
    kf_fr_test = get_minibatches_idx(len(fr_test_y), batch_size)
    # Loader parameter: use CUDA pinned memory for faster data loading.
    # NOTE(review): comparing the `device` parameter to its own default makes
    # this always True unless a caller overrides `device` — likely a bug;
    # it is also never used below. Confirm intent.
    pin_memory = (device == args.device)
    # Test set
    n_emb = train_x.shape[1]
    n_class = len(set(train_y))
    best_valid_acc = None
    bad_counter = 0
    uidx = 0  # the number of update done
    estop = False  # early stop switch
    net = MLP(n_mlp_layer=args.n_mlp_layers,
              n_hidden=args.n_hidden,
              dropout=args.dropout,
              n_class=n_class,
              n_emb=n_emb,
              device=args.device)
    # Optionally resume from a pretrained checkpoint.
    if args.load_net != '':
        assert os.path.exists(
            args.load_net), 'Path to pretrained net does not exist'
        net.load_state_dict(torch.load(args.load_net))
        print('Load exists model stored at: ', args.load_net)
    if args.device == 'gpu':
        net = net.cuda()
    # Begin Training
    net.train()
    print('-' * 100)
    print('Model structure: ')
    print('MLP baseline')
    print(net.main)
    print('-' * 100)
    print('Parameters for tuning: ')
    print(net.state_dict().keys())
    print('-' * 100)
    # Define optimizer (only parameters with requires_grad are optimized).
    assert args.optimizer in [
        'SGD', 'Adam', "RMSprop", "LBFGS", "Rprop", "ASGD", "Adadelta",
        "Adagrad", "Adamax"
    ], 'Please choose either SGD or Adam'
    if args.optimizer == 'SGD':
        optimizer = optim.SGD(lr=lr,
                              params=filter(lambda p: p.requires_grad,
                                            net.parameters()),
                              momentum=0.9)
    else:
        optimizer = getattr(optim, args.optimizer)(params=filter(
            lambda p: p.requires_grad, net.parameters()), lr=lr)
    #lambda1 = lambda epoch: epoch // 30
    # Exponential decay: multiply the base LR by 0.98 each epoch.
    lambda2 = lambda epoch: 0.98**epoch
    scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=[lambda2])
    #scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=max_epochs)
    #scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max')
    try:
        for eidx in range(max_epochs):
            # NOTE(review): scheduler.step() at the top of the epoch (before
            # any optimizer.step()) follows the pre-PyTorch-1.1 convention;
            # newer versions warn and expect it after the optimizer steps.
            scheduler.step()
            # print('Training mode on: ' ,net.training)
            start_time = time.time()
            n_samples = 0
            # Get new shuffled index for the training set
            kf = get_minibatches_idx(len(train_y), batch_size, shuffle=True)
            for _, train_index in kf:
                # Remove gradient from previous batch
                #net.zero_grad()
                optimizer.zero_grad()
                uidx += 1
                y_batch = torch.autograd.Variable(
                    torch.from_numpy(train_y[train_index]).long())
                x_batch = torch.autograd.Variable(
                    torch.from_numpy(train_x[train_index]).float())
                # NOTE(review): only y_batch is moved to the GPU here; x_batch
                # stays on CPU — confirm MLP.forward moves inputs itself.
                if net.device == 'gpu':
                    y_batch = y_batch.cuda()
                scores = net.forward(x_batch)
                loss = net.loss(scores, y_batch)
                loss.backward()
                optimizer.step()
                n_samples += len(x_batch)
                gradient = 0
                # For logging gradient information: global L2 norm over all
                # parameter gradients (sqrt of summed squared norms).
                for name, w in net.named_parameters():
                    if w.grad is not None:
                        w_grad = torch.norm(w.grad.data, 2)**2
                        gradient += w_grad
                gradient = gradient**0.5
                if np.mod(uidx, disp_freq) == 0:
                    # NOTE(review): loss.data[0] is the pre-0.4 PyTorch idiom;
                    # modern versions require loss.item().
                    print('Epoch ', eidx, 'Update ', uidx, 'Cost ',
                          loss.data[0], 'Gradient ', gradient)
                # Periodic checkpointing, keyed by the global update counter.
                if save_name and np.mod(uidx, save_freq) == 0:
                    print('Saving...')
                    torch.save(
                        net.state_dict(), '%s/%s_epoch%d_update%d.net' %
                        (save_dir, save_name, eidx, uidx))
                # Periodic validation + best-model tracking + LR annealing.
                if np.mod(uidx, valid_freq) == 0:
                    print("=" * 50)
                    print('Evaluation on validation set: ')
                    kf_valid = get_minibatches_idx(len(valid_y), batch_size)
                    # NOTE(review): `eval` here is a project module that
                    # shadows the Python builtin.
                    top_1_acc, top_n_acc = eval.net_evaluation(
                        net, kf_valid, valid_x, valid_y)
                    #scheduler.step(top_1_acc)
                    # Save best performance state_dict for testing
                    if best_valid_acc is None:
                        best_valid_acc = top_1_acc
                        best_state_dict = net.state_dict()
                        torch.save(best_state_dict,
                                   '%s/%s_best.net' % (save_dir, save_name))
                    else:
                        if top_1_acc > best_valid_acc:
                            print(
                                'Best validation performance so far, saving model parameters'
                            )
                            print("*" * 50)
                            bad_counter = 0  # reset counter
                            best_valid_acc = top_1_acc
                            best_state_dict = net.state_dict()
                            torch.save(
                                best_state_dict,
                                '%s/%s_best.net' % (save_dir, save_name))
                        else:
                            bad_counter += 1
                            print('Validation accuracy: ', 100 * top_1_acc)
                            print('Getting worse, patience left: ',
                                  patience - bad_counter)
                            print('Best validation accuracy now: ',
                                  100 * best_valid_acc)
                            # Learning rate annealing: shrink the LR and
                            # rebuild the optimizer around the new value.
                            lr /= args.lr_anneal
                            print('Learning rate annealed to: ', lr)
                            print('*' * 100)
                            if args.optimizer == 'SGD':
                                optimizer = optim.SGD(
                                    lr=lr,
                                    params=filter(lambda p: p.requires_grad,
                                                  net.parameters()),
                                    momentum=0.9)
                            else:
                                optimizer = getattr(optim, args.optimizer)(
                                    params=filter(lambda p: p.requires_grad,
                                                  net.parameters()), lr=lr)
                            # Early stop after `patience` bad validations.
                            if bad_counter > patience:
                                print('-' * 100)
                                print('Early Stop!')
                                estop = True
                                break
            epoch_time = time.time() - start_time
            print('Epoch processing time: %.2f s' % epoch_time)
            print('Seen %d samples' % n_samples)
            if estop:
                break
        print('-' * 100)
        print('Training finish')
        # Save the last weights as *_final, then reload the best checkpoint
        # for the final evaluations.
        best_state_dict = torch.load('%s/%s_best.net' % (save_dir, save_name))
        torch.save(net.state_dict(), '%s/%s_final.net' % (save_dir, save_name))
        net.load_state_dict(best_state_dict)
        # add self connection
        print('Evaluation on validation set: ')
        kf_valid = get_minibatches_idx(len(valid_y), batch_size)
        eval.net_evaluation(net, kf_valid, valid_x, valid_y)
        # Evaluate model on test set
        print('Evaluation on test set: ')
        print('Evaluation on English testset: ')
        eval.net_evaluation(net, kf_en_test, en_test_x, en_test_y)
        print('Evaluation on French testset: ')
        eval.net_evaluation(net, kf_fr_test, fr_test_x, fr_test_y)
    except KeyboardInterrupt:
        # Graceful interruption: same save/reload/evaluate sequence as the
        # normal completion path above.
        print('-' * 100)
        print("Training interrupted, saving final model...")
        best_state_dict = torch.load('%s/%s_best.net' % (save_dir, save_name))
        torch.save(net.state_dict(), '%s/%s_final.net' % (save_dir, save_name))
        net.load_state_dict(best_state_dict)
        print('Evaluation on validation set: ')
        kf_valid = get_minibatches_idx(len(valid_y), batch_size)
        eval.net_evaluation(net, kf_valid, valid_x, valid_y)
        # Evaluate model on test set
        print('Evaluation on English testset: ')
        eval.net_evaluation(net, kf_en_test, en_test_x, en_test_y)
        print('Evaluation on French testset: ')
        eval.net_evaluation(net, kf_fr_test, fr_test_x, fr_test_y)