Esempio n. 1
0
class NonLocalTrainer(object):
    """Train/validate loop for an MLP regression model.

    Builds the model, Adam optimizer and MSE criterion, dumps the run's
    hyper-parameters to ``para.json`` in the output directory, and exposes
    ``run()`` which alternates training epochs with periodic validation
    (TS scores and absolute-gap metrics).
    """

    def __init__(self, args,
                 trainLoader, testLoader):
        """Set up model, optimizer, logging and bookkeeping from ``args``."""
        # Prefer GPU when available.
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        self.out_path = args.out
        self.sigma = args.sigma
        self.beta = args.beta
        self.nClass = args.nClass

        self.model = MLP().to(self.device)
        self.optim = torch.optim.Adam(self.model.parameters(), lr=args.lr,
                                      weight_decay=args.weight_decay)
        self.criterion = nn.MSELoss()

        self.trainLoader = trainLoader
        self.testLoader = testLoader

        self.run_datetime = datetime.datetime.now()

        # exist_ok avoids the check-then-create race of the old
        # "if not exists: makedirs" pattern.
        os.makedirs(self.out_path, exist_ok=True)

        self.logger = Logger(self.out_path)

        # Persist hyper-parameters for reproducibility.
        with open(os.path.join(self.out_path, "para.json"), "w") as f:
            json.dump(args.__dict__, f)

        self.epoch = 0
        self.iteration = 0
        self.test_step = 0
        self.max_epoch = args.epochs
        self.val_interval = args.interval  # validate every N epochs
        self.res = 0
        self.best_error = 1e7  # lowest validation error seen so far
        self.best_res_epoch = 0

        # Gaussian-noise parameters (mean 0, std 1e-3) sized to match the
        # model's 17x17 feature maps — presumably used by callers for data
        # augmentation; not consumed inside this class. TODO confirm.
        self.noiseMean = torch.zeros(args.batch_size, args.featureNums, 17, 17)
        self.noiseStd = torch.div(torch.ones(args.batch_size, args.featureNums, 17, 17), 1e3)

    def validate_one_epoch(self):
        """Evaluate on the test loader and print loss, TS scores and gap stats."""
        self.model.eval()
        self.test_step += 1

        # Thresholds at which TS (threat score) is computed.
        tsthreas = [0.1, 1, 10]

        tp = [0] * len(tsthreas)  # true positives per threshold
        tn = [0] * len(tsthreas)  # true negatives per threshold
        fp = [0] * len(tsthreas)  # false positives per threshold
        fn = [0] * len(tsthreas)  # false negatives per threshold
        ts = [0] * len(tsthreas)  # TS = tp / (tp + fp + fn)

        totalRegressionLoss = []
        total_error = 0   # sum of |prediction - target| over all samples
        total_count = 0
        p_error = 0       # same sum, restricted to targets > 0.01
        p_count = 0

        largeGapCount = 0  # samples whose absolute error exceeds 5
        largeGap = 0       # summed absolute error of those samples

        for batch_idx, (data, target, _, _, _, _) in tqdm.tqdm(
                enumerate(self.testLoader), total=len(self.testLoader),
                desc='Valid :', ncols=80,
                leave=False):
            # Keep a CPU/NumPy copy of the ground truth before moving to device.
            gt_micaps = target.numpy()
            data, target = data.to(device=self.device), target.to(device=self.device)

            with torch.no_grad():
                predictValues = self.model(data)

                regressionLoss = self.criterion(predictValues, target)

                predictNumpy = predictValues.cpu().numpy()
                totalRegressionLoss.append(regressionLoss.item())

                gapValues = np.abs(predictNumpy - gt_micaps)
                total_error += np.sum(gapValues)
                total_count += gt_micaps.shape[0]
                p_error += np.sum((gt_micaps > 0.01) * gapValues)
                p_count += np.sum(gt_micaps > 0.01)

                largeGap += np.sum((gapValues > 5) * gapValues)
                largeGapCount += np.sum(gapValues > 5)

                # Boolean-mask products count the confusion-matrix cells.
                for i, threas in enumerate(tsthreas):
                    tp[i] += np.sum((gt_micaps >= threas) * (predictNumpy >= threas))
                    tn[i] += np.sum((gt_micaps < threas) * (predictNumpy < threas))
                    fp[i] += np.sum((gt_micaps < threas) * (predictNumpy >= threas))
                    fn[i] += np.sum((gt_micaps >= threas) * (predictNumpy < threas))

        # BUG FIX: guard every ratio against a zero denominator (empty test
        # loader, no positive targets, or no large-gap samples previously
        # raised ZeroDivisionError here).
        for i, _ in enumerate(tsthreas):
            denom = tp[i] + fp[i] + fn[i]
            ts[i] = round(tp[i] / denom, 5) if denom else 0.0

        totalAverageError = round(total_error / total_count, 5) if total_count else 0.0
        pAverageError = round(p_error / p_count, 5) if p_count else 0.0
        totalLoss = np.sum(totalRegressionLoss)
        largeGapRatio = round(largeGapCount / total_count, 5) if total_count else 0.0
        largeGapMae = round(largeGap / largeGapCount, 5) if largeGapCount else 0.0

        info = {"test_regression_loss": totalLoss,
                "ts_score": ts,
                "aver_gap": totalAverageError,
                "aver_p_gap": pAverageError,
                "large_gap_ratio": largeGapRatio,
                "large_gap_mae": largeGapMae
                }
        print("========================== Epoch {} Test Result Show ==========================".format(self.epoch + 1))

        print(info)

    def train_one_epoch(self):
        """Run one optimization pass over the training loader."""
        self.model.train()

        for batch_idx, (data, target, _, _, _, _) in tqdm.tqdm(
                enumerate(self.trainLoader), total=len(self.trainLoader),
                desc='Train epoch=%d' % self.epoch, ncols=80, leave=False):
            # Global iteration index across epochs (used for logging steps).
            iter_idx = batch_idx + self.epoch * len(self.trainLoader)
            self.iteration = iter_idx

            assert self.model.training
            self.optim.zero_grad()

            data = data.to(device=self.device)
            target = target.to(device=self.device)

            predictValues = self.model(data)

            regressionLoss = self.criterion(predictValues, target)

            regressionLoss.backward()
            self.optim.step()

            regressionLossCpu = regressionLoss.item()
            self.logger.scalar_summary("train_regression_loss", regressionLossCpu, self.iteration + 1)

        # Log weight/gradient histograms once per epoch.
        for tag, value in self.model.named_parameters():
            self.logger.histo_summary(tag, value.data.cpu().numpy(), self.epoch + 1)
            # BUG FIX: parameters that never received a gradient (frozen or
            # unused) have grad=None and previously crashed on .grad.data.
            if value.grad is not None:
                self.logger.histo_summary(tag + '/grad', value.grad.data.cpu().numpy(), self.epoch + 1)

    def run(self):
        """Train for ``max_epoch`` epochs, validating every ``val_interval``."""
        for epoch in range(self.max_epoch):
            self.epoch = epoch
            self.train_one_epoch()
            if (self.epoch + 1) % self.val_interval == 0:
                self.validate_one_epoch()
Esempio n. 2
0
# Update of the network parameters
# NOTE(review): shuffle=False on a *training* loader is unusual — confirm
# the dataset is pre-shuffled or ordering is intentional.
train_loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=False)

step = 0  # number of batches seen, global across epochs
net.train()

for epoch in tqdm(np.arange(args.n_epochs), disable=not args.verbose):
    experiment.log_current_epoch(epoch)

    for batch_idx, (data, target) in enumerate(train_loader):
        # NOTE(review): training is forced onto the CPU here — confirm this
        # is intentional and not a leftover from debugging.
        data, target = data.cpu(), target.cpu()

        optimizer.zero_grad()
        output = net(data)
        mse_loss = criterion(output, target)
        # Anchoring term regularizes the current weights toward the initial
        # network's weights (anchored-ensemble style loss).
        anchoring_loss = criterion_anchoring_loss_full(net.named_parameters(), init_net.named_parameters(), fac_norm,
                                                       args.batch_size)

        loss = mse_loss + args.lambda_anchoring * anchoring_loss
        loss.backward()
        optimizer.step()

        step += 1

        experiment.log_metric('train_loss', loss.item(), step=step)

# Save the model.
# BUG FIX: the original called the unbound method Path.exists(...) and then
# os.makedirs — mkdir(parents=True, exist_ok=True) is the idiomatic,
# race-free pathlib equivalent.
(savepath / 'models').mkdir(parents=True, exist_ok=True)

model_path = savepath / 'models' / '{}_{}epochs.pt'.format(model_name, epoch + 1)
Esempio n. 3
0
    
    # Sanity-check class balance: peek at the first two batches of each loader
    # and print the unique class labels with their counts.
    loaders = [train_loader, valid_loader, test_loader, trainA_loader, trainB_loader, validA_loader, validB_loader]
    names = ['train_loader', 'valid_loader', 'test_loader', "trainA_loader", "trainB_loader", "validA_loader", "validB_loader"]
    for loader, name in zip(loaders, names):
        batch_iter = iter(loader)
        for _ in range(2):
            # BUG FIX: iterator.next() is Python-2 syntax and raises
            # AttributeError on Python 3 — use the builtin next().
            _, target = next(batch_iter)
            print(f'{name}', ': Classes {}, counts: {}'.format(
                *np.unique(target.numpy(), return_counts=True)))

    #############################
    ######### Baseline ##########
    #############################
    model = MLP()
    model = model.to(device)
    # Verify every parameter actually landed on the GPU.
    for name, param in model.named_parameters():
        if param.device.type != 'cuda':
            print('param {}, not on GPU'.format(name))

    optimizer = optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
    wandb.init(
        project='Seq Boost2',
        config=config,
        name="Baseline p={} mu={} eta={}".format(P, M, E))

    # Train with early stopping, then evaluate the final model on the test set.
    model, train_loss, valid_loss = train(model, train_loader, valid_loader, batch_size=BATCH_SIZE, wandb_log=True,
                                          consolidate=False, patience=EARLY_STOPPING, n_epochs=config['epoch'])
    evaluate(model, test_loader, batch_size=BATCH_SIZE)
Esempio n. 4
0
def train(lr=args.lr,
          n_hidden=args.n_hidden,
          batch_size=args.batch_size,
          dropout=args.dropout,
          valid_freq=3000,
          disp_freq=1000,
          save_freq=100000,
          max_epochs=args.n_epoch,
          patience=15,
          save_name=args.save_name,
          save_dir=args.save_dir,
          device=args.device):
    """Train an MLP baseline with periodic validation, early stopping and
    learning-rate annealing.

    Loads pickled train/validation/test data, trains for up to ``max_epochs``
    epochs, checkpoints the best model (by top-1 validation accuracy) to
    ``save_dir``, and finally evaluates the best checkpoint on the English
    and French test sets.  A ``KeyboardInterrupt`` triggers the same
    save-and-evaluate epilogue.  Returns ``None``; models are saved to disk.
    """
    # Load train and valid dataset.  The labels were pickled first and the
    # features second, so they are read back in that order.
    print('loading train')
    with open(args.train_path, 'rb') as f:
        train_val_y = pickle.load(f)
        train_val_x = pickle.load(f)

    print('loading english test')
    with open(args.en_test_path, 'rb') as f:
        en_test_y = pickle.load(f)
        en_test_x = pickle.load(f)

    print('loading french test')
    with open(args.fr_test_path, 'rb') as f:
        fr_test_y = pickle.load(f)
        fr_test_x = pickle.load(f)

    # Stratified 80/20 train/validation split, fixed seed for reproducibility.
    sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=1125)
    for train_index, test_index in sss.split(train_val_x, train_val_y):
        train_y = train_val_y[train_index]
        train_x = train_val_x[train_index]
        valid_y = train_val_y[test_index]
        valid_x = train_val_x[test_index]

    print('Number of training sample: %d' % train_x.shape[0])
    print('Number of validation sample: %d' % valid_x.shape[0])
    print('Number of english testing sample: %d' % en_test_x.shape[0])
    print('Number of french testing sample: %d' % fr_test_x.shape[0])
    print('-' * 100)

    kf_valid = get_minibatches_idx(len(valid_y), batch_size)
    kf_en_test = get_minibatches_idx(len(en_test_y), batch_size)
    kf_fr_test = get_minibatches_idx(len(fr_test_y), batch_size)

    # Loader parameter: use CUDA pinned memory for faster data loading.
    # NOTE(review): this comparison is always True (``device`` defaults to
    # ``args.device``) and ``pin_memory`` is never used below — confirm intent.
    pin_memory = (device == args.device)

    n_emb = train_x.shape[1]       # input embedding dimensionality
    n_class = len(set(train_y))    # number of target classes
    best_valid_acc = None
    bad_counter = 0

    uidx = 0        # number of parameter updates performed
    estop = False   # early-stop switch
    net = MLP(n_mlp_layer=args.n_mlp_layers,
              n_hidden=args.n_hidden,
              dropout=args.dropout,
              n_class=n_class,
              n_emb=n_emb,
              device=args.device)

    # Optionally warm-start from a pretrained checkpoint.
    if args.load_net != '':
        assert os.path.exists(
            args.load_net), 'Path to pretrained net does not exist'
        net.load_state_dict(torch.load(args.load_net))
        print('Load exists model stored at: ', args.load_net)

    if args.device == 'gpu':
        net = net.cuda()

    # Begin training.
    net.train()
    print('-' * 100)
    print('Model structure: ')
    print('MLP baseline')
    print(net.main)
    print('-' * 100)
    print('Parameters for tuning: ')
    print(net.state_dict().keys())
    print('-' * 100)

    # Define optimizer — only parameters with requires_grad are optimized.
    assert args.optimizer in [
        'SGD', 'Adam', "RMSprop", "LBFGS", "Rprop", "ASGD", "Adadelta",
        "Adagrad", "Adamax"
    ], 'Please choose either SGD or Adam'
    if args.optimizer == 'SGD':
        optimizer = optim.SGD(lr=lr,
                              params=filter(lambda p: p.requires_grad,
                                            net.parameters()),
                              momentum=0.9)
    else:
        optimizer = getattr(optim, args.optimizer)(params=filter(
            lambda p: p.requires_grad, net.parameters()),
                                                   lr=lr)

    # Exponential decay: lr at epoch e is base_lr * 0.98 ** e.
    lambda2 = lambda epoch: 0.98**epoch
    scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=[lambda2])
    # NOTE(review): when the optimizer is re-created during lr annealing below,
    # this scheduler still references the *old* optimizer object — confirm
    # whether the scheduler should be rebuilt there as well.
    try:
        for eidx in range(max_epochs):
            start_time = time.time()
            n_samples = 0
            # Fresh shuffled minibatch indices for the training set each epoch.
            kf = get_minibatches_idx(len(train_y), batch_size, shuffle=True)

            for _, train_index in kf:
                # Remove gradient from previous batch.
                optimizer.zero_grad()
                uidx += 1
                y_batch = torch.autograd.Variable(
                    torch.from_numpy(train_y[train_index]).long())
                x_batch = torch.autograd.Variable(
                    torch.from_numpy(train_x[train_index]).float())
                if net.device == 'gpu':
                    y_batch = y_batch.cuda()
                scores = net.forward(x_batch)
                loss = net.loss(scores, y_batch)

                loss.backward()
                optimizer.step()
                n_samples += len(x_batch)

                # Global L2 norm of all gradients, for logging.
                gradient = 0
                for name, w in net.named_parameters():
                    if w.grad is not None:
                        # .item() keeps the accumulator a plain float so the
                        # printed value is a number, not a tensor repr.
                        gradient += torch.norm(w.grad.data, 2).item() ** 2
                gradient = gradient ** 0.5
                if np.mod(uidx, disp_freq) == 0:
                    # BUG FIX: loss.data[0] raises IndexError on 0-dim tensors
                    # (PyTorch >= 0.4); loss.item() is the supported accessor.
                    print('Epoch ', eidx, 'Update ', uidx, 'Cost ',
                          loss.item(), 'Gradient ', gradient)

                if save_name and np.mod(uidx, save_freq) == 0:
                    print('Saving...')
                    torch.save(
                        net.state_dict(), '%s/%s_epoch%d_update%d.net' %
                        (save_dir, save_name, eidx, uidx))

                if np.mod(uidx, valid_freq) == 0:
                    print("=" * 50)
                    print('Evaluation on validation set: ')
                    kf_valid = get_minibatches_idx(len(valid_y), batch_size)
                    top_1_acc, top_n_acc = eval.net_evaluation(
                        net, kf_valid, valid_x, valid_y)

                    # Save best performance state_dict for testing.
                    if best_valid_acc is None:
                        best_valid_acc = top_1_acc
                        best_state_dict = net.state_dict()
                        torch.save(best_state_dict,
                                   '%s/%s_best.net' % (save_dir, save_name))
                    else:
                        if top_1_acc > best_valid_acc:
                            print(
                                'Best validation performance so far, saving model parameters'
                            )
                            print("*" * 50)
                            bad_counter = 0  # reset patience counter
                            best_valid_acc = top_1_acc
                            best_state_dict = net.state_dict()
                            torch.save(
                                best_state_dict,
                                '%s/%s_best.net' % (save_dir, save_name))
                        else:
                            bad_counter += 1
                            print('Validation accuracy: ', 100 * top_1_acc)
                            print('Getting worse, patience left: ',
                                  patience - bad_counter)
                            print('Best validation accuracy  now: ',
                                  100 * best_valid_acc)
                            # Learning-rate annealing: shrink lr and rebuild
                            # the optimizer (this also resets momentum / Adam
                            # statistics).
                            lr /= args.lr_anneal
                            print('Learning rate annealed to: ', lr)
                            print('*' * 100)
                            if args.optimizer == 'SGD':
                                optimizer = optim.SGD(
                                    lr=lr,
                                    params=filter(lambda p: p.requires_grad,
                                                  net.parameters()),
                                    momentum=0.9)
                            else:
                                optimizer = getattr(optim, args.optimizer)(
                                    params=filter(lambda p: p.requires_grad,
                                                  net.parameters()),
                                    lr=lr)
                            if bad_counter > patience:
                                print('-' * 100)
                                print('Early Stop!')
                                estop = True
                                break

            # BUG FIX: step the scheduler *after* the epoch's optimizer steps;
            # the original pre-1.1.0 ordering (at epoch start) skipped the base
            # learning rate and triggers a warning on modern PyTorch.
            scheduler.step()
            epoch_time = time.time() - start_time
            print('Epoch processing time: %.2f s' % epoch_time)
            print('Seen %d samples' % n_samples)
            if estop:
                break
        print('-' * 100)
        print('Training finish')
        # Save the final weights, then reload the best checkpoint for testing.
        best_state_dict = torch.load('%s/%s_best.net' % (save_dir, save_name))
        torch.save(net.state_dict(), '%s/%s_final.net' % (save_dir, save_name))
        net.load_state_dict(best_state_dict)

        print('Evaluation on validation set: ')
        kf_valid = get_minibatches_idx(len(valid_y), batch_size)
        eval.net_evaluation(net, kf_valid, valid_x, valid_y)

        # Evaluate the best model on both test sets.
        print('Evaluation on test set: ')
        print('Evaluation on English testset: ')
        eval.net_evaluation(net, kf_en_test, en_test_x, en_test_y)
        print('Evaluation on French testset: ')
        eval.net_evaluation(net, kf_fr_test, fr_test_x, fr_test_y)
    except KeyboardInterrupt:
        # Same epilogue as the normal path: persist final weights, reload the
        # best checkpoint, and evaluate it.
        print('-' * 100)
        print("Training interrupted, saving final model...")
        best_state_dict = torch.load('%s/%s_best.net' % (save_dir, save_name))
        torch.save(net.state_dict(), '%s/%s_final.net' % (save_dir, save_name))
        net.load_state_dict(best_state_dict)
        print('Evaluation on validation set: ')
        kf_valid = get_minibatches_idx(len(valid_y), batch_size)
        eval.net_evaluation(net, kf_valid, valid_x, valid_y)

        # Evaluate model on test set.
        print('Evaluation on English testset: ')
        eval.net_evaluation(net, kf_en_test, en_test_x, en_test_y)
        print('Evaluation on French testset: ')
        eval.net_evaluation(net, kf_fr_test, fr_test_x, fr_test_y)