Example #1
def eval_acc(model, dataset):
    # Run the frozen model over the dataset, pool the layer outputs via
    # model.jump, then score the embeddings with scikit-learn classifiers
    # under 10-fold cross-validation.
    model.eval()

    loader = DenseLoader(dataset, batch_size=args.batch_size, shuffle=False)
    Xs = []
    Ys = []
    for data in loader:
        data = data.to(device)
        Ys.append(data.y)
        with torch.no_grad():
            xs, new_adjs, Ss, opt_loss = model(data,
                                               epsilon=args.eps,
                                               opt_epochs=args.opt_iters)
            Xs.append(model.jump(xs))
            # Xs.append(xs[0])
    Xs = torch.cat(Xs, 0)
    Ys = torch.cat(Ys, 0)

    clf1 = linear_model.LogisticRegressionCV(solver='saga',
                                             multi_class='auto',
                                             max_iter=200)

    score1 = cross_val_score(clf1,
                             X=Xs.detach().cpu().numpy(),
                             y=Ys.detach().cpu().numpy(),
                             cv=10)

    clf2 = MLPClassifier(hidden_layer_sizes=(64, 32), max_iter=400)
    score2 = cross_val_score(clf2,
                             X=Xs.detach().cpu().numpy(),
                             y=Ys.detach().cpu().numpy(),
                             cv=10)

    # print(score.mean(), score.std())
    return score1.mean(), score1.std(), score2.mean(), score2.std()
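
This helper leans on several module-level names the snippet never defines: device, args, DenseLoader, and the scikit-learn imports. A minimal sketch of that surrounding setup, with flag names inferred from the attributes the code reads (all of it an assumption, not the original source):

import argparse

import torch
from sklearn import linear_model
from sklearn.model_selection import cross_val_score
from sklearn.neural_network import MLPClassifier
# Import path varies across PyG versions; older releases expose this
# class from torch_geometric.data instead.
from torch_geometric.loader import DenseDataLoader as DenseLoader

# Hypothetical CLI flags mirroring the attributes eval_acc reads.
parser = argparse.ArgumentParser()
parser.add_argument('--batch_size', type=int, default=128)
parser.add_argument('--eps', type=float, default=0.1)
parser.add_argument('--opt_iters', type=int, default=10)
args = parser.parse_args([])

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')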
Example #2
def eval_reg_loss(model, dataset):
    # Same embedding extraction as eval_acc, but scored with a
    # multi-output MLP regressor (10-fold CV, mean absolute error).
    model.eval()

    loader = DenseLoader(dataset, batch_size=args.batch_size, shuffle=False)
    Xs = []
    Ys = []
    for data in loader:
        data = data.to(device)
        Ys.append(data.y)
        with torch.no_grad():
            xs, new_adjs, Ss, opt_loss = model(data,
                                               epsilon=args.eps,
                                               opt_epochs=args.opt_iters)
            Xs.append(model.jump(xs))
            # Xs.append(xs[0])
    Xs = torch.cat(Xs, 0)
    Ys = torch.cat(Ys, 0).squeeze()

    cv = KFold(10, shuffle=True, random_state=12345)  # random_state only takes effect with shuffle=True

    # clf1 = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    # # clf1 = MultiOutputRegressor(linear_model.Lasso())
    #
    # score1 = cross_val_score(clf1, X=Xs.detach().cpu().numpy(), y=Ys.detach().cpu().numpy(), cv=10, scoring='neg_mean_absolute_error')

    clf2 = MultiOutputRegressor(
        MLPRegressor(hidden_layer_sizes=(64, 32), max_iter=400))
    score2 = cross_val_score(clf2,
                             X=Xs.detach().cpu().numpy(),
                             y=Ys.detach().cpu().numpy(),
                             cv=cv,
                             scoring='neg_mean_absolute_error')

    # print(score.mean(), score.std())
    return -score2.mean(), score2.std()
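
Note the sign flip on return: under scoring='neg_mean_absolute_error', cross_val_score reports the negated MAE so that larger is always better. A self-contained illustration of the convention on synthetic data (the regressor and shapes are arbitrary choices, not from the original):

import numpy as np
from sklearn.linear_model import Ridge
from sklearn.model_selection import KFold, cross_val_score

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 8))
y = rng.normal(size=(100, 2))          # multi-output target, like Ys above

cv = KFold(10, shuffle=True, random_state=12345)
scores = cross_val_score(Ridge(), X, y, cv=cv,
                         scoring='neg_mean_absolute_error')
mae = -scores.mean()                   # negate to recover the positive MAE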
Example #3
def getMiddleRes(dataset, model, batch_size, eps, opt_iters):
    # Extract frozen-model embeddings and package them, with the labels,
    # as an in-memory dataset for downstream training.
    model.eval()

    loader = DenseLoader(dataset, batch_size=batch_size, shuffle=False)
    Xs = []
    Ys = []
    for data in loader:
        data = data.to(device)
        Ys.append(data.y)
        with torch.no_grad():
            xs, new_adjs, Ss, opt_loss = model(data,
                                               epsilon=eps,
                                               opt_epochs=opt_iters)
            Xs.append(model.jump(xs))
            # Xs.append(xs[0])
    Xs = torch.cat(Xs, 0)
    Ys = torch.cat(Ys, 0).float()
    myData = MyDataset(Xs, Ys)
    return myData
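
MyDataset is not defined in the snippet; given how it is constructed, it is presumably a plain tensor-pair dataset. A plausible minimal implementation (the class body is an assumption):

from torch.utils.data import Dataset

class MyDataset(Dataset):
    # Hypothetical tensor-pair dataset matching MyDataset(Xs, Ys) above.
    def __init__(self, Xs, Ys):
        assert len(Xs) == len(Ys)
        self.Xs, self.Ys = Xs, Ys

    def __len__(self):
        return len(self.Xs)

    def __getitem__(self, idx):
        return self.Xs[idx], self.Ys[idx]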
Example #4
def cross_validation_with_val_set(dataset,
                                  model,
                                  folds,
                                  epochs,
                                  batch_size,
                                  lr,
                                  lr_decay_factor,
                                  lr_decay_step_size,
                                  weight_decay,
                                  logger=None):

    val_losses, accs, durations = [], [], []
    for fold, (train_idx, test_idx,
               val_idx) in enumerate(zip(*k_fold(dataset, folds))):

        train_dataset = dataset[train_idx]
        test_dataset = dataset[test_idx]
        val_dataset = dataset[val_idx]

        if 'adj' in train_dataset[0]:
            train_loader = DenseLoader(train_dataset, batch_size, shuffle=True)
            val_loader = DenseLoader(val_dataset, batch_size, shuffle=False)
            test_loader = DenseLoader(test_dataset, batch_size, shuffle=False)
        else:
            train_loader = DataLoader(train_dataset, batch_size, shuffle=True)
            val_loader = DataLoader(val_dataset, batch_size, shuffle=False)
            test_loader = DataLoader(test_dataset, batch_size, shuffle=False)

        model.to(device).reset_parameters()
        optimizer = Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

        if torch.cuda.is_available():
            torch.cuda.synchronize()

        t_start = time.perf_counter()

        for epoch in range(1, epochs + 1):
            train_loss = train(model, optimizer, train_loader)
            val_losses.append(eval_loss(model, val_loader))
            accs.append(eval_acc(model, test_loader))
            eval_info = {
                'fold': fold,
                'epoch': epoch,
                'train_loss': train_loss,
                'val_loss': val_losses[-1],
                'test_acc': accs[-1],
            }

            if logger is not None:
                logger(eval_info)

            if epoch % lr_decay_step_size == 0:
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr_decay_factor * param_group['lr']

        if torch.cuda.is_available():
            torch.cuda.synchronize()

        t_end = time.perf_counter()
        durations.append(t_end - t_start)

    loss, acc, duration = tensor(val_losses), tensor(accs), tensor(durations)
    loss, acc = loss.view(folds, epochs), acc.view(folds, epochs)
    loss, argmin = loss.min(dim=1)
    acc = acc[torch.arange(folds, dtype=torch.long), argmin]

    loss_mean = loss.mean().item()
    acc_mean = acc.mean().item()
    acc_std = acc.std().item()
    duration_mean = duration.mean().item()
    print('Val Loss: {:.4f}, Test Accuracy: {:.3f} ± {:.3f}, Duration: {:.3f}'.
          format(loss_mean, acc_mean, acc_std, duration_mean))

    return loss_mean, acc_mean, acc_std
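
k_fold is not shown here. In PyTorch Geometric benchmark code it is commonly a stratified splitter returning per-fold train/test/val index lists; a sketch under that assumption (stratifying on dataset.data.y, which is itself assumed):

import torch
from sklearn.model_selection import StratifiedKFold

def k_fold(dataset, folds):
    # Stratify on graph labels; fold i tests on split i and validates on
    # split i-1, training on everything else.
    skf = StratifiedKFold(folds, shuffle=True, random_state=12345)

    test_indices, train_indices = [], []
    for _, idx in skf.split(torch.zeros(len(dataset)), dataset.data.y):
        test_indices.append(torch.from_numpy(idx).long())

    val_indices = [test_indices[i - 1] for i in range(folds)]

    for i in range(folds):
        train_mask = torch.ones(len(dataset), dtype=torch.bool)
        train_mask[test_indices[i]] = False
        train_mask[val_indices[i]] = False
        train_indices.append(train_mask.nonzero(as_tuple=False).view(-1))

    return train_indices, test_indices, val_indices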
Example #5
            args.opt_iters)
        if not os.path.exists(dirpath):
            os.mkdir(dirpath)
        # model_path = dirpath + "/opt_"+dataset_name+"_layers"+str(num_layers)+"_hidden"+str(hidden)+"_params.pkl"
        model_path = dirpath + "/opt_" + dataset_name + "_params.pkl"

        if args.train:

            perm = torch.randperm(len(dataset))
            train_id = int(0.8 * len(dataset))
            train_index = perm[:train_id]
            val_index = perm[train_id:]
            print("num_layers, hidden", num_layers, hidden)

            train_loader = DenseLoader(dataset[train_index],
                                       batch_size=args.batch_size,
                                       shuffle=True)
            model.to(device).reset_parameters()
            optimizer = Adam(
                model.parameters(), lr=args.lr, weight_decay=0.0001
            )  # add a small weight regularization so that it cannot be zero.

            if torch.cuda.is_available():
                torch.cuda.synchronize()

            t_start = time.perf_counter()
            val_losses = []
            val_loader = DenseLoader(dataset[val_index],
                                     batch_size=args.batch_size,
                                     shuffle=False)
            best_val_loss = float('inf')
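
Stripped of the surrounding training setup, the random 80/20 split above reduces to this pattern (sizes arbitrary for illustration):

import torch

n = 1000                       # dataset size, arbitrary for illustration
perm = torch.randperm(n)       # random permutation of example indices
cut = int(0.8 * n)             # 80% train / 20% validation boundary
train_index, val_index = perm[:cut], perm[cut:]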
Example #6
    def run(self):
        val_accs, test_accs = [], []

        makeDirectory('torch_saved/')
        save_path = 'torch_saved/{}'.format(self.p.name)

        if self.p.restore:
            self.load_model(save_path)
            print('Successfully Loaded previous model')

        if torch.cuda.is_available():
            torch.cuda.synchronize()

        # iterate over 10 folds
        for fold, (train_idx, test_idx,
                   val_idx) in enumerate(zip(*self.k_fold())):

            # Reinitialise model and optimizer for each fold
            self.model = self.addModel()
            self.optimizer = self.addOptimizer()

            train_dataset = self.data[train_idx]
            test_dataset = self.data[test_idx]
            val_dataset = self.data[val_idx]

            if 'adj' in train_dataset[0]:
                train_loader = DenseLoader(train_dataset,
                                           self.p.batch_size,
                                           shuffle=True)
                val_loader = DenseLoader(val_dataset,
                                         self.p.batch_size,
                                         shuffle=False)
                test_loader = DenseLoader(test_dataset,
                                          self.p.batch_size,
                                          shuffle=False)
            else:
                train_loader = DataLoader(train_dataset,
                                          self.p.batch_size,
                                          shuffle=True)
                val_loader = DataLoader(val_dataset,
                                        self.p.batch_size,
                                        shuffle=False)
                test_loader = DataLoader(test_dataset,
                                         self.p.batch_size,
                                         shuffle=False)

            if torch.cuda.is_available():
                torch.cuda.synchronize()

            best_val_acc, best_test_acc = 0.0, 0.0

            for epoch in range(1, self.p.max_epochs + 1):
                train_loss = self.run_epoch(train_loader)
                val_acc = self.predict(val_loader)

                # lr_decay
                if epoch % self.p.lr_decay_step == 0:
                    for param_group in self.optimizer.param_groups:
                        param_group['lr'] = self.p.lr_decay_factor * param_group['lr']
                # save model for best val score
                if val_acc > best_val_acc:
                    best_val_acc = val_acc
                    self.save_model(save_path)

                print(
                    '---[INFO]---{:02d}/{:03d}: Loss: {:.4f}\tVal Acc: {:.4f}'.
                    format(fold + 1, epoch, train_loss, best_val_acc))

            # load best model for testing
            self.load_model(save_path)
            best_test_acc = self.predict(test_loader)

            if torch.cuda.is_available():
                torch.cuda.synchronize()

            val_accs.append(best_val_acc)
            test_accs.append(best_test_acc)

        val_acc_mean = np.round(np.mean(val_accs), 4)
        test_acc_mean = np.round(np.mean(test_accs), 4)

        print('---[INFO]---Val Acc: {:.4f}, Test Accuracy: {:.3f}'.format(
            val_acc_mean, test_acc_mean))

        return val_acc_mean, test_acc_mean
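
save_model and load_model are presumably thin wrappers over torch checkpointing, since the loop restores the best-validation checkpoint before testing. A minimal sketch of what they might look like (written as free functions for brevity; the checkpoint keys are assumptions):

import torch

def save_model(self, path):
    # Persist model and optimizer state for the current best epoch.
    torch.save({'model': self.model.state_dict(),
                'optimizer': self.optimizer.state_dict()}, path)

def load_model(self, path):
    # Restore the checkpoint written by save_model.
    state = torch.load(path, map_location='cpu')
    self.model.load_state_dict(state['model'])
    self.optimizer.load_state_dict(state['optimizer'])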
Example #7
    def run_new(self):
        val_accs, test_accs = [], []

        makeDirectory('torch_saved/')
        save_path = 'torch_saved/{}'.format(self.p.name)

        if self.p.restore:
            self.load_model(save_path)
            print('Successfully Loaded previous model')

        if torch.cuda.is_available():
            torch.cuda.synchronize()

        # Reinitialise model and optimizer for each fold
        self.model = self.addModel()
        self.optimizer = self.addOptimizer()

        dataset = self.data

        if self.p.dataset != "wechat":
            num_training = int(len(dataset) * 0.5)
            num_val = int(len(dataset) * 0.75) - num_training
            num_test = len(dataset) - (num_training + num_val)
        else:
            num_training = dataset.get_samples_num("train")
            num_val = dataset.get_samples_num("valid")
            num_test = dataset.get_samples_num("test")
        logger.info("num train %d, num valid %d, num test %d", num_training,
                    num_val, num_test)
        # training_set, validation_set, test_set = random_split(dataset, [num_training, num_val, num_test])
        train_dataset = dataset[:num_training]
        val_dataset = dataset[num_training:(num_training + num_val)]
        test_dataset = dataset[(num_training + num_val):]

        if 'adj' in train_dataset[0]:
            train_loader = DenseLoader(train_dataset,
                                       self.p.batch_size,
                                       shuffle=True)
            val_loader = DenseLoader(val_dataset,
                                     self.p.batch_size,
                                     shuffle=False)
            test_loader = DenseLoader(test_dataset,
                                      self.p.batch_size,
                                      shuffle=False)
        else:
            train_loader = DataLoader(train_dataset,
                                      self.p.batch_size,
                                      shuffle=True)
            val_loader = DataLoader(val_dataset,
                                    self.p.batch_size,
                                    shuffle=False)
            test_loader = DataLoader(test_dataset,
                                     self.p.batch_size,
                                     shuffle=False)

        if torch.cuda.is_available():
            torch.cuda.synchronize()

        best_val_acc, best_test_acc = 0.0, 0.0
        best_thr = None

        val_metrics, val_loss, thr = self.evaluate(val_loader,
                                                   -1,
                                                   return_best_thr=True)
        test_metrics, test_loss, _ = self.evaluate(test_loader, -1, thr=0.5)

        for epoch in range(1, self.p.max_epochs + 1):
            train_loss = self.run_epoch(train_loader, epoch)
            val_metrics, val_loss, thr = self.evaluate(val_loader,
                                                       epoch,
                                                       return_best_thr=True)
            test_metrics, test_loss, _ = self.evaluate(test_loader,
                                                       epoch,
                                                       thr=thr)
            val_auc = val_metrics[-1]

            # lr_decay
            if epoch % self.p.lr_decay_step == 0:
                for param_group in self.optimizer.param_groups:
                    param_group['lr'] = self.p.lr_decay_factor * param_group['lr']
            # save model for best val score
            if val_auc > best_val_acc:
                best_val_acc = val_auc
                best_thr = thr
                self.save_model(save_path)
                logger.info("************BEST UNTIL NOW**************")

            print('---[INFO]---{:03d}: Loss: {:.4f}\tVal Acc: {:.4f}'.format(
                epoch, train_loss, best_val_acc))
            print('---[INFO]---{:03d}: Test metrics'.format(epoch),
                  test_metrics)

        # load best model for testing and reuse the threshold tuned when
        # that checkpoint was saved
        self.load_model(save_path)
        test_metrics, test_loss, _ = self.evaluate(test_loader,
                                                   self.p.max_epochs + 1,
                                                   thr=best_thr)
        print('---[INFO]---Finally: Test metrics', test_metrics)
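
evaluate(..., return_best_thr=True) implies the validation pass tunes a decision threshold that is then reused on the test set. One common way to pick such a threshold is to maximise F1 over a grid of cutoffs; a self-contained sketch of that idea (an assumption, not the confirmed implementation):

import numpy as np
from sklearn.metrics import f1_score

def best_threshold(y_true, y_prob):
    # Scan candidate cutoffs and keep the one maximising F1.
    candidates = np.linspace(0.05, 0.95, 19)
    scores = [f1_score(y_true, y_prob >= thr) for thr in candidates]
    return candidates[int(np.argmax(scores))]

# Usage on synthetic validation scores.
rng = np.random.default_rng(0)
y_true = rng.integers(0, 2, size=200)
y_prob = np.clip(0.6 * y_true + rng.normal(0.2, 0.3, size=200), 0.0, 1.0)
thr = best_threshold(y_true, y_prob)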
Example #8
def cross_validation_with_val_set(args,
                                  dataset,
                                  max_node_num,
                                  folds,
                                  epochs,
                                  batch_size,
                                  lr,
                                  lr_decay_factor,
                                  lr_decay_step_size,
                                  weight_decay,
                                  epoch_select,
                                  with_eval_mode=True,
                                  logger=None):
    assert epoch_select in ['val_min', 'test_max'], epoch_select

    val_losses, train_accs, test_accs, durations = [], [], [], []
    for fold, (train_idx, test_idx, val_idx) in enumerate(
            zip(*k_fold(dataset, folds, epoch_select))):

        train_dataset = dataset[train_idx]
        test_dataset = dataset[test_idx]
        val_dataset = dataset[val_idx]

        train_loader = DenseLoader(train_dataset, batch_size, shuffle=True)
        val_loader = DenseLoader(val_dataset, batch_size, shuffle=False)
        test_loader = DenseLoader(test_dataset, batch_size, shuffle=False)

        model = CapsGNN(args, dataset.num_features, dataset.num_classes,
                        max_node_num).to(device)
        optimizer = Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

        if torch.cuda.is_available():
            torch.cuda.synchronize()

        t_start = time.perf_counter()
        writer = SummaryWriter(args.log_path)

        for epoch in range(1, epochs + 1):
            train_loss, train_acc = train(args, model, optimizer, train_loader,
                                          device, max_node_num, epoch, writer)
            train_accs.append(train_acc)
            val_loss, _ = eval_loss(args, model, val_loader, device,
                                    max_node_num, with_eval_mode)
            val_losses.append(val_loss)
            test_accs.append(
                eval_acc(model, test_loader, device, max_node_num,
                         with_eval_mode))
            eval_info = {
                'fold': fold,
                'epoch': epoch,
                'train_loss': train_loss,
                'train_acc': train_accs[-1],
                'val_loss': val_losses[-1],
                'test_acc': test_accs[-1],
            }

            if logger is not None:
                logger(eval_info)

            if epoch % lr_decay_step_size == 0:
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr_decay_factor * param_group['lr']

        if torch.cuda.is_available():
            torch.cuda.synchronize()

        t_end = time.perf_counter()
        durations.append(t_end - t_start)

    duration = tensor(durations)
    train_acc, test_acc = tensor(train_accs), tensor(test_accs)
    val_loss = tensor(val_losses)

    train_acc = train_acc.view(folds, epochs)
    test_acc = test_acc.view(folds, epochs)
    val_loss = val_loss.view(folds, epochs)
    if epoch_select == 'test_max':  # pick the single epoch with the best mean test accuracy across folds
        _, selected_epoch = test_acc.mean(dim=0).max(dim=0)
        selected_epoch_rep = selected_epoch.repeat(folds)
    else:  # take epoch that yields min val loss for each fold individually.
        _, selected_epoch_rep = val_loss.min(dim=1)

    # The criteria used in GMN and STRUCPOOL
    test_acc_epoch_max = torch.max(test_acc, dim=1)[0]
    test_acc_epoch_mean = test_acc_epoch_max.mean().item()
    test_acc_epoch_std = test_acc_epoch_max.std().item()
    ########################################

    test_acc = test_acc[torch.arange(folds, dtype=torch.long),
                        selected_epoch_rep]
    train_acc_mean = train_acc[:, -1].mean().item()
    test_acc_mean = test_acc.mean().item()
    test_acc_std = test_acc.std().item()
    duration_mean = duration.mean().item()

    print(
        'Train Acc: {:.2f}, Test Acc: {:.2f} ± {:.2f}, Test Acc (C*): {:.2f} ± {:.2f}, Duration: {:.3f}'
        .format(train_acc_mean * 100, test_acc_mean * 100, test_acc_std * 100,
                test_acc_epoch_mean * 100, test_acc_epoch_std * 100,
                duration_mean))
    sys.stdout.flush()

    return train_acc_mean, test_acc_mean, test_acc_std, duration_mean
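
The difference between the two epoch_select policies is just two reductions over the (folds × epochs) matrices; a small self-contained illustration:

import torch

folds, epochs = 3, 4
test_acc = torch.rand(folds, epochs)
val_loss = torch.rand(folds, epochs)

# 'test_max': one shared epoch, chosen by mean test accuracy over folds.
_, shared = test_acc.mean(dim=0).max(dim=0)
acc_shared = test_acc[torch.arange(folds), shared.repeat(folds)]

# 'val_min': a per-fold epoch, chosen by each fold's own minimum val loss.
_, per_fold = val_loss.min(dim=1)
acc_val_min = test_acc[torch.arange(folds), per_fold]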