Example #1
    def train(self):
        X_train, y_train, _ = self.load_results_from_result_paths(self.train_paths)
        X_val, y_val, _ = self.load_results_from_result_paths(self.val_paths)

        base_learner_config = self.parse_config("base:")
        param_config = self.parse_config("param:")

        # train
        base_learner = DecisionTreeRegressor(criterion='friedman_mse', random_state=None, splitter='best',
                                             **base_learner_config)
        self.model = NGBRegressor(Dist=Normal, Base=base_learner, Score=LogScore, verbose=True, **param_config)
        self.model = self.model.fit(X_train, y_train, X_val=X_val, Y_val=y_val,
                                    early_stopping_rounds=self.model_config["early_stopping_rounds"])

        train_pred, var_train = self.model.predict(X_train), None
        val_pred, var_val = self.model.predict(X_val), None

        # self.save()

        fig_train = utils.scatter_plot(np.array(train_pred), np.array(y_train), xlabel='Predicted', ylabel='True',
                                       title='')
        fig_train.savefig(os.path.join(self.log_dir, 'pred_vs_true_train.jpg'))
        plt.close()

        fig_val = utils.scatter_plot(np.array(val_pred), np.array(y_val), xlabel='Predicted', ylabel='True', title='')
        fig_val.savefig(os.path.join(self.log_dir, 'pred_vs_true_val.jpg'))
        plt.close()

        train_metrics = utils.evaluate_metrics(y_train, train_pred, prediction_is_first_arg=False)
        valid_metrics = utils.evaluate_metrics(y_val, val_pred, prediction_is_first_arg=False)

        logging.info('train metrics: %s', train_metrics)
        logging.info('valid metrics: %s', valid_metrics)

        return valid_metrics
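
The example above discards the predictive variance (var_train / var_val stay None), although NGBoost models a full distribution per sample. A minimal sketch of querying it, assuming the standard NGBoost pred_dist API and the fitted self.model from above (variable names are illustrative):

        # Hedged sketch: ask NGBoost for the predictive Normal distribution
        # instead of only the point estimate.
        dist_val = self.model.pred_dist(X_val)
        val_mean = dist_val.params['loc']    # matches self.model.predict(X_val)
        val_std = dist_val.params['scale']   # per-sample predictive std. dev.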
Example #2
    def train(self):
        X_train, y_train, _ = self.load_results_from_result_paths(
            self.train_paths)
        X_val, y_val, _ = self.load_results_from_result_paths(self.val_paths)

        logging.info(
            "LGBOOST TRAIN: careful, categorical features are not specified in the dataset conversion"
        )

        dtrain = lgb.Dataset(X_train, label=y_train)
        dval = lgb.Dataset(X_val, label=y_val)

        param_config = self.parse_param_config()
        param_config["seed"] = self.seed

        self.model = lgb.train(
            param_config,
            dtrain,
            early_stopping_rounds=self.model_config["early_stopping_rounds"],
            verbose_eval=1,
            valid_sets=[dval])

        train_pred, var_train = self.model.predict(X_train), None
        val_pred, var_val = self.model.predict(X_val), None

        # self.save()

        fig_train = utils.scatter_plot(np.array(train_pred),
                                       np.array(y_train),
                                       xlabel='Predicted',
                                       ylabel='True',
                                       title='')
        fig_train.savefig(os.path.join(self.log_dir, 'pred_vs_true_train.jpg'))
        plt.close()

        fig_val = utils.scatter_plot(np.array(val_pred),
                                     np.array(y_val),
                                     xlabel='Predicted',
                                     ylabel='True',
                                     title='')
        fig_val.savefig(os.path.join(self.log_dir, 'pred_vs_true_val.jpg'))
        plt.close()

        train_metrics = utils.evaluate_metrics(y_train,
                                               train_pred,
                                               prediction_is_first_arg=False)
        valid_metrics = utils.evaluate_metrics(y_val,
                                               val_pred,
                                               prediction_is_first_arg=False)

        logging.info('train metrics: %s', train_metrics)
        logging.info('valid metrics: %s', valid_metrics)

        return valid_metrics
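
Note that the early_stopping_rounds and verbose_eval arguments of lgb.train were removed in LightGBM 4.x. A hedged equivalent using callbacks, assuming the same param_config, dtrain and dval as above:

        # Callback-based early stopping / logging for newer LightGBM releases.
        self.model = lgb.train(
            param_config,
            dtrain,
            valid_sets=[dval],
            callbacks=[
                lgb.early_stopping(self.model_config["early_stopping_rounds"]),
                lgb.log_evaluation(period=1),
            ])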
Example #3
    def train(self):
        X_train, y_train, _ = self.load_results_from_result_paths(
            self.train_paths)
        X_val, y_val, _ = self.load_results_from_result_paths(self.val_paths)

        dtrain = xgb.DMatrix(X_train, label=y_train)
        dval = xgb.DMatrix(X_val, label=y_val)

        param_config = self.parse_param_config()
        param_config["seed"] = self.seed

        self.model = xgb.train(
            param_config,
            dtrain,
            num_boost_round=self.model_config["param:num_rounds"],
            early_stopping_rounds=self.model_config["early_stopping_rounds"],
            verbose_eval=1,
            evals=[(dval, 'val')])

        train_pred, var_train = self.model.predict(dtrain), None
        val_pred, var_val = self.model.predict(dval), None

        # self.save()

        fig_train = utils.scatter_plot(np.array(train_pred),
                                       np.array(y_train),
                                       xlabel='Predicted',
                                       ylabel='True',
                                       title='')
        fig_train.savefig(os.path.join(self.log_dir, 'pred_vs_true_train.jpg'))
        plt.close()

        fig_val = utils.scatter_plot(np.array(val_pred),
                                     np.array(y_val),
                                     xlabel='Predicted',
                                     ylabel='True',
                                     title='')
        fig_val.savefig(os.path.join(self.log_dir, 'pred_vs_true_val.jpg'))
        plt.close()

        train_metrics = utils.evaluate_metrics(y_train,
                                               train_pred,
                                               prediction_is_first_arg=False)
        valid_metrics = utils.evaluate_metrics(y_val,
                                               val_pred,
                                               prediction_is_first_arg=False)

        logging.info('train metrics: %s', train_metrics)
        logging.info('valid metrics: %s', valid_metrics)

        return valid_metrics
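
When early stopping triggers, the booster above still contains the rounds trained past the best one. A hedged sketch of restricting prediction to the best round, assuming a recent XGBoost version that exposes best_iteration and iteration_range:

        # Predict using only the trees up to the best early-stopping round.
        best_it = self.model.best_iteration
        val_pred = self.model.predict(dval, iteration_range=(0, best_it + 1))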
Example #4
    def evaluate(self, result_paths):
        X_test, y_test, _ = self.load_results_from_result_paths(result_paths)
        test_pred, var_test = self.model.predict(X_test), None
        test_metrics = utils.evaluate_metrics(y_test,
                                              test_pred,
                                              prediction_is_first_arg=False)
        return test_metrics, test_pred, y_test
Example #5
    def test(self):
        preds = []
        targets = []
        self.model.eval()

        test_queue = self.load_results_from_result_paths(self.test_paths)
        for step, graph_batch in enumerate(test_queue):
            graph_batch = graph_batch.to(self.device)

            if self.model_config['model'] == 'gnn_vs_gae_classifier':
                pred_bins, pred = self.model(graph_batch=graph_batch)

            else:
                pred = self.model(graph_batch=graph_batch)

            preds.extend(pred.detach().cpu().numpy() * 100)
            targets.extend(graph_batch.y.detach().cpu().numpy())

        fig = utils.scatter_plot(np.array(preds), np.array(targets), xlabel='Predicted', ylabel='True', title='')
        fig.savefig(os.path.join(self.log_dir, 'pred_vs_true_test.jpg'))
        plt.close()

        test_results = utils.evaluate_metrics(np.array(targets), np.array(preds), prediction_is_first_arg=False)
        logging.info('test metrics %s', test_results)

        return test_results
Example #6
    def validate(self):
        preds = []
        targets = []
        self.model.eval()

        valid_queue = self.load_results_from_result_paths(self.val_paths)
        for step, (arch_path_enc, y_true) in enumerate(valid_queue):
            arch_path_enc = arch_path_enc.to(self.device).float()
            y_true = y_true.to(self.device).float()

            pred = self.model(arch_path_enc)
            preds.extend(pred.detach().cpu().numpy() * 100)
            targets.extend(y_true.detach().cpu().numpy())

        fig = utils.scatter_plot(np.array(preds),
                                 np.array(targets),
                                 xlabel='Predicted',
                                 ylabel='True',
                                 title='')
        fig.savefig(os.path.join(self.log_dir, 'pred_vs_true_valid.jpg'))
        plt.close()

        val_results = utils.evaluate_metrics(np.array(targets),
                                             np.array(preds),
                                             prediction_is_first_arg=False)
        logging.info('validation metrics %s', val_results)

        return val_results
Example #7
    def validate(self):
        X_val, y_val, _ = self.load_results_from_result_paths(self.val_paths)
        val_pred, var_val = self.model.predict(X_val), None

        valid_metrics = utils.evaluate_metrics(y_val, val_pred, prediction_is_first_arg=False)

        logging.info('validation metrics %s', valid_metrics)

        return valid_metrics
Example #8
    def train(self):

        from nasbench301.surrogate_models import utils
        X_train, y_train, _ = self.load_results_from_result_paths(
            self.train_paths)
        X_val, y_val, _ = self.load_results_from_result_paths(self.val_paths)
        self.model.fit(X_train, y_train)

        train_pred, var_train = self.model.predict(X_train), None
        val_pred, var_val = self.model.predict(X_val), None

        #self.save()

        fig_train = utils.scatter_plot(np.array(train_pred),
                                       np.array(y_train),
                                       xlabel='Predicted',
                                       ylabel='True',
                                       title='')
        fig_train.savefig(os.path.join(self.log_dir, 'pred_vs_true_train.jpg'))
        plt.close()

        fig_val = utils.scatter_plot(np.array(val_pred),
                                     np.array(y_val),
                                     xlabel='Predicted',
                                     ylabel='True',
                                     title='')
        fig_val.savefig(os.path.join(self.log_dir, 'pred_vs_true_val.jpg'))
        plt.close()

        train_metrics = utils.evaluate_metrics(y_train,
                                               train_pred,
                                               prediction_is_first_arg=False)
        valid_metrics = utils.evaluate_metrics(y_val,
                                               val_pred,
                                               prediction_is_first_arg=False)

        logging.info('train metrics: %s', train_metrics)
        logging.info('valid metrics: %s', valid_metrics)

        return valid_metrics
Example #9
    def infer(self, train_queue, valid_queue, model, criterion, optimizer, lr, epoch):
        objs = utils.AvgrageMeter()

        # VALIDATION
        preds = []
        targets = []

        model.eval()
        for step, graph_batch in enumerate(valid_queue):
            graph_batch = graph_batch.to(self.device)

            if self.model_config['model'] == 'gnn_vs_gae_classifier':
                pred_bins, pred = self.model(graph_batch=graph_batch)
                criterion = torch.nn.BCELoss()
                criterion_2 = torch.nn.MSELoss()

                bins = self.create_bins(lower_bound=0,
                                        width=10,
                                        quantity=9)
                binned_weights = []
                for value in graph_batch.y.cpu().numpy():
                    bin_index = self.find_bin(value, bins)
                    binned_weights.append(bin_index)
                bins = torch.FloatTensor(binned_weights)
                make_one_hot = lambda index: torch.eye(self.model_config['no_bins'])[index.view(-1).long()]
                binns_one_hot = make_one_hot(bins).to(self.device)

                loss_1 = criterion(pred_bins, binns_one_hot)
                loss_2 = criterion_2(pred, self.normalize_data(graph_batch.y))
                alpha = self.model_config['classification_loss']
                beta = self.model_config['regression_loss']

                loss = alpha * loss_1 + beta * loss_2
            else:
                pred = self.model(graph_batch=graph_batch)
                loss = criterion(self.normalize_data(pred), self.normalize_data(graph_batch.y / 100))

            preds.extend(pred.detach().cpu().numpy() * 100)
            targets.extend(graph_batch.y.detach().cpu().numpy())
            n = graph_batch.num_graphs
            objs.update(loss.data.item(), n)

            if step % self.data_config['report_freq'] == 0:
                logging.info('valid %03d %e ', step, objs.avg)

        fig = utils.scatter_plot(np.array(preds), np.array(targets), xlabel='Predicted', ylabel='True', title='')
        fig.savefig(os.path.join(self.log_dir, 'pred_vs_true_valid_{}.jpg'.format(epoch)))
        plt.close()

        val_results = utils.evaluate_metrics(np.array(targets), np.array(preds), prediction_is_first_arg=False)

        return objs.avg, val_results
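
The torch.eye lookup used to build binns_one_hot above can also be written with PyTorch's built-in one-hot helper. A small equivalent sketch, assuming bins is the float tensor of bin indices built in the loop:

        # Equivalent to make_one_hot(bins): one-hot encode the integer bin
        # indices and move the result to the training device.
        import torch.nn.functional as F
        binns_one_hot = F.one_hot(
            bins.long(), num_classes=self.model_config['no_bins']
        ).float().to(self.device)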
Example #10
    def test(self):
        X_test, y_test, _ = self.load_results_from_result_paths(self.test_paths)
        test_pred, var_test = self.model.predict(X_test), None

        fig = utils.scatter_plot(np.array(test_pred), np.array(y_test), xlabel='Predicted', ylabel='True', title='')
        fig.savefig(os.path.join(self.log_dir, 'pred_vs_true_test.jpg'))
        plt.close()

        test_metrics = utils.evaluate_metrics(y_test, test_pred, prediction_is_first_arg=False)

        logging.info('test metrics %s', test_metrics)

        return test_metrics
Example #11
    def evaluate(self, result_paths):
        # Get evaluation data
        eval_queue = self.load_results_from_result_paths(result_paths)

        preds = []
        targets = []
        self.model.eval()
        for step, graph_batch in enumerate(eval_queue):
            graph_batch = graph_batch.to(self.device)

            pred = self.model(graph_batch=graph_batch)
            preds.extend(pred.detach().cpu().numpy() * 100)
            targets.extend(graph_batch.y.detach().cpu().numpy())

        test_metrics = utils.evaluate_metrics(np.array(targets), np.array(preds), prediction_is_first_arg=False)
        return test_metrics, np.array(preds), np.array(targets)
Example #12
    def evaluate(self, result_paths):
        # Get evaluation data
        eval_queue = self.load_results_from_result_paths(result_paths)

        preds = []
        targets = []
        self.model.eval()
        for step, (arch_path_enc, y_true) in enumerate(eval_queue):
            arch_path_enc = arch_path_enc.to(self.device).float()
            y_true = y_true.to(self.device).float()
            pred = self.model(arch_path_enc)
            preds.extend(pred.detach().cpu().numpy() * 100)
            targets.extend(y_true.detach().cpu().numpy())

        test_metrics = utils.evaluate_metrics(np.array(targets),
                                              np.array(preds),
                                              prediction_is_first_arg=False)
        return test_metrics, preds, targets
Example #13
    def infer(self, train_queue, valid_queue, model, criterion, optimizer, lr,
              epoch):
        objs = utils.AvgrageMeter()

        # VALIDATION
        preds = []
        targets = []
        # Switch to eval mode so dropout / batch-norm layers are deterministic
        # during validation (mirrors the GNN variant of infer above).
        model.eval()

        for step, (arch_path_enc, y_true) in enumerate(valid_queue):
            arch_path_enc = arch_path_enc.to(self.device).float()
            y_true = y_true.to(self.device).float()
            pred = self.model(arch_path_enc)
            loss = torch.mean(
                torch.abs((self.normalize_data(pred) /
                           self.normalize_data(y_true / 100)) - 1))
            preds.extend(pred.detach().cpu().numpy() * 100)
            targets.extend(y_true.detach().cpu().numpy())
            objs.update(loss.data.item(), len(arch_path_enc))

            if step % self.data_config['report_freq'] == 0:
                logging.info('valid %03d %e ', step, objs.avg)

        fig = utils.scatter_plot(np.array(preds),
                                 np.array(targets),
                                 xlabel='Predicted',
                                 ylabel='True',
                                 title='')
        fig.savefig(
            os.path.join(self.log_dir,
                         'pred_vs_true_valid_{}.jpg'.format(epoch)))
        plt.close()

        val_results = utils.evaluate_metrics(np.array(targets),
                                             np.array(preds),
                                             prediction_is_first_arg=False)

        return objs.avg, val_results
Example #14
    def evaluate_ensemble(self, result_paths, apply_noise):
        """Evaluates the metrics on the result paths using ensemble predicitons"""
        preds, targets = [], []

        # Collect individuals predictions
        for member_model in self.ensemble_members:
            member_metrics, member_preds, member_targets = member_model.evaluate(
                result_paths)
            logging.info("==> Eval member metrics: %s", member_metrics)
            if len(targets) == 0:
                preds.append(member_preds)
                targets = member_targets
                continue
            # Check in both directions; a one-sided comparison would miss
            # members whose targets exceed the reference targets.
            if np.any(np.abs(targets - member_targets) > 1e-5):
                raise ValueError("Ensemble members have different targets!")
            preds.append(member_preds)

        means = np.mean(preds, axis=0)
        stddevs = np.std(preds, axis=0)

        # Apply noise
        if apply_noise:
            noisy_predictions = [
                np.random.normal(loc=mean, scale=stddev, size=1)[0]
                for mean, stddev in zip(means, stddevs)
            ]
            ensemble_predictions = noisy_predictions
        else:
            ensemble_predictions = means

        # Evaluate metrics
        metrics = utils.evaluate_metrics(targets,
                                         ensemble_predictions,
                                         prediction_is_first_arg=False)

        return metrics, ensemble_predictions, stddevs, targets
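
The per-element loop that adds noise above can be collapsed into one vectorized draw, since np.random.normal broadcasts array-valued loc and scale. A hedged equivalent sketch:

        # One Gaussian sample per ensemble mean/stddev pair in a single call.
        if apply_noise:
            ensemble_predictions = np.random.normal(loc=means, scale=stddevs)
        else:
            ensemble_predictions = means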
Example #15
    def train_epoch(self, train_queue, valid_queue, model, criterion, optimizer, lr, epoch):
        objs = utils.AvgrageMeter()

        # TRAINING
        preds = []
        targets = []

        model.train()

        for step, graph_batch in enumerate(train_queue):
            graph_batch = graph_batch.to(self.device)

            if self.model_config['model'] == 'gnn_vs_gae_classifier':
                pred_bins, pred = self.model(graph_batch=graph_batch)
                criterion = torch.nn.BCELoss()
                criterion_2 = torch.nn.MSELoss()

                bins = self.create_bins(lower_bound=0,
                                        width=10,
                                        quantity=9)
                binned_weights = []
                for value in graph_batch.y.cpu().numpy():
                    bin_index = self.find_bin(value, bins)
                    binned_weights.append(bin_index)
                bins = torch.FloatTensor(binned_weights)
                make_one_hot = lambda index: torch.eye(self.model_config['no_bins'])[index.view(-1).long()]
                binns_one_hot = make_one_hot(bins).to(self.device)
                loss_1 = criterion(pred_bins, binns_one_hot)
                loss_2 = criterion_2(pred, self.normalize_data(graph_batch.y))
                alpha = self.model_config['classification_loss']
                beta = self.model_config['regression_loss']

                loss = alpha * loss_1 + beta * loss_2

            else:
                pred = self.model(graph_batch=graph_batch)
                if self.model_config['loss:loss_log_transform']:
                    loss = criterion(self.normalize_data(pred), self.normalize_data(graph_batch.y / 100))
                else:
                    loss = criterion(pred, graph_batch.y / 100)
                if self.model_config['loss:pairwise_ranking_loss']:
                    m = 0.1
                    '''
                    y = list(map(lambda y_i: 1 if y_i == True else -1, graph_batch.y[0: -1] > graph_batch.y[1:]))
                    pairwise_ranking_loss = torch.nn.HingeEmbeddingLoss(margin=m)(pred[0:-1] - pred[1:],
                                                                                  target=torch.from_numpy(np.array(y)))
                    '''
                    pairwise_ranking_loss = []
                    sort_idx = torch.argsort(graph_batch.y, descending=True)
                    for idx, idx_y_i in enumerate(sort_idx):
                        for idx_y_i_p1 in sort_idx[idx + 1:]:
                            pairwise_ranking_loss.append(torch.max(torch.tensor(0.0, dtype=torch.float),
                                                                   m - (pred[idx_y_i] - pred[idx_y_i_p1])))
                    pairwise_ranking_loss = torch.mean(torch.stack(pairwise_ranking_loss))

                    loss += pairwise_ranking_loss
                    if step % self.data_config['report_freq'] == 0:
                        logging.info('Pairwise ranking loss {}'.format(pairwise_ranking_loss))

            preds.extend(pred.detach().cpu().numpy() * 100)
            targets.extend(graph_batch.y.detach().cpu().numpy())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            n = graph_batch.num_graphs
            objs.update(loss.data.item(), n)

            if step % self.data_config['report_freq'] == 0:
                logging.info('train %03d %e', step, objs.avg)

        fig = utils.scatter_plot(np.array(preds), np.array(targets), xlabel='Predicted', ylabel='True', title='')
        fig.savefig(os.path.join(self.log_dir, 'pred_vs_true_train_{}.jpg'.format(epoch)))
        plt.close()
        train_results = utils.evaluate_metrics(np.array(targets), np.array(preds), prediction_is_first_arg=False)

        return objs.avg, train_results
Example #16
    def train_epoch(self, train_queue, valid_queue, model, criterion,
                    optimizer, lr, epoch):
        objs = utils.AvgrageMeter()

        # TRAINING
        preds = []
        targets = []

        model.train()

        for step, (arch_path_enc, y_true) in enumerate(train_queue):
            arch_path_enc = arch_path_enc.to(self.device).float()
            y_true = y_true.to(self.device).float()

            pred = self.model(arch_path_enc)
            if self.model_config['loss:loss_log_transform']:
                loss = torch.mean(
                    torch.abs((self.normalize_data(pred) /
                               self.normalize_data(y_true / 100)) - 1))
            else:
                loss = criterion(1 - pred, 1 - y_true / 100)
            if self.model_config['loss:pairwise_ranking_loss']:
                m = 0.1
                pairwise_ranking_loss = []
                sort_idx = torch.argsort(y_true, descending=True)
                for idx, idx_y_i in enumerate(sort_idx):
                    for idx_y_i_p1 in sort_idx[idx + 1:]:
                        pairwise_ranking_loss.append(
                            torch.max(torch.tensor(0.0, dtype=torch.float),
                                      m - (pred[idx_y_i] - pred[idx_y_i_p1])))
                pairwise_ranking_loss = torch.mean(
                    torch.stack(pairwise_ranking_loss))

                loss += pairwise_ranking_loss
                if step % self.data_config['report_freq'] == 0:
                    logging.info('Pairwise ranking loss {}'.format(
                        pairwise_ranking_loss))

            preds.extend(pred.detach().cpu().numpy() * 100)
            targets.extend(y_true.detach().cpu().numpy())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            objs.update(loss.data.item(), len(arch_path_enc))

            if step % self.data_config['report_freq'] == 0:
                logging.info('train %03d %e', step, objs.avg)

        fig = utils.scatter_plot(np.array(preds),
                                 np.array(targets),
                                 xlabel='Predicted',
                                 ylabel='True',
                                 title='')
        fig.savefig(
            os.path.join(self.log_dir,
                         'pred_vs_true_train_{}.jpg'.format(epoch)))
        plt.close()
        train_results = utils.evaluate_metrics(np.array(targets),
                                               np.array(preds),
                                               prediction_is_first_arg=False)

        return objs.avg, train_results
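
The nested loop over sort_idx above is quadratic in Python. The same hinge-style pairwise ranking loss can be expressed with tensor broadcasting; a hedged sketch (the helper name is illustrative, and with ties in y_true it only penalises strictly ordered pairs, whereas the loop also covers tied pairs):

    def pairwise_ranking_loss_vectorized(self, pred, y_true, margin=0.1):
        # Hinge penalty whenever an architecture with higher true accuracy is
        # not predicted at least `margin` above a lower-accuracy one.
        pred = pred.view(-1)
        y_true = y_true.view(-1)
        diff = pred.unsqueeze(1) - pred.unsqueeze(0)        # diff[i, j] = pred[i] - pred[j]
        better = y_true.unsqueeze(1) > y_true.unsqueeze(0)  # i truly better than j
        hinge = torch.relu(margin - diff)
        return hinge[better].mean() if better.any() else pred.new_zeros(())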