Code example #1
File: sklearn_forest.py Project: LMZimmer/temp
    def train(self):
        X_train, y_train, _ = self.load_results_from_result_paths(self.train_paths)
        X_val, y_val, _ = self.load_results_from_result_paths(self.val_paths)
        self.model.fit(X_train, y_train)

        train_pred, var_train = self.model.predict(X_train), None
        val_pred, var_val = self.model.predict(X_val), None

        # self.save()

        fig_train = utils.scatter_plot(np.array(train_pred), np.array(y_train), xlabel='Predicted', ylabel='True', title='')
        fig_train.savefig(os.path.join(self.log_dir, 'pred_vs_true_train.jpg'))
        plt.close()

        fig_val = utils.scatter_plot(np.array(val_pred), np.array(y_val), xlabel='Predicted', ylabel='True', title='')
        fig_val.savefig(os.path.join(self.log_dir, 'pred_vs_true_val.jpg'))
        plt.close()

        train_metrics = utils.evaluate_metrics(y_train, train_pred, prediction_is_first_arg=False)
        valid_metrics = utils.evaluate_metrics(y_val, val_pred, prediction_is_first_arg=False)

        logging.info('train metrics: %s', train_metrics)
        logging.info('valid metrics: %s', valid_metrics)

        return valid_metrics
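
Note: every snippet on this page routes predictions through utils.evaluate_metrics, which is project-internal and not shown here. Below is a minimal sketch of a compatible implementation; the 'r2' and 'kendall_tau_1_dec' keys are taken from the call sites in example #19, while the remaining keys and the exact metric set are assumptions.

import numpy as np
from scipy import stats
from sklearn.metrics import mean_squared_error, r2_score

def evaluate_metrics(y_true, y_pred, prediction_is_first_arg):
    # Mirror the call sites above: swap the arguments when predictions come first.
    if prediction_is_first_arg:
        y_true, y_pred = y_pred, y_true
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    return {
        'mse': mean_squared_error(y_true, y_pred),
        'r2': r2_score(y_true, y_pred),
        'kendall_tau': stats.kendalltau(y_true, y_pred).correlation,
        # "Sparse" variant with predictions rounded to one decimal,
        # matching the rounding used in examples #7 and #19.
        'kendall_tau_1_dec': stats.kendalltau(
            y_true, np.round(y_pred, decimals=1)).correlation,
    }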
Code example #2
    def train(self):
        X_train, y_train, _ = self.load_results_from_result_paths(
            self.train_paths)
        X_val, y_val, _ = self.load_results_from_result_paths(self.val_paths)

        base_learner_config = self.parse_config("base:")
        param_config = self.parse_config("param:")

        # train
        base_learner = DecisionTreeRegressor(criterion='friedman_mse',
                                             random_state=None,
                                             splitter='best',
                                             **base_learner_config)
        self.model = NGBRegressor(Dist=Normal,
                                  Base=base_learner,
                                  Score=LogScore,
                                  verbose=True,
                                  **param_config)
        self.model = self.model.fit(
            X_train,
            y_train,
            X_val=X_val,
            Y_val=y_val,
            early_stopping_rounds=self.model_config["early_stopping_rounds"])

        train_pred, var_train = self.model.predict(X_train), None
        val_pred, var_val = self.model.predict(X_val), None

        # self.save()

        fig_train = utils.scatter_plot(np.array(train_pred),
                                       np.array(y_train),
                                       xlabel='Predicted',
                                       ylabel='True',
                                       title='')
        fig_train.savefig(os.path.join(self.log_dir, 'pred_vs_true_train.jpg'))
        plt.close()

        fig_val = utils.scatter_plot(np.array(val_pred),
                                     np.array(y_val),
                                     xlabel='Predicted',
                                     ylabel='True',
                                     title='')
        fig_val.savefig(os.path.join(self.log_dir, 'pred_vs_true_val.jpg'))
        plt.close()

        train_metrics = utils.evaluate_metrics(y_train,
                                               train_pred,
                                               prediction_is_first_arg=False)
        valid_metrics = utils.evaluate_metrics(y_val,
                                               val_pred,
                                               prediction_is_first_arg=False)

        logging.info('train metrics: %s', train_metrics)
        logging.info('valid metrics: %s', valid_metrics)

        return valid_metrics
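
Note: this snippet leaves var_train and var_val as None even though NGBoost exposes predictive uncertainty. A self-contained sketch on toy data (the data and variable names here are illustrative, not from the project) showing how ngboost's pred_dist could fill those slots:

import numpy as np
from ngboost import NGBRegressor
from ngboost.distns import Normal

X, y = np.random.rand(200, 5), np.random.rand(200)
ngb = NGBRegressor(Dist=Normal, verbose=False).fit(X, y)
dist = ngb.pred_dist(X)                       # predictive Normal distribution
mean, std = dist.params['loc'], dist.params['scale']
var = std ** 2                                # could replace the var_* = None slots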
Code example #3
File: lgboost.py Project: LMZimmer/temp
    def train(self):
        X_train, y_train, _ = self.load_results_from_result_paths(
            self.train_paths)
        X_val, y_val, _ = self.load_results_from_result_paths(self.val_paths)

        logging.info(
            "LGBOOST TRAIN: Careful categoricals not specified in dataset conversion"
        )

        dtrain = lgb.Dataset(X_train, label=y_train)
        dval = lgb.Dataset(X_val, label=y_val)

        param_config = self.parse_param_config()
        param_config["seed"] = self.seed

        self.model = lgb.train(
            param_config,
            dtrain,
            early_stopping_rounds=self.model_config["early_stopping_rounds"],
            verbose_eval=1,
            valid_sets=[dval])

        train_pred, var_train = self.model.predict(X_train), None
        val_pred, var_val = self.model.predict(X_val), None

        # self.save()

        fig_train = utils.scatter_plot(np.array(train_pred),
                                       np.array(y_train),
                                       xlabel='Predicted',
                                       ylabel='True',
                                       title='')
        fig_train.savefig(os.path.join(self.log_dir, 'pred_vs_true_train.jpg'))
        plt.close()

        fig_val = utils.scatter_plot(np.array(val_pred),
                                     np.array(y_val),
                                     xlabel='Predicted',
                                     ylabel='True',
                                     title='')
        fig_val.savefig(os.path.join(self.log_dir, 'pred_vs_true_val.jpg'))
        plt.close()

        train_metrics = utils.evaluate_metrics(y_train,
                                               train_pred,
                                               prediction_is_first_arg=False)
        valid_metrics = utils.evaluate_metrics(y_val,
                                               val_pred,
                                               prediction_is_first_arg=False)

        logging.info('train metrics: %s', train_metrics)
        logging.info('valid metrics: %s', valid_metrics)

        return valid_metrics
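
Note: the early_stopping_rounds and verbose_eval keyword arguments target the LightGBM 3.x API; in LightGBM >= 4.0 both moved into callbacks. A self-contained sketch of the equivalent call on toy data (illustrative names, not project code):

import numpy as np
import lightgbm as lgb

X, y = np.random.rand(200, 5), np.random.rand(200)
dtrain = lgb.Dataset(X[:160], label=y[:160])
dval = lgb.Dataset(X[160:], label=y[160:])
booster = lgb.train({'objective': 'regression'}, dtrain,
                    valid_sets=[dval],
                    callbacks=[lgb.early_stopping(stopping_rounds=20),
                               lgb.log_evaluation(period=1)])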
Code example #4
File: svr.py Project: LMZimmer/temp
    def evaluate(self, result_paths):
        X_test, y_test, _ = self.load_results_from_result_paths(result_paths)
        test_pred, var_test = self.model.predict(X_test), None
        test_metrics = utils.evaluate_metrics(y_test,
                                              test_pred,
                                              prediction_is_first_arg=False)
        return test_metrics, test_pred, y_test
Code example #5
File: bananas.py Project: LMZimmer/temp
    def validate(self):
        preds = []
        targets = []
        self.model.eval()

        valid_queue = self.load_results_from_result_paths(self.val_paths)
        for step, (arch_path_enc, y_true) in enumerate(valid_queue):
            arch_path_enc = arch_path_enc.to(self.device).float()
            y_true = y_true.to(self.device).float()

            pred = self.model(arch_path_enc)
            preds.extend(pred.detach().cpu().numpy() * 100)
            targets.extend(y_true.detach().cpu().numpy())

        fig = utils.scatter_plot(np.array(preds),
                                 np.array(targets),
                                 xlabel='Predicted',
                                 ylabel='True',
                                 title='')
        fig.savefig(os.path.join(self.log_dir, 'pred_vs_true_valid.jpg'))
        plt.close()

        val_results = utils.evaluate_metrics(np.array(targets),
                                             np.array(preds),
                                             prediction_is_first_arg=False)
        logging.info('validation metrics %s', val_results)

        return val_results
Code example #6
    def test(self):
        preds = []
        targets = []
        self.model.eval()

        test_queue = self.load_results_from_result_paths(self.test_paths)
        for step, graph_batch in enumerate(test_queue):
            graph_batch = graph_batch.to(self.device)

            if self.model_config['model'] == 'gnn_vs_gae_classifier':
                pred_bins, pred = self.model(graph_batch=graph_batch)

            else:
                pred = self.model(graph_batch=graph_batch)

            preds.extend(pred.detach().cpu().numpy() * 100)
            targets.extend(graph_batch.y.detach().cpu().numpy())

        fig = utils.scatter_plot(np.array(preds),
                                 np.array(targets),
                                 xlabel='Predicted',
                                 ylabel='True',
                                 title='')
        fig.savefig(os.path.join(self.log_dir, 'pred_vs_true_test.jpg'))
        plt.close()

        test_results = utils.evaluate_metrics(np.array(targets),
                                              np.array(preds),
                                              prediction_is_first_arg=False)
        logging.info('test metrics %s', test_results)

        return test_results
Code example #7
def leave_one_out_analysis(analysis_directory, nasbench_data):
    result_dict = {}
    data_min, data_max = 100, 0
    optimizers = ['darts', 'bananas', 'combo', 'de', 're', 'tpe', 'pc_darts', 'gdas', 'drnas']
    result_loader = utils.ResultLoader('', '', '', 0)
    for optimizer in optimizers:
        try:
            model_log_dir = glob.glob(os.path.join(analysis_directory, optimizer, '*', '*'))[0]
            data_config = json.load(open(os.path.join(model_log_dir, 'data_config.json'), 'r'))
            model_config = json.load(open(os.path.join(model_log_dir, 'model_config.json'), 'r'))

            # Instantiate model
            surrogate_model = utils.model_dict[model_config['model']](data_root=nasbench_data, log_dir=None,
                                                                      seed=data_config['seed'], data_config=data_config,
                                                                      model_config=model_config)
            surrogate_model.load(os.path.join(model_log_dir, 'surrogate_model.model'))

            left_out_optimizer_paths = result_loader.filter_duplicate_dirs(
                glob.glob(os.path.join(nasbench_data, optimizer, '*')))
            _, val_preds, val_true = surrogate_model.evaluate(left_out_optimizer_paths)
            data_min = min(data_min, *val_preds, *val_true)
            data_max = max(data_max, *val_preds, *val_true)
            if not isinstance(val_preds, list):
                val_preds, val_true = val_preds.tolist(), val_true.tolist()
            result_dict[optimizer] = {'val_preds': val_preds, 'val_true': val_true}
        except FileNotFoundError:
            pass

    fig = plt.figure(figsize=(3, 20))
    grid = ImageGrid(fig, 111,  # as in plt.subplot(111)
                     nrows_ncols=(len(optimizers), 1),
                     axes_pad=0.5,
                     share_all=True)
    counter = 0
    statistics = {}
    for ax, (optimizer, results) in zip(grid, result_dict.items()):
        if counter == 0:
            ax.set_xlabel('Predicted')
            ax.set_ylabel('True')
            counter += 1
        ax.scatter(results['val_preds'], results['val_true'], s=1, alpha=0.15)
        kendall = stats.kendalltau(np.round(np.array(results['val_preds']), decimals=1), results['val_true'])

        statistics[optimizer] = {
            **evaluate_metrics(results['val_true'], results['val_preds'], prediction_is_first_arg=False)
        }
        ax.set_xlim(data_min, data_max)
        ax.set_ylim(data_min, data_max)
        ax.plot([data_min, data_max], [data_min, data_max], c='r', alpha=0.3)
        ax.grid(True, which="both", ls="-", alpha=0.1)
        ax.set_title('{} - K:{}'.format(optimizer.upper(), "{:.3f}".format(kendall.correlation)))

    plt.savefig(os.path.join(analysis_directory, 'loo_analysis.png'), dpi=600)

    # Dump statistics
    json.dump(statistics, open(os.path.join(analysis_directory, 'statistics.json'), 'w'))

    # Dump the results from the analysis
    json.dump(result_dict, open(os.path.join(analysis_directory, 'analysis_results.json'), 'w'))
Code example #8
File: sklearn_forest.py Project: LMZimmer/temp
    def validate(self):
        X_val, y_val, _ = self.load_results_from_result_paths(self.val_paths)
        val_pred, var_val = self.model.predict(X_val), None
        
        valid_metrics = utils.evaluate_metrics(y_val, val_pred, prediction_is_first_arg=False)

        logging.info('validation metrics %s', valid_metrics)

        return valid_metrics
Code example #9
File: sklearn_forest.py Project: LMZimmer/temp
    def test(self):
        X_test, y_test, _ = self.load_results_from_result_paths(self.test_paths)
        test_pred, var_test = self.model.predict(X_test), None

        fig = utils.scatter_plot(np.array(test_pred), np.array(y_test), xlabel='Predicted', ylabel='True', title='')
        fig.savefig(os.path.join(self.log_dir, 'pred_vs_true_test.jpg'))
        plt.close()

        test_metrics = utils.evaluate_metrics(y_test, test_pred, prediction_is_first_arg=False)

        logging.info('test metrics %s', test_metrics)

        return test_metrics
Code example #10
def validate_and_test_surrogate_model(model_log_dir):
    # Instantiate surrogate model
    surrogate_model = load_surrogate_model(model_log_dir)
    flatten = lambda l: [item for sublist in l for item in sublist]
    aposteriori_analysis = {}
    for split in ['val', 'test']:
        paths = flatten([
            json.load(open(val_opt)) for val_opt in glob.glob(
                os.path.join(model_log_dir, '*_{}_paths.json'.format(split)))
        ])
        _, preds, true = surrogate_model.evaluate(paths)
        aposteriori_analysis[split] = utils.evaluate_metrics(
            true, preds, prediction_is_first_arg=False)
    print(aposteriori_analysis)
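
Note: a hypothetical invocation, borrowing a model log directory of the kind shown in example #19 (the path is purely illustrative):

validate_and_test_surrogate_model(
    '/home/user/projects/nasbench_201_2/experiments/surrogate_models/lgb/20200919-135720-6')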
Code example #11
    def train(self):
        X_train, y_train, _ = self.load_results_from_result_paths(self.train_paths)
        X_val, y_val, _ = self.load_results_from_result_paths(self.val_paths)

        dtrain = xgb.DMatrix(X_train, label=y_train)
        dval = xgb.DMatrix(X_val, label=y_val)

        param_config = self.parse_param_config()
        param_config["seed"] = self.seed

        self.model = xgb.train(param_config, dtrain, num_boost_round=self.model_config["param:num_rounds"],
                               early_stopping_rounds=self.model_config["early_stopping_rounds"],
                               verbose_eval=1,
                               evals=[(dval, 'val')])

        train_pred, var_train = self.model.predict(dtrain), None
        val_pred, var_val = self.model.predict(dval), None

        # self.save()

        fig_train = utils.scatter_plot(np.array(train_pred), np.array(y_train), xlabel='Predicted', ylabel='True',
                                       title='')
        fig_train.savefig(os.path.join(self.log_dir, 'pred_vs_true_train.jpg'))
        plt.close()

        fig_val = utils.scatter_plot(np.array(val_pred), np.array(y_val), xlabel='Predicted', ylabel='True', title='')
        fig_val.savefig(os.path.join(self.log_dir, 'pred_vs_true_val.jpg'))
        plt.close()

        train_metrics = utils.evaluate_metrics(y_train, train_pred, prediction_is_first_arg=False)
        valid_metrics = utils.evaluate_metrics(y_val, val_pred, prediction_is_first_arg=False)

        logging.info('train metrics: %s', train_metrics)
        logging.info('valid metrics: %s', valid_metrics)

        return valid_metrics
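
Note: older xgboost versions predict with all boosted rounds by default, even when early stopping ended training earlier; iteration_range (available since xgboost 1.4) makes the best-round intent explicit. A self-contained sketch on toy data (illustrative, not project code):

import numpy as np
import xgboost as xgb

X, y = np.random.rand(200, 5), np.random.rand(200)
dtrain = xgb.DMatrix(X[:160], label=y[:160])
dval = xgb.DMatrix(X[160:], label=y[160:])
booster = xgb.train({'objective': 'reg:squarederror'}, dtrain,
                    num_boost_round=500, early_stopping_rounds=20,
                    evals=[(dval, 'val')], verbose_eval=False)
# Restrict prediction to the rounds up to and including the best one.
val_pred = booster.predict(dval,
                           iteration_range=(0, booster.best_iteration + 1))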
Code example #12
File: bananas.py Project: LMZimmer/temp
    def evaluate(self, result_paths):
        # Get evaluation data
        eval_queue = self.load_results_from_result_paths(result_paths)

        preds = []
        targets = []
        self.model.eval()
        for step, (arch_path_enc, y_true) in enumerate(eval_queue):
            arch_path_enc = arch_path_enc.to(self.device).float()
            y_true = y_true.to(self.device).float()
            pred = self.model(arch_path_enc)
            preds.extend(pred.detach().cpu().numpy() * 100)
            targets.extend(y_true.detach().cpu().numpy())

        test_metrics = utils.evaluate_metrics(np.array(targets),
                                              np.array(preds),
                                              prediction_is_first_arg=False)
        return test_metrics, preds, targets
Code example #13
    def evaluate(self, result_paths):
        # Get evaluation data
        eval_queue = self.load_results_from_result_paths(result_paths)

        preds = []
        targets = []
        self.model.eval()
        for step, graph_batch in enumerate(eval_queue):
            graph_batch = graph_batch.to(self.device)

            pred = self.model(graph_batch=graph_batch)
            preds.extend(pred.detach().cpu().numpy() * 100)
            targets.extend(graph_batch.y.detach().cpu().numpy())

        test_metrics = utils.evaluate_metrics(np.array(targets),
                                              np.array(preds),
                                              prediction_is_first_arg=False)
        return test_metrics, np.array(preds), np.array(targets)
Code example #14
File: bananas.py Project: LMZimmer/temp
    def infer(self, train_queue, valid_queue, model, criterion, optimizer, lr,
              epoch):
        objs = utils.AvgrageMeter()

        # VALIDATION
        preds = []
        targets = []

        # Switch to eval mode before validating (present in the sibling
        # implementations, examples #5 and #16, but missing here).
        self.model.eval()
        for step, (arch_path_enc, y_true) in enumerate(valid_queue):
            arch_path_enc = arch_path_enc.to(self.device).float()
            y_true = y_true.to(self.device).float()
            pred = self.model(arch_path_enc)
            loss = torch.mean(
                torch.abs((self.normalize_data(pred) /
                           self.normalize_data(y_true / 100)) - 1))
            preds.extend(pred.detach().cpu().numpy() * 100)
            targets.extend(y_true.detach().cpu().numpy())
            objs.update(loss.data.item(), len(arch_path_enc))

            if step % self.data_config['report_freq'] == 0:
                logging.info('valid %03d %e ', step, objs.avg)

        fig = utils.scatter_plot(np.array(preds),
                                 np.array(targets),
                                 xlabel='Predicted',
                                 ylabel='True',
                                 title='')
        fig.savefig(
            os.path.join(self.log_dir,
                         'pred_vs_true_valid_{}.jpg'.format(epoch)))
        plt.close()

        val_results = utils.evaluate_metrics(np.array(targets),
                                             np.array(preds),
                                             prediction_is_first_arg=False)

        return objs.avg, val_results
Code example #15
    def evaluate_ensemble(self, result_paths, apply_noise):
        """Evaluates the metrics on the result paths using ensemble predicitons"""
        preds, targets = [], []

        # Collect individuals predictions
        for member_model in self.ensemble_members:
            member_metrics, member_preds, member_targets = member_model.evaluate(
                result_paths)
            logging.info("==> Eval member metrics: %s", member_metrics)
            if len(targets) == 0:
                preds.append(member_preds)
                targets = member_targets
                continue
            if np.any((targets - member_targets) > 1e-5):
                raise ValueError("Ensemble members have different targets!")
            preds.append(member_preds)

        means = np.mean(preds, axis=0)
        stddevs = np.std(preds, axis=0)

        # Apply noise
        if apply_noise:
            noisy_predictions = [
                np.random.normal(loc=mean, scale=stddev, size=1)[0]
                for mean, stddev in zip(means, stddevs)
            ]
            ensemble_predictions = noisy_predictions
        else:
            ensemble_predictions = means

        # Evaluate metrics
        metrics = utils.evaluate_metrics(targets,
                                         ensemble_predictions,
                                         prediction_is_first_arg=False)

        return metrics, ensemble_predictions, stddevs, targets
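
Note: the aggregation and noise steps above reduce to vectorized numpy calls. A minimal standalone illustration with made-up predictions from three ensemble members:

import numpy as np

member_preds = np.array([[93.1, 90.2, 87.4],   # member 1, three architectures
                         [93.5, 89.8, 87.9],   # member 2
                         [92.9, 90.5, 87.1]])  # member 3
means = member_preds.mean(axis=0)
stddevs = member_preds.std(axis=0)
# apply_noise=True draws one sample per architecture from N(mean, std);
# np.random.normal broadcasts, replacing the per-element loop above.
noisy_predictions = np.random.normal(loc=means, scale=stddevs)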
Code example #16
    def infer(self, train_queue, valid_queue, model, criterion, optimizer, lr,
              epoch):
        objs = utils.AvgrageMeter()

        # VALIDATION
        preds = []
        targets = []

        model.eval()
        for step, graph_batch in enumerate(valid_queue):
            graph_batch = graph_batch.to(self.device)

            if self.model_config['model'] == 'gnn_vs_gae_classifier':
                pred_bins, pred = self.model(graph_batch=graph_batch)
                criterion = torch.nn.BCELoss()
                criterion_2 = torch.nn.MSELoss()

                bins = self.create_bins(lower_bound=0, width=10, quantity=9)
                binned_weights = []
                for value in graph_batch.y.cpu().numpy():
                    bin_index = self.find_bin(value, bins)
                    binned_weights.append(bin_index)
                bins = torch.FloatTensor(binned_weights)
                make_one_hot = lambda index: torch.eye(self.model_config[
                    'no_bins'])[index.view(-1).long()]
                bins_one_hot = make_one_hot(bins).to(self.device)

                loss_1 = criterion(pred_bins, bins_one_hot)
                loss_2 = criterion_2(pred, self.normalize_data(graph_batch.y))
                alpha = self.model_config['classification_loss']
                beta = self.model_config['regression_loss']

                loss = alpha * loss_1 + beta * loss_2
            else:
                pred = self.model(graph_batch=graph_batch)
                loss = criterion(self.normalize_data(pred),
                                 self.normalize_data(graph_batch.y / 100))

            preds.extend(pred.detach().cpu().numpy() * 100)
            targets.extend(graph_batch.y.detach().cpu().numpy())
            n = graph_batch.num_graphs
            objs.update(loss.data.item(), n)

            if step % self.data_config['report_freq'] == 0:
                logging.info('valid %03d %e ', step, objs.avg)

        fig = utils.scatter_plot(np.array(preds),
                                 np.array(targets),
                                 xlabel='Predicted',
                                 ylabel='True',
                                 title='')
        fig.savefig(
            os.path.join(self.log_dir,
                         'pred_vs_true_valid_{}.jpg'.format(epoch)))
        plt.close()

        val_results = utils.evaluate_metrics(np.array(targets),
                                             np.array(preds),
                                             prediction_is_first_arg=False)

        return objs.avg, val_results
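
Note: create_bins and find_bin are project helpers that do not appear on this page. Hypothetical implementations consistent with the calls above (half-open width-10 accuracy bins; the real project code may differ):

def create_bins(lower_bound, width, quantity):
    # create_bins(0, 10, 9) -> [(0, 10), (10, 20), ..., (80, 90)]
    return [(lower_bound + i * width, lower_bound + (i + 1) * width)
            for i in range(quantity)]

def find_bin(value, bins):
    # Return the index of the bin containing value, clamping out-of-range
    # values to the last bin.
    for index, (low, high) in enumerate(bins):
        if low <= value < high:
            return index
    return len(bins) - 1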
Code example #17
    def train_epoch(self, train_queue, valid_queue, model, criterion,
                    optimizer, lr, epoch):
        objs = utils.AvgrageMeter()

        # TRAINING
        preds = []
        targets = []

        model.train()

        for step, graph_batch in enumerate(train_queue):
            graph_batch = graph_batch.to(self.device)
            #             print(step)

            if self.model_config['model'] == 'gnn_vs_gae_classifier':
                pred_bins, pred = self.model(graph_batch=graph_batch)
                criterion = torch.nn.BCELoss()
                criterion_2 = torch.nn.MSELoss()

                bins = self.create_bins(lower_bound=0, width=10, quantity=9)
                binned_weights = []
                for value in graph_batch.y.cpu().numpy():
                    bin_index = self.find_bin(value, bins)
                    binned_weights.append(bin_index)
                bins = torch.FloatTensor(binned_weights)
                make_one_hot = lambda index: torch.eye(self.model_config[
                    'no_bins'])[index.view(-1).long()]
                bins_one_hot = make_one_hot(bins).to(self.device)
                loss_1 = criterion(pred_bins, bins_one_hot)
                loss_2 = criterion_2(pred, self.normalize_data(graph_batch.y))
                alpha = self.model_config['classification_loss']
                beta = self.model_config['regression_loss']

                loss = alpha * loss_1 + beta * loss_2

            else:
                pred = self.model(graph_batch=graph_batch)
                if self.model_config['loss:loss_log_transform']:
                    loss = criterion(self.normalize_data(pred),
                                     self.normalize_data(graph_batch.y / 100))
                else:
                    loss = criterion(pred, graph_batch.y / 100)
                if self.model_config['loss:pairwise_ranking_loss']:
                    m = 0.1
                    '''
                    y = list(map(lambda y_i: 1 if y_i == True else -1, graph_batch.y[0: -1] > graph_batch.y[1:]))
                    pairwise_ranking_loss = torch.nn.HingeEmbeddingLoss(margin=m)(pred[0:-1] - pred[1:],
                                                                                  target=torch.from_numpy(np.array(y)))
                    '''
                    pairwise_ranking_loss = []
                    sort_idx = torch.argsort(graph_batch.y, descending=True)
                    for idx, idx_y_i in enumerate(sort_idx):
                        for idx_y_i_p1 in sort_idx[idx + 1:]:
                            pairwise_ranking_loss.append(
                                torch.max(
                                    torch.tensor(0.0, dtype=torch.float),
                                    m - (pred[idx_y_i] - pred[idx_y_i_p1])))
                    pairwise_ranking_loss = torch.mean(
                        torch.stack(pairwise_ranking_loss))

                    loss += pairwise_ranking_loss
                    if step % self.data_config['report_freq'] == 0:
                        logging.info('Pairwise ranking loss {}'.format(
                            pairwise_ranking_loss))

            preds.extend(pred.detach().cpu().numpy() * 100)
            targets.extend(graph_batch.y.detach().cpu().numpy())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            n = graph_batch.num_graphs
            objs.update(loss.data.item(), n)

            if step % self.data_config['report_freq'] == 0:
                logging.info('train %03d %e', step, objs.avg)

        fig = utils.scatter_plot(np.array(preds),
                                 np.array(targets),
                                 xlabel='Predicted',
                                 ylabel='True',
                                 title='')
        fig.savefig(
            os.path.join(self.log_dir,
                         'pred_vs_true_train_{}.jpg'.format(epoch)))
        plt.close()
        train_results = utils.evaluate_metrics(np.array(targets),
                                               np.array(preds),
                                               prediction_is_first_arg=False)

        return objs.avg, train_results
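
Note: the nested loop over sort_idx builds every hinge term in Python and is O(n²); the same terms can be computed with broadcasting. A sketch is below; it skips pairs with tied targets, a minor deviation from the loop, which orders ties arbitrarily.

import torch

def pairwise_ranking_loss(pred, y, margin=0.1):
    pred, y = pred.view(-1), y.view(-1)
    # pred_diff[i, j] = pred[i] - pred[j]; penalize whenever y[i] > y[j].
    pred_diff = pred.unsqueeze(1) - pred.unsqueeze(0)
    mask = ((y.unsqueeze(1) - y.unsqueeze(0)) > 0).float()
    hinge = torch.clamp(margin - pred_diff, min=0.0)
    # Mean over the penalized pairs, guarding against an empty mask.
    return (hinge * mask).sum() / mask.sum().clamp(min=1.0)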
Code example #18
File: bananas.py Project: LMZimmer/temp
    def train_epoch(self, train_queue, valid_queue, model, criterion,
                    optimizer, lr, epoch):
        objs = utils.AvgrageMeter()

        # TRAINING
        preds = []
        targets = []

        model.train()

        for step, (arch_path_enc, y_true) in enumerate(train_queue):
            arch_path_enc = arch_path_enc.to(self.device).float()
            y_true = y_true.to(self.device).float()

            pred = self.model(arch_path_enc)
            if self.model_config['loss:loss_log_transform']:
                loss = torch.mean(
                    torch.abs((self.normalize_data(pred) /
                               self.normalize_data(y_true / 100)) - 1))
            else:
                loss = criterion(1 - pred, 1 - y_true / 100)
            if self.model_config['loss:pairwise_ranking_loss']:
                m = 0.1
                pairwise_ranking_loss = []
                sort_idx = torch.argsort(y_true, descending=True)
                for idx, idx_y_i in enumerate(sort_idx):
                    for idx_y_i_p1 in sort_idx[idx + 1:]:
                        pairwise_ranking_loss.append(
                            torch.max(torch.tensor(0.0, dtype=torch.float),
                                      m - (pred[idx_y_i] - pred[idx_y_i_p1])))
                pairwise_ranking_loss = torch.mean(
                    torch.stack(pairwise_ranking_loss))

                loss += pairwise_ranking_loss
                if step % self.data_config['report_freq'] == 0:
                    logging.info('Pairwise ranking loss {}'.format(
                        pairwise_ranking_loss))

            preds.extend(pred.detach().cpu().numpy() * 100)
            targets.extend(y_true.detach().cpu().numpy())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            objs.update(loss.data.item(), len(arch_path_enc))

            if step % self.data_config['report_freq'] == 0:
                logging.info('train %03d %e', step, objs.avg)

        fig = utils.scatter_plot(np.array(preds),
                                 np.array(targets),
                                 xlabel='Predicted',
                                 ylabel='True',
                                 title='')
        fig.savefig(
            os.path.join(self.log_dir,
                         'pred_vs_true_train_{}.jpg'.format(epoch)))
        plt.close()
        train_results = utils.evaluate_metrics(np.array(targets),
                                               np.array(preds),
                                               prediction_is_first_arg=False)

        return objs.avg, train_results
Code example #19
def analyze_cell_topology():
    config_loader = ConfigLoader('configspace.json')

    # Load groundtruth data
    cell_depths = [
        compute_depth_of_cell_simple_path(config_loader[gt][0])[0]
        for gt in glob.glob(
            '/home/user/projects/nasbench_201_2/analysis/nb_301_cell_topology/cell_topology_analysis/results_*.json'
        )
    ]

    gt_data_paths = [
        gt for gt in glob.glob(
            '/home/user/projects/nasbench_201_2/analysis/nb_301_cell_topology/cell_topology_analysis/results_*.json'
        )
    ]

    surrogate_models = {
        'GIN':
        load_surrogate_model(
            '/home/user/projects/nasbench_201_2/experiments/surrogate_models/gnn_gin/20200919-135631-6'
        ),
        'LGB':
        load_surrogate_model(
            '/home/user/projects/nasbench_201_2/experiments/surrogate_models/lgb/20200919-135720-6'
        ),
        'XGB':
        load_surrogate_model(
            '/home/user/projects/nasbench_201_2/experiments/surrogate_models/xgb/20200919-135720-6'
        )
    }

    surrogate_model_results = {'cell_depth': cell_depths}

    for surrogate_model_name, surrogate_model in surrogate_models.items():
        test_metrics, preds, targets = surrogate_model.evaluate(gt_data_paths)
        surrogate_model_results[surrogate_model_name + '_preds'] = preds
        surrogate_model_results[surrogate_model_name + '_targets'] = targets

    fig, ax_left = plt.subplots(figsize=(4, 3))
    ax_left.set_ylabel('Sparse Kendall Tau')

    for surrogate_model in surrogate_models:
        idx = 0
        for cell_depth, group in pd.DataFrame(surrogate_model_results).groupby(
                'cell_depth'):
            preds, targets = group['{}_preds'.format(surrogate_model)], group[
                '{}_targets'.format(surrogate_model)]
            metrics = evaluate_metrics(targets,
                                       preds,
                                       prediction_is_first_arg=False)
            print(surrogate_model, cell_depth, metrics)
            ax_left.scatter(
                cell_depth,
                metrics['kendall_tau_1_dec'],
                marker=surrogate_model_markers[surrogate_model.upper()],
                label=surrogate_model.upper() if idx == 0 else None,
                c=surrogate_model_colors[surrogate_model.upper()])
            idx += 1

    plt.legend()
    plt.grid(True, which="both", ls="-", alpha=0.5)
    '''
    ax_right = ax_left.twinx()  # instantiate a second axes that shares the same x-axis
    ax_right.set_ylabel('R² (x)')

    for surrogate_model in surrogate_models:
        idx = 0
        for cell_depth, group in pd.DataFrame(surrogate_model_results).groupby('cell_depth'):
            preds, targets = group['{}_preds'.format(surrogate_model)], group['{}_targets'.format(surrogate_model)]
            metrics = evaluate_metrics(targets, preds, prediction_is_first_arg=False)
            ax_right.scatter(cell_depth, metrics['r2'], marker='x',
                             c=surrogate_model_colors[surrogate_model.upper()])
            idx += 1
    '''
    ax_left.set_xlabel('Cell Depth')
    plt.tight_layout()
    plt.savefig('surrogate_models/analysis/cell_topology_analysis.pdf')