Beispiel #1
0
# Pivot cv_results into a matrix of mean test scores with one row per
# l1_ratio value and one column per alpha value, then draw it as an annotated
# heatmap (cells formatted as percentages with one decimal place).
cv_score_mat = pd.pivot_table(
    data=cv_results,
    index='classify__l1_ratio',
    columns='classify__alpha',
    values='mean_test_score',
)
ax = sns.heatmap(cv_score_mat, annot=True, fmt='.1%')
ax.set(
    xlabel='Regularization strength multiplier (alpha)',
    ylabel='Elastic net mixing parameter (l1_ratio)',
)

# Write the figure to cv_heatmap_file and close it afterwards
plt.tight_layout()
plt.savefig(cv_heatmap_file, bbox_inches='tight', dpi=600)
plt.close()

# Compute decision-function scores for the training and testing samples and
# summarize each set of scores with get_threshold_metrics.
# NOTE(review): `keep_inter` is forwarded as `drop_intermediate`; the two names
# read as opposites -- confirm the flag is not inverted where it is defined.
y_predict_train = cv_pipeline.decision_function(x_train)
y_predict_test = cv_pipeline.decision_function(x_test)
metrics_train = get_threshold_metrics(
    y_train, y_predict_train, drop_intermediate=keep_inter
)
metrics_test = get_threshold_metrics(
    y_test, y_predict_test, drop_intermediate=keep_inter
)

# Repeat the "cross validation" using only the best hyperparameter set so that
# cross-validation (disease-specific) performance can be defined: every
# training sample is scored by the fold in which it sat in the testing
# partition.
y_cv = cross_val_predict(
    cv_pipeline.best_estimator_,
    x_train,
    y_train,
    cv=folds,
    method='decision_function',
)
metrics_cv = get_threshold_metrics(
    y_train, y_cv, drop_intermediate=keep_inter
)
Beispiel #2
0
                        "seed: {}".format(gene_name, alg, signal, z_dim, seed)
                    )

                    # Fit the model; train_model hands back the pipeline plus
                    # the train, test and cross-validation predictions
                    (
                        cv_pipeline,
                        y_pred_train_df,
                        y_pred_test_df,
                        y_cv_df,
                    ) = train_model(
                        x_train=x_train_df,
                        x_test=x_test_df,
                        y_train=y_train_df,
                        alphas=cfg.alphas,
                        l1_ratios=cfg.l1_ratios,
                        n_folds=cfg.folds,
                        max_iter=cfg.max_iter,
                    )

                    # Get metric predictions. The training labels are scored
                    # twice: against the direct training predictions and
                    # against the cross-validation predictions.
                    train_labels = y_train_df.status
                    y_train_results = get_threshold_metrics(
                        train_labels,
                        y_pred_train_df,
                        drop=False,
                    )
                    y_test_results = get_threshold_metrics(
                        y_test_df.status,
                        y_pred_test_df,
                        drop=False,
                    )
                    y_cv_results = get_threshold_metrics(
                        train_labels,
                        y_cv_df,
                        drop=False,
                    )

                    # Get coefficients
                    coef_df = extract_coefficients(
                        cv_pipeline=cv_pipeline,
                        feature_names=x_train_df.columns,
                        signal=signal,
                        z_dim=z_dim,
                        seed=seed,
Beispiel #3
0
                                 X_test,
                                 y_train,
                                 sklearn_param_choices['alpha'],
                                 sklearn_param_choices['l1_ratio'],
                                 seed=args.seed)

    # Split the sklearn output into its four parts
    # (the `_bn` pair is presumably the binarized predictions -- TODO confirm)
    (y_pred_train,
     y_pred_test,
     y_pred_bn_train,
     y_pred_bn_test) = y_pred

    # Accuracy = share of positions where the `_bn` prediction equals the label
    n_train = len(y_pred_train)
    sk_train_acc = sum(
        1 for i in range(n_train) if y_pred_bn_train[i] == y_train[i]
    ) / n_train

    n_test = len(y_pred_test)
    sk_test_acc = sum(
        1 for i in range(n_test) if y_pred_bn_test[i] == y_test[i]
    ) / n_test

    # Threshold metrics are computed from the non-`_bn` predictions
    sk_train_results = get_threshold_metrics(y_train, y_pred_train)
    sk_test_results = get_threshold_metrics(y_test, y_pred_test)

    # Train the torch model on the same train/test data; save_weights=True is
    # forwarded to train_torch_model
    losses, preds, preds_bn = model.train_torch_model(
        X_train, X_test, y_train, y_test, save_weights=True
    )

    # Both returned pairs unpack as (train, test). This rebinds the prediction
    # names, so any values they held before this point are replaced.
    y_pred_train, y_pred_test = preds
    y_pred_bn_train, y_pred_bn_test = preds_bn

    # Accuracy is computed on the flattened `_bn` predictions
    torch_train_acc = TorchLR.calculate_accuracy(
        y_train, y_pred_bn_train.flatten()
    )
    torch_test_acc = TorchLR.calculate_accuracy(
        y_test, y_pred_bn_test.flatten()
    )
Beispiel #4
0
            # Work on a copy so that overwriting the 'weight' column with the
            # torch weights leaves s_coef_df itself untouched
            t_coef_df = s_coef_df.copy()
            t_coef_df['weight'] = torch_weights

            # Accumulate the coefficient tables: the first table seen seeds
            # each running frame, later tables are concatenated onto it
            sklearn_coef_df = (
                s_coef_df
                if sklearn_coef_df is None
                else pd.concat((sklearn_coef_df, s_coef_df))
            )
            torch_coef_df = (
                t_coef_df
                if torch_coef_df is None
                else pd.concat((torch_coef_df, t_coef_df))
            )

            # Threshold metrics for the torch predictions on the subtrain and
            # tune splits
            torch_train_results = get_threshold_metrics(
                y_subtrain, torch_pred_train, drop=False
            )
            torch_tune_results = get_threshold_metrics(
                y_tune, torch_pred_tune, drop=False
            )

            # The same metrics for the sklearn predictions
            sklearn_train_results = get_threshold_metrics(
                y_subtrain, sklearn_pred_train, drop=False
            )
            sklearn_tune_results = get_threshold_metrics(
                y_tune, sklearn_pred_tune, drop=False
            )

            # Append the torch subtrain AUROC and AUPR to the cv_results lists
            cv_results['torch_train_auroc'].append(
                torch_train_results['auroc']
            )
            cv_results['torch_train_aupr'].append(
                torch_train_results['aupr']
            )