Example #1
    def guppy(self, data):

        fwidth = []
        swidth = []
        rwb = []
        sband_min = []
        sband_max = []

        for index, row in data.iterrows():
            fast = [
                row['ema3'], row['ema5'], row['ema7'], row['ema10'],
                row['ema12'], row['ema15']
            ]
            slow = [
                row['ema30'], row['ema35'], row['ema40'], row['ema45'],
                row['ema50'], row['ema60']
            ]
            fmin, fmax = utils.minmax(fast)
            smin, smax = utils.minmax(slow)
            sband_min.append(smin)
            sband_max.append(smax)

            if row['ema3'] > row['ema15']:
                fwidth.append(fmax - fmin)
            else:
                fwidth.append(fmin - fmax)

            if row['ema30'] > row['ema60']:
                swidth.append(smax - smin)
            else:
                swidth.append(smin - smax)

            if fmin > smax:
                rwb.append(fmin - smax)
            elif smin > fmax:
                rwb.append(fmax - smin)
            else:
                rwb.append(0.0)

        data['fwidth'] = fwidth
        data['fwidth_pb'] = utils.positive_bars(data['fwidth'])
        data['fwidth_roc'] = utils.roc(fwidth)
        data['fwidth_roc_pb'] = utils.positive_bars(data['fwidth_roc'])
        data['fwidth_ranking'] = utils.relative_rank(data['fwidth'])
        data['swidth'] = swidth
        data['sband_min'] = sband_min
        data['sband_max'] = sband_max
        data['swidth_pb'] = utils.positive_bars(data['swidth'])
        data['swidth_roc'] = utils.roc(swidth)
        data['swidth_roc_pb'] = utils.positive_bars(data['swidth_roc'])
        data['swidth_ranking'] = utils.relative_rank(data['swidth'])
        data['rwb'] = rwb
        data['rwb_pb'] = utils.positive_bars(data['rwb'])
        data['rwb_roc'] = utils.roc(rwb)
        data['rwb_roc_pb'] = utils.positive_bars(data['rwb_roc'])
        data['rwb_ranking'] = utils.relative_rank(data['rwb'])
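
The `utils.minmax` and `utils.relative_rank` helpers used above are not included in this listing. The following is only a rough sketch of plausible implementations; the trailing-window percentile ranking in particular is an assumption, so treat the real module's behaviour as unknown.

import pandas as pd

def minmax(values):
    # Smallest and largest of the supplied EMA values.
    return min(values), max(values)

def relative_rank(series, window=100):
    # Assumed helper: percentile rank of each value within a trailing
    # window, expressed as a fraction between 0 and 1.
    s = pd.Series(series, dtype=float)
    return (s.rolling(window, min_periods=1)
             .apply(lambda w: (w <= w.iloc[-1]).mean(), raw=False)
             .tolist())
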
Example #2
    def macd(self, data):
        np_closes = np.array(data['close'], dtype=float)
        macd, macd_sig, macd_hist = talib.MACD(np_closes)
        data['macd'] = macd.tolist()
        data['macd_sig'] = macd_sig.tolist()
        data['macd_hist'] = macd_hist.tolist()

        data['macd_roc'] = utils.roc(data['macd'])
        data['macd_roc_pb'] = utils.positive_bars(data['macd_roc'])
        data['macd_sig_roc'] = utils.roc(data['macd_sig'])
        data['macd_sig_roc_pb'] = utils.positive_bars(data['macd_sig_roc'])
        data['macd_hist_roc'] = utils.roc(data['macd_hist'])
        data['macd_hist_pb'] = utils.positive_bars(data['macd_hist'])
        data['macd_hist_roc_pb'] = utils.positive_bars(data['macd_hist_roc'])
Example #3
    def stochastic(self, data):
        np_close = np.array(data['close'], dtype=float)
        np_high = np.array(data['high'], dtype=float)
        np_low = np.array(data['low'], dtype=float)
        slowk, slowd = talib.STOCH(
            np_high, np_low, np_close, fastk_period=14, slowk_period=3)
        data['slowk'] = slowk
        data['slowd'] = slowd
        data['slowk_roc'] = utils.roc(data['slowk'])
        data['slowd_roc'] = utils.roc(data['slowd'])
        data['diff_slow_kd'] = slowk - slowd
        data['diff_slow_kd_pb'] = utils.positive_bars(data['diff_slow_kd'])
Example #4
    def ma(self, data):
        np_closes = np.array(data['close'], dtype=float)
        for period in ma_periods:
            data['ema' + str(period)] = talib.EMA(
                np_closes, timeperiod=period).tolist()
            data['ema' + str(period) + '_roc'] = utils.roc(
                data['ema' + str(period)])
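
In Examples #1 to #4, `utils.roc` is a rate-of-change indicator applied to a list or column of values, and `utils.positive_bars` counts how long a series has stayed positive. Those helpers are not part of this listing, so here is only a minimal sketch under those assumptions (the percentage formula and the zero padding are guesses):

import numpy as np

def roc(values, period=1):
    # Assumed rate-of-change helper: percentage change versus `period`
    # bars earlier, zero-padded so the output matches the input length.
    v = np.asarray(values, dtype=float)
    out = np.zeros_like(v)
    out[period:] = (v[period:] - v[:-period]) / v[:-period] * 100.0
    return out.tolist()

def positive_bars(values):
    # Assumed helper: for each bar, the number of consecutive bars
    # (including the current one) with a positive value.
    count, out = 0, []
    for x in values:
        count = count + 1 if x > 0 else 0
        out.append(count)
    return out
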
Example #5
def random_roc():
    # random data should give 0.5 eer
    # and random roc curve
    pos = np.random.random(1000)
    neg = np.random.random(1000)

    # compare eer versions
    print(utils.eer(pos, neg))

    # plot curve
    fars, frrs = utils.roc(pos, neg)
    plt.plot(fars, frrs)
    plt.show()
Example #6
def separable_roc():
    # separable data should give low
    # eer and convex roc curve
    pos = np.random.normal(1, 0.5, 1000)
    neg = np.random.normal(0, 0.5, 1000)

    # compare eer versions
    print(utils.eer(pos, neg))

    # plot curve
    fars, frrs = utils.roc(pos, neg)
    plt.plot(fars, frrs)
    plt.show()
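
Examples #5, #6, and #11 use a different `utils.roc`: it takes positive and negative comparison scores and returns false-accept and false-reject rates, and `utils.eer` returns the equal error rate (about 0.5 for random scores, as the comment above notes). A minimal sketch under that assumption:

import numpy as np

def roc(pos, neg):
    # Assumed biometric-style ROC: sweep a threshold over all scores and
    # report the false accept rate (negatives at or above the threshold)
    # and false reject rate (positives below it) at each threshold.
    pos, neg = np.asarray(pos, dtype=float), np.asarray(neg, dtype=float)
    thresholds = np.sort(np.concatenate([pos, neg]))
    fars = np.array([(neg >= t).mean() for t in thresholds])
    frrs = np.array([(pos < t).mean() for t in thresholds])
    return fars, frrs

def eer(pos, neg):
    # Equal error rate: the point where the FAR and FRR curves cross,
    # approximated by the threshold where they are closest.
    fars, frrs = roc(pos, neg)
    idx = np.argmin(np.abs(fars - frrs))
    return (fars[idx] + frrs[idx]) / 2.0
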
Example #7
def evaluate_model(dataset, save_file, random_state, pipeline_components,
                   pipeline_parameters, n_combos, label):

    features, labels, feature_names = read_file(dataset, label)
    # pipelines = [dict(zip(pipeline_parameters.keys(), list(parameter_combination)))
    #              for parameter_combination in itertools.product(*pipeline_parameters.values())]

    # Create a temporary folder to store the transformers of the pipeline
    cachedir = mkdtemp()
    memory = Memory(cachedir=cachedir, verbose=0)

    # print ( pipeline_components)
    # print(pipeline_parameters)
    with warnings.catch_warnings():
        # Squash warning messages. Turn this off when debugging!
        warnings.simplefilter('ignore')
        cv = StratifiedKFold(n_splits=10,
                             shuffle=True,
                             random_state=random_state)
        hyperparameters = {}
        for k, v in pipeline_parameters.items():
            for param, pvals in v.items():
                hyperparameters.update({k + '__' + param: pvals})
        pipeline = Pipeline(pipeline_components, memory=memory)

        # run Randomized Search CV to tune the hyperparameter settings
        est = RandomizedSearchCV(estimator=pipeline,
                                 param_distributions=hyperparameters,
                                 n_iter=n_combos,
                                 cv=cv,
                                 random_state=random_state,
                                 refit=True,
                                 error_score=0.0)
        est.fit(features, labels)
        best_est = est.best_estimator_
        # generate cross-validated predictions for each data point using the best estimator
        cv_predictions = cross_val_predict(estimator=best_est,
                                           X=features,
                                           y=labels,
                                           cv=cv)

        # get cv probabilities
        skip = False
        if getattr(best_est, "predict_proba", None):
            method = "predict_proba"
        elif getattr(best_est, "decision_function", None):
            method = "decision_function"
        else:
            skip = True

        if not skip:
            cv_probabilities = cross_val_predict(estimator=best_est,
                                                 X=features,
                                                 y=labels,
                                                 method=method,
                                                 cv=cv)
            if method == "predict_proba":
                cv_probabilities = cv_probabilities[:, 1]

        accuracy = accuracy_score(labels, cv_predictions)
        macro_f1 = f1_score(labels, cv_predictions, average='macro')
        balanced_accuracy = balanced_accuracy_score(labels, cv_predictions)
        # Guard the AUC computation: when neither predict_proba nor
        # decision_function is available, cv_probabilities was never set.
        roc_auc = -1
        if not skip:
            try:
                roc_auc = roc_auc_score(labels, cv_probabilities)
            except ValueError as ve:
                print("roc_auc_score: %s" % (str(ve)))

        preprocessor_classes = [p[0] for p in pipeline_components[:-1]]

        preprocessor_param_string = 'default'
        for preprocessor_class in preprocessor_classes:
            if preprocessor_class in pipeline_parameters.keys():
                preprocessor_param_string = ','.join([
                    '{}={}'.format(
                        parameter,
                        '|'.join([x.strip() for x in str(value).split(',')]))
                    for parameter, value in
                    pipeline_parameters[preprocessor_class].items()
                ])

        classifier_class = pipeline_components[-1][0]
        param_string = ','.join(
            ['{}={}'.format(p, v) for p, v in est.best_params_.items()])
        # for parameter, value in pipeline_parameters[classifier_class].items()])

        out_text = '\t'.join([
            dataset.split('/')[-1].split('.')[0],
            ','.join(preprocessor_classes), preprocessor_param_string,
            classifier_class, param_string,
            str(random_state),
            str(accuracy),
            str(macro_f1),
            str(balanced_accuracy),
            str(roc_auc)
        ])
        print(out_text)
        with open(save_file, 'a') as out:
            out.write(out_text + '\n')
        sys.stdout.flush()

        # write feature importances
        est_name = classifier_class
        feature_importance(save_file, best_est, est_name, feature_names,
                           features, labels, random_state,
                           ','.join(preprocessor_classes),
                           preprocessor_param_string, classifier_class,
                           param_string)
        # write roc curves
        if not skip:
            roc(save_file, best_est, labels, cv_probabilities, random_state,
                ','.join(preprocessor_classes), preprocessor_param_string,
                classifier_class, param_string)
    # Delete the temporary cache before exiting
    rmtree(cachedir)
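
The `roc(...)` call that writes ROC curves near the end of this example is not defined in the listing. A plausible sketch, assuming it appends (FPR, TPR) points tagged with the run metadata to a companion file; the file-naming convention here is invented for illustration:

from sklearn.metrics import roc_curve

def roc(save_file, est, labels, probabilities, random_state,
        preprocessors, preprocessor_params, classifier, clf_params):
    # Assumed helper: compute the cross-validated ROC curve and append one
    # tab-separated row per point; `est` is accepted only for signature
    # compatibility and is unused in this sketch.
    fpr, tpr, _ = roc_curve(labels, probabilities)
    with open(save_file + '.roc', 'a') as out:
        for f, t in zip(fpr, tpr):
            out.write('\t'.join([preprocessors, preprocessor_params,
                                 classifier, clf_params, str(random_state),
                                 str(f), str(t)]) + '\n')
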
Example #8
    def prediction_roc(self, groups, output_file_path, labels, test_size=0.2, c=.2, verbose=False, save=True,
                       iterations=100, plot=True, kind='test', method='AUC',
                       band=False, fig=None, ax=None, color=None,
                       test_function=get_training_testing, p='', penalty='l1'):
        """

        :param groups: list containing groups of ROC features, for example [[H3K4me3 columns], [H3K27ac columns]]
        :param output_file_path: output file path and prefix
        :param labels: labels of each group
        :param test_size:
        :param c:
        :param verbose:
        :param save:
        :param iterations:
        :param plot:
        :param kind:
        :param method:
        :param band:
        :param fig:
        :param ax:
        :param color:
        :param test_function:
        :param p:
        :param penalty:
        :return:
        """
        train_df = label_label(self.training_table, self.gene_meta_df)

        all_auc_train = {}
        all_tpr_train = {}

        all_auc_test = {}
        all_tpr_test = {}

        scores = defaultdict(float)

        mean_fpr = np.linspace(0, 1, 101)

        for r in range(iterations):
            x_train, x_test, y_train, y_test = test_function(train_df, random_state=r, test_size=test_size)

            y_trues_train = None
            y_scores_train = None

            y_trues_test = None
            y_scores_test = None

            first = True

            for i in range(len(groups)):
                cur_column = groups[i]
                cur_x_train = x_train.loc[:, cur_column]
                cur_x_test = x_test.loc[:, cur_column]

                if len(cur_x_train.shape) == 1:
                    cur_x_train = cur_x_train.to_frame()
                    cur_x_test = cur_x_test.to_frame()

                cur_predictor = predict_logisticregression(cur_x_train, y_train, penalty=penalty, c=c)

                if method == 'score':
                    cur_score = score(cur_x_test, y_test, cur_predictor)
                    scores[labels[i]] += cur_score

                cur_y_score_train = predict_decision(cur_predictor, cur_x_train, False)
                cur_y_score_test = predict_decision(cur_predictor, cur_x_test, False)
                if first:
                    y_trues_train = y_train
                    y_trues_test = y_test
                    y_scores_train = cur_y_score_train.values
                    y_scores_test = cur_y_score_test.values
                    first = False
                else:
                    y_trues_train = np.concatenate((y_trues_train, y_train), axis=1)
                    y_trues_test = np.concatenate((y_trues_test, y_test), axis=1)
                    y_scores_train = np.concatenate((y_scores_train, cur_y_score_train.values), axis=1)
                    y_scores_test = np.concatenate((y_scores_test, cur_y_score_test.values), axis=1)

            auc_train, fpr_train, tpr_train = roc(y_trues_train, y_scores_train, labels)
            auc_test, fpr_test, tpr_test = roc(y_trues_test, y_scores_test, labels)

            for label in labels:
                if kind == 'train' and band:
                    plt.plot(fpr_train[label], tpr_train[label], lw=0.4, alpha=0.1, color='grey')
                elif kind == 'test' and band:
                    plt.plot(fpr_test[label], tpr_test[label], lw=0.4, alpha=0.1, color='grey')
                tpr_train[label] = interp(mean_fpr, fpr_train[label], tpr_train[label])
                tpr_test[label] = interp(mean_fpr, fpr_test[label], tpr_test[label])

            for l in range(len(labels)):
                label = labels[l]
                if label not in all_auc_train:
                    all_auc_train[label] = auc_train[label]
                    all_auc_test[label] = auc_test[label]
                    all_tpr_train[label] = [tpr_train[label]]
                    all_tpr_test[label] = [tpr_test[label]]
                else:
                    all_auc_train[label] += auc_train[label]
                    all_auc_test[label] += auc_test[label]
                    all_tpr_train[label].append(tpr_train[label])
                    all_tpr_test[label].append(tpr_test[label])
        for label in labels:
            all_auc_train[label] /= iterations
            all_auc_test[label] /= iterations

            if method == 'score':
                scores[label] /= iterations

        if plot:
            if kind == 'train':
                roc_plot(all_auc_train, mean_fpr, all_tpr_train, len(labels), output_file_path,
                         labels, verbose=verbose, save=save, band=band, fig=fig, ax=ax, color=color, p=p)
            elif kind == 'test':
                roc_plot(all_auc_test, mean_fpr, all_tpr_test, len(labels), output_file_path,
                         labels, verbose=verbose, save=save, band=band, fig=fig, ax=ax, color=color, p=p)

        if method == 'AUC':
            results_train_df = pd.DataFrame.from_dict(all_auc_train, orient='index')
            results_train_df.columns = ['train']
            results_test_df = pd.DataFrame.from_dict(all_auc_test, orient='index')
            results_test_df.columns = ['test']

            result_df = results_train_df.join(results_test_df)
            return result_df
        elif method == 'score':
            result_df = pd.DataFrame.from_dict(scores, orient='index')
            result_df.columns = ['accuracy']
        else:
            result_df = None

        if kind == 'test':
            for key in all_tpr_test.keys():
                all_tpr_test[key] = np.mean(all_tpr_test[key], axis=0)
                # TPRs_test_df = pd.DataFrame.from_dict(all_tpr_test)
                # TPRs_test_df.to_csv(title+'TPR.csv')
        return result_df
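
In this example, `roc(y_trues, y_scores, labels)` is expected to return per-label AUC, FPR, and TPR dictionaries from column-stacked truths and scores. A minimal sketch under that assumption:

import numpy as np
from sklearn.metrics import roc_curve, auc as curve_auc

def roc(y_trues, y_scores, labels):
    # Assumed helper: one ROC curve per group, returned as dictionaries
    # keyed by the group label.
    y_trues = np.asarray(y_trues).reshape(len(y_trues), -1)
    y_scores = np.asarray(y_scores).reshape(len(y_scores), -1)
    aucs, fprs, tprs = {}, {}, {}
    for i, label in enumerate(labels):
        fpr, tpr, _ = roc_curve(y_trues[:, i], y_scores[:, i])
        aucs[label] = curve_auc(fpr, tpr)
        fprs[label] = fpr
        tprs[label] = tpr
    return aucs, fprs, tprs
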
Example #9
            image_batch = test_images[idx*config.BATCH_SIZE:(idx+1)*config.BATCH_SIZE]
            label_batch = test_labels[idx*config.BATCH_SIZE:(idx+1)*config.BATCH_SIZE]

            latent_loss, latent_gen_loss = sess.run([model.encoded_input, model.encoded_sample], feed_dict={model.image:image_batch})
            latent_error = np.mean(abs(latent_loss-latent_gen_loss), axis=-1)
            latent_error = np.reshape(latent_error, [-1])
            scores_out = np.append(scores_out, latent_error)
            labels_out = np.append(labels_out, label_batch)

            #out_str = "---------->%d/%d" % (config.BATCH_SIZE*idx, config.BATCH_SIZE*test_num_iters)
            #print(out_str, end='\r')

        # Normalize anomaly scores to [0, 1] once all test batches have been
        # scored; doing this inside the batch loop would rescale values that
        # were already normalized.
        scores_out = np.array(scores_out)
        labels_out = np.array(labels_out)
        scores_out = (scores_out - scores_out.min()) / (scores_out.max() - scores_out.min())
        auc_out = utils.roc(labels_out, scores_out)
        print("AUC: %.4f BEST AUC: %.4f" %(auc_out, best_auc))

        if auc_out > best_auc:
            best_auc = auc_out
        #if True:
            # Create directories if needed
            if not os.path.isdir("%s/%04d"%("best_checkpoints",epoch)):
                os.makedirs("%s/%04d"%("best_checkpoints",epoch))

            print('Saving model with global step %d ( = %d epochs) to disk' % (global_step, epoch))
            saver.save(sess, "%s/%04d/model.ckpt"%("best_checkpoints",epoch))

            # Save latest checkpoint to same file name
            print('Saving model with %d epochs to disk' % (epoch))
            saver.save(sess, "best_checkpoints/model.ckpt")
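
Here `utils.roc(labels_out, scores_out)` returns a single AUC value for the anomaly scores. A sketch assuming it is a thin wrapper around scikit-learn (the real project may compute it differently):

from sklearn.metrics import roc_auc_score

def roc(labels, scores):
    # Assumed wrapper: AUC from binary anomaly labels and min-max
    # normalized anomaly scores.
    return roc_auc_score(labels, scores)
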
Example #10
			if num_batches * batch_size < n:
				# Computing rest
				rest = n - num_batches * batch_size
				idx = range(n-rest, n)
				x_batch = X[idx]
				out = predict(x_batch)
				preds.append(out)
			# Making metadata
			predictions = np.concatenate(preds, axis = 0)
			acc_eval = utils.accuracy(predictions, y)
			all_accuracy.append(acc_eval)

			auc_eval = utils.auc(predictions, y)
			all_auc.append(auc_eval)

			roc_eval_fpr, roc_eval_tpr, roc_eval_thresholds = utils.roc(predictions, y)
			all_roc_fpr.append(roc_eval_fpr)
			all_roc_tpr.append(roc_eval_tpr)
			all_roc_thresholds.append(roc_eval_thresholds)
			if Print:
				print "  validating: %s loss" % subset
				print "  average evaluation accuracy (%s): %.5f" % (subset, acc_eval)
				print "  average evaluation AUC (%s): %.5f" % (subset, auc_eval)
				print
	print "Epoch %d of %d" % (epoch + 1, num_epochs)

	if epoch in learning_rate_schedule:
		lr = np.float32(learning_rate_schedule[epoch])
		print "  setting learning rate to %.7f" % lr
		learning_rate.set_value(lr)
	print "Shuffling data"
Example #11
                        help='range to plot y axis')

    flags = parser.parse_args()

    for path in flags.files:
        if path.endswith('.txt'):
            # read comparisons file
            pos = []
            neg = []
            with open(path, 'r') as f:
                for line in f:
                    t, score = line.split()
                    score = float(score)
                    if int(t) == 1:
                        pos.append(score)
                    else:
                        neg.append(score)

            # compute roc
            fars, frrs = utils.roc(pos, neg)

            # plot roc
            plt.plot(fars, frrs, label=path)

    plt.legend(loc='upper right')
    plt.xlabel('FAR')
    plt.ylabel('FRR')
    plt.axis(flags.xrange + flags.yrange)
    plt.grid()
    plt.show()
Example #12
            if num_batches * batch_size < n:
                # Computing rest
                rest = n - num_batches * batch_size
                idx = range(n - rest, n)
                x_batch = X[idx]
                out = predict(x_batch)
                preds.append(out)
            # Making metadata
            predictions = np.concatenate(preds, axis=0)
            acc_eval = utils.accuracy(predictions, y)
            all_accuracy.append(acc_eval)

            auc_eval = utils.auc(predictions, y)
            all_auc.append(auc_eval)

            roc_eval_fpr, roc_eval_tpr, roc_eval_thresholds = utils.roc(
                predictions, y)
            all_roc_fpr.append(roc_eval_fpr)
            all_roc_tpr.append(roc_eval_tpr)
            all_roc_thresholds.append(roc_eval_thresholds)
            if Print:
                print "  validating: %s loss" % subset
                print "  average evaluation accuracy (%s): %.5f" % (subset,
                                                                    acc_eval)
                print "  average evaluation AUC (%s): %.5f" % (subset,
                                                               auc_eval)
                print
    print "Epoch %d of %d" % (epoch + 1, num_epochs)

    if epoch in learning_rate_schedule:
        lr = np.float32(learning_rate_schedule[epoch])
        print "  setting learning rate to %.7f" % lr
Example #13
    normalize,
    load_model,
    binary_cross_entropy,
    roc,
)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("dataset_test",
                        type=open_datafile,
                        help="dataset to use")
    parser.add_argument("model", help="model to use")
    parser.add_argument("-vi",
                        "--visu",
                        help="Display graphs",
                        action="store_true")

    args = parser.parse_args()
    n = load_model(args.model)
    # test = args.dataset_test.drop(args.dataset_test.columns[0], axis=1)
    test = args.dataset_test[[
        1, 2, 3, 8, 11, 12, 17, 18, 19, 21, 26, 28, 30, 31
    ]]
    test = normalize(test)
    test = np.array(test)
    error, acc = binary_cross_entropy(test, n)
    print(f"Cross Binary Entropy Error = {error:.5f}")
    print(f"Accuracy = {acc:.5f}")
    if args.visu is True:
        roc(test, n)
Example #14
def evaluate_model(dataset,
                   save_file,
                   random_state,
                   clf,
                   clf_name,
                   hyper_params,
                   longitudinal=False,
                   rare=True):

    print('reading data...', end='')
    features, labels, pt_ids, feature_names, zfile = read_file(
        dataset, longitudinal, rare)
    print('done.', len(labels), 'samples,', np.sum(labels == 1), 'cases,',
          features.shape[1], 'features')
    if 'Feat' in clf_name:
        #set feature names
        clf.feature_names = ','.join(feature_names).encode()
    n_splits = 10
    cv = StratifiedKFold(n_splits=n_splits,
                         shuffle=True,
                         random_state=random_state)

    scoring = make_scorer(balanced_accuracy)

    ###
    # controls matching on age and sex
    ###
    idx_age = np.argmax(feature_names == 'age')
    idx_sex = np.argmax(feature_names == 'SEX')

    #sampler = NearMiss(random_state=random_state, return_indices=True)
    sampler = QuartileExactMatch(quart_locs=[idx_age],
                                 exact_locs=[idx_sex],
                                 random_state=random_state)

    print('sampling data...', end='')
    X, y, sidx = sampler.fit_sample(features, labels)
    print('sampled data contains', np.sum(y == 1), 'cases', np.sum(y == 0),
          'controls')
    ###
    # split into train/test
    ###
    X_train, X_test, y_train, y_test, sidx_train, sidx_test = (
        train_test_split(X,
                         y,
                         sidx,
                         train_size=0.5,
                         test_size=0.5,
                         random_state=random_state))

    # X,y,sidx = sampler.fit_sample(features[train_idx],labels[train_idx])
    if len(hyper_params) > 0:
        param_grid = list(ParameterGrid(hyper_params))
        #clone estimators
        Clfs = [clone(clf).set_params(**p) for p in param_grid]
        # fit with hyperparameter optimization
        cv_scores = np.zeros((len(param_grid), 10))  # cross validated scores
        cv_preds = np.zeros(
            (len(param_grid), len(y_train)))  # cross validated predictions
        cv_probs = np.zeros(
            (len(param_grid), len(y_train)))  # cross validated probabilities
        FI = np.zeros((
            len(param_grid),
            features.shape[1]))  # cross validated, permuted feature importance
        FI_internal = np.zeros(
            (len(param_grid),
             features.shape[1]))  # cross validated feature importance

        ###########
        # this is a manual version of 10-fold cross validation with hyperparameter tuning
        t0 = time.process_time()
        for j, (train_idx, val_idx) in enumerate(cv.split(X_train, y_train)):
            print('fold', j)

            for i, est in enumerate(Clfs):
                print('training', type(est).__name__, i + 1, 'of', len(Clfs))
                if 'Feat' in clf_name:
                    est.logfile = (est.logfile.decode().split('.log')[0] +
                                   '.log.param' + str(i) + '.cv' +
                                   str(j)).encode()
                ##########
                # fit model
                ##########
                if longitudinal:
                    est.fit(X_train[train_idx], y_train[train_idx], zfile,
                            pt_ids[sidx_train[train_idx]])
                else:
                    est.fit(X_train[train_idx], y_train[train_idx])

                ##########
                # get predictions
                ##########
                print('getting validation predictions...')
                if longitudinal:
                    # cv_preds[i,val_idx] = est.predict(X_train[val_idx],
                    #                                    zfile,pt_ids[sidx_train[train_idx]])
                    if getattr(clf, "predict_proba", None):
                        cv_probs[i, val_idx] = est.predict_proba(
                            X_train[val_idx], zfile,
                            pt_ids[sidx_train[train_idx]])[:, 1]
                    elif getattr(clf, "decision_function", None):
                        cv_probs[i, val_idx] = est.decision_function(
                            X_train[val_idx], zfile,
                            pt_ids[sidx_train[train_idx]])
                else:
                    # cv_preds[i,val_idx] = est.predict(X_train[val_idx])
                    if getattr(clf, "predict_proba", None):
                        cv_probs[i, val_idx] = est.predict_proba(
                            X_train[val_idx])[:, 1]
                    elif getattr(clf, "decision_function", None):
                        cv_probs[i, val_idx] = est.decision_function(
                            X_train[val_idx])

                ##########
                # scores
                ##########
                cv_scores[i, j] = roc_auc_score(y_train[val_idx],
                                                cv_probs[i, val_idx])

        runtime = time.process_time() - t0
        ###########

        print('gridsearch finished in', runtime, 'seconds')

        ##########
        # get best model and its information
        mean_cv_scores = [np.mean(s) for s in cv_scores]
        best_clf = Clfs[np.argmax(mean_cv_scores)]
        ##########
    else:
        print('skipping hyperparameter tuning')
        best_clf = clf  # this option is for skipping model tuning
        t0 = time.process_time()

    print('fitting tuned model to all training data...')
    if longitudinal:
        best_clf.fit(X_train, y_train, zfile, pt_ids[sidx_train])
    else:
        best_clf.fit(X_train, y_train)

    if len(hyper_params) == 0:
        runtime = time.process_time() - t0
    # cv_predictions = cv_preds[np.argmax(mean_cv_scores)]
    # cv_probabilities = cv_probs[np.argmax(mean_cv_scores)]
    if not longitudinal:
        # internal feature importances
        cv_FI_int = compute_imp_score(best_clf,
                                      clf_name,
                                      X_train,
                                      y_train,
                                      random_state,
                                      perm=False)
        # cv_FI_int = FI_internal[np.argmax(mean_cv_scores)]
        # permutation importances
        FI = compute_imp_score(best_clf,
                               clf_name,
                               X_test,
                               y_test,
                               random_state,
                               perm=True)

    ##########
    # metrics: test the best classifier on the held-out test set
    print('getting test predictions...')
    if longitudinal:

        print('best_clf.predict(X_test, zfile, pt_ids[sidx_test])')
        test_predictions = best_clf.predict(X_test, zfile, pt_ids[sidx_test])
        if getattr(clf, "predict_proba", None):
            print('best_clf.predict_proba(X_test, zfile, pt_ids[sidx_test])')
            test_probabilities = best_clf.predict_proba(
                X_test, zfile, pt_ids[sidx_test])[:, 1]
        elif getattr(clf, "decision_function", None):
            test_probabilities = best_clf.decision_function(
                X_test, zfile, pt_ids[sidx_test])
    else:
        test_predictions = best_clf.predict(X_test)
        if getattr(clf, "predict_proba", None):
            test_probabilities = best_clf.predict_proba(X_test)[:, 1]
        elif getattr(clf, "decision_function", None):
            test_probabilities = best_clf.decision_function(X_test)

    # # write cv_pred and cv_prob to file
    # df = pd.DataFrame({'cv_prediction':cv_predictions,'cv_probability':cv_probabilities,
    #                    'pt_id':pt_ids})
    # df.to_csv(save_file.split('.csv')[0] + '_' + str(random_state) + '.cv_predictions',index=None)
    accuracy = accuracy_score(y_test, test_predictions)
    macro_f1 = f1_score(y_test, test_predictions, average='macro')
    bal_acc = balanced_accuracy(y_test, test_predictions)
    roc_auc = roc_auc_score(y_test, test_probabilities)

    ##########
    # save results to file
    print('saving results...')
    param_string = ','.join([
        '{}={}'.format(p, v) for p, v in best_clf.get_params().items()
        if p != 'feature_names'
    ]).replace('\n', '').replace(' ', '')

    out_text = '\t'.join([
        dataset.split('/')[-1], clf_name, param_string,
        str(random_state),
        str(accuracy),
        str(macro_f1),
        str(bal_acc),
        str(roc_auc),
        str(runtime)
    ])
    print(out_text)
    with open(save_file, 'a') as out:
        out.write(out_text + '\n')
    sys.stdout.flush()

    print('saving feature importance')
    # write feature importances
    if not longitudinal:
        feature_importance(save_file,
                           best_clf,
                           feature_names,
                           X_test,
                           y_test,
                           random_state,
                           clf_name,
                           param_string,
                           cv_FI_int,
                           perm=False)
        feature_importance(save_file,
                           best_clf,
                           feature_names,
                           X_test,
                           y_test,
                           random_state,
                           clf_name,
                           param_string,
                           FI,
                           perm=True)
    # write roc curves
    print('saving roc')
    roc(save_file, y_test, test_probabilities, random_state, clf_name,
        param_string)

    return best_clf
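
A hypothetical call of this `evaluate_model`, just to show the expected argument shapes; the dataset path, output file, and hyperparameter grid below are placeholders, not values from the original project, and the dataset is assumed to be whatever format the project's `read_file` loader expects:

from sklearn.linear_model import LogisticRegression

# Hypothetical invocation with a scikit-learn classifier and a small grid.
best = evaluate_model(dataset='data/example_cohort.csv',
                      save_file='results/lr_results.tsv',
                      random_state=42,
                      clf=LogisticRegression(solver='liblinear'),
                      clf_name='LR',
                      hyper_params={'C': [0.01, 0.1, 1.0, 10.0]},
                      longitudinal=False,
                      rare=True)
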