split_train = {l:0 for l in labels} for l in labels: split_train[l] = sum(developement_df[l].values) split_test = {l:0 for l in labels} for l in labels: split_test[l] = sum(testing_df[l].values) n_samples_train = len(developement_df) n_samples_test = len(testing_df) # Create the appropriate statistics container for the whole experiment. training_stats = Statistics() validation_stats = Statistics() testing_stats = Statistics() seeds = create_seeds(iterations) min_class_freq = min(split_train.values()) cv_folds = min([min_class_freq, cv_folds]) statistics_objects = [] best_params = {l: {'score': 0.0, 'params': {}} for l in labels} print("Running Supervised Ensemble Classification...") # def do_iteration(i): for i in range(iterations): print("Iteration " + str(i+1)) rng = np.random.RandomState() rng.seed(seeds[i]) dev_df_i = developement_df.copy(deep=True) test_df_i = testing_df.copy(deep=True) folds_i = list(DataFrameStratifiedKFold(
# Refuse to run without at least one selected feature.
if not len(selection):
    print("Please select some features using the command line args. Use --help or -h for help.")
    sys.exit(1)
print(selection)

# ---------------------- THRESHOLD TESTING ---------------------------- #
# Only the first frame returned by prep_data_frames is used here.
developement_df, _ = prep_data_frames(selection, load_interactome=False)
thresholds = np.arange(0, 1.1, step=0.1)

# Materialise the CV splits once so every label is evaluated on the same
# folds. random_state=None means the shuffle is not seeded here.
splitter = DataFrameStratifiedKFold(
    n_splits=cv_folds, shuffle=True, random_state=None
)
folds = list(
    splitter.split(developement_df, y=developement_df['label'].values)
)

statistics = Statistics()
params = sk_generate_params('lr', columns=None)
labels = get_labels_from_file('data/labels.tsv')
# One seed per label; pr_curve looks its seed up by label index.
seeds = create_seeds(len(labels))
things = {}


def pr_curve(i):
    # `i` indexes into both `labels` and `seeds`.
    label = labels[i]
    statistics_l = Statistics()
    print('Doing label {}'.format(label))
    for train_idx, valid_idx in folds:
        # The generator is rebuilt with the same per-label seed on every fold.
        rng = np.random.RandomState()
        rng.seed(seeds[i])
        # Select each fold's rows with .loc and renumber them from zero.
        training_fold = developement_df.loc[train_idx, ]
        training_fold = training_fold.reset_index(drop=True)
        validation_fold = developement_df.loc[valid_idx, ]
        validation_fold = validation_fold.reset_index(drop=True)
        base_estimators = make_classifiers(method, balanced, labels, random_state=rng)