Ejemplo n.º 1
0
def split_train_val_test(df):
    """Split ``df`` into train, validation and test frames.

    ``get_train_test_1fold`` is applied twice: once to ``df`` to set the
    test part aside, and once to the remainder to separate train from
    validation. Each part is then passed through
    ``keep_index_and_1diagnose_columns`` with the 'Instance labels' column.

    Returns:
        Tuple ``(train, val, test)`` of the filtered frames.
    """
    remainder, test_part = get_train_test_1fold(df)
    train_part, val_part = get_train_test_1fold(remainder)

    label_column = 'Instance labels'
    # Same processing order as before (train, test, val); only the returned
    # tuple is ordered train, val, test.
    train_out, test_out, val_out = (
        keep_index_and_1diagnose_columns(part, label_column)
        for part in (train_part, test_part, val_part)
    )
    return train_out, val_out, test_out
Ejemplo n.º 2
0
def construct_train_test_cv(df, nr_cv, split):
    """Build the train, validation and test sets for one cross-validation split.

    ``split_data_cv`` is called on ``df`` to get per-split index collections
    for (train+val, test); the rows for ``split`` are extracted with
    ``get_rows_from_indices``. The same procedure is repeated on the
    train+val frame to separate train from validation. Every resulting frame
    is passed through ``keep_index_and_1diagnose_columns`` with the
    'Instance labels' column.

    Args:
        df: Frame to split.
        nr_cv: Passed to ``split_data_cv`` for both the outer and the inner
            split (presumably the number of CV folds; confirm against the
            helper).
        split: Key/position selecting which entry of the index collections
            to use.

    Returns:
        Tuple ``(train_set, val_set, test_set)``.
    """
    # Outer split: hold out the test rows.
    outer_fit_idx, outer_test_idx = split_data_cv(df, nr_cv)
    fit_frame, test_frame = get_rows_from_indices(
        df, outer_fit_idx[split], outer_test_idx[split])

    # Inner split: divide the remaining rows into train and validation.
    inner_train_idx, inner_val_idx = split_data_cv(fit_frame, nr_cv)
    train_frame, val_frame = get_rows_from_indices(
        fit_frame, inner_train_idx[split], inner_val_idx[split])

    label_column = 'Instance labels'
    train_set, val_set, test_set = (
        keep_index_and_1diagnose_columns(frame, label_column)
        for frame in (train_frame, val_frame, test_frame)
    )
    return train_set, val_set, test_set
Ejemplo n.º 3
0
def split_data_cv(df,
                  splits_nr,
                  current_split,
                  random_seed,
                  diagnose_col,
                  ratio_to_keep=None):
    """Return the train and validation frames for one cross-validation split.

    ``df`` is first filtered with ``filter_rows_on_class`` for
    ``diagnose_col``. ``split_test_train_cv`` is then run on the filtered
    frame (``test_ratio=0.2``, seeded with ``random_seed``) and yields
    per-split index collections. The entries selected by ``current_split``
    are used to pick rows, and both parts are passed through
    ``keep_index_and_1diagnose_columns`` with the 'instance labels' column.

    Args:
        df: Frame holding all rows.
        splits_nr: Passed to ``split_test_train_cv`` (presumably the number
            of splits to generate; confirm against the helper).
        current_split: Key/position of the split to materialise.
        random_seed: Forwarded as ``random_state`` to ``split_test_train_cv``.
        diagnose_col: Class name forwarded to ``filter_rows_on_class``.
        ratio_to_keep: Accepted for interface compatibility; not used by
            this function.

    Returns:
        Tuple ``(df_train_final, df_val_final)``.
    """
    df_train_val = filter_rows_on_class(df, class_name=diagnose_col)
    train_inds_coll, val_inds_coll = split_test_train_cv(
        df_train_val, splits_nr, test_ratio=0.2, random_state=random_seed)

    # The indices were computed on the filtered frame, so they must be
    # applied to that same frame. Indexing the unfiltered ``df`` by position
    # would pick unrelated rows whenever the filter removed any.
    # NOTE(review): assumes the helper returns positional indices, as the
    # original ``.iloc`` usage implies.
    df_train = df_train_val.iloc[train_inds_coll[current_split]]
    df_val = df_train_val.iloc[val_inds_coll[current_split]]

    df_train_final = keep_index_and_1diagnose_columns(df_train,
                                                      'instance labels')
    df_val_final = keep_index_and_1diagnose_columns(df_val, 'instance labels')
    return df_train_final, df_val_final
Ejemplo n.º 4
0
def prepare_mura_set(df_train_val, test_df_all_classes, class_name):
    """Prepare the train, validation and test frames for one class.

    ``df_train_val`` is split with ``split_train_val_set`` (only the last
    two of its four return values are used). The train/val frames and the
    given test frame are filtered for ``class_name`` with
    ``filter_all_set_for_class``, then passed through
    ``keep_index_and_1diagnose_columns`` with the 'instance labels' column.
    The shape of each resulting frame is printed.

    Returns:
        Tuple ``(df_train_final, df_val_final, df_test_final)``.
    """
    split_result = split_train_val_set(df_train_val)
    # Four values are expected; the first two are not needed here.
    _, _, train_all, val_all = split_result

    train_cls, val_cls, test_cls = filter_all_set_for_class(
        train_all, val_all, test_df_all_classes, class_name)

    label_column = 'instance labels'
    train_final, val_final, test_final = (
        keep_index_and_1diagnose_columns(frame, label_column)
        for frame in (train_cls, val_cls, test_cls)
    )

    report = (
        ('Training set: ', train_final),
        ('Validation set: ', val_final),
        ('Classification testing set: ', test_final),
    )
    for caption, frame in report:
        print(caption + str(frame.shape))
    return train_final, val_final, test_final
Ejemplo n.º 5
0
def split_filter_data(config, df):
    """Split a dataframe into train/validation/test subsets and filter columns.

    The split is delegated to ``ld.get_train_test`` (``random_state=1``,
    ``do_stats=False``). Each subset is then passed through
    ``ld.keep_index_and_1diagnose_columns`` with the column
    ``'<class_name>_loc'``.

    Args:
        config: Mapping read for the keys 'results_path' and 'class_name'.
        df: Dataframe handed to ``ld.get_train_test``.

    Returns:
        Tuple ``(train, val, test)`` of the column-filtered subsets.

    Raises:
        TypeError: If ``config['class_name']`` is None. The label column
            name is derived from the class name, so there is nothing
            sensible to filter on without it.
    """
    results_path = config['results_path']
    class_name = config['class_name']

    # Fail fast with a clear message. Previously a None class name only
    # surfaced as an opaque TypeError from the string concatenation below,
    # after the split had already run, and the later ``is not None`` guard
    # could never take effect.
    if class_name is None:
        raise TypeError(
            "config['class_name'] must be a string, got None; it is needed "
            "to build the '<class_name>_loc' label column")

    print("Splitting data ...")

    df_train, df_val, df_test = ld.get_train_test(df, random_state=1, do_stats=False,
                                                  res_path=results_path,
                                                  label_col=class_name)

    label_patches = class_name + '_loc'
    return (
        ld.keep_index_and_1diagnose_columns(df_train, label_patches),
        ld.keep_index_and_1diagnose_columns(df_val, label_patches),
        ld.keep_index_and_1diagnose_columns(df_test, label_patches),
    )
Ejemplo n.º 6
0
def filter_rows_and_columns(df, class_name):
    """Filter ``df`` by class, then reduce it to the label column.

    Rows are filtered with ``filter_rows_on_class`` for ``class_name``; the
    result is passed through ``keep_index_and_1diagnose_columns`` with the
    'instance labels' column and returned.
    """
    class_rows = filter_rows_on_class(df, class_name=class_name)
    result = keep_index_and_1diagnose_columns(class_rows, 'instance labels')
    return result