Beispiel #1
0
def load_residential_building(return_X_y=False,
                              encode=True,
                              verbose=False,
                              onehot_threshold=10):
    """Load the residential building regression data from its Excel workbook.

    The row labelled 0 and the last column are discarded, the column that is
    last afterwards is renamed to 'target' (original note: target is V9), and
    every column is cast to float.

    Args:
        return_X_y: forwarded positionally to ``construct_return_set``.
        encode: forwarded positionally to ``construct_return_set``.
        verbose: forwarded as ``verbose=`` to ``construct_return_set``.
        onehot_threshold: forwarded as ``onehot_threshold=`` to
            ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the prepared frame.
    """
    frame = read_xls_data(
        'data/regression/residential_building/Residential-Building-Data-Set.xlsx'
    )

    # Row 0 is not treated as data (presumably a second header row -- verify
    # against the workbook); renumber the remaining rows from zero.
    frame = frame.drop(index=0).reset_index(drop=True)

    # Remove the trailing column so that the one before it becomes the target.
    frame = frame.drop(columns=[frame.columns[-1]])
    frame.columns = list(frame.columns[:-1]) + ['target']

    frame = frame.astype(float)

    dataset = "residential_building"
    return construct_return_set(frame,
                                dataset,
                                return_X_y,
                                encode,
                                citation='uci',
                                name=dataset,
                                verbose=verbose,
                                problem_type='regression',
                                onehot_threshold=onehot_threshold)
Beispiel #2
0
def load_stock_portfolio_performance(return_X_y=False,
                                     encode=True,
                                     verbose=False,
                                     onehot_threshold=10):
    """Load the 'all period' sheet of the stock portfolio performance workbook.

    The first row of the sheet supplies the column labels. After the first
    column is removed, the columns selected by the labels found at positions
    0-4 and 11 are kept, the last kept column is renamed to 'target', and
    every column is cast to float.

    Args:
        return_X_y: forwarded positionally to ``construct_return_set``.
        encode: forwarded positionally to ``construct_return_set``.
        verbose: forwarded as ``verbose=`` to ``construct_return_set``.
        onehot_threshold: forwarded as ``onehot_threshold=`` to
            ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the prepared frame.
    """
    # target: normalized annual return
    # NOTE(review): the target is chosen through the *label* found at
    # position 11. Confirm that this label is unique in the sheet and that it
    # really is the normalized annual return.
    sheet = read_xls_data(
        'data/regression/stock_portfolio_performance/stock portfolio performance data set.xlsx',
        sheet_name='all period')

    # Promote the first row to column labels, then drop it from the data.
    sheet.columns = sheet.iloc[0].values
    sheet = sheet.drop(index=sheet.index[0]).reset_index(drop=True)

    # Discard the leading column before picking features and target.
    sheet = sheet.drop(columns=[sheet.columns[0]])

    labels = sheet.columns
    sheet = sheet[list(labels[:5]) + [labels[11]]]
    sheet.columns = list(sheet.columns[:-1]) + ['target']

    sheet = sheet.astype(float)

    dataset = "stock_portfolio_performance"
    return construct_return_set(sheet,
                                dataset,
                                return_X_y,
                                encode,
                                citation='uci',
                                name=dataset,
                                verbose=verbose,
                                problem_type='regression',
                                onehot_threshold=onehot_threshold)
Beispiel #3
0
def load_puma32h(return_X_y=False,
                 encode=True,
                 verbose=False,
                 onehot_threshold=10):
    """Load the puma32h regression data from its ARFF-style file.

    The records returned by ``read_arff_data`` are wrapped in a DataFrame and
    the last column is renamed to 'target'.

    Args:
        return_X_y: forwarded positionally to ``construct_return_set``.
        encode: forwarded positionally to ``construct_return_set``.
        verbose: forwarded as ``verbose=`` to ``construct_return_set``.
        onehot_threshold: forwarded as ``onehot_threshold=`` to
            ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the prepared frame.
    """
    # The second element returned by the reader (metadata) is not needed here.
    data, _meta = read_arff_data('data/regression/puma32h/puma32h.dat')
    db = pd.DataFrame(data)
    db.columns = list(db.columns[:-1]) + ['target']

    # onehot_threshold used to be accepted but silently dropped; pass it on
    # like every other loader does so the caller's setting takes effect.
    return construct_return_set(db,
                                "puma32h",
                                return_X_y,
                                encode,
                                citation='keel',
                                name="puma32h",
                                verbose=verbose,
                                problem_type='regression',
                                onehot_threshold=onehot_threshold)
Beispiel #4
0
def load_abalone(return_X_y=False,
                 encode=True,
                 verbose=False,
                 onehot_threshold=10):
    """Load the abalone classification data from its text file.

    The last column is renamed to 'target' and the first column is then
    removed from the frame.

    Args:
        return_X_y: forwarded positionally to ``construct_return_set``.
        encode: forwarded positionally to ``construct_return_set``.
        verbose: forwarded as ``verbose=`` to ``construct_return_set``.
        onehot_threshold: forwarded as ``onehot_threshold=`` to
            ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the prepared frame.
    """
    table = read_csv_data('data/classification/abalone/abalone.data.txt')

    labels = list(table.columns[:-1])
    labels.append('target')
    table.columns = labels

    # The leading column is not used by this loader.
    table.drop(columns=[table.columns[0]], inplace=True)

    forwarded = {
        'citation': 'krnn',
        'name': "abalone",
        'verbose': verbose,
        'problem_type': 'classification',
        'onehot_threshold': onehot_threshold,
    }
    return construct_return_set(table, "abalone", return_X_y, encode,
                                **forwarded)
Beispiel #5
0
def load_zoo(return_X_y=False,
             encode=True,
             verbose=False,
             onehot_threshold=10):
    """Load the zoo classification data from its ARFF-style file.

    The records returned by ``read_arff_data`` are wrapped in a DataFrame and
    the last column is renamed to 'target'.

    Args:
        return_X_y: forwarded positionally to ``construct_return_set``.
        encode: forwarded positionally to ``construct_return_set``.
        verbose: forwarded as ``verbose=`` to ``construct_return_set``.
        onehot_threshold: forwarded as ``onehot_threshold=`` to
            ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the prepared frame.
    """
    # The metadata half of the reader's result is not used.
    records, _ = read_arff_data('data/classification/zoo/zoo.dat')
    frame = pd.DataFrame(records)

    labels = list(frame.columns[:-1])
    labels.append('target')
    frame.columns = labels

    dataset = "zoo"
    return construct_return_set(frame,
                                dataset,
                                return_X_y,
                                encode,
                                citation='keel',
                                name=dataset,
                                verbose=verbose,
                                problem_type='classification',
                                onehot_threshold=onehot_threshold)
Beispiel #6
0
def load_communities(return_X_y=False,
                     encode=True,
                     verbose=False,
                     onehot_threshold=10):
    """Load the communities regression data from its comma-separated file.

    The last column is renamed to 'target'; all other columns are left as
    read.

    Args:
        return_X_y: forwarded positionally to ``construct_return_set``.
        encode: forwarded positionally to ``construct_return_set``.
        verbose: forwarded as ``verbose=`` to ``construct_return_set``.
        onehot_threshold: forwarded as ``onehot_threshold=`` to
            ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the prepared frame.
    """
    frame = read_csv_data('data/regression/communities/communities.data',
                          sep=',')
    frame.columns = list(frame.columns[:-1]) + ['target']

    dataset = "communities"
    return construct_return_set(frame,
                                dataset,
                                return_X_y,
                                encode,
                                citation='uci',
                                name=dataset,
                                verbose=verbose,
                                problem_type='regression',
                                onehot_threshold=onehot_threshold)
Beispiel #7
0
def load_ecoli(return_X_y=False,
               encode=True,
               verbose=False,
               onehot_threshold=10):
    """Load the ecoli classification data from its whitespace-delimited file.

    The last column is renamed to 'target' and the first column is then
    removed from the frame.

    Args:
        return_X_y: forwarded positionally to ``construct_return_set``.
        encode: forwarded positionally to ``construct_return_set``.
        verbose: when true, the remaining column labels are printed; the flag
            is also forwarded as ``verbose=`` to ``construct_return_set``.
        onehot_threshold: forwarded as ``onehot_threshold=`` to
            ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the prepared frame.
    """
    db = read_csv_data('data/classification/ecoli/ecoli.data.txt',
                       delim_whitespace=True)
    db.columns = list(db.columns[:-1]) + ['target']
    del db[db.columns[0]]

    # The column listing is diagnostic output; it used to be printed on every
    # call, so emit it only when the caller asked for verbose output.
    if verbose:
        print(db.columns)

    return construct_return_set(db,
                                "ecoli",
                                return_X_y,
                                encode,
                                citation='krnn',
                                name="ecoli",
                                verbose=verbose,
                                problem_type='classification',
                                onehot_threshold=onehot_threshold)
Beispiel #8
0
def load_satimage(return_X_y=False,
                  encode=True,
                  verbose=False,
                  onehot_threshold=10):
    """Load the satimage classification data from its two space-separated files.

    The ``sat.trn.txt`` and ``sat.tst.txt`` parts are stacked row-wise into a
    single frame and the last column is renamed to 'target'.

    Args:
        return_X_y: forwarded positionally to ``construct_return_set``.
        encode: forwarded positionally to ``construct_return_set``.
        verbose: forwarded as ``verbose=`` to ``construct_return_set``.
        onehot_threshold: forwarded as ``onehot_threshold=`` to
            ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the combined frame.
    """
    db0 = read_csv_data('data/classification/satimage/sat.trn.txt', sep=' ')
    db1 = read_csv_data('data/classification/satimage/sat.tst.txt', sep=' ')

    # Renumber the rows while stacking: keeping each part's own row labels
    # would leave duplicate index values in the combined frame, which makes
    # label-based row access ambiguous downstream.
    db = pd.concat([db0, db1], ignore_index=True)
    db.columns = list(db.columns[:-1]) + ['target']

    return construct_return_set(db,
                                "SATIMAGE",
                                return_X_y,
                                encode,
                                citation='krnn',
                                name="SATIMAGE",
                                verbose=verbose,
                                problem_type='classification',
                                onehot_threshold=onehot_threshold)
Beispiel #9
0
def load_ccpp(return_X_y=False,
              encode=True,
              verbose=False,
              onehot_threshold=10):
    """Load the ccpp regression data from sheet 'Sheet1' of its workbook.

    The last column is renamed to 'target'; all other columns are left as
    read.

    Args:
        return_X_y: forwarded positionally to ``construct_return_set``.
        encode: forwarded positionally to ``construct_return_set``.
        verbose: forwarded as ``verbose=`` to ``construct_return_set``.
        onehot_threshold: forwarded as ``onehot_threshold=`` to
            ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the prepared frame.
    """
    sheet = read_xls_data('data/regression/ccpp/Folds5x2_pp.xlsx',
                          sheet_name='Sheet1')
    sheet.columns = list(sheet.columns[:-1]) + ['target']

    forwarded = {
        'citation': 'uci',
        'name': "ccpp",
        'verbose': verbose,
        'problem_type': 'regression',
        'onehot_threshold': onehot_threshold,
    }
    return construct_return_set(sheet, "ccpp", return_X_y, encode,
                                **forwarded)
Beispiel #10
0
def load_airfoil(return_X_y=False,
                 encode=True,
                 verbose=False,
                 onehot_threshold=10):
    """Load the airfoil regression data from its tab-separated file.

    The last column is renamed to 'target'; all other columns are left as
    read.

    Args:
        return_X_y: forwarded positionally to ``construct_return_set``.
        encode: forwarded positionally to ``construct_return_set``.
        verbose: forwarded as ``verbose=`` to ``construct_return_set``.
        onehot_threshold: forwarded as ``onehot_threshold=`` to
            ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the prepared frame.
    """
    source = 'data/regression/airfoil/airfoil_self_noise.dat.txt'
    frame = read_csv_data(source, sep='\t')
    frame.columns = list(frame.columns[:-1]) + ['target']

    dataset = "airfoil"
    return construct_return_set(frame,
                                dataset,
                                return_X_y,
                                encode,
                                citation='uci',
                                name=dataset,
                                verbose=verbose,
                                problem_type='regression',
                                onehot_threshold=onehot_threshold)
Beispiel #11
0
def load_winequality_red(return_X_y=False,
                         encode=True,
                         verbose=False,
                         onehot_threshold=10):
    """Load the red wine quality regression data from its ';'-separated CSV.

    The file is read with its first line as the header, and the last column
    is renamed to 'target'.

    Args:
        return_X_y: forwarded positionally to ``construct_return_set``.
        encode: forwarded positionally to ``construct_return_set``.
        verbose: forwarded as ``verbose=`` to ``construct_return_set``.
        onehot_threshold: forwarded as ``onehot_threshold=`` to
            ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the prepared frame.
    """
    source = 'data/regression/winequality_red/winequality-red.csv'
    frame = read_csv_data(source, sep=';', header=0)
    frame.columns = list(frame.columns[:-1]) + ['target']

    forwarded = {
        'citation': 'uci',
        'name': "winequality_red",
        'verbose': verbose,
        'problem_type': 'regression',
        'onehot_threshold': onehot_threshold,
    }
    return construct_return_set(frame, "winequality_red", return_X_y, encode,
                                **forwarded)
Beispiel #12
0
def load_cpu_performance(return_X_y=False,
                         encode=True,
                         verbose=False,
                         onehot_threshold=10):
    """Load the CPU performance regression data from its comma-separated file.

    The last column is removed first, then the column at position 1 of the
    remaining frame; the column that is last afterwards is renamed to
    'target'.

    Args:
        return_X_y: forwarded positionally to ``construct_return_set``.
        encode: forwarded positionally to ``construct_return_set``.
        verbose: forwarded as ``verbose=`` to ``construct_return_set``.
        onehot_threshold: forwarded as ``onehot_threshold=`` to
            ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the prepared frame.
    """
    frame = read_csv_data('data/regression/cpu_performance/machine.data.txt',
                          sep=',')

    # Order matters: each position is resolved against the frame as it looks
    # after the previous removal.
    for position in (-1, 1):
        frame.drop(columns=[frame.columns[position]], inplace=True)

    frame.columns = list(frame.columns[:-1]) + ['target']

    dataset = "cpu_performance"
    return construct_return_set(frame,
                                dataset,
                                return_X_y,
                                encode,
                                citation='uci',
                                name=dataset,
                                verbose=verbose,
                                problem_type='regression',
                                onehot_threshold=onehot_threshold)
Beispiel #13
0
def load_slump_test(return_X_y=False,
                    encode=True,
                    verbose=False,
                    onehot_threshold=10):
    """Load the slump test regression data from its comma-separated file.

    The file is read with its first line as the header; the first column is
    removed and the last column is renamed to 'target'.

    Args:
        return_X_y: forwarded positionally to ``construct_return_set``.
        encode: forwarded positionally to ``construct_return_set``.
        verbose: forwarded as ``verbose=`` to ``construct_return_set``.
        onehot_threshold: forwarded as ``onehot_threshold=`` to
            ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the prepared frame.
    """
    source = 'data/regression/slump_test/slump_test.data.txt'
    frame = read_csv_data(source, sep=',', header=0)

    # The leading column is not used by this loader.
    frame.drop(columns=[frame.columns[0]], inplace=True)
    frame.columns = list(frame.columns[:-1]) + ['target']

    dataset = "slump_test"
    return construct_return_set(frame,
                                dataset,
                                return_X_y,
                                encode,
                                citation='uci',
                                name=dataset,
                                verbose=verbose,
                                problem_type='regression',
                                onehot_threshold=onehot_threshold)
Beispiel #14
0
def load_yacht_hydrodynamics(return_X_y=False,
                             encode=True,
                             verbose=False,
                             onehot_threshold=10):
    """Load the yacht hydrodynamics regression data from its text file.

    The file is read as whitespace-delimited with its first line as the
    header, and the last column is renamed to 'target'.

    Args:
        return_X_y: forwarded positionally to ``construct_return_set``.
        encode: forwarded positionally to ``construct_return_set``.
        verbose: forwarded as ``verbose=`` to ``construct_return_set``.
        onehot_threshold: forwarded as ``onehot_threshold=`` to
            ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the prepared frame.
    """
    # NOTE(review): sep=None is passed together with delim_whitespace=True,
    # and header=0 consumes the first line as column names -- confirm both
    # against read_csv_data and the actual file.
    reader_options = {
        'sep': None,
        'header': 0,
        'delim_whitespace': True,
    }
    frame = read_csv_data(
        'data/regression/yacht_hydrodynamics/yacht_hydrodynamics.data.txt',
        **reader_options)
    frame.columns = list(frame.columns[:-1]) + ['target']

    dataset = "yacht_hydrodynamics"
    return construct_return_set(frame,
                                dataset,
                                return_X_y,
                                encode,
                                citation='uci',
                                name=dataset,
                                verbose=verbose,
                                problem_type='regression',
                                onehot_threshold=onehot_threshold)
Beispiel #15
0
def load_real_estate_valuation(return_X_y=False,
                               encode=True,
                               verbose=False,
                               onehot_threshold=10):
    """Load the real estate valuation regression data from its Excel workbook.

    The first column is removed, the last column is renamed to 'target', and
    every column is cast to float.

    Args:
        return_X_y: forwarded positionally to ``construct_return_set``.
        encode: forwarded positionally to ``construct_return_set``.
        verbose: forwarded as ``verbose=`` to ``construct_return_set``.
        onehot_threshold: forwarded as ``onehot_threshold=`` to
            ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the prepared frame.
    """
    sheet = read_xls_data(
        'data/regression/real_estate_valuation/Real estate valuation data set.xlsx'
    )

    # The leading column is not used by this loader.
    sheet.drop(columns=[sheet.columns[0]], inplace=True)
    sheet.columns = list(sheet.columns[:-1]) + ['target']

    sheet = sheet.astype(float)

    forwarded = {
        'citation': 'uci',
        'name': "real_estate_valuation",
        'verbose': verbose,
        'problem_type': 'regression',
        'onehot_threshold': onehot_threshold,
    }
    return construct_return_set(sheet, "real_estate_valuation", return_X_y,
                                encode, **forwarded)