def load_residential_building(return_X_y=False,
                              encode=True,
                              verbose=False,
                              onehot_threshold=10):
    """Load the Residential Building spreadsheet as a regression data set.

    The row labelled 0 is discarded, the last column is removed, and the
    column that is then last is renamed to ``target``. Every column is cast
    to float before the frame is handed to ``construct_return_set``.

    Args:
        return_X_y: Forwarded unchanged to ``construct_return_set``.
        encode: Forwarded unchanged to ``construct_return_set``.
        verbose: Forwarded unchanged to ``construct_return_set``.
        onehot_threshold: Forwarded unchanged to ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the prepared frame.
    """
    # target: V9
    frame = read_xls_data(
        'data/regression/residential_building/Residential-Building-Data-Set.xlsx'
    )

    # Row 0 is dropped before the numeric cast (presumably a secondary
    # header row -- confirm against the spreadsheet).
    frame = frame.drop(index=0).reset_index(drop=True)

    # Remove the trailing column so the one before it becomes the target.
    frame.drop(columns=frame.columns[-1], inplace=True)

    names = frame.columns.tolist()
    names[-1] = 'target'
    frame.columns = names

    frame = frame.astype(float)

    return construct_return_set(
        frame,
        "residential_building",
        return_X_y,
        encode,
        citation='uci',
        name="residential_building",
        verbose=verbose,
        problem_type='regression',
        onehot_threshold=onehot_threshold,
    )
def load_stock_portfolio_performance(return_X_y=False,
                                     encode=True,
                                     verbose=False,
                                     onehot_threshold=10):
    """Load the stock portfolio performance sheet as a regression data set.

    Reads the ``'all period'`` sheet, promotes its first row to column
    names, removes the first column, and keeps the first five remaining
    columns plus the column at position 11, which is renamed to ``target``.
    Every kept column is cast to float.

    NOTE(review): the original comment names the target as the normalized
    annual return; confirm that position 11 is that column in the sheet.

    Args:
        return_X_y: Forwarded unchanged to ``construct_return_set``.
        encode: Forwarded unchanged to ``construct_return_set``.
        verbose: Forwarded unchanged to ``construct_return_set``.
        onehot_threshold: Forwarded unchanged to ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the prepared frame.
    """
    # target: normalized annual return
    sheet = read_xls_data(
        'data/regression/stock_portfolio_performance/stock portfolio performance data set.xlsx',
        sheet_name='all period')

    # The first row holds the real column names; use it as the header and
    # then remove it from the data.
    sheet.columns = sheet.iloc[0].values
    sheet = sheet.drop(index=sheet.index[0]).reset_index(drop=True)

    del sheet[sheet.columns[0]]

    # Keep the first five columns as features and column 11 as the target.
    kept = [*sheet.columns[0:5], sheet.columns[11]]
    sheet = sheet[kept]

    names = sheet.columns.tolist()
    names[-1] = 'target'
    sheet.columns = names

    sheet = sheet.astype(float)

    return construct_return_set(
        sheet,
        "stock_portfolio_performance",
        return_X_y,
        encode,
        citation='uci',
        name="stock_portfolio_performance",
        verbose=verbose,
        problem_type='regression',
        onehot_threshold=onehot_threshold,
    )
def load_puma32h(return_X_y=False,
                 encode=True,
                 verbose=False,
                 onehot_threshold=10):
    """Load the puma32h ARFF-style file as a regression data set.

    The last column of the loaded data is renamed to ``target``.

    Args:
        return_X_y: Forwarded unchanged to ``construct_return_set``.
        encode: Forwarded unchanged to ``construct_return_set``.
        verbose: Forwarded unchanged to ``construct_return_set``.
        onehot_threshold: Forwarded unchanged to ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the prepared frame.
    """
    data, _meta = read_arff_data('data/regression/puma32h/puma32h.dat')
    db = pd.DataFrame(data)
    db.columns = list(db.columns[:-1]) + ['target']

    # onehot_threshold is forwarded like in every other loader; previously
    # the caller's value was accepted but silently ignored here.
    return construct_return_set(
        db,
        "puma32h",
        return_X_y,
        encode,
        citation='keel',
        name="puma32h",
        verbose=verbose,
        problem_type='regression',
        onehot_threshold=onehot_threshold,
    )
def load_abalone(return_X_y=False,
                 encode=True,
                 verbose=False,
                 onehot_threshold=10):
    """Load the abalone data file as a classification data set.

    The last column is renamed to ``target`` and the first column is
    removed before the frame is handed to ``construct_return_set``.

    Args:
        return_X_y: Forwarded unchanged to ``construct_return_set``.
        encode: Forwarded unchanged to ``construct_return_set``.
        verbose: Forwarded unchanged to ``construct_return_set``.
        onehot_threshold: Forwarded unchanged to ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the prepared frame.
    """
    frame = read_csv_data('data/classification/abalone/abalone.data.txt')
    frame.columns = [*frame.columns[:-1], 'target']

    # The first column is not used as a feature.
    frame.drop(columns=frame.columns[0], inplace=True)

    return construct_return_set(
        frame,
        "abalone",
        return_X_y,
        encode,
        citation='krnn',
        name="abalone",
        verbose=verbose,
        problem_type='classification',
        onehot_threshold=onehot_threshold,
    )
def load_zoo(return_X_y=False,
             encode=True,
             verbose=False,
             onehot_threshold=10):
    """Load the zoo ARFF-style file as a classification data set.

    The last column of the loaded data is renamed to ``target``.

    Args:
        return_X_y: Forwarded unchanged to ``construct_return_set``.
        encode: Forwarded unchanged to ``construct_return_set``.
        verbose: Forwarded unchanged to ``construct_return_set``.
        onehot_threshold: Forwarded unchanged to ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the prepared frame.
    """
    records, _meta = read_arff_data('data/classification/zoo/zoo.dat')
    frame = pd.DataFrame(records)
    frame.columns = [*frame.columns[:-1], 'target']

    return construct_return_set(
        frame,
        "zoo",
        return_X_y,
        encode,
        citation='keel',
        name="zoo",
        verbose=verbose,
        problem_type='classification',
        onehot_threshold=onehot_threshold,
    )
def load_communities(return_X_y=False,
                     encode=True,
                     verbose=False,
                     onehot_threshold=10):
    """Load the communities data file as a regression data set.

    The comma-separated file is read and its last column is renamed to
    ``target``.

    Args:
        return_X_y: Forwarded unchanged to ``construct_return_set``.
        encode: Forwarded unchanged to ``construct_return_set``.
        verbose: Forwarded unchanged to ``construct_return_set``.
        onehot_threshold: Forwarded unchanged to ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the prepared frame.
    """
    frame = read_csv_data('data/regression/communities/communities.data',
                          sep=',')

    names = frame.columns.tolist()
    names[-1] = 'target'
    frame.columns = names

    return construct_return_set(
        frame,
        "communities",
        return_X_y,
        encode,
        citation='uci',
        name="communities",
        verbose=verbose,
        problem_type='regression',
        onehot_threshold=onehot_threshold,
    )
def load_ecoli(return_X_y=False,
               encode=True,
               verbose=False,
               onehot_threshold=10):
    """Load the ecoli data file as a classification data set.

    The whitespace-delimited file is read, its last column is renamed to
    ``target``, and its first column is removed.

    Args:
        return_X_y: Forwarded unchanged to ``construct_return_set``.
        encode: Forwarded unchanged to ``construct_return_set``.
        verbose: Forwarded unchanged to ``construct_return_set``.
        onehot_threshold: Forwarded unchanged to ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the prepared frame.
    """
    db = read_csv_data('data/classification/ecoli/ecoli.data.txt',
                       delim_whitespace=True)
    db.columns = list(db.columns[:-1]) + ['target']

    # The first column is not used as a feature.
    del db[db.columns[0]]

    # A leftover debugging print of the column index was removed here: it
    # wrote to stdout on every call regardless of ``verbose``.
    return construct_return_set(
        db,
        "ecoli",
        return_X_y,
        encode,
        citation='krnn',
        name="ecoli",
        verbose=verbose,
        problem_type='classification',
        onehot_threshold=onehot_threshold,
    )
def load_satimage(return_X_y=False,
                  encode=True,
                  verbose=False,
                  onehot_threshold=10):
    """Load the satimage train and test files as one classification set.

    Both space-separated files are read and stacked row-wise; the last
    column of the combined frame is renamed to ``target``.

    Args:
        return_X_y: Forwarded unchanged to ``construct_return_set``.
        encode: Forwarded unchanged to ``construct_return_set``.
        verbose: Forwarded unchanged to ``construct_return_set``.
        onehot_threshold: Forwarded unchanged to ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the combined frame.
    """
    train = read_csv_data('data/classification/satimage/sat.trn.txt', sep=' ')
    test = read_csv_data('data/classification/satimage/sat.tst.txt', sep=' ')

    # ignore_index gives the stacked frame a fresh 0..n-1 row index; without
    # it the row labels of the two files would repeat, which makes any later
    # label-based row access or alignment ambiguous.
    db = pd.concat([train, test], ignore_index=True)
    db.columns = list(db.columns[:-1]) + ['target']

    return construct_return_set(
        db,
        "SATIMAGE",
        return_X_y,
        encode,
        citation='krnn',
        name="SATIMAGE",
        verbose=verbose,
        problem_type='classification',
        onehot_threshold=onehot_threshold,
    )
def load_ccpp(return_X_y=False,
              encode=True,
              verbose=False,
              onehot_threshold=10):
    """Load the CCPP spreadsheet as a regression data set.

    Sheet ``'Sheet1'`` is read and its last column is renamed to
    ``target``.

    Args:
        return_X_y: Forwarded unchanged to ``construct_return_set``.
        encode: Forwarded unchanged to ``construct_return_set``.
        verbose: Forwarded unchanged to ``construct_return_set``.
        onehot_threshold: Forwarded unchanged to ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the prepared frame.
    """
    frame = read_xls_data('data/regression/ccpp/Folds5x2_pp.xlsx',
                          sheet_name='Sheet1')

    names = frame.columns.tolist()
    names[-1] = 'target'
    frame.columns = names

    return construct_return_set(
        frame,
        "ccpp",
        return_X_y,
        encode,
        citation='uci',
        name="ccpp",
        verbose=verbose,
        problem_type='regression',
        onehot_threshold=onehot_threshold,
    )
def load_airfoil(return_X_y=False,
                 encode=True,
                 verbose=False,
                 onehot_threshold=10):
    """Load the airfoil self-noise file as a regression data set.

    The tab-separated file is read and its last column is renamed to
    ``target``.

    Args:
        return_X_y: Forwarded unchanged to ``construct_return_set``.
        encode: Forwarded unchanged to ``construct_return_set``.
        verbose: Forwarded unchanged to ``construct_return_set``.
        onehot_threshold: Forwarded unchanged to ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the prepared frame.
    """
    frame = read_csv_data('data/regression/airfoil/airfoil_self_noise.dat.txt',
                          sep='\t')

    names = frame.columns.tolist()
    names[-1] = 'target'
    frame.columns = names

    return construct_return_set(
        frame,
        "airfoil",
        return_X_y,
        encode,
        citation='uci',
        name="airfoil",
        verbose=verbose,
        problem_type='regression',
        onehot_threshold=onehot_threshold,
    )
def load_winequality_red(return_X_y=False,
                         encode=True,
                         verbose=False,
                         onehot_threshold=10):
    """Load the red wine quality CSV as a regression data set.

    The semicolon-separated file is read with its first row as header, and
    its last column is renamed to ``target``.

    Args:
        return_X_y: Forwarded unchanged to ``construct_return_set``.
        encode: Forwarded unchanged to ``construct_return_set``.
        verbose: Forwarded unchanged to ``construct_return_set``.
        onehot_threshold: Forwarded unchanged to ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the prepared frame.
    """
    frame = read_csv_data('data/regression/winequality_red/winequality-red.csv',
                          sep=';',
                          header=0)

    names = frame.columns.tolist()
    names[-1] = 'target'
    frame.columns = names

    return construct_return_set(
        frame,
        "winequality_red",
        return_X_y,
        encode,
        citation='uci',
        name="winequality_red",
        verbose=verbose,
        problem_type='regression',
        onehot_threshold=onehot_threshold,
    )
def load_cpu_performance(return_X_y=False,
                         encode=True,
                         verbose=False,
                         onehot_threshold=10):
    """Load the CPU performance data file as a regression data set.

    The comma-separated file is read; its last column and then the column
    at position 1 are removed, and the column that is last afterwards is
    renamed to ``target``.

    Args:
        return_X_y: Forwarded unchanged to ``construct_return_set``.
        encode: Forwarded unchanged to ``construct_return_set``.
        verbose: Forwarded unchanged to ``construct_return_set``.
        onehot_threshold: Forwarded unchanged to ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the prepared frame.
    """
    frame = read_csv_data('data/regression/cpu_performance/machine.data.txt',
                          sep=',')

    # Removed one after the other, in the same order as before, so the
    # positional lookups see the same intermediate column layout.
    frame.drop(columns=frame.columns[-1], inplace=True)
    frame.drop(columns=frame.columns[1], inplace=True)

    names = frame.columns.tolist()
    names[-1] = 'target'
    frame.columns = names

    return construct_return_set(
        frame,
        "cpu_performance",
        return_X_y,
        encode,
        citation='uci',
        name="cpu_performance",
        verbose=verbose,
        problem_type='regression',
        onehot_threshold=onehot_threshold,
    )
def load_slump_test(return_X_y=False,
                    encode=True,
                    verbose=False,
                    onehot_threshold=10):
    """Load the slump test data file as a regression data set.

    The comma-separated file is read with its first row as header; the
    first column is removed and the last column is renamed to ``target``.

    Args:
        return_X_y: Forwarded unchanged to ``construct_return_set``.
        encode: Forwarded unchanged to ``construct_return_set``.
        verbose: Forwarded unchanged to ``construct_return_set``.
        onehot_threshold: Forwarded unchanged to ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the prepared frame.
    """
    frame = read_csv_data('data/regression/slump_test/slump_test.data.txt',
                          sep=',',
                          header=0)

    # The first column is not used as a feature.
    frame.drop(columns=frame.columns[0], inplace=True)

    names = frame.columns.tolist()
    names[-1] = 'target'
    frame.columns = names

    return construct_return_set(
        frame,
        "slump_test",
        return_X_y,
        encode,
        citation='uci',
        name="slump_test",
        verbose=verbose,
        problem_type='regression',
        onehot_threshold=onehot_threshold,
    )
def load_yacht_hydrodynamics(return_X_y=False,
                             encode=True,
                             verbose=False,
                             onehot_threshold=10):
    """Load the yacht hydrodynamics data file as a regression data set.

    The file is read through ``read_csv_data`` with ``sep=None``,
    ``header=0`` and ``delim_whitespace=True``; its last column is renamed
    to ``target``.

    Args:
        return_X_y: Forwarded unchanged to ``construct_return_set``.
        encode: Forwarded unchanged to ``construct_return_set``.
        verbose: Forwarded unchanged to ``construct_return_set``.
        onehot_threshold: Forwarded unchanged to ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the prepared frame.
    """
    # NOTE(review): sep=None is passed together with delim_whitespace=True;
    # confirm read_csv_data accepts that combination.
    frame = read_csv_data(
        'data/regression/yacht_hydrodynamics/yacht_hydrodynamics.data.txt',
        sep=None,
        header=0,
        delim_whitespace=True)

    names = frame.columns.tolist()
    names[-1] = 'target'
    frame.columns = names

    return construct_return_set(
        frame,
        "yacht_hydrodynamics",
        return_X_y,
        encode,
        citation='uci',
        name="yacht_hydrodynamics",
        verbose=verbose,
        problem_type='regression',
        onehot_threshold=onehot_threshold,
    )
def load_real_estate_valuation(return_X_y=False,
                               encode=True,
                               verbose=False,
                               onehot_threshold=10):
    """Load the real estate valuation spreadsheet as a regression data set.

    The first column is removed, the last column is renamed to ``target``,
    and every column is cast to float.

    Args:
        return_X_y: Forwarded unchanged to ``construct_return_set``.
        encode: Forwarded unchanged to ``construct_return_set``.
        verbose: Forwarded unchanged to ``construct_return_set``.
        onehot_threshold: Forwarded unchanged to ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the prepared frame.
    """
    frame = read_xls_data(
        'data/regression/real_estate_valuation/Real estate valuation data set.xlsx'
    )

    # The first column is not used as a feature.
    frame.drop(columns=frame.columns[0], inplace=True)

    names = frame.columns.tolist()
    names[-1] = 'target'
    frame.columns = names

    frame = frame.astype(float)

    return construct_return_set(
        frame,
        "real_estate_valuation",
        return_X_y,
        encode,
        citation='uci',
        name="real_estate_valuation",
        verbose=verbose,
        problem_type='regression',
        onehot_threshold=onehot_threshold,
    )