def load_satimage(return_X_y=False, encode=True, verbose=False, onehot_threshold=10):
    """Load the SATIMAGE classification data as one combined return set.

    The training and test files are read separately (space-separated),
    stacked row-wise, and the last column is relabelled ``'target'`` before
    the frame is handed to ``construct_return_set``.

    Args:
        return_X_y: Forwarded unchanged to ``construct_return_set``.
        encode: Forwarded unchanged to ``construct_return_set``.
        verbose: Forwarded unchanged to ``construct_return_set``.
        onehot_threshold: Forwarded unchanged to ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the combined frame.
    """
    part_paths = (
        'data/classification/satimage/sat.trn.txt',
        'data/classification/satimage/sat.tst.txt',
    )
    # Training part first, then the test part, each read with the same options.
    parts = [read_csv_data(path, sep=' ') for path in part_paths]

    # NOTE(review): concat is called without ignore_index, so each part keeps
    # its own row labels -- confirm construct_return_set does not depend on a
    # unique index.
    frame = pd.concat(parts)

    feature_labels = list(frame.columns[:-1])
    frame.columns = feature_labels + ['target']

    forwarded = dict(
        citation='krnn',
        name="SATIMAGE",
        verbose=verbose,
        problem_type='classification',
        onehot_threshold=onehot_threshold,
    )
    return construct_return_set(frame, "SATIMAGE", return_X_y, encode, **forwarded)
def load_abalone(return_X_y=False, encode=True, verbose=False, onehot_threshold=10):
    """Load the abalone data as a classification return set.

    The last column is relabelled ``'target'`` and the first column is then
    removed before the frame is handed to ``construct_return_set``.

    Args:
        return_X_y: Forwarded unchanged to ``construct_return_set``.
        encode: Forwarded unchanged to ``construct_return_set``.
        verbose: Forwarded unchanged to ``construct_return_set``.
        onehot_threshold: Forwarded unchanged to ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the prepared frame.
    """
    frame = read_csv_data('data/classification/abalone/abalone.data.txt')

    # Relabel the target before dropping anything, as the original order does.
    feature_labels = list(frame.columns[:-1])
    frame.columns = feature_labels + ['target']

    leading_label = frame.columns[0]
    del frame[leading_label]

    forwarded = dict(
        citation='krnn',
        name="abalone",
        verbose=verbose,
        problem_type='classification',
        onehot_threshold=onehot_threshold,
    )
    return construct_return_set(frame, "abalone", return_X_y, encode, **forwarded)
def load_ecoli(return_X_y=False, encode=True, verbose=False, onehot_threshold=10):
    """Load the ecoli data as a classification return set.

    The whitespace-delimited file is read, the last column is relabelled
    ``'target'`` and the first column is then removed before the frame is
    handed to ``construct_return_set``.

    This loader writes nothing to stdout itself; ``verbose`` is only
    forwarded to ``construct_return_set``.

    Args:
        return_X_y: Forwarded unchanged to ``construct_return_set``.
        encode: Forwarded unchanged to ``construct_return_set``.
        verbose: Forwarded unchanged to ``construct_return_set``.
        onehot_threshold: Forwarded unchanged to ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the prepared frame.
    """
    db = read_csv_data('data/classification/ecoli/ecoli.data.txt',
                       delim_whitespace=True)
    db.columns = list(db.columns[:-1]) + ['target']
    # Drop the first column (presumably a per-row identifier -- TODO confirm).
    del db[db.columns[0]]
    return construct_return_set(db, "ecoli", return_X_y, encode,
                                citation='krnn', name="ecoli",
                                verbose=verbose,
                                problem_type='classification',
                                onehot_threshold=onehot_threshold)
def load_airfoil(return_X_y=False, encode=True, verbose=False, onehot_threshold=10):
    """Load the airfoil self-noise data as a regression return set.

    The tab-separated file is read and its last column is relabelled
    ``'target'`` before the frame is handed to ``construct_return_set``.

    Args:
        return_X_y: Forwarded unchanged to ``construct_return_set``.
        encode: Forwarded unchanged to ``construct_return_set``.
        verbose: Forwarded unchanged to ``construct_return_set``.
        onehot_threshold: Forwarded unchanged to ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the prepared frame.
    """
    source_path = 'data/regression/airfoil/airfoil_self_noise.dat.txt'
    frame = read_csv_data(source_path, sep='\t')

    # Only the final label changes; every other column keeps its label.
    labels = list(frame.columns)
    labels[-1] = 'target'
    frame.columns = labels

    forwarded = dict(
        citation='uci',
        name="airfoil",
        verbose=verbose,
        problem_type='regression',
        onehot_threshold=onehot_threshold,
    )
    return construct_return_set(frame, "airfoil", return_X_y, encode, **forwarded)
def load_winequality_red(return_X_y=False, encode=True, verbose=False, onehot_threshold=10):
    """Load the red wine quality data as a regression return set.

    The semicolon-separated file is read with its first row as the header,
    and the last column is relabelled ``'target'`` before the frame is handed
    to ``construct_return_set``.

    Args:
        return_X_y: Forwarded unchanged to ``construct_return_set``.
        encode: Forwarded unchanged to ``construct_return_set``.
        verbose: Forwarded unchanged to ``construct_return_set``.
        onehot_threshold: Forwarded unchanged to ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the prepared frame.
    """
    source_path = 'data/regression/winequality_red/winequality-red.csv'
    frame = read_csv_data(source_path, sep=';', header=0)

    # Only the final label changes; every other column keeps its label.
    labels = list(frame.columns)
    labels[-1] = 'target'
    frame.columns = labels

    forwarded = dict(
        citation='uci',
        name="winequality_red",
        verbose=verbose,
        problem_type='regression',
        onehot_threshold=onehot_threshold,
    )
    return construct_return_set(frame, "winequality_red", return_X_y, encode, **forwarded)
def load_slump_test(return_X_y=False, encode=True, verbose=False, onehot_threshold=10):
    """Load the slump test data as a regression return set.

    The comma-separated file is read with its first row as the header, the
    first column is removed, and the last remaining column is relabelled
    ``'target'`` before the frame is handed to ``construct_return_set``.

    Args:
        return_X_y: Forwarded unchanged to ``construct_return_set``.
        encode: Forwarded unchanged to ``construct_return_set``.
        verbose: Forwarded unchanged to ``construct_return_set``.
        onehot_threshold: Forwarded unchanged to ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the prepared frame.
    """
    source_path = 'data/regression/slump_test/slump_test.data.txt'
    frame = read_csv_data(source_path, sep=',', header=0)

    # The leading column is dropped first, then the target is relabelled.
    leading_label = frame.columns[0]
    del frame[leading_label]

    labels = list(frame.columns)
    labels[-1] = 'target'
    frame.columns = labels

    forwarded = dict(
        citation='uci',
        name="slump_test",
        verbose=verbose,
        problem_type='regression',
        onehot_threshold=onehot_threshold,
    )
    return construct_return_set(frame, "slump_test", return_X_y, encode, **forwarded)
def load_yacht_hydrodynamics(return_X_y=False, encode=True, verbose=False, onehot_threshold=10):
    """Load the yacht hydrodynamics data as a regression return set.

    The whitespace-delimited file is read with its first row as the header,
    and the last column is relabelled ``'target'`` before the frame is handed
    to ``construct_return_set``.

    Args:
        return_X_y: Forwarded unchanged to ``construct_return_set``.
        encode: Forwarded unchanged to ``construct_return_set``.
        verbose: Forwarded unchanged to ``construct_return_set``.
        onehot_threshold: Forwarded unchanged to ``construct_return_set``.

    Returns:
        Whatever ``construct_return_set`` returns for the prepared frame.
    """
    source_path = 'data/regression/yacht_hydrodynamics/yacht_hydrodynamics.data.txt'
    # NOTE(review): sep=None is passed together with delim_whitespace=True;
    # confirm read_csv_data accepts that combination.
    frame = read_csv_data(source_path, sep=None, header=0, delim_whitespace=True)

    # Only the final label changes; every other column keeps its label.
    labels = list(frame.columns)
    labels[-1] = 'target'
    frame.columns = labels

    forwarded = dict(
        citation='uci',
        name="yacht_hydrodynamics",
        verbose=verbose,
        problem_type='regression',
        onehot_threshold=onehot_threshold,
    )
    return construct_return_set(frame, "yacht_hydrodynamics", return_X_y, encode, **forwarded)