def read_dataset(is_merged):
    """Read the translated train/test feather files.

    When the module-level ``DEBUG`` value is truthy, the paths point into the
    ``../input/debug<DEBUG>/`` directory; otherwise they point at the files
    directly under ``../input/``.

    Args:
        is_merged: Truthy to request a single object from ``read_train_test``
            (called with ``is_merged=True``); falsy to request the
            ``(train_df, test_df)`` pair (called with ``is_merged=False``).

    Returns:
        ``df`` when ``is_merged`` is truthy, otherwise the tuple
        ``(train_df, test_df)``.
    """
    debug = DEBUG

    def path_for(stem):
        # Debug runs read from a directory and file name tagged with DEBUG.
        if debug:
            return '../input/debug{}/{}_debug{}.feather'.format(
                debug, stem, debug)
        return '../input/{}.feather'.format(stem)

    train_path = path_for('train_translated')
    test_path = path_for('test_translated')

    print_doing('reading train, test and merge')

    if not is_merged:
        train_df, test_df = read_train_test(
            train_path, test_path, '.feather', is_merged=False)
        if debug:
            # Preview both frames only in debug mode.
            print(train_df.head())
            print(test_df.head())
        print_memory()
        return train_df, test_df

    df = read_train_test(train_path, test_path, '.feather', is_merged=True)
    if debug:
        print(df.head())
    print_memory()
    return df
def read_dataset_origin(dataset):
    """Read ``../input/train.csv`` and ``../input/test.csv`` as one object.

    Args:
        dataset: Not used by this function.

    Returns:
        Whatever ``read_train_test`` returns when called with ``is_merged=1``.
    """
    # NOTE(review): both paths end in '.csv' while the third argument passed
    # to read_train_test is '.feather' -- confirm this is intended.
    print_doing('reading train, test and merge')
    merged = read_train_test(
        '../input/train.csv', '../input/test.csv', '.feather', is_merged=1)
    print_memory()
    print(merged.head())
    return merged
def read_dataset():
    """Read the train/test feather files as one merged object.

    When the module-level ``DEBUG`` value is truthy, the paths point into the
    ``../input/debug<DEBUG>/`` directory; otherwise they point at the files
    directly under ``../input/``.

    Returns:
        Whatever ``read_train_test`` returns when called with ``is_merged=1``.
    """
    # NOTE(review): a function with this same name (taking ``is_merged``) is
    # defined earlier in this file; if both live at module level, this later
    # definition replaces it -- confirm that is intended.
    debug = DEBUG

    paths = []
    for stem in ('train', 'test'):
        if debug:
            paths.append('../input/debug{}/{}_debug{}.feather'.format(
                debug, stem, debug))
        else:
            paths.append('../input/{}.feather'.format(stem))
    train_path, test_path = paths

    print_doing('reading train, test and merge')
    merged = read_train_test(train_path, test_path, '.feather', is_merged=1)
    print_memory()
    print(merged.head())
    return merged
def read_dataset_deal_probability(seed):
    """Read train/test separately and combine them via the local-valid helper.

    The two frames are loaded with ``read_train_test(..., is_merged=0)`` and
    then handed, together with ``seed``, to
    ``find_df_local_valid_and_make_deal_prob_nan``.

    Args:
        seed: Forwarded unchanged to
            ``find_df_local_valid_and_make_deal_prob_nan``.

    Returns:
        The object returned by ``find_df_local_valid_and_make_deal_prob_nan``.
    """
    debug = DEBUG

    # Debug runs read from a directory and file name tagged with DEBUG.
    if debug:
        train_path = '../input/debug{}/{}_debug{}.feather'.format(
            debug, 'train', debug)
        test_path = '../input/debug{}/{}_debug{}.feather'.format(
            debug, 'test', debug)
    else:
        train_path = '../input/{}.feather'.format('train')
        test_path = '../input/{}.feather'.format('test')

    print_doing('reading train, test and merge')
    frames = read_train_test(train_path, test_path, '.feather', is_merged=0)
    train_df, test_df = frames

    result = find_df_local_valid_and_make_deal_prob_nan(
        train_df, test_df, seed)
    print_memory()
    print(result.head())
    return result