예제 #1
0
def read_dataset(is_merged):
    """Load the translated train/test feather files through ``read_train_test``.

    The module-level ``DEBUG`` value selects the input paths: when it is
    truthy the files under ``../input/debug<DEBUG>/`` are used and the head
    of every loaded object is printed; otherwise the full
    ``../input/*_translated.feather`` files are used.

    Args:
        is_merged: Truthy to call ``read_train_test`` with ``is_merged=True``
            and return its single result; falsy to call it with
            ``is_merged=False`` and return the ``(train, test)`` pair.

    Returns:
        The single object returned by ``read_train_test`` when ``is_merged``
        is truthy, otherwise a ``(train_df, test_df)`` tuple.
    """
    # Snapshot the flag once so path selection and debug printing agree.
    debug_level = DEBUG
    if debug_level:
        template = '../input/debug{}/{}_debug{}.feather'
        train_path = template.format(
            debug_level, 'train_translated', debug_level)
        test_path = template.format(
            debug_level, 'test_translated', debug_level)
    else:
        template = '../input/{}.feather'
        train_path = template.format('train_translated')
        test_path = template.format('test_translated')

    print_doing('reading train, test and merge')

    if is_merged:
        merged = read_train_test(
            train_path, test_path, '.feather', is_merged=True)
        if debug_level:
            print(merged.head())
        print_memory()
        return merged

    train_part, test_part = read_train_test(
        train_path, test_path, '.feather', is_merged=False)
    if debug_level:
        print(train_part.head())
        print(test_part.head())
    print_memory()
    return train_part, test_part
예제 #2
0
def read_dataset_origin(dataset):
    """Load the original train/test CSV inputs through ``read_train_test``.

    Args:
        dataset: Not used by this function; kept so existing callers that
            pass it keep working.

    Returns:
        Whatever ``read_train_test`` returns when called with
        ``is_merged=1``; its head is printed before returning.
    """
    print_doing('reading train, test and merge')
    # NOTE(review): the paths end in ``.csv`` but the extension argument is
    # '.feather' -- confirm against ``read_train_test`` that this is intended.
    merged = read_train_test(
        '../input/train.csv',
        '../input/test.csv',
        '.feather',
        is_merged=1,
    )
    print_memory()
    print(merged.head())
    return merged
예제 #3
0
def read_dataset():
    """Load the train/test feather files through ``read_train_test``.

    The module-level ``DEBUG`` value selects the input paths: when it is
    truthy the files under ``../input/debug<DEBUG>/`` are used, otherwise
    ``../input/train.feather`` and ``../input/test.feather``.

    Returns:
        Whatever ``read_train_test`` returns when called with
        ``is_merged=1``; its head is printed before returning.
    """
    debug_level = DEBUG
    if debug_level:
        template = '../input/debug{}/{}_debug{}.feather'
        train_path = template.format(debug_level, 'train', debug_level)
        test_path = template.format(debug_level, 'test', debug_level)
    else:
        template = '../input/{}.feather'
        train_path = template.format('train')
        test_path = template.format('test')

    print_doing('reading train, test and merge')
    merged = read_train_test(train_path, test_path, '.feather', is_merged=1)
    print_memory()
    print(merged.head())
    return merged
예제 #4
0
def read_dataset_deal_probability(seed):
    """Load train/test separately and combine them with the local-valid helper.

    The module-level ``DEBUG`` value selects the input paths: when it is
    truthy the files under ``../input/debug<DEBUG>/`` are used, otherwise
    ``../input/train.feather`` and ``../input/test.feather``.

    Args:
        seed: Passed unchanged as the third argument of
            ``find_df_local_valid_and_make_deal_prob_nan``.

    Returns:
        The result of ``find_df_local_valid_and_make_deal_prob_nan`` applied
        to the loaded train/test pair; its head is printed before returning.
    """
    debug_level = DEBUG
    if debug_level:
        template = '../input/debug{}/{}_debug{}.feather'
        train_path = template.format(debug_level, 'train', debug_level)
        test_path = template.format(debug_level, 'test', debug_level)
    else:
        template = '../input/{}.feather'
        train_path = template.format('train')
        test_path = template.format('test')

    print_doing('reading train, test and merge')
    # is_merged=0 requests the two parts separately so the helper below
    # receives train and test as distinct arguments.
    train_part, test_part = read_train_test(
        train_path, test_path, '.feather', is_merged=0)
    combined = find_df_local_valid_and_make_deal_prob_nan(
        train_part, test_part, seed)
    print_memory()
    print(combined.head())
    return combined