コード例 #1
0
def create_local_validation():
    """Return the local-validation index list, caching it as a pickle.

    The feature directory and the total count ``N`` are chosen by the
    module-level ``DEBUG`` flag. If ``local_validation.pickle`` already
    exists in that directory it is loaded and returned unchanged;
    otherwise the list ``0, step, 2*step, ...`` (all values below ``N``,
    with ``step = int(N / 100)``) is built, pickled to that path and
    returned.
    """
    feature_dir, total = (
        ('../processed_features_debug2/', 1503)
        if DEBUG
        else ('../processed_features/', 1503424)
    )
    cache_path = feature_dir + 'local_validation.pickle'

    # Reuse the cached list when present so the result is stable across runs.
    if not os.path.exists(cache_path):
        step = int(total / 100)
        indices = list(range(0, total, step))
        save_pickle(indices, cache_path)
        return indices
    return load_pickle(cache_path)
コード例 #2
0
#         "weight":[7,8,3,8],"price":[4,5,6,6]})
# print(df)

# df['size'] = df.groupby(['fruit','price']).transform(np.size)
# print(df)

# df['freq'] = df.groupby('fruit')['fruit'].transform('count')
# print(df)

# selcols = ['fruit']

# df5 = df[selcols].groupby(selcols).size().reset_index(name="Time4")
# print(df5)
# df = df.merge(df5, on=selcols, how='left')
# print(df)

# feature_name = 'Freq'
# df6 = df[selcols]. \
#     groupby(selcols).size(). \
#     reset_index(name=feature_name)

# print(df6)
# df = df.merge(df6, on=selcols, how='left')
# print(df)

# Re-save the aggregated-features CSV as a pickle file.
# `load_csv` and `save_pickle` are helpers defined outside this snippet.
filename = '../input/aggregated_features.csv'
df = load_csv(filename)  # presumably a pandas DataFrame — confirm against load_csv

# Destination path for the pickled copy (same ../input/ directory).
savename = '../input/kernel_aggregated_features.pickle'
save_pickle(df, savename)