def create_local_validation():
    """Return the local-validation row indices, caching them on disk.

    The feature directory and the total row count are selected by the
    module-level ``DEBUG`` flag. If ``local_validation.pickle`` already
    exists in that directory, its contents are returned as loaded by
    ``load_pickle``. Otherwise a list of evenly spaced indices (one every
    ``row_count // 100`` rows, starting at 0) is built, written with
    ``save_pickle`` and returned.

    Returns:
        The cached object when the pickle exists, otherwise the freshly
        built list of ints.
    """
    # Debug runs point at a separate feature directory and a much smaller
    # row count than the full data set.
    feature_dir, row_count = (
        ('../processed_features_debug2/', 1503)
        if DEBUG
        else ('../processed_features/', 1503424)
    )
    cache_path = feature_dir + 'local_validation.pickle'

    # Reuse a previously saved selection so it stays the same between runs.
    if os.path.exists(cache_path):
        return load_pickle(cache_path)

    # Sample roughly one row in every hundred.
    stride = row_count // 100
    sampled_indices = list(range(0, row_count, stride))
    save_pickle(sampled_indices, cache_path)
    return sampled_indices
# "weight":[7,8,3,8],"price":[4,5,6,6]}) # print(df) # df['size'] = df.groupby(['fruit','price']).transform(np.size) # print(df) # df['freq'] = df.groupby('fruit')['fruit'].transform('count') # print(df) # selcols = ['fruit'] # df5 = df[selcols].groupby(selcols).size().reset_index(name="Time4") # print(df5) # df = df.merge(df5, on=selcols, how='left') # print(df) # feature_name = 'Freq' # df6 = df[selcols]. \ # groupby(selcols).size(). \ # reset_index(name=feature_name) # print(df6) # df = df.merge(df6, on=selcols, how='left') # print(df) filename = '../input/aggregated_features.csv' df = load_csv(filename) savename = '../input/kernel_aggregated_features.pickle' save_pickle(df, savename)