Ejemplo n.º 1
0
 def test_groupby_to_series_to_frame_2(self):
     """Grouping by a list of labels with ``use_apply=False`` matches pandas.

     The reference frame is ``df.groupby(labels).apply(frame_to_series)``;
     ``pandas_easy.groupby_to_series_to_frame`` must return an equal frame.
     """
     frame = pd.DataFrame({'a': [6, 2, 2], 'b': [4, 5, 6]})
     group_keys = ['g1', 'g1', 'g2']

     # Reference result computed directly with pandas.
     expected = frame.groupby(group_keys).apply(frame_to_series)

     # Third positional argument is presumably the job count -- TODO confirm.
     actual = pandas_easy.groupby_to_series_to_frame(
         frame, frame_to_series, 1, by=group_keys, use_apply=False)

     assert_frame_equal(actual, expected)
Ejemplo n.º 2
0
def user_appCategory():
    """Build per-user app-category features and pickle them in four parts.

    Loads the user/app table via ``get_userID_appID()``, groups it by
    ``'userID'`` and applies ``count_user_appCategory`` (with the unique
    ``'appCategory'`` values bound as its first argument) through
    ``pandas_easy.groupby_to_series_to_frame``. The resulting frame is cut
    into four consecutive slices that are pickled (protocol 4) to
    ``user_category_path1`` .. ``user_category_path4``.

    Returns:
        None. The only output is the four pickle files.
    """
    # NOTE(review): only the first 1000 rows are processed; this looks like a
    # debugging limit left in place -- confirm before relying on the output.
    user_app_category = get_userID_appID()[:1000]
    app_category_series = user_app_category['appCategory'].unique()

    func = partial(count_user_appCategory, app_category_series)
    user_app_cateogry_dataframe = pandas_easy.groupby_to_series_to_frame(
        user_app_category,
        func,
        n_jobs=mp.cpu_count() - 1,
        use_apply=True,
        by='userID')

    # Three slices of len // 4 rows each; the fourth takes the remainder.
    split_point = len(user_app_cateogry_dataframe) // 4
    chunks = (
        user_app_cateogry_dataframe[:split_point],
        user_app_cateogry_dataframe[split_point:split_point * 2],
        user_app_cateogry_dataframe[split_point * 2:split_point * 3],
        user_app_cateogry_dataframe[split_point * 3:],
    )
    paths = (
        user_category_path1,
        user_category_path2,
        user_category_path3,
        user_category_path4,
    )

    for chunk, path in zip(chunks, paths):
        # Context manager guarantees each file is flushed and closed, even if
        # pickling fails part-way through.
        with open(path, 'wb') as handle:
            pickle.dump(chunk, handle, protocol=4)
Ejemplo n.º 3
0
 def test_groupby_to_series_to_frame_3(self):
     """Grouping by column ``'a'`` with the builtin ``max`` matches pandas.

     The reference frame is ``df.groupby('a').apply(max)``;
     ``pandas_easy.groupby_to_series_to_frame`` must return an equal frame.
     """
     frame = pd.DataFrame({'a': [6, 2, 2], 'b': [4, 5, 6]})

     # Reference result computed directly with pandas.
     expected = frame.groupby('a').apply(max)

     # Third positional argument is presumably the job count -- TODO confirm.
     actual = pandas_easy.groupby_to_series_to_frame(frame, max, 1, by='a')

     assert_frame_equal(actual, expected)