Ejemplo n.º 1
0
def main(user_id=34176):
    """Write a cleaned copy of one user's rows from ``train_format2.csv``.

    The CSV is loaded with ``user_id`` as the index. A ``Tensor`` is built
    from the user's ``merchant_id`` / ``activity_log`` columns, and the
    labels returned by its ``is_one_click()`` method are dropped from the
    user's rows before they are saved to ``<user_id>_clean_data.csv``.

    Args:
        user_id: Index label of the user to process. Defaults to 34176,
            the value that used to be hard-coded.
    """
    df = pd.read_csv('train_format2.csv', index_col='user_id')

    # ``.loc`` is the label-based replacement for ``.ix``, which was
    # deprecated and later removed from pandas.
    specific_user_tensor_info = df.loc[user_id, ['merchant_id', 'activity_log']]

    specific_user_tensor = Tensor(specific_user_tensor_info)
    specific_user_tensor.build_tensor()

    # NOTE(review): is_one_click() is presumably returning positional row
    # labels that are valid after reset_index() -- confirm against Tensor.
    one_click_list = specific_user_tensor.is_one_click()

    user_df = df.loc[user_id]
    # reset_index() moves the user_id index out of the way so the rows can
    # be dropped by the labels in one_click_list.
    user_df = user_df.reset_index()
    user_df = user_df.drop(one_click_list)
    user_df.to_csv('%s_clean_data.csv' % user_id, index=False)
Ejemplo n.º 2
0
def transform_csv_to_liner_tensor(df):
    """Build one ``Tensor`` per user found in the index of *df*.

    For every distinct index label, the ``merchant_id`` and
    ``activity_log`` columns of that user's rows are passed to ``Tensor``,
    ``build_tensor()`` is called on the result, and the object is stored
    under the user's id. Progress is printed to stdout as a debugging aid.

    Args:
        df: DataFrame indexed by user id that contains ``merchant_id`` and
            ``activity_log`` columns. The caller's frame is not modified.

    Returns:
        dict mapping each user id to its built ``Tensor``.
    """
    # fillna() returns a new frame; the result must be kept, otherwise the
    # call has no effect. Rebinding the local name leaves the caller's
    # DataFrame untouched.
    df = df.fillna(0)

    # Visit every user once, even when several rows share the same index
    # label; rebuilding the same tensor per duplicate row is wasted work.
    user_id_list = list(df.index.unique())
    users_tensor = dict()

    # help debug
    user_count = len(user_id_list)
    print("total user_count: %d" % user_count)
    for user_time, user_id in enumerate(user_id_list):
        # Printed before the build so a failure can be traced to a user.
        print("the times: %d the user_id: %s" % (user_time, user_id))
        # ``.loc`` is the label-based replacement for the removed ``.ix``.
        user_tensor_info = df.loc[user_id, ['merchant_id', 'activity_log']]
        specific_user_tensor = Tensor(user_tensor_info)
        specific_user_tensor.build_tensor()
        users_tensor[user_id] = specific_user_tensor
        # float() forces true division; plain ``/`` on two ints truncates
        # to 0 under Python 2.
        print("percent: %s" % (float(user_time + 1) / user_count))

    return users_tensor