def main(user_id=34176):
    """Write one user's rows, minus the rows flagged as one-click, to CSV.

    Reads ``train_format2.csv`` indexed by ``user_id``, builds a ``Tensor``
    from that user's ``merchant_id`` / ``activity_log`` columns, asks it
    which rows are "one click" via ``is_one_click()``, drops those rows and
    saves the remainder to ``<user_id>_clean_data.csv``.

    Args:
        user_id: Index label of the user to clean. Defaults to 34176, the
            value that used to be hard-coded, so ``main()`` with no
            arguments reads and writes the same files as before.
    """
    df = pd.read_csv('train_format2.csv', index_col='user_id')

    # ``.loc`` is the label-based replacement for ``DataFrame.ix``, which
    # was deprecated in pandas 0.20 and removed in 1.0.
    user_df = df.loc[user_id]

    specific_user_tensor = Tensor(user_df[['merchant_id', 'activity_log']])
    specific_user_tensor.build_tensor()
    one_click_list = specific_user_tensor.is_one_click()

    # After reset_index() the row labels are 0..n-1, so drop() removes rows
    # by position within this user's slice.
    # NOTE(review): presumably is_one_click() returns such positional
    # labels -- confirm against the Tensor implementation.
    user_df = user_df.reset_index()
    user_df = user_df.drop(one_click_list)
    user_df.to_csv('%s_clean_data.csv' % user_id, index=False)
def transform_csv_to_liner_tensor(df):
    """Build one ``Tensor`` per user found in the index of *df*.

    For every distinct index label (user id) the ``merchant_id`` and
    ``activity_log`` columns of that user's rows are handed to ``Tensor``,
    ``build_tensor()`` is called on it, and the result is stored under the
    user id. A progress line is written to stdout for each user.

    Args:
        df: DataFrame indexed by user id that contains at least the
            ``merchant_id`` and ``activity_log`` columns. The caller's
            frame is not modified.

    Returns:
        dict mapping each user id to its built ``Tensor``.
    """
    import sys

    # fillna() returns a new frame; the previous bare ``df.fillna(0)``
    # discarded it, so missing values were never actually replaced.
    df = df.fillna(0)

    # Visit each user once. Iterating the raw index rebuilt the same tensor
    # once per row when a user id occurs on several rows; the resulting
    # dict is identical, only the redundant work (and the row-based total
    # in the progress output) goes away. pd.unique keeps first-seen order.
    user_ids = pd.unique(df.index.values)
    users_tensor = dict()

    # Debug progress output.
    user_count = len(user_ids)
    print("total user_count: %s" % user_count)

    for done, user_id in enumerate(user_ids):
        # Written before the build (without a newline) so a failure inside
        # Tensor still shows which user was being processed.
        sys.stdout.write("the times: %s the user_id: %s " % (done, user_id))

        # ``.loc`` is the label-based replacement for the removed ``.ix``.
        user_tensor_info = df.loc[user_id, ['merchant_id', 'activity_log']]
        specific_user_tensor = Tensor(user_tensor_info)
        specific_user_tensor.build_tensor()
        users_tensor[user_id] = specific_user_tensor

        # Float division: under Python 2 ``int / int`` truncated, so the
        # fraction printed as 0 for every user except the last.
        sys.stdout.write("percent: %s\n" % (float(done + 1) / user_count))

    return users_tensor