def update_userdb(new_files_list, entire_list):
    """Merge newly arrived dataset CSV files into the persisted user database.

    Loads the stored ``user_dict`` via ``load_obj``, reads every file named in
    ``new_files_list`` from the ``dataset/`` directory, and records each
    non-guest row on the matching ``User`` object (creating the object the
    first time an identifier is seen).  The updated dictionary and the
    extended list of processed files are then written back with ``save_obj``.

    Args:
        new_files_list: Sized iterable of CSV file names (relative to
            ``dataset/``) that have not been processed yet.
        entire_list: Collection of file names processed so far.  Each
            successfully processed file is appended with ``np.append`` and the
            result is saved; the caller's object is not modified in place.

    Returns:
        None.  Results are persisted through ``save_obj``.

    Notes:
        * Files whose column count is not exactly 10 are skipped and are NOT
          added to the saved file list.
        * Rows containing any missing value are dropped before processing.
        * Row positions come from ``DataFrame.itertuples()``, where position 0
          is the index, so ``row[k]`` is the k-th CSV column (1-based).
    """
    expected_columns = 10

    print('Updating user database...')
    if len(new_files_list) == 0:
        print('No new file needs to be updated.')
        print('Done')
        return

    print('Loading...')
    user_dict = load_obj('user_dict', 'userdb')

    print('Updating...')
    total_files = len(new_files_list)
    for idx, file_name in enumerate(new_files_list):
        df = pd.read_csv('dataset/' + file_name, header=0)
        if len(df.columns) != expected_columns:
            # Unexpected layout: leave the file unrecorded, as before.
            continue

        # dropna() returns a new frame; the result must be kept, otherwise
        # rows with missing values are still processed below.
        df = df.dropna()

        for row in df.itertuples():
            user_id = row[2]
            if user_id == 'guest':
                continue

            user = user_dict.get(user_id)
            if user is None:
                # First time this identifier is seen: create and register it.
                user = User(user_id)
                user_dict[user_id] = user

            user.add_ip(row[5])
            user.add_visit(row[3])
            user.add_item(row[7])
            user.add_source(row[10])

        # '\r' keeps the percentage on a single, continuously rewritten line.
        print("{0:.3f}%\r".format((idx + 1) * 100 / total_files),
              end='', flush=True)
        entire_list = np.append(entire_list, file_name)

    print('\nFinish updating...\nSaving...')
    save_obj(user_dict, 'user_dict', 'userdb')
    save_obj(entire_list, 'list_of_files', data_manage_folder)
    # Reclaim memory held by the per-file DataFrames.
    gc.collect()
    print('Done')