import csv
import datetime
import json

from pymongo import DeleteOne, InsertOne

import CollectionFactory  # project-local helper returning a configured db handle (import path assumed)


def update_batch(num):
    # Time a single update_many that sets Count on every document at once.
    mydb = CollectionFactory.create_client_and_db()
    tag = mydb['test_tags']
    sum_time = 0.0
    starttime = datetime.datetime.now()
    tag.update_many({}, {'$set': {'Count': 10001}})
    endtime = datetime.datetime.now()
    sum_time += (endtime - starttime).total_seconds()
    return {"type": "update batch", "num": num, "time": sum_time}
def update_separate(num):
    # Time num individual update_one calls, accumulating the elapsed time.
    mydb = CollectionFactory.create_client_and_db()
    tag = mydb['test_tags']
    sum_time = 0.0
    for i in range(num):
        starttime = datetime.datetime.now()
        tag.update_one({'Id': i}, {'$set': {'Count': 10002}})
        endtime = datetime.datetime.now()
        sum_time += (endtime - starttime).total_seconds()  # accumulate, don't overwrite
    return {"type": "update separate", "num": num, "time": sum_time}
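# A minimal usage sketch (hypothetical, not part of the original benchmarks):
# run both update strategies for the same workload size and print the timings
# side by side. Assumes CollectionFactory already points at a running MongoDB
# with a populated test_tags collection.
def compare_update_strategies(num):
    for result in (update_batch(num), update_separate(num)):
        print("%s: n=%d, %.4fs" % (result["type"], result["num"], result["time"]))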
def demo_query():
    # Renamed from demo() to avoid clashing with the delete-experiment demo()
    # defined further down. Prints all users with Id < 10 and the elapsed time.
    mydb = CollectionFactory.create_client_and_db()
    start = datetime.datetime.now()
    query = {"Id": {'$lt': 10}}
    cursor = mydb.users.find(query)  # avoid shadowing the built-in list
    print(type(cursor))
    for doc in cursor:
        print(doc)
    end = datetime.datetime.now()
    print(end - start)
def read_csv2mongodb(filename, collectionName):
    # Clear the target collection, then bulk-insert every CSV row as-is.
    mydb = CollectionFactory.create_client_and_db()
    mydb[collectionName].delete_many({})
    count = 0
    with open(filename, encoding='utf-8', errors="ignore") as fr:
        rows = csv.DictReader(fr)
        post_list = []
        for row in rows:
            post_list.append(row)
            count += 1
        mydb[collectionName].bulk_write(list(map(InsertOne, post_list)))
    return count
def update_batch_mutiple(num, average_iteration_num=1):
    # Time update_many with a compound filter, averaged over several runs.
    sum_time = 0.0
    mydb = CollectionFactory.create_client_and_db()
    tag = mydb['posts']
    for i in range(average_iteration_num):
        starttime = datetime.datetime.now()
        tag.update_many({'Id': {'$lt': num}, 'Score': {'$gt': 20}},
                        {'$inc': {'ViewCount': 1}})
        endtime = datetime.datetime.now()
        sum_time += (endtime - starttime).total_seconds()
    return {
        "type": "update_one_table_mul_filter",
        "num": num,
        "time": sum_time / average_iteration_num
    }
def init_env(num):
    # Seed the users and posts collections with num synthetic documents each.
    mydb = CollectionFactory.create_client_and_db()
    users = mydb['users']
    posts = mydb['posts']
    for i in range(num):
        user = {
            'id': i,
            'reputation': 'reputation' + str(i),
            'display_name': 'display_name' + str(i),
            'age': 28,
        }
        post = {
            'id': i,
            'body': 'body' + str(i),
            'owner_user_id': user['id'],
            'title': 'title' + str(i),
        }
        users.insert_one(user)
        posts.insert_one(post)
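# A small seeding sketch (an assumption: you want synthetic data rather than
# the CSV dumps). Note that init_env writes lowercase keys ('id', 'reputation'),
# while the update benchmarks above filter on capitalized ones ('Id', 'Score'),
# so use the CSV loaders below when exercising those benchmarks.
def seed_demo():
    mydb = CollectionFactory.create_client_and_db()
    init_env(1000)  # 1000 synthetic users and 1000 synthetic posts
    print(mydb['users'].count_documents({}), mydb['posts'].count_documents({}))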
def delete_separate(num, average_iteration_num=1):
    mydb = CollectionFactory.create_client_and_db()
    starttime = datetime.datetime.now()
    # Delete one document at a time.
    for i in range(num):
        # A missing collection is created on first access; the insert tests
        # currently do not use posts or tags, so adjust this as needed.
        mydb['test_1'].delete_one({'_id': i, 'x': 1})
    endtime = datetime.datetime.now()
    sum_time = (endtime - starttime).total_seconds()
    return {
        "type": "delete separate",
        "num": num,
        "time": sum_time
    }
def delete_batch(num, average_iteration_num=1):
    # Build one DeleteOne request per document and submit them in a single
    # bulk_write instead of num round trips.
    mydb = CollectionFactory.create_client_and_db()
    starttime = datetime.datetime.now()
    res = []
    for i in range(num):
        res.append(DeleteOne({'_id': i, 'x': 1}))
    mydb['test_2'].bulk_write(res)
    endtime = datetime.datetime.now()
    sum_time = (endtime - starttime).total_seconds()
    return {
        "type": "delete batch",
        "num": num,
        "time": sum_time
    }
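# A hypothetical comparison runner for the two delete paths above. It assumes
# test_1 and test_2 were pre-populated with documents shaped like
# {'_id': i, 'x': 1}; deleting from empty collections still runs, but the
# timings then mostly measure round trips rather than real deletes.
def compare_delete_strategies(num):
    for result in (delete_separate(num), delete_batch(num)):
        print(result)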
def update_batch_mutiple_query_mutiple_update(num, average_iteration_num=1):
    # Join users to posts with $lookup, take the first user whose ViewCount
    # exceeds num, then update both collections for that user.
    mydb = CollectionFactory.create_client_and_db()
    users = mydb['users']
    posts = mydb['posts']
    sum_time = 0.0
    for i in range(average_iteration_num):
        starttime = datetime.datetime.now()
        cursor = mydb["users"].aggregate([
            {
                "$lookup": {
                    "from": "posts",
                    "localField": "Id",
                    "foreignField": "OwnerUserId",
                    "as": "inventory_docs"
                }
            },
            {
                "$match": {"ViewCount": {"$gt": num}}
            }
        ])
        query_doc = {'Id': -1000}  # sentinel: matches nothing if the cursor is empty
        if cursor.alive:
            query_doc = cursor.next()
        queryid = query_doc['Id']
        users.update_many({'Id': queryid}, {'$inc': {'Reputation': 1}})
        posts.update_many({'OwnerUserId': queryid}, {'$inc': {'ViewCount': 1}})
        endtime = datetime.datetime.now()
        sum_time += (endtime - starttime).total_seconds()
    return {
        "type": "update_aggregate",
        "num": num,
        "time": sum_time / average_iteration_num
    }
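# For reference, a standalone sketch of the same $lookup join used above: each
# user document gains an inventory_docs array holding the posts whose
# OwnerUserId equals the user's Id, and $match then keeps users whose
# ViewCount exceeds the threshold. The $limit stage is an addition here so the
# inspection stays cheap.
def inspect_lookup(threshold):
    mydb = CollectionFactory.create_client_and_db()
    pipeline = [
        {"$lookup": {"from": "posts", "localField": "Id",
                     "foreignField": "OwnerUserId", "as": "inventory_docs"}},
        {"$match": {"ViewCount": {"$gt": threshold}}},
        {"$limit": 1},
    ]
    for doc in mydb["users"].aggregate(pipeline):
        print(doc)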
def _to_int(value):
    # CSV fields arrive as strings; empty fields become 0.
    return int(value) if value != '' else 0


def read_csv2mongodb_forusers(filename, collectionName):
    mydb = CollectionFactory.create_client_and_db()
    mydb[collectionName].delete_many({})
    count = 0
    with open(filename, encoding='utf-8', errors="ignore") as fr:
        rows = csv.DictReader(fr)
        post_list = []
        for row in rows:
            # Skip rows with implausibly long Age or Views values.
            if len(row["Age"]) > 3 or len(row["Views"]) > 7:
                continue
            for field in ("Id", "Reputation", "Views", "Age",
                          "UpVotes", "DownVotes"):
                row[field] = _to_int(row[field])
            post_list.append(row)
            count += 1
        mydb[collectionName].bulk_write(list(map(InsertOne, post_list)))
    return count
def read_csv2mongodb_forposts(filename, collectionName):
    mydb = CollectionFactory.create_client_and_db()
    mydb[collectionName].delete_many({})
    count = 0
    with open(filename, encoding='utf-8', errors="ignore") as fr:
        rows = csv.DictReader(fr)
        post_list = []
        for row in rows:
            # Optional row filter, kept from the original for reference:
            # if len(row["OwnerUserId"]) > 10 or len(row["Views"]) > 7:
            #     continue
            for field in ("Id", "PostTypeId", "AcceptedAnswerId", "ParentId",
                          "Score", "ViewCount", "OwnerUserId",
                          "LastEditorUserId", "AnswerCount", "CommentCount",
                          "FavoriteCount"):
                row[field] = _to_int(row[field])
            post_list.append(row)
            count += 1
        mydb[collectionName].bulk_write(list(map(InsertOne, post_list)))
    return count
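# Hypothetical end-to-end loading sketch; the file path arguments are
# placeholders for the Stack Exchange-style CSV dumps these loaders expect.
def load_all(users_csv, posts_csv):
    n_users = read_csv2mongodb_forusers(users_csv, 'users')
    n_posts = read_csv2mongodb_forposts(posts_csv, 'posts')
    print('loaded %d users and %d posts' % (n_users, n_posts))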
def start_test_delete_exp(num_list, mydb, iteration_num):
    # For each data size, time non-batched deletes keyed by Id, then keyed by
    # _id, and save all results as JSON. Note: this driver passes the db handle
    # and a tags_id flag to delete_separate; the two-argument version defined
    # above would need extending to match that signature.
    result_list = []
    for num in num_list:
        # Time non-batched deletes keyed by Id.
        result = delete_separate(num, mydb, iteration_num)
        result_list.append(result)
    for num in num_list:
        # Time non-batched deletes keyed by _id.
        result = delete_separate(num, mydb, iteration_num, tags_id=True)
        result_list.append(result)
    filename = "experiment_mongodb_delete_result.json"
    save(filename, result_list)


def save(filename, result_list):
    with open(filename, "w") as f:
        json.dump(result_list, f)


def demo():
    mydb = CollectionFactory.create_client_and_db()
    num_list = [5000, 10000]
    # Pass in the data sizes, the database handle, and the number of runs.
    start_test_delete_exp(num_list, mydb, 3)


if __name__ == '__main__':
    mydb = CollectionFactory.create_client_and_db()
    num_list = [5000]
    # Pass in the data sizes, the database handle, and the number of runs.
    start_test_delete_exp(num_list, mydb, 3)