Beispiel #1
0
def update_batch(num):
    """Time one ``update_many`` call that sets ``Count`` on ``test_tags``.

    The update uses an empty filter, so ``num`` does not limit which
    documents are touched; it is only echoed back in the result.

    Args:
        num: Label copied into the returned record.

    Returns:
        dict with keys ``"type"``, ``"num"`` and ``"time"`` (elapsed seconds).
    """
    collection = CollectionFactory.create_client_and_db()['test_tags']
    elapsed = 0.0
    began = datetime.datetime.now()
    collection.update_many({}, {'$set': {'Count': 10001}})
    finished = datetime.datetime.now()
    elapsed += (finished - began).total_seconds()

    return {"type": "update batch", "num": num, "time": elapsed}
Beispiel #2
0
def update_separate(num):
    """Time ``num`` individual ``update_one`` calls against ``test_tags``.

    Each call targets the document whose ``Id`` equals the loop index and
    sets its ``Count`` field to 10002.

    Args:
        num: Number of single-document updates to issue (ids ``0..num-1``).

    Returns:
        dict with keys ``"type"``, ``"num"`` and ``"time"``, where ``"time"``
        is the summed duration in seconds of all update calls.
    """
    mydb = CollectionFactory.create_client_and_db()
    tag = mydb['test_tags']
    sum_time = 0.0

    for doc_id in range(num):
        starttime = datetime.datetime.now()
        tag.update_one({'Id': doc_id}, {'$set': {'Count': 10002}})
        endtime = datetime.datetime.now()
        # Accumulate, so the result is the total over all updates rather
        # than just the duration of the last one.
        sum_time += (endtime - starttime).total_seconds()

    return {"type": "update separate", "num": num, "time": sum_time}
Beispiel #3
0
def demo():
    """Print the ``users`` documents with ``Id`` below 10 and the time taken.

    Prints the type of the object returned by ``find``, then each document
    it yields, and finally the elapsed wall-clock time for the whole run.
    """
    mydb = CollectionFactory.create_client_and_db()
    began = datetime.datetime.now()
    found = mydb.users.find({"Id": {'$lt': 10}})
    print(type(found))
    for document in found:
        print(document)
    finished = datetime.datetime.now()
    print(finished - began)
Beispiel #4
0
def read_csv2mongodb(filename, collectionName):
    """Replace the contents of a collection with the rows of a CSV file.

    The target collection is emptied first, then every CSV row (as produced
    by ``csv.DictReader``, i.e. all values are strings) is inserted in one
    bulk write.

    Args:
        filename: Path of the CSV file; read as UTF-8, undecodable bytes
            are ignored.
        collectionName: Name of the collection to clear and refill.

    Returns:
        Number of rows inserted.
    """
    mydb = CollectionFactory.create_client_and_db()
    collection = mydb[collectionName]
    collection.delete_many({})
    with open(filename, encoding='utf-8', errors="ignore") as fr:
        operations = [InsertOne(row) for row in csv.DictReader(fr)]
    # bulk_write raises InvalidOperation for an empty request list, so a CSV
    # without data rows must skip the write instead of crashing.
    if operations:
        collection.bulk_write(operations)
    return len(operations)
Beispiel #5
0
def update_batch_mutiple(num, average_iteration_num=1):
    """Time an ``update_many`` on ``posts`` that filters on two fields.

    Each iteration increments ``ViewCount`` on every post whose ``Id`` is
    below ``num`` and whose ``Score`` is above 20.

    Args:
        num: Exclusive upper bound used in the ``Id`` filter.
        average_iteration_num: How many times the update is repeated; the
            reported time is the mean over these runs.

    Returns:
        dict with keys ``"type"``, ``"num"`` and ``"time"`` (mean seconds
        per iteration).
    """
    total_seconds = 0.0
    posts = CollectionFactory.create_client_and_db()['posts']
    for _ in range(average_iteration_num):
        began = datetime.datetime.now()
        posts.update_many(
            {'Id': {'$lt': num}, 'Score': {'$gt': 20}},
            {'$inc': {'ViewCount': 1}},
        )
        finished = datetime.datetime.now()
        total_seconds += (finished - began).total_seconds()
    mean_seconds = total_seconds / average_iteration_num
    return {
        "type": "update_one_table_mul_filter",
        "num": num,
        "time": mean_seconds
    }
Beispiel #6
0
def init_env(num):
    """Insert ``num`` synthetic users and one post owned by each of them.

    For every index ``0..num-1`` a user document and a post document are
    built from the index and inserted into ``users`` and ``posts``.

    Args:
        num: Number of user/post pairs to create.
    """
    mydb = CollectionFactory.create_client_and_db()
    users, posts = mydb['users'], mydb['posts']
    for index in range(num):
        suffix = str(index)
        user = {
            'id': index,
            'reputation': 'reputation' + suffix,
            'display_name': 'display_name' + suffix,
            'age': 28,
        }
        post = {
            'id': index,
            'body': 'body' + suffix,
            'owner_user_id': user['id'],
            'title': 'title' + suffix,
        }
        users.insert_one(user)
        posts.insert_one(post)
def delete_separate(num, average_iteration_num=1):
    """Time ``num`` individual ``delete_one`` calls against ``test_1``.

    Args:
        num: Number of deletes to issue; the i-th call filters on
            ``{'_id': i, 'x': 1}``.
        average_iteration_num: Accepted but not used by this function.

    Returns:
        dict with keys ``"type"``, ``"num"`` and ``"time"`` (total elapsed
        seconds for the whole loop).
    """
    mydb = CollectionFactory.create_client_and_db()
    began = datetime.datetime.now()
    # One request per document, as opposed to a single bulk request.
    for doc_id in range(num):
        mydb['test_1'].delete_one({'_id': doc_id, 'x': 1})
    finished = datetime.datetime.now()
    elapsed = (finished - began).total_seconds()

    return {
        "type": "delete separate",
        "num": num,
        "time": elapsed
    }
def delete_batch(num, average_iteration_num=1):
    """Time deleting ``num`` documents from ``test_2`` with one bulk write.

    The measured interval covers both building the ``DeleteOne`` requests
    and executing the bulk write.

    Args:
        num: Number of delete requests; the i-th filters on
            ``{'_id': i, 'x': 1}``.
        average_iteration_num: Accepted but not used by this function.

    Returns:
        dict with keys ``"type"``, ``"num"`` and ``"time"`` (elapsed seconds).
    """
    mydb = CollectionFactory.create_client_and_db()

    starttime = datetime.datetime.now()
    requests = [DeleteOne({'_id': i, 'x': 1}) for i in range(num)]
    # bulk_write raises InvalidOperation on an empty request list, so skip
    # it when there is nothing to delete (num <= 0).
    if requests:
        mydb['test_2'].bulk_write(requests)

    endtime = datetime.datetime.now()
    sum_time = (endtime - starttime).total_seconds()

    return {
        "type": "delete batch",
        "num": num,
        "time": sum_time
    }
Beispiel #9
0
def update_batch_mutiple_query_mutiple_update(num, average_iteration_num=1):
    """Time an aggregate lookup followed by updates on two collections.

    Each iteration:
      1. aggregates ``users`` with a ``$lookup`` into ``posts``
         (``Id`` -> ``OwnerUserId``) and a ``$match`` on ``ViewCount > num``;
      2. takes the ``Id`` of the first resulting document, or ``-1000`` when
         the aggregate yields nothing;
      3. increments ``Reputation`` on the users with that ``Id`` and
         ``ViewCount`` on the posts whose ``OwnerUserId`` is that ``Id``.

    Args:
        num: Threshold used in the ``$match`` stage.
        average_iteration_num: Number of repetitions; the reported time is
            the mean over these runs. Passing 0 raises ``ZeroDivisionError``.

    Returns:
        dict with keys ``"type"``, ``"num"`` and ``"time"`` (mean seconds
        per iteration).
    """
    mydb = CollectionFactory.create_client_and_db()
    tag = mydb['users']
    tag_posts = mydb['posts']
    sum_time = 0.0
    for _ in range(average_iteration_num):
        starttime = datetime.datetime.now()
        cursor = tag.aggregate([
            {
                "$lookup": {
                    "from": "posts",
                    "localField": "Id",
                    "foreignField": "OwnerUserId",
                    "as": "inventory_docs"
                }
            },
            {
                "$match": {
                    "ViewCount": {"$gt": num}
                }
            }
        ])
        # `cursor.alive` can be True while no document is left, in which
        # case `cursor.next()` raises StopIteration. `next` with a default
        # covers both the empty and the non-empty case.
        first_match = next(cursor, {'Id': -1000})
        queryid = first_match['Id']
        tag.update_many({'Id': queryid}, {'$inc': {'Reputation': 1}})
        tag_posts.update_many({'OwnerUserId': queryid}, {'$inc': {'ViewCount': 1}})
        endtime = datetime.datetime.now()
        sum_time += (endtime - starttime).total_seconds()
    return {
        "type": "update_aggregate",
        "num": num,
        "time": sum_time / average_iteration_num
    }
Beispiel #10
0
def read_csv2mongodb_forusers(filename, collectionName):
    """Replace a collection with user rows from a CSV, coercing numeric columns.

    The target collection is emptied first. Rows whose ``Age`` text is longer
    than 3 characters or whose ``Views`` text is longer than 7 characters are
    skipped (presumably malformed rows -- confirm against the data set).
    For the remaining rows the columns ``Id``, ``Reputation``, ``Views``,
    ``Age``, ``UpVotes`` and ``DownVotes`` are converted to ``int``, with an
    empty string becoming 0.

    Args:
        filename: Path of the CSV file; read as UTF-8, undecodable bytes
            are ignored.
        collectionName: Name of the collection to clear and refill.

    Returns:
        Number of rows inserted.

    Raises:
        ValueError: If a non-empty value in one of the numeric columns is
            not a valid integer.
    """
    int_columns = ("Id", "Reputation", "Views", "Age", "UpVotes", "DownVotes")
    mydb = CollectionFactory.create_client_and_db()
    collection = mydb[collectionName]
    collection.delete_many({})
    operations = []
    with open(filename, encoding='utf-8', errors="ignore") as fr:
        for row in csv.DictReader(fr):
            if len(row["Age"]) > 3 or len(row["Views"]) > 7:
                continue
            for column in int_columns:
                row[column] = int(row[column]) if row[column] != '' else 0
            operations.append(InsertOne(row))
    # bulk_write raises InvalidOperation for an empty request list, so skip
    # it when no row survived the filter (or the CSV had no data rows).
    if operations:
        collection.bulk_write(operations)
    return len(operations)
Beispiel #11
0
def read_csv2mongodb_forposts(filename, collectionName):
    """Replace a collection with post rows from a CSV, coercing numeric columns.

    The target collection is emptied first. For every row the columns listed
    in ``int_columns`` below are converted to ``int``, with an empty string
    becoming 0; all other columns stay as the strings ``csv.DictReader``
    produced.

    Args:
        filename: Path of the CSV file; read as UTF-8, undecodable bytes
            are ignored.
        collectionName: Name of the collection to clear and refill.

    Returns:
        Number of rows inserted.

    Raises:
        ValueError: If a non-empty value in one of the numeric columns is
            not a valid integer.
    """
    int_columns = (
        "Id",
        "PostTypeId",
        "AcceptedAnswerId",
        "ParentId",
        "Score",
        "ViewCount",
        "OwnerUserId",
        "LastEditorUserId",
        "AnswerCount",
        "CommentCount",
        "FavoriteCount",
    )
    mydb = CollectionFactory.create_client_and_db()
    collection = mydb[collectionName]
    collection.delete_many({})
    operations = []
    with open(filename, encoding='utf-8', errors="ignore") as fr:
        for row in csv.DictReader(fr):
            for column in int_columns:
                row[column] = int(row[column]) if row[column] != '' else 0
            operations.append(InsertOne(row))
    # bulk_write raises InvalidOperation for an empty request list, so a CSV
    # without data rows must skip the write instead of crashing.
    if operations:
        collection.bulk_write(operations)
    return len(operations)
Beispiel #12
0
def demo():
    """Run the delete experiment for data sizes 5000 and 10000."""
    database = CollectionFactory.create_client_and_db()
    # Arguments: data sizes, database handle, number of test runs.
    start_test_delete_exp([5000, 10000], database, 3)
Beispiel #13
0
        ## 测试非批量的删除(id)操作时间
        result = delete_separate(num, mydb, iteration_num)
        result_list.append(result)

    for num in num_list:
        ## 测试非批量的删除(_id)操作时间
        result = delete_separate(num, mydb, iteration_num, tags_id=True)
        result_list.append(result)

    filename = "experiment_mongodb_delete_result.json"
    save(filename, result_list)


def save(filename, result_list):
    """Serialize ``result_list`` as JSON into ``filename``, overwriting it.

    Args:
        filename: Path of the output file (opened in text write mode).
        result_list: JSON-serializable object to store.
    """
    with open(filename, mode="w") as out_file:
        json.dump(result_list, out_file)


def demo():
    """Run the delete experiment for data sizes 5000 and 10000."""
    database = CollectionFactory.create_client_and_db()
    # Arguments: data sizes, database handle, number of test runs.
    start_test_delete_exp([5000, 10000], database, 3)


if __name__ == '__main__':
    # Script entry point: run the delete experiment on a single data size.
    mydb = CollectionFactory.create_client_and_db()
    num_list = [5000]
    # Arguments: data sizes, database handle, number of test runs
    start_test_delete_exp(num_list, mydb, 3)