예제 #1
0
    def update_values(self, pd_obj, col_key):
        """Replace the stored date/value points for every column of ``pd_obj``.

        Each column label is a dot-separated string; its parts are paired
        with ``col_key`` to form the query that selects the target documents.
        For every column two upserting ``UpdateMany`` requests are queued:
        a ``$pull`` removing existing ``values`` entries for the incoming
        dates, then a ``$push`` adding the new ``{'date', 'value'}`` entries.
        All requests are sent in a single ordered ``bulk_write``.

        Args:
            pd_obj: ``pandas.Series`` or ``pandas.DataFrame`` whose index
                entries provide ``strftime`` (they are stored as
                ``'%Y%m%d'`` strings).
            col_key: a key name, or a list of key names, one per
                dot-separated part of a column label.

        Raises:
            Exception: if ``col_key`` does not have as many elements as
                ``self._key_ls``, if column labels are duplicated, or if a
                column label does not split into the expected number of parts.
        """
        if pd_obj.empty:
            print('pd_obj empty, no data to update')
            return

        # initial checks
        if isinstance(col_key, str):
            col_key = [col_key]

        if len(self._key_ls) != len(col_key):
            name = self._name
            full = len(self._key_ls)
            given = len(col_key)
            raise Exception(f'insert error: {name}. col_key must have {full} elements. provided was {given}')

        if isinstance(pd_obj, pd.Series):
            df = pd.DataFrame(pd_obj)
        else:
            df = pd_obj.copy()

        # check no dups
        if df.columns.has_duplicates:
            raise Exception('inserted object cannot have duplicated keys!')

        # integrity check: report the count that is actually being enforced
        expected_parts = len(self._full_key_ls) - 1
        for col in df.columns:
            col_val_ls = col.split('.')
            if expected_parts != len(col_val_ls):
                raise Exception(f'all columns must have {expected_parts} elements. provided was {col_val_ls}')

        df.index = df.index.map(lambda i: i.strftime('%Y%m%d'))
        df = df.dropna(axis=0, how='all')
        df = df.dropna(axis=1, how='all')

        request_ls = []
        for col in df.columns:
            query_dd = dict(zip(col_key, col.split('.')))
            values_dd = df[col].dropna().to_dict()

            # pull many: drop any stored points for the dates being written
            date_ls = list(values_dd)
            pull_dd = {'$pull': {'values': {'date': {'$in': date_ls}}}}
            request_ls.append(pymongo.UpdateMany(query_dd, pull_dd, upsert=True))

            # push many: append the fresh points
            new_ls = [{'date': k, 'value': v} for k, v in values_dd.items()]
            push_dd = {'$push': {'values': {'$each': new_ls}}}
            request_ls.append(pymongo.UpdateMany(query_dd, push_dd, upsert=True))

        # An all-NaN input is not ``.empty`` but leaves nothing after dropna;
        # bulk_write rejects an empty request list, so stop here instead.
        if not request_ls:
            print('pd_obj has no non-null values, no data to update')
            return

        self._get_collection().bulk_write(request_ls, ordered=True)
예제 #2
0
    def merge_players(self, src_player_id, target_player_id):
        """Re-point every reference to ``src_player_id`` at ``target_player_id``.

        For each listed collection, one ``UpdateMany`` per id field rewrites
        documents whose field equals the source id. Afterwards a
        ``player_merge`` record for the pair is inserted unless a matching
        one is already present.
        """
        # TODO this can probably be tested properly only with integration tests
        id_fields_by_collection = (
            ('badge', ('player_id',)),
            ('special_score', ('killer_id', 'victim_id')),
            ('kill', ('killer_id', 'victim_id')),
            ('score', ('player_id',)),
            ('team_switch', ('player_id',)),
        )
        for collection_name, id_fields in id_fields_by_collection:
            collection = getattr(self.db, collection_name)
            reassignments = [
                pymongo.UpdateMany(
                    {id_field: src_player_id},
                    {'$set': {id_field: target_player_id}},
                )
                for id_field in id_fields
            ]
            collection.bulk_write(reassignments)

        already_recorded = list(
            self.db.player_merge.find({
                'src_player_id': src_player_id,
                'target_player_id': target_player_id,
            })
        )
        if already_recorded:
            return

        self.db.player_merge.insert_one({
            'src_player_id': src_player_id,
            'target_player_id': target_player_id,
        })
예제 #3
0
def update_sramongo_pubmed_records(docs, collection):
    """Add each paper document to the records that cite its PubMed accession.

    For every ``doc`` an ``UpdateMany`` is queued that matches records whose
    ``study.pubmed`` equals ``doc.accn`` and adds ``doc.to_mongo()`` to their
    ``papers`` set. The queue is written with ``collection.bulk_write`` each
    time it grows past 500 operations, and once more at the end for any
    remainder; every bulk result is logged at debug level.
    """

    def _write_batch(batch):
        outcome = collection.bulk_write(batch)
        logger.debug(outcome.bulk_api_result)

    pending = []
    for doc in docs:
        selector = {"study.pubmed": doc.accn}
        change = {"$addToSet": {"papers": doc.to_mongo()}}
        pending.append(pymongo.UpdateMany(selector, change))

        # Write intermediate results
        if len(pending) > 500:
            _write_batch(pending)
            pending = []

    if pending:
        _write_batch(pending)
예제 #4
0
def update_sramongo_biosample_records(docs, collection):
    """Store each BioSample document on the records that reference it.

    For every ``doc`` an ``UpdateMany`` is queued that matches records whose
    ``sample.biosample`` equals ``doc.accn`` and sets their ``BioSample``
    field to ``doc.to_mongo()``. The queue is written with
    ``collection.bulk_write`` each time it grows past 500 operations, and
    once more at the end for any remainder; every bulk result is logged at
    debug level.
    """

    def _write_batch(batch):
        outcome = collection.bulk_write(batch)
        logger.debug(outcome.bulk_api_result)

    pending = []
    for doc in docs:
        selector = {"sample.biosample": doc.accn}
        change = {"$set": {"BioSample": doc.to_mongo()}}
        pending.append(pymongo.UpdateMany(selector, change))

        # Write intermediate results
        if len(pending) > 500:
            _write_batch(pending)
            pending = []

    if pending:
        _write_batch(pending)
예제 #5
0
# Chinese field name -> English label. Order matters: a name that contains
# another name as a substring ("地学天文" contains "地学") must come first,
# otherwise the shorter replacement fires first and the longer never matches.
_JOURNAL_FIELD_TRANSLATIONS = (
    ("地学天文", "ASTRONOMY"),
    ("地学", "GEOGRAPHY"),
    ("工程技术", "ENGINEERING"),
    ("管理科学", "MANAGEMENT"),
    ("化学", "CHEMISTRY"),
    ("环境科学与生态学", "ENVIRONMENTAL SCIENCES"),
    ("农林科学", "AGRONOMY"),
    ("社会科学", "SOCIAL SCIENCE"),
    ("生物", "BIOLOGY"),
    ("数学", "MATHEMATICS"),
    ("物理", "PHYSICS"),
    ("医学", "MEDICINE"),
    ("综合性期刊", "MULTIDISCIPLINARY SCIENCES"),
)


def _translate_journal_field(raw_field):
    """Replace every known Chinese field name in ``raw_field`` with its English label."""
    for chinese_name, english_label in _JOURNAL_FIELD_TRANSLATIONS:
        raw_field = raw_field.replace(chinese_name, english_label)
    return raw_field


def add_field():
    """Tag papers in ``qiuzh.mag_papers`` with their journal's field and level.

    For each field a ``<field>.txt`` file is read line by line; every line is
    a JSON array in which element 8 is the journal id, element 6[0] the
    (Chinese) field name and element 6[1] the journal level. One
    ``UpdateMany`` per journal sets ``field`` and ``level`` on all papers
    whose ``venue.id`` matches, and the updates of one file are sent as a
    single unordered ``bulk_write``.
    """
    col1 = connectTable("qiuzh", "mag_papers")

    for field in [
            "地学", "地学天文", "工程技术", "管理科学", "化学", "环境科学与生态学", "农林科学", "社会科学",
            "生物", "数学", "物理", "医学", "综合性期刊"
    ]:
        operation = []
        print(field)
        detail_path = (
            "C://Users//qzh//PycharmProjects//MAG//JournalDetailsWithID//" +
            field + ".txt")
        # ``with`` guarantees the file is closed even if a line fails to parse.
        with open(detail_path, "r", encoding="gbk") as journal_detail:
            for line in journal_detail:
                a_journal_detail = json.loads(line)
                journal_ID = a_journal_detail[8]
                journal_field = _translate_journal_field(a_journal_detail[6][0])
                journal_level = a_journal_detail[6][1]
                operation.append(
                    pymongo.UpdateMany(
                        {"venue.id": journal_ID},
                        {"$set": {
                            "field": journal_field,
                            "level": journal_level
                        }}))
        print(len(operation))
        # bulk_write raises InvalidOperation when given no operations.
        if operation:
            col1.bulk_write(operation, ordered=False)
예제 #6
0
# Age-group label -> inclusive [min_age, max_age] bounds used for matching.
age_bins = {
    '<1': [0, 1],
    '1-5': [1, 5],
    '6-10': [6, 10],
    '11-17': [11, 17],
    '18-34': [18, 34],
    '35-49': [35, 49],
    '50-64': [50, 64],
    '65+': [65, 300]
}

# Cycle through the age bins to perform the groupings.
# NOTE: an age of exactly 1 matches both '<1' and '1-5'; the bins are applied
# in dictionary order, so the later '1-5' label is the one that remains.
for bin_name, (min_age, max_age) in age_bins.items():
    # Detail the requested change to make
    age_filter = {args.age_variable: {"$gte": min_age, "$lte": max_age}}
    bin_assignment = {"$set": {args.bin_variable: bin_name}}
    requests = [pymongo.UpdateMany(age_filter, bin_assignment)]

    # Perform the operation
    result = db[args.collection_name].bulk_write(requests)
예제 #7
0
    def merge_players(self, src_player_id, target_player_id):
        """Re-point every reference to ``src_player_id`` at ``target_player_id``.

        For each listed collection, one ``UpdateMany`` per id field rewrites
        documents whose field equals the source id. Afterwards a
        ``player_merge`` record for the pair is inserted unless a matching
        one is already present.
        """
        # TODO this can probably be tested properly only with integration tests
        id_fields_by_collection = (
            ("badge", ("player_id",)),
            ("special_score", ("killer_id", "victim_id")),
            ("kill", ("killer_id", "victim_id")),
            ("score", ("player_id",)),
            ("team_switch", ("player_id",)),
            ("player_stats", ("player_id",)),
        )
        for collection_name, id_fields in id_fields_by_collection:
            collection = getattr(self.db, collection_name)
            reassignments = [
                pymongo.UpdateMany(
                    {id_field: src_player_id},
                    {"$set": {id_field: target_player_id}},
                )
                for id_field in id_fields
            ]
            collection.bulk_write(reassignments)

        already_recorded = list(
            self.db.player_merge.find({
                "src_player_id": src_player_id,
                "target_player_id": target_player_id,
            })
        )
        if already_recorded:
            return

        self.db.player_merge.insert_one({
            "src_player_id": src_player_id,
            "target_player_id": target_player_id,
        })
예제 #8
0
def save_annotations(
    dsFolder: types.GirderModel,
    upsert_list: Iterable[dict],
    delete_list: Iterable[int],
    user: types.GirderUserModel,
    description="save",
    overwrite=False,
):
    """
    Save a new annotation revision for a dataset.

    Annotations are lazy-deleted by marking their staleness property as true:
    a live record (one without ``REVISION_DELETED``) is expired by setting
    ``REVISION_DELETED`` to the new revision number, and replacement records
    are inserted with ``REVISION_CREATED`` set to that revision.

    :param dsFolder: dataset folder; its ``_id`` scopes every query.
    :param upsert_list: annotation dicts to insert. Each dict is modified in
        place (dataset id and created-revision are set, any
        ``REVISION_DELETED`` key is removed). Unless ``overwrite`` is set,
        the live record with the same ``trackId`` is expired first.
    :param delete_list: track ids whose live records are expired.
    :param user: acting user; ``login`` and ``_id`` go into the revision log.
    :param description: text stored with the revision log entry.
    :param overwrite: when true, every live record of the dataset is expired
        before the inserts.
    :returns: ``{"updated": <inserted count>, "deleted": <expired count>}``.
        A revision log entry is written only when either count is non-zero.
    """
    datasetId = dsFolder['_id']
    expire_operations = []  # Mark existing records as deleted
    insert_operations = []  # Insert new records
    new_revision = get_last_revision(dsFolder) + 1
    delete_annotation_update = {'$set': {REVISION_DELETED: new_revision}}

    # Expired-record count, summed over every expiring bulk write so that the
    # overwrite pass is not lost when per-track expirations run afterwards.
    deletions = 0

    if overwrite:
        query = {DATASET: datasetId, REVISION_DELETED: {'$exists': False}}
        overwrite_result = (AnnotationItem().collection.bulk_write(
            [pymongo.UpdateMany(query,
                                delete_annotation_update)]).bulk_api_result)
        deletions += overwrite_result.get('nModified', 0)

    for track_id in delete_list:
        live_track_query = {
            TRACKID: track_id,
            DATASET: datasetId,
            REVISION_DELETED: {
                '$exists': False
            }
        }
        # UpdateMany for safety, UpdateOne would also work
        expire_operations.append(
            pymongo.UpdateMany(live_track_query, delete_annotation_update))

    for newdict in upsert_list:
        newdict.update({DATASET: datasetId, REVISION_CREATED: new_revision})
        newdict.pop(REVISION_DELETED, None)
        live_track_query = {
            TRACKID: newdict['trackId'],
            DATASET: datasetId,
            REVISION_DELETED: {
                '$exists': False
            },
        }
        if not overwrite:
            # UpdateMany for safety, UpdateOne would also work
            expire_operations.append(
                pymongo.UpdateMany(live_track_query, delete_annotation_update))
        insert_operations.append(pymongo.InsertOne(newdict))

    # Ordered=false allows fast parallel writes
    if expire_operations:
        expire_result = (AnnotationItem().collection.bulk_write(
            expire_operations, ordered=False).bulk_api_result)
        deletions += expire_result.get('nModified', 0)

    additions = 0
    if insert_operations:
        insert_result = (AnnotationItem().collection.bulk_write(
            insert_operations, ordered=False).bulk_api_result)
        additions = insert_result.get('nInserted', 0)

    if additions or deletions:
        # Write the revision to the log
        log_entry = models.RevisionLog(
            dataset=datasetId,
            author_name=user['login'],
            author_id=user['_id'],
            revision=new_revision,
            additions=additions,
            deletions=deletions,
            description=description,
        )
        RevisionLogItem().create(log_entry)

    return {"updated": additions, "deleted": deletions}
예제 #9
0
def _retweet_text_and_tags(status):
    """Return ``(text, hashtags)`` for a retweeted-status dict.

    When the status has an ``extended_tweet`` entry the full text and the
    hashtags are read from it; otherwise the top-level ``text`` and
    ``entities`` are used.
    """
    if "extended_tweet" in status:
        extended = status['extended_tweet']
        return extended['full_text'], extended['entities']['hashtags']
    return status['text'], status['entities']['hashtags']


def _master_doc_with_media(item, status, created_at):
    """Build the ``master_data`` document for one raw retweet record.

    ``created_at`` is the already shifted timestamp string; it is split on
    spaces and its parts 1, 2, 3 and 5 are used as month, day, clock and year.
    """
    date_parts = created_at.split(" ")
    clock_parts = date_parts[3].split(":")
    month = getMonth(date_parts[1])
    text, hashtags = _retweet_text_and_tags(status)
    user = status['user']

    doc = {
        'university': item['university'],
        'keyword': item['keyword'],
        'id_str': status['id_str'],
        'created_at': created_at,
        'text': text,
        'user_id': user['id_str'],
        'user_name': user['name'],
        'user_img': user['profile_image_url_https'],
        'user_followers': user['followers_count'],
        'retweet_count': status['retweet_count'],
        'favorite_count': status['favorite_count'],
        'hashtags': hashtags,
    }
    # Image fields are only present when the tweet carries media; they sit
    # between the hashtags and the counters to keep the stored field order.
    if "extended_entities" in status:
        doc['checkImg'] = True
        doc['img'] = status['extended_entities']['media'][0]['media_url_https']
    doc['retweet_1Day'] = 0
    doc['timeCreate'] = dt.datetime(
        int(date_parts[5]), month, int(date_parts[2]),
        int(clock_parts[0]), int(clock_parts[1]))
    doc['timeUpdate'] = dt.datetime.today()
    return doc


def _retweet_count_fields(item, status):
    """Return the identifying and counter fields shared by the count documents."""
    return {
        'university': item['university'],
        'id_str': status['id_str'],
        'retweet_count': status['retweet_count'],
        'favorite_count': status['favorite_count'],
    }


def _refreshed_count_fields(status):
    """Return the ``$set`` payload that refreshes counters and update time."""
    return {
        'retweet_count': status['retweet_count'],
        'favorite_count': status['favorite_count'],
        'timeUpdate': dt.datetime.today(),
    }


def retweetFilterCleansing():
    """Move raw retweet records marked ``incomplete`` into the master tables.

    For every raw record the retweeted tweet is inserted into
    ``master_data``, ``retweet_update_data`` and ``retweet_state_data``. If an
    insert raises ``DuplicateKeyError``, counter refreshes for ``master_data``
    and ``retweet_update_data`` are queued instead. In both cases an update
    marking the raw record ``complete`` is queued. Queued updates are sent as
    unordered bulk writes after the loop.
    """
    masterData = []
    rawData = []
    retweetCountUpdate = []
    dataA = db.retweet_raw_data.find({"addData": "incomplete"})
    for item in dataA:
        try:
            status = item['data']['retweeted_status']
            strTime = addHours(status['created_at'])
            master_doc = _master_doc_with_media(item, status, strTime)
            db.master_data.insert_one(master_doc)

            update_doc = _retweet_count_fields(item, status)
            update_doc['state_rt'] = 0
            update_doc['state_check'] = 0
            update_doc['timeUpdate'] = dt.datetime.today()
            db.retweet_update_data.insert_one(update_doc)

            state_doc = _retweet_count_fields(item, status)
            state_doc['timeUpdate'] = dt.datetime.today() - dt.timedelta(minutes=1)
            db.retweet_state_data.insert_one(state_doc)

            print('create_to_master')
            rawData.append(pymongo.UpdateOne(
                {'id_str': item['id_str']},
                {'$set': {'addData': 'complete'}},
                upsert=True))
        except pymongo.errors.DuplicateKeyError:
            status = item['data']['retweeted_status']
            masterData.append(pymongo.UpdateOne(
                {'id_str': status['id_str']},
                {'$set': _refreshed_count_fields(status)},
                upsert=True))
            retweetCountUpdate.append(pymongo.UpdateOne(
                {'id_str': status['id_str']},
                {'$set': _refreshed_count_fields(status)},
                upsert=True))
            print('update_to_master')
            rawData.append(pymongo.UpdateMany(
                {'id_str': item['id_str']},
                {'$set': {'addData': 'complete'}},
                upsert=True))

    # masterData and retweetCountUpdate are always appended together, so one
    # emptiness check covers both bulk writes.
    if masterData:
        db.master_data.bulk_write(masterData, ordered=False)
        db.retweet_update_data.bulk_write(retweetCountUpdate, ordered=False)
    if rawData:
        db.retweet_raw_data.bulk_write(rawData, ordered=False)
예제 #10
0
def _status_text_and_hashtags(status):
    """Return ``(text, hashtags)`` for a retweeted-status dict.

    When the status has an ``extended_tweet`` entry the full text and the
    hashtags are read from it; otherwise the top-level ``text`` and
    ``entities`` are used.
    """
    if "extended_tweet" in status:
        extended = status['extended_tweet']
        return extended['full_text'], extended['entities']['hashtags']
    return status['text'], status['entities']['hashtags']


def _master_doc_with_screen_name(item, status, created_at):
    """Build the ``master_data`` document for one raw retweet record."""
    text, hashtags = _status_text_and_hashtags(status)
    user = status['user']
    return {
        'university': item['university'],
        'keyword': item['keyword'],
        'id_str': status['id_str'],
        'created_at': created_at,
        'text': text,
        'user_id': user['id_str'],
        'user_name': user['name'],
        'user_screen-name': user['screen_name'],
        'user_followers': user['followers_count'],
        'retweet_count': status['retweet_count'],
        'favorite_count': status['favorite_count'],
        'hashtags': hashtags,
        'timeUpdate': dt.datetime.today(),
    }


def retweetFilterCleansing():
    """Move raw retweet records marked ``incomplete`` into the master tables.

    For every raw record the retweeted tweet is inserted into ``master_data``
    and a zeroed counter document into ``retweet_update_data``. If an insert
    raises ``DuplicateKeyError``, a refresh of the user and counter fields in
    ``master_data`` is queued, and the stored ``retweet_update_data`` counters
    are advanced by the difference between the incoming counts and the ones
    currently stored in ``master_data``. In both cases an update marking the
    raw record ``complete`` is queued. Queued updates are sent as unordered
    bulk writes after the loop.
    """
    masterData = []
    rawData = []
    retweetCountUpdate = []
    dataA = db.retweet_raw_data.find({"addData": "incomplete"})
    for item in dataA:
        try:
            status = item['data']['retweeted_status']
            strTime = addHours(status['created_at'])
            master_doc = _master_doc_with_screen_name(item, status, strTime)
            db.master_data.insert_one(master_doc)
            db.retweet_update_data.insert_one({
                'university': item['university'],
                'id_str': status['id_str'],
                'retweet_count': 0,
                'favorite_count': 0,
                'timeUpdate': dt.datetime.today(),
            })
            print('create_to_master')
            rawData.append(pymongo.UpdateOne(
                {'id_str': item['id_str']},
                {'$set': {'addData': 'complete'}},
                upsert=True))
        except pymongo.errors.DuplicateKeyError:
            status = item['data']['retweeted_status']
            user = status['user']
            masterData.append(pymongo.UpdateOne(
                {'id_str': status['id_str']},
                {'$set': {
                    'user_name': user['name'],
                    'user_screen-name': user['screen_name'],
                    'user_followers': user['followers_count'],
                    'retweet_count': status['retweet_count'],
                    'favorite_count': status['favorite_count'],
                    'timeUpdate': dt.datetime.today(),
                }},
                upsert=True))

            # NOTE(review): both lookups are used without a None check; this
            # presumably relies on the documents existing whenever a
            # duplicate key is reported - confirm.
            idUpdate = db.retweet_update_data.find_one(
                {'id_str': status['id_str']})
            masterId = db.master_data.find_one(
                {'id_str': status['id_str']})

            # Growth since the counts currently stored in master_data (the
            # refresh queued above has not been written yet at this point).
            retweet = status['retweet_count'] - masterId['retweet_count']
            favourite = status['favorite_count'] - masterId['favorite_count']
            retweetCountUpdate.append(pymongo.UpdateOne(
                {'id_str': status['id_str']},
                {'$set': {
                    'retweet_count': idUpdate['retweet_count'] + retweet,
                    'favorite_count': idUpdate['favorite_count'] + favourite,
                    'timeUpdate': dt.datetime.today(),
                }},
                upsert=True))
            print('update_to_master')
            rawData.append(pymongo.UpdateMany(
                {'id_str': item['id_str']},
                {'$set': {'addData': 'complete'}},
                upsert=True))

    # masterData and retweetCountUpdate are always appended together, so one
    # emptiness check covers both bulk writes.
    if masterData:
        db.master_data.bulk_write(masterData, ordered=False)
        db.retweet_update_data.bulk_write(retweetCountUpdate, ordered=False)
    if rawData:
        db.retweet_raw_data.bulk_write(rawData, ordered=False)