def add_omitted(self):
    """Merge pending search keywords into the jieba user dictionary file.

    Does nothing while fewer than 100 documents are pending in
    ``self.mongo_word``.  Otherwise every pending document contributes its
    ``aid`` value (or, failing that, its ``mid`` value) to
    ``./biliob_analyzer/dict.txt`` unless that value is already listed.
    The file is then rewritten, the pending collection is emptied, the
    dictionary is reloaded into jieba, and all authors and videos are
    refreshed.

    Progress is reported through a ``ProgressTask`` stored in
    ``db['tracer']``.
    """
    dict_path = './biliob_analyzer/dict.txt'

    # Query the count once and reuse it for both the threshold check and
    # the progress total.
    total_value = self.mongo_word.count_documents({})
    if total_value < 100:
        return

    t = ProgressTask("更新查询关键词字典", total_value=total_value,
                     collection=db['tracer'])

    # Context manager so the read handle is always released.
    with open(dict_path, 'r', encoding='utf8') as dict_file:
        words = dict_file.read().split('\n')
    # Set mirror of ``words`` for O(1) duplicate checks; ``words`` itself
    # keeps the original file order.
    known = set(words)

    for each in self.mongo_word.find():
        # Prefer 'aid'; fall back to 'mid' when 'aid' is absent or is
        # already present in the dictionary.
        for field in ('aid', 'mid'):
            if field in each and each[field] not in known:
                words.append(each[field])
                known.add(each[field])
                break
        t.current_value += 1
    t.finished = True

    with open(dict_path, 'w', encoding='utf8', newline='') as out_file:
        # Skip empty entries: splitting a newline-terminated file leaves a
        # trailing '' that would otherwise be written back as one more
        # blank line on every run.
        out_file.writelines(word + '\n' for word in words if word != '')

    self.mongo_word.delete_many({})
    jieba.load_userdict(dict_path)
    self.refresh_all_author()
    self.refresh_all_video()
def __judge_author(self, author_filter):
    """Run ``self.__judge`` on every matching author that has recent data.

    Authors are selected from ``author_coll`` with ``author_filter``.  Each
    author's ``data`` array is narrowed to the entries whose ``datetime``
    lies within the last 32 days, and authors left with no such entry are
    skipped.  Progress is reported through a ``ProgressTask`` stored in
    ``db['tracer']``.

    Args:
        author_filter: MongoDB query document selecting candidate authors.
    """
    # Cursor.count() was deprecated in PyMongo 3.7 and removed in 4.0;
    # count_documents() is the supported replacement.
    # NOTE: this counts every author matching the filter, while the
    # pipeline below additionally drops authors without recent data, so
    # the progress total can be larger than the number of authors visited.
    count = author_coll.count_documents(author_filter)

    recent_cutoff = datetime.datetime.now() - datetime.timedelta(32)
    pipeline = [
        {'$match': author_filter},
        {
            '$project': {
                "mid": 1,
                "face": 1,
                "name": 1,
                # Keep only the data points newer than the cutoff.
                "data": {
                    "$filter": {
                        "input": "$data",
                        "as": "data",
                        "cond": {"$gt": ["$$data.datetime", recent_cutoff]},
                    }
                },
            }
        },
        # Require at least one surviving data point.
        {"$match": {"data.0": {"$exists": True}}},
    ]
    authors = author_coll.aggregate(pipeline)

    print("待爬取作者数量:{}".format(count))
    t = ProgressTask("粉丝数变动探测", total_value=count,
                     collection=db['tracer'])
    for each_author in authors:
        print(each_author['mid'])
        t.current_value += 1
        self.__judge(each_author)
    t.finished = True