class PDFParseproducer(Thread):
    """Producer thread that queues un-analysed announcements for one date.

    Each queued task is a dict with the keys ``url``, ``announcementId``,
    ``title`` and ``secName`` copied from the stored announcement document.
    """

    def __init__(self, q, date=None):
        """Set up the producer.

        Args:
            q: Queue-like object; tasks are handed over through ``q.put``.
            date: Announcement date string to process. Defaults to today's
                local date formatted as ``%Y-%m-%d``.
        """
        super(PDFParseproducer, self).__init__()
        today = datetime.datetime.now().strftime('%Y-%m-%d')
        self.date = today if date is None else date
        # Presumably a MongoDB collection handle (it is queried with
        # ``find`` in run()) -- confirm against DBSelector.
        database = DBSelector().mongo('qq')['db_stock']
        self.doc = database['jucao_announcement']
        self.q = q
        print('Producer start')

    def run(self):
        """Push one task per pending announcement dated ``self.date``.

        "Pending" means the document matches a filter requiring that the
        ``analysis`` field does not exist.
        """
        query = {
            'analysis': {'$exists': False},
            'announcementTime': self.date,
        }
        # Read the whole result set before any task is queued.
        announcements = list(self.doc.find(query))
        if not announcements:
            # Nothing left to process for this date.
            return

        wanted_keys = ('url', 'announcementId', 'title', 'secName')
        for announcement in announcements:
            task_data = {key: announcement[key] for key in wanted_keys}
            print('pushing data')
            self.q.put(task_data)
class PDFParseproducer(Thread):
    """Producer thread that queues un-analysed fund announcements.

    For each date in a sliding window (see ``gen_date_list``) the producer
    looks up announcement documents that have no ``analysis`` field and
    pushes a task dict onto the queue for every document whose ``code``
    field contains at least one code starting with one of
    ``_FUND_CODE_PREFIXES``.
    """

    # Code prefixes an announcement must match to be queued.
    _FUND_CODE_PREFIXES = ('16', '501', '502')

    def __init__(self, q, date=None):
        """Set up the producer.

        Args:
            q: Queue-like object; tasks are handed over through ``q.put``.
            date: Date string stored on ``self.date``. Defaults to today's
                local date formatted as ``%Y-%m-%d``. Note that ``run``
                iterates over ``gen_date_list()`` and does not consult
                this attribute.
        """
        super(PDFParseproducer, self).__init__()
        if date is None:
            self.date = datetime.datetime.now().strftime('%Y-%m-%d')
        else:
            self.date = date
        # Presumably a MongoDB collection handle (it is queried with
        # ``find`` in run()) -- confirm against DBSelector.
        self.doc = DBSelector().mongo('qq')['db_stock']['jucao_announcement']
        self.q = q
        print('Producer start')

    def gen_date_list(self, last_day_count=20):
        """Return ``%Y-%m-%d`` date strings, newest first.

        The window starts at tomorrow (local time) and walks backwards one
        day at a time.

        Args:
            last_day_count: Number of dates to generate. Defaults to 20,
                the value that used to be hard-coded.

        Returns:
            A list of ``last_day_count`` date strings.
        """
        newest = datetime.datetime.now() + datetime.timedelta(days=1)
        return [
            (newest - datetime.timedelta(days=offset)).strftime('%Y-%m-%d')
            for offset in range(last_day_count)
        ]

    def run(self):
        """Queue a task for every pending, matching announcement.

        Each task dict carries ``url``, ``announcementId``, ``title``,
        ``secName``, ``date`` (the document's ``announcementTime``) and
        ``code``.
        """
        for d in self.gen_date_list():
            print(d)
            # Read the whole result set for this date before queueing; an
            # empty result simply skips the inner loop.
            pending_data_list = list(self.doc.find({
                'analysis': {'$exists': False},
                'announcementTime': d,
            }))
            for item in pending_data_list:
                code = item['code']
                # ``code`` may hold several comma-separated codes; one
                # matching prefix is enough to queue the announcement.
                if not any(single.startswith(self._FUND_CODE_PREFIXES)
                           for single in code.split(',')):
                    continue
                task_data = {
                    'url': item['url'],
                    'announcementId': item['announcementId'],
                    'title': item['title'],
                    'secName': item['secName'],
                    'date': item['announcementTime'],
                    # NOTE(review): this is the first six characters of
                    # the raw field, i.e. the first listed code, which is
                    # not necessarily the code that matched the prefix
                    # filter above -- confirm this is intended.
                    'code': code[:6],
                }
                print('pushing data', code, item['secName'])
                self.q.put(task_data)