Ejemplo n.º 1
0
class PDFParseproducer(Thread):
    """Producer thread that queues announcements still awaiting analysis.

    For one ``announcementTime`` value it looks up the documents that have
    no ``analysis`` field and puts a small task dict for each of them on
    the queue handed to the constructor.
    """

    def __init__(self, q, date=None):
        """Remember the queue and target date and open the collection.

        Args:
            q: Object whose ``put`` method receives the task dicts.
            date: Value matched against ``announcementTime``. When ``None``
                the current local date formatted as ``%Y-%m-%d`` is used.
        """
        super(PDFParseproducer, self).__init__()
        self.date = (
            date
            if date is not None
            else datetime.datetime.now().strftime('%Y-%m-%d')
        )
        collection = DBSelector().mongo('qq')['db_stock']['jucao_announcement']
        self.doc = collection
        self.q = q
        print('Producer start')

    def run(self):
        """Push one task dict per matching document onto the queue."""
        query = {
            'analysis': {'$exists': False},
            'announcementTime': self.date,
        }
        unparsed = list(self.doc.find(query))

        if not unparsed:
            # Nothing left to process for this date.
            return

        # Fields copied from each document into the task, in this order.
        wanted = ('url', 'announcementId', 'title', 'secName')
        for record in unparsed:
            task = {field: record[field] for field in wanted}
            print('pushing data')
            self.q.put(task)
Ejemplo n.º 2
0
class PDFParseproducer(Thread):
    """Producer thread that queues unanalysed announcements for parsing.

    ``run`` walks a sliding window of dates (see ``gen_date_list``), looks
    up the documents for each date that have no ``analysis`` field, keeps
    those whose comma-separated ``code`` contains at least one entry that
    starts with one of ``CODE_PREFIXES``, and puts a task dict for each of
    them on the queue.

    Note that ``self.date`` is stored by the constructor but is not read by
    ``run``; the window always starts from the current date.
    """

    # A document is queued when any of its codes starts with one of these.
    CODE_PREFIXES = ('16', '501', '502')
    # Default number of days returned by gen_date_list().
    LOOKBACK_DAYS = 20

    def __init__(self, q, date=None):
        """Remember the queue and date and open the collection.

        Args:
            q: Object whose ``put`` method receives the task dicts.
            date: Stored on ``self.date``. When ``None`` the current local
                date formatted as ``%Y-%m-%d`` is stored instead.
        """
        super(PDFParseproducer, self).__init__()
        if date is None:
            self.date = datetime.datetime.now().strftime('%Y-%m-%d')
        else:
            self.date = date
        self.doc = DBSelector().mongo('qq')['db_stock']['jucao_announcement']
        self.q = q
        print('Producer start')

    def gen_date_list(self, days=None):
        """Return ``%Y-%m-%d`` date strings, newest first, starting tomorrow.

        Args:
            days: Number of dates to return. Defaults to ``LOOKBACK_DAYS``.

        Returns:
            A list of ``days`` strings; the first is tomorrow's local date
            and each following entry is one day earlier.
        """
        if days is None:
            days = self.LOOKBACK_DAYS
        # The window deliberately begins one day ahead of "now".
        newest = datetime.datetime.now() + datetime.timedelta(days=1)
        return [
            (newest - datetime.timedelta(days=offset)).strftime('%Y-%m-%d')
            for offset in range(days)
        ]

    def run(self):
        """Queue a task for every matching document in the date window."""
        for day in self.gen_date_list():
            print(day)
            # Fetch the whole result up front, exactly as before, instead of
            # iterating the query result lazily while pushing to the queue.
            pending = list(self.doc.find({
                'analysis': {
                    '$exists': False
                },
                'announcementTime': day
            }))

            # An empty result simply skips the loop below.
            for item in pending:
                code = item['code']
                wanted = any(
                    part.startswith(self.CODE_PREFIXES)
                    for part in code.split(',')
                )
                if not wanted:
                    continue

                task_data = {
                    'url': item['url'],
                    'announcementId': item['announcementId'],
                    'title': item['title'],
                    'secName': item['secName'],
                    'date': item['announcementTime'],
                    # NOTE(review): this is the first six characters of the
                    # raw code string, which is not necessarily the entry
                    # that matched CODE_PREFIXES - confirm this is intended.
                    'code': code[:6],
                }
                print('pushing data', code, item['secName'])
                self.q.put(task_data)