Esempio n. 1
0
    async def deal_with_data(self):
        '''
        处理并存储相关拼团商品的数据
        :return:
        '''
        goods_list = await self.get_pintuan_goods_info()

        my_pipeline = SqlServerMyPageInfoSaveItemPipeline()

        if my_pipeline.is_connect_success:
            db_goods_id_list = [item[0] for item in list(await my_pipeline.select_jumeiyoupin_pintuan_all_goods_id(logger=self.my_lg))]
            # self.my_lg.info(str(db_goods_id_list))

            index = 1
            for item in goods_list:
                if index % 20 == 0:
                    my_pipeline = SqlServerMyPageInfoSaveItemPipeline()

                if item.get('goods_id', '') in db_goods_id_list:
                    self.my_lg.info('该goods_id已经存在于数据库中, 此处跳过')
                    pass
                else:
                    goods_id = item.get('goods_id', '')
                    tmp_url = 'https://s.h5.jumei.com/yiqituan/detail?item_id={0}&type={1}'.format(goods_id, item.get('type', ''))

                    s_time = time.time()

                    jumeiyoupin = JuMeiYouPinPinTuanParse(logger=self.my_lg)
                    goods_data = await jumeiyoupin.deal_with_data(jumei_pintuan_url=tmp_url)

                    if goods_data == {} or goods_data.get('is_delete', 0) == 1:
                        pass
                    else:
                        # 规范化
                        goods_data['goods_id'] = goods_id
                        goods_data['pintuan_time'] = item.get('pintuan_time', {})
                        goods_data['pintuan_begin_time'], goods_data['pintuan_end_time'] = await self.get_pintuan_begin_time_and_pintuan_end_time(pintuan_time=item.get('pintuan_time', {}))
                        goods_data['sort'] = item.get('sort')
                        goods_data['page'] = item.get('page')
                        goods_data['tab'] = item.get('tab')

                        # pprint(goods_data)
                        # print(goods_data)
                        await jumeiyoupin.insert_into_jumeiyoupin_pintuan_table(data=goods_data, pipeline=my_pipeline, logger=self.my_lg)

                    e_time = time.time()
                    if e_time - s_time > JUMEIYOUPIN_SLEEP_TIME:    # 使其更智能点
                        pass
                    else:
                        await asyncio.sleep(JUMEIYOUPIN_SLEEP_TIME - (e_time-s_time))
                    index += 1

        else:
            self.my_lg.error('数据库连接失败,此处跳过!')
            pass

        gc.collect()
        return None
    async def run_forever(self):
        '''
        实时更新数据
        :return:
        '''
        tmp_sql_server = SqlServerMyPageInfoSaveItemPipeline()
        try:
            result = await tmp_sql_server.select_jumeiyoupin_pintuan_all_goods_id(
                logger=self.my_lg)
        except TypeError:
            self.my_lg.error('TypeError错误, 原因数据库连接失败...(可能维护中)')
            result = None
        if result is None:
            pass
        else:
            self.my_lg.info('------>>> 下面是数据库返回的所有符合条件的goods_id <<<------')
            self.my_lg.info(result)

            self.my_lg.info('即将开始实时更新数据, 请耐心等待...'.center(100, '#'))
            index = 1

            for item in result:
                pintuan_end_time = json.loads(item[1]).get('end_time')
                pintuan_end_time = int(
                    str(
                        time.mktime(
                            time.strptime(pintuan_end_time,
                                          '%Y-%m-%d %H:%M:%S')))[0:10])
                # print(miaosha_end_time)

                data = {}

                if index % 50 == 0:  # 每50次重连一次,避免单次长连无响应报错
                    self.my_lg.info('正在重置,并与数据库建立新连接中...')
                    tmp_sql_server = SqlServerMyPageInfoSaveItemPipeline()
                    self.my_lg.info('与数据库的新连接成功建立...')

                if tmp_sql_server.is_connect_success:
                    time_number = await self.is_recent_time(pintuan_end_time)
                    if time_number == 0:
                        await tmp_sql_server.delete_jumeiyoupin_pintuan_expired_goods_id(
                            goods_id=item[0], logger=self.my_lg)
                        self.msg = '过期的goods_id为(%s)' % item[
                            0] + ', 拼团结束时间为(%s), 删除成功!' % str(
                                json.loads(item[1]).get('begin_time'))
                        self.my_lg.info(self.msg)

                    elif time_number == 2:
                        pass  # 此处应该是pass,而不是break,因为数据库传回的goods_id不都是按照顺序的

                    else:  # 返回1,表示在待更新区间内
                        self.msg = '------>>>| 正在更新的goods_id为(%s) | --------->>>@ 索引值为(%s)' % (
                            item[0], str(index))
                        self.my_lg.info(self.msg)
                        data['goods_id'] = item[0]
                        jumeiyoupin_2 = JuMeiYouPinPinTuan(logger=self.my_lg)

                        _ = item[2] + '-' + str(
                            item[3])  # 格式: 'coutuan_baby-1'
                        item_list = self.api_all_goods_id.get(
                            _, [])  # 用于判断tab, index已在self.api_all_goods_id中

                        if item_list == []:
                            driver = BaseDriver(
                                executable_path=PHANTOMJS_DRIVER_PATH,
                                ip_pool_type=IP_POOL_TYPE)
                            item_list = await jumeiyoupin_2.get_one_page_goods_list(
                                driver=driver, tab=item[2], index=item[3])
                            try:
                                del driver
                            except:
                                pass

                        if item_list == []:
                            self.my_lg.info('获取到的body为空str, 网络原因, 此处先跳过!')
                            pass
                        else:
                            if self.api_all_goods_id.get(_) is None:
                                self.api_all_goods_id[_] = item_list

                            pintuan_goods_all_goods_id = [
                                item_1.get('goods_id', '')
                                for item_1 in item_list
                            ]

                            jumeiyoupin_pintuan = JuMeiYouPinPinTuanParse(
                                logger=self.my_lg)
                            # 内部已经下架的(测试发现官方不会提前下架活动商品)
                            if item[0] not in pintuan_goods_all_goods_id:
                                await self.update_data_2(
                                    jumeiyoupin_pintuan=jumeiyoupin_pintuan,
                                    jumei_pintuan_url=item[4],
                                    goods_id=item[0],
                                    pipeline=tmp_sql_server)

                            else:  # 未内部下架
                                await self.update_data_1(
                                    jumeiyoupin_pintuan=jumeiyoupin_pintuan,
                                    jumeiyoupin_2=jumeiyoupin_2,
                                    jumei_pintuan_url=item[4],
                                    goods_id=item[0],
                                    item_list=item_list,
                                    pipeline=tmp_sql_server)

                else:
                    self.my_lg.error('数据库连接失败,此处跳过!')
                    pass

                index += 1
                gc.collect()
            self.my_lg.info('全部数据更新完毕'.center(100, '#'))  # sleep(60*60)
            if get_shanghai_time().hour == 0:  # 0点以后不更新
                sleep(60 * 60 * 5.5)
            else:
                sleep(5)
            gc.collect()

        return None
    async def run_forever(self):
        '''
        实时更新数据
        :return:
        '''
        sql_cli = SqlServerMyPageInfoSaveItemPipeline()
        try:
            sql_cli._delete_table(sql_str=jm_delete_str_3, )
            await async_sleep(5)
            result = sql_cli._select_table(sql_str=jm_select_str_3,
                                           logger=self.lg)
        except TypeError:
            self.lg.error('TypeError错误, 原因数据库连接失败...(可能维护中)')
            result = None
        if result is None:
            pass
        else:
            await _print_db_old_data(result=result, logger=self.lg)
            index = 1
            for item in result:
                pintuan_end_time = json.loads(item[1]).get('end_time')
                pintuan_end_time = int(
                    str(
                        time.mktime(
                            time.strptime(pintuan_end_time,
                                          '%Y-%m-%d %H:%M:%S')))[0:10])
                # print(miaosha_end_time)

                data = {}
                sql_cli = await _get_new_db_conn(db_obj=sql_cli,
                                                 index=index,
                                                 logger=self.lg,
                                                 remainder=50)
                if sql_cli.is_connect_success:
                    time_number = await self.is_recent_time(pintuan_end_time)
                    if time_number == 0:
                        await sql_cli._update_table_3(
                            sql_str=jm_update_str_5,
                            params=(str(get_shanghai_time()), item[0]),
                            logger=self.lg)
                        await async_sleep(.5)
                        self.msg = '过期的goods_id为(%s)' % item[
                            0] + ', 拼团结束时间为(%s), 删除成功!' % str(
                                json.loads(item[1]).get('begin_time'))
                        self.lg.info(self.msg)

                    elif time_number == 2:
                        pass  # 此处应该是pass,而不是break,因为数据库传回的goods_id不都是按照顺序的

                    else:  # 返回1,表示在待更新区间内
                        self.msg = '------>>>| 正在更新的goods_id为(%s) | --------->>>@ 索引值为(%s)' % (
                            item[0], str(index))
                        self.lg.info(self.msg)
                        data['goods_id'] = item[0]
                        jumeiyoupin_2 = JuMeiYouPinPinTuan(logger=self.lg)

                        _ = item[2] + '-' + str(
                            item[3])  # 格式: 'coutuan_baby-1'
                        item_list = self.api_all_goods_id.get(
                            _, [])  # 用于判断tab, index已在self.api_all_goods_id中

                        if item_list == []:
                            driver = BaseDriver(
                                executable_path=PHANTOMJS_DRIVER_PATH,
                                ip_pool_type=self.ip_pool_type)
                            item_list = await jumeiyoupin_2.get_one_page_goods_list(
                                driver=driver, tab=item[2], index=item[3])
                            try:
                                del driver
                            except:
                                pass

                        if item_list == []:
                            self.lg.info('获取到的body为空str, 网络原因, 此处先跳过!')
                            pass
                        else:
                            if self.api_all_goods_id.get(_) is None:
                                self.api_all_goods_id[_] = item_list

                            pintuan_goods_all_goods_id = [
                                item_1.get('goods_id', '')
                                for item_1 in item_list
                            ]

                            jumeiyoupin_pintuan = JuMeiYouPinPinTuanParse(
                                logger=self.lg)
                            # 内部已经下架的(测试发现官方不会提前下架活动商品)
                            if item[0] not in pintuan_goods_all_goods_id:
                                await self.update_data_2(
                                    jumeiyoupin_pintuan=jumeiyoupin_pintuan,
                                    jumei_pintuan_url=item[4],
                                    goods_id=item[0],
                                    pipeline=sql_cli)

                            else:  # 未内部下架
                                await self.update_data_1(
                                    jumeiyoupin_pintuan=jumeiyoupin_pintuan,
                                    jumeiyoupin_2=jumeiyoupin_2,
                                    jumei_pintuan_url=item[4],
                                    goods_id=item[0],
                                    item_list=item_list,
                                    pipeline=sql_cli)

                else:
                    self.lg.error('数据库连接失败,此处跳过!')
                    pass

                index += 1
                gc.collect()
            self.lg.info('全部数据更新完毕'.center(100, '#'))
            if get_shanghai_time().hour == 0:  # 0点以后不更新
                await async_sleep(60 * 60 * 5.5)
            else:
                await async_sleep(10 * 60)
            gc.collect()

        return None