Esempio n. 1
0
 async def _get_new_ali_obj(self, index) -> None:
     '''
     Replace ``self.zhe_800`` with a fresh ``Zhe800Parse`` on every 10th index.

     For an index that is not a multiple of 10 nothing happens. Otherwise the
     current ``self.zhe_800`` (if any) is dropped, ``collect()`` is called and
     a new ``Zhe800Parse`` instance is stored on ``self.zhe_800``.

     :param index: running counter of the item being processed
     :return: None
     '''
     if index % 10 != 0:
         return

     try:
         del self.zhe_800
     except AttributeError:
         # Nothing to drop: the attribute has not been set yet.
         pass
     # NOTE(review): collect is imported elsewhere in the file, presumably
     # gc.collect -- confirm.
     collect()
     self.zhe_800 = Zhe800Parse()
    async def _one_update(self, **kwargs) -> bool:
        '''
        Refresh one flash-sale goods record that is still listed.

        Keyword arguments read by this method:
            miaosha_goods_list: iterable of goods dicts; only the first entry
                whose 'zid' equals goods_id is processed
            goods_id: id of the goods to refresh

        :return: result of the table update (stock > 0) or of the shelves
            handler (stock <= 0); False when no entry matched or the parsed
            goods data was empty
        '''
        goods_id = kwargs.get('goods_id')
        candidates = kwargs.get('miaosha_goods_list')

        parser = Zhe800Parse()
        res = False
        # First entry carrying the wanted zid, or None when there is none.
        target = next(
            (entry for entry in candidates
             if entry.get('zid', '') == goods_id),
            None)

        if target is not None:
            parser.get_goods_data(goods_id=goods_id)
            goods_data = parser.deal_with_data()

            # An empty result means there is nothing to write back.
            if goods_data != {}:
                stock_info = target.get('stock_info')
                goods_data['stock_info'] = stock_info
                goods_data['goods_id'] = str(target.get('zid'))

                if stock_info.get('activity_stock') > 0:
                    goods_data['price'] = target.get('price')
                    goods_data['taobao_price'] = target.get('taobao_price')
                    goods_data['sub_title'] = target.get('sub_title')

                    miaosha_time = target.get('miaosha_time')
                    goods_data['miaosha_time'] = miaosha_time
                    begin_time, end_time = \
                        get_miaosha_begin_time_and_miaosha_end_time(
                            miaosha_time=miaosha_time)
                    goods_data['miaosha_begin_time'] = begin_time
                    goods_data['miaosha_end_time'] = end_time

                    if goods_data.get('is_delete', 0) == 1:
                        self.lg.info('该商品[{0}]已售罄...'.format(goods_id))

                    res = parser.to_update_zhe_800_xianshimiaosha_table(
                        data=goods_data,
                        pipeline=self.sql_cli)
                else:
                    # No activity stock left: hand over to the shelves handler.
                    self.lg.info('该商品参与活动的对应库存为0')
                    res = _handle_goods_shelves_in_auto_goods_table(
                        goods_id=goods_id,
                        logger=self.lg,
                        update_sql_str=z8_update_str_6,
                        sql_cli=self.sql_cli,
                    )

        collect()

        return res
Esempio n. 3
0
    async def _update_db(self):
        '''
        Endless refresh loop over the goods rows returned by the db.

        Each round:
          1. installs a new logger on ``self.lg``;
          2. loads the rows via ``self._get_db_old_data()`` (a ``None`` result
             skips the update part of the round);
          3. feeds the rows in slices of ``self.concurrency`` to
             ``self._update_one_goods_info`` tasks and awaits every slice;
          4. sleeps (5.5 hours when the Shanghai hour is 0, otherwise 10s);
          5. drops ``self.zhe_800`` and calls ``collect()``.

        :return: never returns normally
        '''
        while True:
            self.lg = await self._get_new_logger(logger_name=get_uuid1())
            result = await self._get_db_old_data()
            if result is not None:
                self.goods_index = 1
                tasks_params_list = TasksParamsListObj(
                    tasks_params_list=result, step=self.concurrency)
                self.zhe_800 = Zhe800Parse()
                index = 1
                while True:
                    try:
                        slice_params_list = tasks_params_list.__next__()
                    except AssertionError:
                        # Per the original note: every slice has been handed
                        # out, this is the normal way out of the loop.
                        break

                    tasks = []
                    for item in slice_params_list:
                        db_goods_info_obj = Z8DbGoodsInfoObj(item=item,
                                                             logger=self.lg)
                        self.lg.info('创建 task goods_id: {}'.format(
                            db_goods_info_obj.goods_id))
                        tasks.append(
                            self.loop.create_task(
                                self._update_one_goods_info(
                                    db_goods_info_obj=db_goods_info_obj,
                                    index=index)))
                        index += 1

                    await _get_async_task_result(tasks=tasks, logger=self.lg)
                    # ``tasks`` is always bound here, so a plain del is safe.
                    del tasks

                self.lg.info('全部数据更新完毕'.center(100, '#'))

            if get_shanghai_time().hour == 0:
                # Per the original note: no updates after midnight.
                await async_sleep(60 * 60 * 5.5)
            else:
                await async_sleep(10)

            try:
                del self.zhe_800
            except AttributeError:
                # The attribute is only set when the round had rows.
                pass
            collect()
Esempio n. 4
0
    def _update_old_goods_info(self, tmp_sql_server, result):
        '''
        Walk the stored flash-sale rows and delete or refresh each of them.

        Every row is read as ``item[0]`` = goods_id, ``item[1]`` = JSON text
        holding 'miaosha_begin_time' ('%Y-%m-%d %H:%M:%S') and ``item[2]`` =
        session id.

        :param tmp_sql_server: db pipeline; replaced by a new
            SqlServerMyPageInfoSaveItemPipeline on every 50th row
        :param result: iterable of rows as described above
        :return: None
        '''
        # enumerate() keeps the counter moving even when a row is skipped, so
        # the reconnect below really happens on every 50th row.
        for index, item in enumerate(result, start=1):
            if index % 50 == 0:
                # Per the original note: reconnect regularly so a long-lived
                # connection does not stop responding.
                print('正在重置,并与数据库建立新连接中...')
                tmp_sql_server = SqlServerMyPageInfoSaveItemPipeline()
                print('与数据库的新连接成功建立...')

            if tmp_sql_server.is_connect_success:
                self._refresh_old_goods_row(tmp_sql_server, item, index)
            else:
                print('数据库连接失败,数据库可能关闭或者维护中')
            gc.collect()

        print('全部数据更新完毕'.center(100, '#'))
        gc.collect()

        return

    def _delete_old_goods_row(self, tmp_sql_server, goods_id):
        '''Run ``self.delete_sql_str`` for one goods_id (private helper).'''
        # NOTE(review): the id is passed as a bare value, not a 1-tuple,
        # exactly as the original ``params=(item[0])`` did -- confirm what
        # _delete_table expects.
        tmp_sql_server._delete_table(sql_str=self.delete_sql_str,
                                     params=goods_id)

    def _refresh_old_goods_row(self, tmp_sql_server, item, index):
        '''
        Delete or refresh a single stored row (private helper).

        :param tmp_sql_server: connected db pipeline
        :param item: row (goods_id, JSON with 'miaosha_begin_time', session id)
        :param index: 1-based position of the row, used for the progress print
        :return: None
        '''
        goods_id = item[0]
        begin_time_str = json.loads(item[1]).get('miaosha_begin_time')
        begin_timestamp = int(
            time.mktime(time.strptime(begin_time_str, '%Y-%m-%d %H:%M:%S')))

        # Evaluate once so both comparisons see the same answer.
        recent_flag = self.is_recent_time(begin_timestamp)
        if recent_flag == 0:
            self._delete_old_goods_row(tmp_sql_server, goods_id)
            print('过期的goods_id为(%s)' % goods_id,
                  ', 限时秒杀开始时间为(%s), 删除成功!' % begin_time_str)
            return
        if recent_flag == 2:
            # Skip this row only; per the original note the rows are not
            # returned in order, so later rows may still need work.
            return

        # Any other value: per the original note the row is inside the
        # to-be-updated window.
        print('------>>>| 正在更新的goods_id为(%s) | --------->>>@ 索引值为(%d)'
              % (goods_id, index))
        try:
            session_data = self.zhe_800_spike._get_one_session_id_data(
                base_session_id=str(item[2]))
        except Exception as e:
            print(e)
            return

        blocks = session_data.get('data', {}).get('blocks', [])
        if blocks == []:
            print('该session_id不存在,此处跳过')
            return

        deals = [block.get('deal', {}) for block in blocks]
        if deals == []:
            # Kept from the original: the session yielded no deal data, so
            # the stored row is removed.
            print('该sessionid没有相关key为jsons的数据')
            self._delete_old_goods_row(tmp_sql_server, goods_id)
            print('过期的goods_id为(%s)' % goods_id,
                  ', 限时秒杀开始时间为(%s), 删除成功!' % begin_time_str)
            return

        miaosha_goods_list = self.get_miaoshao_goods_info_list(data=deals)
        listed_ids = [goods.get('zid') for goods in miaosha_goods_list]
        if goods_id not in listed_ids:
            # The goods is no longer part of this session.
            print('该商品已被下架限时秒杀活动,此处将其删除')
            self._delete_old_goods_row(tmp_sql_server, goods_id)
            print('下架的goods_id为(%s)' % goods_id, ', 删除成功!')
            return

        zhe_800_miaosha = Zhe800Parse()
        for goods in miaosha_goods_list:
            if goods.get('zid', '') != goods_id:
                continue

            zhe_800_miaosha.get_goods_data(goods_id=goods_id)
            goods_data = zhe_800_miaosha.deal_with_data()
            if goods_data == {}:
                # Nothing parsed for this goods, nothing to write.
                continue

            goods_data['stock_info'] = goods.get('stock_info')
            goods_data['goods_id'] = str(goods.get('zid'))
            # Prices are only taken over while activity stock is left.
            if goods.get('stock_info').get('activity_stock') > 0:
                goods_data['price'] = goods.get('price')
                goods_data['taobao_price'] = goods.get('taobao_price')
            goods_data['sub_title'] = goods.get('sub_title')
            goods_data['miaosha_time'] = goods.get('miaosha_time')
            goods_data['miaosha_begin_time'], goods_data['miaosha_end_time'] = \
                get_miaosha_begin_time_and_miaosha_end_time(
                    miaosha_time=goods.get('miaosha_time'))

            zhe_800_miaosha.to_update_zhe_800_xianshimiaosha_table(
                data=goods_data,
                pipeline=tmp_sql_server)
Esempio n. 5
0
    def get_spike_hour_goods_info(self):
        '''
        Crawl the flash-sale sessions and store goods not yet in the db.

        Session ids run from BASE_SESSION_ID (inclusive) up to MAX_SESSION_ID
        (exclusive) in steps of 2.

        :return: None
        '''
        def crawl_session(session_id):
            # Handles one session id; every early return means "skip it".
            print('待抓取的session_id为: ', session_id)
            payload = self._get_one_session_id_data(
                base_session_id=session_id)
            sleep(.3)

            blocks = payload.get('data', {}).get('blocks', [])
            if blocks == []:
                # The session id does not exist.
                return

            try:
                head = blocks[0]
                stamp = str(head.get('deal', {}).get('begin_time', ''))[:10]
                if stamp == '':
                    if head.get('showcase', {}) == {}:
                        raise Exception
                    # The first block is a showcase (per the printed marker a
                    # future session); the begin time is read from block 1.
                    print('*** 未来时间 ***')
                    stamp = str(
                        blocks[1].get('deal', {}).get('begin_time', ''))[:10]
                begin_times_timestamp = int(stamp)
            except Exception as e:
                print('遇到严重错误: ', e)
                return

            print('秒杀时间为: ',
                  timestamp_to_regulartime(begin_times_timestamp))
            if not self.is_recent_time(timestamp=begin_times_timestamp):
                return

            try:
                deals = [block.get('deal', {}) for block in blocks]
            except Exception as e:
                print('遇到严重错误: ', e)
                return

            if deals == []:
                print('该sessionid没有相关key为jsons的数据')
                return

            miaosha_goods_list = self.get_miaoshao_goods_info_list(data=deals)
            zhe_800 = Zhe800Parse()
            my_pipeline = SqlServerMyPageInfoSaveItemPipeline()
            if my_pipeline.is_connect_success:
                sql_str = 'select goods_id, miaosha_time, session_id from dbo.zhe_800_xianshimiaosha where site_id=14'
                db_goods_id_list = [
                    row[0]
                    for row in my_pipeline._select_table(sql_str=sql_str)
                ]
                for goods in miaosha_goods_list:
                    if goods.get('zid', '') in db_goods_id_list:
                        print('该goods_id已经存在于数据库中, 此处跳过')
                        continue

                    tmp_url = 'https://shop.zhe800.com/products/' + str(
                        goods.get('zid', ''))
                    goods_id = zhe_800.get_goods_id_from_url(tmp_url)
                    zhe_800.get_goods_data(goods_id=goods_id)
                    goods_data = zhe_800.deal_with_data()
                    if goods_data == {}:
                        # Nothing parsed, nothing to insert.
                        continue

                    goods_data['stock_info'] = goods.get('stock_info')
                    goods_data['goods_id'] = str(goods.get('zid'))
                    goods_data['spider_url'] = tmp_url
                    goods_data['username'] = '******'
                    goods_data['price'] = goods.get('price')
                    goods_data['taobao_price'] = goods.get('taobao_price')
                    goods_data['sub_title'] = goods.get('sub_title')
                    miaosha_time = goods.get('miaosha_time')
                    goods_data['miaosha_time'] = miaosha_time
                    begin_time, end_time = \
                        get_miaosha_begin_time_and_miaosha_end_time(
                            miaosha_time=miaosha_time)
                    goods_data['miaosha_begin_time'] = begin_time
                    goods_data['miaosha_end_time'] = end_time
                    goods_data['session_id'] = str(session_id)

                    zhe_800.insert_into_zhe_800_xianshimiaosha_table(
                        data=goods_data,
                        pipeline=my_pipeline)
                    # Throttle between inserts.
                    sleep(ZHE_800_SPIKE_SLEEP_TIME)

            # Release the parser before forcing a collection.
            del zhe_800
            gc.collect()

        session_id = BASE_SESSION_ID
        while session_id < MAX_SESSION_ID:
            crawl_session(session_id)
            session_id += 2
Esempio n. 6
0
    def get_spike_hour_goods_info(self):
        '''
        Crawl the flash-sale sessions and store goods not yet in the db.

        Session ids run from BASE_SESSION_ID (inclusive) up to MAX_SESSION_ID
        (exclusive) in steps of 2. A session whose response cannot be parsed
        is reported and skipped instead of aborting the whole crawl.

        :return: None
        '''
        # Compiled once; the pattern does not change between sessions.
        pre_pattern = re.compile(r'<pre.*?>(.*)</pre>')

        base_session_id = BASE_SESSION_ID
        while base_session_id < MAX_SESSION_ID:
            print('待抓取的session_id为: ', base_session_id)
            self._crawl_one_spike_session(base_session_id, pre_pattern)
            base_session_id += 2

    def _crawl_one_spike_session(self, base_session_id, pre_pattern):
        '''
        Fetch one session and insert its goods that are new to the db
        (private helper).

        :param base_session_id: session id to request
        :param pre_pattern: compiled regex extracting the JSON text from the
            ``<pre>`` element of the response body
        :return: None
        '''
        api_url = 'https://zapi.zhe800.com/zhe800_n_api/xsq/get?sessionId={0}&page=1&per_page=1000'.format(
            str(base_session_id), )
        body = self.get_url_body(url=api_url)

        # An empty/None body is handled like a body without a <pre> payload.
        matched = pre_pattern.findall(body) if body else []
        if matched == []:
            print('获取到的data为空!')
            return

        try:
            data = json.loads(matched[0])
            if data.get('status') == 0:
                # The session id does not exist.
                print('该session_id不存在,此处跳过')
                return

            begin_times = data.get('begin_times')[0]
            print('秒杀时间为: ', begin_times)
            # e.g. "2017-09-28 10:00:00" -> integer unix timestamp
            begin_times_timestamp = int(
                time.mktime(time.strptime(begin_times, '%Y-%m-%d %H:%M:%S')))
        except (ValueError, TypeError, IndexError, AttributeError) as e:
            # Malformed JSON or missing/invalid begin_times: skip the session.
            print('遇到严重错误: ', e)
            return

        if not self.is_recent_time(timestamp=begin_times_timestamp):
            return

        goods_json = data.get('jsons', [])
        if goods_json == []:
            print('该sessionid没有相关key为jsons的数据')
            return

        miaosha_goods_list = self.get_miaoshao_goods_info_list(data=goods_json)
        zhe_800 = Zhe800Parse()
        my_pipeline = SqlServerMyPageInfoSaveItemPipeline()
        if my_pipeline.is_connect_success:
            db_goods_id_list = [
                row[0] for row in
                my_pipeline.select_zhe_800_xianshimiaosha_all_goods_id()
            ]
            for item in miaosha_goods_list:
                if item.get('zid', '') in db_goods_id_list:
                    print('该goods_id已经存在于数据库中, 此处跳过')
                    continue

                tmp_url = 'https://shop.zhe800.com/products/' + str(
                    item.get('zid', ''))
                goods_id = zhe_800.get_goods_id_from_url(tmp_url)
                zhe_800.get_goods_data(goods_id=goods_id)
                goods_data = zhe_800.deal_with_data()
                if goods_data == {}:
                    # Nothing parsed, nothing to insert.
                    continue

                goods_data['stock_info'] = item.get('stock_info')
                goods_data['goods_id'] = str(item.get('zid'))
                goods_data['spider_url'] = tmp_url
                goods_data['username'] = '******'
                goods_data['price'] = item.get('price')
                goods_data['taobao_price'] = item.get('taobao_price')
                goods_data['sub_title'] = item.get('sub_title')
                goods_data['miaosha_time'] = item.get('miaosha_time')
                goods_data['session_id'] = str(base_session_id)

                zhe_800.insert_into_zhe_800_xianshimiaosha_table(
                    data=goods_data,
                    pipeline=my_pipeline)
                # Throttle between inserts.
                sleep(ZHE_800_SPIKE_SLEEP_TIME)

        # Release the parser before forcing a collection.
        del zhe_800
        gc.collect()
Esempio n. 7
0
    def get_spike_hour_goods_info(self):
        '''
        Crawl the flash-sale sessions and store goods not yet in the db.

        Session ids run from BASE_SESSION_ID (inclusive) up to MAX_SESSION_ID
        (exclusive) in steps of 2.

        :return: None
        '''
        def session_ids():
            # Same walk as a ``while`` loop with ``+= 2`` at the end.
            current = BASE_SESSION_ID
            while current < MAX_SESSION_ID:
                yield current
                current += 2

        for base_session_id in session_ids():
            print('待抓取的session_id为: ', base_session_id)
            data = self._get_one_session_id_data(base_session_id=base_session_id)
            sleep(.5)
            if data.get('data', {}).get('blocks', []) == []:
                # The session id does not exist.
                continue

            try:
                begin_times_timestamp = self._get_begin_times_timestamp(data)
            except Exception as e:
                print('遇到严重错误: ', e)
                continue

            print('秒杀时间为: ', timestamp_to_regulartime(begin_times_timestamp))
            if not self.is_recent_time(timestamp=begin_times_timestamp):
                # is_recent_time() answered falsy: skip this session.
                continue

            try:
                deals = [
                    block.get('deal', {})
                    for block in data.get('data', {}).get('blocks', [])
                ]
            except Exception as e:
                print('遇到严重错误: ', e)
                continue

            if deals == []:
                print('该sessionid没有相关key为jsons的数据')
                continue

            miaosha_goods_list = self.get_miaoshao_goods_info_list(data=deals)
            zhe_800 = Zhe800Parse()
            my_pipeline = SqlServerMyPageInfoSaveItemPipeline()
            if my_pipeline.is_connect_success:
                db_goods_id_list = self._get_db_goods_id_list(my_pipeline)
                for goods in miaosha_goods_list:
                    if goods.get('zid', '') in db_goods_id_list:
                        print('该goods_id已经存在于数据库中, 此处跳过')
                        continue

                    tmp_url = 'https://shop.zhe800.com/products/' + str(goods.get('zid', ''))
                    goods_id = zhe_800.get_goods_id_from_url(tmp_url)
                    zhe_800.get_goods_data(goods_id=goods_id)
                    goods_data = zhe_800.deal_with_data()
                    if goods_data == {}:
                        # Nothing parsed, nothing to insert.
                        continue

                    goods_data['stock_info'] = goods.get('stock_info')
                    goods_data['goods_id'] = str(goods.get('zid'))
                    goods_data['spider_url'] = tmp_url
                    goods_data['username'] = '******'
                    goods_data['price'] = goods.get('price')
                    goods_data['taobao_price'] = goods.get('taobao_price')
                    goods_data['sub_title'] = goods.get('sub_title')
                    miaosha_time = goods.get('miaosha_time')
                    goods_data['miaosha_time'] = miaosha_time
                    begin_time, end_time = get_miaosha_begin_time_and_miaosha_end_time(
                        miaosha_time=miaosha_time)
                    goods_data['miaosha_begin_time'] = begin_time
                    goods_data['miaosha_end_time'] = end_time
                    goods_data['session_id'] = str(base_session_id)

                    res = zhe_800.insert_into_zhe_800_xianshimiaosha_table(
                        data=goods_data, pipeline=my_pipeline)
                    # Remember a stored id so it is skipped if seen again.
                    if res and goods_id not in db_goods_id_list:
                        db_goods_id_list.append(goods_id)

                    # Throttle between inserts.
                    sleep(ZHE_800_SPIKE_SLEEP_TIME)

                sleep(4)

            # Release the parser before forcing a collection.
            del zhe_800
            gc.collect()
Esempio n. 8
0
def _build_shelf_and_down_time(new_is_delete, old_is_delete, stored_record):
    '''
    Build the ``my_shelf_and_down_time`` record written back for one item.

    ``is_delete == 0`` is treated as "on the shelf" and any other value as
    "taken down".  The timestamp that gets refreshed is chosen from the
    freshly crawled state, so a change *to* 0 records ``shelf_time`` and a
    change *away from* 0 records ``down_time``.

    :param new_is_delete: ``is_delete`` value parsed from the site just now
    :param old_is_delete: ``is_delete`` value currently stored in the database
    :param stored_record: JSON text stored for the item, or None
    :return: dict with the keys ``shelf_time`` and ``down_time``
    '''
    state_changed = new_is_delete != old_is_delete
    # A stored record counts as "never filled in" when it is missing or is
    # still the 35-character initial placeholder.
    record_is_blank = (
        stored_record is None
        or stored_record == '{"shelf_time": "", "down_time": ""}'
        or len(stored_record) == 35
    )
    if not state_changed and not record_is_blank:
        # Nothing changed and a real record exists: keep the stored values.
        return json.loads(stored_record)

    record = {
        'shelf_time': '',
        'down_time': '',
    }
    stamp_key = 'shelf_time' if new_is_delete == 0 else 'down_time'
    record[stamp_key] = str(get_shanghai_time())
    return record


def run_forever():
    '''
    Endlessly refresh every zhe800 item stored in the database.

    Each pass loads the rows from ``select_zhe_800_all_goods_id`` (row layout
    as used below: ``item[0]`` goods id, ``item[1]`` stored ``is_delete``,
    ``item[2]`` stored shelf/down-time JSON), re-crawls each item and writes
    the result back.  After a pass the loop sleeps 5.5 hours when the
    Shanghai hour is 0, otherwise 5 seconds.

    :return: never returns
    '''
    while True:
        tmp_sql_server = SqlServerMyPageInfoSaveItemPipeline()
        try:
            result = list(tmp_sql_server.select_zhe_800_all_goods_id())
        except TypeError:
            # list(None): the select returned nothing usable.
            print('TypeError错误, 原因数据库连接失败...(可能维护中)')
            result = None

        if result is not None:
            print('------>>> 下面是数据库返回的所有符合条件的goods_id <<<------')
            print(result)
            print('--------------------------------------------------------')

            print('即将开始实时更新数据, 请耐心等待...'.center(100, '#'))
            for index, item in enumerate(result, start=1):
                # A parser is created per item rather than shared across the
                # whole pass, to keep memory usage down.
                zhe_800 = Zhe800Parse()
                if index % 50 == 0:
                    # Reconnect every 50 items so one long-lived connection
                    # does not go stale.
                    print('正在重置,并与数据库建立新连接中...')
                    tmp_sql_server = SqlServerMyPageInfoSaveItemPipeline()
                    print('与数据库的新连接成功建立...')

                if tmp_sql_server.is_connect_success:
                    print(
                        '------>>>| 正在更新的goods_id为(%s) | --------->>>@ 索引值为(%d)'
                        % (item[0], index))
                    zhe_800.get_goods_data(goods_id=item[0])
                    data = zhe_800.deal_with_data()
                    if data != {}:  # an empty dict means the crawl failed
                        data['goods_id'] = item[0]
                        data['my_shelf_and_down_time'] = \
                            _build_shelf_and_down_time(
                                new_is_delete=data['is_delete'],
                                old_is_delete=item[1],
                                stored_record=item[2])
                        zhe_800.to_right_and_update_data(
                            data, pipeline=tmp_sql_server)
                else:
                    print('数据库连接失败,数据库可能关闭或者维护中')
                gc.collect()
            print('全部数据更新完毕'.center(100, '#'))

        if get_shanghai_time().hour == 0:  # no updates after midnight
            sleep(60 * 60 * 5.5)
        else:
            sleep(5)
        gc.collect()
    def run_forever(self):
        '''
        Run one refresh pass over the stored zhe800 flash-sale goods.

        Stated intent of the original author: only goods whose sale starts
        within a recent window are refreshed; sessions further in the future
        are left alone for now.  The window itself is decided by
        ``self.is_recent_time``.

        Row layout as used below: ``item[0]`` goods id, ``item[1]`` JSON text
        containing ``miaosha_begin_time``, ``item[2]`` session id.

        After the pass the method sleeps 5.5 hours when the Shanghai hour is
        0, otherwise 5 seconds.

        :return: None
        '''
        tmp_sql_server = SqlServerMyPageInfoSaveItemPipeline()
        try:
            result = list(
                tmp_sql_server.select_zhe_800_xianshimiaosha_all_goods_id())
        except TypeError:
            # list(None): the select returned nothing usable.
            print('TypeError错误, 原因数据库连接失败...(可能维护中)')
            result = None

        if result is not None:
            print('------>>> 下面是数据库返回的所有符合条件的goods_id <<<------')
            print(result)
            print('--------------------------------------------------------')

            print('即将开始实时更新数据, 请耐心等待...'.center(100, '#'))
            for index, item in enumerate(result, start=1):
                begin_time_text = json.loads(item[1]).get('miaosha_begin_time')
                # Whole-second epoch timestamp of the sale start.  int()
                # truncates the float directly instead of slicing its string
                # form, which only worked for 10-digit timestamps.
                begin_timestamp = int(
                    time.mktime(
                        time.strptime(begin_time_text, '%Y-%m-%d %H:%M:%S')))

                # A parser is created per item rather than shared across the
                # whole pass, to keep memory usage down.
                zhe_800_miaosha = Zhe800Parse()
                if index % 50 == 0:
                    # Reconnect every 50 items so one long-lived connection
                    # does not go stale.
                    print('正在重置,并与数据库建立新连接中...')
                    tmp_sql_server = SqlServerMyPageInfoSaveItemPipeline()
                    print('与数据库的新连接成功建立...')

                if tmp_sql_server.is_connect_success:
                    self._refresh_one_miaosha_item(
                        item=item,
                        index=index,
                        begin_time_text=begin_time_text,
                        begin_timestamp=begin_timestamp,
                        parser=zhe_800_miaosha,
                        pipeline=tmp_sql_server)
                else:
                    print('数据库连接失败,数据库可能关闭或者维护中')
                gc.collect()
            print('全部数据更新完毕'.center(100, '#'))

        if get_shanghai_time().hour == 0:  # no updates after midnight
            sleep(60 * 60 * 5.5)
        else:
            sleep(5)
        gc.collect()

    def _drop_expired_miaosha_goods(self, goods_id, begin_time_text, pipeline):
        '''
        Delete one flash-sale row and report it as expired.

        :param goods_id: id of the row to delete
        :param begin_time_text: stored sale start time, used in the message
        :param pipeline: database pipeline used for the delete
        :return: None
        '''
        pipeline.delete_zhe_800_expired_goods_id(goods_id=goods_id)
        print(
            '过期的goods_id为(%s)' % goods_id,
            ', 限时秒杀开始时间为(%s), 删除成功!' % begin_time_text)

    def _refresh_one_miaosha_item(self, item, index, begin_time_text,
                                  begin_timestamp, parser, pipeline):
        '''
        Delete, skip or re-crawl a single stored flash-sale row.

        :param item: database row (goods id, begin-time JSON, session id)
        :param index: 1-based position of the row, used for progress output
        :param begin_time_text: sale start time as stored in the row
        :param begin_timestamp: the same start time as an epoch timestamp
        :param parser: ``Zhe800Parse`` instance used for this row
        :param pipeline: database pipeline used for deletes and updates
        :return: None
        '''
        goods_id = item[0]

        # Ask once: the answer depends on the clock, so two separate calls
        # could disagree across a window boundary.
        window_state = self.is_recent_time(begin_timestamp)
        if window_state == 0:
            self._drop_expired_miaosha_goods(goods_id, begin_time_text, pipeline)
            return
        if window_state == 2:
            # Deliberately skip instead of stopping the whole pass: the rows
            # are not guaranteed to come back in order.
            return

        # Any other state means the row is inside the window to be updated.
        print(
            '------>>>| 正在更新的goods_id为(%s) | --------->>>@ 索引值为(%d)'
            % (goods_id, index))

        tmp_url = 'https://zapi.zhe800.com/zhe800_n_api/xsq/m/session_deals?session_id={0}&page=1&per_page=1000'.format(
            str(item[2]))
        body = self.my_phantomjs.use_phantomjs_to_get_url_body(url=tmp_url)
        # The JSON payload is read from the page's <pre> element.
        pre_contents = re.compile(r'<pre.*?>(.*)</pre>').findall(body)
        if pre_contents == []:
            print('获取到的data为空!')
            return

        blocks = json.loads(pre_contents[0]).get('data', {}).get('blocks', [])
        if blocks == []:  # the session id does not exist
            print('该session_id不存在,此处跳过')
            return

        deals = [block.get('deal', {}) for block in blocks]
        if deals == []:
            # No deals for this session: drop the stored row.
            print('该sessionid没有相关key为jsons的数据')
            self._drop_expired_miaosha_goods(goods_id, begin_time_text, pipeline)
            return

        miaosha_goods_list = self.get_miaoshao_goods_info_list(data=deals)
        # Every zid currently present in this session.
        session_goods_ids = [goods.get('zid') for goods in miaosha_goods_list]
        if goods_id not in session_goods_ids:
            # The item is no longer part of the session.
            print('该商品已被下架限时秒杀活动,此处将其删除')
            pipeline.delete_zhe_800_expired_goods_id(goods_id=goods_id)
            print('下架的goods_id为(%s)' % goods_id, ', 删除成功!')
            return

        self._update_listed_miaosha_goods(
            goods_id=goods_id,
            miaosha_goods_list=miaosha_goods_list,
            parser=parser,
            pipeline=pipeline)

    def _update_listed_miaosha_goods(self, goods_id, miaosha_goods_list,
                                     parser, pipeline):
        '''
        Re-crawl ``goods_id`` and update it from its matching session entries.

        :param goods_id: id of the stored row being refreshed
        :param miaosha_goods_list: session entries (dicts carrying ``zid``)
        :param parser: ``Zhe800Parse`` instance used for the crawl and update
        :param pipeline: database pipeline handed to the update call
        :return: None
        '''
        for goods in miaosha_goods_list:
            if goods.get('zid', '') != goods_id:
                continue

            parser.get_goods_data(goods_id=goods_id)
            goods_data = parser.deal_with_data()
            if goods_data == {}:  # an empty dict means the crawl failed
                continue

            stock_info = goods.get('stock_info')
            goods_data['stock_info'] = stock_info
            goods_data['goods_id'] = str(goods.get('zid'))
            # Session prices are only applied while activity stock remains;
            # otherwise the crawled prices are kept.
            if stock_info.get('activity_stock') > 0:
                goods_data['price'] = goods.get('price')
                goods_data['taobao_price'] = goods.get('taobao_price')
            goods_data['sub_title'] = goods.get('sub_title')

            miaosha_time = goods.get('miaosha_time')
            goods_data['miaosha_time'] = miaosha_time
            goods_data['miaosha_begin_time'], goods_data['miaosha_end_time'] = \
                self.get_miaosha_begin_time_and_miaosha_end_time(
                    miaosha_time=miaosha_time)

            parser.to_update_zhe_800_xianshimiaosha_table(
                data=goods_data,
                pipeline=pipeline)
Esempio n. 10
0
def run_forever():
    '''
    Endlessly refresh the zhe800 goods (SiteID=11) stored in
    dbo.GoodsInfoAutoGet.

    Columns of each selected row, in order: GoodsID, IsDelete, Price,
    TaoBaoPrice, shelf_time, delete_time.  After a pass the loop sleeps
    5.5 hours when the Shanghai hour is 0, otherwise 5 seconds.

    :return: never returns
    '''
    sql_str = '''
        select GoodsID, IsDelete, Price, TaoBaoPrice, shelf_time, delete_time
        from dbo.GoodsInfoAutoGet 
        where SiteID=11 and MainGoodsID is not null'''

    while True:
        tmp_sql_server = SqlServerMyPageInfoSaveItemPipeline()
        try:
            result = list(tmp_sql_server._select_table(sql_str=sql_str))
        except TypeError:
            # list(None): the select returned nothing usable.
            print('TypeError错误, 原因数据库连接失败...(可能维护中)')
            result = None

        if result is not None:
            print('------>>> 下面是数据库返回的所有符合条件的goods_id <<<------')
            print(result)
            print('--------------------------------------------------------')

            print('即将开始实时更新数据, 请耐心等待...'.center(100, '#'))
            for index, item in enumerate(result, start=1):
                # A parser is created per item rather than shared across the
                # whole pass, to keep memory usage down.
                zhe_800 = Zhe800Parse()
                if index % 50 == 0:
                    # Reconnect every 50 items so one long-lived connection
                    # does not go stale.
                    print('正在重置,并与数据库建立新连接中...')
                    tmp_sql_server = SqlServerMyPageInfoSaveItemPipeline()
                    print('与数据库的新连接成功建立...')

                if not tmp_sql_server.is_connect_success:
                    print('数据库连接失败,数据库可能关闭或者维护中')
                else:
                    print('------>>>| 正在更新的goods_id为(%s) | --------->>>@ 索引值为(%d)' % (item[0], index))
                    zhe_800.get_goods_data(goods_id=item[0])
                    data = zhe_800.deal_with_data()
                    if data == {}:
                        # Empty result from the parser: pause before moving on.
                        sleep(2)
                    else:
                        data['goods_id'] = item[0]

                        shelf_time, delete_time = get_shelf_time_and_delete_time(
                            tmp_data=data,
                            is_delete=item[1],
                            shelf_time=item[4],
                            delete_time=item[5])
                        data['shelf_time'] = shelf_time
                        data['delete_time'] = delete_time

                        is_price_change, price_change_info = _get_price_change_info(
                            old_price=item[2],
                            old_taobao_price=item[3],
                            new_price=data['price'],
                            new_taobao_price=data['taobao_price'])
                        data['_is_price_change'] = is_price_change
                        data['_price_change_info'] = price_change_info

                        zhe_800.to_right_and_update_data(data, pipeline=tmp_sql_server)

                gc.collect()
                sleep(1.5)
            print('全部数据更新完毕'.center(100, '#'))

        if get_shanghai_time().hour == 0:  # no updates after midnight
            sleep(60*60*5.5)
        else:
            sleep(5)
        gc.collect()
Esempio n. 11
0
def run_forever():
    '''
    Endlessly refresh the zhe800 goods selected by ``z8_select_str_3``.

    Row positions as used below: 0 goods id, 1 stored is_delete, 2 stored
    price, 3 stored taobao price, 4 shelf_time, 5 delete_time, 6 stored sku
    info, 7 stored is_price_change flag.  After a pass the loop sleeps
    5.5 hours when the Shanghai hour is 0, otherwise 5 seconds.

    :return: never returns
    '''
    while True:
        tmp_sql_server = SqlServerMyPageInfoSaveItemPipeline()
        try:
            result = list(
                tmp_sql_server._select_table(sql_str=z8_select_str_3))
        except TypeError:
            # list(None): the select returned nothing usable.
            print('TypeError错误, 原因数据库连接失败...(可能维护中)')
            result = None

        if result is not None:
            print('------>>> 下面是数据库返回的所有符合条件的goods_id <<<------')
            print(result)
            print('--------------------------------------------------------')

            print('即将开始实时更新数据, 请耐心等待...'.center(100, '#'))
            for index, item in enumerate(result, start=1):
                # A parser is created per item rather than shared across the
                # whole pass, to keep memory usage down.
                zhe_800 = Zhe800Parse()
                if index % 50 == 0:
                    # Reconnect every 50 items so one long-lived connection
                    # does not go stale.
                    print('正在重置,并与数据库建立新连接中...')
                    tmp_sql_server = SqlServerMyPageInfoSaveItemPipeline()
                    print('与数据库的新连接成功建立...')

                if not tmp_sql_server.is_connect_success:
                    print('数据库连接失败,数据库可能关闭或者维护中')
                else:
                    print(
                        '------>>>| 正在更新的goods_id为(%s) | --------->>>@ 索引值为(%d)'
                        % (item[0], index))
                    zhe_800.get_goods_data(goods_id=item[0])
                    data = zhe_800.deal_with_data()
                    if data == {}:
                        # Empty result from the parser: pause before moving on.
                        sleep(2)
                    else:
                        data['goods_id'] = item[0]

                        shelf_time, delete_time = get_shelf_time_and_delete_time(
                            tmp_data=data,
                            is_delete=item[1],
                            shelf_time=item[4],
                            delete_time=item[5])
                        data['shelf_time'] = shelf_time
                        data['delete_time'] = delete_time

                        is_price_change, price_change_info = _get_price_change_info(
                            old_price=item[2],
                            old_taobao_price=item[3],
                            new_price=data['price'],
                            new_taobao_price=data['taobao_price'])
                        data['_is_price_change'] = is_price_change
                        data['_price_change_info'] = price_change_info

                        try:
                            old_sku_info = format_price_info_list(
                                price_info_list=json_2_dict(item[6]),
                                site_id=11)
                        except AttributeError:
                            # The stored value was already formatted earlier.
                            old_sku_info = item[6]

                        new_sku_info = format_price_info_list(
                            data['price_info_list'], site_id=11)
                        stored_flag = item[7] if item[7] is not None else 0
                        # NOTE(review): this replaces the '_is_price_change'
                        # value set just above from _get_price_change_info;
                        # confirm that the overwrite is intended.
                        sku_is_price_change, sku_info_trans_time = get_sku_info_trans_record(
                            old_sku_info=old_sku_info,
                            new_sku_info=new_sku_info,
                            is_price_change=stored_flag)
                        data['_is_price_change'] = sku_is_price_change
                        data['sku_info_trans_time'] = sku_info_trans_time

                        zhe_800.to_right_and_update_data(
                            data, pipeline=tmp_sql_server)

                gc.collect()
                sleep(1.5)
            print('全部数据更新完毕'.center(100, '#'))

        if get_shanghai_time().hour == 0:  # no updates after midnight
            sleep(60 * 60 * 5.5)
        else:
            sleep(5)
        gc.collect()