예제 #1
0
    def deal_with_data(self, *param):
        '''
        处理并存储相关秒杀商品的数据
        :param param: 相关参数
        :return:
        '''
        print(60 * '*')
        event_time = param[0]
        print('秒杀开始时间:', self.timestamp_to_regulartime(event_time), '\t',
              '对应时间戳为: ', event_time)
        print(60 * '*')

        item_list = param[1]

        mogujie = MoGuJieMiaoShaParse()
        my_pipeline = SqlServerMyPageInfoSaveItemPipeline()

        if my_pipeline.is_connect_success:
            db_goods_id_list = [
                item[0] for item in list(
                    my_pipeline.select_mogujie_xianshimiaosha_all_goods_id())
            ]
            # print(db_goods_id_list)

            for item in item_list:
                if item.get('iid', '') in db_goods_id_list:
                    print('该goods_id已经存在于数据库中, 此处跳过')
                    pass

                else:
                    goods_id = str(item.get('iid', ''))
                    tmp_url = item.get('link', '')

                    try:
                        object_id = re.compile(r'objectId=(.*?)&').findall(
                            tmp_url)[0]
                    except IndexError:  # 表示匹配到的地址不是秒杀商品的地址
                        print('+++++++ 这个url不是秒杀的url: ', tmp_url)
                        continue

                    tmp_url = 'https://shop.mogujie.com/rushdetail/{0}?objectId={1}&type=rush'.format(
                        goods_id, object_id)

                    tmp_ = mogujie.get_goods_id_from_url(tmp_url)
                    mogujie.get_goods_data(goods_id=tmp_)
                    goods_data = mogujie.deal_with_data()

                    if goods_data == {}:  # 返回的data为空则跳过
                        pass

                    else:  # 否则就解析并且插入
                        goods_data['goods_url'] = tmp_url
                        goods_data['goods_id'] = str(goods_id)

                        # price设置为原价
                        try:
                            tmp_price_list = sorted([
                                round(float(item_4.get('normal_price', '')), 2)
                                for item_4 in goods_data['price_info_list']
                            ])
                            price = Decimal(tmp_price_list[-1]).__round__(
                                2)  # 商品原价
                            goods_data['price'] = price
                        except:
                            print('设置price为原价时出错!请检查')
                            continue

                        goods_data['miaosha_time'] = {
                            'miaosha_begin_time':
                            self.timestamp_to_regulartime(
                                int(item.get('startTime', 0))),
                            'miaosha_end_time':
                            self.timestamp_to_regulartime(
                                int(item.get('endTime', 0))),
                        }
                        goods_data['miaosha_begin_time'], goods_data[
                            'miaosha_end_time'] = self.get_miaosha_begin_time_and_miaosha_end_time(
                                miaosha_time=goods_data['miaosha_time'])
                        goods_data['event_time'] = str(event_time)

                        # pprint(goods_data)
                        # print(goods_data)
                        mogujie.insert_into_mogujie_xianshimiaosha_table(
                            data=goods_data, pipeline=my_pipeline)
                        sleep(MOGUJIE_SLEEP_TIME)  # 放慢速度

        else:
            print('数据库连接失败,此处跳过!')
            pass

        try:
            del mogujie
        except:
            pass
        gc.collect()
    def run_forever(self):
        '''
        实时更新数据
        :return:
        '''
        tmp_sql_server = SqlServerMyPageInfoSaveItemPipeline()
        try:
            result = list(
                tmp_sql_server.select_mogujie_xianshimiaosha_all_goods_id())
        except TypeError:
            print('TypeError错误, 原因数据库连接失败...(可能维护中)')
            result = None
        if result is None:
            pass
        else:
            print('------>>> 下面是数据库返回的所有符合条件的goods_id <<<------')
            print(result)
            print('--------------------------------------------------------')

            print('即将开始实时更新数据, 请耐心等待...'.center(100, '#'))
            index = 1

            for item in result:  # 实时更新数据
                miaosha_end_time = json.loads(item[1]).get('miaosha_end_time')
                miaosha_end_time = int(
                    str(
                        time.mktime(
                            time.strptime(miaosha_end_time,
                                          '%Y-%m-%d %H:%M:%S')))[0:10])
                # print(miaosha_end_time)

                data = {}
                # 释放内存, 在外面声明就会占用很大的, 所以此处优化内存的方法是声明后再删除释放
                mogujie_miaosha = MoGuJieMiaoShaParse()
                if index % 50 == 0:  # 每50次重连一次,避免单次长连无响应报错
                    print('正在重置,并与数据库建立新连接中...')
                    tmp_sql_server = SqlServerMyPageInfoSaveItemPipeline()
                    print('与数据库的新连接成功建立...')

                if tmp_sql_server.is_connect_success:
                    if self.is_recent_time(miaosha_end_time) == 0:
                        tmp_sql_server.delete_mogujie_miaosha_expired_goods_id(
                            goods_id=item[0])
                        print(
                            '过期的goods_id为(%s)' % item[0],
                            ', 限时秒杀开始时间为(%s), 删除成功!' %
                            json.loads(item[1]).get('miaosha_begin_time'))

                    elif self.is_recent_time(miaosha_end_time) == 2:
                        # break       # 跳出循环
                        pass  # 此处应该是pass,而不是break,因为数据库传回的goods_id不都是按照顺序的

                    else:  # 返回1,表示在待更新区间内
                        print(
                            '------>>>| 正在更新的goods_id为(%s) | --------->>>@ 索引值为(%d)'
                            % (item[0], index))
                        data['goods_id'] = item[0]

                        item_list = self.get_item_list(event_time=str(item[2]))
                        if item_list == '':
                            # 可能网络状况导致, 先跳过
                            pass

                        elif item_list == []:
                            print('该商品已被下架限时秒杀活动,此处将其删除')
                            tmp_sql_server.delete_mogujie_miaosha_expired_goods_id(
                                goods_id=item[0])
                            print('下架的goods_id为(%s)' % item[0], ', 删除成功!')
                            pass

                        else:
                            # 该event_time中现有的所有goods_id的list
                            miaosha_goods_all_goods_id = [
                                item_1.get('iid', '') for item_1 in item_list
                            ]

                            if item[0] not in miaosha_goods_all_goods_id:  # 内部已经下架的
                                print('该商品已被下架限时秒杀活动,此处将其删除')
                                tmp_sql_server.delete_mogujie_miaosha_expired_goods_id(
                                    goods_id=item[0])
                                print('下架的goods_id为(%s)' % item[0], ', 删除成功!')
                                pass

                            else:  # 未下架的
                                for item_2 in item_list:
                                    if item_2.get('iid', '') == item[0]:
                                        spider_url = item[3]
                                        mogujie_miaosha.get_goods_data(
                                            goods_id=spider_url)
                                        goods_data = mogujie_miaosha.deal_with_data(
                                        )

                                        if goods_data == {}:  # 返回的data为空则跳过
                                            pass
                                        else:
                                            goods_data['goods_id'] = str(
                                                item[0])

                                            # price设置为原价
                                            try:
                                                tmp_price_list = sorted([
                                                    round(
                                                        float(
                                                            item_4.get(
                                                                'normal_price',
                                                                '')), 2)
                                                    for item_4 in goods_data[
                                                        'price_info_list']
                                                ])
                                                price = Decimal(
                                                    tmp_price_list[-1]
                                                ).__round__(2)  # 商品原价
                                                goods_data['price'] = price
                                            except:
                                                print('设置price为原价时出错!请检查')
                                                continue

                                            goods_data['miaosha_time'] = {
                                                'miaosha_begin_time':
                                                self.timestamp_to_regulartime(
                                                    int(
                                                        item_2.get(
                                                            'startTime', 0))),
                                                'miaosha_end_time':
                                                self.timestamp_to_regulartime(
                                                    int(
                                                        item_2.get(
                                                            'endTime', 0))),
                                            }
                                            goods_data[
                                                'miaosha_begin_time'], goods_data[
                                                    'miaosha_end_time'] = self.get_miaosha_begin_time_and_miaosha_end_time(
                                                        miaosha_time=goods_data[
                                                            'miaosha_time'])

                                            # pprint(goods_data)
                                            # print(goods_data)
                                            mogujie_miaosha.update_mogujie_xianshimiaosha_table(
                                                data=goods_data,
                                                pipeline=tmp_sql_server)
                                            sleep(MOGUJIE_SLEEP_TIME)  # 放慢速度
                                    else:
                                        pass

                else:  # 表示返回的data值为空值
                    print('数据库连接失败,数据库可能关闭或者维护中')
                    pass
                index += 1
                gc.collect()
            print('全部数据更新完毕'.center(100, '#'))  # sleep(60*60)
        if get_shanghai_time().hour == 0:  # 0点以后不更新
            sleep(60 * 60 * 5.5)
        else:
            sleep(5)
        gc.collect()