예제 #1
0
def run_forever():
    while True:
        #### 实时更新数据
        tmp_sql_server = SqlServerMyPageInfoSaveItemPipeline()
        try:
            result = list(tmp_sql_server.select_tmall_all_goods_id_url())
        except TypeError as e:
            print('TypeError错误, 原因数据库连接失败...(可能维护中)')
            result = None
        if result is None:
            pass
        else:
            print('------>>> 下面是数据库返回的所有符合条件的goods_id <<<------')
            print(result)
            print('--------------------------------------------------------')

            print('即将开始实时更新数据, 请耐心等待...'.center(100, '#'))
            index = 1
            for item in result:  # 实时更新数据
                data = {}
                # 释放内存,在外面声明就会占用很大的,所以此处优化内存的方法是声明后再删除释放
                tmall = TmallParse()
                if index % 50 == 0:  # 每50次重连一次,避免单次长连无响应报错
                    print('正在重置,并与数据库建立新连接中...')
                    # try:
                    #     del tmp_sql_server
                    # except:
                    #     pass
                    # gc.collect()
                    tmp_sql_server = SqlServerMyPageInfoSaveItemPipeline()
                    print('与数据库的新连接成功建立...')

                if tmp_sql_server.is_connect_success:
                    print(
                        '------>>>| 正在更新的goods_id为(%s) | --------->>>@ 索引值为(%d)'
                        % (item[1], index))
                    tmp_item = []
                    if item[0] == 3:  # 从数据库中取出时,先转换为对应的类型
                        tmp_item.append(0)
                    elif item[0] == 4:
                        tmp_item.append(1)
                    elif item[0] == 6:
                        tmp_item.append(2)
                    tmp_item.append(item[1])
                    tmall.get_goods_data(goods_id=tmp_item)
                    data = tmall.deal_with_data()
                    if data != {}:
                        data['goods_id'] = item[1]
                        '''
                        设置最后刷新的商品状态上下架时间
                        '''
                        # 1.is_delete由0->1 为下架时间down_time  2. is_delete由1->0 为上架时间shelf_time
                        my_shelf_and_down_time = {
                            'shelf_time': '',
                            'down_time': '',
                        }
                        if data['is_delete'] != item[2]:
                            if data['is_delete'] == 0 and item[2] == 1:
                                # is_delete由0->1 表示商品状态上架变为下架
                                my_shelf_and_down_time['down_time'] = str(
                                    get_shanghai_time())
                            else:
                                # is_delete由1->0 表示商品状态下架变为上架
                                my_shelf_and_down_time['shelf_time'] = str(
                                    get_shanghai_time())
                        else:
                            if item[3] is None or item[
                                    3] == '{"shelf_time": "", "down_time": ""}' or len(
                                        item[3]) == 35:  # 35就是那串初始str
                                if data['is_delete'] == 0:  # 上架的状态
                                    my_shelf_and_down_time['shelf_time'] = str(
                                        get_shanghai_time())
                                else:  # 下架的状态
                                    my_shelf_and_down_time['down_time'] = str(
                                        get_shanghai_time())
                            else:
                                # 否则保存原始值不变
                                tmp_shelf_and_down_time = item[3]
                                my_shelf_and_down_time = json.loads(
                                    tmp_shelf_and_down_time)  # 先转换为dict
                        data['my_shelf_and_down_time'] = my_shelf_and_down_time
                        # print(my_shlef_and_down_time)

                        # print('------>>>| 爬取到的数据为: ', data)
                        tmall.to_right_and_update_data(data,
                                                       pipeline=tmp_sql_server)
                    else:  # 表示返回的data值为空值
                        pass
                else:  # 表示返回的data值为空值
                    print('数据库连接失败,数据库可能关闭或者维护中')
                    pass
                index += 1
                # try:
                #     del tmall
                # except:
                #     pass
                gc.collect()
                # sleep(1)
            print('全部数据更新完毕'.center(100, '#'))  # sleep(60*60)
        if get_shanghai_time().hour == 0:  # 0点以后不更新
            sleep(60 * 60 * 5.5)
        else:
            sleep(5)
        # del ali_1688
        gc.collect()
예제 #2
0
def run_forever():
    while True:
        #### 实时更新数据
        tmp_sql_server = SqlServerMyPageInfoSaveItemPipeline()
        try:
            result = list(tmp_sql_server.select_tmall_all_goods_id_url())
        except TypeError as e:
            print('TypeError错误, 原因数据库连接失败...(可能维护中)')
            result = None
        if result is None:
            pass
        else:
            print('------>>> 下面是数据库返回的所有符合条件的goods_id <<<------')
            print(result)
            print('--------------------------------------------------------')

            print('即将开始实时更新数据, 请耐心等待...'.center(100, '#'))
            index = 1
            # 释放内存,在外面声明就会占用很大的,所以此处优化内存的方法是声明后再删除释放
            tmall = TmallParse()
            for item in result:  # 实时更新数据
                data = {}
                if index % 5 == 0:
                    tmall = TmallParse()
                    gc.collect()

                if index % 50 == 0:  # 每50次重连一次,避免单次长连无响应报错
                    print('正在重置,并与数据库建立新连接中...')
                    # try:
                    #     del tmp_sql_server
                    # except:
                    #     pass
                    # gc.collect()
                    tmp_sql_server = SqlServerMyPageInfoSaveItemPipeline()
                    print('与数据库的新连接成功建立...')

                if tmp_sql_server.is_connect_success:
                    print(
                        '------>>>| 正在更新的goods_id为(%s) | --------->>>@ 索引值为(%d)'
                        % (item[1], index))
                    tmp_item = []
                    if item[0] == 3:  # 从数据库中取出时,先转换为对应的类型
                        tmp_item.append(0)
                    elif item[0] == 4:
                        tmp_item.append(1)
                    elif item[0] == 6:
                        tmp_item.append(2)
                    tmp_item.append(item[1])
                    data = tmall.get_goods_data(goods_id=tmp_item)
                    if isinstance(data, int):  # 单独处理return 4041
                        continue

                    if data.get('is_delete') == 1:  # 单独处理下架商品
                        data['goods_id'] = item[1]

                        data['my_shelf_and_down_time'], data[
                            'delete_time'] = get_my_shelf_and_down_time_and_delete_time(
                                tmp_data=data,
                                is_delete=item[2],
                                MyShelfAndDownTime=item[3])

                        # print('------>>>| 爬取到的数据为: ', data)
                        tmall.to_right_and_update_data(data,
                                                       pipeline=tmp_sql_server)

                        sleep(1.5)
                        index += 1
                        gc.collect()
                        continue

                    data = tmall.deal_with_data()
                    if data != {}:
                        data['goods_id'] = item[1]

                        data['my_shelf_and_down_time'], data[
                            'delete_time'] = get_my_shelf_and_down_time_and_delete_time(
                                tmp_data=data,
                                is_delete=item[2],
                                MyShelfAndDownTime=item[3])

                        # print('------>>>| 爬取到的数据为: ', data)
                        tmall.to_right_and_update_data(data,
                                                       pipeline=tmp_sql_server)
                    else:  # 表示返回的data值为空值
                        pass
                else:  # 表示返回的data值为空值
                    print('数据库连接失败,数据库可能关闭或者维护中')
                    pass
                index += 1
                # try:
                #     del tmall
                # except:
                #     pass
                gc.collect()
                sleep(2)
            print('全部数据更新完毕'.center(100, '#'))  # sleep(60*60)
        if get_shanghai_time().hour == 0:  # 0点以后不更新
            sleep(60 * 60 * 5.5)
        else:
            sleep(5)
        # del ali_1688
        gc.collect()