async def _get_new_cc_obj(self, index): if index % 10 == 0: # 不能共享一个对象了, 否则驱动访问会异常! try: del self.chuchujie_miaosha except: pass collect() self.chuchujie_miaosha = ChuChuJie_9_9_Parse() return
async def _update_db(self) -> None: ''' 秒杀数据更新 :return: ''' while True: self.lg = await self._get_new_logger(logger_name=get_uuid1()) result = await self._get_db_old_data() if result is None: pass else: self.goods_index = 1 tasks_params_list = TasksParamsListObj( tasks_params_list=result, step=self.concurrency) self.chuchujie_miaosha = ChuChuJie_9_9_Parse() index = 1 while True: try: slice_params_list = tasks_params_list.__next__() # self.lg.info(str(slice_params_list)) except AssertionError: # 全部提取完毕, 正常退出 break tasks = [] for item in slice_params_list: self.lg.info('创建 task goods_id: {}'.format(item[0])) tasks.append( self.loop.create_task( self._update_one_goods_info(item=item, index=index))) index += 1 await _get_async_task_result(tasks=tasks, logger=self.lg) self.lg.info('全部数据更新完毕'.center(100, '#')) if get_shanghai_time().hour == 0: # 0点以后不更新 await async_sleep(60 * 60 * 5.5) else: await async_sleep(2.5 * 60) try: del self.chuchujie_miaosha except: pass collect()
def deal_with_data(self, *params): ''' 处理并存储相关秒杀商品数据 :param params: 相关参数 :return: ''' item_list = params[0] chuchujie = ChuChuJie_9_9_Parse() my_pipeline = SqlServerMyPageInfoSaveItemPipeline() if my_pipeline.is_connect_success: db_goods_id_list = [ item[0] for item in list( my_pipeline.select_chuchujie_xianshimiaosha_all_goods_id()) ] # print(db_goods_id_list) # my_phantomjs = MyPhantomjs() # my_phantomjs.init_phantomjs() # index = 1 for item in item_list: if item.get('goods_id', '') in db_goods_id_list: print('该goods_id已经存在于数据库中, 此处跳过') pass else: goods_id = item.get('goods_id', '') tmp_url = 'https://m.chuchujie.com/details/detail.html?id=' + str( goods_id) chuchujie.get_goods_data(goods_id=goods_id) goods_data = chuchujie.deal_with_data() if goods_data == {}: # 返回的data为空则跳过 pass elif goods_data.get('is_delete', 0) == 1: # is_delete=1(即库存为0)则跳过 print('------>>>| 该商品库存为0,已被抢光!') pass else: # 否则就解析并且插入 my_phantomjs = MyPhantomjs() my_phantomjs.init_phantomjs() # 获取剩余时间 tmp_body = my_phantomjs.use_phantomjs_to_get_url_body( url=tmp_url, css_selector='p#activityTime span') # print(tmp_body) try: del my_phantomjs except: pass gc.collect() if tmp_body == '': # 获取手机版的页面完整html失败 sleep(.4) pass else: # p#activityTime span _t = Selector(text=tmp_body).css( 'p#activityTime span::text').extract_first() _t = re.compile(r'剩余').sub('', _t) # print(_t) if _t == '' or _t is None: print('获取到的_t为空值, 严重错误! 请检查!') miaosha_end_time = self.get_miaosha_end_time(_t) goods_data['goods_url'] = tmp_url goods_data['goods_id'] = str(goods_id) goods_data['sub_title'] = item.get('sub_title', '') goods_data['miaosha_time'] = { 'miaosha_begin_time': self.timestamp_to_regulartime(int( time.time())), 'miaosha_end_time': self.timestamp_to_regulartime( int(miaosha_end_time)), } goods_data['miaosha_begin_time'], goods_data[ 'miaosha_end_time'] = self.get_miaosha_begin_time_and_miaosha_end_time( miaosha_time=goods_data['miaosha_time']) goods_data['gender'] = str(item.get('gender', '0')) goods_data['page'] = item.get('page') # pprint(goods_data) # print(goods_data) chuchujie.insert_into_chuchujie_xianshimiaosha_table( data=goods_data, pipeline=my_pipeline) # sleep(CHUCHUJIE_SLEEP_TIME) # 放慢速度 由于初始化用了phantomjs时间久,于是就不睡眠 # index += 1 else: print('数据库连接失败,此处跳过!') pass try: del chuchujie except: pass gc.collect()
def run_forever(self): ''' 实时更新数据 :return: ''' tmp_sql_server = SqlServerMyPageInfoSaveItemPipeline() try: tmp_sql_server._delete_table(sql_str=cc_delete_str_2) result = list( tmp_sql_server._select_table(sql_str=cc_select_str_1)) except TypeError: print('TypeError错误, 原因数据库连接失败...(可能维护中)') result = None if result is None: pass else: print('------>>> 下面是数据库返回的所有符合条件的goods_id <<<------') print(result) print('--------------------------------------------------------') print('即将开始实时更新数据, 请耐心等待...'.center(100, '#')) index = 1 for item in result: # 实时更新数据 miaosha_end_time = json.loads(item[1]).get('miaosha_end_time') miaosha_end_time = int( str( time.mktime( time.strptime(miaosha_end_time, '%Y-%m-%d %H:%M:%S')))[0:10]) # print(miaosha_end_time) data = {} if index % 50 == 0: # 每50次重连一次,避免单次长连无响应报错 print('正在重置,并与数据库建立新连接中...') tmp_sql_server = SqlServerMyPageInfoSaveItemPipeline() print('与数据库的新连接成功建立...') if tmp_sql_server.is_connect_success: if self.is_recent_time(miaosha_end_time) == 0: tmp_sql_server._delete_table( sql_str=self.delete_sql_str, params=(item[0])) print( '过期的goods_id为(%s)' % item[0], ', 限时秒杀结束时间为(%s), 删除成功!' % json.loads(item[1]).get('miaosha_end_time')) elif self.is_recent_time(miaosha_end_time) == 2: # break # 跳出循环 pass # 此处应该是pass,而不是break,因为数据库传回的goods_id不都是按照顺序的 else: # 返回1,表示在待更新区间内 # 释放内存, 在外面声明就会占用很大的, 所以此处优化内存的方法是声明后再删除释放 chuchujie_miaosha = ChuChuJie_9_9_Parse() print( '------>>>| 正在更新的goods_id为(%s) | --------->>>@ 索引值为(%d)' % (item[0], index)) data['goods_id'] = item[0] body = self.get_one_page_goods_info(item[2], item[3]) if body == '{}': # 可能是网络原因导致, 先跳过 pass else: try: json_body = json.loads(body) # print(json_body) except: print('json.loads转换body时出错!请检查') json_body = {} pass try: this_page_total_count = json_body.get( 'data', {}).get('groupList', [])[0].get('totalCount', 0) except IndexError: print('获取this_page_total_count时出错, 请检查!') this_page_total_count = 0 # 获取对应gender, page的商品list if this_page_total_count == 0: item_list = [] else: tmp_goods_list = json_body.get('data', {}).get( 'groupList', [])[0].get('dataList', []) item_list = [{ 'goods_id': str(item_s.get('chuchuId', '')), 'sub_title': item_s.get('description', ''), } for item_s in tmp_goods_list] if item_list == []: print('#### 该gender, page对应得到的item_list为空[]!') print('该商品已被下架限时秒杀活动,此处将其删除') tmp_sql_server._delete_table( sql_str=self.delete_sql_str, params=(item[0])) print('下架的goods_id为(%s)' % item[0], ', 删除成功!') pass else: # miaosha_goods_all_goods_id = [item_1.get('goods_id', '') for item_1 in item_list] """ 由于不会内部提前下架,所以在售卖时间内的全部进行相关更新 """ # if item[0] not in miaosha_goods_all_goods_id: # 内部已经下架的 # print('该商品已被下架限时秒杀活动,此处将其删除') # tmp_sql_server._delete_table(sql_str=self.delete_sql_str, params=(item[0])) # print('下架的goods_id为(%s)' % item[0], ', 删除成功!') # pass # # else: # 未下架的 ''' 不更新秒杀时间和sub_title, 只更新其他相关数据 ''' # for item_2 in item_list: # if item_2.get('goods_id', '') == item[0]: chuchujie_miaosha.get_goods_data( goods_id=item[0]) goods_data = chuchujie_miaosha.deal_with_data() if goods_data == {}: # 返回的data为空则跳过 pass else: goods_data['goods_id'] = str(item[0]) # goods_data['sub_title'] = item_2.get('sub_title', '') # print(goods_data) chuchujie_miaosha.update_chuchujie_xianshimiaosha_table( data=goods_data, pipeline=tmp_sql_server) sleep(CHUCHUJIE_SLEEP_TIME) else: # 表示返回的data值为空值 print('数据库连接失败,数据库可能关闭或者维护中') pass index += 1 gc.collect() print('全部数据更新完毕'.center(100, '#')) # sleep(60*60) if get_shanghai_time().hour == 0: # 0点以后不更新 sleep(60 * 60 * 5.5) else: sleep(5 * 60) gc.collect()