def brandHourList(self): # 查找需要每小时统计的列表 # 得到需要的时间段 val = (Common.add_hours(self.begin_time), Common.add_hours(self.begin_time, -1)) print '# hour crawler time:',val # 商品默认信息列表 all_item_num = 0 hour_val_list = [] act_items = {} item_results = self.mysqlAccess.selectJhsItemsHouralive(val) if item_results: for item in item_results: if act_items.has_key(str(item[0])): act_items[str(item[0])]["items"].append(item[2:]) else: act_items[str(item[0])] = {'act_name':item[1],'items':[]} act_items[str(item[0])]["items"].append(item[2:]) all_item_num += 1 for key in act_items.keys(): hour_val_list.append((key,act_items[key]["act_name"],act_items[key]["items"])) else: print '# not find need hour items...' print '# hour all item nums:',all_item_num print '# hour all acts nums:',len(hour_val_list) # 清空每小时抓取redis队列 self.item_queue.clearQ() # 保存每小时抓取redis队列 self.item_queue.putlistQ(hour_val_list) print '# item queue end:',time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
def antPage(self): try: # 主机器需要配置redis队列 if self.m_type == 'm': category_list = self.mysqlAccess.selectJhsGroupItemCategory() if not category_list or len(category_list) == 0: category_list = self.category_list if category_list and len(category_list) > 0: cate_val_list = [] for cate in category_list: cate_val_list.append((cate[0],cate[2],cate[1],Config.ju_home_today,Config.JHS_GroupItem)) # 清空category redis队列 self.cat_queue.clearQ() # 保存category redis队列 self.cat_queue.putlistQ(cate_val_list) # 清空act redis队列 self.act_queue.clearQ() print '# category queue end:',time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) else: print '# not find category...' # 类目的活动Json obj = 'cat' crawl_type = 'main' # 获取还没有开团的活动id val = (Common.time_s(Common.now()),) acts = self.mysqlAccess.selectJhsActNotStart(val) brandact_id_list = [] if acts: for act in acts: brandact_id_list.append(str(act[1])) _val = (self.begin_time, brandact_id_list) self.work.process(obj,crawl_type,_val) # 活动数据 act_val_list = self.work.items print '# act nums:', len(act_val_list) # 保存到redis队列 self.act_queue.putlistQ(act_val_list) print '# act queue end:',time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) if self.m_type == 'm': val = (Common.add_hours(self.begin_time, -2),Common.add_hours(self.begin_time, -2),Common.add_hours(self.begin_time, -1)) # 删除Redis中上个小时结束的活动 _acts = self.mysqlAccess.selectJhsActEndLastOneHour(val) print '# end acts num:',len(_acts) self.work.delAct(_acts) # 删除Redis中上个小时结束的商品 _items = self.mysqlAccess.selectJhsItemEndLastOneHour(val) print '# end items num:',len(_items) self.work.delItem(_items) except Exception as e: print '# antpage error :',e Common.traceback_log()
def scanEndItemsLasthour(self): val = (Common.add_hours(self.crawling_time, -2),Common.add_hours(self.crawling_time, -2),Common.add_hours(self.crawling_time, -1)) _items = self.mysqlAccess.selectJhsGroupItemEndLastOneHour(val) end_items = [] # 遍历商品 for _item in _items: item_juid = _item[0] end_items.append({"item_juId":str(item_juid)}) print '# del item nums for last hour end:',len(end_items) # 删除已经结束的商品 self.delItem(end_items)
def scanAliveItems(self): # 到结束时间后的一个小时 val = (Common.time_s(self.crawling_time), Common.add_hours(self.crawling_time, -1)) # 查找已经开团但是没有结束的商品 _items = self.mysqlAccess.selectJhsGroupItemAlive(val) print "# hour all item nums:",len(_items) return _items
def brandDayList(self): # 查找需要每天统计的活动列表 # 当前时刻减去24小时 val = (Common.today_s()+" 00:00:00",Common.add_hours(self.begin_time, -24)) print '# day crawler time:',val # 商品默认信息列表 all_item_num = 0 day_val_list = [] act_items = {} item_results = self.mysqlAccess.selectJhsItemsDayalive(val) if item_results: for item in item_results: if act_items.has_key(str(item[0])): act_items[str(item[0])]["items"].append(item[2:]) else: act_items[str(item[0])] = {'act_name':item[1],'items':[]} act_items[str(item[0])]["items"].append(item[2:]) all_item_num += 1 for key in act_items.keys(): day_val_list.append((key,act_items[key]["act_name"],act_items[key]["items"])) else: print '# not find need day items...' print '# day all item nums:',all_item_num print '# need update all acts nums:',len(day_val_list) # 清空每天抓取redis队列 self.item_queue.clearQ() # 保存每天抓取redis队列 self.item_queue.putlistQ(day_val_list) print '# item queue end:',time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
def antPage(self): try: # 更新即将开团活动的商品信息 # 主机器需要配置redis队列 if self.m_type == 'm': # 一个小时即将开团 val = (Common.time_s(self.begin_time),Common.add_hours(self.begin_time, self.min_hourslot)) print '# update time:',val # 商品默认信息列表 all_item_num = 0 update_val_list = [] act_items = {} item_results = self.mysqlAccess.selectJhsItemsForUpdate(val) if item_results: for item in item_results: if act_items.has_key(str(item[0])): act_items[str(item[0])]["items"].append(item[2:]) else: act_items[str(item[0])] = {'act_name':item[1],'items':[]} act_items[str(item[0])]["items"].append(item[2:]) all_item_num += 1 for key in act_items.keys(): update_val_list.append((key,act_items[key]["act_name"],act_items[key]["items"])) else: print '# not find need update items...' print '# need update all items nums:',all_item_num print '# need update all acts nums:',len(update_val_list) # 清空redis队列 self.item_queue.clearQ() # 保存到redis队列 self.item_queue.putlistQ(update_val_list) print '# item queue end:',time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) """ # 附加的信息 a_val = (self.begin_time,) self.work.process(self._obj, self._crawl_type, a_val) """ except Exception as e: print '# exception err in antPage info:',e Common.traceback_log()