def antPageComing(self, val):
    """Load an upcoming brand activity and fetch its page and coupons.

    ``val`` is unpacked into ``(page, catId, catName, position, begin_time)``
    and forwarded to ``initItemComing``; ``itemConfig`` is then run.

    When the value returned by ``Common.subTS_hours`` for the activity start
    time (milliseconds, converted to seconds) and ``self.crawling_time`` is
    non-negative, the brand page and the activity coupons are fetched; any
    exception raised while doing so is printed and handed to
    ``Common.traceback_log`` instead of propagating.  Otherwise
    ``self.crawling_confirm`` is set to 2.
    """
    page, catId, catName, position, begin_time = val
    self.initItemComing(page, catId, catName, position, begin_time)
    self.itemConfig()

    # brandact_starttime is divided by 1000 before use (ms -> s).
    start_seconds = int(float(self.brandact_starttime) / 1000)
    # NOTE(review): presumably the number of hours from crawling_time until
    # the activity starts -- confirm against Common.subTS_hours.
    hours_to_start = Common.subTS_hours(start_seconds, self.crawling_time)

    if not 0 <= hours_to_start:
        self.crawling_confirm = 2
        return

    try:
        # Brand-group page HTML.
        self.brandPage()
        # Activity coupons / offers.
        self.brandActConpons()
    except Exception as err:
        print('%s %s' % ('# exception err brand coming get brand page:', err))
        Common.traceback_log()
def antPageMain(self, val):
    """Load a brand activity and crawl its page, coupons and item list.

    ``val`` is unpacked into ``(page, catId, catName, position, begin_time,
    brandid_list)``; the first five values go to ``initItem`` and
    ``itemConfig`` is then run.

    Outcome, based on the value returned by ``Common.subTS_hours`` for the
    activity start time and ``self.crawling_time``:

    * negative: ``self.crawling_confirm`` is set to 2 and nothing is crawled;
    * non-negative and ``self.brandact_sign`` is 3: nothing is crawled;
    * otherwise the brand page and coupons are fetched, the activity items
      are fetched when the activity id is not in ``brandid_list`` or when
      ``self.beginH_gap`` exceeds the gap, and ``self.crawling_confirm`` is
      set to 0 when ``self.beginH_gap`` exceeds the gap.
    """
    page, catId, catName, position, begin_time, brandid_list = val
    self.initItem(page, catId, catName, position, begin_time)
    self.itemConfig()

    # brandact_starttime is divided by 1000 before use (ms -> s).
    start_seconds = int(float(self.brandact_starttime) / 1000)
    # NOTE(review): presumably hours remaining until the activity starts;
    # a non-negative value is treated as "not started yet".
    hours_to_start = Common.subTS_hours(start_seconds, self.crawling_time)

    if not 0 <= hours_to_start:
        self.crawling_confirm = 2
        return

    # Items of sign-3 activities are not crawled.
    if self.brandact_sign == 3:
        return

    # Brand-group page HTML.
    self.brandPage()
    # Activity coupons / offers.
    self.brandActConpons()

    unseen_activity = str(self.brandact_id) not in brandid_list
    if unseen_activity or self.beginH_gap > hours_to_start:
        # Items listed on the activity page.
        self.brandActItems()

    if self.beginH_gap > hours_to_start:
        self.crawling_confirm = 0
def run_act(self, msg):
    """Crawl the activity described by ``msg`` and store the result.

    ``msg["val"]`` is passed to the ``JHSAct`` method selected by
    ``self._crawl_type``: ``'main'`` -> ``antPageMain``, ``'check'`` ->
    ``antPageCheck``, ``'position'`` -> ``antPageParser``.

    * Unknown crawl type: no activity object is created, a message is
      printed and the method returns without touching redis or MySQL.
      (Previously this path dereferenced ``None``.)
    * ``'position'``: for activities whose sign is not 3, the position row
      is inserted through ``self.mysqlAccess.insertJhsActPosition_hour``
      when the start time is set and ``Common.subTS_hours`` reports at most
      1, or when the status is neither ``''`` nor ``'blank'``.
    * Other types: the previous activity is read from redis.  When
      ``crawling_confirm`` is 2 only ``update_startact`` runs; otherwise the
      activity is saved with ``putActDB``, its items are crawled (skipped
      for sign 3, reported as "ladygo") and ``procActRedis`` is called.

    Returns ``None``.
    """
    msg_val = msg["val"]
    stamp_fmt = '%Y-%m-%d %H:%M:%S'
    print('%s %s' % ('# act start:', time.strftime(stamp_fmt, time.localtime(time.time()))))

    crawl_type = self._crawl_type
    page_methods = {
        'main': 'antPageMain',
        'check': 'antPageCheck',
        'position': 'antPageParser',
    }
    act_obj = None
    if crawl_type in page_methods:
        act_obj = JHSAct()
        getattr(act_obj, page_methods[crawl_type])(msg_val)

    print('%s %s' % ('# act end:', time.strftime(stamp_fmt, time.localtime(time.time()))))

    if act_obj is None:
        # Nothing was crawled; every step below needs an activity object.
        print('# unknown crawl type, skip activity: %s' % (crawl_type,))
        return

    if crawl_type == 'position':
        _id, _name, _url, brandact_sign, brandact_status, val = act_obj.outTupleForPosition()
        if int(brandact_sign) == 3:
            return
        # Evaluated lazily, in the original order: start-time test first,
        # status test only when the start-time test fails.
        starts_soon = (
            act_obj.brandact_starttime
            and act_obj.brandact_starttime != 0.0
            and 1 >= Common.subTS_hours(int(float(act_obj.brandact_starttime) / 1000), self.crawling_time)
        )
        if starts_soon or (brandact_status != '' and brandact_status != 'blank'):
            print('# insert activity position, id:%s name:%s' % (act_obj.brandact_id, act_obj.brandact_name))
            self.mysqlAccess.insertJhsActPosition_hour(val)
        return

    act_keys = [self.worker_type, str(act_obj.brandact_id)]
    prev_act = self.redisAccess.read_jhsact(act_keys)

    # crawling_confirm == 2: no item crawl is needed for this activity.
    if act_obj.crawling_confirm == 2:
        self.update_startact(act_obj, prev_act)
        print('# Already start activity, id:%s name:%s' % (act_obj.brandact_id, act_obj.brandact_name))
        return

    # Save the activity information.
    self.putActDB(act_obj, prev_act)

    # Items belonging to the activity; only crawled when the sign is not 3.
    items_list = []
    if int(act_obj.brandact_sign) != 3:
        if act_obj.crawling_confirm == 0:
            # Refresh item positions for activities that are about to start.
            self.update_actItems_position(act_obj)
        # Crawl the items (multi-threaded).
        items_list = self.run_actItems(act_obj, prev_act)
    else:
        print('# ladygo activity id:%s name:%s' % (act_obj.brandact_id, act_obj.brandact_name))

    # Process the activity's redis information.
    self.procActRedis(act_obj, prev_act, items_list)