def __init__(self, m_type):
    """Initialise the crawler state for the JM "main" channel.

    Args:
        m_type: Distributed-host flag, stored unchanged on ``self.m_type``.
    """
    # Plain state first: host flag, page placeholder and the default
    # channel list (presumably (id, name, url) tuples -- TODO confirm).
    self.m_type = m_type
    self.site_page = None
    self.channel_list = [
        (1, '美妆', 'http://beauty.jumei.com/?from=all_null_index_top_nav_cosmetics&lo=3481&mat=30573')
    ]

    # DB access is currently disabled:
    # self.mysqlAccess = MysqlAccess()  # mysql access

    # Channel queue, then act queue, then the worker.
    self.chan_queue = JMQ('channel', 'main')
    self.act_queue = JMQ('act', 'main')
    self.work = JMWorker()

    # Crawl start time.
    self.begin_time = Common.now()
def __init__(self, m_type):
    """Initialise the crawler state for the JM "global" channel.

    Args:
        m_type: Distributed-host flag, stored unchanged on ``self.m_type``.
    """
    # Plain state first: host flag, page placeholder and the default
    # channel list (presumably (id, name, url) tuples -- TODO confirm).
    self.m_type = m_type
    self.site_page = None
    self.channel_list = [(2, "聚美极速免税店", "http://www.jumeiglobal.com")]

    # DB access is currently disabled:
    # self.mysqlAccess = MysqlAccess()  # mysql access

    # Channel queue, then item queue, then the worker.
    self.chan_queue = JMQ("channel", "global")
    self.item_queue = JMQ("globalitem", "main")
    self.work = JMWorker()

    # Crawl start time.
    self.begin_time = Common.now()
class JMBrand:
    """Driver for crawling the JM "main" channel and queueing its acts.

    The instance owns a channel queue, an act queue and a ``JMWorker``.
    ``antPage`` seeds the channel queue on the master host, runs the worker
    and pushes the acts the worker collected onto the act queue.
    """

    def __init__(self, m_type):
        """Create the queues, the worker and the default channel list.

        Args:
            m_type: Distributed-host flag. ``antPage`` treats ``'m'`` as the
                master host that (re)initialises the redis queues.
        """
        # DB access is currently disabled:
        # self.mysqlAccess = MysqlAccess()  # mysql access

        # channel queue
        self.chan_queue = JMQ('channel', 'main')
        # act queue
        self.act_queue = JMQ('act', 'main')
        self.work = JMWorker()

        # Default channels (presumably (id, name, url) -- TODO confirm).
        self.channel_list = [
            (1, '美妆', 'http://beauty.jumei.com/?from=all_null_index_top_nav_cosmetics&lo=3481&mat=30573')
        ]
        # page
        self.site_page = None
        # crawl start time
        self.begin_time = Common.now()
        # distributed host flag
        self.m_type = m_type

    def antPage(self):
        """Seed the channel queue (master only), crawl, and enqueue the acts.

        Every exception is caught, logged through ``Common.log`` and
        ``Common.traceback_log`` and not re-raised, so the method always
        returns ``None``.

        NOTE(review): the block nesting was reconstructed from
        whitespace-collapsed source. It assumes the crawl/enqueue steps run
        on every host and that the final ``putlistQ`` call is unconditional
        -- confirm against the original layout.
        """
        try:
            # The master host has to set up the redis queues.
            if self.m_type == 'm':
                # channel_list = self.mysqlAccess.selectJMChannel()
                # if not channel_list or len(channel_list) == 0:
                channel_list = self.channel_list
                if channel_list:
                    # Append the crawl start time to every channel tuple.
                    channel_val_list = [
                        c + (self.begin_time,) for c in channel_list
                    ]
                    # Reset the channel queue, then store the channels.
                    self.chan_queue.clearQ()
                    self.chan_queue.putlistQ(channel_val_list)
                    # Reset the act queue.
                    self.act_queue.clearQ()
                    Common.log('# channel queue end')
                else:
                    Common.log('# not find channel...')

            # channel acts
            obj = 'channel'
            crawl_type = 'main'
            # Disabled: fetch ids of acts that have not started yet.
            # val = (Common.time_s(Common.now()),)
            # acts = self.mysqlAccess.selectJMActNotStart(val)
            # act_id_list = []
            # if acts:
            #     for act in acts:
            #         act_id_list.append(str(act[1]))
            # _val = (self.begin_time, brandact_id_list)
            _val = None
            self.work.process(obj, crawl_type, _val)

            # Act data collected by the worker.
            act_val_list = []
            act_val = self.work.items
            if act_val:
                # ``in`` replaces dict.has_key(), which Python 3 removed.
                if 'sale' in act_val:
                    Common.log('# act on sale nums: %d' % len(act_val['sale']))
                    act_val_list.extend(act_val['sale'])
                if 'coming' in act_val:
                    Common.log('# act will coming nums: %d' % len(act_val['coming']))
                    act_val_list.extend(act_val['coming'])
            # Store the acts in the redis queue.
            self.act_queue.putlistQ(act_val_list)
            Common.log('# act queue end')

            # Disabled: remove acts/items that ended during the last hour.
            # if self.m_type == 'm':
            #     val = (Common.add_hours(self.begin_time, -2),
            #            Common.add_hours(self.begin_time, -2),
            #            Common.add_hours(self.begin_time, -1))
            #     # delete acts that ended in the last hour from Redis
            #     _acts = self.mysqlAccess.selectJMActEndLastOneHour(val)
            #     Common.log('# end acts num: %d' % len(_acts))
            #     self.work.delAct(_acts)
            #     # delete items that ended in the last hour from Redis
            #     _items = self.mysqlAccess.selectJMItemEndLastOneHour(val)
            #     Common.log('# end items num: %d' % len(_items))
            #     self.work.delItem(_items)
        except Exception as e:
            Common.log('# JMbrand antpage error: %s' % e)
            Common.traceback_log()
class JMGlobal:
    """Driver for crawling the JM "global" channel and queueing its items.

    The instance owns a channel queue, an item queue and a ``JMWorker``.
    ``antPage`` seeds the channel queue on the master host, runs the worker
    and pushes the items the worker collected onto the item queue.
    """

    def __init__(self, m_type):
        """Create the queues, the worker and the default channel list.

        Args:
            m_type: Distributed-host flag. ``antPage`` treats ``"m"`` as the
                master host that (re)initialises the redis queues.
        """
        # DB access is currently disabled:
        # self.mysqlAccess = MysqlAccess()  # mysql access

        # channel queue
        self.chan_queue = JMQ("channel", "global")
        # item queue
        self.item_queue = JMQ("globalitem", "main")
        self.work = JMWorker()

        # Default channels (presumably (id, name, url) -- TODO confirm).
        self.channel_list = [(2, "聚美极速免税店", "http://www.jumeiglobal.com")]
        # page
        self.site_page = None
        # crawl start time
        self.begin_time = Common.now()
        # distributed host flag
        self.m_type = m_type

    def antPage(self):
        """Seed the channel queue (master only), crawl, and enqueue the items.

        Every exception is caught, logged through ``Common.log`` and
        ``Common.traceback_log`` and not re-raised, so the method always
        returns ``None``.

        NOTE(review): the block nesting was reconstructed from
        whitespace-collapsed source. It assumes the crawl/enqueue steps run
        on every host and that the final ``putlistQ`` call is unconditional
        -- confirm against the original layout.
        """
        try:
            # The master host has to set up the redis queues.
            if self.m_type == "m":
                # channel_list = self.mysqlAccess.selectJMChannel()
                # if not channel_list or len(channel_list) == 0:
                channel_list = self.channel_list
                if channel_list:
                    # Append the crawl start time to every channel tuple.
                    channel_val_list = [
                        c + (self.begin_time,) for c in channel_list
                    ]
                    # Reset the channel queue, then store the channels.
                    self.chan_queue.clearQ()
                    self.chan_queue.putlistQ(channel_val_list)
                    # Reset the item queue.
                    self.item_queue.clearQ()
                    Common.log("# channel queue end")
                else:
                    Common.log("# not find channel...")

            # global items
            obj = "channel"
            crawl_type = "global"
            _val = None
            self.work.process(obj, crawl_type, _val)

            # Item data collected by the worker.
            item_val_list = []
            item_val = self.work.items
            if item_val:
                # ``in`` replaces dict.has_key(), which Python 3 removed.
                if "sale" in item_val:
                    Common.log("# item on sale nums: %d" % len(item_val["sale"]))
                    item_val_list.extend(item_val["sale"])
                if "coming" in item_val:
                    Common.log("# item will coming nums: %s" % len(item_val["coming"]))
                    item_val_list.extend(item_val["coming"])
            Common.log("# item val nums: %s" % len(item_val_list))
            # Store the items in the redis queue.
            self.item_queue.putlistQ(item_val_list)
            Common.log("# item queue end")

            # Disabled: remove items that ended during the last hour.
            # if self.m_type == 'm':
            #     val = (Common.add_hours(self.begin_time, -2),
            #            Common.add_hours(self.begin_time, -2),
            #            Common.add_hours(self.begin_time, -1))
            #     # delete items that ended in the last hour from Redis
            #     _items = self.mysqlAccess.selectJMItemEndLastOneHour(val)
            #     Common.log('# end items num: %d' % len(_items))
            #     self.work.delItem(_items)
        except Exception as e:
            Common.log("# JMGlobal antpage error: %s" % e)
            Common.traceback_log()