def reload_handler(self, signum, frame):
    """Re-read the maps config and install it as ``base.url_maps``.

    The ``(signum, frame)`` parameters match the signature required of a
    ``signal.signal`` handler; neither value is used here.

    ``base.url_maps`` is replaced only when ``get_Maps()`` returns a truthy
    value. On a falsy result the current maps are left untouched and a
    failure message is printed instead.
    """
    fresh_maps = get_Maps()
    if not fresh_maps:
        # Nothing usable came back: keep serving the maps already loaded.
        print('reload the maps config file failed ...')
        return

    base.url_maps = fresh_maps
    print('reload the maps config file ok ...')
    print(base.url_maps)
def Rules(self):
    """Wire up the 'Parse_json' rule and the maps-reload signal handler.

    Steps, in order:
      1. Obtain the link store via ``getRedis()`` and wrap it in a
         ``DQueue`` named 'url_list' and a ``Record`` named 'crawled_set'.
      2. Load the maps config into ``base.url_maps``.
      3. Install ``self.reload_handler`` for signal number 60.
      4. Hand both wrappers to ``self.AddRules``.
    """
    redis_conn = getRedis()
    pending_urls = DQueue(redis_conn, 'url_list')
    crawled_urls = Record(redis_conn, 'crawled_set')

    base.url_maps = get_Maps()
    # NOTE(review): 60 is a raw signal number, presumably chosen from the
    # platform's real-time signal range -- confirm on the deployment OS.
    signal.signal(60, self.reload_handler)

    resources = {}
    resources['url'] = pending_urls
    resources['url_set'] = crawled_urls
    # The trailing 'url' and 10 are forwarded unchanged; their meaning is
    # defined by AddRules.
    self.AddRules(resources, 'Parse_json', 'url', 10)
def Rules(self):
    """Seed the 'url_news' queue with search URLs and register 'Parse_url'.

    Steps, in order:
      1. Obtain the link store via ``getRedis(2)``.
      2. Create a ``BaseDb``, connect it and call ``getAllCategorys()``.
      3. Build the ``Categoryids`` helper and the 'url_news' ``DQueue``.
      4. For every entry of ``drugstoreurl`` create a ``Record``; when the
         queue was empty on entry, push one search URL per item in
         ``base.category_ids`` for that store.
      5. Load the maps config into ``base.url_maps`` and install
         ``self.reload_handler`` for signal number 60.
      6. Hand the queue, the record and the category helper to
         ``self.AddRules``.

    Stores that have no known search-URL pattern are reported and skipped
    rather than pushing an unbound or stale URL.
    """
    linkbase = getRedis(2)

    db = BaseDb()
    db.connectdb()
    # NOTE(review): presumably this is what fills base.category_ids --
    # confirm against BaseDb.
    db.getAllCategorys()

    category_links = Categoryids(linkbase)
    url_list = DQueue(linkbase, 'url_news')

    # store -> (text between the store URL and the category name,
    #           text between the category name and the category id).
    # The 'catagoryid' spelling is part of the emitted URL and is kept
    # exactly as-is.
    # NOTE(review): the ehaoyao entry yields '.../search/search/<name>';
    # kept unchanged -- confirm that is the real endpoint.
    search_url_parts = {
        'http://search.jianke.com/prod':
            ('?wd=', '&catagoryid='),
        'http://www.jxdyf.com/search':
            ('/', '.html?catagoryid='),
        'http://search.360kad.com':
            ('?pageText=', '&catagoryid='),
        'http://www.ehaoyao.com/search':
            ('/search/', '?catagoryid='),
        'http://www.yaofang.cn/n/public/search':
            ('?s_words=', '&sort=interrelated&catagoryid='),
    }

    # Check emptiness once, before anything is pushed: re-checking per store
    # would see the first store's URLs and skip every later store.
    needs_seeding = url_list.len() == 0

    for store in drugstoreurl:
        # Rebound on every pass, so the Record registered below is the one
        # for the last store. NOTE(review): confirm that is intended.
        url_set = Record(linkbase, store)

        if not needs_seeding:
            continue

        parts = search_url_parts.get(store)
        if parts is None:
            print('no search url pattern for store %s, skipped' % (store,))
            continue

        before_name, before_id = parts
        for item in base.category_ids:
            url_list.push(
                store + before_name + item['name'] + before_id + str(item['id'])
            )

    base.url_maps = get_Maps()
    # NOTE(review): 60 is a raw signal number, presumably chosen from the
    # platform's real-time signal range -- confirm on the deployment OS.
    signal.signal(60, self.reload_handler)

    rules = {
        'url': url_list,
        'url_set': url_set,
        'category_links': category_links,
    }
    # The trailing 'url' and 10 are forwarded unchanged; their meaning is
    # defined by AddRules.
    self.AddRules(rules, 'Parse_url', 'url', 10)
def Rules(self):
    """Register the 'Parse_url' rule and the maps-reload signal handler.

    The link store from ``getRedis()`` backs two wrappers: a ``DQueue``
    named 'url_list' and a ``Record`` named 'crawled_set'. Before they are
    passed to ``self.AddRules``, the maps config is loaded into
    ``base.url_maps`` and ``self.reload_handler`` is installed for signal
    number 60.
    """
    store_handle = getRedis()
    queue = DQueue(store_handle, 'url_list')
    seen = Record(store_handle, 'crawled_set')

    base.url_maps = get_Maps()
    # NOTE(review): 60 is a raw signal number, presumably chosen from the
    # platform's real-time signal range -- confirm on the deployment OS.
    signal.signal(60, self.reload_handler)

    # The trailing 'url' and 10 are forwarded unchanged; their meaning is
    # defined by AddRules.
    self.AddRules(dict(url=queue, url_set=seen), 'Parse_url', 'url', 10)