Exemple #1
0
 def __init__(self,list):
     """Create the HTML parser, connect to the database and store the crawl state.

     ``list`` is indexed by the string keys ``'category_links'``, ``'url'``
     and ``'url_set'``; each value is kept on the instance unchanged.
     """
     # The parameter name shadows the builtin; it is kept because callers
     # may pass it by keyword. Use a local alias inside the body.
     crawl_state = list

     html_parser = etree.HTMLParser(encoding='utf-8')
     self.parser = html_parser

     database = BaseDb()
     self.db = database
     database.connectdb()

     self.category_links = crawl_state['category_links']
     self.url_list = crawl_state['url']
     self.url_set = crawl_state['url_set']
Exemple #2
0
    def Rules(self):
        """Seed the URL queue with category search URLs and register crawl rules.

        For every store base URL in ``drugstoreurl`` this method:

        * creates a ``Record`` for that store on the shared Redis handle,
        * if the shared ``url_news`` queue is empty, pushes one search URL
          per entry of ``base.category_ids`` (each entry is read via its
          ``'name'`` and ``'id'`` keys),
        * refreshes ``base.url_maps`` and installs ``self.reload_handler``
          for signal number 60,
        * registers a ``Parse_url`` rule through ``self.AddRules``.

        Stores without a known search-URL pattern are not seeded. Previously
        such a store hit an unbound ``url`` or re-pushed the URL left over
        from the preceding iteration.
        """
        # Search-URL fragments per store base URL:
        # (text placed before the category name, text placed before the id).
        # NOTE(review): the ehaoyao entry yields ".../search/search/<name>"
        # because the base URL already ends in "/search" - confirm intended.
        search_url_parts = {
            'http://search.jianke.com/prod': ('?wd=', '&catagoryid='),
            'http://www.jxdyf.com/search': ('/', '.html?catagoryid='),
            'http://search.360kad.com': ('?pageText=', '&catagoryid='),
            'http://www.ehaoyao.com/search': ('/search/', '?catagoryid='),
            'http://www.yaofang.cn/n/public/search': (
                '?s_words=', '&sort=interrelated&catagoryid='),
        }

        linkbase = getRedis(2)

        db = BaseDb()
        db.connectdb()
        # presumably this populates base.category_ids - verify in BaseDb
        db.getAllCategorys()

        category_links = Categoryids(linkbase)
        url_list = DQueue(linkbase,'url_news')

        for store in drugstoreurl:
            url_set = Record(linkbase, store)

            # NOTE(review): the queue is shared by all stores, so once one
            # store has seeded it the later stores skip seeding in this run -
            # confirm this is intended.
            if url_list.len() == 0:
                parts = search_url_parts.get(store)
                if parts is not None:
                    before_name, before_id = parts
                    for item in base.category_ids:
                        url_list.push(
                            store + before_name + item['name']
                            + before_id + str(item['id']))

            base.url_maps = get_Maps()
            signal.signal(60, self.reload_handler)

            rule_context = {
                'url': url_list,
                'url_set': url_set,
                'category_links': category_links,
            }
            self.AddRules(rule_context, 'Parse_url', 'url', 10)