Exemple #1
0
class XingZhengQuHua(WebSpyder):
    """Scraper for the administrative-division pages under www.xzqh.org/html/.

    Pages are fetched with ``self.get_data``, which is not defined in this
    class (presumably inherited from ``WebSpyder`` -- confirm), and parsed
    with BeautifulSoup using the ``lxml`` parser.
    """

    # Prefix prepended to every relative ``href`` scraped from a page.
    _LINK_PREFIX = 'http://www.xzqh.org/html/'

    # Titles ("overview map", "historical evolution") that mark entries which
    # are not divisions and must be left out of the results.
    # NOTE(review): on Python 2 without ``unicode_literals`` these are byte
    # strings while the scraped text is presumably unicode -- confirm that the
    # membership tests below can actually match in that setup.
    _SKIPPED_TITLES = ('概况地图', '历史沿革')

    def __init__(self):
        super(XingZhengQuHua,self).__init__()
        self.top_url = 'http://www.xzqh.org/html/'
        # Presumably consumed by the base class when decoding fetched pages.
        self.encoding = 'gbk'
        self.LOG_FILE = 'XingZhengQuHua.log'
        self.location = Location()
        self.logger = LOG('XingZhengQuHua',self.LOG_FILE)

    def get_location_by_name(self,name):
        """Return ``(latitude, longitude)`` for the place called *name*.

        The lookup is delegated to ``self.location.geocode``.
        NOTE(review): if that call can return ``None`` for an unknown name,
        the attribute access below raises ``AttributeError`` -- confirm.
        """
        locate = self.location.geocode(name)
        return (locate.latitude, locate.longitude)

    def get_xiangzheng_jiedao(self,url):
        """Collect the township / sub-district links listed on the page at *url*.

        Returns a dict holding ``'link': url`` plus one key per listed entry
        (the anchor text), each mapped to ``{'link': <absolute entry url>}``.
        Entries whose ``<span>`` label is in ``_SKIPPED_TITLES`` are omitted.

        A page without the expected ``<ul>`` yields just ``{'link': url}``,
        and list items lacking a ``<span>`` or ``<a>`` are skipped, so one
        oddly shaped page does not abort a whole crawl.
        """
        data = self.get_data(url)
        soup = BeautifulSoup(data,'lxml')
        r_dict = {'link': url}

        lists = soup.findAll('ul',attrs={'class':'text_list text_list_f14'})
        if not lists:
            return r_dict

        for li in lists[0].findAll('li'):
            spans = li.findAll('span')
            anchors = li.findAll('a')
            if not spans or not anchors:
                continue
            if spans[0].get_text() in self._SKIPPED_TITLES:
                continue
            anchor = anchors[0]
            r_dict[anchor.get_text()] = {'link': self._LINK_PREFIX + anchor['href']}
        return r_dict

    def get_xianshi(self,url):
        """Collect the county / city hierarchy listed on the page at *url*.

        Every ``<ul>`` inside the first ``<div class="cate">`` is read as one
        line of ``(text, absolute link)`` pairs. The text of the first pair of
        the first line becomes the single top-level key. For each following
        line, the first pair names an entry stored as
        ``{'link': <its link>, <child text>: get_xiangzheng_jiedao(<child link>), ...}``
        with one child per remaining pair of that line.

        Lines that are empty, or whose first title is in ``_SKIPPED_TITLES``,
        are dropped. If nothing usable is found an empty dict is returned.

        Side effect: the result is also dumped as JSON to ``a.txt`` in the
        current working directory.
        """
        data = self.get_data(url)
        soup = BeautifulSoup(data,'lxml')
        cates = soup.findAll('div',attrs={'class':'cate'})
        uls = cates[0].findAll('ul') if cates else []

        lines = []
        for ul in uls:
            entries = []
            for li in ul.findAll('li'):
                anchors = li.findAll('a')
                if not anchors:
                    continue
                entries.append((li.get_text(), self._LINK_PREFIX + anchors[0].attrs['href']))
            # Compare the title of the line's first entry; comparing the whole
            # (text, link) tuple against the titles could never match.
            if entries and entries[0][0] not in self._SKIPPED_TITLES:
                lines.append(entries)

        result_dict = {}
        if lines:
            key_0 = lines[0][0][0]
            result_dict[key_0] = {}
            # Explicit loops instead of map(): the work is done for its side
            # effects, which a lazy map object would never trigger.
            for line in lines[1:]:
                line_name, line_link = line[0]
                entry = {'link': line_link}
                for child_name, child_link in line[1:]:
                    entry[child_name] = self.get_xiangzheng_jiedao(child_link)
                result_dict[key_0][line_name] = entry

        import json
        # Binary mode so the encoded bytes can be written on Python 2 and 3;
        # the context manager closes the file even if the write fails.
        with open('a.txt','wb') as f:
            f.write(json.dumps(result_dict).encode('utf8'))

        return result_dict