def bu_info(self, bu_list, co_id):
    """Scrape one building page per relative path in ``bu_list`` and persist it.

    bu_list: iterable of relative URL fragments on www.fxfdcw.com.
    co_id:   community id copied onto every Building record.

    Pages are GBK-encoded. Any failure for one building is printed and the
    building is skipped; on success the ``<span title>`` house nodes are
    forwarded to ``self.ho_info``.
    """
    for bu in bu_list:
        try:
            bu_url = 'http://www.fxfdcw.com/' + bu
            res = requests.get(bu_url, headers=self.headers)
            con = res.content.decode('gbk')
            html = etree.HTML(con)
            build = Building(co_index)
            build.co_id = co_id
            # Raw string so \d reaches the regex engine instead of being an
            # (invalid) string escape — a SyntaxWarning on modern Python.
            build.bu_id = re.search(r'bdid=(\d+)', bu).group(1)
            build.bu_num = re.search('楼号.*?">(.*?)</', con, re.S | re.M).group(1)
            build.bu_address = re.search('坐落.*?">(.*?)</', con, re.S | re.M).group(1)
            build.bu_floor = re.search('地上层数.*?">(.*?)</', con, re.S | re.M).group(1)
            build.bu_build_size = re.search('建筑面积.*?wrap">(.*?)</', con, re.S | re.M).group(1)
            build.bu_all_house = re.search('套 数.*?">(.*?)</', con, re.S | re.M).group(1)
            build.bu_type = re.search('用 途.*?wrap">(.*?)</', con, re.S | re.M).group(1)
            build.insert_db()
            ho_list = html.xpath("//span[@title]")
        except Exception as e:
            # log.error("楼栋信息错误{}".format(e))
            print("楼栋信息错误{}".format(e))
            continue
        self.ho_info(ho_list, co_id, build.bu_id)
def build_parse(self, co_id):
    """Collect every building of project ``co_id`` from ccfdw.gov.cn.

    Fetches the building-list page, then each building's detail page
    (GBK-encoded) and persists a Building row per id. After every
    iteration the raw detail page is forwarded to ``self.house_parse``.
    """
    list_url = 'http://www.ccfdw.gov.cn/ecdomain/lpcs/xmxx/loulist.jsp?Id_xmxq=' + co_id
    res = requests.get(list_url, headers=self.headers)
    con = res.content.decode()
    # Raw string keeps \( \d \) as regex tokens, not string escapes.
    build_id_list = re.findall(r"searchByLid\('(\d+)'\)", con)
    for build_id in build_id_list:
        try:
            bu_url = 'http://www.ccfdw.gov.cn/ecdomain/lpcs/xmxx/lpbxx_new.jsp?lid=' + build_id
            bu_res = requests.get(bu_url, headers=self.headers)
            bu_con = bu_res.content.decode('gbk')
            bu = Building(co_index)
            bu.co_id = co_id
            bu.bu_id = build_id
            bu.bu_num = re.search('楼栋名称.*?">(.*?)</td', bu_con, re.S | re.M).group(1)
            bu.bu_all_house = re.search('总套数.*?">总(.*?)套</td', bu_con, re.S | re.M).group(1)
            bu.bu_floor = re.search('地上层数.*?">共(.*?)层</td', bu_con, re.S | re.M).group(1)
            bu.bu_build_size = re.search('总建筑面积.*?">(.*?)</td', bu_con, re.S | re.M).group(1)
            bu.bu_pre_sale = re.search(r"searchysxk\('(.*?)'\)", bu_con, re.S | re.M).group(1)
            bu.bu_type = re.search('房屋用途.*?">(.*?)</td', bu_con, re.S | re.M).group(1)
            bu.insert_db()
        except Exception as e:
            log.error('{}楼栋错误{}'.format(build_id, e))
        # NOTE(review): if the very first GET raises, bu_con is unbound here
        # (NameError); after a later failure the previous iteration's page is
        # reused — confirm this best-effort behaviour is intended.
        self.house_parse(co_id, build_id, bu_con)
def detail_parse(self, id, build_list):
    """Persist a Building row per anchor fragment in ``build_list``.

    Each fragment yields a detail URL relative to ``self.start_url``; the
    page text is scraped into a Building record and then handed, together
    with ``id`` and the building id, to ``self.house_detail``. Failures
    for a single building are logged and skipped.
    """
    for fragment in build_list:
        relative_path = re.search('<a href="(.*?)"', fragment).group(1)
        detail_url = self.start_url + relative_path
        try:
            page = requests.get(detail_url, headers=self.headers)
            time.sleep(2)
            page_text = page.content.decode()
            record = Building(co_index)
            record.bu_num = re.search('幢号:(.*?) 许', page_text).group(1)
            record.bu_pre_sale = re.search('许可证号:<span>(.*?)</span>', page_text).group(1)
            record.bu_id = int(record.bu_pre_sale)
            record.bu_all_house = re.search('套数:<span>(.*?)</span', page_text).group(1)
            record.bu_floor = re.search('地上层数:<span>(.*?)</span', page_text).group(1)
            record.bo_build_end_time = re.search('竣工日期:<span>(.*?)</span', page_text).group(1)
            record.bu_build_size = re.search('预售许可面积:<span>(.*?)</span', page_text).group(1)
            record.bu_type = re.search('用途:<span>(.*?)</span', page_text).group(1)
            record.insert_db()
        except Exception as e:
            log.error("楼栋出错{}".format(e))
            continue
        self.house_detail(page_text, id, record.bu_id)
def analyzer_comm_url(self, comm_url_list):
    """Walk each community detail page in ``comm_url_list``.

    For every page: insert the Comm record, insert one Building record
    per highlighted table row, and collect the per-building house-page
    URLs. Returns the flat list of all collected house URLs. Errors at
    the row or page level are printed and that unit is skipped.
    """
    all_url = []
    for i in comm_url_list:
        try:
            page = requests.get(i).content.decode('gbk')
            c = Comm(self.co_index)
            c.co_name = re.search('项目名称:.*?">.*?<span.*?>(.*?)</span>', page, re.S | re.M).group(1)
            c.co_address = re.search('项目地址:.*?">.*?<span.*?>(.*?)</span>', page, re.S | re.M).group(1)
            c.co_develops = re.search('开发商:.*?">.*?<span.*?>(.*?)</span>', page, re.S | re.M).group(1)
            c.co_build_size = re.search('总建筑面积:.*?">.*?<span.*?>(.*?)</span>', page, re.S | re.M).group(1)
            c.co_land_type = re.search('用地依据:.*?">.*?<span.*?>(.*?)</span>', page, re.S | re.M).group(1)
            c.co_all_house = re.search('>总套数:.*?">.*?<span.*?>(.*?)</span>', page, re.S | re.M).group(1)
            c.area = re.search('所在区域:.*?">.*?<span.*?>(.*?)</span>', page, re.S | re.M).group(1)
            c.co_work_pro = re.search('施工许可证:.*?">.*?<span.*?>(.*?)</span>', page, re.S | re.M).group(1)
            c.co_plan_pro = re.search('建设工程规划许可证:.*?">.*?<span.*?>(.*?)</span>', page, re.S | re.M).group(1)
            c.insert_db()
            url_list = []
            for row in re.findall('onmouseover.*?</TR>', page, re.S | re.M):
                try:
                    b = Building(self.co_index)
                    cells = re.findall('<TD.*?>(.*?)</TD>', row, re.S | re.M)
                    b.co_name = cells[1]
                    b.bu_num = cells[2]
                    b.bu_type = cells[4]
                    b.insert_db()
                    for link in re.findall('href="(.*?)"', row, re.S | re.M):
                        url_list.append('http://www.stfcj.gov.cn/stsite/ProjectList/' + link)
                except Exception as e:
                    print('楼栋错误,co_index={},url={}'.format(co_index, i), e)
            all_url = all_url + url_list
        except Exception as e:
            print('小区错误,co_index={},url={}'.format(co_index, i), e)
    return all_url
def bu_info(self, bu_list, co_id):
    """Store a Building row per ``<tr>`` element in ``bu_list``.

    The first row is skipped (presumably a table header — confirm against
    the page layout). Each building's link is then forwarded to
    ``self.ho_info`` together with its id.
    """
    for bu_ in bu_list[1:]:
        bu = Building(co_index)
        bu.co_id = co_id
        bu.bu_num = bu_.xpath("./td/a/text()")[0]
        bu.bu_pre_sale = bu_.xpath("./td[2]/text()")[0]
        bu.bu_type = bu_.xpath("./td[4]/text()")[0]
        bu_url = bu_.xpath("./td/a/@href")[0]
        # Raw string so \d is a regex class, not an invalid string escape.
        bu.bu_id = re.search(r'buildid=(\d+)', bu_url).group(1)
        bu.insert_db()
        self.ho_info(bu_url, co_id, bu.bu_id)
def comm(self, id):
    """Fetch building and house data for block number ``id`` via the JSON API.

    Persists one Building row for the block, then one House row per house
    returned by the per-house endpoint. Per-house failures are printed and
    skipped. (``id`` shadows the builtin but is part of the public
    signature, so the name is kept.)
    """
    bu = Building(co_index)
    house_url = self.start_url + "/api/buildInfos/getHouseInfosByPannelNumber?pannelNumber=" + str(id)
    comm_url = self.start_url + "/api/buildInfos/getHomePageBuildingInfo?blockNumber=" + str(id)
    comm_detail_url = self.start_url + "/api/buildInfos/getDetailsBuildingInfo?blockNumber=" + str(id)
    comm_res = requests.get(comm_url)
    comm_detail_res = requests.get(comm_detail_url)
    house_res = requests.get(house_url)
    comm_dict = json.loads(comm_res.text)
    comm_detail_dict = json.loads(comm_detail_res.text)
    house_dict = json.loads(house_res.text)
    bu.bu_id = id
    bu.bu_num = comm_dict["data"]["nameBuildings"]
    bu.area = comm_detail_dict['data']['houseingArea']
    bu.bu_address = comm_dict["data"]["houseaddress"]
    bu.bu_pre_sale = comm_detail_dict["data"]["yszh"]
    bu.bu_type = comm_dict["data"]["propertycategory"]
    bu.bo_develops = comm_dict["data"]["companyName"]
    bu.insert_db()
    house_num = house_dict["data"]
    for hu in house_num:
        ho = House(co_index)
        h = hu["data"]
        if len(h) > 0:
            for i in h:
                try:
                    room_id = i["houseNumber"]
                    room_url = self.start_url + "/api/buildInfos/getHouseInfoByHouseNumber?houseNumber=" + str(room_id)
                    res = requests.get(room_url, headers=self.headers)
                    # Renamed from `dict`: never shadow the builtin.
                    room_info = json.loads(res.text)
                    ho.bu_id = id
                    # ho.ho_num = room_id
                    ho.ho_name = room_info["data"]["houseNo"]
                    ho.ho_build_size = room_info["data"]["buildArea"]
                    ho.ho_true_size = room_info["data"]["jacketArea"]
                    ho.ho_share_size = room_info["data"]["apportionedArea"]
                    ho.ho_floor = room_info["data"]["nominalLevel"]
                    ho.insert_db()
                except Exception as e:
                    print(e)
        else:
            continue
def build_parse(self, co_id):
    """Scrape every building page of project ``co_id`` on spf.tlfdc.cn.

    Fix: the Building record is now created fresh inside the loop.
    Previously a single instance was reused across iterations, so fields
    from an earlier building could leak into a later, partially-parsed one
    before ``insert_db``.
    """
    url = "http://spf.tlfdc.cn/prjleft.aspx?projectid=" + str(co_id)
    res = requests.get(url, headers=self.headers)
    con_html = etree.HTML(res.text)
    build_url_list = con_html.xpath("//td[@colspan='2']/a/@href")[4:-1]
    a = con_html.xpath("//td[@width='54%']")
    for index in range(0, len(build_url_list)):
        try:
            build_info_url = "http://spf.tlfdc.cn/" + build_url_list[index]
            res = requests.get(build_info_url, headers=self.headers)
            con = res.text
            bu = Building(co_index)  # fresh record per building — no stale fields
            bu.co_id = co_id
            bu.bu_pre_sale_date = re.search('发证日期.*?Date">(.*?)<', con, re.S | re.M).group(1)
            bu.bu_num = re.search('幢.*?did">(.*?)<', con, re.S | re.M).group(1)
            bu.bu_pre_sale = re.search('编号.*?no">(.*?)<', con, re.S | re.M).group(1)
            bu.bu_address = re.search('位置.*?ss">(.*?)<', con, re.S | re.M).group(1)
            bu.bu_build_size = re.search('面积.*?Area">(.*?)<', con, re.S | re.M).group(1)
            bu.bu_type = re.search('性质.*?type">(.*?)<', con, re.S | re.M).group(1)
            bu.bu_all_house = re.search('套数.*?number">(.*?)<', con, re.S | re.M).group(1)
            # Raw string so \d is a regex token, not a string escape.
            bu.bu_id = re.search(r'id=(\d+)', build_url_list[index]).group(1)
            bu.insert_db()
        except Exception as e:
            print('楼栋错误,co_index={},url={}'.format(co_index, build_info_url), e)
            continue
        try:
            house_url = a[index].xpath("./a/@href")[0]
            self.house_parse(house_url, co_id, bu.bu_id)
        except Exception as e:
            continue
def get_build_info(self, presell_url_list, co_id):
    """Scrape every building linked from each pre-sale page.

    presell_url_list: relative pre-sale page paths under ``self.url``.
    co_id: community id copied onto every Building record.

    For each building page a Building row is stored; the house detail
    tokens found on the page are then forwarded to ``self.get_house_info``.
    A failed building is printed and skipped.
    """
    for presell_url in presell_url_list:
        pre_url = self.url + presell_url
        res = requests.get(pre_url, headers=self.headers)
        build_url_list = re.findall('【<a href="(.*?)" target="_self"', res.text, re.S | re.M)
        for build_url in build_url_list:
            build_info_url = self.url + build_url
            try:
                build_res = requests.get(build_info_url, headers=self.headers)
                con = build_res.text
                bu = Building(co_index)
                bu.co_id = co_id
                # Raw strings so \d, \( and \) are regex tokens, not
                # (invalid) string escapes.
                bu.bu_id = re.search(r'ID=(\d+)', build_url).group(1)
                bu.bu_num = re.search('栋.*?号.*?BuildingName">(.*?)</span', con, re.S | re.M).group(1)
                bu.bu_floor = re.search(r'总 层 数.*?(\d+)</span', con, re.S | re.M).group(1)
                bu.bu_build_size = re.search('建筑面积.*?Jzmj">(.*?)</span', con, re.S | re.M).group(1)
                bu.bu_live_size = re.search('住宅面积.*?Zzmj">(.*?)</span', con, re.S | re.M).group(1)
                bu.bu_not_live_size = re.search('非住宅面积.*?Fzzmj">(.*?)</span', con, re.S | re.M).group(1)
                bu.bu_pre_sale = re.search('预售许可证.*?xkzh">(.*?)</span', con, re.S | re.M).group(1)
                bu.bu_pre_sale_date = re.search('发证日期.*?fzrq">(.*?)</span', con, re.S | re.M).group(1)
                bu.bu_type = re.search('项目类型.*?Type">(.*?)</span', con, re.S | re.M).group(1)
                bu.insert_db()
            except Exception as e:
                print("co_index={},楼栋信息错误".format(co_index), e)
                continue
            house_detail_list = re.findall(r"getMoreHouseInfo\('(.*?)'\)\"", con, re.S | re.M)
            self.get_house_info(co_id, bu.bu_id, house_detail_list)
def bu_parse(self, co_id, bulist):
    """Persist one Building row per relative link in ``bulist``.

    Each link is fetched from the fixed host, scraped into a Building
    record, and the house anchors found on the same page are forwarded
    to ``self.ho_parse``.
    """
    for link in bulist:
        target = "http://110.89.45.7:8082" + link
        page = requests.get(target, headers=self.headers).text
        record = Building(co_index)
        record.co_id = co_id
        record.bu_id = re.search('buildingInfoID=(.*?)&', link).group(1)
        record.bu_num = re.search('幢号.*?">(.*?)</', page, re.S | re.M).group(1)
        record.bu_floor = re.search('总 层 数.*?">(.*?)</', page, re.S | re.M).group(1)
        record.bu_live_size = re.search('批准销售.*?">.*?</td.*?">(.*?)</td', page, re.S | re.M).group(1)
        record.bu_all_size = re.search('总面积.*?">(.*?)</', page, re.S | re.M).group(1)
        record.bu_type = re.search('设计用途.*?">(.*?)</', page, re.S | re.M).group(1)
        record.insert_db()
        anchors = etree.HTML(page).xpath("//td[@style]/a")
        self.ho_parse(co_id, record.bu_id, anchors)
def get_build_info(self, build_url_list, co_id):
    """Scrape building pages on fjlyfdc.com.cn and store Building rows.

    build_url_list: relative paths appended to the site root.
    co_id: community id copied onto every record.

    On success the house links found on the page are forwarded to
    ``self.get_house_info``; any failure is printed and the building skipped.
    """
    for i in build_url_list:
        build_url = 'http://www.fjlyfdc.com.cn/' + i
        try:
            build = Building(co_index)
            response = requests.get(build_url, headers=self.headers)
            html = response.text
            build.bu_id = re.search('buildingInfoID=(.*?)&', build_url).group(1)
            build.co_id = co_id
            build.bo_develops = re.search('开发商:.*?<td.*?>(.*?)<', html, re.S | re.M).group(1)
            build.co_name = re.search('项目名称:.*?<td.*?>(.*?)<', html, re.S | re.M).group(1)
            build.bu_address = re.search('坐落位置:.*?<td.*?>(.*?)<', html, re.S | re.M).group(1)
            build.bu_num = re.search('幢号:.*?<td.*?>(.*?)<', html, re.S | re.M).group(1)
            build.co_build_structural = re.search('建筑结构:.*?<td.*?>(.*?)<', html, re.S | re.M).group(1)
            build.bu_type = re.search('设计用途:.*?<td.*?>(.*?)<', html, re.S | re.M).group(1)
            build.bu_floor = re.search('总 层 数:.*?<td.*?>(.*?)<', html, re.S | re.M).group(1)
            build.co_all_size = re.search('总面积:.*?<td.*?>(.*?)<', html, re.S | re.M).group(1)
            build.bo_build_start_time = re.search('开工日期:.*?<td.*?>(.*?)<', html, re.S | re.M).group(1)
            build.insert_db()
            # Raw string so \? is a regex escape, not an invalid string escape.
            house_url_list = re.findall(r'href="(/House/HouseInfo\?HouseCenterID=.*?)"', html, re.S | re.M)
            self.get_house_info(house_url_list, build.bu_id, co_id)
        except Exception as e:
            print('楼栋错误,co_index={},url={}'.format(co_index, build_url), e)
def get_comm_info(self, comm_info):
    """Parse one community row (an HTML fragment) from qyfgj.cn.

    Inserts the Comm record, then walks the community's building table:
    pre-sale buildings are scraped row-by-row here, current-sale buildings
    are delegated to ``self.get_house_info``. Pages are GBK-encoded.
    """
    co = Comm(co_index)
    co.co_name = re.search('_blank">(.*?)</a', comm_info).group(1)
    try:
        # Address is the second 'px">...</td' cell; some rows lack it.
        co.co_address = re.findall('px">(.*?)</td', comm_info)[1]
    except:
        co.co_address = None
    co.area = re.search('center">(.*?)</td>', comm_info).group(1)
    co_detail_url = re.search("href='(.*?)'", comm_info).group(1)
    co_url = "http://www.qyfgj.cn/newys/" + co_detail_url
    try:
        res = requests.get(co_url, headers=self.headers)
    except Exception as e:
        # NOTE(review): if this request raises, `res` is unbound and the
        # decode below fails with NameError — confirm intended behaviour.
        print("co_index={}小区未请求到".format(co_index), e)
    con = res.content.decode('gbk')
    try:
        # Developer / totals block is missing on some listings.
        co.co_develops = re.search('开发商名称.*?px;">(.*?)</a', con, re.S | re.M).group(1)
        co.co_all_house = re.search('总套数.*?">(\d+) ', con, re.S | re.M).group(1)
        co.co_all_size = re.search('总面积.*?">(\d+.\d+) m', con, re.S | re.M).group(1)
    except:
        print("小区无开发商等信息")
    co.insert_db()
    try:
        build = re.findall('<tr bgcolor="white">(.*?)</tr>', con, re.S | re.M)
    except:
        print("小区没有楼栋信息")
    # Session headers reused for every building request under this community.
    build_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119Safari/537.36',
                     'Cookie': 'ASP.NET_SessionId=irv0qjamqztp1pb0shoqrx2j',
                     'Referer': co_url
                     }
    for build_info in build:
        # Only rows carrying an "enter" link point at a building page.
        if "进入" in build_info:
            build_url = re.search('href="(.*?)"><font', build_info).group(1)
            build_url = "http://www.qyfgj.cn/newys/" + build_url
            ho_headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119Safari/537.36',
                'Cookie': 'ASP.NET_SessionId=irv0qjamqztp1pb0shoqrx2j',
                'Referer': build_url
            }
            build_res = requests.get(build_url, headers=build_headers)
            build_con = build_res.content.decode('gbk')
            if re.search('ID=(\d+)', build_url):
                # Current-sale branch: the URL carries the building id directly.
                bu = Building(co_index)
                bu_id = re.search('ID=(\d+)', build_url).group(1)
                bu.bu_id = bu_id
                bu.co_name = co.co_name
                bu.insert_db()
                self.get_house_info(headers=ho_headers, bu_id=bu_id, url=build_url)
            else:
                # Pre-sale branch: scrape the licence block, then one
                # Building row per table row (same `bu` object is reused
                # and re-inserted with per-row id/num/floor).
                bu = Building(co_index)
                bu.co_name = co.co_name
                bu.bu_type = re.search('用途.*?">(.*?)</td>', build_con, re.S | re.M).group(1)
                bu.bu_pre_sale = re.search('许可证编号.*?_blank">(.*?)</a>', build_con, re.S | re.M).group(1)
                bu.bu_pre_sale_date = re.search('有效日期.*?">(.*?)</td>', build_con, re.S | re.M).group(1)
                bu.bu_address = re.search('项目座落.*?">(.*?)</td>', build_con, re.S | re.M).group(1)
                ret = re.findall('<tr onmouseover(.*?)</tr', build_con, re.S | re.M)
                for i in ret:
                    house_url = re.search('href="(.*?)"', i).group(1)
                    house_url = "http://www.qyfgj.cn/newys/" + house_url
                    bu.bu_id = re.search('dbh=(.*?)&', i).group(1)
                    bu.bu_num = re.search('<td width="89.*?">(.*?)</', i).group(1)
                    bu.bu_floor = re.search('<td width="84.*?">(\d+)</td', i).group(1)
                    bu.insert_db()
                    ho_res = requests.get(house_url, headers=ho_headers)
                    ho_con = ho_res.content.decode('gbk')
                    new_headers = {
                        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119Safari/537.36',
                        'Cookie': 'ASP.NET_SessionId=irv0qjamqztp1pb0shoqrx2j',
                        'Referer': house_url
                    }
                    self.get_house_info(ho_con=ho_con, headers=new_headers, bu_id=bu.bu_id)
        else:
            print("楼栋无链接地址")
def get_build_detail(self, all_building_url_list):
    """Scrape building tables from dgfc.dg.gov.cn project pages.

    all_building_url_list: absolute project-page URLs.
    Returns the accumulated list of house-table URLs discovered on all
    pages. Row-level and page-level failures are printed and skipped.
    """
    house_url_list = []
    for i in all_building_url_list:
        try:
            response = requests.get(i, headers=self.headers)
            html = response.text
            tree = etree.HTML(html)
            bo_develops = tree.xpath('//*[@id="content_1"]/div[3]/text()[2]')[0]  # developer
            bu_build_size = tree.xpath('//*[@id="houseTable_1"]/tr[2]/td[6]/a/text()')  # sale area
            if bu_build_size:
                bu_build_size = bu_build_size[0]
            bu_pre_sale = tree.xpath('//*[@id="houseTable_1"]/tr[2]/td[1]/a/text()')  # pre-sale licence
            if bu_pre_sale:
                bu_pre_sale = bu_pre_sale[0]
            bu_floor = tree.xpath('//*[@id="houseTable_1"]/tr[2]/td[3]/a/text()')[0]  # total floors
            bu_all_house = tree.xpath('//*[@id="houseTable_1"]/tr[2]/td[4]/a/text()')[0]  # total houses
            bu_type = tree.xpath('//*[@id="houseTable_1"]/tr[2]/td[5]/a/text()')[0]  # usage
            build_html = re.search('houseTable_1.*?当前共有', html, re.S | re.M).group()
            build_detail_html = re.findall('class.*?</a></td>.*?</a></td>.*?</a></td>', build_html, re.S | re.M)
            bu_num = re.findall('项目名称:</b>(.*?)</div>', html, re.S | re.M)[0].strip()
            url_list = []
            for bu in build_detail_html:
                try:
                    build = Building(co_index)
                    # Raw strings so \? is a regex escape, not an invalid
                    # string escape.
                    build.bu_id = re.search(r"href='roomTable.aspx\?id=(.*?)&", bu, re.S | re.M).group(1)
                    build.bu_address = re.search("_blank.*?_blank'>(.*?)</a></td><td>", bu, re.S | re.M).group(1).strip()
                    build.bo_develops = bo_develops
                    build.bu_build_size = bu_build_size
                    build.bu_pre_sale = bu_pre_sale
                    build.bu_num = bu_num
                    build.bu_floor = bu_floor
                    build.bu_all_house = bu_all_house
                    build.bu_type = bu_type
                    for k in self.area_list:
                        if k in build.bu_address:
                            build.area = k
                            # NOTE(review): `continue` keeps scanning, so the
                            # LAST matching area wins; `break` (first match)
                            # may have been intended — confirm before changing.
                            continue
                    build.insert_db()
                    house_url = re.search(r"(roomTable.aspx\?id=.*?&vc=.*?)'", bu, re.S | re.M).group(1)
                    url_list.append('http://dgfc.dg.gov.cn/dgwebsite_v2/Vendition/' + house_url)
                except Exception as e:
                    print('楼栋错误,co_index={},url={}'.format(co_index, i), e)
            house_url_list = url_list + house_url_list
        except Exception as e:
            print('楼栋错误,co_index={},url={}'.format(co_index, i), e)
    return house_url_list
def start_crawler(self):
    """Crawl jyfg.cn: communities -> buildings -> houses, inserting a
    Comm, Building and House row respectively at each level.

    The top-level listing exposes one column list per field, indexed in
    lockstep by position. Failures at each nesting level are printed and
    that community / building / house is skipped.
    """
    # NOTE(review): `url` is not defined in this method — presumably a
    # module-level constant; verify before reuse.
    response = requests.get(url)
    html = response.text
    tree = etree.HTML(html)
    # Parallel column lists, all aligned by row index `co`.
    comm_list = tree.xpath('//tr[@class="Row"]/td[1]/text()')
    co_develops_list = tree.xpath('//tr[@class="Row"]/td[3]/text()')
    co_address_list = tree.xpath('//tr[@class="Row"]/td[8]/text()')
    co_open_time_list = tree.xpath('//tr[@class="Row"]/td[9]/text()')
    co_pre_sale_list = tree.xpath('//tr[@class="Row"]/td[5]/text()')
    co_all_house_list = tree.xpath('//tr[@class="Row"]/td[11]/text()')
    co_build_size_list = tree.xpath('//tr[@class="Row"]/td[10]/text()')
    co_name_list = tree.xpath('//tr[@class="Row"]/td[4]/text()')
    for co in range(0, len(comm_list)):
        try:
            comm = Comm(co_index)
            # The pre-sale licence number doubles as the community id.
            comm_url = 'http://www.jyfg.cn/HouseWebSetup/PublicReport/PreSellLicenceDetailInfo.aspx?PreSellLicenceSN=' + \
                       comm_list[
                           co]
            result = requests.get(comm_url)
            html_build = result.text
            tree = etree.HTML(html_build)
            build_list = tree.xpath('//tr[@class="Row"]/td[1]/text()')
            area = tree.xpath('//*[@id="LabSCFW"]/text()')[0]
            comm.co_id = comm_list[co]
            comm.area = area
            comm.co_develops = co_develops_list[co]
            comm.co_address = co_address_list[co]
            comm.co_open_time = co_open_time_list[co]
            comm.co_pre_sale = co_pre_sale_list[co]
            comm.co_all_house = co_all_house_list[co]
            comm.co_build_size = co_build_size_list[co]
            comm.co_develops = co_develops_list[co]
            comm.co_name = co_name_list[co]
            comm.insert_db()
            for bu in range(0, len(build_list)):
                try:
                    build_url = 'http://www.jyfg.cn/HouseWebSetup/PublicReport/PubRptHouseList.aspx?BuildingSN=' + \
                                build_list[bu]
                    res = requests.get(build_url, headers=self.headers)
                    con = res.content.decode('gbk')
                    building = Building(co_index)
                    building.co_id = comm.co_id
                    building.bu_id = build_list[bu]
                    building.bu_num = re.search(
                        '栋号.*?<span.*?">(.*?)</span', con, re.S | re.M).group(1)
                    building.bu_build_size = re.search(
                        '总建筑面积.*?<span.*?">(.*?)</span', con, re.S | re.M).group(1)
                    building.bu_floor = re.search(
                        '层数.*?<span.*?">(.*?)</span', con, re.S | re.M).group(1)
                    building.bu_all_house = re.search(
                        '预售套数.*?<span.*?">(.*?)</span', con, re.S | re.M).group(1)
                    building.bu_pre_sale_date = re.search(
                        '有效期.*?<span.*?">(.*?)</span', con, re.S | re.M).group(1)
                    building.bu_type = re.search(
                        '土地用途.*?<span.*?">(.*?)</span', con, re.S | re.M).group(1)
                    building.bu_pre_sale = re.search(
                        '许可证编号.*?<span.*?">(.*?)</span', con, re.S | re.M).group(1)
                    building.insert_db()
                    house_list = re.findall('房号:<a href="(.*?)"', con)
                    for ho in house_list:
                        try:
                            house = House(co_index)
                            house_url = 'http://www.jyfg.cn/HouseWebSetup/PublicReport/' + ho
                            respon = requests.get(house_url)
                            html = respon.text
                            house.co_id = comm.co_id
                            house.bu_id = building.bu_id
                            house.ho_name = re.search(
                                '房号:.*?<span.*?>(.*?)<', html, re.M | re.S).group(1)
                            house.ho_build_size = re.search(
                                '预测建筑面积:.*?<span.*?>(.*?)<', html, re.M | re.S).group(1)
                            house.ho_true_size = re.search(
                                '预测套内面积:.*?<span.*?>(.*?)<', html, re.M | re.S).group(1)
                            house.ho_share_size = re.search(
                                '预测分摊面积:.*?<span.*?>(.*?)<', html, re.M | re.S).group(1)
                            house.ho_type = re.search(
                                '房屋用途:.*?<span.*?>(.*?)<', html, re.M | re.S).group(1)
                            house.ho_room_type = re.search(
                                '户型结构:.*?<span.*?>(.*?)<', html, re.M | re.S).group(1)
                            house.insert_db()
                        except Exception as e:
                            print("co_index={},房屋{}信息提取失败".format(
                                co_index, house_url))
                            print(e)
                            continue
                except Exception as e:
                    print(e)
                    print('co_idnex={},楼栋{}提取失败'.format(
                        co_index, build_url))
                    continue
        except Exception as e:
            print('co_index={},小区{}提取失败'.format(co_index, comm_url))
            print(e)
            continue