def detail_parse(self, id, build_list):
    """Scrape every building page linked from ``build_list`` and store it.

    For each HTML fragment, the first ``<a href="...">`` target is appended
    to ``self.start_url`` and fetched. Values matched on the fetched page are
    copied onto a new ``Building`` record, which is saved with
    ``insert_db()``; the page text is then handed to ``self.house_detail``.

    Any exception raised while fetching, parsing or saving one building is
    logged and that building is skipped. The ``href`` lookup itself is not
    guarded, so a fragment without a link raises ``AttributeError``.

    Args:
        id: Value forwarded unchanged to ``self.house_detail``.
        build_list: Iterable of HTML fragments, each containing a link.
    """
    # "<label>:<span>value</span>" fields that are read after the permit
    # number, listed in the order they are assigned.
    span_fields = (
        ('bu_all_house', '套数:<span>(.*?)</span'),
        ('bu_floor', '地上层数:<span>(.*?)</span'),
        ('bo_build_end_time', '竣工日期:<span>(.*?)</span'),
        ('bu_build_size', '预售许可面积:<span>(.*?)</span'),
        ('bu_type', '用途:<span>(.*?)</span'),
    )

    for fragment in build_list:
        relative_link = re.search('<a href="(.*?)"', fragment).group(1)
        page_url = self.start_url + relative_link
        try:
            response = requests.get(page_url, headers=self.headers)
            time.sleep(2)  # pause after every request
            page = response.content.decode()

            building = Building(co_index)
            building.bu_num = re.search('幢号:(.*?) 许', page).group(1)
            building.bu_pre_sale = re.search(
                '许可证号:<span>(.*?)</span>', page
            ).group(1)
            # The permit number, converted to int, is used as the building id.
            building.bu_id = int(building.bu_pre_sale)
            for attr, pattern in span_fields:
                setattr(building, attr, re.search(pattern, page).group(1))
            building.insert_db()
        except Exception as err:
            log.error("楼栋出错{}".format(err))
            continue
        self.house_detail(page, id, building.bu_id)
def get_build_info(self, build_lis, co_id):
    """Fetch each building page, store the building, then crawl its houses.

    Every entry of ``build_lis`` is appended to the site root and requested.
    The fields matched on the page are copied onto a new ``Building`` record
    that is saved with ``insert_db()``. The value matched after "测量号" is
    then passed to ``self.get_house_info`` together with the ids.

    A failed request or a page on which an expected field cannot be matched
    is reported with ``print`` and that building is skipped, so one bad page
    does not abort the remaining buildings.

    Args:
        build_lis: Iterable of URL paths; each must contain ``Bid=<digits>``.
        co_id: Identifier stored on each building and forwarded to
            ``self.get_house_info``.
    """
    # (attribute, pattern) pairs applied to the page text, in assignment order.
    page_fields = (
        ('bu_num', '名称.*?">(.*?)</spa'),
        ('bu_pre_sale', "编.*?red'>(.*?)</a"),
        ('bu_pre_sale_date', '颁发日期.*?Date">(.*?)</span'),
        ('bo_build_start_time', '开工日期.*?">(.*?)</span'),
        ('bo_build_end_time', '竣工日期.*?">(.*?)</span'),
        ('bo_develops', '单位.*?">(.*?)</span'),
        ('bu_floor', '层数.*?">(.*?)</span'),
        ('bu_live_size', '住宅面积.*?">(.*?)</span'),
        ('size', '总面积.*?">(.*?)</span'),
    )

    for build_ in build_lis:
        build_url = "http://xx.yyfdcw.com" + build_
        try:
            build_res = requests.get(build_url, headers=self.headers)
        except Exception as e:
            print("co_index={},楼栋信息错误".format(co_index), e)
            continue
        con = build_res.text

        # Extract everything before creating the record: re.search returns
        # None for a missing field, and .group(1) then raises AttributeError.
        try:
            bu_id = re.search(r'Bid=(\d+)', build_).group(1)
            parsed = [
                (attr, re.search(pattern, con).group(1))
                for attr, pattern in page_fields
            ]
        except AttributeError as e:
            print("co_index={},楼栋信息错误".format(co_index), build_url, e)
            continue

        bu = Building(co_index)
        bu.co_id = co_id
        bu.bu_id = bu_id
        for attr, value in parsed:
            setattr(bu, attr, value)
        bu.insert_db()

        # The building is already stored at this point; without the value
        # after "测量号" only the house crawl for it is skipped.
        try:
            measure_id = re.search('测量号.*?">(.*?)</span', con).group(1)
        except AttributeError as e:
            print("co_index={},楼栋信息错误".format(co_index), build_url, e)
            continue
        self.get_house_info(co_id, bu.bu_id, measure_id)
def get_build_info(self, build_url_list):
    """Fetch each building page, store the building, then crawl its houses.

    For every item, ``item[0]`` is used as the building id and appended to
    the ``selectBuild.jsp?buildID=`` query URL. The fields matched on the
    returned page are copied onto a new ``Building`` record, which is saved
    with ``insert_db()`` before ``self.get_house_info`` is called with the
    building id.

    Any exception raised while handling one item (including inside
    ``self.get_house_info``) is printed together with that item's URL and
    the loop moves on to the next item.

    Args:
        build_url_list: Iterable of indexable items whose first element is
            the building id as a string.
    """
    flags = re.S | re.M
    for i in build_url_list:
        # Reset per item so the error report below never hits an unbound
        # name or shows the previous item's URL when the failure happens
        # before this item's URL has been built.
        build_url = None
        try:
            build = Building(co_index)
            build_url = 'http://222.223.160.199:8088/website/buildquery/selectBuild.jsp?buildID=' + i[0]
            response = requests.get(build_url, headers=self.headers)
            html = response.text
            build.bu_id = i[0]
            build.co_build_structural = re.search('结构类型.*?<td.*?>(.*?)<', html, flags).group(1)
            build.bo_build_end_time = re.search('建成年份.*?<td.*?>(.*?)<', html, flags).group(1)
            build.bu_build_size = re.search('总建筑面积.*?<td.*?>(.*?)<', html, flags).group(1)
            build.bu_num = re.search('幢号.*?<td.*?>(.*?)<', html, flags).group(1)
            build.size = re.search('占地面积.*?<td>(.*?)<', html, flags).group(1)
            build.bu_floor = re.search('房屋层数.*?<td>(.*?)<', html, flags).group(1)
            build.bu_all_house = re.search('房屋套数.*?<td>(.*?)<', html, flags).group(1)
            build.area = re.search('坐落区.*?<td>(.*?)<', html, flags).group(1)
            build.insert_db()
            self.get_house_info(build.bu_id)
        except Exception as e:
            print('请求错误,url={}'.format(build_url), e)