예제 #1
0
    def ho_parse(self, bid, co_id):
        """Fetch the house table of one building and store every house found.

        A URL-quoted XML request naming building ``bid`` is posted to the
        ``ExeFunCommon.aspx`` endpoint.  Every ``title='...'`` attribute in
        the response is treated as one house description and saved as a
        ``House`` record.

        Args:
            bid: Building id; it is concatenated into the XML payload, so it
                must be a string.
            co_id: Community id copied onto every stored house.

        Raises:
            AttributeError: If a title lacks one of the mandatory labels
                (room number, usage, layout, total area).
        """
        payload = (
            '<?xml version="1.0" encoding="utf-8" standalone="yes"?><param funname="SouthDigital.CMS.CBuildTableEx.GetBuildHTMLEx"><item>'
            + bid
            + '</item><item>1</item><item>1</item><item>100</item><item>1000</item><item>g_oBuildTable</item><item> 1=1</item><item>1</item></param>'
        )
        payload = parse.quote(payload)
        try:
            res = requests.post(
                'http://www.hbsfdc.com/Common/Agents/ExeFunCommon.aspx',
                data=payload,
                headers=self.headers)
        except Exception:
            log.error("{}楼栋请求失败".format(bid))
            # No response exists, so there is nothing to parse; continuing
            # would only raise NameError on ``res``.
            return
        con = res.content.decode()
        for ho in re.findall("title='(.*?)'>", con, re.S | re.M):
            house = House(co_index)
            house.co_id = co_id
            house.bu_id = bid
            house.ho_name = re.search('房号:(.*)', ho).group(1)
            house.ho_type = re.search('用途:(.*)', ho).group(1)
            house.ho_room_type = re.search('户型:(.*)', ho).group(1)
            house.ho_build_size = re.search('总面积:(.*)', ho).group(1)
            # The price label is optional; search once and reuse the match.
            price_match = re.search('售价:(.*)', ho)
            house.ho_price = price_match.group(1) if price_match else None
            house.insert_db()
예제 #2
0
 def house_info(self,ho_url,co_id,bu_id):
     """Store every house linked from one building page.

     The building page is fetched and decoded as GBK, the ``href`` of every
     ``td/a[@target]`` link is followed, and each detail page is parsed with
     regular expressions into a ``House`` record.  A failure on one detail
     page is printed and does not stop the remaining pages.

     Args:
         ho_url: Path appended to ``http://222.77.178.63:7002/``.
         co_id: Community id copied onto every stored house.
         bu_id: Building id copied onto every stored house.
     """
     base_url = "http://222.77.178.63:7002/"
     # NOTE(review): an earlier revision called ``url.rstrip('=')`` here and
     # discarded the result, so the URL was never actually trimmed.  The dead
     # statement is dropped and the URL is requested unchanged; confirm
     # whether trailing '=' characters should really be stripped.
     url = base_url + ho_url
     res = requests.get(url,headers=self.headers)
     res.encoding = 'gbk'
     html = etree.HTML(res.text)
     for house_detail in html.xpath("//td/a[@target]/@href"):
         try:
             detail_res = requests.get(base_url + house_detail,headers=self.headers)
             detail_res.encoding = 'gbk'
             con = detail_res.text

             def field(label):
                 # Text of the first tag content that follows ``label``.
                 return re.search(label + '.*?">(.*?)<',con,re.S|re.M).group(1)

             ho = House(co_index)
             ho.co_id = co_id
             ho.bu_id = bu_id
             ho.ho_name = field('室号')
             ho.ho_floor = field('实际层')
             ho.ho_type = field('房屋类型')
             ho.ho_build_size = field('预测建筑面积')
             ho.ho_true_size = field('预测套内面积')
             ho.ho_share_size = field('预测分摊面积')
             ho.ho_price = field('总价')
             ho.insert_db()
         except Exception as e:
             print('房屋信息错误{}'.format(e))
예제 #3
0
    def ho_info(self, url, co_id, bu_id):
        """Download a building page through a proxy and store each room on it.

        The page is requested again and again, each time through a randomly
        chosen entry of ``self.proxies``, until a request succeeds.  Room
        details are then read from the ``title`` attribute of every
        ``td[@nowrap]`` cell that contains a link.

        Args:
            url: Path appended to ``http://www.aqhouse.net/``.
            co_id: Community id copied onto every stored house.
            bu_id: Building id copied onto every stored house.
        """
        ho_url = 'http://www.aqhouse.net/' + url
        # NOTE(review): retries are unbounded; a permanently failing URL keeps
        # this loop spinning.
        while True:
            try:
                # Pick from however many proxies are configured instead of
                # assuming the list has exactly ten entries.
                proxy = random.choice(self.proxies)
                ho_res = requests.get(ho_url,
                                      headers=self.headers,
                                      proxies=proxy)
                break
            except Exception as e:
                print(e)
        ho_html = etree.HTML(ho_res.text)
        for room in ho_html.xpath("//td[@nowrap]/a/.."):
            try:
                room_info = room.xpath("./@title")[0]
                ho = House(co_index)
                ho.co_id = co_id
                ho.bu_id = bu_id
                ho.ho_name = room.xpath("./a/text()")[0]
                ho.ho_build_size = re.search('建筑面积:(.*?)平方米',
                                             room_info).group(1)
                ho.ho_true_size = re.search('套内面积:(.*?)平方米',
                                            room_info).group(1)
                ho.ho_share_size = re.search('分摊面积:(.*?)平方米',
                                             room_info).group(1)
                ho.ho_room_type = re.search('套型:(.*)', room_info).group(1)
                ho.ho_price = re.search('价格.*?:(.*?)元/平方米', room_info).group(1)

                ho.insert_db()
            except Exception:
                # ``Exception`` rather than a bare ``except`` so Ctrl-C and
                # interpreter shutdown are not swallowed.
                print('房屋解析失败')
예제 #4
0
 def house_parse(self,bu_id,co_id,sid,propertyid):
     """Walk every result page of a building's price search and store its houses.

     A first POST reads the total page count from the text following
     ``页数``; each page from 1 up to that count is then requested and every
     ``tr[@onmouseout]`` row is stored as a ``House``.

     Args:
         bu_id: Building id, sent as ``buildingid`` and stored on each house.
         co_id: Community id stored on each house.
         sid: Sent unchanged as the ``sid`` form field.
         propertyid: Sent unchanged as the ``propertyid`` form field.

     Raises:
         AttributeError: If the page count cannot be found in the first response.
         IndexError: If a row lacks the expected room-name or unit cell.
     """
     search_url = 'http://tmsf.qzfdcgl.com/newhouse/property_pricesearch.htm'
     data = {
         'propertyid':propertyid,
         'sid':sid,
         'buildingid':bu_id,
         'tid':'price',
         'page':1
     }
     res = requests.post(search_url,data=data,headers=self.headers)
     # Raw string: ``\d`` is not a valid escape in a normal string literal.
     page = re.search(r'页数.*?/(\d+)',res.text).group(1)
     for i in range(1,int(page)+1):
         data['page'] = i
         ho_res = requests.post(search_url, data=data, headers=self.headers)
         ho_html = etree.HTML(ho_res.text)
         for house in ho_html.xpath("//tr[@onmouseout]"):
             ho = House(co_index)
             ho.co_id = co_id
             ho.bu_id = bu_id
             ho.ho_name = house.xpath("./td[3]/a/div/text()")[0]
             ho.unit = house.xpath("./td[2]/a/div/text()")[0]
             # For the numeric cells only the ``class`` attributes of the
             # spans are read; self.number_replace turns them into a value.
             buildsize = house.xpath("./td[4]/a/div/span/@class")
             truesize = house.xpath("./td[5]/a/div/span/@class")
             price = house.xpath("./td[9]/a/div/span/@class")
             ho.ho_build_size = self.number_replace(buildsize)
             ho.ho_true_size = self.number_replace(truesize)
             ho.ho_price = self.number_replace(price)
             ho.insert_db()
예제 #5
0
    def house_parse(self, house_url, co_id, bu_id):
        """Store the houses listed on one building page, one detail fetch each.

        Room number, layout, building area, price and detail link are
        collected from the listing page as parallel lists.  For every room
        number the matching detail page is fetched (decoded as GB2312) to
        read the floor, and the house is stored.  A failure for one house is
        printed and the loop moves on to the next.

        Note that a single ``House`` object is reused for every row.

        Args:
            house_url: Path appended to ``http://spf.tlfdc.cn/``.
            co_id: Community id copied onto every stored house.
            bu_id: Building id copied onto every stored house.
        """
        ho = House(co_index)
        url = "http://spf.tlfdc.cn/" + house_url
        res = requests.get(url, headers=self.headers)
        con = res.text

        flags = re.S | re.M
        ho_name = re.findall('室号:(.*?)套', con, flags)
        ho_room_type = re.findall('套型:(.*?)建', con, flags)
        ho_build_size = re.findall('建筑面积:(.*?)参', con, flags)
        ho_price = re.findall('价格:(.*?)元', con, flags)
        ho_detail = re.findall(r'href="(show.*?\?id=\d+&id2=\d+&prjid=\d+)"',
                               con, flags)
        for index, name in enumerate(ho_name):
            try:
                ho.co_id = co_id
                ho.bu_id = bu_id
                ho.ho_name = name
                # The lists come from independent searches; a length mismatch
                # surfaces here as IndexError and is reported below.
                ho.ho_room_type = ho_room_type[index]
                ho.ho_build_size = ho_build_size[index]
                ho.ho_price = ho_price[index]
                ho_detail_url = "http://spf.tlfdc.cn/" + ho_detail[index]
                detail_res = requests.get(ho_detail_url, headers=self.headers)
                detail_con = detail_res.content.decode('gb2312')
                ho.ho_floor = re.findall('楼层.*?">(.*?)</td>', detail_con,
                                         flags)[0].strip()

                ho.insert_db()
            except Exception as e:
                # Bind the exception: the message below prints it, and an
                # unbound ``e`` would raise NameError inside the handler.
                print('房号错误,co_index={},url={}'.format(co_index, url), e)
                continue
예제 #6
0
 def house_parse(self, ho_url, co_id, bu_id):
     """Follow each house link of a building page and store the parsed house.

     The building page and every detail page are decoded as GBK.  Detail
     links are the ``href`` values of ``td[@height='80']/a``.  Any error
     while fetching or parsing one detail page is logged and the next link
     is processed.

     Args:
         ho_url: Path appended to ``http://61.143.241.154/``.
         co_id: Community id copied onto every stored house.
         bu_id: Building id copied onto every stored house.
     """
     # House attribute -> label that precedes its value on the detail page.
     wanted = (
         ('ho_name', '房屋号'),
         ('ho_true_size', '套内面积'),
         ('ho_build_size', '建筑面积'),
         ('orientation', '房屋朝向'),
         ('ho_type', '用途'),
         ('ho_price', '申报总价'),
     )
     flags = re.S | re.M
     # NOTE(review): ``headers`` is a module-level name here, not
     # ``self.headers`` — confirm that is intended.
     listing = requests.get("http://61.143.241.154/" + ho_url, headers=headers)
     tree = etree.HTML(listing.content.decode('gbk'))
     for detail in tree.xpath("//td[@height='80']/a/@href"):
         try:
             reply = requests.get('http://61.143.241.154/' + detail,
                                  headers=headers)
             page = reply.content.decode('gbk')
             ho = House(co_index)
             ho.co_id = co_id
             ho.bu_id = bu_id
             for attr, label in wanted:
                 found = re.search(label + '.*?">(.*?)</td', page, flags)
                 setattr(ho, attr, found.group(1))
             ho.insert_db()
         except Exception as e:
             log.error("{}房屋请求解析失败{}".format(detail, e))
예제 #7
0
    def get_build_info(self, url, response,co_id, bu_id):
        """Store one house per entry of a JSON room listing.

        ``response.text`` is parsed as JSON and iterated.  Each entry supplies
        the basic house fields, and its detail page is fetched to read the
        shared area and the total price.  Entries whose detail page cannot be
        fetched or parsed are reported with ``print`` and skipped.

        Note that a single ``House`` object is reused for every entry.

        Args:
            url: Not used by this method.
            response: Object whose ``text`` attribute holds the JSON listing.
            co_id: Community id copied onto every stored house.
            bu_id: Building id copied onto every stored house.
        """
        house = House(co_index)
        detail_prefix = "http://fsfc.fsjw.gov.cn/hpms_project/roomview.jhtml?id="
        flags = re.S | re.M
        for room in json.loads(response.text):
            room_no = room['roomno']  # room number
            usage = room['ghyt']  # usage
            inner_area = room['tnmj']  # predicted inner area
            floor = room['floorindex']  # floor
            build_area = room['jzmj']  # building area
            house.co_id = co_id
            house.bu_id = bu_id
            code = room["fwcode"]
            house.ho_name = room_no
            house.ho_type = usage
            house.ho_true_size = inner_area
            house.ho_floor = floor
            house.ho_build_size = build_area

            house_detail_url = detail_prefix + str(code)
            try:
                detail_text = requests.get(house_detail_url,
                                           headers=self.headers).text
                house.ho_share_size = re.search(
                    '实测分摊面积.*?<td>(.*?)</td>', detail_text, flags).group(1)
                house.ho_price = re.search(
                    '总价.*?<td>(.*?)</td>', detail_text, flags).group(1)
            except Exception as e:
                print("co_index={},房屋详情页{}请求失败!".format(co_index,house_detail_url))
                print(e)
                continue

            house.insert_db()
예제 #8
0
    def get_house_info(self, co_id, bu_id, id):
        """Store every house opened from one building's listing page.

        The listing page for ``id`` is fetched and decoded as GBK.  Each URL
        found in an ``onClick=...open('...'`` handler is fetched and parsed
        into a ``House``.  A failure while fetching or parsing one house is
        printed and the remaining houses are still processed.

        Args:
            co_id: Community id copied onto every stored house.
            bu_id: Building id copied onto every stored house.
            id: Value appended to the listing URL's ``id=`` query parameter.
        """
        house_list_url = "http://xx.yyfdcw.com/hetong/fdc_xxdxx.asp?id=" + str(
            id)
        res = requests.get(house_list_url, headers=self.headers)
        con = res.content.decode('gbk')
        flags = re.S | re.M
        house_list = re.findall(r"onClick=.*?open\('(.*?)',", con, flags)
        for house_ in house_list:
            # The guard covers the request and the parsing, which are the
            # steps that can actually fail, rather than only the URL
            # concatenation.
            try:
                house_url = "http://xx.yyfdcw.com/hetong/" + house_
                ho_res = requests.get(house_url, headers=self.headers)
                ho_con = ho_res.content.decode('gbk')

                def field(label):
                    # Cell text that follows ``label`` on the detail page.
                    return re.search(label + '.*?fafa>(.*?)</TD', ho_con,
                                     flags).group(1)

                ho = House(co_index)
                ho.co_id = co_id
                ho.bu_id = bu_id
                ho.ho_name = field('室号')
                ho.ho_floor = field('实际层')
                ho.ho_build_size = field('建筑面积')
                ho.ho_true_size = field('套内面积')
                ho.ho_share_size = field('分摊面积')
                ho.ho_price = field('价格')
                ho.ho_type = field('用途')
            except Exception as e:
                print("co_index={},房屋信息错误".format(co_index), e)
                continue

            ho.insert_db()
예제 #9
0
    def house_parse(self, bu_id, co_id):  # parse house information
        """Post for one building's house view and store every house in it.

        The response is scanned with independent regular expressions that
        yield parallel lists (name, areas, usage, unit price, house id).  The
        lists are combined by position, one stored house per house id.

        Note that a single ``House`` object is reused for every row.

        Args:
            bu_id: Building id, sent as ``nid`` and stored on each house.
            co_id: Community id, sent as ``projectid`` and stored on each house.

        Raises:
            IndexError: If one of the parallel lists is shorter than the list
                of house ids.
        """
        house_url = "http://ys.tyfdc.gov.cn/Firsthand/tyfc/publish/probld/NBView.do?"
        formdata = {"nid": bu_id, "projectid": co_id}
        try:
            res = requests.post(house_url, data=formdata, headers=self.headers)
        except Exception as e:
            print("co_index={},房屋详情页无法访问".format(co_index), e)
            # No response exists, so there is nothing to parse; continuing
            # would only raise NameError on ``res``.
            return
        con = res.text
        ho = House(co_index)

        flags = re.S | re.M
        # Same pattern values as before, written without invalid escapes.
        ho_name = re.findall("'\\);\">(.*?)&nbsp;", con, flags)
        ho_build_size = re.findall('<span.*?建筑面积:(.*?)㎡', con, flags)
        ho_true_size = re.findall('<span.*?套内面积:(.*?)分', con, flags)
        ho_share_size = re.findall('<span.*?分摊面积:(.*?)㎡', con, flags)
        ho_type = re.findall('<span.*?用途:(.*?)房', con, flags)
        ho_price = re.findall('<span.*?单价:(.*?)"', con, flags)
        ho_id = re.findall(r"getHouseBaseInfo\('(.*?)'\)", con, flags)
        for index, house_id in enumerate(ho_id):
            ho.co_id = co_id
            ho.bu_id = bu_id
            ho.ho_name = ho_name[index]
            ho.ho_build_size = ho_build_size[index]
            ho.ho_type = ho_type[index]
            ho.ho_share_size = ho_share_size[index]
            ho.ho_price = ho_price[index]
            ho.ho_true_size = ho_true_size[index]
            ho.ho_num = house_id
            ho.insert_db()
예제 #10
0
    def house_info(self, co_id, bu_id, house_url_list):
        """Fetch each house page through ``Proxy_contact`` and store the house.

        Every entry of ``house_url_list`` is appended to the site prefix and
        fetched.  The returned bytes are decoded as GBK and parsed with
        regular expressions.  A failure on one house is logged and that house
        is skipped.

        Args:
            co_id: Community id copied onto every stored house.
            bu_id: Building id copied onto every stored house.
            house_url_list: Iterable of paths relative to
                ``http://www.njhouse.com.cn/2016/spf/``.
        """
        # House attribute -> pattern whose first group is the value.
        rules = (
            ('ho_name', '房号.*?;">(.*?)</td'),
            ('ho_price', '价格.*?<td>(.*?)元'),
            ('ho_floor', '楼层.*?;">(.*?)</td'),
            ('ho_build_size', '建筑面积.*?<td>(.*?)m'),
            ('ho_true_size', '套内面积.*?<td>(.*?)m'),
            ('ho_share_size', '分摊面积.*?<td>(.*?)m'),
            ('ho_type', '房屋类型.*?<td>(.*?)</td'),
        )
        flags = re.S | re.M
        for rel_path in house_url_list:
            target = "http://www.njhouse.com.cn/2016/spf/" + rel_path
            try:
                fetcher = Proxy_contact(app_name="nanjing",
                                        method='get',
                                        url=target,
                                        headers=self.headers)
                page = fetcher.contact().decode('gbk')

                ho = House(co_index)
                ho.co_id = co_id
                ho.bu_id = bu_id
                for attr, pattern in rules:
                    setattr(ho, attr, re.search(pattern, page, flags).group(1))
            except Exception as e:
                log.error("房屋详情页错误{}".format(e))
                continue

            ho.insert_db()
예제 #11
0
 def get_house_info(self, build_num, sid):
     """Run a regex-driven ``ProducerListUrl`` over one building's house box page.

     The attributes set on the ``House`` object here are regular-expression
     patterns, not values.  ``house.to_dict()`` is passed as
     ``analyzer_rules_dict`` with ``analyzer_type='regex'``, and
     ``get_details()`` is then called on the producer.  Any error is printed
     together with the URL.

     Args:
         build_num: Value of the ``buildingid`` query parameter.
         sid: Value of the ``sid`` query parameter.
     """
     # Built outside the ``try`` so the handler below can always name the URL.
     # ``format`` also accepts non-string ids.
     house_url = 'http://www.tmsf.com/newhouse/NewPropertyHz_showbox.jspx?buildingid={}&sid={}'.format(build_num, sid)
     try:
         house = House(co_index)
         house.bu_id = 'buildingid":(.*?),'
         # NOTE(review): the other House fields are named ``ho_*``;
         # ``co_build_size`` may be a typo for ``ho_build_size`` — confirm.
         house.co_build_size = 'builtuparea":(.*?),'
         house.ho_price = 'declarationofroughprice":(.*?),'
         house.ho_name = 'houseno":(.*?),'
         house.ho_true_size = 'setinsidefloorarea":(.*?),'
         house.ho_share_size = 'poolconstructionarea":(.*?),'
         house.ho_type = 'houseusage":(.*?),'
         p_2 = ProducerListUrl(page_url=house_url,
                               request_type='get',
                               encode='utf-8',
                               analyzer_rules_dict=house.to_dict(),
                               analyzer_type='regex',
                               headers=self.headers)
         p_2.get_details()
     except Exception as e:
         print('房号错误,co_index={},url={}'.format(co_index, house_url), e)
예제 #12
0
    def get_house_info(self, bu_id):
        """Post an XML request for one building and store each house described.

        The request body is CRLF-separated XML naming ``bu_id``.  Every
        ``title='...'`` attribute in the response is parsed into a ``House``,
        and the raw title text is kept on ``house.info``.

        Args:
            bu_id: Building id; it is concatenated into the XML, so it must
                be a string.

        Raises:
            AttributeError: If a title lacks one of the expected labels.
        """
        endpoint = 'http://www.ytfcjy.com/Common/Agents/ExeFunCommon.aspx'

        # Joined with CRLF; the trailing empty entry yields the final CRLF.
        xml_lines = [
            '<?xml version="1.0" encoding="utf-8" standalone="yes"?>',
            '<param funname="SouthDigital.Wsba.CBuildTableEx.GetBuildHTMLEx">',
            '<item>' + bu_id + '</item>',
            '<item>1</item>',
            '<item>1</item>',
            '<item>80</item>',
            '<item>720</item>',
            '<item>g_oBuildTable</item>',
            '<item> 1=1</item>',
            '</param>',
            '',
        ]
        body = "\r\n".join(xml_lines)
        request_headers = {'Content-Type': "text/xml"}

        reply = requests.request("POST", endpoint, data=body,
                                 headers=request_headers)
        flags = re.S | re.M
        # House attribute -> pattern whose first group is the value.
        rules = (
            ('ho_name', '房号:(.*?)单元'),
            ('ho_build_size', '总面积:(.*?) 平方米'),
            ('ho_type', '用途:(.*?)户'),
            ('ho_price', '价格:(.*?) 元'),
        )
        for title in re.findall("title='(.*?)'", reply.text, flags):
            house = House(co_index)
            for attr, pattern in rules:
                setattr(house, attr, re.search(pattern, title, flags).group(1))
            house.bu_id = bu_id
            house.info = title
            house.insert_db()
예제 #13
0
    def ho_info(self, bu_url_list, co_id):
        """Store every house listed on each of the given building pages.

        Houses are the ``li[@class='tjCor4']`` elements of a page; their
        details are parsed from the ``title`` attribute.  The building id is
        taken from the ``dbh=<digits>`` part of the page URL.  An error on
        one page is logged and the next page is processed.

        Args:
            bu_url_list: Iterable of building page URLs.
            co_id: Community id copied onto every stored house.
        """
        for bu_url in bu_url_list:
            try:
                res = requests.get(bu_url, headers=self.headers)
                html = etree.HTML(res.text)
                for house_info in html.xpath("//li[@class='tjCor4']"):
                    house = house_info.xpath("./@title")[0]
                    ho = House(co_index)
                    ho.co_id = co_id
                    ho.bu_id = re.search(r'dbh=(\d+)', bu_url).group(1)
                    ho.ho_name = re.search('房号:(.*?)<br', house).group(1)
                    ho.ho_room_type = re.search('户型:(.*?)<br', house).group(1)
                    ho.ho_build_size = re.search('建筑面积:(.*?)平方米',
                                                 house).group(1)
                    ho.ho_price = re.search('单价:(.*?)元', house).group(1)
                    ho.ho_type = re.search('用途:(.*?)<br', house).group(1)

                    ho.insert_db()
            except Exception as e:
                # Format the exception into the message: passing it as an
                # extra argument with no placeholder can lose the message.
                log.error('房号信息错误{}'.format(e))
예제 #14
0
    def get_house_info(self, house_url_list):
        """Store one house per centred table row of each given page.

        Rows are the ``tr[@align='center']`` elements.  House fields are
        taken by position from the row's ``td`` text nodes, and the building
        id from the ``ID=<digits>`` part of the URL.  A row that cannot be
        parsed is printed with the URL and skipped.

        Args:
            house_url_list: Iterable of page URLs, each containing
                ``ID=<digits>``.
        """
        for url in house_url_list:
            response = requests.get(url)

            html = etree.HTML(response.text)
            for row in html.xpath("//tr[@align='center']"):
                try:
                    house = House(co_index)
                    # Evaluate the XPath once per row, not once per field.
                    cells = row.xpath("./td/text()")
                    house.ho_name = cells[1]
                    house.ho_floor = cells[0]
                    house.ho_build_size = cells[3]
                    house.ho_true_size = cells[4]
                    house.ho_share_size = cells[5]
                    house.ho_room_type = cells[2]
                    house.ho_price = cells[-1]
                    house.orientation = cells[-2]
                    house.bu_id = re.search(r'ID=(\d+)', url).group(1)
                    house.insert_db()
                except Exception as e:
                    print('房号错误,co_index={},url={}'.format(co_index, url), e)
0
 def ho_parse(self, co_id, bu_id, ho_list):
     """Fetch and store the house behind every link element in ``ho_list``.

     Each element's ``href`` is appended to ``http://110.89.45.7:8082``.  A
     house whose page cannot be requested is skipped.  After each stored
     house the method sleeps for a random 0-3 seconds.

     Args:
         co_id: Community id copied onto every stored house.
         bu_id: Building id copied onto every stored house.
         ho_list: Iterable of elements supporting ``xpath("./@href")``.

     Raises:
         AttributeError: If a fetched page lacks one of the expected labels.
     """
     flags = re.S | re.M
     for ho in ho_list:
         ho_url = ho.xpath("./@href")[0]
         house_url = "http://110.89.45.7:8082" + ho_url
         try:
             ho_res = requests.get(
                 house_url,
                 headers=self.headers,
             )
         except Exception:
             # ``Exception`` rather than a bare ``except`` so Ctrl-C and
             # interpreter shutdown are not swallowed.
             continue
         con = ho_res.text
         house = House(co_index)
         house.co_id = co_id
         house.bu_id = bu_id
         house.ho_name = re.search('房  号.*?<td>(.*?)</td', con,
                                   flags).group(1)
         house.ho_build_size = re.search('建筑面积.*?<td>(.*?)</td', con,
                                         flags).group(1)
         house.ho_true_size = re.search('套内面积.*?<td>(.*?)</td', con,
                                        flags).group(1)
         house.ho_share_size = re.search('分摊面积.*?<td>(.*?)</td', con,
                                         flags).group(1)
         house.ho_floor = re.search('所 在 层.*?<td>(.*?)</td', con,
                                    flags).group(1)
         house.ho_price = re.search('申报单价.*?">(.*?)</td', con,
                                    flags).group(1)
         house.ho_type = re.search('房屋用途.*?<td>(.*?)</td', con,
                                   flags).group(1)
         house.insert_db()
         time.sleep(random.randint(0, 3))
예제 #16
0
 def house_info(self,co_id,bu_id,dong_url):
     """Store every house linked from one building page.

     The building page is fetched directly and read as GBK.  Each house link
     found after a ``房屋号`` label is fetched through ``Proxy_contact``.  A
     house is skipped when ``contact()`` returns ``False``; a parsing failure
     is printed and the next house is processed.

     Args:
         co_id: Community id copied onto every stored house.
         bu_id: Building id copied onto every stored house.
         dong_url: Path appended to ``self.start_url`` with a ``/`` separator.
     """
     # House attribute -> pattern whose first group is the value.
     rules = (
         ('ho_name', '房屋号.*?">(.*?)</'),
         ('ho_true_size', '套内面积.*?">(.*?)m'),
         ('ho_build_size', '建筑面积.*?">(.*?)m'),
         ('ho_type', '房屋用途.*?">(.*?)<'),
         ('ho_price', '申报总价.*?">(.*?)<'),
         ('orientation', '朝向.*?">(.*?)<'),
     )
     flags = re.S|re.M
     listing = requests.get(self.start_url + "/" +dong_url,headers=self.headers)
     listing.encoding = 'gbk'
     for house in re.findall('房屋号.*?<a href="(.*?)"',listing.text,flags):
         target = self.start_url + "/" + house
         fetcher = Proxy_contact(app_name='maoming',method='get',url=target,headers=self.headers)
         raw = fetcher.contact()
         if raw is False:
             continue
         page = raw.decode('gbk')
         try:
             ho = House(co_index)
             ho.co_id = co_id
             ho.bu_id = bu_id
             for attr, pattern in rules:
                 setattr(ho, attr, re.search(pattern,page,flags).group(1))
             ho.insert_db()
         except Exception as e:
             print("房屋解析失败",e)
예제 #17
0
    def comm_crawler(self, comm_url, co_develops, co_pre_sale, co_name,
                     co_pre_sale_date):
        """Crawl one community page: store it, its buildings and their houses.

        The ``propertyid`` and ``sid`` hidden inputs are read from the
        community page.  ``self.comm_info`` is called with the community
        data, then every building anchor except the first is stored.  For
        each building all price-list pages are walked, and every listed
        house's detail page is fetched and stored.  A house that fails to
        fetch or parse is skipped.

        Note that a single ``House`` and a single ``Building`` object are
        reused for all rows.

        Args:
            comm_url: URL of the community page.
            co_develops: Passed through to ``self.comm_info``.
            co_pre_sale: Passed through to ``self.comm_info``.
            co_name: Passed through to ``self.comm_info``.
            co_pre_sale_date: Passed through to ``self.comm_info``.

        Raises:
            IndexError: If the community page lacks the hidden inputs.
            AttributeError: If a building id or page count cannot be found.
        """
        ho = House(co_index)
        comm_res = requests.get(comm_url, headers=self.headers)
        comm_html = etree.HTML(comm_res.text)
        value = comm_html.xpath("//input[@id='propertyid']/@value")[0]
        sid = comm_html.xpath("//input[@id='sid']/@value")[0]

        bu = Building(co_index)
        bu_num = comm_html.xpath("//div[@id='building_dd']//a")[1:]
        self.comm_info(co_develops, co_pre_sale, co_name, co_pre_sale_date,
                       value)
        for bu_ in bu_num:
            bu.bu_num = bu_.xpath("./text()")[0]
            bu_id = bu_.xpath("./@id")[0]
            bu.bu_id = re.search(r'\d+', bu_id).group(0)
            bu.co_id = value
            bu.insert_db()
            detail_url = "http://hu.tmsf.com/newhouse/property_" + str(
                sid) + "_" + str(value) + "_price.htm?buildingid=" + str(
                    bu.bu_id)
            page_html = requests.get(detail_url, headers=self.headers)

            page = re.search(r'页数 \d+/(\d+)', page_html.text).group(1)
            for i in range(1, int(page) + 1):
                # Build each page URL from the unchanged base so parameters
                # do not pile up across iterations.  The base already has a
                # query string, so the separator is '&', not '?'.
                page_url = detail_url + "&page=" + str(i)

                detail_res = requests.get(page_url, headers=self.headers)
                list_html = etree.HTML(detail_res.text)
                house_url_list = list_html.xpath("//td[@width='100']/a/@href")
                house_bu_num = list_html.xpath("//td[@width='100']/a/text()")
                house_name = list_html.xpath(
                    "//td[@width='101'][1]/a/div/text()")

                # The range stops at the last valid index, so no iteration
                # depends on a swallowed IndexError.
                # NOTE(review): index 0 is still skipped as before — confirm
                # whether the first row is a real house.
                for index in range(1, len(house_url_list)):
                    try:
                        ho.bu_num = house_bu_num[index]  # building number
                        house_url = "http://hu.tmsf.com" + house_url_list[index]
                        house_res = requests.get(house_url,
                                                 headers=self.headers)
                        house_page = house_res.text
                        ho.bu_id = bu.bu_id
                        ho.co_id = re.search(r'楼盘主页.*?_\d+_(\d+)_info',
                                             house_page).group(1)  # community id
                        ho.ho_name = house_name[index]  # room name, e.g. "3单元403"

                        # Usage type, e.g. ordinary residence / garage-storage.
                        ho.ho_type = re.search('房屋用途:.*?>(.*?)<',
                                               house_page).group(1)
                        ho.ho_floor = re.search('第(.*?)层', house_page).group(1)

                        # For numeric values only the ``class`` attributes in
                        # the matched text are kept; self.number turns them
                        # into a value.
                        build_text = re.search('建筑面积:(.*?)平方米',
                                               house_page).group(1)
                        build_num = re.findall('class="(.*?)"', build_text)
                        ho.ho_build_size = self.number(build_num)  # building area

                        size_text = re.search('套内面积:(.*?)平方米',
                                              house_page).group(1)
                        size_num = re.findall('class="(.*?)"', size_text)
                        ho.ho_true_size = self.number(size_num)  # inner area

                        price_text = re.search('总  价:(.*?)万元',
                                               house_page).group(1)  # price
                        price_num = re.findall('class="(.*?)"', price_text)
                        ho.ho_price = self.number(price_num)

                        ho.insert_db()
                    except Exception:
                        # ``Exception`` rather than a bare ``except`` so
                        # Ctrl-C and interpreter shutdown are not swallowed.
                        continue