def get_house_info(self, bu_id, co_id):
    """Scrape the room list of one building on b.fang99.com and store each room.

    The listing page is fetched through ``self.request_proxy``, decoded as
    GBK, and every ``<span>`` inside the room-list section becomes one
    ``House`` record that is saved with ``insert_db()``.

    :param bu_id: sent as the ``lzbh`` query parameter and stored on each
        record as ``bu_id``. It is concatenated into the URL, so it must be
        a ``str``.
    :param co_id: sent as the ``buildingid`` query parameter and stored on
        each record as ``co_id``. Must be a ``str`` as well.
    :raises AttributeError: when the room-list section is not present in the
        page; that lookup happens outside the per-room ``try``.
    """
    # NOTE(review): co_id feeds ``buildingid`` and bu_id feeds ``lzbh`` --
    # presumably what the site expects; confirm against the caller.
    house_url = ('http://b.fang99.com/buildinglistselect.aspx?buildingid='
                 + co_id + '&xmbh=&lzbh=' + bu_id)
    page = self.request_proxy(house_url, headers=self.headers)
    markup = page.content.decode('gbk')
    section = re.search('rpt_ewlpblc_fjlistdiv_0.*?erp_con_2', markup,
                        re.S | re.M).group()
    for span in re.findall('<span.*?</span>', section, re.S | re.M):
        try:
            house = House(co_index)
            # The title attribute carries "<room type>,<build size>".
            house.ho_room_type = re.search('title="(.*?),', span,
                                           re.S | re.M).group(1)
            house.ho_build_size = re.search('title=".*?,(.*?)"', span,
                                            re.S | re.M).group(1)
            # The room name is either the link text or the bare span text.
            name_pattern = '<a.*?>(.*?)<' if '<a' in span else '<span.*?>(.*?)<'
            house.ho_name = re.search(name_pattern, span,
                                      re.S | re.M).group(1)
            house.bu_id = bu_id
            house.co_id = co_id
            house.insert_db()
        except Exception as err:
            # Best effort: a malformed room is reported and skipped.
            print('房号错误,co_index={},url={}'.format(co_index, house_url), err)
Exemple #2
0
    def get_house_detail(self, house_url_list):
        """Download each room-table page and store one ``House`` per ``<Room>``.

        For every URL the page is fetched, decoded as GBK, and the building
        number and project name are read from it. Each ``<Room>`` entry is
        then stored with its area, usage and the display name looked up by
        room id.

        :param house_url_list: iterable of absolute page URLs.
        :raises AttributeError: when a page lacks the building number or the
            project name (those lookups are outside the per-room ``try``).
        """
        flags = re.S | re.M
        for house_url in house_url_list:
            res = requests.get(house_url)
            html = res.content.decode('gbk')
            bu_name = re.search('楼号:.*?HouseNum">(.*?)</span>', html,
                                flags).group(1)
            co_name = re.search('项目名称.*?PrjName">(.*?)</span>', html,
                                flags).group(1)
            # Room id -> displayed room name; a later duplicate id overrides
            # an earlier one.
            ho_id_dict = dict(
                re.findall(r"aspx\?Room=(.*?)'.*?<b>(.*?)</b>", html, flags))

            house_info = re.findall(
                "<Room><Cell RoomID='(.*?)'.*?BArea='(.*?)'.*?HouseUse='(.*?)'.*?</Room>",
                html, flags)
            for room_id, true_size, ho_type in house_info:
                try:
                    h = House(self.co_index)
                    h.ho_name = ho_id_dict[room_id]
                    h.ho_true_size = true_size
                    h.ho_type = ho_type
                    h.co_name = co_name
                    h.bu_num = bu_name
                    h.insert_db()
                except Exception as e:
                    # Report with the same co_index the record was built with;
                    # the bare name ``co_index`` is not defined by this method.
                    print('房屋错误,co_index={},url={}'.format(
                        self.co_index, house_url), e)
Exemple #3
0
    def get_house_info(self, ho_con=None, headers=None, bu_id=None, url=None):
        """Store every house linked from a building page on www.qyfgj.cn.

        The building page comes either from ``ho_con`` (already-downloaded
        markup) or, when ``ho_con`` is ``None``, from a GET on ``url``
        decoded as GBK. Each link in a ``<td width='120'>`` cell is followed
        and the detail page is turned into one ``House`` record.

        :param ho_con: building-page markup, or ``None`` to download ``url``.
        :param headers: HTTP headers passed to every request.
        :param bu_id: stored on each record as ``bu_id``.
        :param url: building-page URL; only used when ``ho_con`` is ``None``.
        :raises AttributeError: when a detail page lacks one of the expected
            fields; there is no per-house error handling here.
        """
        if ho_con is None:
            res = requests.get(url, headers=headers)
            html = etree.HTML(res.content.decode('gbk'))
        else:
            html = etree.HTML(ho_con)

        flags = re.S | re.M
        for ho_url in html.xpath("//td[@width='120']/a/@href"):
            ho_detail = 'http://www.qyfgj.cn/newys/' + ho_url
            detail_res = requests.get(ho_detail, headers=headers)
            con = detail_res.content.decode('gbk')
            ho = House(co_index)

            ho.bu_id = bu_id
            ho.ho_num = re.search('房屋号.*?">(.*?)</td', con, flags).group(1)
            ho.ho_build_size = re.search('建筑面积.*?">(.*?)m', con, flags).group(1)
            ho.ho_true_size = re.search('套内面积.*?">(.*?)m', con, flags).group(1)
            ho.ho_type = re.search('房屋用途.*?">(.*?)</td', con, flags).group(1)

            ho.insert_db()
    def get_house_detail(self, house_url_list):
        """Scrape each room-table page and store one ``House`` per table cell.

        The list of URLs is printed first (debug output), then every page is
        fetched with ``self.headers``. Any failure while handling a page is
        printed together with that page's URL and the next URL is processed.
        A completion message is printed at the end.

        :param house_url_list: iterable of absolute page URLs.
        """
        flags = re.S | re.M
        print(house_url_list)
        for house_url in house_url_list:
            try:
                response = requests.get(house_url, headers=self.headers)
                html = response.text
                house_html = re.search('id=.roomTable.*?id="remarkDiv"', html,
                                       flags).group()
                house_info_list = re.findall('<td class=.*?title.*?</td>',
                                             house_html, flags)
                bu_id = re.search(r'roomTable.aspx\?id=(.*?)&', html,
                                  flags).group(1)
                # A distinct loop name keeps ``house_url`` intact so the
                # error message below reports the URL, not a <td> fragment.
                for cell in house_info_list:
                    house = House(co_index)
                    house.bu_id = bu_id
                    house.ho_build_size = re.search('建筑面积:(.*?) ', cell,
                                                    flags).group(1)
                    house.info = re.search("(建筑面积:.*?)'>", cell,
                                           flags).group(1)
                    name = re.search("<td.*?>(.*?)</td>", cell,
                                     flags).group(1)
                    if 'id' in name:
                        # The cell wraps the name in a link; keep its text.
                        name = re.search('<a.*?>(.*?)</a>', name,
                                         flags).group(1)
                    house.ho_name = name
                    house.insert_db()

            except Exception as e:
                print('房号错误,co_index={},url={}'.format(co_index, house_url), e)
        print('房号放入完成')
    def house_crawler(self, house_url, bu_num, co_id, bu_id):
        """Scrape one building page and store its rooms for every floor found.

        A single ``House`` object is created and re-used: for each floor
        number found in the page, its fields are overwritten with each room's
        data and ``insert_db()`` is called.

        :param house_url: path appended to ``self.url``.
        :param bu_num: passed to the ``House`` constructor.
        :param co_id: passed to the ``House`` constructor.
        :param bu_id: passed to the ``House`` constructor.
        """
        ho = House(co_index, bu_num=bu_num, co_id=co_id, bu_id=bu_id)

        url = self.url + house_url
        con = requests.get(url, headers=self.headers)
        tr = con.text
        flags = re.S | re.M
        ho_name = re.findall('室号:(.*?)户', tr, flags)  # room number, e.g. unit 3 room 403
        ho_floor = re.findall(r'(\d+)层', tr)  # floor numbers
        ho_type = re.findall('房屋属性:(.*?)"', tr, flags)  # usage: residence / garage, storage
        ho_room_type = re.findall('户型:(.*?)所', tr, flags)  # layout
        ho_build_size = re.findall('建筑面积:(.*?)房', tr, flags)  # build area
        # The room id (pattern '_td(\d+)"') is present in the page but is
        # currently not extracted.

        # The original loop ran index 1..len(ho_name) and relied on a bare
        # ``except`` to hide the guaranteed IndexError at the end. Pairing
        # the lists from index 1 stores exactly the same rooms without it.
        # NOTE(review): index 0 is skipped and every room is stored once per
        # floor, as before -- confirm both are intended.
        rooms = list(zip(ho_name[1:], ho_type[1:], ho_room_type[1:],
                         ho_build_size[1:]))

        for floor in ho_floor:
            try:
                ho.ho_floor = floor
                for name, usage, layout, size in rooms:
                    ho.ho_name = name
                    ho.ho_type = usage
                    ho.ho_room_type = layout
                    ho.ho_build_size = size

                    ho.insert_db()
            except Exception as e:
                # Still best effort per floor, but no longer silent and no
                # longer swallowing KeyboardInterrupt/SystemExit.
                print('房号错误,co_index={},url={}'.format(co_index, url), e)
                continue
    def get_build_info(self, url, co_id):
        """Scrape one building page: store the building, then its houses.

        The building fields are read by element id from the page, saved via
        ``Building.insert_db()``, and then every ``<td>`` after the
        ``row3`` table row that contains ``<br>`` is parsed into ``House``
        records. Any error is printed; nothing is raised for ordinary
        failures and nothing is returned.

        :param url: building page URL; must end in ``?<digits>``, which is
            used as the building id.
        :param co_id: community id stored on the building and its houses.
        """
        flags = re.S | re.M
        try:
            building = Building(co_index)
            response = requests.get(url)
            html = response.text
            tree = etree.HTML(html)
            co_name = tree.xpath('//*[@id="PageB_Location"]/text()')[0]  # community name
            print(co_name)
            bu_name = tree.xpath('//*[@id="ItemName"]/text()')[0]  # building name
            bu_num = tree.xpath('//*[@id="PageB_HouseNo"]/text()')[0]  # building number
            bu_all_house = tree.xpath(
                '//*[@id="lb_countbulidtaoshu"]/text()')[0]  # total units
            # ``self.is_none`` is defined elsewhere; presumably it maps an
            # empty xpath result to a placeholder -- TODO confirm.
            bu_floor = self.is_none(
                tree.xpath('//*[@id="cell3-1"]/text()'))  # floors
            bu_build_size = tree.xpath(
                '//*[@id="lb_countbulidarea"]/text()')[0]  # build area
            bu_live_size = tree.xpath(
                '//*[@id="lb_buildarea"]/text()')[0]  # residential area
            bu_price = self.is_none(
                tree.xpath('//*[@id="lb_buildavg"]/text()'))  # residential price
            bu_id = re.search(r'\?(\d+)$', url).group(1)  # building id
            building.co_id = co_id
            building.bu_name = bu_name
            building.bu_num = bu_num
            building.bu_all_house = bu_all_house
            building.bu_floor = bu_floor
            building.bu_build_size = bu_build_size
            building.bu_live_size = bu_live_size
            building.bu_price = bu_price
            building.bu_id = bu_id
            building.insert_db()
            house_info_html = re.findall('<tr id="row3">(.*)$', html, flags)[0]
            for cell in re.findall('(<td.*?>.*?</td>)', house_info_html, flags):
                if '<br>' not in cell:
                    continue
                ho_name_list = re.findall('<td.*?>(.*?)<br>', cell, flags)
                ho_true_size_list = re.findall('<td.*?>.*?<br>(.*?)<br>', cell,
                                               flags)
                ho_type = re.findall('<td.*?>.*?<br>.*?<br>(.*?)<br>', cell,
                                     flags)[0]
                for idx, raw_name in enumerate(ho_name_list):
                    try:
                        if 'font' in raw_name:
                            # Drop the opening <font ...> tag around the name.
                            ho_name = re.sub('<font.*?>', '', raw_name)
                        else:
                            ho_name = raw_name
                        # NOTE(review): hard-coded 8 while the building uses
                        # ``co_index`` -- presumably the same value; confirm.
                        house = House(8)
                        house.ho_name = ho_name
                        house.ho_true_size = ho_true_size_list[idx]
                        house.co_id = co_id
                        house.bu_id = bu_id
                        house.ho_type = ho_type
                        house.insert_db()

                    except Exception as e:
                        print(e)
        except Exception as e:
            # ``Exception`` instead of ``BaseException`` so that Ctrl-C and
            # SystemExit are no longer swallowed.
            print(e)
Exemple #7
0
 def get_house_info(self, zu_house_url, bu_num, co_id):
     """Collect the postback targets of a building page and hand them on.

     A ``House`` template is filled with ``bu_num``, ``co_id`` and the
     ``ItemName`` text of the page, then passed to ``self.get_house_detail``
     together with the ``__doPostBack`` event targets and arguments found in
     the page. Any error is printed and otherwise ignored.

     :param zu_house_url: page URL, fetched through the session ``self.s``.
     :param bu_num: stored on the template as ``bu_num``.
     :param co_id: stored on the template as ``co_id``.
     """
     try:
         template = House(co_index)
         template.bu_num = bu_num
         template.co_id = co_id
         page = self.s.get(zu_house_url, headers=self.headers).text
         item_name = re.search('ItemName.*?>(.*?)<', page).group(1)
         template.info = item_name.strip()
         # Second argument of __doPostBack(...), then the first one.
         ho_code_list = re.findall("OnClick=.__doPostBack\(.*?,'(.*?)'\)", page, re.S | re.M)
         ho_msg_list = re.findall("OnClick=.__doPostBack\('(.*?)'", page, re.S | re.M)
         self.get_house_detail(zu_house_url, ho_msg_list, ho_code_list, template)
     except Exception as err:
         print(err)
    def comm(self, id):
        """Store one building and all of its houses from the JSON API.

        Three endpoints under ``self.start_url`` are queried for the block
        ``id`` (home-page info, detail info, house list). The building is
        saved first; then each house number in the list is looked up
        individually and saved. Per-house errors are printed and skipped.

        :param id: block / panel number; converted with ``str()`` for URLs
            and stored unchanged as ``bu_id``.
        """
        bu = Building(co_index)

        block_no = str(id)
        house_url = self.start_url + "/api/buildInfos/getHouseInfosByPannelNumber?pannelNumber=" + block_no
        comm_url = self.start_url + "/api/buildInfos/getHomePageBuildingInfo?blockNumber=" + block_no
        comm_detail_url = self.start_url + "/api/buildInfos/getDetailsBuildingInfo?blockNumber=" + block_no

        # Request order and parse order match: summary, detail, houses.
        responses = [requests.get(comm_url),
                     requests.get(comm_detail_url),
                     requests.get(house_url)]
        comm_dict, comm_detail_dict, house_dict = [
            json.loads(resp.text) for resp in responses]

        bu.bu_id = id
        bu.bu_num = comm_dict["data"]["nameBuildings"]
        bu.area = comm_detail_dict['data']['houseingArea']
        bu.bu_address = comm_dict["data"]["houseaddress"]
        bu.bu_pre_sale = comm_detail_dict["data"]["yszh"]
        bu.bu_type = comm_dict["data"]["propertycategory"]
        bu.bo_develops = comm_dict["data"]["companyName"]

        bu.insert_db()

        for group in house_dict["data"]:
            # One House object per group; it is re-filled for every room.
            ho = House(co_index)
            rooms = group["data"]
            if len(rooms) == 0:
                continue
            for room in rooms:
                try:
                    room_id = room["houseNumber"]
                    room_url = self.start_url + "/api/buildInfos/getHouseInfoByHouseNumber?houseNumber=" + str(
                        room_id)
                    res = requests.get(room_url, headers=self.headers)
                    room_info = json.loads(res.text)
                    ho.bu_id = id
                    ho.ho_name = room_info["data"]["houseNo"]
                    ho.ho_build_size = room_info["data"]["buildArea"]
                    ho.ho_true_size = room_info["data"]["jacketArea"]
                    ho.ho_share_size = room_info["data"]["apportionedArea"]
                    ho.ho_floor = room_info["data"]["nominalLevel"]
                    ho.insert_db()
                except Exception as err:
                    print(err)
 def get_house_info(self, house_url_list):
     """Scrape each building page on www.jmfc.com.cn and store its houses.

     Names, inner areas and usage types are collected as three parallel
     lists from the page and combined by position. Errors are printed at
     both the page and the house level; nothing is raised for them.

     :param house_url_list: iterable of site-relative paths containing a
         ``lzbm=<id>&`` query parameter (used as the building id).
     """
     for path in house_url_list:
         try:
             build_url = 'http://www.jmfc.com.cn' + path
             page = requests.get(build_url, headers=self.headers).text
             bu_id = re.search('lzbm=(.*?)&', build_url).group(1)
             names = re.findall('width="35%".*?房号:.*?<TD.*?>(.*?)<',
                                page, re.S | re.M)
             true_sizes = re.findall(
                 'width="35%".*?房号:.*?<TD.*?<TD.*?<TD.*?>(.*?)<', page,
                 re.S | re.M)
             types = re.findall(
                 'width="35%".*?房号:.*?<font.*?<TD.*?<TD.*?>(.*?)<', page,
                 re.S | re.M)
             for pos, raw_name in enumerate(names):
                 try:
                     house = House(co_index)
                     house.ho_name = raw_name.strip()
                     # A shorter parallel list raises IndexError here, which
                     # is reported for this house only.
                     house.ho_true_size = true_sizes[pos].strip()
                     house.ho_type = types[pos].strip()
                     house.bu_id = bu_id
                     house.insert_db()
                 except Exception as err:
                     print(err)
         except Exception as err:
             print(err)
Exemple #10
0
 def build_info(self,build_detail,co_id):
     """Store every building linked from a project page and its houses.

     For each building link the building record is saved, the matching
     ``FloorView.aspx`` page is fetched, and every parent element of a
     ``table.C1.T0.F0`` becomes one ``House``. Building-level failures are
     logged and the building is skipped; house-level failures are logged
     individually.

     :param build_detail: site-relative path of the project page.
     :param co_id: community id stored on buildings and houses.
     """
     project_page = requests.get('http://as.gzfcxx.cn' + build_detail,
                                 headers=self.headers)
     project_tree = etree.HTML(project_page.text)
     for build_url in project_tree.xpath("//div[@class='box']//font/a/@href"):
         try:
             url = 'http://as.gzfcxx.cn'+build_url
             building_tree = etree.HTML(
                 requests.get(url,headers=self.headers).text)
             bu = Building(co_index)
             bu.co_id = co_id
             bu.bu_id = re.search('dongID=(\d+)',build_url).group(1)
             bu.bu_num = building_tree.xpath("//option[@selected='selected']/text()")[0]
             bu.insert_db()
             # Re-use the link's query string for the floor-view endpoint.
             query = re.search("\?(.*?dongID=\d+)", build_url).group(1)
             real_url = 'http://as.gzfcxx.cn/Controls/HouseControls/FloorView.aspx?' + query
             floor_tree = etree.HTML(
                 requests.get(real_url,headers=self.headers).text)
             cells = floor_tree.xpath("//table[@class='C1 T0 F0']/..")
         except Exception as e:
             log.error('楼栋信息错误',e)
             continue
         for cell in cells:
             try:
                 ho = House(co_index)
                 title = cell.xpath("./@title")[0]
                 # NOTE(review): group(1) keeps only the digits before the
                 # separator, i.e. the integer part of the area -- confirm
                 # that dropping the fraction is intended.
                 ho.ho_build_size = re.search('(\d+).(\d+)',title,re.S|re.M).group(1)
                 ho.ho_name = cell.xpath(".//span/text()")[0]
                 ho.bu_id = bu.bu_id
                 ho.co_id = co_id
                 ho.insert_db()
             except Exception as e:
                 log.error('房间信息错误',e)
Exemple #11
0
 def get_house_info(self, form_data_list):
     """POST each form to the gafdc.cn building-table endpoint and store houses.

     Every ``<td width='95'>`` cell of the response becomes one ``House``
     with its name, property category and build area. Errors are printed at
     both the request and the house level; nothing is raised for them.

     :param form_data_list: iterable of form payloads, one per building.
     """
     house_url = 'http://www.gafdc.cn/newhouse/GetBuildTableByAjax.ashx'
     flags = re.S | re.M
     for data in form_data_list:
         try:
             response = requests.post(url=house_url,
                                      data=data,
                                      headers=self.headers)
             html = response.text
             ho_info_html = re.findall("<td width='95'.*?</td>", html, flags)
             bu_id_html = re.search("^.*?overflow-x:auto;", html,
                                    flags).group()
             # The last GetData(...) call before the table carries the id.
             bu_id = re.findall(r"GetData\('.*?','(.*?)'\)", bu_id_html,
                                flags)[-1]
             for cell in ho_info_html:
                 try:
                     h = House(co_index)
                     h.bu_id = bu_id
                     h.ho_name = re.search('<td.*?>(.*?)<', cell,
                                           flags).group(1)
                     h.ho_type = re.search('物业类别:(.*?) ', cell,
                                           flags).group(1)
                     # Read the area from this cell. Searching the whole
                     # page gave every house the first area on the page.
                     h.ho_build_size = re.search('建筑面积:(.*?) ', cell,
                                                 flags).group(1)
                     h.insert_db()
                 except Exception as e:
                     print(
                         '房屋报错,co_index={},url={}'.format(
                             co_index, house_url), e)
         except Exception as e:
             print('房屋报错,co_index={},url={}'.format(co_index, house_url), e)
 def ho_info(self, ho_list, co_id, bu_id):
     """Store one ``House`` per element, reading areas from its ``title``.

     The element text is the house name; the ``title`` attribute holds the
     build, shared and inner areas. A failing element is reported with
     ``print`` and skipped.

     :param ho_list: iterable of lxml elements.
     :param co_id: community id stored on each house.
     :param bu_id: building id stored on each house.
     """
     for node in ho_list:
         try:
             record = House(co_index)
             record.co_id = co_id
             record.bu_id = bu_id
             record.ho_name = node.xpath("./text()")[0]
             tooltip = node.xpath("./@title")[0]

             def area(pattern):
                 # First capture group of ``pattern`` within the tooltip.
                 return re.search(pattern, tooltip).group(1)

             record.ho_build_size = area('建筑面积:(.*?)\n')
             record.ho_share_size = area('分摊面积:(.*)')
             record.ho_true_size = area('套内面积:(.*?)\n')
             record.insert_db()
         except Exception as err:
             print("房屋信息错误{}".format(err))
 def get_data_obj(self, analyzer, co_index):
     """Create the record object that matches ``analyzer``.

     :param analyzer: ``'comm'``, ``'build'`` or ``'house'``.
     :param co_index: passed to the record constructor.
     :return: a ``Comm``, ``Building`` or ``House`` instance; ``None`` for
         any other ``analyzer`` value.
     """
     # The class is only looked up once a branch matches, exactly as the
     # original chain did.
     if analyzer == 'comm':
         factory = Comm
     elif analyzer == 'build':
         factory = Building
     elif analyzer == 'house':
         factory = House
     else:
         return None
     return factory(co_index)
Exemple #14
0
    def get_build_detail(self, building_url, building, co_id):
        """Fill ``building`` from its detail page and store the linked houses.

        The page is fetched with ``self.headers`` and decoded as gb2312
        (undecodable bytes ignored). Newlines, carriage returns, tabs and
        spaces are then removed, which is why the patterns below contain no
        whitespace at all. Every ``window.open('...')`` target is treated as
        a house page: ``self.get_house_detail`` is called for it and the
        returned object is saved with ``insert_db()``.

        ``building`` itself is not saved here; it is returned to the caller.

        :param building_url: detail page URL; the building id is taken from
            its first ``=...&`` query value.
        :param building: record object whose ``bu_*`` / ``co_id`` attributes
            are assigned.
        :param co_id: community id stored on the building and passed on for
            the houses.
        :return: ``building`` on success; ``None`` (implicitly) when any
            exception was caught and printed.
        """
        try:
            res = requests.get(url=building_url, headers=self.headers)
            html = res.content.decode('gb2312', 'ignore').replace(
                '\n', '').replace('\r', '').replace('\t', '').replace(' ', '')
            bu_id = building_url.split('=')[1].split('&')[0]  # building id
            # NOTE(review): several patterns below use a greedy ``(.*)?``
            # rather than ``(.*?)`` -- presumably fine because the trailing
            # anchor text is unique on the page; confirm.
            bu_name = re.search(
                r'项目名称:</td><tdwidth="1"rowspan="6"background="images/trbg3.gif"></td><tdwidth="200"align="left"class="padingleft3px">(.*)?</td><tdwidth="1"rowspan="6"align="right"bgcolor="#CECFCE"></td><tdwidth="2"rowspan="6"align="right"bgcolor="#FFFFFF">',
                html).group(1)  # building name
            bu_num = re.search(
                r'号:</td><tdwidth="1"rowspan="6"background="images/trbg3.gif"></td><tdalign="left"class="padingleft3px">(.*)?</td></tr><tr><tdheight="25"align="right">总&nbsp;套&nbsp;数',
                html).group(1)  # building number
            # print(bu_num)
            bu_all_house = re.search(
                r'总&nbsp;套&nbsp;数:</td><tdalign="left"class="padingleft3px">(.*?)</td><tdalign="right">可售套数',
                html).group(1)  # total number of units
            bu_floor = re.search(
                r'总层数:</td><tdalign="left"class="padingleft3px">(.*)?</td><tdalign="right">项目类型',
                html).group(1)  # total number of floors
            bu_build_size = re.search(
                r'建筑面积:</td><tdalign="left"class="padingleft3px"><FONTcolor=#ff0000>(.*)?M&sup2;</FONT></td><tdalign="right">住宅面积',
                html).group(1)  # build area
            bu_live_size = re.search(
                r'住宅面积:</td><tdalign="left"class="padingleft3px">(.*)?M&sup2;</td></tr><tr><tdheight="25"align="right">幢套内建筑面积',
                html).group(1)  # residential area
            bu_not_live_size = re.search(
                r'非住宅面积:</td><tdalign="left"class="padingleft3px">(.*)?M&sup2;</td></tr><tr><tdheight="25"align="right">预',
                html).group(1)  # non-residential area
            bu_price = re.search(
                r'拟销住宅价格:</td><tdbackground="images/trbg3.gif"></td><tdalign="left"class="padingleft3px">(.*)?</td><tdalign="right"bgcolor="#CECFCE"></td><tdalign="right"bgcolor="#FFFFFF"></td><tdalign="right"bgcolor="#CECFCE"></td><tdalign="right">拟销商业门面价格',
                html).group(1).split('元')[0]  # residential price (text before the currency unit)

            bu_type = re.search('项目类型:</td>.*?ft3px">(.*?)</td>', html,
                                re.S | re.M).group(1)
            building.co_id = co_id
            building.bu_id = bu_id
            building.bu_name = bu_name
            building.bu_num = bu_num
            building.bu_all_house = bu_all_house
            building.bu_floor = bu_floor
            building.bu_build_size = bu_build_size
            building.bu_live_size = bu_live_size
            building.bu_not_live_size = bu_not_live_size
            building.bu_price = bu_price
            building.bu_type = bu_type
            # Collect the house-detail links (targets of window.open calls).
            house_url_list = re.findall(r"window.open\('(.+?)'\)", html)
            for i in house_url_list:
                house_url = 'http://www.bsfcj.com/PubInfo/' + i
                # NOTE(review): hard-coded 1 while the error message below
                # uses self.co_index -- presumably the same value; confirm.
                house = House(1)
                house_obj = self.get_house_detail(house_url, house, co_id,
                                                  bu_id)
                # If get_house_detail returns None this raises AttributeError,
                # which the outer handler catches, aborting the remaining
                # houses and the return of ``building``.
                house_obj.insert_db()
            return building
        except Exception as e:
            print(
                '楼栋解析或者请求的过程中出现错误,co_index={},url={}'.format(
                    self.co_index, building_url), e)
 def get_house_info(self, house_url, bu_id):
     """Scrape one building page on thfdc.net and store a house per table row.

     Every ``<tr onClick=...>`` row is parsed by cell position: 2nd cell is
     the name, 3rd the area field, 5th the build size and 7th the type.
     A failing row is reported with ``print`` and skipped.

     :param house_url: site-relative path of the building page.
     :param bu_id: building id stored on each house.
     """
     url = 'http://thfdc.net/' + house_url
     response = requests.get(url, headers=self.headers)
     html = response.text
     flags = re.S | re.M
     for row in re.findall('<tr onClick=.*?</tr>', html, flags):
         try:
             house = House(co_index)
             house.ho_name = re.search('<td.*?<td.*?>(.*?)<', row, flags).group(1)
             house.area = re.search('<td.*?<td.*?<td.*?>(.*?)<', row, flags).group(1)
             house.ho_build_size = re.search('<td.*?<td.*?<td.*?<td.*?<td.*?>(.*?)<', row, flags).group(1)
             house.ho_type = re.search('<td.*?<td.*?<td.*?<td.*?<td.*?<td.*?<td.*?>(.*?)<', row, flags).group(1)
             house.bu_id = bu_id
             house.insert_db()
         except Exception as e:
             # The original called ``.format()`` with no arguments, which
             # raised IndexError from inside this handler and aborted the
             # whole page instead of skipping one row.
             print('房号错误,co_index={},url={}'.format(co_index, url), e)
 def ho_parse(self, co_id, bu_id, res):
     """Store one ``House`` for every ``<td title=...>`` cell of a response.

     The name comes from the first ``<span>`` text in the cell. The three
     area fields are parsed from the ``title`` attribute; if any of them is
     missing an info message is logged and the house is stored anyway with
     whichever areas were already set.

     :param co_id: community id stored on each house.
     :param bu_id: building id stored on each house.
     :param res: response object whose ``.text`` is the page markup.
     """
     tree = etree.HTML(res.text)
     for cell in tree.xpath("//td[@title]"):
         tooltip = cell.xpath("./@title")[0]
         room_name = cell.xpath(".//span/text()")[0]
         ho = House(co_index)
         ho.co_id = co_id
         ho.bu_id = bu_id
         ho.ho_name = room_name
         try:
             ho.ho_build_size = re.search('建筑面积:(.*?)\r',
                                          tooltip).group(1)
             ho.ho_true_size = re.search('套内面积:(.*?)\r',
                                         tooltip).group(1)
             ho.ho_share_size = re.search('分摊面积:(.*)', tooltip).group(1)
         except Exception as err:
             log.info("房间无面积")
         # Saved even when the area lookup failed.
         ho.insert_db()
    def house_info(self,co_id,bu_id,bu_url):
        """Store one ``House`` for every ``<a wf=...>`` link of a building page.

        The page is fetched from www.bdfdc.net, followed by a 5 second
        pause. The link text is the house name; build size and usage are
        parsed from the ``wf`` attribute. There is no error handling: a
        link without the expected fields raises.

        :param co_id: community id stored on each house.
        :param bu_id: building id stored on each house.
        :param bu_url: site-relative path of the building page.
        """
        page = requests.get('http://www.bdfdc.net' + bu_url,
                            headers=self.headers)
        time.sleep(5)
        tree = etree.HTML(page.text)
        for link in tree.xpath("//a[@wf]"):
            ho = House(co_index)
            detail = link.xpath("./@wf")[0]
            ho.ho_name = link.xpath("./text()")[0]
            ho.bu_id = bu_id
            ho.co_id = co_id
            size_match = re.search('建筑面积:(.*?)m',detail)
            ho.ho_build_size = size_match.group(1)
            usage_match = re.search('用途:(.*?)<br',detail)
            ho.ho_type = usage_match.group(1)
            ho.insert_db()
 def get_house_info(self, con, co_id, build_id):
     """Parse the house table embedded in ``con`` and store each house.

     Only the text between ``houseTableData`` and ``特别申明`` is
     considered. Every ``<div style...>`` block in it yields one ``House``
     from its ``HC_HOUSENUMB`` / ``HC_HOUSETYPE`` / ``HC_STCTAREA`` values.
     A failing block is reported with ``print`` and skipped.

     :param con: page markup as a string.
     :param co_id: community id stored on each house.
     :param build_id: building id stored on each house as ``bu_id``.
     :raises AttributeError: when the house-table section is not in ``con``.
     """
     section = re.search('houseTableData.*?特别申明', con, re.S | re.M).group()
     blocks = re.findall('<div style.*?</div>', section, re.S | re.M)
     for block in blocks:
         try:
             ho = House(co_index)

             def value_of(pattern):
                 # First capture group of ``pattern`` within this block.
                 return re.search(pattern, block, re.S | re.M).group(1)

             ho.ho_name = value_of("'HC_HOUSENUMB':'(.*?)',")
             ho.ho_room_type = value_of("'HC_HOUSETYPE':'(.*?)',")
             ho.ho_build_size = value_of("'HC_STCTAREA':'(.*?)',")
             ho.bu_id = build_id
             ho.co_id = co_id
             ho.insert_db()
         except Exception as err:
             print('house error, co_index={}'.format(co_index))
Exemple #19
0
 def get_house_info(self, house_url, ho_name, bu_id, co_id):
     """Store one house, enriching it from its detail page when one exists.

     When the resulting URL contains no ``#``, the record is replaced by
     whatever ``self.get_house_detail`` returns for it. Name and ids are
     assigned afterwards, then the record is saved.

     :param house_url: path appended to ``http://www.bjjs.gov.cn``.
     :param ho_name: stored as ``ho_name``.
     :param bu_id: stored as ``bu_id``.
     :param co_id: stored as ``co_id``.
     """
     record = House(co_index)
     detail_url = 'http://www.bjjs.gov.cn' + house_url
     has_fragment = '#' in detail_url
     if not has_fragment:
         record = self.get_house_detail(detail_url, record)
     for attr, value in (('ho_name', ho_name),
                         ('bu_id', bu_id),
                         ('co_id', co_id)):
         setattr(record, attr, value)
     record.insert_db()
Exemple #20
0
    def house_parse(self, co_id, bu_id, bu_con):
        """Store one ``House`` for every styled link text found in ``bu_con``.

        :param co_id: community id stored on each house.
        :param bu_id: building id stored on each house.
        :param bu_con: page markup as a string.
        """
        for link_text in re.findall('<a style.*?\)>(.*?)</a', bu_con):
            record = House(co_index)
            for attr, value in (('co_id', co_id),
                                ('bu_id', bu_id),
                                ('ho_name', link_text)):
                setattr(record, attr, value)
            record.insert_db()
Exemple #21
0
 def get_house_info(self, build_num, sid):
     """Queue a regex-driven scrape of one building's houses on tmsf.com.

     A ``House`` object is used as a rule container: each attribute holds
     the regex for that field, and ``house.to_dict()`` is handed to
     ``ProducerListUrl`` as ``analyzer_rules_dict``. Any error is printed.

     :param build_num: building id; concatenated into the URL (``str``).
     :param sid: site id; concatenated into the URL (``str``).
     """
     # Bound before the ``try`` so the handler can always format it, even
     # when building the URL itself is what failed.
     house_url = None
     try:
         house_url = 'http://www.tmsf.com/newhouse/NewPropertyHz_showbox.jspx?buildingid=' + build_num + '&sid=' + sid
         house = House(co_index)
         house.bu_id = 'buildingid":(.*?),'
         # NOTE(review): every other rule is ``ho_*``; ``co_build_size`` may
         # be a typo for ``ho_build_size`` -- confirm against the House schema.
         house.co_build_size = 'builtuparea":(.*?),'
         house.ho_price = 'declarationofroughprice":(.*?),'
         house.ho_name = 'houseno":(.*?),'
         house.ho_true_size = 'setinsidefloorarea":(.*?),'
         house.ho_share_size = 'poolconstructionarea":(.*?),'
         house.ho_type = 'houseusage":(.*?),'
         p_2 = ProducerListUrl(page_url=house_url,
                               request_type='get',
                               encode='utf-8',
                               analyzer_rules_dict=house.to_dict(),
                               analyzer_type='regex',
                               headers=self.headers)
         p_2.get_details()
     except Exception as e:
         print('房号错误,co_index={},url={}'.format(co_index, house_url), e)
 def house_parse(self, bu_id):
     """Resolve a building's logic id on hyfc365.com and store its houses.

     The first request maps ``Building_ID`` to a ``LOGICBUILDING_ID``; the
     second returns the house table, where every ``t_house`` element
     becomes one ``House``. Any failure is logged with the first URL and
     ends processing of this building.

     :param bu_id: building id; concatenated into the URL (``str``) and
         stored on each house.
     """
     logic_url = 'http://www.hyfc365.com/WebRecordManager/HouseTableControl/GetData.aspx?Building_ID=' + bu_id
     try:
         logic_page = requests.get(logic_url)
         logic_id = re.search('<LOGICBUILDING_ID>(.*?)<', logic_page.text).group(1)
         house_url = 'http://www.hyfc365.com/WebRecordManager/HouseTableControl/GetData.aspx?LogicBuilding_ID=' + logic_id
         tree = etree.HTML(requests.get(house_url).text)
         for node in tree.xpath("//t_house"):
             ho = House(co_index)
             ho.bu_id = bu_id
             # attribute name -> child element holding its text
             for attr, child in (('ho_name', './room_number/text()'),
                                 ('ho_build_size', './build_area/text()'),
                                 ('ho_share_size', './build_area_share/text()'),
                                 ('ho_true_size', './build_area_inside/text()'),
                                 ('ho_floor', './floor_realright/text()')):
                 setattr(ho, attr, node.xpath(child)[0])
             ho.insert_db()
     except Exception as err:
         log.error("{}房屋解析失败{}".format(logic_url, err))
 def ho_parse(self, co_id, bu_id, house_url):
     """Store one ``House`` for every ``<td align=...>`` cell of a page.

     The first text node of the cell is used as the house name. There is
     no error handling: a cell without text raises ``IndexError``.

     :param co_id: community id stored on each house.
     :param bu_id: building id stored on each house.
     :param house_url: absolute URL of the page to fetch.
     """
     page = requests.get(house_url, headers=self.headers)
     tree = etree.HTML(page.text)
     for cell in tree.xpath("//td[@align]"):
         record = House(co_index)
         record.co_id = co_id
         record.bu_id = bu_id
         cell_texts = cell.xpath("./text()")
         record.ho_name = cell_texts[0]
         record.insert_db()
    def get_house_info(self, bu_id):
        """POST the build-table request for ``bu_id`` and store each house.

        The endpoint on www.ytfcjy.com takes an XML payload; every
        ``title='...'`` attribute in the response describes one house. There
        is no error handling: a title without the expected fields raises.

        :param bu_id: building id; embedded in the XML payload (``str``) and
            stored on each house.
        """
        house_url = 'http://www.ytfcjy.com/Common/Agents/ExeFunCommon.aspx'

        xml_head = "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\r\n<param funname=\"SouthDigital.Wsba.CBuildTableEx.GetBuildHTMLEx\">\r\n<item>"
        xml_tail = "</item>\r\n<item>1</item>\r\n<item>1</item>\r\n<item>80</item>\r\n<item>720</item>\r\n<item>g_oBuildTable</item>\r\n<item> 1=1</item>\r\n</param>\r\n"
        payload = xml_head + bu_id + xml_tail
        headers = {'Content-Type': "text/xml"}

        response = requests.request("POST", house_url, data=payload, headers=headers)
        titles = re.findall("title='(.*?)'", response.text, re.S | re.M)
        for title in titles:
            house = House(co_index)

            def field(pattern):
                # First capture group of ``pattern`` within this title.
                return re.search(pattern, title, re.S | re.M).group(1)

            house.ho_name = field('房号:(.*?)单元')
            house.ho_build_size = field('总面积:(.*?) 平方米')
            house.ho_type = field('用途:(.*?)户')
            house.ho_price = field('价格:(.*?) 元')
            house.bu_id = bu_id
            house.info = title
            house.insert_db()
Exemple #25
0
 def get_house_info(self, house_url_list, bu_id):
     """Fetch each house page on www.lpsfdc.cn and store one ``House``.

     All five fields are extracted first and only then assigned, so a page
     missing any field stores nothing. Errors are printed and the next URL
     is processed.

     :param house_url_list: iterable of paths relative to the site's
         ``Templets/LPS/aspx/`` directory.
     :param bu_id: building id stored on each house.
     """
     # attribute name -> pattern whose first match supplies the value
     field_patterns = (
         ('ho_name', 'ROOM_ROOMNO">(.*?)<'),
         ('ho_type', 'ROOM_FWLX">(.*?)<'),
         ('ho_build_size', 'ROOM_YCJZMJ">(.*?)<'),
         ('ho_true_size', 'ROOM_YCTNMJ">(.*?)<'),
         ('ho_share_size', 'ROOM_YCFTMJ">(.*?)<'),
     )
     for path in house_url_list:
         try:
             house = House(co_index)
             house_url = 'http://www.lpsfdc.cn/Templets/LPS/aspx/' + path
             page = requests.get(house_url).text
             extracted = [
                 (attr, re.findall(pattern, page, re.S | re.M)[0])
                 for attr, pattern in field_patterns
             ]
             for attr, value in extracted:
                 setattr(house, attr, value)
             house.bu_id = bu_id
             house.insert_db()
         except Exception as err:
             print(err)
    def get_build_info(self, co_id, co_name):
        """Walk project -> presale -> building pages on czhome.com.cn.

        For every presale row of the project page, the presale page is
        fetched; for every building row on it, a ``Building`` is stored
        with a fresh id taken from the module-level counter
        ``building_id``, and each house link on the building page is handed
        to ``self.get_house_info``. Building- and house-level errors are
        printed and skipped; pages answering with a non-200 status are
        reported and skipped.

        :param co_id: community id; concatenated into the URL (``str``) and
            stored on each building.
        :param co_name: project name; concatenated into the URL (``str``).
        :raises IndexError: when a presale row has no link in its second
            cell (that lookup is outside any ``try``).
        """
        global building_id
        base = 'http://www.czhome.com.cn/'
        flags = re.S | re.M

        project_url = base + 'Presell.asp?projectID=' + co_id + '&projectname=' + co_name
        response = requests.get(project_url, headers=self.headers)
        project_tree = etree.HTML(response.content.decode('gbk'))
        presale_rows = project_tree.xpath('//tr[@class="indextabletxt"]')
        for presale_row in presale_rows[1:]:  # first row is skipped
            build_url = presale_row.xpath('td[2]/a/@href')[0]
            presale_url = base + build_url
            result = requests.get(presale_url, headers=self.headers)
            # Compare by value: ``is not 200`` tests object identity and only
            # works by accident of CPython's small-int cache.
            if result.status_code != 200:
                print("co_index={},预售url:{}连接失败".format(co_index, presale_url))
                continue
            presale_tree = etree.HTML(result.content.decode('gbk'))
            # Building rows of the presale table (first row is skipped).
            building_rows = presale_tree.xpath(
                '/html/body/table/tr/td/table/tr/td/table/tr')[1:]
            for building_row in building_rows:
                try:
                    # NOTE(review): hard-coded 7 while the messages use
                    # ``co_index`` -- presumably the same value; confirm.
                    building = Building(7)
                    # The id is consumed even if this building fails later.
                    building_id += 1
                    bu_all_house = building_row.xpath('td[7]/text()')[0]
                    bu_url = building_row.xpath('td[1]/a/@href')[0]
                    building_url = base + bu_url
                    response = requests.get(building_url, headers=self.headers)
                    if response.status_code != 200:
                        print("co_index={},楼栋url:{}连接失败".format(co_index, building_url))
                        continue
                    html = response.content.decode('gbk')
                    building_tree = etree.HTML(html)
                    # floors
                    bu_floor = building_tree.xpath(
                        '//*[@id="Table4"]/tr[2]/td/table[3]/tr/td[1]/u/text()')[-1]
                    house_url_list = building_tree.xpath(
                        '//*[@id="Table4"]/tr[2]/td/table[3]/tr/td/a/@href')
                    bu_address = re.search(
                        '<center><font color=.*?&nbsp;&nbsp;(.*?)<', html,
                        flags).group(1)
                    building.bu_all_house = bu_all_house
                    building.bu_address = bu_address
                    building.bu_floor = bu_floor
                    building.bu_id = building_id
                    building.co_id = co_id
                    building.insert_db()
                    for house_path in house_url_list:
                        try:
                            house = House(7)
                            house_url = base + house_path
                            self.get_house_info(house_url, house, co_id,
                                                building_id, building)
                        except Exception as e:
                            print(e)

                except Exception as e:
                    print(e)
 def get_house_info(self, house_url, co_id, bu_id):
     """Parse the houses listed on one building page and store each of them.

     ``house_url`` is appended to ``http://58.51.240.121:8503/`` to form the
     page address. The downloaded page is cut into
     ``getMoreHouseInfo ... </table>`` fragments; the first fragment is
     skipped and every remaining one is turned into a ``House`` carrying
     ``co_id`` and ``bu_id``, then saved with ``insert_db()``.

     Once the address has been built, any exception (failed request, a
     fragment lacking one of the fields, an error while saving) is caught
     and printed, and the remaining fragments of the page are not
     processed. Nothing is returned.
     """
     house_url_ = 'http://58.51.240.121:8503/' + house_url
     # (House attribute, pattern whose first group holds the value),
     # evaluated in this order for every fragment.
     field_patterns = (
         ('ho_name', '>(.*?)<'),
         ('ho_type', '性质&nbsp;(.*?)<'),
         ('ho_build_size', '面积&nbsp;(.*?)<'),
         ('co_build_structural', '结构&nbsp;(.*?)<'),
     )
     try:
         page = requests.get(house_url_, headers=self.headers).text
         fragments = re.findall('getMoreHouseInfo.*?</table>', page,
                                re.S | re.M)
         # The first match is deliberately left out, as it is not a house.
         # NOTE(review): presumably a header/script occurrence - confirm.
         for fragment in fragments[1:]:
             house = House(co_index)
             house.co_id = co_id
             house.bu_id = bu_id
             for attr, pattern in field_patterns:
                 found = re.search(pattern, fragment, re.S | re.M)
                 # A missing field makes ``found`` None; the resulting
                 # AttributeError is handled by the except clause below.
                 setattr(house, attr, found.group(1))
             house.insert_db()
     except Exception as e:
         # Message text means "request error".
         print('请求错误,co_index={},url={}'.format(co_index, house_url_), e)
Exemple #28
0
 def get_house_info(self, house_url_list, bu_id):
     """Download every house page of a building and store the parsed house.

     Args:
         house_url_list: iterable of paths, each appended to
             ``http://183.63.60.194:8808/public/web/`` to form a page URL.
         bu_id: building identifier copied onto every stored ``House``.

     For each page the house number, inner area, building area, usage and
     orientation are extracted with regular expressions and the record is
     saved with ``insert_db()``. Pages answering with a status other than
     200 are reported with ``print`` and skipped.

     Raises:
         AttributeError: if a fetched page lacks one of the expected fields
             (``re.search`` returns ``None``). Request errors from
             ``self.s.get`` also propagate to the caller.
     """
     for i in house_url_list:
         house = House(co_index)
         house_url = 'http://183.63.60.194:8808/public/web/' + i
         # One-second pause before every request.
         # NOTE(review): presumably rate limiting for the remote site.
         time.sleep(1)
         response = self.s.get(house_url, headers=self.headers)
         # Compare by value: ``is not 200`` tested object identity, which
         # only works by accident of CPython's small-int cache and raises a
         # SyntaxWarning on Python 3.8+.
         if response.status_code != 200:
             # Message text means "house number error".
             print('房号错误,co_index={},url={}'.format(co_index, house_url))
             continue
         html = response.text
         house.bu_id = bu_id
         house.ho_name = re.search('HouseNO.*?>(.*?)<', html,
                                   re.S | re.M).group(1)
         house.ho_true_size = re.search('HouseArea.*?>(.*?)<', html,
                                        re.S | re.M).group(1)
         house.ho_build_size = re.search('SumBuildArea1.*?>(.*?)<', html,
                                         re.S | re.M).group(1)
         house.ho_type = re.search('HouseUse.*?>(.*?)<', html,
                                   re.S | re.M).group(1)
         house.orientation = re.search('CHX.*?>(.*?)<', html,
                                       re.S | re.M).group(1)
         house.insert_db()
 def get_house_info(self, code, co_name):
     """Fetch the house table of one building and store every house found.

     ``code[0]`` is embedded as the first ``<item>`` of the XML request
     posted to the ``ExeFunCommon.aspx`` endpoint, and ``code[1]`` is stored
     as the building number of each house. ``co_name`` is copied onto every
     house as well.

     Each ``title='...'`` attribute in the response is treated as one house
     description made of CRLF-terminated ``label:value`` lines. A
     description that cannot be parsed or saved is reported with ``print``
     and skipped; the others are still processed. Nothing is returned.
     """
     house_url = 'http://house.bffdc.gov.cn/Common/Agents/ExeFunCommon.aspx?'
     payload = "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\r\n<param funname=\"SouthDigital.Wsba.CBuildTableEx.GetBuildHTMLEx\">\r\n<item>" + \
               code[
                   0] + "</item>\r\n<item>1</item>\r\n<item>1</item>\r\n<item>55</item>\r\n<item>840</item>\r\n<item>g_oBuildTable</item>\r\n<item>false</item>\r\n<item> 1=1</item>\r\n</param>\r\n"
     headers = {
         'Content-Type': "text/xml",
     }
     # (House attribute, pattern capturing the value up to the line's CRLF),
     # applied in this order to every description.
     field_patterns = (
         ('ho_name', '房号:(.*?)\r\n'),
         ('ho_type', '用途:(.*?)\r\n'),
         ('ho_room_type', '户型:(.*?)\r\n'),
         ('ho_build_size', '总面积:(.*?)\r\n'),
     )
     response = requests.post(url=house_url, data=payload, headers=headers)
     descriptions = re.findall("title='(.*?)'", response.text, re.S | re.M)
     for description in descriptions:
         try:
             house = House(co_index)
             house.bu_num = code[1]
             for attr, pattern in field_patterns:
                 # A missing label yields None here; the AttributeError
                 # from ``.group`` is handled per description below.
                 setattr(house, attr,
                         re.search(pattern, description).group(1))
             house.co_name = co_name
             house.insert_db()
         except Exception as e:
             print(e)
 def get_house_detail(self, house_url_list, bu_id, co_id):
     """Download each house detail page and store the parsed house.

     Args:
         house_url_list: iterable of house identifiers; each is appended to
             the ``housedetail.aspx?houseID=`` address to form the page URL.
         bu_id: building identifier copied onto every stored ``House``.
         co_id: community identifier copied onto every stored ``House``.

     The name, room type and building area are read from the ``Label1``,
     ``Label2`` and ``Label3`` elements of the page. A house that cannot be
     fetched, parsed or saved is reported with ``print`` and skipped; the
     remaining ones are still processed. Nothing is returned.
     """
     for i in house_url_list:
         # Reset per iteration so the error report below never hits an
         # unbound name (first iteration) or shows the previous house's URL.
         house_detail_url = None
         try:
             house_detail_url = 'http://222.184.103.50:7700/WW/housedetail.aspx?houseID=' + i
             house = House(co_index)
             response = requests.get(house_detail_url, headers=self.headers)
             html = response.text
             house.ho_name = re.search('id="Label1">(.*?)<', html,
                                       re.S | re.M).group(1)
             house.ho_room_type = re.search('id="Label2">(.*?)<', html,
                                            re.S | re.M).group(1)
             house.ho_build_size = re.search('id="Label3">(.*?)<', html,
                                             re.S | re.M).group(1)
             house.co_id = co_id
             house.bu_id = bu_id
             house.insert_db()
         except Exception as e:
             # Message text means "request error".
             print(
                 '请求错误,co_index={},url={}'.format(co_index,
                                                  house_detail_url), e)