コード例 #1
0
ファイル: heze_18.py プロジェクト: w4205/hilder_gv
    def room_crawler(self, room):  # rooms / housing units
        """Crawl one building page and store every room on it as a House row.

        :param room: site-relative URL of the building page.  It is appended
            to the site root, and the community id and building id are read
            from it with the pattern ``<digits>&<name>=<digits>``.

        No error handling is done here: a page or ``room`` value that does not
        have the expected shape raises IndexError / AttributeError.
        """
        house_url = "http://www.hzszjj.gov.cn" + room

        res = requests.get(house_url)
        con = etree.HTML(res.text)

        # Both ids come from ``room`` and are the same for every unit, so the
        # search runs once.  The groups are still read inside the loop so a
        # non-matching ``room`` only fails when there is a unit to store.
        id_num = re.search(r"(\d+)&\w+=(\d+)", room)

        ho_table = con.xpath("//tr[@bgcolor='#fbf3e6']")
        # The first and the last matching row are skipped.
        for ho_list in ho_table[1:-1]:
            ho_floor = ho_list.xpath("./td[@align='center']/text()")[0]
            honum_list = ho_list.xpath(".//tr/td[@height='40']")
            for house in honum_list:
                # A fresh record per room: the two branches below fill
                # different fields, so a shared object would carry values
                # (e.g. share/inside area, room type) over from one room to
                # the next.
                ho = House(co_index)
                ho.ho_floor = ho_floor  # floor
                ho.co_id = id_num.group(1)  # community id
                ho.bu_id = id_num.group(2)  # building id
                ho_url = house.xpath("./a/@href")[0]
                if len(ho_url) == 1:
                    # One-character href (presumably a placeholder such as
                    # "#" - TODO confirm): the data sits in the "wf" attribute.
                    ho_info = house.xpath("./a/@wf")[0]

                    ho.ho_name = house.xpath("./a/text()")[0]
                    info = re.search(
                        r":(.*?)<br>.*?:(.*?)<br>(.*?)<br><hr>.*?:(.*?)m.*?<br>.*?:(.*?)<br>.*?:(.*?)m",
                        ho_info)
                    ho.ho_type = info.group(5)
                    ho.ho_build_size = info.group(4)
                    ho.ho_room_type = info.group(2)

                else:
                    # Otherwise the href points at a detail page to scrape.
                    detail_url = "http://www.hzszjj.gov.cn/ts_web_dremis/web_house_dir/" + ho_url
                    detail_res = requests.get(detail_url)
                    detail = etree.HTML(detail_res.text)
                    ho.ho_name = detail.xpath(
                        "//span[@id='ctl00_ContentPlaceHolder2_lb_house_name']/text()"
                    )[0]
                    ho.ho_type = detail.xpath(
                        "//span[@id='ctl00_ContentPlaceHolder2_lb_house_type']/text()"
                    )[0]
                    ho.ho_build_size = detail.xpath(
                        "//span[@id='ctl00_ContentPlaceHolder2_lb_house_build_area']/text()"
                    )[0]
                    ho.ho_share_size = detail.xpath(
                        "//span[@id='ctl00_ContentPlaceHolder2_lb_house_share_area']/text()"
                    )[0]
                    ho.ho_true_size = detail.xpath(
                        "//span[@id='ctl00_ContentPlaceHolder2_lb_house_inside_area']/text()"
                    )[0]

                ho.insert_db()
コード例 #2
0
ファイル: luohe_153.py プロジェクト: w4205/hilder_gv
 def get_house_detail(self, house_url_list, bu_id, co_id):
     """Fetch the RoomInfo page of every room code and store it as a House.

     :param house_url_list: iterable of room codes appended to the RoomInfo URL.
     :param bu_id: building id written to every record.
     :param co_id: community id written to every record.

     Any error while handling one room is printed and the loop moves on.
     """
     flags = re.S | re.M
     # (House attribute, pattern) pairs, applied in this order.
     field_patterns = (
         ('ho_name', 'id="ROOM_ROOMNO">(.*?)<'),
         ('ho_room_type', 'id="ROOM_FWHX">(.*?)<'),
         ('ho_type', 'id="ROOM_GHYT">(.*?)<'),
         ('ho_build_size', 'id="ROOM_YCJZMJ">(.*?)<'),
         ('ho_true_size', 'id="ROOM_YCTNMJ">(.*?)<'),
         ('ho_share_size', 'id="ROOM_YCFTMJ">(.*?)<'),
     )
     for room_code in house_url_list:
         try:
             record = House(co_index)
             house_detail_url = 'http://www.lhfdc.gov.cn/templets/lh/aspx/hpms/RoomInfo.aspx?code=' + room_code
             page = requests.get(house_detail_url, headers=self.headers).text
             for attr, pattern in field_patterns:
                 setattr(record, attr,
                         re.search(pattern, page, flags).group(1))
             record.bu_id = bu_id
             record.co_id = co_id
             record.insert_db()
         except Exception as e:
             message = '房号错误,co_index={},url={}'.format(
                 co_index, house_detail_url)
             print(message, e)
コード例 #3
0
ファイル: nanjing_74.py プロジェクト: w4205/hilder_gv
    def house_info(self, co_id, bu_id, house_url_list):
        """Download each house page through the proxy helper and store it.

        :param co_id: community id written to every record.
        :param bu_id: building id written to every record.
        :param house_url_list: iterable of URLs relative to the ``spf/`` root.

        A house whose download or parsing fails is logged and skipped; the
        database insert itself is not covered by that handler.
        """
        flags = re.S | re.M
        # (House attribute, pattern) pairs, applied in this order.
        field_patterns = (
            ('ho_name', '房号.*?;">(.*?)</td'),
            ('ho_price', '价格.*?<td>(.*?)元'),
            ('ho_floor', '楼层.*?;">(.*?)</td'),
            ('ho_build_size', '建筑面积.*?<td>(.*?)m'),
            ('ho_true_size', '套内面积.*?<td>(.*?)m'),
            ('ho_share_size', '分摊面积.*?<td>(.*?)m'),
            ('ho_type', '房屋类型.*?<td>(.*?)</td'),
        )
        for relative_url in house_url_list:
            house_url = "http://www.njhouse.com.cn/2016/spf/" + relative_url
            try:
                fetcher = Proxy_contact(app_name="nanjing",
                                        method='get',
                                        url=house_url,
                                        headers=self.headers)
                # The helper returns raw bytes; the page is GBK encoded.
                page = fetcher.contact().decode('gbk')

                ho = House(co_index)
                ho.co_id = co_id
                ho.bu_id = bu_id
                for attr, pattern in field_patterns:
                    setattr(ho, attr, re.search(pattern, page, flags).group(1))
            except Exception as e:
                log.error("房屋详情页错误{}".format(e))
            else:
                ho.insert_db()
コード例 #4
0
ファイル: yangzhou_60.py プロジェクト: w4205/hilder_gv
 def get_house_detail(self, house_detail_url_list, co_id, bu_id):
     """Fetch every house detail page with the shared session and store it.

     :param house_detail_url_list: iterable of URLs relative to the site root.
     :param co_id: community id written to every record.
     :param bu_id: building id written to every record.

     The method sleeps three seconds before each request.  Any error while
     handling one house is printed and the loop moves on.
     """
     flags = re.S | re.M
     # (House attribute, pattern) pairs, applied in this order.
     label_patterns = (
         ('co_name', 'lblxmmc.*?>(.*?)<'),
         ('bu_num', 'lbldh.*?>(.*?)<'),
         ('ho_name', 'lblfh.*?>(.*?)<'),
         ('ho_build_size', 'lbljzmj.*?>(.*?)<'),
         ('ho_true_size', 'lbltnmj.*?>(.*?)<'),
         ('ho_share_size', 'lblftmj.*?>(.*?)<'),
         ('ho_type', 'lblfwxz.*?>(.*?)<'),
         ('ho_room_type', 'lblhuxin.*?>(.*?)<'),
     )
     for relative_url in house_detail_url_list:
         detail_url = 'http://www.yzfdc.cn/' + relative_url
         try:
             record = House(co_index)
             time.sleep(3)
             page = self.s.get(detail_url, headers=self.headers).text
             for attr, pattern in label_patterns:
                 setattr(record, attr,
                         re.search(pattern, page, flags).group(1))
             record.bu_id = bu_id
             record.co_id = co_id
             record.insert_db()
         except Exception as e:
             message = '房号错误,co_index={},url={}'.format(co_index, detail_url)
             print(message, e)
コード例 #5
0
ファイル: yunfu_218.py プロジェクト: w4205/hilder_gv
 def get_house_detail(self, house_detail_url, co_id, bu_id):
     """Fetch one house detail page and store it as a House record.

     :param house_detail_url: URL relative to the ``HousePresell/`` root.
     :param co_id: community id written to the record.
     :param bu_id: building id written to the record.

     Returns without storing anything when the page reports that no record
     was found.  Any other error is printed, not raised.
     """
     # Built before the ``try`` so the error message can always reference it.
     house_detail_url_ = 'http://www.yfci.gov.cn:8080/HousePresell/' + house_detail_url
     try:
         house = House(co_index)
         response = requests.get(house_detail_url_, headers=self.headers)
         html = response.text
         if '找不到记录' in html:
             return
         house.ho_name = re.search('id="HouseNO".*?>(.*?)<', html,
                                   re.S | re.M).group(1)
         house.ho_true_size = re.search('id="HouseArea".*?>(.*?)<', html,
                                        re.S | re.M).group(1)
         house.ho_build_size = re.search('id="SumBuildArea1".*?>(.*?)<',
                                         html, re.S | re.M).group(1)
         house.ho_type = re.search('id="HouseUse".*?>(.*?)<', html,
                                   re.S | re.M).group(1)
         # The "CHX" value is stored as the orientation only.  A second
         # assignment used to copy it into ho_type as well, which wiped out
         # the "HouseUse" value read just above.
         house.orientation = re.search('id="CHX".*?>(.*?)<', html,
                                       re.S | re.M).group(1)
         house.co_id = co_id
         house.bu_id = bu_id
         house.insert_db()
     except Exception as e:
         print(
             '房号错误,co_index={},url={}'.format(co_index, house_detail_url_),
             e)
コード例 #6
0
ファイル: quzhou_191.py プロジェクト: w4205/hilder_gv
 def house_parse(self, bu_id, co_id, sid, propertyid):
     """Walk the paginated price search of one building and store each house.

     :param bu_id: building id; sent as ``buildingid`` and written to records.
     :param co_id: community id written to every record.
     :param sid: value sent as the ``sid`` form field.
     :param propertyid: value sent as the ``propertyid`` form field.

     Raises AttributeError when the first response carries no page count and
     IndexError when a row lacks the expected cells.
     """
     search_url = 'http://tmsf.qzfdcgl.com/newhouse/property_pricesearch.htm'
     data = {
         'propertyid': propertyid,
         'sid': sid,
         'buildingid': bu_id,
         'tid': 'price',
         'page': 1
     }
     res = requests.post(search_url, data=data, headers=self.headers)
     # Raw string: "\d" in a plain literal is an invalid escape sequence.
     page = re.search(r'页数.*?/(\d+)', res.text).group(1)
     for i in range(1, int(page) + 1):
         if i == 1:
             # Page 1 was already fetched to read the page count.
             con = res.text
         else:
             data['page'] = i
             ho_res = requests.post(search_url, data=data,
                                    headers=self.headers)
             con = ho_res.text
         ho_html = etree.HTML(con)
         house_list = ho_html.xpath("//tr[@onmouseout]")
         for house in house_list:
             ho = House(co_index)
             ho.co_id = co_id
             ho.bu_id = bu_id
             ho.ho_name = house.xpath("./td[3]/a/div/text()")[0]
             ho.unit = house.xpath("./td[2]/a/div/text()")[0]
             # The numbers are not read as text: the class attributes of the
             # spans are handed to number_replace, which turns them into the
             # stored value.
             buildsize = house.xpath("./td[4]/a/div/span/@class")
             truesize = house.xpath("./td[5]/a/div/span/@class")
             price = house.xpath("./td[9]/a/div/span/@class")
             ho.ho_build_size = self.number_replace(buildsize)
             ho.ho_true_size = self.number_replace(truesize)
             ho.ho_price = self.number_replace(price)
             ho.insert_db()
コード例 #7
0
ファイル: fuzhou_83.py プロジェクト: w4205/hilder_gv
 def house_info(self, ho_url, co_id, bu_id):
     """Open a building page, follow every house link and store each house.

     :param ho_url: URL of the building page, relative to the site root.
     :param co_id: community id written to every record.
     :param bu_id: building id written to every record.

     Errors while handling a single house are printed and the loop moves on;
     errors while fetching the building page propagate.
     """
     base_url = "http://222.77.178.63:7002/"
     url = base_url + ho_url
     # NOTE(review): a bare ``url.rstrip('=')`` used to stand here.  Strings
     # are immutable and the result was discarded, so it never changed the
     # request.  It was dropped rather than "fixed" so the URL actually sent
     # stays the same - confirm whether trailing "=" should be stripped.
     res = requests.get(url, headers=self.headers)
     res.encoding = 'gbk'
     html = etree.HTML(res.text)
     house_detail_list = html.xpath("//td/a[@target]/@href")

     flags = re.S | re.M
     # (House attribute, pattern) pairs, applied in this order.
     field_patterns = (
         ('ho_name', '室号.*?">(.*?)<'),
         ('ho_floor', '实际层.*?">(.*?)<'),
         ('ho_type', '房屋类型.*?">(.*?)<'),
         ('ho_build_size', '预测建筑面积.*?">(.*?)<'),
         ('ho_true_size', '预测套内面积.*?">(.*?)<'),
         ('ho_share_size', '预测分摊面积.*?">(.*?)<'),
         ('ho_price', '总价.*?">(.*?)<'),
     )
     for house_detail in house_detail_list:
         try:
             detail_url = base_url + house_detail
             detail_res = requests.get(detail_url, headers=self.headers)
             detail_res.encoding = 'gbk'
             con = detail_res.text
             ho = House(co_index)
             ho.co_id = co_id
             ho.bu_id = bu_id
             for attr, pattern in field_patterns:
                 setattr(ho, attr, re.search(pattern, con, flags).group(1))
             ho.insert_db()
         except Exception as e:
             print('房屋信息错误{}'.format(e))
コード例 #8
0
ファイル: anqing_98.py プロジェクト: w4205/hilder_gv
    def ho_info(self, url, co_id, bu_id):
        """Fetch a building page through a random proxy and store its rooms.

        :param url: URL of the building page, relative to the site root.
        :param co_id: community id written to every record.
        :param bu_id: building id written to every record.

        The page request is retried without limit until it succeeds.  A room
        that cannot be parsed is reported and skipped.
        """
        ho_url = 'http://www.aqhouse.net/' + url
        while True:
            try:
                # random.choice adapts to the real pool size; a fixed
                # randint(0, 9) raised IndexError for shorter pools, which the
                # handler below swallowed on every retry.
                proxy = random.choice(self.proxies)
                ho_res = requests.get(ho_url,
                                      headers=self.headers,
                                      proxies=proxy)
                break
            except Exception as e:
                print(e)
        ho_html = etree.HTML(ho_res.text)
        # Parent cells of the room links; the cell's title holds the details.
        room_list = ho_html.xpath("//td[@nowrap]/a/..")
        for room in room_list:
            try:
                room_info = room.xpath("./@title")[0]
                ho = House(co_index)
                ho.co_id = co_id
                ho.bu_id = bu_id
                ho.ho_name = room.xpath("./a/text()")[0]
                ho.ho_build_size = re.search('建筑面积:(.*?)平方米',
                                             room_info).group(1)
                ho.ho_true_size = re.search('套内面积:(.*?)平方米',
                                            room_info).group(1)
                ho.ho_share_size = re.search('分摊面积:(.*?)平方米',
                                             room_info).group(1)
                ho.ho_room_type = re.search('套型:(.*)', room_info).group(1)
                ho.ho_price = re.search('价格.*?:(.*?)元/平方米', room_info).group(1)

                ho.insert_db()
            except Exception as e:
                # ``except Exception`` instead of a bare ``except`` so that
                # Ctrl-C / SystemExit still stop the crawler.
                print('房屋解析失败', e)
コード例 #9
0
ファイル: tongling_51.py プロジェクト: w4205/hilder_gv
    def house_parse(self, house_url, co_id, bu_id):
        """Parse a building page and store every house listed on it.

        :param house_url: URL of the building page, relative to the site root.
        :param co_id: community id written to every record.
        :param bu_id: building id written to every record.

        The listing provides name, room type, build size and price; the floor
        comes from each house's own detail page.  A house that fails is
        printed and skipped.
        """
        ho = House(co_index)
        url = "http://spf.tlfdc.cn/" + house_url
        res = requests.get(url, headers=self.headers)
        con = res.text

        # Parallel lists: entry N of each list is taken to describe house N.
        ho_name = re.findall('室号:(.*?)套', con, re.S | re.M)
        ho_room_type = re.findall('套型:(.*?)建', con, re.S | re.M)
        ho_build_size = re.findall('建筑面积:(.*?)参', con, re.S | re.M)
        ho_price = re.findall('价格:(.*?)元', con, re.S | re.M)
        # Raw string: "\?" and "\d" are invalid escapes in a plain literal.
        ho_detail = re.findall(r'href="(show.*?\?id=\d+&id2=\d+&prjid=\d+)"',
                               con, re.S | re.M)
        for index, name in enumerate(ho_name):
            try:
                ho.co_id = co_id
                ho.bu_id = bu_id
                ho.ho_name = name
                ho.ho_room_type = ho_room_type[index]
                ho.ho_build_size = ho_build_size[index]
                ho.ho_price = ho_price[index]
                ho_detail_url = "http://spf.tlfdc.cn/" + ho_detail[index]
                detail_res = requests.get(ho_detail_url, headers=self.headers)
                detail_con = detail_res.content.decode('gb2312')
                ho.ho_floor = re.findall('楼层.*?">(.*?)</td>', detail_con,
                                         re.S | re.M)[0].strip()

                ho.insert_db()
            except Exception as e:
                # The exception must be bound here: the message prints it, and
                # with a bare ``except:`` the handler itself raised NameError.
                print('房号错误,co_index={},url={}'.format(co_index, url), e)
                continue
コード例 #10
0
 def get_house_info(self, bu_id, co_id):
     """Request the building table of one building and store its houses.

     :param bu_id: building id; placed in the XML request body and written to
         every record.
     :param co_id: community id written to every record.

     Every house is described by the ``title`` text of its table cell.  A
     house that cannot be parsed is printed and skipped; request errors
     propagate.
     """
     house_url = "http://www.xyfdc.gov.cn/wsba/Common/Agents/ExeFunCommon.aspx"
     # XML body of the call; only the first <item> varies (the building id).
     payload = (
         '<?xml version="1.0" encoding="utf-8" standalone="yes"?>\r\n'
         '<param funname="SouthDigital.Wsba.CBuildTableEx.GetBuildHTMLEx">\r\n'
         '<item>' + bu_id + '</item>\r\n'
         '<item>1</item>\r\n'
         '<item>1</item>\r\n'
         '<item>80</item>\r\n'
         '<item>840</item>\r\n'
         '<item>g_oBuildTable</item>\r\n'
         '<item> 1=1</item>\r\n'
         '<item>1</item>\r\n'
         '<item>false</item>\r\n'
         '</param>\r\n'
     )
     request_headers = {'Content-Type': "text/xml"}
     html = requests.request("POST",
                             house_url,
                             data=payload,
                             headers=request_headers).text
     flags = re.S | re.M
     titles = re.findall(
         "onclick=.g_oBuildTable.clickRoom.*? title='(.*?)'", html, flags)
     # (House attribute, pattern) pairs, applied in this order.
     field_patterns = (
         ('ho_name', '房号:(.*?)单元:'),
         ('ho_build_size', '总面积:(.*?)平方米'),
         ('ho_type', '用途:(.*?)户型'),
         ('ho_room_type', '户型:(.*?)状态'),
     )
     for title in titles:
         try:
             record = House(co_index)
             for attr, pattern in field_patterns:
                 setattr(record, attr,
                         re.search(pattern, title, flags).group(1))
             record.info = title
             record.bu_id = bu_id
             record.co_id = co_id
             record.insert_db()
         except Exception as e:
             message = '房号错误,co_index={},url={},data={}'.format(
                 co_index, house_url, payload)
             print(message, e)
コード例 #11
0
ファイル: ningbo_33.py プロジェクト: w4205/hilder_gv
 def get_house_info(self, build_url_list):
     """Fetch the house table of every building URL and store each house.

     :param build_url_list: iterable of building URLs; each must contain a
         ``qrykey=...&`` parameter, which is also used as the building id.

     A house that cannot be parsed is printed and skipped.  A URL without
     ``qrykey`` or a failing request raises.
     """
     flags = re.S | re.M
     # (House attribute, pattern) pairs, applied in this order.
     field_patterns = (
         ('ho_name', '房号:(.*?)&'),
         ('ho_build_size', '建筑面积:(.*?)&'),
         ('ho_share_size', '分摊面积:(.*?)&'),
     )
     for build_url in build_url_list:
         qrykey = re.search('qrykey=(.*?)&', build_url).group(1)
         house_url = 'http://old.newhouse.cnnbfdc.com/GetHouseTable.aspx?qrykey=' + qrykey
         html = requests.get(house_url, headers=self.headers).text
         for info in re.findall('(房号:.*?")', html, flags):
             try:
                 record = House(co_index)
                 for attr, pattern in field_patterns:
                     setattr(record, attr,
                             re.search(pattern, info, flags).group(1))
                 record.info = info
                 record.bu_id = qrykey
                 record.insert_db()
             except Exception as e:
                 message = 'co_index={},房号错误,url ={} '.format(
                     co_index, house_url)
                 print(message, e)
コード例 #12
0
ファイル: qingyuan_38.py プロジェクト: w4205/hilder_gv
    def get_house_info(self, ho_con=None, headers=None, bu_id=None, url=None):
        """Store every house linked from a building page.

        :param ho_con: already downloaded HTML of the building page; when it
            is None the page is fetched from ``url`` and decoded as GBK.
        :param headers: request headers used for every HTTP request.
        :param bu_id: building id written to every record.
        :param url: building page URL, only used when ``ho_con`` is None.

        No error handling is done: a failing request or a detail page without
        the expected fields raises and ends the loop.
        """
        # Identity test for the "not supplied" sentinel instead of ``== None``.
        if ho_con is None:
            ho_con = requests.get(url, headers=headers).content.decode('gbk')
        html = etree.HTML(ho_con)

        ho_url_list = html.xpath("//td[@width='120']/a/@href")

        for ho_url in ho_url_list:
            ho_detail = 'http://www.qyfgj.cn/newys/' + ho_url
            res = requests.get(ho_detail, headers=headers)
            con = res.content.decode('gbk')
            ho = House(co_index)

            ho.bu_id = bu_id
            ho.ho_num = re.search('房屋号.*?">(.*?)</td', con,
                                  re.S | re.M).group(1)
            ho.ho_build_size = re.search('建筑面积.*?">(.*?)m', con,
                                         re.S | re.M).group(1)
            ho.ho_true_size = re.search('套内面积.*?">(.*?)m', con,
                                        re.S | re.M).group(1)
            ho.ho_type = re.search('房屋用途.*?">(.*?)</td', con,
                                   re.S | re.M).group(1)

            ho.insert_db()
コード例 #13
0
ファイル: qinzhou_215.py プロジェクト: w4205/hilder_gv
 def get_house_info(self, co_id, bu_id):
     """Request the building table of one building and store its houses.

     :param co_id: community id written to every record.
     :param bu_id: building id; sent as ``BuidID`` and written to every record.

     The whole request-and-parse sequence shares one handler: the first
     error is printed and ends the method.
     """
     house_url = "http://202.103.219.149:7000/LeadingEstate/buildingtable/ShowNewBuildingTable.aspx"
     payload = "IsShowHouse=1&BuidID=" + bu_id
     headers = {'Content-Type': "application/x-www-form-urlencoded"}
     try:
         response = requests.request("POST",
                                     house_url,
                                     data=payload,
                                     headers=headers)
         html = response.text
         # One chunk per house: from "HouseID" up to the next closing brace.
         # Raw string: "\}" is an invalid escape in a plain literal.
         house_info_list = re.findall(r'HouseID.*?\}', html, re.S | re.M)
         for i in house_info_list:
             house = House(co_index)
             house.bu_id = bu_id
             house.co_id = co_id
             house.ho_name = re.search('"YCHouseNo":"(.*?)"', i,
                                       re.S | re.M).group(1)
             house.ho_floor = re.search('"ActFLoor":"(.*?)"', i,
                                        re.S | re.M).group(1)
             house.ho_build_size = re.search('"YCJZArea":"(.*?)"', i,
                                             re.S | re.M).group(1)
             house.ho_true_size = re.search('"YCTNJZArea":"(.*?)"', i,
                                            re.S | re.M).group(1)
             house.ho_share_size = re.search('"YCFTJZArea":"(.*?)"', i,
                                             re.S | re.M).group(1)
             house.insert_db()
     except Exception as e:
         # Print the exception as well; without it the cause of the failure
         # was lost.
         print('请求错误,url={},data={}'.format(house_url, payload), e)
コード例 #14
0
ファイル: fuzhou_11.py プロジェクト: w4205/hilder_gv
 def get_house_info(self, bu_id, co_id):
     """Load the XML house table of one building and store every house.

     :param bu_id: building id; used in the first request and written to
         every record.
     :param co_id: community id written to every record.

     Two requests are made: the first resolves the logic-building id, the
     second returns the ``T_HOUSE`` elements.  Errors are printed, not
     raised; a bad house only skips that house.
     """
     url = 'http://www.fzfgj.cn/website/presale/home/HouseTableControl/GetData.aspx?Building_ID=' + bu_id
     try:
         response = requests.get(url=url, headers=self.headers)
         tree = etree.XML(response.text)
         logo = tree.xpath('//LOGICBUILDING_ID/text()')[0]
         url_2 = 'http://www.fzfgj.cn/website/presale/home/HouseTableControl/GetData.aspx?LogicBuilding_ID=' + logo
         result = requests.get(url_2, headers=self.headers)
         tree_2 = etree.XML(result.text)
         house_info_list = tree_2.xpath('T_HOUSE')
         for i in house_info_list:
             try:
                 # NOTE(review): the index is hard-coded as 11 while the
                 # messages below use co_index - confirm they are the same.
                 house = House(11)
                 # Read every value first so an incomplete element fails
                 # before any field is set.
                 ho_name = i.xpath('ROOM_NUMBER/text()')[0]
                 ho_build_size = i.xpath('BUILD_AREA/text()')[0]
                 ho_true_size = i.xpath('BUILD_AREA_INSIDE/text()')[0]
                 ho_share_size = i.xpath('BUILD_AREA_SHARE/text()')[0]
                 ho_floor = i.xpath('FLOOR_REALRIGHT/text()')[0]
                 ho_type = i.xpath('USE_FACT/text()')[0]
                 house.co_id = co_id
                 house.bu_id = bu_id
                 house.ho_build_size = ho_build_size
                 house.ho_true_size = ho_true_size
                 house.ho_share_size = ho_share_size
                 house.ho_floor = ho_floor
                 house.ho_name = ho_name
                 house.ho_type = ho_type
                 house.insert_db()
             except Exception as e:
                 print('房号错误,co_index={},url={}'.format(co_index, url_2), e)
     except Exception as e:
         # ``Exception`` rather than ``BaseException`` so KeyboardInterrupt
         # and SystemExit are no longer swallowed.
         print('房号错误,co_index={},url={}'.format(co_index, url), e)
コード例 #15
0
ファイル: suzhou_56.py プロジェクト: w4205/hilder_gv
 def get_house_info(self, house_url_list):
     """Queue every room page for regex-based extraction.

     :param house_url_list: iterable of room URLs, each carrying
         ``dongid=...&`` and ``roomid=...&`` parameters.

     The House object is not filled with data here: each attribute is set to
     the pattern for that field, and the resulting dict is handed to
     ProducerListUrl as its analyzer rules.  Errors for one URL are printed
     and the loop moves on.
     """
     for i in house_url_list:
         # Default for the error message: if the ids cannot be read from
         # ``i`` the canonical URL is never built, and the handler would
         # otherwise hit an unbound (first item) or stale (later items) name.
         house_url = i
         try:
             dongid = re.search('dongid=(.*?)&', i).group(1)
             roomid = re.search('roomid=(.*?)&', i).group(1)
             house_url = 'http://zjjg.0557fdc.com:9555/xiaoqu/roominfo.aspx?dongid=' + dongid + '&roomid=' + roomid
             house = House(co_index)
             # Field name -> pattern applied to the fetched page.
             house.co_name = 'Labelxqmc">(.*?)<'
             house.area = 'Labelxzq">(.*?)<'
             house.bu_num = 'Labeldongmc">(.*?)<'
             house.ho_type = 'Labelyxyongtu">(.*?)<'
             house.ho_name = '<span id="Labelroommc".*?>(.*?)</span>'
             house.ho_build_size = 'Labeljzmianji">(.*?)<'
             house.ho_true_size = 'Labeltaonei">(.*?)<'
             house.ho_share_size = 'Labelgongtan">(.*?)<'
             house.ho_room_type = 'Labelhuxing">(.*?)<'
             house.bu_id = 'dongid=(.*?)&'
             p = ProducerListUrl(page_url=house_url,
                                 request_type='get',
                                 encode='utf-8',
                                 analyzer_rules_dict=house.to_dict(),
                                 analyzer_type='regex',
                                 headers=self.headers)
             p.get_details()
         except Exception as e:
             print('房号错误,co_index={},url={}'.format(co_index, house_url), e)
コード例 #16
0
ファイル: yueyang_66.py プロジェクト: w4205/hilder_gv
    def get_house_info(self, co_id, bu_id, id):
        """Fetch the house list of one building and store every house.

        :param co_id: community id written to every record.
        :param bu_id: building id written to every record.
        :param id: value appended to the list URL after ``id=``; the name
            shadows the builtin but is kept for callers using keywords.

        A house whose page cannot be fetched or parsed is printed and
        skipped.  Errors on the list page and on insert propagate.
        """
        house_list_url = "http://xx.yyfdcw.com/hetong/fdc_xxdxx.asp?id=" + str(
            id)
        res = requests.get(house_list_url, headers=self.headers)
        con = res.content.decode('gbk')
        # Raw string: "\(" is an invalid escape in a plain literal.
        house_list = re.findall(r"onClick=.*?open\('(.*?)',", con, re.S | re.M)
        for house_ in house_list:
            # The handler now covers the request and the parsing - the steps
            # that can actually fail - instead of only the URL concatenation.
            try:
                house_url = "http://xx.yyfdcw.com/hetong/" + house_
                ho_res = requests.get(house_url, headers=self.headers)
                ho_con = ho_res.content.decode('gbk')

                ho = House(co_index)
                ho.co_id = co_id
                ho.bu_id = bu_id
                ho.ho_name = re.search('室号.*?fafa>(.*?)</TD', ho_con,
                                       re.S | re.M).group(1)
                ho.ho_floor = re.search('实际层.*?fafa>(.*?)</TD', ho_con,
                                        re.S | re.M).group(1)
                ho.ho_build_size = re.search('建筑面积.*?fafa>(.*?)</TD', ho_con,
                                             re.S | re.M).group(1)
                ho.ho_true_size = re.search('套内面积.*?fafa>(.*?)</TD', ho_con,
                                            re.S | re.M).group(1)
                ho.ho_share_size = re.search('分摊面积.*?fafa>(.*?)</TD', ho_con,
                                             re.S | re.M).group(1)
                ho.ho_price = re.search('价格.*?fafa>(.*?)</TD', ho_con,
                                        re.S | re.M).group(1)
                ho.ho_type = re.search('用途.*?fafa>(.*?)</TD', ho_con,
                                       re.S | re.M).group(1)
            except Exception as e:
                print("co_index={},房屋信息错误".format(co_index), e)
                continue

            ho.insert_db()
コード例 #17
0
ファイル: taiyuan_49.py プロジェクト: w4205/hilder_gv
    def house_parse(self, bu_id, co_id):  # parse the house information
        """Post the building query and store every house found in the reply.

        :param bu_id: building id; sent as ``nid`` and written to every record.
        :param co_id: community id; sent as ``projectid`` and written to
            every record.

        Returns without storing anything when the request fails.  Raises
        IndexError when the per-field lists are shorter than the id list.
        """
        ho = House(co_index)
        house_url = "http://ys.tyfdc.gov.cn/Firsthand/tyfc/publish/probld/NBView.do?"
        formdata = {"nid": bu_id, "projectid": co_id}
        try:
            res = requests.post(house_url, data=formdata, headers=self.headers)
        except Exception as e:
            print("co_index={},房屋详情页无法访问".format(co_index), e)
            # Nothing to parse; carrying on would fail on the unbound ``res``.
            return
        con = res.text

        # Parallel lists: entry N of each list is taken to describe house N.
        # Raw strings avoid invalid escape sequences such as "\)".
        ho_name = re.findall(r"""'\);">(.*?)&nbsp;""", con, re.S | re.M)
        ho_build_size = re.findall('<span.*?建筑面积:(.*?)㎡', con, re.S | re.M)
        ho_true_size = re.findall('<span.*?套内面积:(.*?)分', con, re.S | re.M)
        ho_share_size = re.findall('<span.*?分摊面积:(.*?)㎡', con, re.S | re.M)
        ho_type = re.findall('<span.*?用途:(.*?)房', con, re.S | re.M)
        ho_price = re.findall('<span.*?单价:(.*?)"', con, re.S | re.M)
        ho_id = re.findall(r"getHouseBaseInfo\('(.*?)'\)", con, re.S | re.M)
        for index, house_id in enumerate(ho_id):
            ho.co_id = co_id
            ho.bu_id = bu_id
            ho.ho_name = ho_name[index]
            ho.ho_build_size = ho_build_size[index]
            ho.ho_type = ho_type[index]
            ho.ho_share_size = ho_share_size[index]
            ho.ho_price = ho_price[index]
            ho.ho_true_size = ho_true_size[index]
            ho.ho_num = house_id
            ho.insert_db()
コード例 #18
0
ファイル: nanping_32.py プロジェクト: w4205/hilder_gv
    def get_house_info(self, house_url_list, bu_id, co_id):
        """Fetch every house page (GBK encoded) and store it as a House.

        :param house_url_list: iterable of URLs relative to the ``House/`` root.
        :param bu_id: building id written to every record.
        :param co_id: community id written to every record.

        Any error while handling one house is printed and the loop moves on.
        """
        flags = re.S | re.M
        # (House attribute, pattern) pairs, applied in this order.
        field_patterns = (
            ('bu_num', '幢  号:.*?<td>(.*?)<'),
            ('ho_name', '房  号:.*?<td>(.*?)<'),
            ('co_name', '项目名称:.*?<td>(.*?)<'),
            ('ho_build_size', '建筑面积:.*?<td>(.*?)<'),
            ('ho_true_size', '套内面积:.*?<td>(.*?)<'),
            ('ho_share_size', '分摊面积:.*?<td>(.*?)<'),
            ('ho_floor', '所 在 层:.*?<td>(.*?)<'),
        )
        for relative_url in house_url_list:
            try:
                record = House(co_index)
                house_url = 'http://www.fjnpfdc.com/House/' + relative_url
                response = requests.get(house_url, headers=self.headers)
                page = response.content.decode('gbk')

                record.bu_id = bu_id
                record.co_id = co_id
                for attr, pattern in field_patterns:
                    setattr(record, attr,
                            re.search(pattern, page, flags).group(1))

                record.insert_db()
            except Exception as e:
                message = "co_index={},房屋{}错误".format(co_index, relative_url)
                print(message, e)
コード例 #19
0
ファイル: guangan_12.py プロジェクト: w4205/hilder_gv
 def get_house_info(self, form_data_list):
     """Post every form to the building-table endpoint and store the houses.

     :param form_data_list: iterable of form payloads, one per building.

     The building id is read from the reply itself.  Errors are printed, not
     raised; a bad cell only skips that house.
     """
     for data in form_data_list:
         house_url = 'http://www.gafdc.cn/newhouse/GetBuildTableByAjax.ashx'
         try:
             response = requests.post(url=house_url,
                                      data=data,
                                      headers=self.headers)
             html = response.text
             # One table cell per house.
             ho_info_html = re.findall("<td width='95'.*?</td>", html,
                                       re.S | re.M)
             # The building id is the second argument of the last GetData
             # call that appears before the "overflow-x:auto;" marker.
             bu_id_html = re.search("^.*?overflow-x:auto;", html,
                                    re.S | re.M).group()
             # Raw string: "\(" is an invalid escape in a plain literal.
             bu_id = re.findall(r"GetData\('.*?','(.*?)'\)", bu_id_html,
                                re.S | re.M)[-1]
             for i in ho_info_html:
                 try:
                     h = House(co_index)
                     h.bu_id = bu_id
                     h.ho_name = re.search('<td.*?>(.*?)<', i,
                                           re.S | re.M).group(1)
                     h.ho_type = re.search('物业类别:(.*?) ', i,
                                           re.S | re.M).group(1)
                     # Search this house's own cell.  Searching the whole
                     # page returned the first house's area for every house.
                     h.ho_build_size = re.search('建筑面积:(.*?) ', i,
                                                 re.S | re.M).group(1)
                     h.insert_db()
                 except Exception as e:
                     print(
                         '房屋报错,co_index={},url={}'.format(
                             co_index, house_url), e)
         except Exception as e:
             print('房屋报错,co_index={},url={}'.format(co_index, house_url), e)
コード例 #20
0
ファイル: changshu_4.py プロジェクト: w4205/hilder_gv
    def house_crawler(self, house_url, bu_num, co_id, bu_id):
        """Parse a building page and store the houses listed on it.

        :param house_url: URL of the building page, relative to ``self.url``.
        :param bu_num: building number passed to the House constructor.
        :param co_id: community id passed to the House constructor.
        :param bu_id: building id passed to the House constructor.

        Errors while storing the houses of one floor are printed and the
        next floor is tried.
        """
        ho = House(co_index, bu_num=bu_num, co_id=co_id, bu_id=bu_id)

        url = self.url + house_url
        con = requests.get(url, headers=self.headers)
        tr = con.text
        # Parallel lists: entry N of each list is taken to describe house N.
        ho_name = re.findall('室号:(.*?)户', tr, re.S | re.M)  # room number, e.g. unit 3 room 403
        ho_floor = re.findall(r'(\d+)层', tr)  # floor
        ho_type = re.findall('房屋属性:(.*?)"', tr,
                             re.S | re.M)  # house type: ordinary residence / garage, storage
        ho_room_type = re.findall('户型:(.*?)所', tr, re.S | re.M)  # layout
        ho_build_size = re.findall('建筑面积:(.*?)房', tr, re.S | re.M)  # build area

        # NOTE(review): every house is stored once per floor value found on
        # the page (a cross product).  That pairing is kept as it was -
        # confirm whether each house should get only its own floor.
        for floor in ho_floor:
            try:
                ho.ho_floor = floor
                # Indices run 0..len-1.  The former range(1, len + 1) skipped
                # the first house and always ended in an IndexError.
                for index, name in enumerate(ho_name):
                    ho.ho_name = name
                    ho.ho_type = ho_type[index]
                    ho.ho_room_type = ho_room_type[index]
                    ho.ho_build_size = ho_build_size[index]

                    ho.insert_db()
            except Exception as e:
                # Report the failure instead of hiding it behind a bare except.
                print('房号错误,co_index={},url={}'.format(co_index, url), e)
                continue
コード例 #21
0
ファイル: xinxiang_54.py プロジェクト: w4205/hilder_gv
 def get_house_info(self, house_url_list, bu_id, co_id):
     """Fetch every house page and store it as a House record.

     :param house_url_list: iterable of absolute house page URLs.
     :param bu_id: building id written to every record.
     :param co_id: community id written to every record.

     Any error while handling one house is printed and the loop moves on.
     """
     flags = re.S | re.M
     # (House attribute, pattern) pairs, applied in this order.
     field_patterns = (
         ('ho_name', '门牌号:.*?<td.*?>(.*?)<'),
         ('ho_floor', '所在层:.*?<td.*?>(.*?)<'),
         ('ho_type', '房屋性质:.*?<td.*?>(.*?)<'),
         ('ho_build_size', '预测建筑面积:.*?<td.*?>(.*?)<'),
         ('ho_true_size', '预测套内面积:.*?<td.*?>(.*?)<'),
         ('ho_share_size', '预测分摊面积:.*?<td.*?>(.*?)<'),
         ('co_address', '房屋坐落:.*?<td.*?>(.*?)<'),
     )
     for page_url in house_url_list:
         try:
             record = House(co_index)
             page = requests.get(page_url, headers=self.headers).text
             for attr, pattern in field_patterns:
                 setattr(record, attr,
                         re.search(pattern, page, flags).group(1))
             record.bu_id = bu_id
             record.co_id = co_id
             record.insert_db()
         except Exception as e:
             message = '房号错误,co_index={},url={}'.format(co_index, page_url)
             print(message, e)
コード例 #22
0
ファイル: cangzhou_103.py プロジェクト: w4205/hilder_gv
 def get_house_info(self, house_id_list, bu_id, co_id):
     """Fetch the RoomInfo page of each room code and store it as a House.

     The text of each element identified by an ``id="ROOM_..."`` attribute
     is captured with a regular expression and copied onto a fresh
     ``House``, which is then written with ``insert_db()``. A failure for
     one room is printed together with its URL and does not stop the loop.

     :param house_id_list: iterable of room code strings appended to the
         RoomInfo URL.
     :param bu_id: building id stored on every record.
     :param co_id: community id stored on every record.
     """
     # House attribute -> pattern whose group 1 is the value to store.
     patterns_by_attr = {
         'ho_name': 'id="ROOM_HH">(.*?)<',
         'ho_floor': 'id="ROOM_MYC">(.*?)<',
         'ho_type': 'id="ROOM_FWYT">(.*?)<',
         'ho_room_type': 'id="ROOM_HX">(.*?)<',
         'ho_build_size': 'id="ROOM_YCJZMJ">(.*?)<',
         'ho_true_size': 'id="ROOM_YCTNJZMJ">(.*?)<',
         'ho_share_size': 'id="ROOM_YCFTJZMJ">(.*?)<',
     }
     for room_code in house_id_list:
         house_url = 'http://www.hbczfdc.com:4993/HPMS/RoomInfo.aspx?code=' + room_code
         try:
             record = House(co_index)
             page = requests.get(house_url, headers=self.headers).text
             record.bu_id = bu_id
             record.co_id = co_id
             for attr, pattern in patterns_by_attr.items():
                 found = re.search(pattern, page, re.S | re.M)
                 # found is None when the element is absent; .group then
                 # raises and the record is reported instead of stored.
                 setattr(record, attr, found.group(1))
             record.insert_db()
         except Exception as e:
             print('房号错误,co_index={},url={}'.format(co_index, house_url), e)
コード例 #23
0
    def get_build_info(self, url, response, co_id, bu_id):
        """Store every room listed in a building's JSON response.

        ``response.text`` is parsed as JSON and iterated; each item supplies
        the room number, usage, inner area, floor and building area. The
        room's detail page is then fetched to add the shared area and the
        total price before the record is written with ``insert_db()``. When
        the detail page cannot be fetched or parsed, the failure is printed
        and that room is not inserted.

        :param url: not used by this method.
        :param response: HTTP response whose ``text`` is a JSON array of
            room objects.
        :param co_id: community id stored on every record.
        :param bu_id: building id stored on every record.
        """
        # A single House object is refilled and inserted for each room.
        house = House(co_index)
        # (House attribute, key in the JSON room object)
        json_fields = (
            ('ho_name', 'roomno'),          # room number
            ('ho_type', 'ghyt'),            # usage
            ('ho_true_size', 'tnmj'),       # predicted inner area
            ('ho_floor', 'floorindex'),     # floor
            ('ho_build_size', 'jzmj'),      # building area
        )
        for room in json.loads(response.text):
            # Read every key up front so a missing one fails before the
            # record is touched.
            values = [(attr, room[key]) for attr, key in json_fields]
            house.co_id = co_id
            house.bu_id = bu_id
            house_code = room["fwcode"]
            for attr, value in values:
                setattr(house, attr, value)

            house_detail_url = "http://fsfc.fsjw.gov.cn/hpms_project/roomview.jhtml?id=" + str(house_code)
            try:
                res = requests.get(house_detail_url, headers=self.headers)
                shared = re.search('实测分摊面积.*?<td>(.*?)</td>', res.text, re.S | re.M)
                house.ho_share_size = shared.group(1)
                price = re.search('总价.*?<td>(.*?)</td>', res.text, re.S | re.M)
                house.ho_price = price.group(1)
            except Exception as e:
                print("co_index={},房屋详情页{}请求失败!".format(co_index, house_detail_url))
                print(e)
            else:
                house.insert_db()
コード例 #24
0
 def get_house_info(self, house_url_list, co_name, bu_num):
     """Scrape each house page under ``/pubinfo/`` and store it as a House.

     Each relative URL is fetched with ``self.headers``; the text following
     every ``HouseInfo1_lbl...`` marker is taken from the first regex match
     and copied onto a fresh ``House`` before ``insert_db()`` is called.
     Any exception for one URL is printed and the loop continues.

     :param house_url_list: iterable of URL fragments appended to the
         ``pubinfo/`` base address.
     :param co_name: community name stored on every record.
     :param bu_num: value stored on every record as ``bu_num``.
     """

     def first_match(pattern, text):
         # First captured value; raises IndexError when nothing matches.
         return re.findall(pattern, text, re.S | re.M)[0]

     # (House attribute, pattern whose first match is the value to store)
     wanted = (
         ('ho_floor', 'HouseInfo1_lblFwlc">(.*?)<'),
         ('ho_name', 'HouseInfo1_lblFwfh">(.*?)<'),
         ('ho_type', 'HouseInfo1_lblFwlx">(.*?)<'),
         ('ho_room_type', 'HouseInfo1_lblFwhx">(.*?)<'),
         ('ho_build_size', 'HouseInfo1_lblycfwjzmj">(.*?)<'),
         ('ho_true_size', 'HouseInfo1_lblycfwtnmj">(.*?)<'),
         ('ho_share_size', 'HouseInfo1_lblycfwftmj">(.*?)<'),
         ('orientation', 'HouseInfo1_lblCx">(.*?)<'),
     )
     for fragment in house_url_list:
         try:
             record = House(co_index)
             record.co_name = co_name
             record.bu_num = bu_num
             house_url = 'http://www.sxczfdc.com/pubinfo/' + fragment
             page = requests.get(house_url, headers=self.headers).text
             for attr, pattern in wanted:
                 setattr(record, attr, first_match(pattern, page))
             record.insert_db()
         except Exception as e:
             print(e)
コード例 #25
0
ファイル: huaibei_182.py プロジェクト: w4205/hilder_gv
    def ho_parse(self, bid, co_id):
        """Request the house table of one building and store its houses.

        A URL-quoted XML parameter document naming the
        ``GetBuildHTMLEx`` function and carrying ``bid`` is POSTed to
        ``ExeFunCommon.aspx``. Every ``title='...'`` attribute in the
        response is treated as one house description, from which name,
        usage, layout, total area and (when present) price are read before
        the record is written with ``insert_db()``.

        :param bid: building id as a string; embedded in the request payload
            and stored as ``bu_id``.
        :param co_id: community id stored on every record.
        :returns: ``None``. If the request itself fails the error is logged
            and nothing is stored.
        """
        payload = (
            '<?xml version="1.0" encoding="utf-8" standalone="yes"?><param funname="SouthDigital.CMS.CBuildTableEx.GetBuildHTMLEx"><item>'
            + bid
            + '</item><item>1</item><item>1</item><item>100</item><item>1000</item><item>g_oBuildTable</item><item> 1=1</item><item>1</item></param>'
        )
        payload = parse.quote(payload)
        try:
            res = requests.post(
                'http://www.hbsfdc.com/Common/Agents/ExeFunCommon.aspx',
                data=payload,
                headers=self.headers)
        except Exception:
            log.error("{}楼栋请求失败".format(bid))
            # Without a response there is nothing to parse; falling through
            # would raise NameError on the unbound ``res``.
            return
        con = res.content.decode()
        ho_list = re.findall("title='(.*?)'>", con, re.S | re.M)
        for ho in ho_list:
            house = House(co_index)
            house.co_id = co_id
            house.bu_id = bid
            house.ho_name = re.search('房号:(.*)', ho).group(1)
            house.ho_type = re.search('用途:(.*)', ho).group(1)
            house.ho_room_type = re.search('户型:(.*)', ho).group(1)
            house.ho_build_size = re.search('总面积:(.*)', ho).group(1)
            # The price line is optional; search once and reuse the match.
            price = re.search('售价:(.*)', ho)
            house.ho_price = price.group(1) if price else None
            house.insert_db()
コード例 #26
0
ファイル: shaoguan_194.py プロジェクト: w4205/hilder_gv
 def house_parse(self, ho_url, co_id, bu_id):
     """Follow every house link on a listing page and store each house.

     The listing page is decoded as GBK and parsed with lxml; each ``href``
     found under ``td[@height='80']/a`` is fetched, and the table cell after
     each label is captured with a regular expression. A failure for one
     detail link is logged and the remaining links are still processed.

     :param ho_url: path of the listing page, appended to the site root.
     :param co_id: community id stored on every record.
     :param bu_id: building id stored on every record.
     """
     site_root = "http://61.143.241.154/"
     listing = requests.get(site_root + ho_url, headers=headers)
     tree = etree.HTML(listing.content.decode('gbk'))
     # (House attribute, pattern whose group 1 is the value to store)
     cell_patterns = (
         ('ho_name', '房屋号.*?">(.*?)</td'),
         ('ho_true_size', '套内面积.*?">(.*?)</td'),
         ('ho_build_size', '建筑面积.*?">(.*?)</td'),
         ('orientation', '房屋朝向.*?">(.*?)</td'),
         ('ho_type', '用途.*?">(.*?)</td'),
         ('ho_price', '申报总价.*?">(.*?)</td'),
     )
     for detail in tree.xpath("//td[@height='80']/a/@href"):
         try:
             detail_url = 'http://61.143.241.154/' + detail
             page = requests.get(detail_url, headers=headers).content.decode('gbk')
             record = House(co_index)
             record.co_id = co_id
             record.bu_id = bu_id
             for attr, pattern in cell_patterns:
                 hit = re.search(pattern, page, re.S | re.M)
                 # hit is None when the label is missing; .group raises and
                 # the link is logged below.
                 setattr(record, attr, hit.group(1))
             record.insert_db()
         except Exception as e:
             log.error("{}房屋请求解析失败{}".format(detail, e))
コード例 #27
0
ファイル: shanghai_42.py プロジェクト: w4205/hilder_gv
 def get_house_info(self, house_url, bu_id, co_id):
     """Parse a building's house table and store one House per cell.

     The page is decoded as GBK and the section starting at the room-number
     header and spanning two ``</table>`` closings is cut out; every
     ``title...</td>`` cell inside it is one house. The measured area and
     the name are read from the cell itself. When the name contains a link,
     the linked detail page is fetched and its values (floor, name, type,
     layout and the three areas) replace or extend the cell values. A
     failure for one cell is printed and the next cell is processed.

     :param house_url: path of the house table page, appended to the site
         root.
     :param bu_id: building id stored on every record.
     :param co_id: community id stored on every record.
     :raises AttributeError: if the house table section is not found in the
         page (this lookup is not guarded).
     """
     site_root = 'http://www.fangdi.com.cn/'
     flags = re.S | re.M
     ho_url = site_root + house_url
     listing = requests.get(ho_url, headers=self.headers).content.decode('gbk')
     house_html = re.search('室号 <.*?</table>.*?</table>', listing,
                            flags).group()
     # (House attribute, pattern applied to the detail page)
     detail_fields = (
         ('ho_floor', '实际层.*?<TD.*?>(.*?)<'),
         ('ho_name', '室号.*?<TD.*?>(.*?)<'),
         ('ho_type', '房屋类型.*?<TD.*?>(.*?)<'),
         ('ho_room_type', '房型.*?<TD.*?>(.*?)<'),
         ('ho_build_size', '实测建筑面积.*?<TD.*?>(.*?)<'),
         ('ho_true_size', '实测套内面积.*?<TD.*?>(.*?)<'),
         ('ho_share_size', '实测分摊面积.*?<TD.*?>(.*?)<'),
     )
     for cell in re.findall('title.*?</td>', house_html, flags):
         try:
             house = House(co_index)
             house.ho_build_size = re.search('实测面积:(.*?)>', cell,
                                             flags).group(1)
             name_cell = re.search('实测面积.*?>(.*?)<br>', cell,
                                   flags).group(1).strip()
             house.ho_name = name_cell
             house.bu_id = bu_id
             house.co_id = co_id
             if '<a' in name_cell:
                 # The name is an anchor: follow it for the full record.
                 href = re.search('href="(.*?)"', name_cell, flags).group(1)
                 detail_page = requests.get(
                     site_root + href,
                     headers=self.headers).content.decode('gbk')
                 for attr, pattern in detail_fields:
                     setattr(house, attr,
                             re.search(pattern, detail_page, flags).group(1))
             house.insert_db()
         except Exception as e:
             print('房号错误,co_index={},url={}'.format(co_index, ho_url), e)
コード例 #28
0
ファイル: jiujiang_25.py プロジェクト: w4205/hilder_gv
    def get_build_info(self, comm_url_list):
        """Store the building and the houses referenced by each entry.

        Each entry must contain two ``+<digits>+`` groups: the first is used
        as ``sid`` and the second as ``pid``. The building page for the pair
        is fetched and stored as a ``Building``; then the house list for
        ``sid`` is fetched and every ``<Result>...</Result>`` element in it
        is stored as a ``House`` linked to that building.

        Failures are printed and never abort the loop:

        * an entry without both ids is skipped entirely;
        * a failure while fetching or parsing the building page is reported
          and the houses are still attempted;
        * a failed house-list request skips the houses of that entry;
        * a failure for one house skips only that house.

        :param comm_url_list: iterable of strings carrying the two ids.
        """
        for i in comm_url_list:
            try:
                # Slicing makes the unpack fail cleanly (ValueError) when
                # fewer than two ids are present.
                sid, pid = re.findall(r'\+(\d+)\+', i)[:2]
            except (TypeError, ValueError) as e:
                print('co_index={}, 楼栋错误,url={}'.format(co_index, i), e)
                continue

            build_url = 'http://www.jjzzfdc.com.cn/WebClient/ClientService/bldg_query.aspx?pid=' + pid + '&sid=' + sid
            # Create the record before any network call so the house loop
            # below never sees a building left over from a previous entry.
            build = Building(co_index)
            build.bu_id = pid
            try:
                html = requests.get(build_url).text
                # The same cell feeds both the building number and address.
                location = re.search('楼栋座落.*?<td.*?>(.*?)<', html,
                                     re.S | re.M).group(1)
                build.bu_num = location
                build.bu_address = location
                build.bu_pre_sale = re.search('预售证号.*?">(.*?)&nbsp', html,
                                              re.S | re.M).group(1)
                build.bu_pre_sale_date = re.search('时间.*?">(.*?)&nbsp', html,
                                                   re.S | re.M).group(1)
                build.bu_all_house = re.search('dM.*?">(.*?)&nbsp', html,
                                               re.S | re.M).group(1)
                build.insert_db()
            except Exception as e:
                print('co_index={}, 楼栋错误,url={}'.format(co_index, build_url),
                      e)

            house_url = 'http://www.jjzzfdc.com.cn/WebClient/ClientService/proxp.aspx?key=WWW_LPB_001&params=' + sid
            try:
                html_ = requests.get(house_url).text
            except Exception as e:
                print('co_index={}, 房号错误,url={}'.format(co_index, house_url),
                      e)
                continue

            for house_info in re.findall('<Result.*?</Result>', html_,
                                         re.S | re.M):
                try:
                    house = House(co_index)
                    house.bu_id = build.bu_id
                    house.bu_num = build.bu_num
                    house.ho_name = re.search('<ONAME>(.*?)</ONAME>',
                                              house_info, re.S | re.M).group(1)
                    house.ho_num = re.search('<OSEQ>(.*?)</OSEQ>', house_info,
                                             re.S | re.M).group(1)
                    house.ho_build_size = re.search('<BAREA>(.*?)</BAREA>',
                                                    house_info,
                                                    re.S | re.M).group(1)
                    house.ho_floor = re.search('<FORC>(.*?)</FORC>',
                                               house_info,
                                               re.S | re.M).group(1)
                    house.ho_true_size = re.search('<PAREA>(.*?)</PAREA>',
                                                   house_info,
                                                   re.S | re.M).group(1)
                    house.insert_db()
                except Exception as e:
                    print('co_index={}, 房号错误'.format(co_index), e)
コード例 #29
0
ファイル: fenghua_73.py プロジェクト: w4205/hilder_gv
    def get_house_info(self, bu_url, bu_id):
        """Store every room of the building identified by ``bu_url``.

        The ``qrykey`` query value is taken from ``bu_url`` and used to
        request the house table; each room id found in a
        ``select_room('...')`` handler is then fetched individually, with a
        two-second pause after every successful request. The table cell
        after each label is copied onto a fresh ``House`` and written with
        ``insert_db()``. A room whose page cannot be fetched or parsed is
        reported and skipped.

        :param bu_url: building URL containing ``qrykey=<value>&``.
        :param bu_id: building id stored on every record.
        :raises AttributeError: if ``bu_url`` carries no ``qrykey`` value
            (this lookup is not guarded).
        """
        qrykey = re.search('qrykey=(.*?)&', bu_url).group(1)
        house_url = 'http://old.newhouse.cnnbfdc.com/GetHouseTable.aspx?qrykey=' + qrykey
        table_page = requests.get(house_url, headers=self.headers).text
        room_ids = re.findall("onclick=select_room\('(.*?)'", table_page,
                              re.S | re.M)
        # (House attribute, pattern whose group 1 is the value to store)
        cell_patterns = (
            ('ho_name', '室号.*?">(.*?)</td>'),
            ('ho_floor', '楼层.*?">(.*?)</td>'),
            ('ho_room_type', '户型.*?">(.*?)</td>'),
            ('ho_type', '用途.*?">(.*?)</td>'),
            ('ho_build_size', '预测建筑面积.*?">(.*?)</td>'),
            ('ho_true_size', '预测套内面积.*?">(.*?)</td>'),
            ('ho_share_size', '预测分摊面积.*?">(.*?)</td>'),
        )
        for room_id in room_ids:
            house_detail_url = 'http://old.newhouse.cnnbfdc.com/openRoomData.aspx?roomId=' + str(
                room_id)
            # A proxy-rotation retry loop was sketched here in commented-out
            # form; the request is currently made directly.
            try:
                res = requests.get(house_detail_url, headers=self.headers)
            except Exception as e:
                print("{}城市无法访问房屋页面{}".format(city, house_detail_url), e)
                continue
            time.sleep(2)
            content = res.text
            ho = House(co_index)
            ho.bu_id = bu_id
            try:
                for attr, pattern in cell_patterns:
                    cell = re.search(pattern, content, re.S | re.M)
                    # cell is None when the label is absent; .group raises
                    # and the page is reported as not extracted.
                    setattr(ho, attr, cell.group(1))

                ho.insert_db()
            except Exception as e:
                print("{}房号错误,请求频繁,当前页面{}未提取".format(city, house_detail_url),
                      e)
コード例 #30
0
 def get_house_info(self, house_url, bu_id, co_id):
     """POST to one house page and store its details as a House.

     The response is decoded as GBK; the ``<td>`` following each label
     gives the floor, name, total area, inner area and shared area. Any
     exception (request, decoding, missing label, insert) is printed with
     the URL and swallowed.

     :param house_url: absolute URL of the house page.
     :param bu_id: building id stored on the record.
     :param co_id: community id stored on the record.
     """
     # (House attribute, pattern whose group 1 is the value to store)
     labelled_cells = (
         ('ho_floor', '所在楼层:.*?<td>(.*?)<'),
         ('ho_name', '房号:.*?<td>(.*?)<'),
         ('ho_build_size', '预测总面积:.*?<td>(.*?)<'),
         ('ho_true_size', '预测套内面积.*?<td>(.*?)<'),
         ('ho_share_size', '预测公摊面积.*?<td>(.*?)<'),
     )
     try:
         record = House(co_index)
         record.bu_id = bu_id
         record.co_id = co_id
         page = requests.post(house_url,
                              headers=self.headers).content.decode('gbk')
         for attr, pattern in labelled_cells:
             setattr(record, attr,
                     re.search(pattern, page, re.M | re.S).group(1))
         record.insert_db()
     except Exception as e:
         print('房号错误,co_index={},url={}'.format(co_index, house_url), e)