Esempio n. 1
0
 def build_info(self, co_id, temp_url_list):
     """Store one Building per table row of each page, then fetch houses.

     For every entry of ``temp_url_list`` the page at
     ``http://222.77.178.63:7002/`` + entry is downloaded and decoded as
     GBK. Each ``<tr class="indextabletxt">`` row becomes a ``Building``
     that is written with ``insert_db()``. After a page has been processed
     without error, ``self.house_info`` is called once with the link and
     building id of the last row on that page.

     :param co_id: identifier copied onto every Building as ``co_id`` and
         forwarded to ``self.house_info``.
     :param temp_url_list: iterable of URL suffixes appended to the host.
     """
     for temp_url in temp_url_list:
         # Reset per page so a page without rows can never reuse (or
         # fail on) values left over from a previous iteration.
         ho_url = None
         bu = None
         try:
             build_url = "http://222.77.178.63:7002/" + temp_url
             res = requests.get(build_url, headers=self.headers)
             html = etree.HTML(res.content.decode('gbk'))
             build_info_list = html.xpath("//tr[@class='indextabletxt']")
             for build_info in build_info_list:
                 bu = Building(co_index)
                 ho_url = build_info.xpath("./td/a/@href")[0]
                 bu.co_id = co_id
                 bu.bu_id = re.search('Param=(.*)', ho_url).group(1)
                 bu.bu_num = build_info.xpath("./td/a/text()")[0]
                 bu.bu_all_house = build_info.xpath("./td[2]/text()")[0]
                 # The two size cells are optional: store None when the
                 # cell has no text instead of failing the whole row.
                 all_size = build_info.xpath("./td[3]/text()")
                 bu.bu_all_size = all_size[0] if all_size else None
                 live_size = build_info.xpath("./td[5]/text()")
                 bu.bu_live_size = live_size[0] if live_size else None
                 bu.insert_db()
         except Exception as e:
             print('楼栋信息错误{}'.format(e))
             continue
         if bu is None:
             # No building rows on this page, so there is nothing to
             # hand to house_info.
             continue
         # NOTE(review): only the last row of the page reaches
         # house_info; confirm whether every row should be followed.
         self.house_info(ho_url, co_id, bu.bu_id)
Esempio n. 2
0
    def get_build_info(self, build_lis, co_id):
        """Download each building page, store it and fetch its houses.

        Every entry of ``build_lis`` is appended to
        ``http://xx.yyfdcw.com`` and requested. A failed request is
        printed and the entry is skipped. The building fields are cut out
        of the response text with regular expressions, the ``Building`` is
        written with ``insert_db()`` and ``self.get_house_info`` is called
        with the value found after the ``测量号`` label.

        :param build_lis: iterable of URL suffixes containing ``Bid=<digits>``.
        :param co_id: identifier copied onto every Building as ``co_id``
            and forwarded to ``self.get_house_info``.
        :raises AttributeError: when one of the patterns does not match,
            because ``re.search`` then returns ``None``.
        """
        for build_ in build_lis:
            build_url = "http://xx.yyfdcw.com" + build_
            try:
                build_res = requests.get(build_url, headers=self.headers)
            except Exception as e:
                print("co_index={},楼栋信息错误".format(co_index), e)
                continue
            con = build_res.text
            bu = Building(co_index)
            bu.co_id = co_id
            # Raw strings keep regex escapes such as \d out of Python's
            # own string-escape processing.
            bu.bu_id = re.search(r'Bid=(\d+)', build_).group(1)
            bu.bu_num = re.search(r'名称.*?">(.*?)</spa', con).group(1)
            bu.bu_pre_sale = re.search(r"编.*?red'>(.*?)</a", con).group(1)
            bu.bu_pre_sale_date = re.search(r'颁发日期.*?Date">(.*?)</span',
                                            con).group(1)
            bu.bo_build_start_time = re.search(r'开工日期.*?">(.*?)</span',
                                               con).group(1)
            bu.bo_build_end_time = re.search(r'竣工日期.*?">(.*?)</span',
                                             con).group(1)
            bu.bo_develops = re.search(r'单位.*?">(.*?)</span', con).group(1)
            bu.bu_floor = re.search(r'层数.*?">(.*?)</span', con).group(1)
            bu.bu_live_size = re.search(r'住宅面积.*?">(.*?)</span',
                                        con).group(1)
            bu.size = re.search(r'总面积.*?">(.*?)</span', con).group(1)

            bu.insert_db()

            # Named so that the builtin ``id`` is not shadowed.
            survey_no = re.search(r'测量号.*?">(.*?)</span', con).group(1)
            self.get_house_info(co_id, bu.bu_id, survey_no)
Esempio n. 3
0
    def get_build_info(self, url, co_id):
        """Store the building found at ``url`` and the houses listed on it.

        The page is parsed with XPath for the building fields, which are
        written through ``Building.insert_db()``. Everything after
        ``<tr id="row3">`` is then scanned cell by cell; each cell that
        contains ``<br>`` yields ``House`` records that are written with
        ``insert_db()``.

        Any ``Exception`` is printed and swallowed, so the method never
        raises for a malformed page. Interpreter-level signals such as
        ``KeyboardInterrupt`` are allowed to propagate.

        :param url: building page URL; must end in ``?<digits>``, which is
            used as the building id.
        :param co_id: identifier copied onto the Building and every House.
        """
        try:
            building = Building(co_index)
            response = requests.get(url)
            html = response.text
            tree = etree.HTML(html)
            # Community name; only printed, not stored.
            co_name = tree.xpath('//*[@id="PageB_Location"]/text()')[0]
            print(co_name)
            # Building name.
            bu_name = tree.xpath('//*[@id="ItemName"]/text()')[0]
            # Building number.
            bu_num = tree.xpath('//*[@id="PageB_HouseNo"]/text()')[0]
            # Total number of units.
            bu_all_house = tree.xpath(
                '//*[@id="lb_countbulidtaoshu"]/text()')[0]
            # Floor; self.is_none presumably maps an empty result to a
            # placeholder - confirm against its definition.
            bu_floor = self.is_none(tree.xpath('//*[@id="cell3-1"]/text()'))
            # Building area.
            bu_build_size = tree.xpath(
                '//*[@id="lb_countbulidarea"]/text()')[0]
            # Residential area.
            bu_live_size = tree.xpath('//*[@id="lb_buildarea"]/text()')[0]
            # Residential price, normalised by the same helper as the floor.
            bu_price = self.is_none(
                tree.xpath('//*[@id="lb_buildavg"]/text()'))
            # Building id: the digits after the final '?' of the URL.
            bu_id = re.search(r'\?(\d+)$', url).group(1)

            building.co_id = co_id
            building.bu_name = bu_name
            building.bu_num = bu_num
            building.bu_all_house = bu_all_house
            building.bu_floor = bu_floor
            building.bu_build_size = bu_build_size
            building.bu_live_size = bu_live_size
            building.bu_price = bu_price
            building.bu_id = bu_id
            building.insert_db()

            flags = re.S | re.M
            house_info_html = re.findall('<tr id="row3">(.*)$', html,
                                         flags)[0]
            for cell in re.findall('(<td.*?>.*?</td>)', house_info_html,
                                   flags):
                if '<br>' not in cell:
                    continue
                ho_name_list = re.findall('<td.*?>(.*?)<br>', cell, flags)
                ho_true_size_list = re.findall('<td.*?>.*?<br>(.*?)<br>',
                                               cell, flags)
                ho_type = re.findall('<td.*?>.*?<br>.*?<br>(.*?)<br>', cell,
                                     flags)[0]
                # A separate index name keeps the cell loop variable intact.
                for idx, raw_name in enumerate(ho_name_list):
                    try:
                        if 'font' in raw_name:
                            ho_name = re.sub('<font.*?>', '', raw_name)
                        else:
                            ho_name = raw_name
                        # NOTE(review): 8 is hard-coded here while the
                        # Building uses co_index - confirm they agree.
                        house = House(8)
                        house.ho_name = ho_name
                        # Indexed inside the try so a missing size is
                        # reported per house rather than aborting the cell.
                        house.ho_true_size = ho_true_size_list[idx]
                        house.co_id = co_id
                        house.bu_id = bu_id
                        house.ho_type = ho_type
                        house.insert_db()
                    except Exception as e:
                        print(e)
        except Exception as e:
            # Exception, not BaseException: Ctrl-C and SystemExit must
            # still be able to stop the crawler.
            print(e)
 def get_build_info(self, build_url_list, co_name):
     """Store building details for every detail link of every listing page.

     Each entry of ``build_url_list`` is appended to
     ``http://www.sxczfdc.com/pubinfo/`` and downloaded. Every
     ``Pub_dtxx.aspx?ProjectBuildingID=...`` link found on that page is
     downloaded in turn, the building fields are extracted from it,
     ``insert_db()`` is called and the ``getMoreHouseInfo('...')``
     arguments found on the page are passed to ``self.get_house_info``.

     Errors are printed and skipped, both per detail page and per listing
     page.

     :param build_url_list: iterable of URL suffixes of listing pages.
     :param co_name: stored on the Building as ``co_name`` and forwarded
         to ``self.get_house_info``.
     """
     flags = re.S | re.M
     base_url = 'http://www.sxczfdc.com/pubinfo/'
     # (Building attribute, pattern whose first match is stored on it).
     field_patterns = (
         ('bu_num', r'BuildingInfo1_lblBuildingName">(.*?)<'),
         ('bu_all_house', r'BuildingInfo1_lblZts">(.*?)<'),
         ('bu_floor', r'BuildingInfo1_lblZcs">(.*?)<'),
         ('bu_build_size', r'BuildingInfo1_lblJzmj">(.*?)<'),
         ('bu_live_size', r'BuildingInfo1_lblZzmj">(.*?)<'),
         ('bu_pre_sale', r'BuildingInfo1_lblYsxkzh">(.*?)<'),
         ('bu_pre_sale_date', r'BuildingInfo1_lblYsxkzfzrq">(.*?)<'),
     )
     for i in build_url_list:
         try:
             # One Building object per listing page; it is refilled and
             # inserted again for every detail link of that page.
             build = Building(co_index)
             build.co_name = co_name
             build_url = base_url + i
             response = requests.get(build_url, headers=self.headers)
             html = response.text
             # Raw string: \? is a regex escape, not a Python one.
             for k in re.findall(
                     r'(Pub_dtxx.aspx\?ProjectBuildingID=.*?)"', html,
                     flags):
                 try:
                     build_url_detail = base_url + k
                     result = requests.get(build_url_detail,
                                           headers=self.headers)
                     content = result.text
                     for attr, pattern in field_patterns:
                         # [0] raises IndexError when the field is
                         # missing; the handler below reports it.
                         setattr(build, attr,
                                 re.findall(pattern, content, flags)[0])
                     build.insert_db()
                     house_url_list = re.findall(
                         r"onClick=.getMoreHouseInfo\('(.*?)'\)", content,
                         flags)
                     self.get_house_info(house_url_list, co_name,
                                         build.bu_num)
                 except Exception as e:
                     print(e)
         except Exception as e:
             print(e)
Esempio n. 5
0
 def get_build_info(self, presell_url_list, co_id):
     """Follow each presell page to its buildings and store every one.

     Each entry of ``presell_url_list`` is appended to ``self.url`` and
     downloaded. Every building link found on that page is downloaded,
     parsed with regular expressions into a ``Building`` and written with
     ``insert_db()``. The ``getMoreHouseInfo('...')`` arguments of the
     building page are then passed to ``self.get_house_info``.

     A building whose download or parsing fails is printed and skipped.
     The request for the presell page itself is not guarded, so its
     exceptions propagate to the caller.

     :param presell_url_list: iterable of URL suffixes of presell pages.
     :param co_id: identifier copied onto every Building as ``co_id`` and
         forwarded to ``self.get_house_info``.
     """
     flags = re.S | re.M
     for presell_url in presell_url_list:
         pre_url = self.url + presell_url
         res = requests.get(pre_url, headers=self.headers)
         build_url_list = re.findall(r'【<a href="(.*?)" target="_self"',
                                     res.text, flags)
         for build_url in build_url_list:
             build_info_url = self.url + build_url
             try:
                 build_res = requests.get(build_info_url,
                                          headers=self.headers)
                 con = build_res.text
                 bu = Building(co_index)
                 bu.co_id = co_id
                 # Raw strings keep regex escapes (\d, \() away from
                 # Python's string-escape processing.
                 bu.bu_id = re.search(r'ID=(\d+)', build_url).group(1)
                 bu.bu_num = re.search(
                     r'栋.*?号.*?BuildingName">(.*?)</span', con,
                     flags).group(1)
                 bu.bu_floor = re.search(r'总 层 数.*?(\d+)</span', con,
                                         flags).group(1)
                 bu.bu_build_size = re.search(
                     r'建筑面积.*?Jzmj">(.*?)</span', con, flags).group(1)
                 bu.bu_live_size = re.search(
                     r'住宅面积.*?Zzmj">(.*?)</span', con, flags).group(1)
                 bu.bu_not_live_size = re.search(
                     r'非住宅面积.*?Fzzmj">(.*?)</span', con,
                     flags).group(1)
                 bu.bu_pre_sale = re.search(
                     r'预售许可证.*?xkzh">(.*?)</span', con, flags).group(1)
                 bu.bu_pre_sale_date = re.search(
                     r'发证日期.*?fzrq">(.*?)</span', con, flags).group(1)
                 bu.bu_type = re.search(r'项目类型.*?Type">(.*?)</span', con,
                                        flags).group(1)
                 bu.insert_db()
             except Exception as e:
                 print("co_index={},楼栋信息错误".format(co_index), e)
                 continue
             # Only reached when the building was stored, so ``con`` and
             # ``bu`` are always bound here.
             house_detail_list = re.findall(
                 r"getMoreHouseInfo\('(.*?)'\)\"", con, flags)
             self.get_house_info(co_id, bu.bu_id, house_detail_list)
Esempio n. 6
0
    def bu_parse(self, co_id, bulist):
        """Download each building page, store it and hand its houses on.

        Every entry of ``bulist`` is appended to
        ``http://110.89.45.7:8082`` and requested. The building fields are
        extracted from the response text with regular expressions, the
        ``Building`` is written with ``insert_db()`` and the ``<a>``
        elements under ``<td style=...>`` cells are passed to
        ``self.ho_parse``.

        :param co_id: identifier copied onto every Building as ``co_id``
            and forwarded to ``self.ho_parse``.
        :param bulist: iterable of URL suffixes containing
            ``buildingInfoID=<value>&``.
        """
        multiline = re.S | re.M
        # (Building attribute, pattern whose first group is stored on it),
        # in the order the attributes are assigned.
        page_fields = (
            ('bu_num', '幢号.*?">(.*?)</'),
            ('bu_floor', '总 层 数.*?">(.*?)</'),
            ('bu_live_size', '批准销售.*?">.*?</td.*?">(.*?)</td'),
            ('bu_all_size', '总面积.*?">(.*?)</'),
            ('bu_type', '设计用途.*?">(.*?)</'),
        )
        for bo in bulist:
            response = requests.get("http://110.89.45.7:8082" + bo,
                                    headers=self.headers)
            page_text = response.text

            building = Building(co_index)
            building.co_id = co_id
            building.bu_id = re.search('buildingInfoID=(.*?)&', bo).group(1)
            for attr_name, pattern in page_fields:
                matched = re.search(pattern, page_text, multiline)
                setattr(building, attr_name, matched.group(1))
            building.insert_db()

            house_links = etree.HTML(page_text).xpath("//td[@style]/a")
            self.ho_parse(co_id, building.bu_id, house_links)
Esempio n. 7
0
    def get_build_info(self, bu_pre_sale, bo_develops, bu_co_name, bu_con):
        """Parse one building out of ``bu_con`` and write it to the database.

        :param bu_pre_sale: stored on the Building as ``bu_pre_sale``.
        :param bo_develops: stored on the Building as ``bo_develops``.
        :param bu_co_name: stored on the Building as ``co_name``.
        :param bu_con: text that the building fields are extracted from
            with regular expressions.
        :raises AttributeError: when one of the patterns does not match,
            because ``re.search`` then returns ``None``.
        """
        flags = re.S | re.M

        build = Building(co_index)

        # Raw strings keep \d out of Python's string-escape processing.
        build.bu_id = re.search(r'编号.*?>(\d+)<', bu_con, flags).group(1)
        build.bu_num = re.search(r'幢号.*?>(\d+)<', bu_con, flags).group(1)
        build.bu_floor = re.search(r'总层数.*?>(\d+)<', bu_con,
                                   flags).group(1)
        # NOTE(review): the '.' between the digit runs is unescaped and so
        # matches any character, not only a decimal point. It is left as
        # is because tightening it could reject values that match today.
        build.bu_build_size = re.search(r'预售建筑面积.*?>(\d+.\d+)<', bu_con,
                                        flags).group(1)
        build.bu_address = re.search(r'楼房坐落.*?;">(.*?)</span', bu_con,
                                     flags).group(1)
        build.bu_live_size = re.search(r'住宅建筑面积.*?>(\d+.\d+)<', bu_con,
                                       flags).group(1)
        build.bu_not_live_size = re.search(r'非住宅建筑面积.*?;">(.*?)</span',
                                           bu_con, flags).group(1)
        build.bo_build_start_time = re.search(r'开工日期.*?;">(.*?)</span',
                                              bu_con, flags).group(1)
        build.bu_all_house = re.search(r'总套数.*?>(\d+)<', bu_con,
                                       flags).group(1)
        build.bu_pre_sale = bu_pre_sale
        build.bo_develops = bo_develops
        build.co_name = bu_co_name
        build.insert_db()