Example #1
0
 def get_build_info(self, build_url_list):
     """Scrape each building page and forward its house links.

     For every entry of ``build_url_list`` the entry is appended to the
     ``http://www.ndjsj.gov.cn/House/`` base URL, a ``Building`` is filled
     with one regex per field, and a ``ProducerListUrl`` is run over the
     page with those rules.  Whatever ``get_details()`` returns
     (presumably the house-page links matched by ``current_url_rule`` --
     confirm against ``ProducerListUrl``) is passed to
     ``self.get_house_info``.

     A failure on one entry is printed and the loop moves on to the next
     entry; nothing is returned.

     :param build_url_list: iterable of URL fragments (strings) relative
         to the ``/House/`` base.
     """
     for i in build_url_list:
         # Reset per iteration: the handler below must never hit an unbound
         # name (first iteration) or report the URL of a previous entry.
         build_url = None
         try:
             # Build the URL first so it is available to the error message
             # even when constructing the Building fails.
             build_url = 'http://www.ndjsj.gov.cn/House/' + i
             build = Building(co_index)
             # Each attribute holds the regex used to extract that field.
             build.co_name = '项目名称:.*?<td.*?>(.*?)<'
             build.bu_num = '幢  号:.*?<td.*?>(.*?)<'
             build.bu_address = '坐落位置:.*?<td.*?>(.*?)<'
             build.co_build_structural = '建筑结构:.*?<td.*?>(.*?)<'
             build.bu_floor = '总 层 数:.*?<td.*?>(.*?)<'
             build.bu_build_size = '总 面 积:.*?<td.*?>(.*?)<'
             # The bu_type rule is disabled in this scraper:
             # build.bu_type = '设计用途:.*?<td.*?>(.*?)<'
             build.bu_all_house = '批准销售:.*?<td.*?>(.*?)<'
             p = ProducerListUrl(
                 page_url=build_url,
                 request_type='get',
                 encode='utf-8',
                 analyzer_rules_dict=build.to_dict(),
                 current_url_rule='javascript:ShowTitle.*?href="(.*?)"',
                 analyzer_type='regex',
                 headers=self.headers)
             house_url_list = p.get_details()
             self.get_house_info(house_url_list)
         except Exception as e:
             # Fall back to the raw entry when the URL itself could not be
             # built (e.g. the entry is not a string).
             print('宁德楼栋错误,url={}'.format(build_url or i), e)
Example #2
0
 def get_build_info(self, build_url_list):
     """Scrape each building page and forward its house links.

     For every entry of ``build_url_list`` the ``xqbm`` value at the end
     of the entry is extracted and used to build the ``donginfo.aspx``
     page URL.  A ``Building`` is filled with one regex per field and a
     ``ProducerListUrl`` is run over the page with those rules.  Whatever
     ``get_details()`` returns (presumably the house-page links matched by
     ``current_url_rule`` -- confirm against ``ProducerListUrl``) is
     passed to ``self.get_house_info``.

     A failure on one entry is printed and the loop moves on to the next
     entry; nothing is returned.

     :param build_url_list: iterable of strings that end in
         ``xqbm=<code>``.
     """
     for i in build_url_list:
         # Reset per iteration: the handler below must never hit an unbound
         # name (first iteration) or report the URL of a previous entry.
         build_url = None
         try:
             # .group(1) raises AttributeError when the entry has no
             # "xqbm=" part; that is reported by the handler below.
             build_code = re.search(r'xqbm=(.*?)$', i).group(1)
             build_url = 'http://zjjg.0557fdc.com:9555/xiaoqu/donginfo.aspx?xqbm=' + build_code
             build = Building(co_index)
             # Each attribute holds the regex used to extract that field.
             build.bu_num = 'Labeldongmc">(.*?)<'
             build.bu_pre_sale = 'Labelyszheng">(.*?)<'
             build.bu_floor = 'Labelsceng">(.*?)<'
             build.bu_address = 'Label1zuoluo">(.*?)<'
             build.bo_build_start_time = 'Label1kaigong">(.*?)<'
             build.co_build_structural = 'Labeljiegou">(.*?)<'
             # Raw strings: "\?" and "\." are regex escapes, not string escapes.
             build.co_id = r'donginfo.aspx\?xqbm=(.*?)"'
             build.bu_id = 'id="DropDownList1".*?value="(.*?)"'
             p = ProducerListUrl(page_url=build_url,
                                 request_type='get',
                                 encode='utf-8',
                                 analyzer_rules_dict=build.to_dict(),
                                 current_url_rule=r'location\.href=(.*?)"',
                                 analyzer_type='regex',
                                 headers=self.headers)
             house_url_list = p.get_details()
             self.get_house_info(house_url_list)
         except Exception as e:
             # Fall back to the raw entry when the URL could not be built.
             print('楼栋错误,co_index={},url={}'.format(co_index, build_url or i), e)
Example #3
0
 def get_build_info(self, all_build_url_list):
     """Run the building-level regex rules over a page and return the result.

     A ``Building`` is filled with one regex per field, and its
     ``to_dict()`` output is handed to ``ProducerListUrl`` together with
     the ``GetData(...)`` link rule.

     :param all_build_url_list: passed unchanged as ``page_url``.
         NOTE(review): the name suggests a list, while the parameter it
         feeds is called ``page_url`` -- confirm what ``ProducerListUrl``
         accepts.
     :return: whatever ``ProducerListUrl.get_details()`` returns
         (presumably the matches of ``current_url_rule``, which captures
         both ``GetData`` arguments -- confirm).
     """
     b = Building(co_index)
     # Raw strings: "\(" and "\)" are regex escapes, not string escapes.
     b.co_id = r"onclick=GetData\('(.*?)',"
     b.bu_id = r"onclick=GetData\('.*?','(.*?)'"
     b.bu_num = "font12yellow-leftA'>.*?</span>套</td><td>.*?</td><td>(.*?)<"
     b.bu_all_house = "font12yellow-leftA'>(.*?)<"
     p = ProducerListUrl(
         page_url=all_build_url_list,
         request_type='get',
         encode='utf-8',
         analyzer_rules_dict=b.to_dict(),
         current_url_rule=r"onclick=GetData\('(.*?)','(.*?)'\)",
         analyzer_type='regex',
         headers=self.headers)
     return p.get_details()
Example #4
0
 def get_build_info(self, more_build_url):
     """Scrape each building page and forward its house links.

     Every entry of ``more_build_url`` is appended to the
     ``http://www.jmfc.com.cn/`` base URL.  A ``Building`` is filled with
     one regex per field and a ``ProducerListUrl`` (GBK-encoded page) is
     run with those rules; the value returned by ``get_details()`` is
     passed to ``self.get_house_info``.

     Any exception raised while handling an entry is printed and the loop
     continues with the next entry; nothing is returned.

     :param more_build_url: iterable of URL fragments (strings) relative
         to the site root.
     """
     # (attribute name, extraction regex) pairs, applied in this order.
     field_rules = (
         ('bu_num', '<tr bgcolor="#FFFFFF">.*?<td.*?>(.*?)<'),
         ('co_id', '楼盘首页.*?aid-(.*?)/'),
         ('bu_id', '&addno=12&action=loupantable&lzbm=(.*?)&ql_xh='),
         ('bu_pre_sale', '<tr bgcolor="#FFFFFF">.*?<td.*?>.*?<.*?<td.*?>(.*?)<'),
         ('bu_floor', '<tr bgcolor="#FFFFFF">.*?<td.*?>.*?<.*?<td.*?>.*?<.*?<td.*?>(.*?)<'),
         ('bu_all_house', '<tr bgcolor="#FFFFFF">.*?<td.*?>.*?<.*?<td.*?>.*?<.*?<td.*?>.*?<.*?<td.*?>(.*?)<'),
     )
     # Regex handed to ProducerListUrl as its current_url_rule.
     house_link_rule = '<tr bgcolor="#FFFFFF">.*?align="left".*?href="(.*?)"'

     for relative_url in more_build_url:
         try:
             building = Building(co_index)
             page_url = 'http://www.jmfc.com.cn/' + relative_url
             for attr_name, pattern in field_rules:
                 setattr(building, attr_name, pattern)
             producer = ProducerListUrl(
                 page_url=page_url,
                 request_type='get',
                 encode='gbk',
                 analyzer_rules_dict=building.to_dict(),
                 current_url_rule=house_link_rule,
                 analyzer_type='regex',
                 headers=self.headers)
             self.get_house_info(producer.get_details())
         except Exception as err:
             print(err)