Exemple #1
0
    def comm_detail(self, comm_url_list, city):
        """Fetch and store the deal records of each community in the list.

        For every URL fragment in ``comm_url_list`` (the first entry is
        skipped) the community page and the deal-record JSON endpoint are
        requested. Each entry of the JSON ``result`` list is converted into a
        ``Comm`` and saved with ``insert_db()``.

        Failures are logged and never raised: a bad record skips only that
        record, and a bad community skips only that community.

        :param comm_url_list: URL fragments, each containing ``xq-<code>``.
        :param city: URL whose ``/xiaoqu/`` segment is replaced by each
            fragment to build the community page URL.
        """
        flags = re.S | re.M
        for comm_url in comm_url_list[1:]:
            try:
                com_url = city.replace('/xiaoqu/', comm_url)
                # The estate code is everything after "xq-", upper-cased.
                statecode = re.search('xq-(.*)', comm_url).group(1)
                code = statecode.upper()
                comm_detail_url = (
                    'http://sh.centanet.com/apipost/GetDealRecord?estateCode='
                    + code + '&posttype=S&pageindex=1&pagesize=10000')
                com_res = requests.get(com_url, headers=self.headers)
                res = requests.get(comm_detail_url, headers=self.headers)
                # Pause between communities after both requests were sent.
                time.sleep(2)
                html = etree.HTML(com_res.text)
                data_dict = json.loads(res.text)
                # Any missing node raises IndexError and skips the community.
                district_name = html.xpath("//div/h3/text()")[0]
                city_name = html.xpath("//div[@class='idx-city']/text()")[0]
                region = html.xpath("//a[@class='f000']/text()")[0]

                for data in data_dict["result"]:
                    try:
                        # NOTE(review): `source` is not defined in this
                        # method; presumably a module-level name - confirm.
                        co = Comm(source)
                        co.district_name = district_name.strip()
                        co.region = region
                        co.city = city_name
                        try:
                            room_type = data["houseType"]
                            co.room = int(
                                re.search(r'(\d)室', room_type,
                                          flags).group(1))
                            co.hall = int(
                                re.search(r'(\d)厅', room_type,
                                          flags).group(1))
                        except Exception as e:
                            # Room/hall are optional; keep the record.
                            log.error('roomtype为空{}'.format(e))
                        area = data['areaSize'].replace('平', '')
                        if area:
                            co.area = round(float(area), 2)
                        co.direction = data['direction']

                        # Check the raw field: the "20"-prefixed string is
                        # never empty, so testing it could not skip a
                        # missing date.
                        deal_time = data['dealTime']
                        if deal_time:
                            co.trade_date = datetime.datetime.strptime(
                                '20' + deal_time, "%Y-%m-%d")

                        total_price = data['dealPrice']
                        # The first run of digits is multiplied by 10000.
                        co.total_price = int(
                            re.search(r'(\d+)', total_price,
                                      flags).group(1)) * 10000

                        avg_price = data['unitPrice']
                        try:
                            co.avg_price = int(
                                re.search(r'(\d+)', avg_price,
                                          flags).group(1))
                        except Exception:
                            co.avg_price = None
                        co.insert_db()
                    except Exception as e:
                        log.error('解析失败{}'.format(e))
            except Exception as e:
                log.error("小区成交信息错误{}".format(e))
Exemple #2
0
 def get_city_info(self, city_dict):
     """Scrape deal listings for every city and store them as ``Comm`` rows.

     For each city the ``chengjiao/`` page is fetched and the area links
     between ``data-role="ershoufang"`` and ``地铁`` are extracted. Pages
     ``pg1`` to ``pg100`` of every area are then fetched, and each listing
     snippet is parsed into a ``Comm`` and saved with ``insert_db()``.

     Failures are logged and never raised: a parse error abandons the rest
     of that page, and an error outside the page loop abandons that city.

     :param city_dict: mapping of city name to base URL. The base URL
         presumably ends with ``/`` so that ``/chengjiao/`` can be stripped
         again when building page URLs - confirm against the caller.
     """
     flags = re.S | re.M

     def first_group(pattern, text):
         """Return group 1 of the first match; AttributeError if none."""
         return re.search(pattern, text, flags).group(1)

     def build_comm(snippet, city_name, area_name):
         """Parse one listing snippet; return None when the price is masked."""
         comm = Comm('链家在线')
         comm.region = area_name.strip()
         comm.city = city_name.strip()
         comm.district_name = first_group(
             'target="_blank">(.*?)<', snippet).strip()
         comm.direction = first_group(
             r'class="houseIcon"></span>(.*?) \|', snippet).strip()
         try:
             comm.fitment = first_group(
                 r'class="houseIcon"></span>.*? \|(.*?)\| ',
                 snippet).strip()
         except Exception:
             comm.fitment = None
         try:
             height = first_group(
                 r'class="positionIcon"></span>.*?\((.*?)\)',
                 snippet).strip()
             comm.height = int(first_group(r'(\d+)', height))
         except Exception:
             comm.height = None

         total_price = first_group(
             "class='number'>(.*?)<", snippet).strip()
         if "*" in total_price:
             # Price is masked with asterisks; nothing usable to store.
             return None
         comm.total_price = int(first_group(r'(\d+)', total_price)) * 10000

         room_type = first_group(
             'arget="_blank">.*? (.*?) ', snippet).strip()
         try:
             comm.room = int(first_group(r'(\d)室', room_type))
         except Exception:
             comm.room = 0
         try:
             comm.hall = int(first_group(r'(\d)厅', room_type))
         except Exception:
             comm.hall = None

         area_value = None
         area_text = first_group(
             'target="_blank">.*? .*? (.*?平米)', snippet).strip()
         if area_text:
             area_text = area_text.replace('㎡', '').replace('平米', '')
             try:
                 area_value = round(float(area_text), 2)
             except Exception:
                 area_value = None
             comm.area = area_value

         trade_date = first_group('dealDate">(.*?)<', snippet).strip()
         if trade_date:
             comm.trade_date = datetime.datetime.strptime(
                 trade_date, "%Y.%m.%d")

         # Average price is derived from the parsed total price and area;
         # it stays None when the area is missing or zero.
         try:
             comm.avg_price = int(comm.total_price / area_value)
         except (TypeError, ValueError, ZeroDivisionError, OverflowError):
             comm.avg_price = None
         return comm

     for city in city_dict:
         city_url = city_dict[city] + 'chengjiao/'
         try:
             response = requests.get(city_url, headers=self.headers)
             html = response.text
             area_html = re.search('data-role="ershoufang".*?地铁', html,
                                   flags).group()
             area_links = re.findall('<a.*?</a>', area_html, flags)
             for area_link in area_links:
                 if 'ershoufang' in area_link:
                     continue
                 area_url = first_group('href="(.*?)"', area_link)
                 area = first_group('<a.*?>(.*?)<', area_link)
                 for page in range(1, 101):
                     # area_url presumably already starts with
                     # "/chengjiao/", hence the strip - TODO confirm.
                     page_url = city_url.replace(
                         '/chengjiao/', '') + area_url + 'pg' + str(page)
                     # Pre-bind so the handler can log it even when the
                     # request itself fails.
                     content = ''
                     try:
                         result = requests.get(page_url,
                                               headers=self.headers)
                         content = result.text
                         snippets = re.findall(
                             'class="info".*?</div></div></li>', content,
                             flags)
                         for snippet in snippets:
                             comm = build_comm(snippet, city, area)
                             if comm is not None:
                                 comm.insert_db()
                     except Exception as e:
                         # Log the page that failed, not the city page.
                         log.error(
                             '解析错误,source="{}",html="{}",e="{}"'.format(
                                 '链家在线', content, e))
         except Exception as e:
             log.error('请求错误,source="{}",url="{}",e="{}"'.format(
                 '链家在线', city_url, e))