def comm_detail(self, comm_url_list, city):
    """Fetch the deal records of each community and store them as ``Comm`` rows.

    For every entry of ``comm_url_list`` except the first, two requests are
    made: the community page (for the district, city and region names) and
    the hard-coded ``sh.centanet.com`` ``GetDealRecord`` endpoint (for the
    deal list). Each deal in the JSON ``"result"`` list is converted into a
    ``Comm`` object and written with ``insert_db()``.

    Args:
        comm_url_list: Community URL fragments containing ``xq-<code>``.
            The first element is skipped.
        city: URL of the city's ``/xiaoqu/`` page; ``'/xiaoqu/'`` is replaced
            by each fragment to build the community page URL.

    Returns:
        None. Failures are logged with ``log.error`` and never raised: a bad
        record only skips that record, a bad community only skips that
        community.
    """
    # Without a timeout ``requests`` may block forever on a stalled server.
    request_timeout = 30

    for comm_url in comm_url_list[1:]:
        try:
            com_url = city.replace('/xiaoqu/', comm_url)
            code = re.search('xq-(.*)', comm_url).group(1).upper()
            comm_detail_url = (
                'http://sh.centanet.com/apipost/GetDealRecord?estateCode='
                + code
                + '&posttype=S&pageindex=1&pagesize=10000'
            )
            com_res = requests.get(
                com_url, headers=self.headers, timeout=request_timeout)
            res = requests.get(
                comm_detail_url, headers=self.headers, timeout=request_timeout)
            time.sleep(2)  # pause between communities

            html = etree.HTML(com_res.text)
            data_dict = json.loads(res.text)
            district_name = html.xpath("//div/h3/text()")[0]
            city_name = html.xpath("//div[@class='idx-city']/text()")[0]
            region = html.xpath("//a[@class='f000']/text()")[0]

            for data in data_dict["result"]:
                try:
                    # NOTE(review): ``source`` is not defined in this method;
                    # presumably a module-level name - confirm.
                    co = Comm(source)
                    co.district_name = district_name.strip()
                    co.region = region
                    co.city = city_name

                    try:
                        room_type = data["houseType"]
                        co.room = int(re.search(r'(\d)室', room_type).group(1))
                        co.hall = int(re.search(r'(\d)厅', room_type).group(1))
                    except Exception as e:
                        # The message previously had no placeholder, so the
                        # exception text was silently dropped.
                        log.error('roomtype为空{}'.format(e))

                    area = data['areaSize'].replace('平', '')
                    if area:
                        co.area = round(float(area), 2)

                    co.direction = data['direction']

                    # Test the raw value: once '20' is prepended the string is
                    # never empty, so the old guard could not skip a missing date.
                    deal_time = data['dealTime']
                    if deal_time:
                        co.trade_date = datetime.datetime.strptime(
                            '20' + deal_time, "%Y-%m-%d")

                    total_price = data['dealPrice']
                    # Multiplied by 10000 as in the original (price is
                    # presumably quoted in units of ten thousand - confirm).
                    co.total_price = int(
                        re.search(r'(\d+)', total_price).group(1)) * 10000

                    avg_price = data['unitPrice']
                    try:
                        co.avg_price = int(
                            re.search(r'(\d+)', avg_price).group(1))
                    except (AttributeError, TypeError, ValueError):
                        # No digits / non-string value: store no unit price.
                        co.avg_price = None

                    co.insert_db()
                except Exception as e:
                    log.error('解析失败{}'.format(e))
        except Exception as e:
            log.error("小区成交信息错误{}".format(e))
def get_city_info(self, city_dict):
    """Crawl the "chengjiao" (deal) listings of every city and store them.

    For each city the ``chengjiao/`` landing page is fetched and its district
    links are extracted. For every district, result pages ``pg1`` .. ``pg100``
    are requested; each listing found on a page is parsed into a ``Comm``
    record and written with ``insert_db()``. Listings whose total price
    contains ``*`` are skipped.

    Args:
        city_dict: Mapping of city name to the city's base URL.
            ``'chengjiao/'`` is appended to it, so the URL presumably ends
            with ``/`` - confirm against the caller.

    Returns:
        None. Errors are logged with ``log.error`` and never raised: a bad
        listing skips only that listing, a bad page only that page, and a bad
        city landing page only that city.
    """
    flags = re.S | re.M
    # Without a timeout ``requests`` may block forever on a stalled server.
    request_timeout = 30

    def parse_listing(listing_html, region, city_name):
        """Build a ``Comm`` from one listing snippet; None if the price is masked."""
        comm = Comm('链家在线')
        comm.region = region.strip()
        comm.city = city_name.strip()
        comm.district_name = re.search(
            r'target="_blank">(.*?)<', listing_html, flags).group(1).strip()
        comm.direction = re.search(
            r'class="houseIcon"></span>(.*?) \|',
            listing_html, flags).group(1).strip()

        try:
            comm.fitment = re.search(
                r'class="houseIcon"></span>.*? \|(.*?)\| ',
                listing_html, flags).group(1).strip()
        except Exception:
            comm.fitment = None

        try:
            height = re.search(
                r'class="positionIcon"></span>.*?\((.*?)\)',
                listing_html, flags).group(1).strip()
            comm.height = int(re.search(r'(\d+)', height, flags).group(1))
        except Exception:
            comm.height = None

        total_price = re.search(
            r"class='number'>(.*?)<", listing_html, flags).group(1).strip()
        if "*" in total_price:
            return None
        comm.total_price = int(
            re.search(r'(\d+)', total_price, flags).group(1)) * 10000

        # The pattern starts at 'arget' (no leading 't'); it still matches
        # inside 'target="_blank">' and is kept unchanged.
        room_type = re.search(
            r'arget="_blank">.*? (.*?) ',
            listing_html, flags).group(1).strip()
        try:
            comm.room = int(re.search(r'(\d)室', room_type, flags).group(1))
        except Exception:
            comm.room = 0
        try:
            comm.hall = int(re.search(r'(\d)厅', room_type, flags).group(1))
        except Exception:
            comm.hall = None

        # The original literal was broken across two physical lines; ``\s*``
        # accepts whatever whitespace (or none) sat at that position.
        area_text = re.search(
            r'target="_blank">.*? .*? \s*(.*?平米)',
            listing_html, flags).group(1).strip()
        area_value = None
        if area_text:
            area_text = area_text.replace('㎡', '').replace('平米', '')
            try:
                area_value = round(float(area_text), 2)
            except Exception:
                area_value = None
        comm.area = area_value

        trade_date = re.search(
            r'dealDate">(.*?)<', listing_html, flags).group(1).strip()
        if trade_date:
            comm.trade_date = datetime.datetime.strptime(
                trade_date, "%Y.%m.%d")

        # Previously computed by indexing the listing HTML string, which
        # always raised and left avg_price as None. Use the parsed values.
        if area_value:
            comm.avg_price = int(comm.total_price / area_value)
        else:
            comm.avg_price = None
        return comm

    for city in city_dict:
        city_url = city_dict[city] + 'chengjiao/'
        try:
            response = requests.get(
                city_url, headers=self.headers, timeout=request_timeout)
            html = response.text
            area_html = re.search(
                r'data-role="ershoufang".*?地铁', html, flags).group()
            area_links = re.findall(r'<a.*?</a>', area_html, flags)
            base_url = city_url.replace('/chengjiao/', '')

            for area_link in area_links:
                if 'ershoufang' in area_link:
                    continue
                area_url = re.search(
                    r'href="(.*?)"', area_link, flags).group(1)
                area = re.search(
                    r'<a.*?>(.*?)<', area_link, flags).group(1)

                for page in range(1, 101):
                    page_url = base_url + area_url + 'pg' + str(page)
                    content = ''
                    try:
                        result = requests.get(
                            page_url, headers=self.headers,
                            timeout=request_timeout)
                        content = result.text
                        listings = re.findall(
                            r'class="info".*?</div></div></li>',
                            content, flags)
                    except Exception as e:
                        # Log the page that failed, not the city landing page.
                        log.error(
                            '解析错误,source="{}",html="{}",e="{}"'.format(
                                '链家在线', content, e))
                        continue

                    for listing in listings:
                        # Isolate each listing so one malformed entry does
                        # not discard the rest of the page.
                        try:
                            comm = parse_listing(listing, area, city)
                            if comm is not None:
                                comm.insert_db()
                        except Exception as e:
                            log.error(
                                '解析错误,source="{}",html="{}",e="{}"'.format(
                                    '链家在线', listing, e))
        except Exception as e:
            log.error('请求错误,source="{}",url="{}",e="{}"'.format(
                '链家在线', city_url, e))