Exemple #1
0
 def parse_detail(self, response):
     """Build a ``XiezilouItem`` from a detail page and the request meta.

     The floor is read from the rows of the first ``<table>`` on the page,
     the facility fields from the rows of ``table#li-12`` (each row holds
     up to two label/value pairs in cells 0-1 and 2-3), and the remaining
     fields are copied from ``response.meta``.

     A missing table or a row with fewer cells than expected is skipped
     instead of raising ``IndexError``.

     Yields:
         The populated item, only when ``housing_url`` is not a member of
         the Redis set named ``A5cbdSpider.name + "xzl_set"``.
     """
     data = response.meta.copy()
     items = XiezilouItem()

     # Label text (cell 0) -> item field filled from cell 1.
     left_fields = {
         "空调:": "air_condition",
         "装修情况:": "housing_decor",
         "公交站点:": "gj_site",
         "物业公司:": "property_company",
         "地铁线:": "subway_line",
         "电梯:": "elevator",
         "标准层高:": "layer_height",
     }
     # Label text (cell 2) -> item field filled from cell 3.
     right_fields = {
         "车位:": "parking_place",
         "交付时间:": "built_in",
         "公交路线:": "gj_line",
         "物业费用:": "property_fee",
         "地铁站点:": "subway_site",
         "土地性质:": "land_property",
     }
     # (item field, meta key) pairs copied verbatim from the request meta.
     meta_fields = (
         ("housing_name", "housing_name"),
         ("housing_price1", "housing_price1"),
         ("housing_area", "housing_area"),
         ("flag", "flag"),
         ("property_type", "property_type"),
         ("publish_time", "publish_time"),
         ("xzl_type", "xzl_type"),
         ("business_circle", "business_circle"),
         ("building_address", "housing_address"),
         ("housing_url", "housing_url"),
         ("city", "city"),
         ("district", "district"),
     )

     def cell_text(cells, index):
         """Return the first text node of ``cells[index]`` or ``None``."""
         if index >= len(cells):
             return None
         return cells[index].xpath("./text()").extract_first()

     tables = response.xpath("//table")
     if tables:
         for row in tables[0].xpath("./tr"):
             cells = row.xpath("./td")
             if cell_text(cells, 0) == "楼层":
                 floor = cell_text(cells, 1)
                 if floor:
                     items["housing_floor"] = floor.strip()

     for row in response.xpath("//table[@id='li-12']/tr"):
         cells = row.xpath("./td")
         for name_index, fields in ((0, left_fields), (2, right_fields)):
             field = fields.get(cell_text(cells, name_index))
             if field is None:
                 continue
             value = cell_text(cells, name_index + 1)
             # Empty/None values are stored unchanged, as before.
             items[field] = value.strip() if value else value

     for item_key, meta_key in meta_fields:
         items[item_key] = data[meta_key]

     if not self.redis.sismember(A5cbdSpider.name + "xzl_set", data["housing_url"]):
         yield items
Exemple #2
0
 def parse_detail(self, response):
     """Build a ``XiezilouItem`` from a detail page and the request meta.

     Address, flag, type and registration info are read from the
     ``dt``/``dd`` children of ``div.base_info > dl.borderb.mb10``.
     When the first ``dl.info_c > dd`` does not contain "个人", the agent
     name, company and phone are read directly; otherwise only an
     11-digit phone number is extracted from the contact text.

     The phone regex is a raw string (the former literal relied on the
     invalid escape ``\\d``), and a missing or non-matching phone text no
     longer raises; ``agent_phone`` is simply left unset in that case.

     Yields:
         The populated item, only when ``housing_url`` is not a member of
         the Redis set named ``Officese01Spider.name + "_xzl_set"``.
     """
     data = response.meta.copy()
     items = XiezilouItem()

     base_dl = "//div[@class='base_info']/dl[@class='borderb mb10']"
     dt_nodes = response.xpath(base_dl + "/dt")
     dd_nodes = response.xpath(base_dl + "/dd")
     items["building_address"] = dt_nodes[2].xpath(
         "./text()").extract_first().strip()
     items["flag"] = dd_nodes[0].xpath("./text()").extract()[1]
     items["xzl_type"] = dd_nodes[1].xpath(
         "./span/text()").extract()[1].strip("型:")
     items["corp_reged"] = dd_nodes[2].xpath(
         "./span/text()").extract_first().split(":")[1].strip()

     info = response.xpath("//dl[@class='info_c']/dd")[0].extract()
     if "个人" not in info:
         items["agent"] = response.xpath(
             "//span[@id='agentname']/text()").extract_first()
         items["agent_company"] = response.xpath(
             "//dl[@class='info_c']/dd[@class='black']/a/b/text()"
         ).extract_first()
         items["agent_phone"] = response.xpath(
             "//dl[@class='info_c']/dd[@class='gray6']/span/text()"
         ).extract_first()
     else:
         raw_agent_phone = response.xpath(
             "//dl[@class='info_c']/dd[@class='gray6']/span/b/text()"
         ).extract_first()
         # extract_first() returns None when the node is absent.
         phone_match = re.search(r".+(\d{11})", raw_agent_phone or "", re.M)
         if phone_match:
             items["agent_phone"] = phone_match.group(1)

     for field in ("city", "district", "housing_url", "housing_name",
                   "housing_area", "publish_time", "housing_price1"):
         items[field] = data[field]

     if not self.redis.sismember(Officese01Spider.name + "_xzl_set",
                                 data["housing_url"]):
         yield items
 def parse_detail_info_205(self, response):
     """Merge ``shopInfoMap`` from the JSON response into a ``XiezilouItem``.

     The JSON body is expected to expose
     ``["data"]["data"]["result"]["shopInfoMap"]`` with the keys "办公",
     "配套", "交通" and "地段"; these are stored in the meta copy as
     ``bangong``, ``zb_suite``, ``traffic`` and ``place``. Every known
     item field that is present in the meta copy is then copied to the
     item.

     ``province`` is now gated on its own presence; it used to be gated
     on ``housing_price2``, which either dropped the province or raised
     ``KeyError``.

     The item is printed, not yielded, when
     ``self.redis.zscore(zs_key, housing_url)`` equals 1.
     NOTE(review): confirm whether a ``yield`` was intended here.
     """
     data = response.meta.copy()
     res = json.loads(response.text)["data"]["data"]["result"]
     zs_key = data["zs_key"]
     items = XiezilouItem()

     shop_info = res["shopInfoMap"]
     data["bangong"] = shop_info["办公"]
     data["zb_suite"] = shop_info["配套"]
     data["traffic"] = shop_info["交通"]
     data["place"] = shop_info["地段"]

     # Copied in this order, each only when present in the meta copy.
     item_fields = (
         "province", "city", "district", "street", "xzl_type", "flag",
         "housing_url", "publish_time", "housing_name", "housing_price1",
         "housing_price2", "pay_method", "business_circle", "loupan",
         "housing_floor", "building_address", "agent", "agent_phone",
         "agent_company", "property_level", "housing_decor",
         "property_fee", "bangong", "zb_suite", "traffic", "place",
         "housing_detail_url", "phone", "housing_area",
     )
     for field in item_fields:
         if field in data:
             items[field] = data[field]

     if self.redis.zscore(zs_key, data['housing_url']) == 1:
         print(items)
 def parse_detail_info_281(self, response):
     """Merge the JSON ``result`` payload into a ``XiezilouItem``.

     Fields of ``["data"]["data"]["result"]`` in the JSON body are stored
     in the meta copy (area and property fee get their unit suffix
     appended by string concatenation). Every known item field that is
     present in the meta copy is then copied to the item.

     ``province`` is now gated on its own presence; it used to be gated
     on ``housing_price2``, which either dropped the province or raised
     ``KeyError``.

     Yields:
         The populated item, only when
         ``self.redis.zscore(zs_key, housing_url)`` equals 2.
     """
     data = response.meta.copy()
     res = json.loads(response.text)["data"]["data"]["result"]
     data["pay_method"] = res["paydetail"]
     data["business_circle"] = res["comarea"]
     data["loupan"] = res["projname"]
     data["housing_area"] = res["allacreage"] + "㎡"
     data["housing_floor"] = res["floor"]
     data["building_address"] = res["address"]
     data["property_level"] = res["propertygrade"]
     data["housing_decor"] = res["fitment"]
     data["property_fee"] = res["wuyefei"] + "元/平米·月"
     zs_key = data["zs_key"]
     items = XiezilouItem()

     # Copied in this order, each only when present in the meta copy.
     item_fields = (
         "province", "city", "district", "street", "xzl_type", "flag",
         "housing_url", "publish_time", "housing_name", "housing_price1",
         "housing_price2", "pay_method", "business_circle", "loupan",
         "housing_floor", "building_address", "agent", "agent_phone",
         "agent_company", "property_level", "housing_decor",
         "property_fee", "bangong", "zb_suite", "traffic", "place",
         "housing_detail_url", "phone", "housing_area",
     )
     for field in item_fields:
         if field in data:
             items[field] = data[field]

     if self.redis.zscore(zs_key, data['housing_url']) == 2:
         yield items
Exemple #5
0
 def parse_detail(self, response):
     """Build a ``XiezilouItem`` from a detail page and the request meta.

     Reads the title, the label/value rows under ``div#fy_info``, the
     facility list, the train/plane traffic entries, the publish time and
     the agent details from the page, then copies city, district, street,
     flag and housing_url from ``response.meta``.

     Yields:
         The populated item, only when
         ``self.redis.zscore(zs_key, housing_url)`` equals 2.
     """
     items = XiezilouItem()
     data = response.meta.copy()
     items["housing_name"] = response.xpath(
         "//h1[@class='tit-name']/span/text()").extract_first()

     # Labels whose third-span text is stored as-is in the mapped field.
     plain_fields = {
         "类型": "housing_type",
         "日租金": "housing_price1",
         "月租金": "housing_price2",
         "押付": "pay_method",
         "地址": "building_address",
         "面积": "housing_area",
         "起租期": "rent_lease",
         "使用率": "housing_use_rate",
         "工位数": "housing_workplace",
         "物业费": "property_fee",
         "注册": "corp_reged",
         "楼层": "housing_floor",
         "装修": "housing_decor",
         "单价": "housing_price1",
         "售价": "housing_price2",
     }
     for group in response.xpath("//div[@id='fy_info']/ul"):
         for row in group.xpath("./li"):
             spans = row.xpath("./span")
             name = spans[0].xpath("./text()").extract_first().strip()
             value = spans[2].xpath("./text()").extract_first().strip()
             if name == "楼盘":
                 # Prefer the linked name; fall back to the plain text.
                 loupan = row.xpath(
                     "./span[@class='desc']/a/text()").extract_first()
                 if not loupan:
                     loupan = row.xpath(
                         "./span[@class='desc']/text()").extract_first()
                 if loupan:
                     items["loupan"] = loupan.strip()
                 continue
             target = plain_fields.get(name)
             if target is not None:
                 items[target] = value

     facility_nodes = response.xpath(
         "//ul[@class='mod-peitao clearfix']/li[@class='']")
     facility_texts = [
         node.xpath("./p/text()").extract_first() for node in facility_nodes
     ]
     peitao = [text for text in facility_texts if text]
     items["central_air_condition"] = "有" if "中央空调" in peitao else "无"
     items["peitao"] = ",".join(peitao)

     def pair_text(node):
         """Join the text of the first two spans of *node* with a space."""
         spans = node.xpath("./span")
         first = spans[0].xpath("./text()").extract_first()
         second = spans[1].xpath("./text()").extract_first()
         return first + " " + second

     train_nodes = response.xpath(
         "//dl[@class='train_box clearfix']/dd[@class='clearfix']/div")
     plane_nodes = response.xpath(
         "//dl[@class='plane_box clearfix']/dd[@class='clearfix']/div")
     trains = [pair_text(node) for node in train_nodes]
     # Plane entries without any span are skipped.
     planes = [
         pair_text(node) for node in plane_nodes if node.xpath("./span")
     ]
     items["traffic"] = ",".join(trains) + "; " + ",".join(planes)

     items["publish_time"] = response.xpath(
         "//div[@class='hd-sub']/text()")[1].extract().strip()
     items["agent"] = response.xpath(
         "//div[@class='bro-info clearfix']/h5[@class='name']/text()"
     ).extract_first().strip()
     items["agent_phone"] = response.xpath(
         "//div[@class='broker_tel']/text()").extract_first().strip()
     items["agent_company"] = response.xpath(
         "//p[@class='comp_info']/a/text()").extract_first().strip()

     for field in ("city", "district", "street", "flag", "housing_url"):
         items[field] = data[field]

     zs_key = data['zs_key']
     if self.redis.zscore(zs_key, data['housing_url']) == 2:
         yield items
Exemple #6
0
 def parse_detail(self, response):
     """Build a ``XiezilouItem`` from a detail page and the request meta.

     Reads publish time, title, both prices, the ``li.item.f-fl``
     label/content rows, the optional address, agent details and the
     facility list from the page, then copies city, district, street,
     xzl_type, housing_url and flag from ``response.meta``.

     Yields:
         The populated item, only when
         ``self.redis.zscore(zs_key, housing_url)`` equals 2.
     """
     items = XiezilouItem()
     data = response.meta.copy()

     publish_text = response.xpath(
         "//li[@class='date']/text()").extract_first()
     items['publish_time'] = publish_text.lstrip("更新于")
     items['housing_name'] = response.xpath(
         "//p[@class='card-title']/i/text()").extract_first().strip()

     price_unit = response.xpath(
         "//div[@class='price-wrap']/text()")[1].extract()
     # Each price span comes in a "strongbox" variant and a plain one.
     price_node = response.xpath("//span[@class='price strongbox']")
     if not price_node:
         price_node = response.xpath("//span[@class='price']")
     unit_node = response.xpath("//span[@class='unit strongbox']")
     if not unit_node:
         unit_node = response.xpath("//span[@class='unit']")
     items['housing_price1'] = (
         price_node.xpath("./text()").extract_first() + price_unit)
     items['housing_price2'] = unit_node.xpath(
         "./text()").extract_first().strip(" |")

     label_fields = {
         "面积": 'housing_area',
         "楼层": 'housing_floor',
         "装修": "housing_decor",
         "租期": "rent_lease",
         "区域": "business_circle",
     }
     for row in response.xpath("//li[@class='item f-fl']"):
         label_parts = row.xpath("./span[@class='t']/text()").extract()
         label = "".join(label_parts).strip(":")
         content = row.xpath(
             "./span[@class='content']/text()").extract_first().strip()
         target = label_fields.get(label)
         if target is not None:
             items[target] = content

     if response.xpath("//li[@class='er-item f-fl']/span[@class='t2']"):
         items['building_address'] = response.xpath(
             "//li[@class='er-item f-fl']/span[@class='content']/text()"
         ).extract_first().strip()

     items['agent'] = response.xpath(
         "//div[@class='name']/a[@class='name']/text()").extract_first()
     items['agent_phone'] = response.xpath(
         "//a[@class='phone_num js_person_phone']/text()").extract_first()
     user_other = response.xpath("//div[@class='user_other']")
     if user_other:
         items['agent_company'] = user_other[0].xpath(
             "./span[@class='company']/text()").extract_first()

     facility_nodes = response.xpath(
         "//ul[@class='collocation f-clear']/li[@class='item']")
     peitao_raw = [
         node.xpath("./p[@class='text']/text()").extract_first()
         for node in facility_nodes
     ]
     if "中央空调" in peitao_raw:
         items['central_air_condition'] = "有"
     items['peitao'] = ",".join(peitao_raw)

     for field in ('city', 'district', 'street', 'xzl_type',
                   'housing_url', 'flag'):
         items[field] = data[field]

     zs_key = data['zs_key']
     if self.redis.zscore(zs_key, data['housing_url']) == 2:
         yield items
 def parse_detail(self, response):
     """Build a ``XiezilouItem`` from a detail page and the request meta.

     Reads the update date, the general-info label/value rows, the
     enabled facilities, the price spans, the building name and address,
     the property level (only when meta ``xzl_type`` is "纯写字楼") and the
     agent details from the page, then copies province, city, district,
     street, flag, xzl_type and housing_url from ``response.meta``.

     The date regex is a raw string (the former literal relied on the
     invalid escape ``\\d``), and an update text that is missing or holds
     no date no longer raises; ``publish_time`` is left unset instead.

     Yields:
         The populated item, only when
         ``self.redis.zscore(zs_key, housing_url)`` equals 2.
     """
     items = XiezilouItem()
     data = response.meta.copy()

     house_update = response.xpath(
         "//p[@class='house-update-info']/span[@class='up']")
     if house_update:
         update_text = house_update[0].xpath("./text()").extract_first()
         date_match = re.search(r".+(2\d+-\d+-\d+)", update_text or "")
         if date_match:
             items['publish_time'] = date_match.group(1)

     general_fields = {
         "建筑面积": 'housing_area',
         "可注册公司": 'corp_reged',
         "起租期": 'rent_lease',
         "物业费": 'property_fee',
         "所在楼层": 'housing_floor',
         "装修情况": 'housing_decor',
         "使用率": 'housing_use_rate',
         "付款方式": 'pay_method',
         "参考容纳工位数": 'housing_workplace',
     }
     general_rows = (
         response.xpath("//ul[@class='general-item-left']/li")
         + response.xpath("//ul[@class='general-item-right']/li"))
     for row in general_rows:
         label_name = row.xpath("./span[@class='mr_25 c_999']/text()"
                                ).extract_first().strip(":")
         label_value = row.xpath(
             "./span[@class='c_000']/text()").extract_first().strip()
         field = general_fields.get(label_name)
         if field is not None:
             items[field] = label_value

     peitao = []
     for facility in response.xpath("//li[@class='peitao-on']"):
         label = facility.xpath("./text()").extract_first()
         peitao.append(label)
         if label == "中央空调":
             items['central_air_condition'] = "有"
     items['peitao'] = ",".join(peitao)

     # The class attribute values below include their surrounding spaces.
     if response.xpath("//span[@class=' house_basic_title_money_mianyi ']"):
         items['housing_price2'] = "面议"
         items['housing_price1'] = "面议"
     else:
         if response.xpath("//span[@class='house_basic_title_money_num']"):
             items['housing_price1'] = response.xpath(
                 "//span[@class='house_basic_title_money_num']/text()"
             ).extract_first() + response.xpath(
                 "//span[@class='house_basic_title_money_unit']/text()"
             ).extract_first()
         if response.xpath(
                 "//span[@class='house_basic_title_money_num_chushou']"):
             items['housing_price2'] = response.xpath(
                 "//span[@class='house_basic_title_money_num_chushou']/text()"
             ).extract_first()
         elif response.xpath(
                 "//span[@class=' house_basic_title_money_num_chuzu ']"):
             items['housing_price2'] = response.xpath(
                 "//span[@class=' house_basic_title_money_num_chuzu ']/text()"
             ).extract_first() + response.xpath(
                 "//span[@class='house_basic_title_money_unit_chuzu']/text()"
             ).extract_first()

     for row in response.xpath("//ul[@class='house-basic-item3']/li"):
         item_name = row.xpath(
             "./span[@class='c_999']/text()").extract_first().strip(":")
         if item_name == "楼盘":
             items['loupan'] = row.xpath(
                 "./span[@class='c_000 mr_10']/span[@class='c_000']/text()"
             ).extract_first().strip()
         if item_name == "详细地址":
             if row.xpath("./span[@class='c_000 mr_10']"):
                 # Linked parts first, then the trailing span text.
                 address = [
                     sec.xpath("./text()").extract_first().strip()
                     for sec in row.xpath("./span[@class='c_000 mr_10']/a")
                 ]
                 address.append(
                     row.xpath("./span[@class='c_000 mr_10']/span/text()").
                     extract_first().strip())
                 items['building_address'] = " ".join(address)

     if data['xzl_type'] == "纯写字楼":
         items['property_level'] = response.xpath(
             "//div[@class='house-basic-item2']/p[@class='item3']/span[@class='sub']/text()"
         ).extract_first()

     items['agent'] = response.xpath(
         "//div[@class='jjr-name f14 c_555']/a[@class='c_000 jjr-name-txt']/text()"
     ).extract_first()
     items['agent_phone'] = response.xpath(
         "//p[@class='phone-num']/text()").extract_first()
     agent_company_raw = response.xpath("//p[@class='jr-item jjr-belong']")
     if agent_company_raw:
         items['agent_company'] = agent_company_raw.xpath(
             "./span[@class='c_000']/text()").extract_first()

     for field in ('province', 'city', 'district', 'street', 'flag',
                   'xzl_type', 'housing_url'):
         items[field] = data[field]

     zs_key = data['zs_key']
     if self.redis.zscore(zs_key, data['housing_url']) == 2:
         yield items
Exemple #8
0
 def parse_detail_info(self, response):
     """Build a ``XiezilouItem`` from a housing detail page.

     Building-level fields are taken unchanged from ``response.meta``.
     Feature texts, both prices, the area/floor/workplace/time details
     and the "name: phone" agent list are read from the page.

     Yields:
         The populated item, only when ``building_url`` exists in the
         Redis hash ``h_key`` and ``housing_url`` does not exist in
         ``lianjia_xzl_housing_finished_hashtable``.
     """
     items = XiezilouItem()

     # Meta keys copied verbatim to the item, in item-assignment order.
     passthrough_keys = (
         'province', 'city', 'district', 'street', 'flag', 'building_url',
         'building_name', 'area_extent', 'building_description',
         'price_extent', 'buiding_agent', 'buiding_agent_phone',
         'building_address', 'building_height', 'building_elevator',
         'business_circle', 'developer', 'housing_name',
         'housing_description', 'housing_url',
     )
     carried = {key: response.meta[key] for key in passthrough_keys}
     h_key = response.meta['h_key']

     # Feature label -> text; labels not seen on the page stay empty.
     feature_text = {"交通出行": "", "周边配套": "", "其他": ""}
     feature_nodes = response.xpath(
         "//div[@class='detail__feature-ul cf']/p[@class='detail__feature-text']")
     for node in feature_nodes:
         name = node.xpath("./strong/text()").extract_first().strip(":")
         text = node.xpath("./text()").extract_first().strip()
         if name in feature_text:
             feature_text[name] = text

     price_node = response.xpath("//p[@class='detail__price']")
     price_number = price_node.xpath("./label/span/text()").extract_first()
     price_suffix = price_node.xpath("./label/text()").extract_first()
     housing_price1 = price_number + price_suffix
     housing_price2 = response.xpath(
         "//div[@class='detail__priceunit']/text()").extract_first()

     # Detail label -> text; labels not seen on the page stay empty.
     detail_text = {"面积": "", "楼层": "", "工位": "", "时间": ""}
     for node in response.xpath("//div[@class='detail__info']/p"):
         name = node.xpath("./span/text()").extract_first().strip(":")
         text = node.xpath("./text()").extract_first()
         if name in detail_text:
             detail_text[name] = text

     agent_name_xpath = (
         "./div[@class='detail__agent-info']"
         "/div[@class='detail__agent-name']"
         "/div[@class='detail__agent-top']"
         "/span[@class='detail__agent-name']/text()")
     agents = []
     for node in response.xpath("//div[@class='detail__agent']"):
         agent_name = node.xpath(agent_name_xpath).extract_first()
         agent_phone = node.xpath(
             "./p[@class='detail__agent-phone']/text()").extract_first()
         agents.append(agent_name + ": " + agent_phone)

     for key, value in carried.items():
         items[key] = value
     items['traffic'] = feature_text["交通出行"] or ""
     items['zb_suite'] = feature_text["周边配套"] or ""
     items['other'] = feature_text["其他"] or ""
     items['housing_price1'] = housing_price1
     items['housing_price2'] = housing_price2
     items['housing_area'] = detail_text["面积"] or ""
     items['housing_floor'] = detail_text["楼层"] or ""
     items['housing_workplace'] = detail_text["工位"] or ""
     items['publish_time'] = detail_text["时间"] or ""
     items['housing_agents_info'] = ",".join(agents)

     building_known = self.redis.hexists(h_key, carried['building_url'])
     if building_known and not self.redis.hexists(
             "lianjia_xzl_housing_finished_hashtable",
             carried['housing_url']):
         yield items
Exemple #9
0
 def parse_detail(self, response):
     """Build a ``XiezilouItem`` from a detail page and the request meta.

     The rent price is the rent number and unit texts concatenated. The
     sale price is "暂无售价" when a ``noSalePrice`` span is present,
     otherwise the sale number and unit texts concatenated. All other page
     fields are the first text node of one XPath each (``None`` when the
     node is absent). Location, name, URL and address come from
     ``response.meta``.

     The former unused ``buildMessage`` container query was removed.

     Yields:
         The populated item, only when ``housing_url`` is not a member of
         the Redis set named ``KongjianjiaSpider.name + "_xzl_set"``.
     """
     items = XiezilouItem()
     data = response.meta.copy()

     def first_text(xpath):
         """Return the first result of *xpath* on the response, or None."""
         return response.xpath(xpath).extract_first()

     housing_price1 = first_text(
         "//div[@class='rentBox']/span[@class='rentNum num']/text()"
     ) + first_text(
         "//div[@class='rentBox']/span[@class='rentUnit']/text()")

     no_sale_price = response.xpath(
         "//div[@class='saleBox m-l-30']/span[@class='noSalePrice fl']"
     ).extract()
     if no_sale_price:
         housing_price2 = "暂无售价"
     else:
         housing_price2 = first_text(
             "//div[@class='saleBox m-l-30']/span[@class='saleNum num']/text()"
         ) + first_text(
             "//div[@class='saleBox m-l-30']/span[@class='saleUnit']/text()")

     basic_message = ("//div[@class='basicMessage container clearfix']"
                      "/div[@class='messageContent']")
     # (item field, XPath) pairs, in item-assignment order.
     page_fields = (
         ("housing_source",
          "//div[@class='fangyuanBox box']/span[@class='fysl']/text()"),
         ("housing_area",
          "//div[@class='xsmjBox box']/span[@class='fysl']/text()"),
         ("built_in",
          basic_message + "/span[@class='yearsText text firstRow']/text()"),
         ("green_rate",
          basic_message + "/span[@class='greeningText text']/text()"),
         ("building_area",
          "//span[@class='zongmianjiNum text firstRow']/text()"),
         ("building_arch", "//span[@class='jiegouText text']/text()"),
         ("building_total", "//span[@class='blocks text']/text()"),
         ("elevator_num", "//span[@class='ketiNum text lastRow']/text()"),
         ("lift_num", "//span[@class='huotiNum text firstRow']/text()"),
         ("layer_height", "//span[@class='cenggaoText text']/text()"),
         ("property_company", "//span[@class='wygs text firstRow']/text()"),
         ("property_fee", "//span[@class='wyf text']/text()"),
         ("parking_place", "//span[@class='carport text']/text()"),
         ("heat_supply", "//span[@class='cnfs text lastRow']/text()"),
         ("air_condition", "//span[@class='ktlx text firstRow']/text()"),
         ("power_voltage", "//span[@class='dianyaText text']/text()"),
         ("property_level", "//span[@class='dengjiText text']/text()"),
     )

     # (item field, meta key) pairs copied from the request meta.
     meta_fields = (
         ("city", "city"),
         ("district", "district"),
         ("street", "street"),
         ("housing_name", "housing_name"),
         ("housing_url", "housing_url"),
         ("building_address", "housing_address"),
     )
     for item_key, meta_key in meta_fields:
         items[item_key] = data[meta_key]
     items["housing_price1"] = housing_price1
     items["housing_price2"] = housing_price2
     for field, xpath in page_fields:
         items[field] = first_text(xpath)

     if not self.redis.sismember(KongjianjiaSpider.name + "_xzl_set",
                                 data["housing_url"]):
         yield items
Exemple #10
0
 def parse_detail(self, response):
     """Build a ``XiezilouItem`` from a listing detail page.

     Values carried over from the listing request are read from
     ``response.meta`` (``housing_price1``, ``city``, ``district``,
     ``housing_url``). The page exposes its attributes as flat ``<li>``
     lists in which a label cell is immediately followed by its value
     cell; the label text decides which item field receives the value.

     Two layouts are handled, selected by the publish type shown on the
     page: "中介" and "非中介". They differ in where the publisher name
     and the basic-info list live; the facilities list is shared. Any
     other (or missing) publish type yields an item that carries only the
     request metadata and the publish time.

     Missing nodes never abort the callback: an absent text node results
     in a ``None`` field value, and a trailing label cell without a value
     cell is ignored.

     Args:
         response: detail-page response exposing ``xpath`` and ``meta``.

     Yields:
         The populated item, but only when ``housing_url`` is not already
         a member of the ``<O571Spider.name>_xzl_set`` Redis set.
     """
     # Label text -> item field, basic-info list of the "中介" layout.
     agent_fields = {
         "楼盘名称:": "loupan",
         "楼盘类型:": "xzl_type",
         "城区商圈:": "business_circle",
         "楼盘地址:": "building_address",
         "门店经理:": "agent",
         "门店固话:": "agent_phone",
     }
     # Label text -> item field, basic-info list of the "非中介" layout.
     owner_fields = {
         "楼盘类型:": "xzl_type",
         "所在楼层:": "housing_floor",
         "招商面积:": "building_area",
         "租售价格:": "housing_price1",
         "付款方式:": "pay_method",
         "装修状况:": "housing_decor",
         "基本租期:": "rent_lease",
         "楼盘地址:": "building_address",
     }
     # Label text -> item field, facilities list (same in both layouts).
     # These labels are compared verbatim, without stripping.
     facility_fields = {
         "物管公司": "property",
         "楼层状况": "housing_floor",
         "总 建 面": "building_area",
         "物 管 费": "property_fee",
         "标准层高": "layer_height",
         "空  调": "central_air_condition",
         "车位数量": "parking_place",
         "标准层建面": "housing_area",
         "电  梯": "elevator",
         "车 位 费": "parking_fee",
         "开间建面": "kaijian_area",
         "员工餐厅": "employee_restaurant",
         "交通站点": "traffic_site",
         "轨道公交": "traffic",
     }

     items = XiezilouItem()

     def assign_pairs(cells, field_by_label, strip_label):
         # Cells alternate label, value, label, value, ...  Pairing the
         # even and odd slices with zip drops a trailing label that has
         # no value cell instead of raising IndexError.
         cells = list(cells)
         for label_cell, value_cell in zip(cells[::2], cells[1::2]):
             label = label_cell.xpath("./text()").extract_first()
             if label is None:
                 # A label cell without text cannot match any field.
                 continue
             if strip_label:
                 label = label.strip()
             field = field_by_label.get(label)
             if field is not None:
                 items[field] = value_cell.xpath(
                     "./text()").extract_first()

     data = response.meta.copy()
     items["housing_price1"] = data["housing_price1"]
     items["publish_time"] = response.xpath(
         "//span[@class='fr pr20']/font[@class='color8']/text()"
     ).extract_first()

     raw_type = response.xpath(
         "//span[@class='bis_user fl']/font[@class='color2']/text()"
     ).extract_first()
     # The page wraps the type in square brackets; a missing node is
     # treated like an unrecognised type rather than crashing on None.
     publish_type = (raw_type or "").strip("[").strip("]")

     if publish_type == "中介":
         items["publish_type"] = publish_type
         publisher_links = response.xpath("//span[@class='bis_user fl']/a")
         items["publisher"] = (
             publisher_links[0].xpath("./text()").extract_first()
             if publisher_links else None)
         assign_pairs(
             response.xpath("//div[@class='bis_actinfo clearfix']/ul/li"),
             agent_fields, True)
         # Facilities run after the basic info so they win on shared keys.
         assign_pairs(
             response.xpath("//ul[@class='bis_table clearfix']/li"),
             facility_fields, False)
     elif publish_type == "非中介":
         items["publish_type"] = publish_type
         items["publisher"] = response.xpath(
             "//span[@class='bis_user fl']/text()").extract_first()
         assign_pairs(
             response.xpath(
                 "//div[@class='box1 bis_info bgcolor1 clearfix']/ul/li"),
             owner_fields, True)
         # Facilities run after the basic info so they win on shared keys
         # (housing_floor, building_area).
         assign_pairs(
             response.xpath("//ul[@class='bis_table clearfix']/li"),
             facility_fields, False)

     items["city"] = data["city"]
     items["district"] = data["district"]
     items["housing_url"] = data["housing_url"]
     if not self.redis.sismember(O571Spider.name + "_xzl_set",
                                 data["housing_url"]):
         yield items
Exemple #11
0
 def parse_detail(self, response):
     """Build a ``XiezilouItem`` from a building detail page.

     Request metadata (``city``, ``district``, ``street``, ``xzl_type``,
     ``housing_url``, ``zs_key``) is read from ``response.meta``. Publish
     time, building name and headline price are scraped for every page;
     the address, completion date and facility attributes are scraped
     only when ``xzl_type`` is "写字楼".

     Missing nodes never abort the callback: an absent text node results
     in a ``None`` field value (or an empty part of the price string).

     Args:
         response: detail-page response exposing ``xpath`` and ``meta``.

     Yields:
         The populated item, but only when the Redis sorted-set score of
         ``housing_url`` under ``zs_key`` equals 2.
     """
     # Label text -> item field for the facility sections.
     facility_fields = {
         "层高": "layer_height",
         "层数": "building_height",
         "物业": "property",
         "物业费": "property_fee",
         "车位": "parking_place",
         "车位月租金": "parking_fee",
         "空调": "air_condition",
         "空调费": "air_condition_fee",
         "空调开放时长": "air_condition_time",
         "电梯": "elevator",
         "网络": "network",
         "入驻企业": "settled_enterprise",
     }

     def stripped(text):
         # extract_first() returns None when nothing matched.
         return text.strip() if text is not None else None

     items = XiezilouItem()
     data = response.meta.copy()
     xzl_type = data["xzl_type"]

     items["publish_time"] = stripped(response.xpath(
         "//span[@class='ddz-timestamp']/text()").extract_first())
     items["housing_name"] = stripped(response.xpath(
         "//h1[@class='fl']/text()").extract_first())

     price_boxes = response.xpath("//div[@class='top-price fr']")
     if price_boxes:
         price_box = price_boxes[0]
         amount = price_box.xpath(
             "./span[@class='price-num']/text()").extract_first()
         unit = price_box.xpath("./text()").extract_first()
         # Headline price is the number followed by its trimmed unit.
         items["housing_price1"] = (amount or "") + (unit or "").strip()
     else:
         items["housing_price1"] = None

     # NOTE(review): a per-unit rental summary used to be assembled here
     # from "//div[@class='fbody']/div/a" but was never stored on the
     # item, and it could raise TypeError on rows with missing text.
     # Reintroduce it once the item declares a field to hold it.

     if xzl_type == "写字楼":
         for row in response.xpath(
                 "//div[@class='clearfix donetime-address']/ul/li"):
             label = row.xpath(
                 "./span[@class='f-title']/text()").extract_first()
             if label == "地理位置":
                 # The address text sits inside a link within the value.
                 items["building_address"] = row.xpath(
                     "./span[@class='f-con']/a/text()").extract_first()
             elif label == "竣工时间":
                 items["built_in"] = row.xpath(
                     "./span[@class='f-con']/text()").extract_first()

         for section in response.xpath("//div[@class='clearfix ul-layer']"):
             for row in section.xpath("./ul/li"):
                 label = row.xpath(
                     "./span[@class='f-title']/text()").extract_first()
                 field = facility_fields.get(label)
                 if field is not None:
                     items[field] = row.xpath(
                         "./span[@class='f-con']/text()").extract_first()

     items["city"] = data["city"]
     items["district"] = data["district"]
     items["street"] = data["street"]
     items["xzl_type"] = xzl_type
     items["housing_url"] = data["housing_url"]
     zs_key = data["zs_key"]
     if self.redis.zscore(zs_key, data['housing_url']) == 2:
         yield items