def parse(self, response):
    """Yield one item for every deal listed in ``response``.

    The currently selected area name is read from the area filter, stripped
    of spaces and newlines, and attached to every item.  The per-listing
    fields (title, link, deal date, prices, ...) are extracted as parallel
    lists and handed to ``getMinyanItem`` together with the row index.

    Nothing is yielded when ``response`` is ``None`` or when the page does
    not expose exactly one selected area; the latter case is logged, because
    without an area the items cannot be built.

    Args:
        response: Object exposing ``xpath(query).extract()``.

    Yields:
        Whatever ``getMinyanItem`` returns for each listing index.
    """
    # Local import: the file's import block is not visible from this chunk.
    import logging

    if response is None:
        return

    selected_areas = response.xpath(
        '//*[@data-role="ershoufang"]/div[1]/a[@class="selected CLICKDATA"]/text()'
    ).extract()
    # ``area`` is mandatory for every item; bail out early instead of
    # reaching the loop below with the name unbound.
    if not isinstance(selected_areas, list) or len(selected_areas) != 1:
        logging.getLogger(__name__).warning(
            'parse: expected exactly one selected area, got %r (url=%s)',
            selected_areas, getattr(response, 'url', None))
        return
    area = selected_areas[0].replace(' ', '').replace('\n', '')

    info_xpath = '//*[@data-component="list"]/ul/li/div[@class="info"]'

    def extract(suffix):
        """Return all matches of ``suffix`` below each listing's info div."""
        return response.xpath(info_xpath + suffix).extract()

    titles = extract('/div[@class="title"]/a/text()')
    links = extract('/div[@class="title"]/a/@href')
    deal_dates = extract(
        '/div[@class="address"]/div[@class="dealDate"]/text()')
    total_prices = extract(
        '/div[@class="address"]/div[@class="totalPrice"]/span/text()')
    unit_prices = extract(
        '/div[@class="flood"]/div[@class="unitPrice"]/span/text()')
    house_ages = extract('/div[@class="flood"]/div[1]/text()')
    listing_prices = extract(
        '/div[@class="dealCycleeInfo"]/span[@class="dealCycleTxt"][1]/span[1]/text()')
    deal_cycles = extract(
        '/div[@class="dealCycleeInfo"]/span[@class="dealCycleTxt"][1]/span[2]/text()')

    count = len(titles)
    # True when the house-age query returned exactly two entries per title.
    # NOTE(review): how getMinyanItem interprets this flag is not visible
    # here - confirm against its definition.
    two_ages_per_listing = len(house_ages) == count * 2

    # getMinyanItem takes the row index plus the full parallel lists, so the
    # loop iterates over indices rather than zipped rows.
    for index in range(count):
        yield getMinyanItem(index, links, titles, deal_dates, total_prices,
                            unit_prices, listing_prices, deal_cycles,
                            house_ages, two_ages_per_listing, area, city_name)
def parse_first(self, response):
    """Yield one item for every deal listed in ``response``.

    The currently selected area name is read from the area filter and
    attached to every item exactly as extracted (whitespace is not
    stripped).  The per-listing fields (title, link, deal date, prices, ...)
    are extracted as parallel lists and handed to ``getMinyanItem`` together
    with the row index.

    Nothing is yielded when ``response`` is ``None`` or when the page does
    not expose exactly one selected area; the latter case is logged, because
    without an area the items cannot be built.

    Args:
        response: Object exposing ``xpath(query).extract()``.

    Yields:
        Whatever ``getMinyanItem`` returns for each listing index.
    """
    # Local import: the file's import block is not visible from this chunk.
    import logging

    # Same guard as the sibling ``parse`` callback: a missing response means
    # there is nothing to scrape.
    if response is None:
        return

    selected_areas = response.xpath(
        '//*[@data-role="ershoufang"]/div[1]/a[@class="selected CLICKDATA"]/text()'
    ).extract()
    # ``area`` is mandatory for every item; bail out early instead of
    # reaching the loop below with the name unbound.
    if not isinstance(selected_areas, list) or len(selected_areas) != 1:
        logging.getLogger(__name__).warning(
            'parse_first: expected exactly one selected area, got %r (url=%s)',
            selected_areas, getattr(response, 'url', None))
        return
    area = selected_areas[0]

    info_xpath = '//*[@data-component="list"]/ul/li/div[@class="info"]'

    def extract(suffix):
        """Return all matches of ``suffix`` below each listing's info div."""
        return response.xpath(info_xpath + suffix).extract()

    titles = extract('/div[@class="title"]/a/text()')
    links = extract('/div[@class="title"]/a/@href')
    deal_dates = extract(
        '/div[@class="address"]/div[@class="dealDate"]/text()')
    total_prices = extract(
        '/div[@class="address"]/div[@class="totalPrice"]/span/text()')
    unit_prices = extract(
        '/div[@class="flood"]/div[@class="unitPrice"]/span/text()')
    house_ages = extract('/div[@class="flood"]/div[1]/text()')
    listing_prices = extract(
        '/div[@class="dealCycleeInfo"]/span[@class="dealCycleTxt"][1]/span[1]/text()')
    deal_cycles = extract(
        '/div[@class="dealCycleeInfo"]/span[@class="dealCycleTxt"][1]/span[2]/text()')

    count = len(titles)
    # True when the house-age query returned exactly two entries per title.
    # NOTE(review): how getMinyanItem interprets this flag is not visible
    # here - confirm against its definition.
    two_ages_per_listing = len(house_ages) == count * 2

    # getMinyanItem takes the row index plus the full parallel lists, so the
    # loop iterates over indices rather than zipped rows.
    for index in range(count):
        yield getMinyanItem(index, links, titles, deal_dates, total_prices,
                            unit_prices, listing_prices, deal_cycles,
                            house_ages, two_ages_per_listing, area, city_name)