def processHouse(self, houseStr): """ 对每个学区房记录进行处理 """ houseInfo = houseStr.find("div", attrs={"class": "inventory_list_r_tit_list"}).find_all("a") houseId = houseInfo[0]["href"][len(PrefixURL) :][:-1] address = houseStr.find("div", attrs={"class": "inventory_list_r_name_ad"}).text detailInfo = houseStr.find("div", attrs={"class": "inventory_list_r_details_r"}) details = detailInfo.find_all("span") price = details[2].text area = details[1].text # 如果houseId没被处理过 if not houseId in self.success_houseIds: print houseId, address, price, area house = House() house.houseId = houseId house.address = address house.price = price house.area = area house.flage = 0 self.houses.append(house) self.success_houseIds[houseId] = house else: print "id:%s exist" % houseId print houseId, address, price, area house = self.success_houseIds[houseId] if houseId == house.houseId and address == house.address and price == house.price and area == house.area: house.flage = 1 else: house.flage = 2 print "The same houseId have different data" self.houses.append(house)
def processHouse(self,houseStr): """ 对每个学区房记录进行处理 """ houseInfo = houseStr.find_all('a') houseId = houseInfo[0]['href'][len(self.prefixText):][:-5] addressUrl = self.prefixUrl + houseInfo[1]['href'] address = self.processAddress(addressUrl) price = (houseStr.find('div',attrs={"class": "price-pre"})).text area = (houseStr.find('div',attrs={"class": "where"}).find_all('span'))[3].text #如果houseId没被处理过 if not houseId in self.success_houseIds: print houseId,address,price,area house = House() house.houseId = houseId house.address = address house.price = price house.area = area house.flage = 0 self.houses.append(house) self.success_houseIds[houseId] = house else: print "id:%s exist" % houseId print houseId,address,price,area house = self.success_houseIds[houseId] if(houseId == house.houseId and address == house.address and price == house.price and area == house.area): house.flage = 1 else: house.flage = 2 print "The same houseId have different data" self.houses.append(house)