def parseHouse(self):
        """Download the listing at ``self.url``, build a ``House`` and store it.

        Every path returns ``None``. Nothing is stored when:

        * ``self.isInvalidPage(response)`` is true;
        * ``self.isMeetFireWall(response)`` is true -- ``self.runSpider`` is
          then set to ``False`` and ``(self.name, self.url)`` is handed to
          ``RedisUtils().add_to_redis``;
        * ``response.status`` equals 404 -- the message is printed and also
          passed to ``Utils.write_error``;
        * the listing date lies more than 10 whole days before now.

        Otherwise the populated ``House`` is passed to ``DB_Manager``.

        Raises:
            Whatever ``datetime.datetime.strptime`` raises when ``house.date``
            is not a ``'%Y-%m-%d'`` string; nothing is caught here.
        """
        # Choose the downloader entry point from the https flag, then fetch.
        if self.https:
            fetch = self.downloader.download_https_response
        else:
            fetch = self.downloader.download_html_response
        response = fetch(self.url)

        if self.isInvalidPage(response):
            return

        if self.isMeetFireWall(response):
            # Stop the spider and hand this URL to the Redis helper.
            # NOTE(review): presumably so it can be retried later -- confirm.
            self.runSpider = False
            RedisUtils().add_to_redis(self.name, self.url)
            return

        if response.status == 404:
            not_found = '%s>%s网页不存在 %s'
            print(not_found % (Utils.getCurrentTime(), self.source, self.url))
            Utils.write_error(
                not_found % (Utils.getCurrentTime(), self.source, self.url))
            return

        # Fill the record field by field. The order matters: ``rooms`` is
        # derived from ``house_type`` and ``getLatLon`` receives the partly
        # filled record.
        house = House()
        house.url = self.getMobileUrl(response)
        house.title = self.getTitle(response)
        house.image_url = self.getImageUrl(response)
        house.city = self.getCity(response)
        house.district = self.getDistrict(response)
        house.rental = self.getRental(response)
        house.campus = self.getCampus(response)
        house.date = self.getDate(response)
        house.address = self.getAddress(response)
        house.source = self.source
        house.house_type = self.getHouseType(response)
        house.rooms = self.getRooms(house.house_type)
        house.area = self.getArea(response)
        house.floor = self.getFloor(response)
        house.contact = self.getContact(response)
        house.phone = self.getPhone(response)
        house.rent_type = self.getRentType(response)
        house.lat, house.lon = self.getLatLon(house)
        house.md5 = Utils.generateMD5(house.url)
        house.time = Utils.getCurrentTime()

        # Age of the listing in whole days, measured from its date string.
        now = datetime.datetime.now()
        listed_on = datetime.datetime.strptime(house.date, '%Y-%m-%d')
        if (now - listed_on).days <= 10:
            DB_Manager(house)
            return

        # Older than 10 days: report it and do not store it.
        print('%s 58过期房源:%s;%s;%s' %
              (Utils.getCurrentTime(), house.date, house.title, house.url))
Beispiel #2
0
 def __init__(self):
     """Create this object's ``RedisUtils`` helper.

     The instance being initialised is passed to ``RedisUtils`` and the
     resulting object is kept on ``self.redis``.
     """
     # NOTE(review): RedisUtils is defined elsewhere; what it does with the
     # owner reference it receives is not visible from here.
     self.redis = RedisUtils(self)