예제 #1
0
    def add_home_to_database(self, home):
        """Create or update a RentDatabaseModel row from one MLS listing record.

        A new listing is populated from ``home`` and then reconciled against
        the database:

        * the same address already stored under the listing's provider: that
          row is updated from the new listing and saved;
        * the same address stored only under another provider: counted as a
          duplicate and not saved;
        * otherwise: the new listing is saved.

        Args:
            home: Listing record indexed by MLS field name (for example
                ``'StreetNumber'``, ``'ListPrice'``, ``'PETS_ALLOWED'``).
                Fields are read with ``home[...]``, so a missing field
                propagates the lookup error; only ``ValueError`` raised while
                parsing numbers or the availability date is handled here.

        Returns:
            None. Outcomes are reported through ``print`` and the
            ``self.num_*`` counters.
        """
        new_listing = RentDatabaseModel()

        try:
            # Home Address info
            new_listing.street_address = "{0} {1}".format(
                home['StreetNumber'], home['StreetName']).replace(',', '')
            new_listing.city = home['City']
            new_listing.state = home['StateOrProvince']
            new_listing.zip_code = home['PostalCode']
            new_listing.latitude = home['Latitude']
            new_listing.longitude = home['Longitude']

            # Home Basic info
            # ListPrice goes through float() first so values like "1500.00" parse
            new_listing.price = int(float(home['ListPrice']))
            new_listing.num_bedrooms = int(home['BedroomsTotal'])
            new_listing.num_bathrooms = int(home['BathroomsFull'])
            new_listing.apartment_number = home['UnitNumber']

            # MLS listing information
            new_listing.remarks = home['PublicRemarks']
            new_listing.listing_number = home['ListingId']
            new_listing.listing_agent = home['ListAgentMlsId']
            new_listing.listing_office = home['ListOfficeMlsId']
            new_listing.listing_provider = HomeProviderModel.objects.get_or_create(
                provider=HomeProviderModel.MLSPIN)[0]
            new_listing.showing_instructions = home['ShowingInstructions']
            new_listing.showing_remarks = home['FIRM_RMK1']

            # Amenities
            # Dogs and cats are both driven by the single PETS_ALLOWED field
            pets_allowed = 'yes' in home['PETS_ALLOWED'].lower()
            new_listing.dogs_allowed = pets_allowed
            new_listing.cats_allowed = pets_allowed

            remarks_scraper = WordScraper(new_listing.remarks)
            appliances_scraper = WordScraper(home['Appliances'])

            # The explicit feed flag is used first; free-text mentions can
            #   only turn air conditioning on, never off
            new_listing.air_conditioning = home['AIR_CONDITION'] == 'Yes'
            if remarks_scraper.look_for_ac() \
                    or appliances_scraper.look_for_ac():
                new_listing.air_conditioning = True

            new_listing.furnished = remarks_scraper.look_for_furnished() \
                or appliances_scraper.look_for_furnished()
            new_listing.hardwood_floors = remarks_scraper.look_for_hardwood_floors() \
                or appliances_scraper.look_for_hardwood_floors()
            new_listing.dishwasher = remarks_scraper.look_for_dishwasher() \
                or appliances_scraper.look_for_dishwasher()
            new_listing.laundry_in_building = remarks_scraper.look_for_laundry_in_building() \
                or appliances_scraper.look_for_laundry_in_building()
            new_listing.pool = remarks_scraper.look_for_pool()
            new_listing.patio_balcony = remarks_scraper.look_for_balcony()
            new_listing.storage = remarks_scraper.look_for_storage()

            new_listing.last_updated = self.update_timestamp

            # Map the feed's rental type onto a home type; anything
            #   unrecognised falls back to OTHER
            home_type_by_listing_type = {
                "Apartment": HomeTypeModel.APARTMENT,
                "Single Family": HomeTypeModel.SINGLE_FAMILY,
                "Condominium": HomeTypeModel.CONDO,
            }
            new_listing.home_type = HomeTypeModel.objects.get_or_create(
                home_type=home_type_by_listing_type.get(
                    home['RN_TYPE'], HomeTypeModel.OTHER))[0]

            if home['Date_Available']:
                date_available = datetime.strptime(home['Date_Available'],
                                                   '%Y-%m-%dT%H:%M:%S')
                new_listing.date_available = date_available
                # The naive date is stored as-is; the comparison against
                #   timezone.now() uses a US/Eastern localized copy
                localized_available = pytimezone('US/Eastern').localize(
                    date_available)
                if timezone.now() > localized_available - timedelta(
                        days=CURRENTLY_AVAILABLE_DELTA_DAYS):
                    new_listing.currently_available = True
                else:
                    self.num_available_in_future += 1
            else:
                # No availability date supplied: treat as available now
                new_listing.currently_available = True

        except ValueError:
            # A numeric or date field could not be parsed, so skip the listing.
            #   The tally lives on self so it survives this return; getattr
            #   covers instances that never initialised the counter.
            self.num_of_value_errors = getattr(
                self, 'num_of_value_errors', 0) + 1
            return

        # Every stored home at this exact address, regardless of provider
        same_address = RentDatabaseModel.objects.filter(
            street_address=new_listing.street_address,
            city=new_listing.city,
            state=new_listing.state,
            zip_code=new_listing.zip_code,
            apartment_number=new_listing.apartment_number)
        # ...narrowed to the homes that came from this listing's provider
        same_provider = same_address.filter(
            listing_provider=new_listing.listing_provider)

        # Determines if the home already exists as a MLSPIN house
        if same_provider.exists():

            # Retrieve the home that the home matches. The lookup is
            #   restricted to the provider so a copy of the address held by
            #   another provider cannot make get() return multiple rows.
            existing_apartment = same_provider.get()

            # Since the apartments are the same
            #   Update the existing apartment with the fields stored in the new listing
            existing_apartment.update(new_listing)
            try:
                existing_apartment.save()
                print("[ UPDATED ] {0}".format(
                    existing_apartment.full_address))
                self.num_updated_homes += 1
            except ValidationError:
                print('Validation error')
                self.num_validation_error += 1

        # Tests if the home exists within another provider
        #   If so mark it as a duplicate and don't add it
        elif same_address.exists():
            print("[ DUPLICATE ] {0}".format(new_listing.full_address))
            self.num_of_duplicates += 1

        else:
            try:
                new_listing.save()
                self.num_added_homes += 1
                print("[ ADDING ] " + new_listing.full_address)
            except IntegrityError:
                print("[ Integrity Error ] ")
                self.num_integrity_errors += 1
            except ValidationError:
                print("[ Validation Error ] ")
                self.num_validation_error += 1
예제 #2
0
    def parse_idx_feed(self):
        """Import rental listings from the pipe-delimited IDX feed in ``self.idx_txt``.

        The first line of the feed is treated as column headers and skipped.
        Each remaining line is split on ``'|'`` and turned into a
        RentDatabaseModel, which is then reconciled against the database:

        * the same address already stored under the listing's provider: the
          stored row is updated (keeping its latitude/longitude) and saved;
        * the same address stored only under another provider: counted as a
          duplicate and not saved;
        * otherwise: the address is geocoded and the listing is saved.

        ``self.num_homes`` caps how many feed lines are processed; ``-1``
        means process every line. After the loop, the MLSPIN provider's
        ``last_updated_feed`` is set to ``self.update_timestamp`` and a
        summary of the counters is logged.

        Returns:
            None.
        """

        lines = self.idx_txt
        print("Attempting to add *" + str(len(lines)) +
              "* apartments to the db...")
        print("An equivalent number of requests will be made to the geocoder")

        # Generate values for the different error cases for tracking purposes
        num_houses = 0
        num_of_duplicates = 0
        num_of_value_errors = 0
        # Reported in the summary below but never incremented in this method
        num_failed_to_update = 0
        num_failed_to_geolocate = 0
        num_not_for_rental = 0
        num_integrity_error = 0
        num_added_homes = 0
        num_updated_homes = 0
        num_homes_not_enough_cells = 0

        for line in lines[1:]:  # skips the col headers
            # if self.num_homes is equal to -1, then it means to loop through all homes,
            #   otherwise just loop for the indicated number of homes
            if self.num_homes != -1 and num_houses >= self.num_homes:
                break
            num_houses += 1
            new_listing = RentDatabaseModel()

            # Parse IDX feed to put each item into an array
            cells = line.split('|')

            # If the home doesn't have enough cells then something is wrong with the listing and it won't
            #   be added to the database. Otherwise it will cause an exception
            if len(cells) < 28:
                print("Removing home not enough cells")
                num_homes_not_enough_cells += 1
                continue

            # Make sure there are no commas in the street name.
            #   str.replace returns a new string, so the result must be kept
            street_name = cells[STREET_NAME].replace(',', '')
            address_words = street_name.split()

            # A street name ending in a digit is taken to carry a trailing
            #   apartment number, which is dropped. Slicing with [-1:] keeps
            #   an empty street name from raising IndexError.
            if street_name[-1:].isdecimal():
                address_words = address_words[:-1]

            # the purpose of encoding and then decoding is to remove any non-ascii characters
            clean_address = " ".join(address_words).encode(
                'ascii', errors='ignore').decode()

            # If any of the fields give a value error, then don't save the apartment
            try:
                # Initialize word scraper
                word_scraper = WordScraper(cells[REMARKS])

                # Set the HomeBaseModel Fields
                new_listing.street_address = normalize_street_address(
                    "{0} {1}".format(cells[STREET_NO], clean_address))
                town = self.towns[str(cells[TOWN_NUM])]
                new_listing.city = town["town"]
                new_listing.state = town["state"]
                new_listing.zip_code = cells[ZIP_CODE]
                new_listing.price = int(cells[LIST_PRICE])
                new_listing.laundromat_nearby = word_scraper.look_for_laundromat()

                # Set InteriorAmenitiesModel Fields
                # Currently don't support non-integers for num_bathrooms. Therefore
                #   each half bath is counted as one whole bath in the total
                num_baths = int(cells[NO_FULL_BATHS]) + int(
                    cells[NO_HALF_BATHS])
                new_listing.bath = num_baths > 0
                new_listing.num_bathrooms = num_baths
                new_listing.num_bedrooms = int(cells[NO_BEDROOMS])
                new_listing.furnished = word_scraper.look_for_furnished()
                new_listing.hardwood_floors = word_scraper.look_for_hardwood_floors()
                new_listing.dishwasher = word_scraper.look_for_dishwasher()
                new_listing.air_conditioning = word_scraper.look_for_ac()
                new_listing.dogs_allowed = word_scraper.look_for_pets("dogs")
                new_listing.cats_allowed = word_scraper.look_for_pets("cats")
                new_listing.laundry_in_building = word_scraper.look_for_laundry_in_building()

                # Set MLSpinDataModel fields
                new_listing.remarks = cells[REMARKS]
                new_listing.listing_number = int(cells[LIST_NO])
                new_listing.listing_provider = HomeProviderModel.objects.get(
                    provider=HomeProviderModel.MLSPIN)
                new_listing.listing_agent = cells[LIST_AGENT]
                new_listing.listing_office = cells[LIST_OFFICE]
                new_listing.last_updated = self.update_timestamp

                # Set RentDatabaseModel fields
                new_listing.apartment_number = cells[UNIT_NO].lower()

                # Set Exterior Amenities fields
                if int(cells[PARKING_SPACES]) > 0:
                    new_listing.parking_spot = True
                new_listing.pool = word_scraper.look_for_pool()
                new_listing.patio_balcony = word_scraper.look_for_balcony()

                new_listing.laundry_in_unit = word_scraper.look_for_laundry_in_unit()
                new_listing.gym = word_scraper.look_for_gym()
                new_listing.storage = word_scraper.look_for_storage()

                # Create the new home
                # verifies unit is a rental (RN denotes rental in MLS feed)
                if cells[PROP_TYPE] != "RN":
                    # Since we only support rentals right now we don't want to retrieve any other home types
                    print(
                        "Home not a rental, continuing. Error was with line {0}"
                        .format(line))
                    num_not_for_rental += 1
                    continue

                new_listing.home_type = HomeTypeModel.objects.get(
                    home_type=HomeTypeModel.APARTMENT)
                new_listing.currently_available = True

            except ValueError:
                print(
                    "Home could not be added. Error is with line: {0}".format(
                        line))
                num_of_value_errors += 1
                continue

            # Every stored home at this exact address, regardless of provider
            same_address = RentDatabaseModel.objects.filter(
                street_address=new_listing.street_address,
                city=new_listing.city,
                state=new_listing.state,
                zip_code=new_listing.zip_code,
                apartment_number=new_listing.apartment_number)
            # ...narrowed to the homes that came from this listing's provider
            same_provider = same_address.filter(
                listing_provider=new_listing.listing_provider)

            # Determines if the home already exists as a MLSPIN house
            if same_provider.exists():

                # Retrieve the home that the home matches. The lookup is
                #   restricted to the provider so a copy of the address held
                #   by another provider cannot make get() return multiple rows.
                existing_apartment = same_provider.get()

                # The lat and long is the only thing that is not computed for each new_listing since it costs money
                #   Therefore assume the old lat and long values are correct (Should not change)
                new_listing.latitude = existing_apartment.latitude
                new_listing.longitude = existing_apartment.longitude

                # Since the apartments are the same
                #   Update the existing apartment with the fields stored in the new listing
                existing_apartment.update(new_listing)
                existing_apartment.save()
                print("[ UPDATED ] {0}".format(
                    existing_apartment.full_address))
                num_updated_homes += 1

            # Tests if the home exists within another provider
            #   If so mark it as a duplicate and don't add it
            elif same_address.exists():
                print("[ DUPLICATE ] {0}".format(new_listing.full_address))
                num_of_duplicates += 1

            else:
                # If it is a new home then get the lat and long of the home.
                latlng = geolocator.maps_requester(
                    gmaps_api_key).get_lat_lon_from_address(
                        new_listing.full_address)

                # The geocoder result is compared against -1 as its failure value
                if latlng == -1:
                    print(
                        "Could not generate Lat and Long for apartment {0}, which had line {1} in IDX feed"
                        .format(new_listing.full_address, line))
                    num_failed_to_geolocate += 1
                    continue

                new_listing.latitude = latlng[0]
                new_listing.longitude = latlng[1]

                # After all the data is added, save the home to the database
                try:
                    new_listing.save()
                    num_added_homes += 1
                    print("[ ADDING ] " + new_listing.full_address)
                except IntegrityError:
                    print("[ Integrity Error ] ")
                    num_integrity_error += 1

        # Record on the provider when its feed was last processed
        manager = HomeProviderModel.objects.get(provider="MLSPIN")
        manager.last_updated_feed = self.update_timestamp
        manager.save()

        print("")
        print("RESULTS:")
        logger.info(
            "\nNumber of houses in database: {0}\n".format(num_houses) +
            "Num added homes: {0}\n".format(num_added_homes) +
            "Num updated homes: {0}\n".format(num_updated_homes) +
            "Update timestamp: {0}\n".format(self.update_timestamp.date()) +
            "Number of duplicates: {0}\n".format(num_of_duplicates) +
            "Number of value errors: {0}\n".format(num_of_value_errors) +
            "Number of failed updated houses: {0}\n".format(
                num_failed_to_update) +
            "Number of failed geolocates: {0}\n".format(
                num_failed_to_geolocate) +
            "Number of houses not for rental: {0}\n".format(
                num_not_for_rental) +
            "Number of integrity error is: {0}\n".format(num_integrity_error) +
            "Number of homes that don't have enough cells: {0}\n".format(
                num_homes_not_enough_cells))