Beispiel #1
0
    def geocode(self):
        """Update latitude, longitude, rating and ZIP in Locations table."""
        print('\nGeocoding...')

        null_rating_rows = self.get_rows_with_null_rating()

        for row in null_rating_rows:
            full_address = "{0} {1}, New Orleans, LA".format(
                row.street_number, row.address)

            result = self.gmaps.geocode(full_address)

            if len(result) == 0:
                log.info('No geocoding results for: {}'.format(full_address))

                # TODO: Need to also note failure so future geocoding scripts
                #   don't keep trying and failing on the same addresses.
                #   Possibly update Location's `rating` and/or Cleaned's
                #   `location_publish` fields.
                continue

            details = self.process_google_results(result)

            try:
                with SESSION.begin_nested():
                    u = update(Location)
                    u = u.values(details)
                    u = u.where(Location.document_id == row.document_id)
                    SESSION.execute(u)
                    SESSION.flush()
            except Exception as error:  # TODO: Handle specific errors.
                log.exception(error, exc_info=True)
                SESSION.rollback()

            SESSION.commit()
Beispiel #2
0
    def commit_to_database(self, table, output):
        """Commit to database using nested transactions and exceptions."""
        try:
            # TODO: Is this the correct method for this?
            with SESSION.begin_nested():
                i = insert(getattr(db, table))
                vals = i.values(output)
                SESSION.execute(vals)  # TODO: What is this?
                SESSION.flush()
        except Exception as error:
            log.debug(error, exc_info=True)
            SESSION.rollback()

        SESSION.commit()  # TODO: Should this be here?
Beispiel #3
0
    def commit_to_database(self, table, output):
        """Commit to database using nested transactions and exceptions."""
        try:
            # TODO: Is this the correct method for this?
            with SESSION.begin_nested():
                i = insert(getattr(db, table))
                vals = i.values(output)
                SESSION.execute(vals)  # TODO: What is this?
                SESSION.flush()
        except Exception as error:
            log.debug(error, exc_info=True)
            SESSION.rollback()

        SESSION.commit()  # TODO: Should this be here?
Beispiel #4
0
    def check_geocoder_good_rating(self):
        """Check if PostGIS Geocoder rating scored 3 or lower: good."""
        SESSION.query(
            Location.rating,
            Location.location_publish
        ).filter(
            (Location.rating == 'RANGE_INTERPOLATED') |
            (Location.rating == 'ROOFTOP')
        ).update({"location_publish": True})

        try:
            with SESSION.begin_nested():
                SESSION.flush()
        except Exception as error:
            log.exception(error, exc_info=True)
            SESSION.rollback()

        SESSION.commit()
Beispiel #5
0
    def check_geocoder_bad_rating(self):
        """Check if PostGIS Geocoder rating scored higher than 3: bad."""
        SESSION.query(
            Location.rating,
            Location.location_publish
        ).filter(
            (Location.rating == 'GEOMETRIC_CENTER') |
            (Location.rating == 'APPROXIMATE') |
            (Location.rating.is_(None))
        ).update({"location_publish": False})

        try:
            with SESSION.begin_nested():
                SESSION.flush()
        except Exception as error:
            log.exception(error, exc_info=True)
            SESSION.rollback()

        SESSION.commit()
Beispiel #6
0
    def commit_rows(self, rows):
        """Commit JOIN-ed rows to the cleaned table."""
        log.debug('Committing %d rows', len(rows))

        for count, row in enumerate(rows):
            log.debug("Row %d", count)
            try:
                with SESSION.begin_nested():
                    i = insert(Cleaned)
                    i = i.values(row)
                    SESSION.execute(i)
                    SESSION.flush()
            except Exception as error:
                log.debug('count: %s', count)
                log.exception(error, exc_info=True)
                SESSION.rollback()

            SESSION.commit()

        log.debug('%d rows committed', len(rows))
Beispiel #7
0
    def commit_rows(self, rows):
        """Commit JOIN-ed rows to the cleaned table."""
        log.debug('Committing %d rows', len(rows))

        for count, row in enumerate(rows):
            log.debug("Row %d", count)
            try:
                with SESSION.begin_nested():
                    i = insert(Cleaned)
                    i = i.values(row)
                    SESSION.execute(i)
                    SESSION.flush()
            except Exception as error:
                log.debug('count: %s', count)
                log.exception(error, exc_info=True)
                SESSION.rollback()

            SESSION.commit()

        log.debug('%d rows committed', len(rows))
Beispiel #8
0
    def check_west_of_new_orleans(self):
        """Check if geocoded coords are within west border of New Orleans."""
        # Long less than -90.140388 is west of New Orleans:
        SESSION.query(
            Location.longitude,
            Location.location_publish
        ).filter(
            Location.longitude < -90.140388
        ).update({
            "location_publish": False
        })

        try:
            with SESSION.begin_nested():
                SESSION.flush()
        except Exception as error:
            log.exception(error, exc_info=True)
            SESSION.rollback()

        SESSION.commit()
Beispiel #9
0
    def check_high_amount(self):
        """Check if sale amount is unreasonably high (>= $20,000,000)."""
        # Anything over $20,000,000 wouldn't be impossible, but is rare
        SESSION.query(
            Detail.amount,
            Detail.detail_publish
        ).filter(
            Detail.amount >= 20000000
        ).update({
            "detail_publish": False
        })

        try:
            with SESSION.begin_nested():
                SESSION.flush()
        except Exception as error:
            log.exception(error, exc_info=True)
            SESSION.rollback()

        SESSION.commit()
Beispiel #10
0
    def check_low_amount(self):
        """Check if sale amount is unreasonably low (<= $0)."""
        # Not sure about these, so check them all for now to be safe
        SESSION.query(
            Detail.amount,
            Detail.detail_publish
        ).filter(
            Detail.amount <= 0
        ).update({
            "detail_publish": False
        })

        try:
            with SESSION.begin_nested():
                SESSION.flush()
        except Exception as error:
            log.exception(error, exc_info=True)
            SESSION.rollback()

        SESSION.commit()
Beispiel #11
0
    def check_north_of_new_orleans(self):
        """Check if geocoded coords are within north border of New Orleans."""
        # Lat less than 29.864543 is north of New Orleans:
        SESSION.query(
            Location.latitude,
            Location.location_publish
        ).filter(
            Location.latitude > 30.181719
        ).update({
            "location_publish": False
        })

        try:
            with SESSION.begin_nested():
                SESSION.flush()
        except Exception as error:
            log.exception(error, exc_info=True)
            SESSION.rollback()

        SESSION.commit()
Beispiel #12
0
    def make_all_locations_publishable(self):
        """
        Assume all sales are publishable.

        Set location_publish = 1. Then set to 0 if questionable data is  found.
        """
        # Assume publishable, then check for reasons not to publish.
        SESSION.query(
            Location.location_publish
        ).update({
            "location_publish": True
        })

        try:
            with SESSION.begin_nested():
                SESSION.flush()
        except Exception as error:
            log.exception(error, exc_info=True)
            SESSION.rollback()

        SESSION.commit()
Beispiel #13
0
    def check_if_no_date(self):
        """Check if sale has a date."""
        SESSION.query(
            Detail.document_date,
            Detail.document_recorded,
            Detail.detail_publish
        ).filter(
            (Detail.document_date is None) |
            (Detail.document_recorded is None)
        ).update(
            {"detail_publish": False}
        )

        try:
            with SESSION.begin_nested():
                SESSION.flush()
        except Exception as error:
            log.exception(error, exc_info=True)
            SESSION.rollback()

        SESSION.commit()
Beispiel #14
0
    def check_relative_date(self):
        """Check if sale date is >6 months prior to the recorded date."""
        # Convert date strings to date format
        new_initial_date = datetime.strptime(
            self.initial_date, '%Y-%m-%d').date()
        new_until_date = datetime.strptime(
            self.until_date, '%Y-%m-%d').date()
        current_date = new_initial_date

        # Evaluate "30 days ago" based on that particular day
        while current_date != new_until_date:
            # Update date range
            old_date = current_date - timedelta(days=180)
            previous_date = current_date - timedelta(days=1)

            # Copy datetime objects to date strings
            old_date_string = old_date.strftime('%Y-%m-%d')
            previous_date_string = previous_date.strftime('%Y-%m-%d')
            current_date_string = current_date.strftime('%Y-%m-%d')

            # For sales recorded on a given day, check if the document
            # date is unbelievable (too old or in the future)

            try:
                with SESSION.begin_nested():
                    SESSION.query(
                        Detail.document_recorded,
                        Detail.document_date,
                        Detail.detail_publish
                    ).filter(
                        Detail.document_recorded == current_date_string
                    ).filter(
                        Detail.document_date < old_date_string
                    ).update({"detail_publish": False})

                    SESSION.flush()
            except Exception as error:
                log.exception(error, exc_info=True)
                SESSION.rollback()

            try:
                with SESSION.begin_nested():
                    SESSION.query(
                        Detail.document_recorded,
                        Detail.document_date,
                        Detail.detail_publish
                    ).filter(
                        Detail.document_recorded == current_date_string
                    ).filter(
                        Detail.document_date > previous_date_string
                    ).update({
                        "detail_publish": False
                    })

                    SESSION.flush()
            except Exception as error:
                log.exception(error, exc_info=True)
                SESSION.rollback()

            SESSION.commit()

            current_date = current_date + timedelta(days=1)