def update_this_dates_permanent_flag(self, current_datetime,
                                     early_permanent_datetime,
                                     late_permanent_datetime):
    """Set permanent_flag on the Detail and Cleaned rows for one date.

    Rows whose document_recorded equals current_datetime get
    permanent_flag = True when current_datetime lies within
    [early_permanent_datetime, late_permanent_datetime] (both ends
    inclusive), and False otherwise. The session is committed afterwards.
    """
    is_permanent = bool(
        early_permanent_datetime <= current_datetime <= late_permanent_datetime
    )
    recorded_on = '%s' % current_datetime

    # Both tables receive the same flag for the same recorded date.
    for model in (Detail, Cleaned):
        SESSION.query(model).filter(
            model.document_recorded == recorded_on
        ).update({"permanent_flag": is_permanent})

    SESSION.commit()
def update_this_dates_permanent_flag(self, current_datetime,
                                     early_permanent_datetime,
                                     late_permanent_datetime):
    """Flag one recorded date as permanent or not in Detail and Cleaned.

    The date counts as permanent when it is no earlier than
    early_permanent_datetime and no later than late_permanent_datetime.
    Matching rows in both tables are updated, then the session is committed.
    """
    if (early_permanent_datetime <= current_datetime and
            current_datetime <= late_permanent_datetime):
        flag = True
    else:
        flag = False

    new_values = {"permanent_flag": flag}

    detail_rows = SESSION.query(Detail).filter(
        Detail.document_recorded == '%s' % current_datetime)
    detail_rows.update(new_values)

    cleaned_rows = SESSION.query(Cleaned).filter(
        Cleaned.document_recorded == '%s' % current_datetime)
    cleaned_rows.update(new_values)

    SESSION.commit()
def geocode(self):
    """Geocode rows that have no rating and store the result in Location.

    Each row returned by get_rows_with_null_rating() is turned into a
    "<street_number> <address>, New Orleans, LA" string and sent to
    self.gmaps.geocode(). Addresses with no result are logged and skipped;
    otherwise the processed result is written to the Location row with the
    same document_id.
    """
    print('\nGeocoding...')

    for pending in self.get_rows_with_null_rating():
        full_address = "{0} {1}, New Orleans, LA".format(
            pending.street_number, pending.address)
        matches = self.gmaps.geocode(full_address)

        if len(matches) == 0:
            log.info('No geocoding results for: {}'.format(full_address))
            # TODO: Need to also note failure so future geocoding scripts
            # don't keep trying and failing on the same addresses.
            # Possibly update Location's `rating` and/or Cleaned's
            # `location_publish` fields.
            continue

        details = self.process_google_results(matches)

        try:
            with SESSION.begin_nested():
                statement = update(Location).values(details).where(
                    Location.document_id == pending.document_id)
                SESSION.execute(statement)
                SESSION.flush()
        except Exception as error:  # TODO: Handle specific errors.
            log.exception(error, exc_info=True)
            SESSION.rollback()

        SESSION.commit()
def delete_cleaned(self):
    """Delete Cleaned rows recorded between initial_date and until_date.

    Both bounds are inclusive. The session is committed after the delete.
    """
    range_start = '{}'.format(self.initial_date)
    range_end = '{}'.format(self.until_date)

    in_range = SESSION.query(Cleaned).filter(
        Cleaned.document_recorded >= range_start
    ).filter(
        Cleaned.document_recorded <= range_end
    )
    in_range.delete()

    SESSION.commit()
def delete_details(self):
    """Delete Detail rows recorded between initial_date and until_date.

    Both bounds are inclusive. The session is committed after the delete.
    """
    range_start = '{}'.format(self.initial_date)
    range_end = '{}'.format(self.until_date)

    in_range = SESSION.query(Detail).filter(
        Detail.document_recorded >= range_start
    ).filter(
        Detail.document_recorded <= range_end
    )
    in_range.delete()

    SESSION.commit()
def no_neighborhood_found(self):
    """Write the string "None" into every NULL Location.neighborhood."""
    log.debug('no_neighborhood_found')

    # Rows still NULL here have no neighborhood value stored.
    unassigned = SESSION.query(Location).filter(
        Location.neighborhood.is_(None))
    unassigned.update(
        {Location.neighborhood: "None"},
        synchronize_session='fetch'
    )

    SESSION.commit()
def commit_to_database(self, table, output):
    """Insert `output` into the model named `table`, then commit.

    `table` is looked up as an attribute of `db`. The insert runs inside a
    nested transaction; any exception is logged at debug level and the
    session is rolled back. The session is committed in either case.
    """
    try:
        # TODO: Is this the correct method for this?
        with SESSION.begin_nested():
            model = getattr(db, table)
            SESSION.execute(insert(model).values(output))
            SESSION.flush()  # TODO: What is this?
    except Exception as error:
        log.debug(error, exc_info=True)
        SESSION.rollback()

    SESSION.commit()  # TODO: Should this be here?
def check_geocoder_good_rating(self):
    """Mark locations with a precise geocoder rating as publishable.

    Location rows rated 'RANGE_INTERPOLATED' or 'ROOFTOP' get
    location_publish = True.
    """
    precise_rating = (
        (Location.rating == 'RANGE_INTERPOLATED') |
        (Location.rating == 'ROOFTOP')
    )

    # The UPDATE is issued here, before the try block below.
    SESSION.query(
        Location.rating, Location.location_publish
    ).filter(precise_rating).update({"location_publish": True})

    try:
        with SESSION.begin_nested():
            SESSION.flush()
    except Exception as error:
        log.exception(error, exc_info=True)
        SESSION.rollback()

    SESSION.commit()
def check_geocoder_bad_rating(self):
    """Mark locations with an imprecise or missing rating as unpublishable.

    Location rows rated 'GEOMETRIC_CENTER' or 'APPROXIMATE', or with a NULL
    rating, get location_publish = False.
    """
    imprecise_rating = (
        (Location.rating == 'GEOMETRIC_CENTER') |
        (Location.rating == 'APPROXIMATE') |
        (Location.rating.is_(None))
    )

    # The UPDATE is issued here, before the try block below.
    SESSION.query(
        Location.rating, Location.location_publish
    ).filter(imprecise_rating).update({"location_publish": False})

    try:
        with SESSION.begin_nested():
            SESSION.flush()
    except Exception as error:
        log.exception(error, exc_info=True)
        SESSION.rollback()

    SESSION.commit()
def check_west_of_new_orleans(self):
    """Unpublish locations geocoded west of the New Orleans boundary."""
    # Longitudes below -90.140388 are treated as west of New Orleans.
    too_far_west = Location.longitude < -90.140388

    SESSION.query(
        Location.longitude, Location.location_publish
    ).filter(too_far_west).update({"location_publish": False})

    try:
        with SESSION.begin_nested():
            SESSION.flush()
    except Exception as error:
        log.exception(error, exc_info=True)
        SESSION.rollback()

    SESSION.commit()
def check_high_amount(self):
    """Unpublish sales whose amount is $20,000,000 or more."""
    # Amounts this large are possible but rare, so hold them back.
    suspiciously_high = Detail.amount >= 20000000

    SESSION.query(
        Detail.amount, Detail.detail_publish
    ).filter(suspiciously_high).update({"detail_publish": False})

    try:
        with SESSION.begin_nested():
            SESSION.flush()
    except Exception as error:
        log.exception(error, exc_info=True)
        SESSION.rollback()

    SESSION.commit()
def check_low_amount(self):
    """Unpublish sales whose amount is zero or negative."""
    # Uncertain whether these are real sales, so hold them all back for now.
    suspiciously_low = Detail.amount <= 0

    SESSION.query(
        Detail.amount, Detail.detail_publish
    ).filter(suspiciously_low).update({"detail_publish": False})

    try:
        with SESSION.begin_nested():
            SESSION.flush()
    except Exception as error:
        log.exception(error, exc_info=True)
        SESSION.rollback()

    SESSION.commit()
def commit_rows(self, rows):
    """Insert each of the JOIN-ed rows into the Cleaned table.

    Every row is inserted inside its own nested transaction. A failing row
    is logged together with its index and the session is rolled back.
    """
    total = len(rows)
    log.debug('Committing %d rows', total)

    for count, row in enumerate(rows):
        log.debug("Row %d", count)

        try:
            with SESSION.begin_nested():
                SESSION.execute(insert(Cleaned).values(row))
                SESSION.flush()
        except Exception as error:
            log.debug('count: %s', count)
            log.exception(error, exc_info=True)
            SESSION.rollback()

        SESSION.commit()

    log.debug('%d rows committed', total)
def check_north_of_new_orleans(self):
    """Unpublish locations geocoded north of the New Orleans boundary."""
    # Latitudes above 30.181719 are treated as north of New Orleans.
    too_far_north = Location.latitude > 30.181719

    SESSION.query(
        Location.latitude, Location.location_publish
    ).filter(too_far_north).update({"location_publish": False})

    try:
        with SESSION.begin_nested():
            SESSION.flush()
    except Exception as error:
        log.exception(error, exc_info=True)
        SESSION.rollback()

    SESSION.commit()
def make_all_locations_publishable(self):
    """Set location_publish to True on every Location row.

    This is the optimistic starting point: the other checks are expected to
    switch the flag back to False where the data looks questionable.
    """
    every_location = SESSION.query(Location.location_publish)
    every_location.update({"location_publish": True})

    try:
        with SESSION.begin_nested():
            SESSION.flush()
    except Exception as error:
        log.exception(error, exc_info=True)
        SESSION.rollback()

    SESSION.commit()
def check_if_no_date(self):
    """Unpublish sales that are missing a document date or recorded date.

    Detail rows whose document_date or document_recorded is NULL get
    detail_publish = False. The change is then flushed and committed; a
    failure during the flush is logged and the session is rolled back.
    """
    # `column is None` is a plain Python identity test that evaluates to
    # False, so it never reaches the database as a condition. `.is_(None)`
    # is what renders as IS NULL.
    missing_date = (
        (Detail.document_date.is_(None)) |
        (Detail.document_recorded.is_(None))
    )

    SESSION.query(
        Detail.document_date,
        Detail.document_recorded,
        Detail.detail_publish
    ).filter(
        missing_date
    ).update(
        {"detail_publish": False}
    )

    try:
        with SESSION.begin_nested():
            SESSION.flush()
    except Exception as error:
        log.exception(error, exc_info=True)
        SESSION.rollback()

    SESSION.commit()
def neighborhood_found(self):
    """Assign each Location the neighborhood whose geometry contains it.

    A point is built from the location's longitude and latitude (cast to
    Float, SRID 4326) and tested with ST_Contains against Neighborhood.geom.
    Matching rows get Neighborhood.gnocdc_lab as their neighborhood.
    """
    log.debug('neighborhood_found')

    # ST_Point takes x (longitude) first, then y (latitude).
    location_point = func.ST_SetSRID(
        func.ST_Point(
            cast(Location.longitude, Float),
            cast(Location.latitude, Float)
        ),
        4326
    )
    inside_neighborhood = func.ST_Contains(Neighborhood.geom, location_point)

    SESSION.query(Location).filter(inside_neighborhood).update(
        {Location.neighborhood: Neighborhood.gnocdc_lab},
        synchronize_session='fetch'
    )

    SESSION.commit()
def check_relative_date(self):
    """Unpublish sales whose document date is implausible for its recorded day.

    Walks day by day from self.initial_date up to, but not including,
    self.until_date (both 'YYYY-MM-DD' strings). For the Detail rows
    recorded on each day, detail_publish is set to False when the document
    date is

    * more than 180 days before the recorded day (too old), or
    * later than the day before the recorded day (too recent / future).

    Each of the two updates runs in its own nested transaction; a failure is
    logged and the session rolled back. Work is committed once per day.
    """
    first_day = datetime.strptime(self.initial_date, '%Y-%m-%d').date()
    end_day = datetime.strptime(self.until_date, '%Y-%m-%d').date()

    current_date = first_day

    # Use `<` rather than `!=` so that an initial_date later than until_date
    # ends the loop immediately instead of stepping forward forever.
    # NOTE(review): until_date itself is never checked; confirm that the
    # exclusive upper bound is intended.
    while current_date < end_day:
        # The cut-offs are relative to the day being examined.
        oldest_believable = (
            current_date - timedelta(days=180)).strftime('%Y-%m-%d')
        day_before = (
            current_date - timedelta(days=1)).strftime('%Y-%m-%d')
        recorded_on = current_date.strftime('%Y-%m-%d')

        unbelievable_dates = (
            Detail.document_date < oldest_believable,  # too old
            Detail.document_date > day_before,         # too recent / future
        )

        for date_filter in unbelievable_dates:
            try:
                with SESSION.begin_nested():
                    SESSION.query(
                        Detail.document_recorded,
                        Detail.document_date,
                        Detail.detail_publish
                    ).filter(
                        Detail.document_recorded == recorded_on
                    ).filter(
                        date_filter
                    ).update({"detail_publish": False})

                    SESSION.flush()
            except Exception as error:
                log.exception(error, exc_info=True)
                SESSION.rollback()

        SESSION.commit()

        current_date = current_date + timedelta(days=1)