def log_request(request, user, authorized, database=None): """Logs the API request to the database for security monitoring and analyzing user metrics Parameters ---------- request : request The flask request object for the API call user : str The user who made the API call. Pulled from the JWT. authorized : boolean Indicates whether of not the user was authorized to access the end point. database : shir_connect.database.database A Shir Connect database object. Primarily used for testing Returns ------- Logs information about the request to the Postgres database. """ # Don't write logs to the table during unit tests or development if conf.SHIR_CONNECT_ENV in ['DEV', 'TEST']: return None else: if not database: database = Database(database='postgres', schema='application_logs') # By default, the remote_addr attribute on the Flask request object # if the IP address of the referrer, which in our case is NGINX. We # configure NGINX for put the real remote_addr in the header so we're # able to track it. remote_addr = request.environ.get('HTTP_X_FORWARDED_FOR', request.remote_addr) item = { 'id': uuid.uuid4().hex, 'application_user': user, 'authorized': authorized, 'base_url': request.base_url, 'endpoint': request.endpoint, 'host': request.host, 'host_url': request.host_url, 'query_string': request.query_string.decode('utf-8'), 'referrer': request.referrer, 'remote_addr': remote_addr, 'scheme': request.scheme, 'url': request.url, 'url_root': request.url_root, 'user_agent': str(request.user_agent), 'load_datetime': datetime.datetime.now() } database.load_item(item, 'shir_connect_logs')
class ParticipantMatcher:
    """Matches event attendees to participants in the database.

    Attendees without an entry in attendee_to_participant are resolved
    with the NameResolver; when no match is found a new participant id
    is created. Median event attendee ages are used as a matching
    signal and are also used to estimate unknown participant ages.
    """
    def __init__(self, database=None):
        daiquiri.setup(level=logging.INFO)
        self.logger = daiquiri.getLogger(__name__)

        # Fall back to a default connection when no database object is
        # injected (injection is primarily used for testing)
        self.database = Database() if not database else database
        self.name_resolver = NameResolver(database=self.database)
        self.participants = Participants(database=self.database)
        # Cache of event_id -> median attendee age so each event's age
        # is only pulled from the database once per run
        self.avg_event_age = {}

    def run(self, limit=1000, iters=None):
        """Adds attendees that have not been matched up to a participant
        to the look up table. If there are no matches for an attendee, a
        new participant id is created.

        Parameters
        ----------
        limit : int
            Maximum number of missing attendees pulled per iteration.
        iters : int, optional
            Maximum number of iterations to run. When None, the loop
            continues until no missing attendees remain.
        """
        n = 0
        while True:
            missing_attendees = self._get_missing_attendees(limit=limit)
            count = len(missing_attendees)
            # Stop when the iteration cap is reached or nothing is left
            if (iters and n >= iters) or count == 0:
                msg = 'Participant matcher has finished processing.'
                self.logger.info(msg)
                break
            msg = 'Iteration {} | Processing {} missing attendees'
            self.logger.info(msg.format(n + 1, count))
            for i in range(count):
                attendee = dict(missing_attendees.loc[i])
                # Both names are required to attempt a match
                if attendee['first_name'] and attendee['last_name']:
                    self._process_attendee(attendee)
            n += 1

    def estimate_unknown_ages(self):
        """Finds estimated ages for any participant whose age is unknown
        or who has an estimated age, and writes the estimated birth date
        back to participant_match."""
        unknowns = self._get_unknown_ages()
        for i, unknown in enumerate(unknowns):
            # Progress logging every 1000 participants
            if i % 1000 == 0:
                msg = 'Estimated age for {} participants.'.format(i)
                self.logger.info(msg)
            estimated_age = self._estimate_participant_age(unknown['id'])
            if not estimated_age:
                continue
            now = datetime.datetime.now()
            # Convert the age in years to an approximate birth date
            # (timedelta is in days)
            estimated_birth_date = now - datetime.timedelta(
                estimated_age * 365)
            estimated_birth_date = "'{}'".format(
                str(estimated_birth_date)[:10])
            self.database.update_column(table='participant_match',
                                        item_id=unknown['id'],
                                        column='birth_date',
                                        value=estimated_birth_date)
            # Flag the date as estimated so it can be refreshed on
            # later runs
            self.database.update_column(table='participant_match',
                                        item_id=unknown['id'],
                                        column='is_birth_date_estimated',
                                        value=True)

    def _process_attendee(self, attendee):
        """Adds a link to attendee_to_participant if the attendee has a
        match. Otherwise a new participant id is created for the
        attendee."""
        # Cache the average age for the event so it
        # doesn't have to pull it from the database each time
        event_id = attendee['event_id']
        if event_id not in self.avg_event_age:
            age = self._get_avg_event_age(event_id)
            self.avg_event_age[event_id] = age
        else:
            age = self.avg_event_age[event_id]

        match = self.name_resolver.find_best_match(
            first_name=attendee['first_name'],
            last_name=attendee['last_name'],
            email=attendee['email'],
            age=age)
        if match:
            participant_id = match['id']
        else:
            # If there is no participant match, a new participant
            # is created and added to the database
            participant_id = uuid.uuid4().hex
            participant = {
                'id': participant_id,
                'first_name': attendee['first_name'],
                'last_name': attendee['last_name'],
                'email': attendee['email']
            }
            self.database.load_item(participant, 'participant_match')

        # Insert the attendee to participant match to the database
        item = {'id': attendee['id'], 'participant_id': participant_id}
        self.database.load_item(item, 'attendee_to_participant')

    def _get_missing_attendees(self, limit=1000):
        """Pulls a list of attendees that have not yet been matched to a
        participant.

        Returns
        -------
        pd.DataFrame with columns id, event_id, first_name, last_name,
        email, ordered by event_id.
        """
        sql = """
            SELECT id, event_id, first_name, last_name, email
            FROM {schema}.attendees
            WHERE id NOT IN (SELECT id FROM {schema}.attendee_to_participant)
            AND first_name IS NOT NULL
            AND last_name IS NOT NULL
            ORDER BY event_id ASC
            LIMIT {limit}
        """.format(schema=self.database.schema, limit=limit)
        df = pd.read_sql(sql, self.database.connection)
        return df

    def _get_avg_event_age(self, event_id):
        """Computes the average (median of per-event medians) age of the
        attendees of one or more events.

        Parameters
        ----------
        event_id : str or list
            A single event id or a list of event ids.
        """
        if not isinstance(event_id, list):
            event_id = [str(event_id)]
        else:
            event_id = [str(x) for x in event_id]

        # event_id is bound as a query parameter (%(event_id)s), not
        # interpolated via format
        sql = """
            SELECT PERCENTILE_CONT(0.5)
            WITHIN GROUP (ORDER BY avg_age) as avg_age
            FROM(
                SELECT event_id,
                       PERCENTILE_CONT(0.5)
                       WITHIN GROUP (ORDER BY z.age) as avg_age
                FROM {schema}.attendees x
                INNER JOIN {schema}.attendee_to_participant y
                ON x.id = y.id
                INNER JOIN {schema}.participants z
                ON y.participant_id = z.participant_id
                WHERE event_id = ANY(%(event_id)s)
                AND z.age IS NOT NULL
                GROUP BY event_id
            ) a
        """.format(schema=self.database.schema, event_id=event_id)
        df = self.database.fetch_df(sql, params={'event_id': event_id})

        avg_age = None
        if len(df) > 0:
            avg_age = df.loc[0]['avg_age']
        return avg_age

    def _estimate_participant_age(self, participant_id):
        """Estimates a participant's age based on who they've attended
        events with. Returns None when the participant has no events."""
        events = self.participants.get_participant_events(participant_id)
        if len(events) == 0:
            return None
        else:
            event_id = [x['event_id'] for x in events]
            age = self._get_avg_event_age(event_id)
            return age

    def _get_unknown_ages(self):
        """Pulls all participant ids that have a null birth date or an
        estimated birth date."""
        sql = """
            SELECT id
            FROM {schema}.participant_match
            WHERE is_birth_date_estimated = TRUE
            OR birth_date IS NULL
        """.format(schema=self.database.schema)
        df = pd.read_sql(sql, self.database.connection)
        results = self.database.to_json(df)
        return results
class EventbriteLoader(object):
    """Loads event, attendee, and venue data from Eventbrite into
    Postgres.

    Parameters
    ----------
    eventbrite_org : str
        The Eventbrite organization id whose events are pulled.
    database : str, optional
        Name of the Postgres database to connect to. Passed through to
        the Database constructor; primarily used for testing.
    """
    def __init__(self, eventbrite_org, database=None):
        daiquiri.setup(level=logging.INFO)
        self.logger = daiquiri.getLogger(__name__)

        self.database = Database(database=database)
        self.eventbrite = Eventbrite()
        self.eventbrite_org = eventbrite_org

    def run(self, test=False):
        """Runs the data load process: pages through events, and for
        each event loads the event itself, its venue (if new), and all
        of its attendees.

        Parameters
        ----------
        test : bool
            When True, database writes and rate-limit sleeps are
            skipped so the method can be exercised in unit tests.
        """
        last_load_date = self.database.last_event_load_date()
        if last_load_date:
            # Look back at least one day so recently updated events are
            # refreshed even if they were already loaded
            look_back = datetime.datetime.now() - datetime.timedelta(days=1)
            first_event = min(look_back, last_load_date)
            start = str(first_event)[:10]
            self.logger.info('Loading events starting at %s' % (start))
        else:
            self.logger.info('Loading events from the first available event')
            start = None

        events = self.get_events(start=start, page=1)
        num_events = events['pagination']['object_count']
        if num_events > 0:
            self.logger.info('There are %s events to process' % (num_events))
        else:
            self.logger.info('There are no new events. Exiting')
            return

        more_events = True
        while more_events:
            for event in events['events']:
                if not event:
                    continue
                msg = "Loading information for %s" % (event['name']['text'])
                self.logger.info(msg)

                # Load the event into the database. Delete the current
                # entry in order to maintain the unique index
                event_id = event['id']
                if not test:
                    self.database.delete_item('events', event_id)
                    self.load_event(event)

                # Load the venue, if it does not already
                # appear in the database
                venue_id = event['venue_id']
                venue_ = self.database.get_item('venues', venue_id)
                if venue_id and not venue_:
                    venue = self.get_venue(venue_id)
                    if not test:
                        self.load_venue(venue)

                attendees = self.get_attendees(event_id, page=1)
                more_attendees = True
                while more_attendees:
                    if not attendees:
                        break
                    for attendee in attendees['attendees']:
                        if not attendee:
                            continue
                        if not test:
                            self.database.delete_item('attendees',
                                                      attendee['id'],
                                                      {'event_id': event_id})
                            self.load_attendee(attendee)

                    if test or not attendees['pagination']['has_more_items']:
                        more_attendees = False
                        break
                    else:
                        page = attendees['pagination']['page_number'] + 1
                        attendees = self.get_attendees(event_id, page)

                # Sleep to avoid the Eventbrite rate limit
                if test:
                    return
                else:
                    time.sleep(60)

            if not events['pagination']['has_more_items']:
                more_events = False
                break
            else:
                page = events['pagination']['page_number'] + 1
                msg = 'Pulling events on page %s' % (page)
                self.logger.info(msg)
                events = self.get_events(start, page)

    def get_events(self, start, page=1):
        """Pulls a page of events from Eventbrite, sleeping and retrying
        once if the rate limit has been exceeded."""
        org_id = self.eventbrite_org
        events = self.eventbrite.get_events(org_id=org_id,
                                            start=start,
                                            page=page)
        if not events:
            # Sleep until the Eventbrite rate limit resets.
            # (Message fixed: the sleep is 3600s, i.e. 60 minutes.)
            self.logger.info('Rate limit exceeded. Sleeping 60 mins')
            time.sleep(3600)
            # BUG FIX: the retry previously dropped org_id, which would
            # query events for the wrong (default) organization
            events = self.eventbrite.get_events(org_id=org_id,
                                                start=start,
                                                page=page)
        return events

    def get_attendees(self, event_id, page=1):
        """Pulls a page of attendees from Eventbrite, sleeping and
        retrying once if the rate limit has been exceeded."""
        attendees = self.eventbrite.get_attendees(event_id, page)
        if not attendees:
            # If attendees comes back as None, sleep until the
            # Eventbrite rate limit resets
            self.logger.info('Rate limit exceeded. Sleeping 60 mins')
            time.sleep(3600)
            attendees = self.eventbrite.get_attendees(event_id, page)
        return attendees

    def get_venue(self, venue_id, page=1):
        """Pulls a venue from Eventbrite, sleeping and retrying once if
        the rate limit has been exceeded."""
        venue = self.eventbrite.get_venue(venue_id, page)
        if not venue:
            self.logger.info('Rate limit exceeded. Sleeping 60 mins')
            time.sleep(3600)
            # BUG FIX: the retry previously referenced the undefined
            # name event_id, raising NameError on every retry
            venue = self.eventbrite.get_venue(venue_id, page)
        return venue

    def load_event(self, event):
        """Flattens an Eventbrite event payload (dates, name,
        description) and loads it into the events table. The input is
        deep-copied and not mutated."""
        event_ = deepcopy(event)

        start = arrow.get(event_['start']['utc']).datetime
        event_['start_datetime'] = start
        end = arrow.get(event_['end']['utc']).datetime
        event_['end_datetime'] = end

        description = event_['description']['text']
        event_['description'] = description
        name = event_['name']['text']
        event_['name'] = name

        event_['load_datetime'] = datetime.datetime.utcnow()
        self.database.load_item(event_, 'events')

    def load_attendee(self, attendee):
        """Flattens an Eventbrite attendee payload (profile fields,
        cost) and loads it into the attendees table. The input is
        deep-copied and not mutated."""
        attendee_ = deepcopy(attendee)

        # Promote profile fields to the top level when present
        profile = attendee_['profile']
        if 'name' in profile:
            attendee_['name'] = profile['name']
        if 'first_name' in profile:
            attendee_['first_name'] = profile['first_name']
        if 'last_name' in profile:
            attendee_['last_name'] = profile['last_name']
        if 'email' in profile:
            attendee_['email'] = profile['email']

        cost = attendee_['costs']['gross']['major_value']
        attendee_['cost'] = float(cost)

        attendee_['load_datetime'] = datetime.datetime.utcnow()
        self.database.load_item(attendee_, 'attendees')

    def load_order(self, order):
        """Flattens an Eventbrite order payload (cost) and loads it into
        the orders table. The input is deep-copied and not mutated."""
        order_ = deepcopy(order)

        cost = order_['costs']['gross']['major_value']
        order_['cost'] = float(cost)

        order_['load_datetime'] = datetime.datetime.utcnow()
        self.database.load_item(order_, 'orders')

    def load_venue(self, venue):
        """Flattens the venue address fields to the top level and loads
        the venue into the venues table."""
        venue_ = deepcopy(venue)
        for key in venue_['address']:
            val = venue_['address'][key]
            venue_[key] = val
        venue_['latitude'] = float(venue_['latitude'])
        venue_['longitude'] = float(venue_['longitude'])
        self.database.load_item(venue_, 'venues')
class MM2000:
    """Loads MM2000 member exports and resignation data into the Shir
    Connect database.

    Parameters
    ----------
    database : shir_connect.database.database.Database, optional
        A database object to use; defaults to a new connection.
        Primarily used for testing.
    """
    def __init__(self, database=None):
        daiquiri.setup(level=logging.INFO)
        self.logger = daiquiri.getLogger(__name__)

        # Load column mapping configs
        self.path = os.path.dirname(os.path.realpath(__file__))
        filename = self.path + '/member_columns.yml'
        with open(filename, 'r') as f:
            self.column_mapping = yaml.safe_load(f)

        self.database = Database() if not database else database
        self.fake_news = FakeNews(database=self.database)

    #####################################
    # Methods for loading MM2000 members
    #####################################

    def load(self, df):
        """Loads the data in to the member database.

        The members table is backed up before it is truncated; if the
        new upload's columns do not match, the backup is restored.

        Parameters
        ----------
        df : pd.DataFrame
            The raw MM2000 export.

        Returns
        -------
        bool
            True if the load succeeded, False if it was reverted.
        """
        self.logger.info('Parsing MM2000 data.')
        items = self.parse_mm2000(df)

        self.logger.info('Backing up current member table.')
        self.database.backup_table('members')
        self.logger.info('Truncating current member table.')
        self.database.truncate_table('members')
        self.logger.info('Loading updated member data.')
        for item in items:
            self.database.load_item(item, 'members')

        self.logger.info('Checking updated columns.')
        good_columns = self.check_columns()
        if good_columns:
            self.logger.info('Generating demo data')
            self.fake_news.fake_names()
            self.logger.info('Refreshing materialized views.')
            self.database.refresh_view('members_view')
            self.database.refresh_view('participants')
        else:
            self.logger.warning('Column mismatch in upload')
            self.database.revert_table('members')
            return False

        return True

    def parse_mm2000(self, df):
        """Converts the MM2000 export into a list of member rows.

        Parameters
        ----------
        df : pd.DataFrame
            The raw MM2000 export.

        Returns
        -------
        list of dict
            One item per member, keyed by database column names.
        """
        column_mapping = self.column_mapping['MM2000']
        items = []
        for group in column_mapping:
            column_map = column_mapping[group]['columns']
            df_group = _group_mm2000(df, column_map)
            if 'id_extension' in column_mapping[group]:
                id_extension = column_mapping[group]['id_extension']
            else:
                id_extension = None

            for i in df_group.index:
                item = dict(df_group.loc[i])
                item = _parse_postal_code(item)
                item = _check_mm2000_active(item)

                # ID extension for children and spouses
                # since a family shares the same id
                item['household_id'] = item['id']
                if id_extension:
                    item['id'] += id_extension

                # Remove invalid birthdates
                item = _parse_mm2000_date(item, 'birth_date')
                item = _parse_mm2000_date(item, 'membership_date')

                # Skip if the member is under the minimum age
                # that we keep in the database
                too_young = utils.check_age(item['birth_date'], min_age=18)
                if too_young:
                    continue

                # Children only have a full name, not separate
                # first names and last name
                if 'first_name' not in item and item['full_name']:
                    item['first_name'] = item['full_name'].split()[0]
                if 'last_name' not in item and item['full_name']:
                    # BUG FIX: previously took the first token again,
                    # which duplicated the first name as the last name.
                    # Use the final token of the full name instead.
                    item['last_name'] = item['full_name'].split()[-1]

                if not item['first_name'] or not item['last_name']:
                    continue
                else:
                    items.append(item)
        return items

    def check_columns(self):
        """Checks to make sure the columns in the freshly-loaded members
        table all exist in the backup table.

        Returns
        -------
        bool
            True when every new column also exists in the backup.
        """
        new_columns = self.database.get_columns('members')
        old_columns = self.database.get_columns('members_backup')
        for column in new_columns:
            if column not in old_columns:
                return False
        return True

    ###########################################
    # Methods for handling MM2000 resignations
    ###########################################

    def load_resignations(self, df):
        """Loads MM2000 resignation data into the database.

        Parameters
        ----------
        df : pd.DataFrame
            The MM2000 resignation export. Rows with a null resignation
            date are dropped.
        """
        _validate_resignation_data(df)
        # Map the file column names to the database column names
        df = df.rename(columns=self.column_mapping['MM2000 Resignations'])
        # Drop any rows where the resignation date is null
        df = df.dropna(axis=0, how='any', subset=['resignation_date'])

        for i in df.index:
            member = dict(df.loc[i])
            member = _parse_mm2000_date(member, 'resignation_date')
            resignation_date = str(member['resignation_date'])[:10]
            # Guard against dates that _parse_mm2000_date nulled out:
            # str(None)[:10] is the truthy string 'None'
            if resignation_date in ('None', 'NaT'):
                resignation_date = None

            # TODO: This logic is specific to TRS because that's how
            # they track people who rejoined the congregation. We may
            # have to update this if another client uses MM2000
            if 'Comment1' in member:
                if 'rejoin' in str(member['Comment1']).lower():
                    resignation_date = None
            if 'Comment2' in member:
                if 'rejoin' in str(member['Comment2']).lower():
                    resignation_date = None

            if resignation_date:
                resignation_date = "'{}'".format(resignation_date)
            else:
                # BUG FIX: a missing date previously rendered the
                # Python literal "None" into the SQL, which is invalid.
                # Use SQL NULL so rejoined members are cleared.
                resignation_date = 'NULL'

            # NOTE(review): member ids come from the uploaded file and
            # are interpolated into the SQL; consider parameterized
            # queries if run_query supports them.
            sql = """
                UPDATE {schema}.members
                SET resignation_date = {resignation_date}
                WHERE (household_id = '{member_id}' OR id = '{member_id}')
            """.format(schema=self.database.schema,
                       resignation_date=resignation_date,
                       member_id=member['id'])
            self.database.run_query(sql)

            reason = _find_resignation_reason(member['resignation_reason'])
            sql = """
                UPDATE {schema}.members
                SET resignation_reason = '{reason}'
                WHERE (household_id = '{member_id}' OR id = '{member_id}')
            """.format(schema=self.database.schema,
                       reason=reason,
                       member_id=member['id'])
            self.database.run_query(sql)

        self.database.refresh_views()
class FakeNews:
    """A class for generating and uploading fake Shir Connect data.

    Used to populate the demo instance with realistic but synthetic
    names, events, venues, and members.

    Parameters
    ----------
    database : shir_connect.database.database.Database, optional
        A database object to use; defaults to a new connection.
        Primarily used for testing.
    """
    def __init__(self, database=None):
        daiquiri.setup(level=logging.INFO)
        self.logger = daiquiri.getLogger(__name__)

        self.database = Database() if not database else database
        self.faker = Faker()
        # Lazily-built table of candidate postal codes with sampling
        # weights; see _postal_code
        self.postal_codes = None

    def build_fake_data(self):
        """Generates fake data for the database and refreshes the
        materialized views."""
        self.fake_names()
        self.fake_events()
        self.fake_venues()
        self.database.refresh_views()

    def fake_events(self):
        """Generates fake names and descriptions for the events
        table."""
        event_ids = self._get_events()
        prefixes = [x for x in conf.EVENT_GROUPS]
        # Include None so some events get no group prefix
        prefixes.append(None)
        for i, event_id in enumerate(event_ids):
            if i % 1000 == 0:
                msg = 'Generated fake names for {} events.'.format(i)
                self.logger.info(msg)

            fake_name = self._random_name(max_size=5)
            # Strip single quotes so the value can be wrapped in SQL
            # quotes below
            fake_name = fake_name.replace("'", '')
            np.random.shuffle(prefixes)
            prefix = prefixes[0]
            if prefix:
                fake_name = prefix + ': ' + fake_name
            fake_descr = self._random_paragraph(max_size=15)

            self.database.update_column(table='events',
                                        item_id=event_id,
                                        column='fake_name',
                                        value="'{}'".format(fake_name))
            self.database.update_column(table='events',
                                        item_id=event_id,
                                        column='fake_description',
                                        value="'{}'".format(fake_descr))

    def fake_venues(self):
        """Generates fake names for the venues table."""
        venue_ids = self._get_venues()
        for i, venue_id in enumerate(venue_ids):
            if i % 1000 == 0:
                msg = 'Generated fake names for {} venues.'.format(i)
                self.logger.info(msg)
            fake_name = self._random_name(max_size=2)
            self.database.update_column(table='venues',
                                        item_id=venue_id,
                                        column='fake_name',
                                        value="'{}'".format(fake_name))

    def fake_names(self):
        """Generates fake first/last names, nicknames, and emails for
        the participant_match table."""
        participant_ids = self._get_participants()
        for i, participant_id in enumerate(participant_ids):
            if i % 1000 == 0:
                msg = 'Generated fake names for {} participants.'.format(i)
                self.logger.info(msg)
            fake_first_name = "'{}'".format(self.faker.first_name())
            fake_last_name = "'{}'".format(self.faker.last_name())
            fake_email = "'{}'".format(self.faker.email())
            self.database.update_column(table='participant_match',
                                        item_id=participant_id,
                                        column='fake_first_name',
                                        value=fake_first_name)
            # The nickname mirrors the first name
            self.database.update_column(table='participant_match',
                                        item_id=participant_id,
                                        column='fake_nickname',
                                        value=fake_first_name)
            self.database.update_column(table='participant_match',
                                        item_id=participant_id,
                                        column='fake_last_name',
                                        value=fake_last_name)
            self.database.update_column(table='participant_match',
                                        item_id=participant_id,
                                        column='fake_email',
                                        value=fake_email)

    def fake_birthday(self, min_age=18, max_age=85):
        """Generates a fake birthday that can be used in the demo
        version.

        Parameters
        ----------
        min_age : int
            the minimum age of the random birthday
        max_age : int
            the maximum age of the random birthday

        Returns
        -------
        birthday : datetime.date
        """
        start_date = "-{}y".format(max_age)
        end_date = "-{}y".format(min_age)
        birthday = self.faker.date_between(start_date=start_date,
                                           end_date=end_date)
        return birthday

    def fake_membership_date(self, max_age=30):
        """Generates a fake membership date for the database.

        Parameters
        ----------
        max_age : int
            the maximum age, in years, of the random membership date

        Returns
        -------
        membership_date : datetime.date
        """
        start_date = "-{}y".format(max_age)
        membership_date = self.faker.date_between(start_date=start_date,
                                                  end_date="today")
        return membership_date

    def fake_members(self, num_members):
        """Generates fake members and loads them into the database.

        Parameters
        ----------
        num_members : int
            the number of fake members to create

        Returns
        -------
        Loads the fake members into the demo database
        """
        for i in range(num_members):
            member = self._fake_member(i)
            self.database.load_item(member, 'members')

    def _fake_member(self, identifier):
        """Generates a fake member with all of the attributes required
        in the member table.

        Parameters
        ----------
        identifier : int or str
            Used as both the member id and household id.
        """
        member = {}
        member['id'] = identifier
        member['household_id'] = identifier
        member['member_family'] = 'Y'

        # Generate a male or female name with 50% probability
        female = np.random.random() < .5
        if female:
            first_name = self.faker.first_name_female()
            gender = 'F'
        else:
            first_name = self.faker.first_name_male()
            gender = 'M'
        member['first_name'] = first_name
        member['nickname'] = first_name
        member['last_name'] = self.faker.last_name()
        member['gender'] = gender

        member['email'] = self.faker.email()
        member['postal_code'] = self._postal_code()
        member['birth_date'] = self.fake_birthday()
        membership_date = self.fake_membership_date()
        member['membership_date'] = membership_date

        # Make the member Jewish with 90% probability
        jewish = np.random.random() < .9
        religion = 'Jewish' if jewish else 'Not Jewish'
        member['member_religion'] = religion

        member['resignation_date'] = self._resignation_date(membership_date)
        reasons = ['Too far', 'Finished bar mitzvah', 'Moved',
                   'Lost interest']
        if member['resignation_date']:
            resignation_reason = np.random.choice(reasons)
        else:
            resignation_reason = None
        member['resignation_reason'] = resignation_reason

        active = np.random.random() < .9
        member['active_member'] = active and not member['resignation_date']

        membership_types = ['MEMFAM', 'MEMIND', 'STAFF']
        member['member_type'] = np.random.choice(membership_types,
                                                 p=[.75, .2, .05])
        return member

    def _resignation_date(self, membership_date, prob=.1):
        """Generates a fake resignation date with probability prob.

        Parameters
        ----------
        membership_date : datetime.date
            The member's join date; resignations cannot precede it.
        prob : float
            Probability that the member resigned.
        """
        # BUG FIX: previously the hard-coded .1 was used here, so the
        # prob parameter was silently ignored
        resigned = np.random.random() < prob
        resignation_date = None
        if resigned:
            today = datetime.datetime.now()
            mem_dt = datetime.datetime.combine(membership_date,
                                               datetime.datetime.min.time())
            # Resignations can only fall between the membership date
            # and today
            max_age = int(np.floor((today - mem_dt).days / 365))
            if max_age > 0:
                resignation_date = self.fake_membership_date(max_age)
        return resignation_date

    def _get_participants(self):
        """Pulls a list of participant ids that still need fake
        names."""
        sql = """
            SELECT id
            FROM {schema}.participant_match
            WHERE fake_first_name IS NULL
            OR fake_last_name IS NULL
            OR fake_nickname IS NULL
        """.format(schema=self.database.schema)
        df = pd.read_sql(sql, self.database.connection)
        participant_ids = list(df['id'].unique())
        return participant_ids

    def _postal_code(self):
        """Generates a fake postal code, weighted by numerical distance
        from the default postal code."""
        # Build the weighted sample table once and cache it
        if not isinstance(self.postal_codes, pd.DataFrame):
            default = conf.DEFAULT_LOCATION['postal_code']
            postal_codes = self.database.read_table('geometries', ['id'])
            postal_codes['weights'] = (postal_codes['id'].astype(int)
                                       - default)**2
            postal_codes.sort_values('weights', ascending=True, inplace=True)
            postal_codes.reset_index(drop=True, inplace=True)
            # Keep the 100 closest codes, then invert and normalize the
            # squared distances into sampling probabilities
            postal_codes = postal_codes[:100]
            postal_codes['weights'] = (postal_codes['weights'].max()
                                       - postal_codes['weights'])
            postal_codes['weights'] = (postal_codes['weights']
                                       / postal_codes['weights'].sum())
            self.postal_codes = postal_codes
        return np.random.choice(self.postal_codes['id'],
                                p=self.postal_codes['weights'])

    def _get_events(self):
        """Pulls a list of event ids that need fake names."""
        sql = """
            SELECT id
            FROM {schema}.events
            WHERE fake_name IS NULL
        """.format(schema=self.database.schema)
        df = pd.read_sql(sql, self.database.connection)
        event_ids = list(df['id'].unique())
        return event_ids

    def _get_venues(self):
        """Pulls a list of venue ids that need fake names."""
        sql = """
            SELECT id
            FROM {schema}.venues
            WHERE fake_name IS NULL
        """.format(schema=self.database.schema)
        df = pd.read_sql(sql, self.database.connection)
        venue_ids = list(df['id'].unique())
        return venue_ids

    def _random_name(self, max_size=2):
        """Generates a random name for events and venues by scrambling
        the letters of a faker catch phrase."""
        phrase = self.faker.catch_phrase().split()
        short_phrase = ' '.join(phrase[:max_size])
        letters = list(short_phrase)
        random.shuffle(letters)
        name = ''.join(letters)
        return name.title()

    def _random_paragraph(self, max_size=15):
        """Generates a random paragraph of scrambled sentences for
        event descriptions."""
        sentences = self.faker.sentences(max_size)
        scrambled_sentences = []
        for sentence in sentences:
            sentence_list = list(sentence)
            random.shuffle(sentence_list)
            scrambled_sentence = ''.join(sentence_list).replace('.', '')
            scrambled_sentences.append(scrambled_sentence.capitalize())
        paragraph = '. '.join(scrambled_sentences)
        return paragraph
class Geometries(object):
    """Class for parsing and loading geojson zip-code geometries.

    Parameters
    ----------
    database : str, optional
        Name of the Postgres database to connect to. Passed through to
        the Database constructor; primarily used for testing.
    """
    def __init__(self, database=None):
        daiquiri.setup(level=logging.INFO)
        self.logger = daiquiri.getLogger(__name__)

        self.database = Database(database=database)
        self.path = os.path.dirname(os.path.realpath(__file__))
        self.url = 'https://www.zip-codes.com/cache/kml-zip/'
        self.search = SearchEngine(simple_zipcode=True)
        # Cache of zipcode -> data dict so repeated lookups avoid the
        # search engine
        self.zip_code_cache = {}

    def load_all_zip_codes(self):
        """Loads all zipcodes geometries into the database. Pauses five
        seconds between loading each zipcode to avoid overwhelming the
        site we download the geometries from."""
        valid_zip_codes = self.get_all_zip_codes()
        for code in valid_zip_codes:
            try:
                self.logger.info("Loading geometry for {}".format(code))
                self.load_zip_code(code)
            # FIX: was a bare except, which also swallowed
            # KeyboardInterrupt/SystemExit; this is deliberately
            # best-effort, so log and continue on any real error
            except Exception:
                self.logger.warning('Geojson load failed for {}'.format(code))
            time.sleep(5)

    def get_kml(self, zip_code):
        """Pulls the KML file for a zip code.

        Returns
        -------
        str or None
            The KML body, or None when the download fails.
        """
        url = self.url + '%s.kml' % (zip_code)
        response = requests.get(url)
        if response.status_code == 200:
            return response.text
        else:
            return None

    def get_all_zip_codes(self):
        """Creates a list of valid zip codes by checking every possible
        five-digit code against the US zip code search object."""
        possible_zip_codes = [str(i).rjust(5, '0') for i in range(99999)]
        valid_zip_codes = []
        for code in possible_zip_codes:
            results = self.get_zipcode_data(code)
            if results and results['zipcode'] is not None:
                valid_zip_codes.append(code)
        return valid_zip_codes

    def load_zip_code(self, zip_code):
        """Pulls a zip code's geometry and loads it into the database.

        Downloads the KML, converts it to GeoJSON via temporary files,
        loads the row, then removes the temporary files."""
        # Fetch the KML file from the resource
        kml = self.get_kml(zip_code)
        if not kml:
            return
        filename = self.path + '/temp/%s.kml' % (zip_code)
        with open(filename, 'w') as f:
            f.write(kml)

        # Convert the KML file to GeoJSON
        kml2geojson.main.convert(filename, self.path + '/temp')

        # Load the file into the database
        geo_filename = self.path + '/temp/%s.geojson' % (zip_code)
        with open(geo_filename, 'r') as f:
            geo_json = json.load(f)

        zipcode_data = self.get_zipcode_data(zip_code)
        row = {
            'id': zip_code,
            'geometry': json.dumps(geo_json),
            'city': zipcode_data['major_city'],
            'county': zipcode_data['county'],
            'region': zipcode_data['state']
        }
        # Delete first to maintain the unique index on id
        self.database.delete_item('geometries', zip_code)
        self.database.load_item(row, 'geometries')

        # Delete the temporary files
        os.remove(filename)
        os.remove(geo_filename)

    def get_zipcode_data(self, zipcode):
        """Pulls the city and county name for the specified zipcode.

        Results (including misses, stored as None) are cached so the
        search engine is queried at most once per zipcode."""
        if zipcode in self.zip_code_cache:
            return self.zip_code_cache[zipcode]
        else:
            results = self.search.by_zipcode(zipcode)
            if results.zipcode:
                zipcode_data = results.to_dict()
            else:
                zipcode_data = None
            # Cache city and state information for later so we don't
            # have to search against the search engine again
            self.zip_code_cache[zipcode] = zipcode_data
            return zipcode_data