def add_locations_from_collection():
    """Merge locations pulled from the events data into the locations collection.

    For every location returned by ``get_locations_from_collection()`` an
    existing document is looked up twice: once by exact latitude/longitude and
    once by a case-insensitive match of the cleaned-up name against
    ``location.alternative_names``.

    * If either lookup hits, the new data is merged into the existing document
      via ``location_processor.handle_keys`` (the name match takes precedence
      over the coordinate match) and a truthy result is reported as updated.
    * Otherwise the location is inserted as a new document, after its
      cleaned-up name has been added to its ``alternative_names``.

    Returns:
        The ``jsonify`` response with two lists, ``'Added Locations'`` and
        ``'Updated Locations'``.
    """
    updated_locations = []
    added_locations = []

    # Can change what collection we get locations from
    new_locations = get_locations_from_collection()

    for new_loc in new_locations:
        loc_info = new_loc['location']

        # Look for an existing location with the same coordinates.
        # NOTE(review): when the new location has no coordinates this queries
        # for INVALID_COORDINATE, so it can match any stored location that
        # also carries that placeholder - confirm this is intended.
        coord_loc = locations_collection.find_one(
            {
                'location.latitude': loc_info.get('latitude', INVALID_COORDINATE),
                'location.longitude': loc_info.get('longitude', INVALID_COORDINATE),
            },
            {'_id': False})

        # Look for an existing location with a matching (alternative) name.
        alt_name_loc = None
        place_name = loc_info.get('name')
        if place_name:
            # Strip "UCLA-" / "-UCLA" decorations, then tokenize to drop
            # unnecessary/common words.
            place_name = re.sub(r'(UCLA-|-UCLA)+\s?', '', place_name,
                                flags=re.IGNORECASE)
            place_name = tokenizer.tokenize_text(place_name)

            # Only search when something is left after tokenizing: an empty
            # pattern would match every document that has alternative names.
            if place_name:
                # The name is literal text, not a pattern, so escape it;
                # otherwise characters such as "(" or "+" raise re.error or
                # change what is matched.
                name_regex = re.compile(re.escape(place_name), re.IGNORECASE)
                alt_name_loc = locations_collection.find_one(
                    {'location.alternative_names': name_regex},
                    {'_id': False})

        if coord_loc or alt_name_loc:
            # Location already in db but possibly missing info:
            # merge the new info into the db document.
            if coord_loc and not alt_name_loc:
                loc_result = location_processor.handle_keys(
                    coord_loc, new_loc, place_name)
            else:
                loc_result = location_processor.handle_keys(
                    alt_name_loc, new_loc, place_name, True)

            if loc_result:
                updated_locations.append(loc_result)
        else:
            # No pre-existing location, so insert a new one.
            # Also record the stripped version of the name when it differs
            # from the plain lowercase name.
            if place_name and place_name != loc_info['name'].lower():
                # alternative_names may be absent on incoming locations.
                loc_info.setdefault('alternative_names', []).append(place_name)

            added_locations.append(new_loc)
            # Insert a copy so the '_id' that insert_one adds does not end up
            # in the dict we later pass to jsonify.
            locations_collection.insert_one(new_loc.copy())

    return jsonify({
        'Added Locations': added_locations,
        'Updated Locations': updated_locations
    })
def search_locations(place_query):
    """Find locations matching a free-text place name such as "Boelter Hall".

    The search runs in two stages:

    1. Exact match: the processed query, anchored with ``^...$`` and compared
       case-insensitively against ``location.alternative_names``. If anything
       matches, only those results are returned. (Done first because a plain
       text search can weight e.g. "regency village" above "sunset village"
       due to the repeated word.)
    2. Text search: the tokenized query is run through the collection's
       ``$text`` index and results are ordered by text score, skipping
       locations whose name was already emitted.

    Args:
        place_query: The user-supplied place string.

    Returns:
        A list of the values produced by ``location_helpers.append_location``
        for each matching location; empty if nothing matched.
    """
    output = []

    print("Original place query: " + place_query)

    # Remove leading/trailing white space
    place_query = place_query.strip()

    processed_query = location_helpers.process_query(place_query)
    print("Processed place query: " + processed_query)

    # Stage 1: exact (anchored, case-insensitive) match on alternative names.
    try:
        place_regex = re.compile("^" + processed_query + "$", re.IGNORECASE)
    except re.error:
        # The query contains text that is not a valid pattern (e.g. an
        # unbalanced "("); fall back to matching it literally instead of
        # failing the whole request.
        place_regex = re.compile(
            "^" + re.escape(processed_query) + "$", re.IGNORECASE)

    # Iterate the cursor directly: Cursor.count() is deprecated/removed in
    # newer PyMongo releases and would cost an extra round trip anyway.
    for place in locations_collection.find(
            {'location.alternative_names': place_regex}):
        output.append(location_helpers.append_location(place))

    if output:
        return output

    # Stage 2: fall back to the text index.
    print("Doing text search...")

    tokenized_query = tokenizer.tokenize_text(processed_query)
    print("Tokenized place query: " + tokenized_query)

    # Default stop words for the english language, case insensitive.
    # Sorted by text score; a .limit(n) could be added to cap the results.
    places_cursor = locations_collection.find(
        {
            '$text': {
                '$search': tokenized_query,
                '$language': 'english',
                '$caseSensitive': False
            }
        },
        {
            'score': {
                '$meta': 'textScore'
            }
        }
    ).sort([('score', {'$meta': 'textScore'})])

    # Emit each location name at most once.
    seen_names = set()
    for place in places_cursor:
        name = place['location'].get('name', "NO NAME")
        if name in seen_names:
            continue
        seen_names.add(name)
        output.append(location_helpers.append_location(place, True))

    return output
def tokenize_names():
    """Add tokenized variants of every alternative name to the location data.

    For each entry of ``data['locations']`` that has
    ``location.alternative_names``, every existing alternative name is run
    through ``tokenizer.tokenize_text``; a non-empty result that is not yet
    present (compared against the lowercased existing names) is appended to
    that list in place.

    Returns:
        The ``jsonify`` response ``{"locations": [...]}`` containing only the
        locations that gained at least one name.
    """
    places = []

    # Go through every location in json
    for place in data['locations']:
        loc_info = place['location']
        if 'alternative_names' not in loc_info:
            continue

        alt_names = loc_info['alternative_names']
        # Lowercased names seen so far, kept as a set so the membership test
        # is not rebuilt for every candidate.
        known_names = {name.lower() for name in alt_names}
        updated = False

        # Iterate over a snapshot: names are appended to alt_names inside the
        # loop, and iterating the live list would feed the freshly added
        # names back through the tokenizer.
        for alt_name in list(alt_names):
            processed_name = tokenizer.tokenize_text(alt_name)
            if processed_name and processed_name not in known_names:
                alt_names.append(processed_name)
                known_names.add(processed_name.lower())
                updated = True

        if updated:
            places.append(place)

    return jsonify({"locations": places})
# A US ZIP code (optionally ZIP+4) standing on its own, i.e. not part of a
# longer run of digits.
_ZIP_CODE_RE = re.compile(r'\b\d{5}(?:-\d{4})?\b')


def _extract_zip(address):
    """Return the last ZIP-shaped token in ``address``, or "NO ZIP" if none.

    The last match is used because a street number at the start of an address
    can also be five digits long, while the ZIP code comes after it.
    """
    matches = _ZIP_CODE_RE.findall(address)
    return matches[-1] if matches else "NO ZIP"


def _coordinates_from_result(result):
    """Return ``(latitude, longitude)`` of a search result, or ``(404, 404)``
    when the result carries the "NO LATITUDE"/"NO LONGITUDE" placeholders."""
    if result['latitude'] == "NO LATITUDE" or result['longitude'] == "NO LONGITUDE":
        return 404, 404
    return result['latitude'], result['longitude']


def _add_name_variants(loc_info):
    """Append the name and its tokenized form to ``alternative_names``.

    Returns True if anything was appended. Does nothing when the location has
    no name or no ``alternative_names`` list.
    """
    if 'name' not in loc_info:
        return False

    name = loc_info['name']
    processed_place = tokenizer.tokenize_text(name)
    if 'alternative_names' not in loc_info:
        return False

    alt_names = loc_info['alternative_names']
    known_names = {alt.lower() for alt in alt_names}
    changed = False

    if name and name.lower() not in known_names:
        alt_names.append(name)
        known_names.add(name.lower())
        changed = True

    if processed_place and processed_place not in known_names:
        alt_names.append(processed_place)
        changed = True

    return changed


def fill_location_data():
    """Fill in missing name variants, address and coordinates of locations.

    Walks ``data['locations']`` and, for each location (modified in place):

    * adds the name and its tokenized form to ``alternative_names``;
    * if street or zip is missing/empty, looks the name up with
      ``google_textSearch`` and takes street and zip from the first result;
      without a usable name the missing fields get the placeholders
      "NO STREET" / "NO ZIP" and the name becomes "NO NAME";
    * if latitude or longitude is missing or still the default 420, takes the
      coordinates from a lookup by name, or by street when there is no usable
      name; 404 marks a result without coordinates and 666 marks a location
      with neither a usable name nor a usable street.

    A name is considered usable when it is non-empty and not the "NO NAME"
    placeholder, mirroring how "NO STREET" and '' are treated for streets.

    Returns:
        The ``jsonify`` response ``{"New/Modified Locations": [...]}`` listing
        the locations for which new information was actually found.
    """
    updated_places = []

    # Go through every location in json
    for place in data['locations']:
        loc_info = place['location']

        updated = _add_name_variants(loc_info)

        # Decide up front whether there is a real name to search with, so the
        # "NO NAME" placeholder written below is never sent to Google.
        name = loc_info.get('name')
        has_name = bool(name) and name != "NO NAME"

        needs_address = (loc_info.get('street', '') == ''
                         or loc_info.get('zip', '') == '')
        # 420 is the default value for an unset coordinate.
        needs_coordinates = (loc_info.get('latitude', 420) == 420
                             or loc_info.get('longitude', 420) == 420)

        # One lookup by name serves both the address and the coordinates.
        name_results = None
        if has_name and (needs_address or needs_coordinates):
            name_results = google_textSearch(name)

        if needs_address:
            if has_name:
                if name_results:
                    # Assume first result is best/most relevant result
                    address = name_results[0]['address']
                    loc_info['street'] = address
                    loc_info['zip'] = _extract_zip(address)
                    updated = True
            else:
                # Without a name there is nothing to search for. Only fill in
                # what is actually missing so an existing street or zip is
                # kept (the street can still be used for coordinates below).
                if loc_info.get('street', '') == '':
                    loc_info['street'] = "NO STREET"
                if loc_info.get('zip', '') == '':
                    loc_info['zip'] = "NO ZIP"
                loc_info['name'] = "NO NAME"

        if needs_coordinates:
            street = loc_info.get('street', '')
            has_street = street not in ('', "NO STREET")

            if has_name or has_street:
                # Prefer the lookup by name; fall back to the street.
                if has_name:
                    search_results = name_results
                else:
                    search_results = google_textSearch(street)

                if search_results:
                    latitude, longitude = _coordinates_from_result(search_results[0])
                    loc_info['latitude'] = latitude
                    loc_info['longitude'] = longitude
                    updated = True
            else:
                # There was no name or street info, set to another junk value
                loc_info['latitude'] = 666
                loc_info['longitude'] = 666

        # Keep track of places with info that was actually updated
        if updated:
            updated_places.append(place)

    return jsonify({"New/Modified Locations": updated_places})