def search(request, name):
    """ Respond to the "/geo/editor/location/search/XYZ" URL.

        We display a list of the locations which use the given name, if any.

        The parameters are as follows:

            'request'

                The incoming HTTP request object.

            'name'

                The name to search for.  It is upper-cased and passed
                through helpers.tidy_name() before being looked up.

        Unauthenticated users are redirected to "/geo/editor/".  A POST
        request where the "back" field equals "Back" redirects to
        "/geo/editor/location".  Otherwise we render the
        "location_search.html" template with the tidied name and the list of
        matching locations (each location listed once, in the order found).
    """
    # 'is_authenticated' is a method on older Django releases and a property
    # on newer ones.  Testing the bound method directly is always truthy, which
    # would let anonymous users through, so call it whenever it is callable.
    is_authenticated = request.user.is_authenticated
    if callable(is_authenticated):
        is_authenticated = is_authenticated()
    if not is_authenticated:
        return HttpResponseRedirect("/geo/editor/")

    if request.method == "POST":
        if request.POST.get("back") == "Back":
            # The user clicked on our "Back" button -> return to the main
            # location editor page.
            return HttpResponseRedirect("/geo/editor/location")

    # Find the locations matching the given name.

    name = helpers.tidy_name(name.upper())

    locations = []
    for nameRecord in Name.objects.filter(name=name):
        for loc_name in LocationName.objects.filter(name=nameRecord):
            loc = loc_name.location
            # The same location can be reached via more than one name record;
            # only list it once.
            if loc not in locations:
                locations.append(loc)

    # Return the results back to the caller.

    return render_to_response("location_search.html",
                              {'name' : name,
                               'locations' : locations},
                              context_instance=RequestContext(request))
def main():
    """ Our main program.

        We run every Name record's text through helpers.tidy_name().  Whenever
        the tidied text differs from the stored text, we print the old and new
        values and save the updated record.  Records whose text is already
        tidy are left untouched.
    """
    for name in Name.objects.all():
        text = helpers.tidy_name(name.name)
        if name.name != text:
            # We've changed the name.  The parenthesised single-argument form
            # of print produces the same output under Python 2 and is also
            # valid Python 3.
            print("'%s' -> '%s'" % (name.name, text))
            name.name = text
            name.save()
def parseUnstructuredLocation(location, source, log):
    """ Attempt to parse an unstructured location.

        The parameters are as follows:

            'location'

                The text of the location to parse, as a string.

            'source'

                The 3taps source code for the data source, or None if no
                source was specified.

            'log'

                If we are in verbose mode, this will be a list of strings that
                we can append our debugging information onto.  If we are not in
                verbose mode, this will be set to None.

        We attempt to parse the given text, identifying either a structured
        location or a geographic coordinate.  As we parse the location, verbose
        logging information will be appended to the 'log' list, if it isn't
        None (an empty list still counts as verbose mode).

        Upon completion, we return a (type, value) tuple, where 'type' is one
        of the following:

            "structured"
            "coordinate"
            "unknown"

        The meaning of 'value' depends on the calculated type:

            * For unstructured locations which were parsed into structured
              locations, 'value' will be a dictionary with a combination of the
              following fields, containing the various parts of the location we
              identified:

                  "country"
                  "state"
                  "metro"
                  "region"
                  "county"
                  "city"
                  "locality"
                  "zipCode"

              The "zipCode" entry is stored as an integer; the key used for
              each of the other entries is whatever level_num_to_name()
              returns for levels 1 to 7.

            * For geographic coordinates, 'value' will be a dictionary with
              "lat" and "long" entries, holding the parsed coordinate value.

            * For unknown locations, 'value' will be None.
    """
    # Start by splitting the string into numbers and non-numbers.

    parts = _split_location_parts(location)

    # A pair of in-range floating point numbers is treated as a lat/long
    # coordinate, and takes precedence over everything else.

    coordinate = _find_coordinate(parts)
    if coordinate is not None:
        return ('coordinate', coordinate)

    # If we get here, we need to identify the various parts of a structured
    # location.  Start with an empty dictionary.

    structuredLoc = {}

    zip_code = _find_zip_code(parts, log)
    if zip_code is not None:
        structuredLoc['zipCode'] = zip_code

    # We now need to identify locations in the supplied string by name.  Tidy
    # the text (per the original comments, this replaces punctuation with
    # spaces) and split it into individual words.

    words = helpers.tidy_name(location).split()

    known_names = _find_known_names(words)

    # If we're in verbose mode, tell the caller which names we've identified.

    if log is not None:
        log.append("The following possible names were found:")
        for name in sorted(known_names):
            for loc_names in known_names[name]['levels'].values():
                for loc_name in loc_names:
                    log.append(" '" + name + "' could be a " +
                               loc_name.location.level.name + " entry for " +
                               loc_name.location.name)

    # Starting at the highest level and working down, choose from the available
    # names at the level.  If there is exactly one name at a given level, we
    # use that name; if there are multiple possible names for a given level, we
    # check the LocationName filters to see if any can be knocked out by the
    # values we've already calculated, hopefully resulting in just one
    # remaining name.  If there are still multiple possible names for a level,
    # we don't set the name at all.

    used_names = set()  # Set of names which we've stored into structuredLoc.
    used_loc_name = {}  # Maps level number to LocationName for that level.

    for level_num in range(1, 8):
        level_name = level_num_to_name(level_num)

        candidates = []  # List of possible LocationNames at this level.
        for name in known_names:
            if name in used_names:
                continue  # Don't use the same name twice.
            candidates.extend(known_names[name]['levels'].get(level_num, []))

        if not candidates:
            continue  # No names at this level.

        chosen = None
        if len(candidates) == 1:
            # There's no ambiguity -> use the one possible name.
            chosen = candidates[0]
            reason = "only possible "
        else:
            # We have multiple possible names -> see if we can't filter out any
            # names based on the information we've calculated thus far.  We
            # walk the list backwards so that deleting an entry doesn't
            # disturb the indexes still to be visited.
            for i in range(len(candidates) - 1, -1, -1):
                loc_name = candidates[i]
                if _filters_match(loc_name, source, used_loc_name):
                    continue
                # The filter doesn't match -> remove this LocationName from
                # the list of possibilities.
                if log is not None:
                    log.append("Excluding " + str(loc_name.location) +
                               " as a possible " + loc_name.name.level.name +
                               " because the filter values don't match.")
                del candidates[i]

            if len(candidates) == 1:
                # There's only one possible location name left -> use it.
                chosen = candidates[0]
                reason = "only remaining "
            elif log is not None:
                # Tell the caller the bad news.
                if not candidates:
                    log.append("Unable to choose a " +
                               level_name.capitalize() + " entry because all" +
                               " the possible locations were filtered out.")
                else:
                    loc_names = [c.location.name for c in candidates]
                    s = ", ".join(loc_names[:-1]) + " and " + loc_names[-1]
                    log.append("Unable to choose between " + s + " for the " +
                               level_name.capitalize() + " entry.")

        if chosen is None:
            continue

        if log is not None:
            log.append("Choosing " + str(chosen.location) + " as the " +
                       reason + chosen.name.level.name)
        structuredLoc[level_name] = chosen.name.name
        used_loc_name[level_num] = chosen
        used_names.add(chosen.name.name)

    # If we couldn't identify anything, tell the caller the bad news.

    if not structuredLoc:
        return ("unknown", None)

    # Finally, return the structured location back to the caller.

    return ("structured", structuredLoc)


def _split_location_parts(location):
    """ Split 'location' into alternating number and non-number parts.

        We return a list of [type, str] pairs, where 'type' is NUMBER or STRING
        and 'str' is the matching slice of 'location'.  Numbers are whatever
        NUMBER_SPLITTER matches; everything between matches (including any text
        after the final match, even a single character) is a STRING part.
    """
    parts = []
    pos = 0
    for match in NUMBER_SPLITTER.finditer(location):
        if match.start() > pos:
            parts.append([STRING, location[pos:match.start()]])
        parts.append([NUMBER, location[match.start():match.end()]])
        pos = match.end()
    if pos < len(location):
        parts.append([STRING, location[pos:]])
    return parts


def _find_coordinate(parts):
    """ Look for a lat/long coordinate within the given location parts.

        If we have two numbers separated by a zero or one character string
        delimiter (ignoring surrounding whitespace), both numbers contain a
        decimal point, and both are in the range -180 to +180, we assume we've
        got a lat/long coordinate and return a dictionary with "lat" and "long"
        entries.  Otherwise we return None.
    """
    for i in range(len(parts) - 2):
        first, delimiter, second = parts[i], parts[i+1], parts[i+2]
        if not (first[0] == NUMBER and delimiter[0] == STRING
                and second[0] == NUMBER):
            continue
        if len(delimiter[1].strip()) >= 2:
            continue
        if "." not in first[1] or "." not in second[1]:
            continue
        try:
            n1 = float(first[1])
            n2 = float(second[1])
        except ValueError:
            # The number pattern matched text that float() rejects -> this
            # pair can't be a coordinate, so keep looking.
            continue
        if -180 <= n1 <= 180 and -180 <= n2 <= 180:
            # Success!  We've found a lat/long coordinate.
            return {'lat' : n1, 'long' : n2}
    return None


def _find_zip_code(parts, log):
    """ Return the ZIP code found in the given location parts, if any.

        If the last number in the string contains no "-" or "." and is at
        least four characters long, we see if a Location exists with the code
        "USA-<number>".  If so, we assume that the number is a ZIP code, note
        it in 'log' (when 'log' isn't None) and return it as an integer.
        Otherwise we return None.

        NOTE(review): converting to an integer discards any leading zeros in
        the ZIP code; kept as-is because callers may rely on the integer type.
    """
    lastNum = None
    for part_type, part_text in reversed(parts):
        if part_type == NUMBER:
            lastNum = part_text
            break

    if lastNum is None:
        return None
    if "-" in lastNum or "." in lastNum or len(lastNum) < 4:
        return None

    try:
        Location.objects.get(code="USA-" + lastNum)
    except Location.DoesNotExist:
        return None

    if log is not None:
        log.append("Found ZIP code: " + lastNum)
    return int(lastNum)


def _find_known_names(words):
    """ Find the known location names within the given list of words.

        Starting at each word in turn, we try the longest run of contiguous
        words first and then progressively shorter runs, asking nameCache
        whether the upper-cased run is a known name at any of the levels 1 to
        7.  When a run matches, we record it and carry on after the end of that
        run; otherwise we move on by one word.

        We return a dictionary mapping each matched name to a dictionary with
        the following entries:

            'start_index'

                The index into 'words' where this name started.

            'levels'

                A dictionary mapping a level number to a list of LocationName
                objects for that level.
    """
    known_names = {}
    index = 0
    while index < len(words):
        matched = False
        for length in range(len(words) - index, 0, -1):
            s = " ".join(words[index:index+length]).upper()

            names = []  # List of (level, name_id) tuples.
            for level_num in range(1, 8):
                name_id = nameCache.search(level_num, s)
                if name_id is not None:
                    names.append((level_num, name_id))

            if names:
                matched = True
                break

        if matched:
            # We've found a name of length 'length' that matches at least one
            # known location.  Remember it, and skip over the words it used.
            levels = {}
            for level_num, name_id in names:
                levels[level_num] = list(
                    LocationName.objects.filter(name=name_id))
            known_names[s] = {'start_index' : index, 'levels' : levels}
            index = index + length
        else:
            # Keep trying.
            index = index + 1
    return known_names


def _filters_match(loc_name, source, used_loc_name):
    """ Return True if 'loc_name' is compatible with what we know so far.

        'source' is the data source code (or None), and 'used_loc_name' maps a
        level number to the LocationName already chosen for that level.  A
        filter on 'loc_name' is only checked when both the filter and the
        corresponding known value are present; a filter that can't be checked
        never excludes the name.
    """
    if loc_name.sourceFilter is not None and source is not None:
        if loc_name.sourceFilter.code != source:
            return False

    level_filters = ((1, 'countryFilter'),
                     (2, 'stateFilter'),
                     (3, 'metroFilter'),
                     (4, 'regionFilter'),
                     (5, 'countyFilter'),
                     (6, 'cityFilter'))
    for level_num, attr in level_filters:
        required = getattr(loc_name, attr)
        if required is not None and level_num in used_loc_name:
            if required != used_loc_name[level_num].location:
                return False
    return True
def save_name(data, location, loc_name):
    """ Store a location name, as entered on the form, in the database.

        The parameters are as follows:

            'data'

                The entered form data, as a dictionary.

            'location'

                The Location object the name belongs to.

            'loc_name'

                The LocationName record being edited, or None when a new name
                is being added.

        The entered name is upper-cased and tidied, and the matching Name
        record for the location's level is looked up (and created if it
        doesn't exist yet).  We then fill in 'loc_name' -- or a freshly
        created LocationName -- with the name, location, source filter and
        per-level filters, save it, and send the 'name_changed' and
        'location_name_changed' signals.
    """
    # Look up the Name record for this text at the location's level, creating
    # it on demand.

    tidied = helpers.tidy_name(data['name'].upper())
    try:
        name = Name.objects.get(level=location.level, name=tidied)
    except Name.DoesNotExist:
        name = Name()
        name.level = location.level
        name.name = tidied
        name.save()

    # Adding rather than editing?  Then start from a blank LocationName.

    if loc_name is None:
        loc_name = LocationName()

    # Copy the entered values across.

    loc_name.name = name
    loc_name.location = location
    loc_name.sourceFilter = getSource(data['source_id'])

    # Each filter only applies to locations at or below a minimum level, and
    # only when the corresponding form field was filled in; in every other
    # case the filter is cleared.  Each row below is:
    #
    #     (minimum level, LocationName attribute, form field key, form id key)

    filter_specs = (
        (2, 'countryFilter', 'country_field', 'country_id'),
        (3, 'stateFilter',   'state_field',   'state_id'),
        (4, 'metroFilter',   'metro_field',   'metro_id'),
        (5, 'regionFilter',  'region_field',  'region_id'),
        (6, 'countyFilter',  'county_field',  'county_id'),
        (7, 'cityFilter',    'city_field',    'city_id'),
    )
    for min_level, attr, field_key, id_key in filter_specs:
        if location.level.level >= min_level and data[field_key] != "":
            filter_value = getLocation(data[id_key])
        else:
            filter_value = None
        setattr(loc_name, attr, filter_value)

    loc_name.save()

    # Finally, let the rest of the system know that the Name and LocationName
    # tables have been changed.

    name_changed.send(sender=None)
    location_name_changed.send(sender=None)