Example #1
0
def search(request, name):
    """ Respond to the "/geo/editor/location/search/XYZ" URL.

        The parameters are as follows:

            'request'

                The incoming HTTP request object.

            'name'

                The location name to search for, as taken from the URL.

        We display a list of the locations which use the given name, if any.
        Users who are not logged in are redirected to the main editor page,
        and a POST with the "Back" button pressed redirects to the main
        location editor page.
    """
    # 'is_authenticated' is a method on older Django releases and a plain
    # attribute on newer ones.  Testing the bare method is always true, which
    # would let anonymous users through, so call it whenever it is callable.

    authenticated = request.user.is_authenticated
    if callable(authenticated):
        authenticated = authenticated()

    if not authenticated:
        return HttpResponseRedirect("/geo/editor/")

    if request.method == "POST":
        if request.POST.get("back") == "Back":
            # The user clicked on our "Back" button -> return to the main
            # location editor page.
            return HttpResponseRedirect("/geo/editor/location")

    # Find the locations matching the given name.  The same location can be
    # reached through more than one LocationName record, so we only add each
    # location once, preserving the order in which we first saw it.

    name      = helpers.tidy_name(name.upper())
    locations = []

    for nameRecord in Name.objects.filter(name=name):
        for loc_name in LocationName.objects.filter(name=nameRecord):
            loc = loc_name.location
            if loc not in locations:
                locations.append(loc)

    # Return the results back to the caller.

    return render_to_response("location_search.html",
                              {'name'      : name,
                               'locations' : locations},
                              context_instance=RequestContext(request))
Example #2
0
def main():
    """ Our main program.

        We run the text of every Name record through helpers.tidy_name().
        Whenever the tidied text differs from the stored text, we print the
        old and new values and save the tidied text back into the record.
    """
    for name in Name.objects.all():
        text = helpers.tidy_name(name.name)

        if name.name == text:
            continue # Already tidy -> nothing to do.

        # We've changed the name.  Note that print is called with a single
        # parenthesised argument so that this line is valid (and prints the
        # same text) under both Python 2 and Python 3.
        print("'%s' -> '%s'" % (name.name, text))
        name.name = text
        name.save()
Example #3
0
def _split_location_parts(location):
    """ Split 'location' into its number and non-number parts.

        We return a list of [type, str] pairs, in the order in which they
        appear within the string, where 'type' is NUMBER or STRING and 'str'
        is the piece of text making up that part of the location.
    """
    parts = []
    pos   = 0
    while True:
        match = NUMBER_SPLITTER.search(location, pos)
        if match is None:
            # No more numbers -> anything left over is a trailing string,
            # even if it is only a single character long.
            if pos < len(location):
                parts.append([STRING, location[pos:]])
            break

        if match.start() > pos:
            parts.append([STRING, location[pos:match.start()]])
        parts.append([NUMBER, location[match.start():match.end()]])
        pos = match.end()

    return parts


def _loc_name_matches_filters(loc_name, source, used_loc_name):
    """ Return True if 'loc_name' is compatible with what we know so far.

        'loc_name' is the LocationName to check, 'source' is the source code
        supplied by the caller (or None), and 'used_loc_name' maps a level
        number to the LocationName already chosen for that level.

        A filter only excludes the LocationName if the filter is set, we have
        a value to compare it against, and the two values differ.
    """
    if loc_name.sourceFilter is not None and source is not None:
        if loc_name.sourceFilter.code != source:
            return False

    # Each of these filters is compared against the location we have already
    # chosen at the given level, if any.
    filter_levels = (("countryFilter", 1),
                     ("stateFilter",   2),
                     ("metroFilter",   3),
                     ("regionFilter",  4),
                     ("countyFilter",  5),
                     ("cityFilter",    6))

    for attr_name, level_num in filter_levels:
        if level_num not in used_loc_name:
            continue # Nothing chosen at this level -> nothing to compare.
        wanted = getattr(loc_name, attr_name)
        if wanted is not None and wanted != used_loc_name[level_num].location:
            return False

    return True


def parseUnstructuredLocation(location, source, log):
    """ Attempt to parse an unstructured location.

        The parameters are as follows:

            'location'

                The text of the location to parse, as a string.

            'source'

                The 3taps source code for the data source, or None if no source
                was specified.

            'log'

                If we are in verbose mode, this will be a list of strings that
                we can append our debugging information onto.  If we are not in
                verbose mode, this will be set to None.

        We attempt to parse the given text, identifying either a structured
        location or a geographic coordinate.

        As we parse the location, verbose logging information will be appended
        to the 'log' list, if it isn't None.

        Upon completion, we return a (type, value) tuple, where 'type' is one
        of the following:

            "structured"
            "coordinate"
            "unknown"

        The meaning of 'value' depends on the calculated type:

            * For unstructured locations which were parsed into structured
              locations, 'value' will be a dictionary with a combination of the
              following fields, containing the various parts of the location we
              identified:

                  "country"
                  "state"
                  "metro"
                  "region"
                  "county"
                  "city"
                  "locality"
                  "zipCode"

            * For geographic coordinates, 'value' will be a dictionary with
              "lat" and "long" entries, holding the parsed coordinate value.

            * For unknown locations, 'value' will be None.
    """
    # Start by splitting the string into numbers and non-numbers.

    parts = _split_location_parts(location)

    # If we have two numbers separated by a zero or one character string
    # delimiter, and both numbers are floating point values in the range -180
    # to +180, assume we've got a lat/long coordinate.

    for first, middle, last in zip(parts, parts[1:], parts[2:]):
        if first[0] != NUMBER or middle[0] != STRING or last[0] != NUMBER:
            continue

        num1      = first[1]
        delimiter = middle[1]
        num2      = last[1]

        if len(delimiter.strip()) >= 2:
            continue # Delimiter is too long.
        if "." not in num1 or "." not in num2:
            continue # Not both floating point values.

        n1 = float(num1)
        n2 = float(num2)
        if -180 <= n1 <= 180 and -180 <= n2 <= 180:
            # Success!  We've found a lat/long coordinate.
            return ('coordinate', {'lat'  : n1,
                                   'long' : n2})

    # If we get here, we need to identify the various parts of a structured
    # location.  Start with an empty dictionary.

    structuredLoc = {}

    # If the last number in the string consists of a positive integer number
    # with at least four digits, see if it matches one of our existing ZIP
    # codes.  If so, assume that the number is a ZIP code.

    lastNum = None
    for part in reversed(parts):
        if part[0] == NUMBER:
            lastNum = part[1]
            break

    if lastNum is not None:
        if (("-" not in lastNum) and ("." not in lastNum)
                                 and (len(lastNum) >= 4)):
            try:
                loc = Location.objects.get(code="USA-"+lastNum)
            except Location.DoesNotExist:
                loc = None

            if loc is not None:
                # Test against None rather than truthiness: in verbose mode
                # the log may still be an empty list at this point.
                if log is not None:
                    log.append("Found ZIP code: " + lastNum)
                structuredLoc['zipCode'] = int(lastNum)

    # We now need to identify locations in the supplied string by name.  Start
    # by replacing all punctuation with spaces.

    text = helpers.tidy_name(location)

    # Split the text into individual words.

    words = text.split()

    # Iterate over every contiguous combination of words, seeing if those words
    # define a known location name.  If we find a known name, we remember that
    # name and the levels at which that name can appear.

    known_names = {} # Maps location name to a dictionary with the following
                     # entries:
                     #
                     #    'start_index'
                     #    'levels'
                     #
                     # where 'start_index' is the index into "words" where
                     # this name started, and 'levels' is a dictionary mapping
                     # a level number to a list of LocationName objects for
                     # that level.

    index = 0

    while index < len(words):

        # Try to find a match against a location name starting at the current
        # index.  We try the longest run of words first, and shorten the run
        # by one word at a time until something matches.

        s      = None
        names  = [] # List of (level, name_id) tuples.
        length = len(words) - index
        while length > 0:
            s = " ".join(words[index:index+length]).upper()

            names = []
            for level_num in range(1, 8):
                name_id = nameCache.search(level_num, s)
                if name_id is not None:
                    names.append((level_num, name_id))

            if names:
                break
            length = length - 1

        if names:
            # We've found a name of length 'length' that matches at least one
            # known location.  Remember it.
            levels = {}
            for level_num, name_id in names:
                levels[level_num] = list(
                                LocationName.objects.filter(name=name_id))
            known_names[s] = {'start_index' : index,
                              'levels'      : levels}

            index = index + length
        else:
            # Keep trying.
            index = index + 1

    # If we're in verbose mode, tell the caller which names we've identified.

    if log is not None:
        log.append("The following possible names were found:")
        for name in sorted(known_names.keys()):
            for loc_names in known_names[name]['levels'].values():
                for loc_name in loc_names:
                    log.append("    '" + name + "' could be a " +
                               loc_name.location.level.name + " entry for " +
                               loc_name.location.name)

    # Starting at the highest level and working down, choose from the available
    # names at the level.  If there is exactly one name at a given level, we
    # use that name; if there are multiple possible names for a given level, we
    # check the LocationName filters to see if any can be knocked out by the
    # values we've already calculated, hopefully resulting in just one
    # remaining name.  If there are still multiple possible names for a level,
    # we don't set the name at all.

    used_names    = set() # Set of names which we've stored into structuredLoc.
    used_loc_name = {}    # Maps level number to LocationName for that level.

    for level_num in range(1, 8):
        level_name = level_num_to_name(level_num)

        possible_loc_names = [] # List of possible LocationNames at this level.
        for name in known_names:
            if name in used_names:
                continue # Don't use the same name twice.
            possible_loc_names.extend(
                            known_names[name]['levels'].get(level_num, []))

        if not possible_loc_names:
            continue # No names at this level.

        if len(possible_loc_names) == 1:
            # There's no ambiguity -> use the one possible name.
            reason = "only possible"
        else:
            # We have multiple possible names -> see if we can't filter out any
            # names based on the information we've calculated thus far.  We
            # walk the list backwards so that we can delete entries as we go.
            reason = "only remaining"
            for i in range(len(possible_loc_names)-1, -1, -1):
                loc_name = possible_loc_names[i]
                if _loc_name_matches_filters(loc_name, source, used_loc_name):
                    continue

                # The filter doesn't match -> remove this LocationName from
                # the list of possibilities.
                if log is not None:
                    log.append("Excluding " + str(loc_name.location) +
                               " as a possible "+loc_name.name.level.name +
                               " because the filter values don't match.")
                del possible_loc_names[i]

        if len(possible_loc_names) == 1:
            # There's exactly one candidate (left) -> use it.
            loc_name = possible_loc_names[0]
            if log is not None:
                log.append("Choosing " + str(loc_name.location) + " as the " +
                           reason + " " + loc_name.name.level.name)
            structuredLoc[level_name] = loc_name.name.name
            used_loc_name[level_num]  = loc_name
            used_names.add(loc_name.name.name)
        elif log is not None:
            # Tell the caller the bad news.
            if len(possible_loc_names) == 0:
                log.append("Unable to choose a " +
                           level_name.capitalize() + " entry because all" +
                           " the possible locations were filtered out.")
            else:
                loc_names = [candidate.location.name
                             for candidate in possible_loc_names]

                if len(loc_names) == 2:
                    s = loc_names[0] + " and " + loc_names[1]
                else:
                    s = ", ".join(loc_names[:-1]) + " and " + loc_names[-1]

                log.append("Unable to choose between " + s + " for the " +
                           level_name.capitalize() + " entry.")

    # If we couldn't identify anything, tell the caller the bad news.

    if len(structuredLoc) == 0:
        return ("unknown", None)

    # Finally, return the structured location back to the caller.

    return ("structured", structuredLoc)
Example #4
0
def save_name(data, location, loc_name):
    """ Store a location name, entered via a form, into the database.

        The parameters are as follows:

            'data'

                A dictionary holding the entered form data.

            'location'

                The Location object the name is being entered for.

            'loc_name'

                The LocationName record being edited, or None if a new name
                is being added.

        We update 'loc_name' (or a newly-created LocationName record) with
        the entered values and save it, creating a matching Name record
        first if one doesn't already exist.
    """
    # Look up the Name record for this text at the location's level, making
    # a new one if there isn't one already.

    name_value = helpers.tidy_name(data['name'].upper())

    try:
        name = Name.objects.get(level=location.level, name=name_value)
    except Name.DoesNotExist:
        name = Name(level=location.level, name=name_value)
        name.save()

    # Work with a fresh LocationName record if we weren't given one to edit.

    if loc_name is None:
        loc_name = LocationName()

    # Copy the entered values across.

    loc_name.name         = name
    loc_name.location     = location
    loc_name.sourceFilter = getSource(data['source_id'])

    # Each filter is only meaningful for locations at or below a given level,
    # and only when the user actually filled in the matching form field.
    # Everything else gets cleared.  Each entry below is:
    #
    #     (minimum level, form field key, form ID key, LocationName attribute)

    filter_specs = ((2, 'country_field', 'country_id', 'countryFilter'),
                    (3, 'state_field',   'state_id',   'stateFilter'),
                    (4, 'metro_field',   'metro_id',   'metroFilter'),
                    (5, 'region_field',  'region_id',  'regionFilter'),
                    (6, 'county_field',  'county_id',  'countyFilter'),
                    (7, 'city_field',    'city_id',    'cityFilter'))

    for min_level, field_key, id_key, attr_name in filter_specs:
        if location.level.level >= min_level and data[field_key] != "":
            filter_value = getLocation(data[id_key])
        else:
            filter_value = None
        setattr(loc_name, attr_name, filter_value)

    loc_name.save()

    # Lastly, signal to the rest of the system that the Name and LocationName
    # tables have been changed.

    name_changed.send(sender=None)
    location_name_changed.send(sender=None)