Example #1
0
def main():
    """ Tidy up the name of every Name record.

        Each record's name is passed through helpers.tidy_name().  Where the
        tidied text differs from the stored text, the change is printed and
        the record is saved with the tidied name.  Records whose name is
        already tidy are left untouched.
    """
    for record in Name.objects.all():
        original = record.name
        tidied   = helpers.tidy_name(original)

        if tidied == original:
            continue # Already tidy -> nothing to report or save.

        # A single parenthesised argument prints the same text whether
        # 'print' is a statement or a function.
        print("'%s' -> '%s'" % (original, tidied))
        record.name = tidied
        record.save()
Example #2
0
File: names.py Project: 3taps/geo
def save_name(data, location, loc_name):
    """ Store an entered location name in the database.

        The parameters are as follows:

            'data'

                The entered form data, as a dictionary.  We read the 'name'
                and 'source_id' entries, plus the '<kind>_field' and
                '<kind>_id' entries for each filter kind that applies to the
                location's level.

            'location'

                The Location object the name is being entered for.

            'loc_name'

                The LocationName record being edited, or None if a new
                LocationName should be created.

        The Name record matching the tidied, upper-cased name at the
        location's level is looked up, and created if it doesn't exist.  The
        LocationName is then filled in and saved.
    """
    # Find the Name record for this (level, name) pair, creating it on demand.

    tidied = helpers.tidy_name(data['name'].upper())

    try:
        name = Name.objects.get(level=location.level, name=tidied)
    except Name.DoesNotExist:
        name = Name()
        name.level = location.level
        name.name  = tidied
        name.save()

    # Adding rather than editing -> start with a blank LocationName.

    if loc_name is None:
        loc_name = LocationName()

    loc_name.name         = name
    loc_name.location     = location
    loc_name.sourceFilter = getSource(data['source_id'])

    # Each filter is only meaningful from a minimum level number upwards.  The
    # filter is set when the location is at or beyond that level and the
    # matching '<kind>_field' entry is non-blank; otherwise it is cleared.

    filter_kinds = ((2, "country"),
                    (3, "state"),
                    (4, "metro"),
                    (5, "region"),
                    (6, "county"),
                    (7, "city"))

    for min_level, kind in filter_kinds:
        filter_value = None
        if location.level.level >= min_level and data[kind + '_field'] != "":
            filter_value = getLocation(data[kind + '_id'])
        setattr(loc_name, kind + "Filter", filter_value)

    loc_name.save()
Example #3
0
def check_location(location):
    """ Check the given location to see that it has the correct fields.

        'location' should be a dictionary with some combination of the
        following fields:

            "text"
            "country"
            "state"
            "metro"
            "region"
            "county"
            "city"
            "locality"
            "zipCode"
            "lat"
            "long"
            "ref"

        We check to see that the combination of fields makes up a valid
        unstructured location, a valid geographic coordinate, or a valid
        structured location.  The checks are made in that order, so a "text"
        entry takes precedence over "lat"/"long", which in turn take
        precedence over the structured fields.

        Upon completion, we return a dictionary containing the checked
        location's details.  This dictionary will have the following entries:

            'type'

                The type of location.  One of:

                    "unstructured"
                    "structured"
                    "coordinate"
                    "error"

            'value'

                The location's details.  The meaning of this field will depend
                on the location's type:

                    * For an unstructured location, the value will be the
                      unstructured location's text.

                    * For a structured location, the value will be a dictionary
                      with any combination of the following fields, each
                      passed through helpers.tidy_name():

                          "country"
                          "state"
                          "metro"
                          "region"
                          "county"
                          "city"
                          "locality"
                          "zipCode"

                    * For a geographic coordinate, the value will be a
                      dictionary with 'lat' and 'long' entries, both
                      floating-point numbers.

                    * For errors, the value will be a dictionary with 'code'
                      and 'message' entries, describing what is wrong with the
                      location.

            'ref'

                The supplied 'ref' value for this location, or None if no
                reference value was supplied for this location.
    """
    ref = location.get("ref")

    def error(message):
        # Build the standard error result for this location.
        return dict(type="error",
                    value=dict(code=400, message=message),
                    ref=ref)

    # See if this is an unstructured location.

    if "text" in location:
        return dict(type="unstructured", value=location['text'], ref=ref)

    # See if this is a geographic coordinate.  Supplying either half of the
    # coordinate commits us to this interpretation, so the other half must be
    # present too.  Latitude is checked in full before longitude.

    if ("lat" in location) or ("long" in location):
        coordinate = {}
        for key, label in (("lat", "latitude"), ("long", "longitude")):
            if key not in location:
                return error("Missing " + label + " value.")
            try:
                coordinate[key] = float(location[key])
            except (TypeError, ValueError, OverflowError):
                # TypeError covers non-numeric, non-string values such as
                # None or a list; OverflowError covers oversized integers.
                return error("Invalid " + label + " value.")

        return dict(type="coordinate", value=coordinate, ref=ref)

    # If we get here, assume that it's a structured location.

    loc = {}
    for field in ["country", "state", "metro", "region", "county",
                  "city", "locality", "zipCode"]:
        if field in location:
            loc[field] = helpers.tidy_name(location[field])

    if not loc:
        # Oops...no supplied values.
        return error("Badly formed location.")

    return dict(type="structured", value=loc, ref=ref)
Example #4
0
File: views.py Project: 3taps/geo
def search(request, selector_id):
    """ Respond to the "/search" URL.

        We perform searches by location code or location name.

        'selector_id' identifies the entry in the session's
        "location_selector" -> "selectors" collection which describes the
        view hosting the location selector.

        The request parameters are handled in this order:

            "back"

                Redirect to the view containing the location selector.

            "loc_code"

                Look up a location by its exact code.  If it exists we
                redirect to the selector's base URL plus the code; otherwise
                we display the search error page.

            "loc_name"

                Look up locations by name.  We display either the search
                results page or, if nothing matched, the search error page.

        If none of these parameters are supplied, we redirect back to the
        view containing the location selector.  Unauthenticated users, and
        requests without a usable selector in the session, are redirected to
        the admin home view.
    """
    # 'is_authenticated' is a method on older Django releases and a property
    # on newer ones.  Testing the bare attribute on an older release is always
    # true (a bound method is truthy), so call it when it is callable.
    is_authenticated = request.user.is_authenticated
    if callable(is_authenticated):
        is_authenticated = is_authenticated()
    if not is_authenticated:
        return HttpResponseRedirect(reverse(settings.ADMIN_HOME_VIEW))

    # Anything other than a POST (eg, a HEAD request) is treated like a GET,
    # so 'params' is always defined.
    if request.method == "POST":
        params = request.POST
    else:
        params = request.GET

    if "location_selector" not in request.session:
        # Should never happen.
        return HttpResponseRedirect(reverse(settings.ADMIN_HOME_VIEW))

    try:
        info = request.session["location_selector"]["selectors"][int(selector_id)]
    except (KeyError, IndexError, ValueError):
        # Unknown or malformed selector ID -> treat it like a missing
        # selector rather than failing with a server error.
        return HttpResponseRedirect(reverse(settings.ADMIN_HOME_VIEW))

    menu_heading = info["menu_heading"]
    menu_cur_app = info["menu_cur_app"]
    menu_cur_view = info["menu_cur_view"]
    base_url = info["base_url"]

    cancel_url = reverse(menu_cur_app + ".views." + menu_cur_view)

    def render_page(template, context):
        # Render 'template' with the shared menu added to its context.
        context["menu_html"] = menus.generate(request, menu_heading, menu_cur_app, menu_cur_view)
        return render_to_response(template, context, context_instance=RequestContext(request))

    if params.get("back") is not None:
        # The user clicked on our "Back" button -> redisplay the view
        # containing the location selector.
        return HttpResponseRedirect(cancel_url)

    loc_code = params.get("loc_code")
    if loc_code not in (None, ""):
        # The user entered a location code.  Attempt to search against this
        # code.
        try:
            Location.objects.get(code=loc_code)
        except Location.DoesNotExist:
            err_msg = "There is no location with the code '" + loc_code + "'"
            return render_page("search_error.html", {"err_msg": err_msg, "cancel_url": cancel_url})
        else:
            return HttpResponseRedirect(base_url + loc_code)

    if params.get("loc_name") not in (None, ""):
        # The user entered a location name.  Attempt to search against this
        # name.
        name = helpers.tidy_name(params["loc_name"])

        # Collect each matching location once, in the order first found.
        locations = []
        for nameRecord in Name.objects.filter(name=name.upper()):
            for loc_name in LocationName.objects.filter(name=nameRecord):
                loc = loc_name.location
                if loc not in locations:
                    locations.append(loc)

        if not locations:
            # Tell the user the bad news.
            err_msg = "There are no locations with the name '" + name + "'"
            return render_page("search_error.html", {"err_msg": err_msg, "cancel_url": cancel_url})

        return render_page("search_results.html", {"name": name, "locations": locations, "base_url": base_url})

    # Nothing to search for -> redisplay the view containing the location
    # selector rather than returning None, which is not a valid response.
    return HttpResponseRedirect(cancel_url)
Example #5
0
File: parser.py Project: 3taps/geo
def parseUnstructuredLocation(location, source, log):
    """ Attempt to parse an unstructured location.

        The parameters are as follows:

            'location'

                The text of the location to parse, as a string.

            'source'

                The 3taps source code for the data source, or None if no source
                was specified.

            'log'

                If we are in verbose mode, this will be a list of strings that
                we can append our debugging information onto.  If we are not in
                verbose mode, this will be set to None.

        We attempt to parse the given text, identifying either a structured
        location or a geographic coordinate.

        As we parse the location, verbose logging information will be appended
        to the 'log' list, if it isn't None.  An empty list still counts as
        verbose mode.

        Upon completion, we return a (type, value) tuple, where 'type' is one
        of the following:

            "structured"
            "coordinate"
            "unknown"

        The meaning of 'value' depends on the calculated type:

            * For unstructured locations which were parsed into structured
              locations, 'value' will be a dictionary with a combination of the
              following fields, containing the various parts of the location we
              identified:

                  "country"
                  "state"
                  "metro"
                  "region"
                  "county"
                  "city"
                  "locality"
                  "zipCode"

              The "zipCode" entry, if present, is an integer; the other
              entries are location names.

            * For geographic coordinates, 'value' will be a dictionary with
              "lat" and "long" entries, holding the parsed coordinate value.

            * For unknown locations, 'value' will be None.
    """
    # Start by splitting the string into numbers and non-numbers.

    parts = [] # List of parts within the location.  Each list item is a
               # [type, str] pair, where 'type' is NUMBER or STRING and 'str'
               # is the string within this part of the location.

    pos = 0
    while True:
        match = NUMBER_SPLITTER.search(location, pos)
        if match is None: # No more matches.
            # Whatever remains is a trailing string, even if it is only a
            # single character long.
            if pos < len(location):
                parts.append([STRING, location[pos:]])
            break

        if match.start() > pos:
            parts.append([STRING, location[pos:match.start()]])
        parts.append([NUMBER, match.group()])
        pos = match.end()

    # If we have two numbers separated by a zero or one character string
    # delimiter, and both numbers are floating point values in the range -180
    # to +180, assume we've got a lat/long coordinate.

    for first, middle, last in zip(parts, parts[1:], parts[2:]):
        if not (first[0] == NUMBER and middle[0] == STRING
                                   and last[0] == NUMBER):
            continue

        num1      = first[1]
        delimiter = middle[1]
        num2      = last[1]

        if len(delimiter.strip()) >= 2:
            continue
        if "." not in num1 or "." not in num2:
            continue

        try:
            n1 = float(num1)
            n2 = float(num2)
        except ValueError:
            # Not a well-formed number after all -> try the next candidate.
            continue

        if -180 <= n1 <= 180 and -180 <= n2 <= 180:
            # Success!  We've found a lat/long coordinate.
            return ('coordinate', {'lat'  : n1,
                                   'long' : n2})

    # If we get here, we need to identify the various parts of a structured
    # location.  Start with an empty dictionary.

    structuredLoc = {}

    # If the last number in the string consists of a positive integer number
    # with at least four digits, see if it matches one of our existing ZIP
    # codes.  If so, assume that the number is a ZIP code.

    lastNum = None
    for part in reversed(parts):
        if part[0] == NUMBER:
            lastNum = part[1]
            break

    if (lastNum is not None and ("-" not in lastNum)
                            and ("." not in lastNum)
                            and (len(lastNum) >= 4)):
        try:
            Location.objects.get(code="USA-"+lastNum)
        except Location.DoesNotExist:
            pass
        else:
            # Test against None, not truthiness: an empty log list still
            # means we are in verbose mode.
            if log is not None:
                log.append("Found ZIP code: " + lastNum)
            # NOTE(review): int() drops leading zeros (eg, "02134" becomes
            # 2134).  Kept as an integer because callers may rely on the
            # type -- confirm.
            structuredLoc['zipCode'] = int(lastNum)

    # We now need to identify locations in the supplied string by name.  Start
    # by tidying the text (tidy_name() is expected to deal with punctuation).

    text = helpers.tidy_name(location)

    # Split the text into individual words.

    words = text.split()

    # Iterate over every contiguous combination of words, seeing if those words
    # define a known location name.  If we find a known name, we remember that
    # name and the levels at which that name can appear.

    known_names = {} # Maps location name to a dictionary with the following
                     # entries:
                     #
                     #    'start_index'
                     #    'levels'
                     #
                     # where 'start_index' is the index into "words" where
                     # this name started, and 'levels' is a dictionary mapping
                     # a level number to a list of LocationName objects for
                     # that level.

    index = 0

    while index < len(words):

        # Try to find a match against a location name starting at the current
        # index, trying the longest run of words first.

        for length in range(len(words) - index, 0, -1):
            candidate = " ".join(words[index:index+length]).upper()

            names = [] # List of (level, name_id) tuples.
            for level_num in range(1, 8):
                name_id = nameCache.search(level_num, candidate)
                if name_id is not None:
                    names.append((level_num, name_id))

            if names:
                break
        else:
            # No name starts at this word -> keep trying from the next one.
            index = index + 1
            continue

        # We've found a name of length 'length' that matches at least one
        # known location.  Remember it, then skip over the words it used.

        levels = {}
        for level_num, name_id in names:
            levels[level_num] = list(LocationName.objects.filter(name=name_id))

        known_names[candidate] = {'start_index' : index,
                                  'levels'      : levels}

        index = index + length

    # If we're in verbose mode, tell the caller which names we've identified.

    if log is not None:
        log.append("The following possible names were found:")
        for name in sorted(known_names.keys()):
            for loc_names in known_names[name]['levels'].values():
                for loc_name in loc_names:
                    log.append("    '" + name + "' could be a " +
                               loc_name.location.level.name + " entry for " +
                               loc_name.location.name)

    # Starting at the highest level and working down, choose from the available
    # names at the level.  If there is exactly one name at a given level, we
    # use that name; if there are multiple possible names for a given level, we
    # check the LocationName filters to see if any can be knocked out by the
    # values we've already calculated, hopefully resulting in just one
    # remaining name.  If there are still multiple possible names for a level,
    # we don't set the name at all.

    used_names    = set() # Set of names which we've stored into structuredLoc.
    used_loc_name = {}    # Maps level number to LocationName for that level.

    for level_num in range(1, 8):
        level_name = level_num_to_name(level_num)
        possible_loc_names = [] # List of possible LocationNames at this level.
        for name in known_names:
            if name in used_names: continue # Don't use the same name twice.
            if level_num in known_names[name]['levels']:
                possible_loc_names.extend(known_names[name]['levels'][level_num])

        if not possible_loc_names:
            continue # No names at this level.

        if len(possible_loc_names) == 1:
            # There's no ambiguity -> use the one possible name.
            reason = "only possible"
        else:
            # We have multiple possible names -> see if we can't filter out any
            # names based on the information we've calculated thus far.  We
            # work backwards so that deleting an entry doesn't disturb the
            # indexes still to be visited.
            reason = "only remaining"
            for i in range(len(possible_loc_names)-1, -1, -1):
                loc_name = possible_loc_names[i]
                if _loc_name_filters_match(loc_name, source, used_loc_name):
                    continue

                # The filter doesn't match -> remove this LocationName from
                # the list of possibilities.
                if log is not None:
                    log.append("Excluding " + str(loc_name.location) +
                               " as a possible "+loc_name.name.level.name +
                               " because the filter values don't match.")
                del possible_loc_names[i]

        if len(possible_loc_names) == 1:
            # Exactly one candidate (left) -> use it.
            loc_name = possible_loc_names[0]
            if log is not None:
                log.append("Choosing " + str(loc_name.location) + " as the " +
                           reason + " " + loc_name.name.level.name)
            structuredLoc[level_name] = loc_name.name.name
            used_loc_name[level_num]  = loc_name
            used_names.add(loc_name.name.name)
        elif log is not None:
            # Tell the caller the bad news.
            if not possible_loc_names:
                log.append("Unable to choose a " +
                           level_name.capitalize() + " entry because all" +
                           " the possible locations were filtered out.")
            else:
                # Two or more candidates remain: "A and B", "A, B and C", ...
                loc_names = [loc_name.location.name
                             for loc_name in possible_loc_names]
                s = ", ".join(loc_names[:-1]) + " and " + loc_names[-1]

                log.append("Unable to choose between " + s + " for the " +
                           level_name.capitalize() + " entry.")

    # If we couldn't identify anything, tell the caller the bad news.

    if not structuredLoc:
        return ("unknown", None)

    # Finally, return the structured location back to the caller.

    return ("structured", structuredLoc)


def _loc_name_filters_match(loc_name, source, used_loc_name):
    """ Return True if 'loc_name's filters agree with the choices made so far.

        'source' is the caller's source code (or None), and 'used_loc_name'
        maps a level number to the LocationName already chosen for that level.
        A filter is only checked when both the filter and the value it is
        compared against are available.
    """
    if loc_name.sourceFilter is not None and source is not None:
        if loc_name.sourceFilter.code != source:
            return False

    # (filter attribute, level number whose chosen location it must equal).
    level_filters = (("countryFilter", 1),
                     ("stateFilter",   2),
                     ("metroFilter",   3),
                     ("regionFilter",  4),
                     ("countyFilter",  5),
                     ("cityFilter",    6))

    for attr, filter_level in level_filters:
        required = getattr(loc_name, attr)
        if required is not None and filter_level in used_loc_name:
            if required != used_loc_name[filter_level].location:
                return False

    return True