Esempio n. 1
0
 def save(self, *args, **kwargs):
     """Normalize both spellings, then save the model.

     Ensures everything is normalized (uppercase, normalized whitespace).
     Doing this on the model so it happens regardless of whether data
     comes from the admin UI or a script or whatever.

     Any positional/keyword arguments (e.g. ``force_insert``, ``using``)
     are forwarded unchanged to the parent ``save()``, so that framework
     code which calls ``save()`` with options keeps working.
     """
     # ``or ''`` turns a missing (None) value into an empty string before
     # it is handed to normalize().
     self.incorrect = normalize(self.incorrect or '')
     self.correct = normalize(self.correct or '')
     super(StreetMisspelling, self).save(*args, **kwargs)
Esempio n. 2
0
 def save(self, *args, **kwargs):
     """Fill in ``normalized_name`` and save the model.

     If ``normalized_name`` is already set it is re-normalized; otherwise
     it is derived from ``pretty_name``.

     Any positional/keyword arguments (e.g. ``force_insert``, ``using``)
     are forwarded unchanged to the parent ``save()``.
     """
     # Not doing this in clean() because we really don't want there to be
     # any way to get this wrong.
     if self.normalized_name:
         self.normalized_name = normalize(self.normalized_name)
     else:
         self.normalized_name = normalize(self.pretty_name)
     super(LocationSynonym, self).save(*args, **kwargs)
Esempio n. 3
0
 def save(self, force_insert=False, force_update=False, using=None):
     """Normalize the ``incorrect`` and ``correct`` spellings, then save.

     Normalization (uppercase, normalized whitespace) is done on the model
     so that it applies no matter where the data came from (admin UI,
     a script, ...). The three options are passed straight through to the
     parent ``save()``.
     """
     for field_name in ("incorrect", "correct"):
         # A missing value is normalized as the empty string.
         raw_value = getattr(self, field_name) or ""
         setattr(self, field_name, normalize(raw_value))
     save_options = dict(
         force_insert=force_insert,
         force_update=force_update,
         using=using,
     )
     super(StreetMisspelling, self).save(**save_options)
Esempio n. 4
0
 def save(self, force_insert=False, force_update=False, using=None):
     """Derive ``normalized_name`` from ``name``, then save.

     The normalization (uppercase, normalized whitespace) lives on the
     model so it is applied whether the data comes from the admin UI,
     a script or anywhere else. The three options are passed straight
     through to the parent ``save()``.
     """
     cleaned_name = normalize(self.name)
     self.normalized_name = cleaned_name
     parent = super(Suburb, self)
     parent.save(
         force_insert=force_insert,
         force_update=force_update,
         using=using,
     )
Esempio n. 5
0
    def geocode(self, location):
        """
        Geocodes the given location, handling caching behind the scenes.

        The location string is normalized first. When ``self.use_cache`` is
        true, the first ``GeocoderCache`` row whose ``normalized_location``
        matches is converted with ``Address.from_cache()``. If that yields
        nothing, ``self._do_geocode()`` is called.

        Raises AmbiguousResult (re-raised from ``_do_geocode()``) when the
        first choice has no point, or when any other choice has a different
        point than the first one.

        NOTE(review): the code visible here stops right after the ambiguity
        check -- there is no return statement or cache write, and
        ``cache_hit`` is never read. Presumably the remainder of the method
        is missing from this excerpt; confirm against the full source.
        """
        location = normalize(location)
        result, cache_hit = None, False

        # Get the result (an Address instance), either from the cache or by
        # calling _do_geocode().
        if self.use_cache:
            try:
                # Indexing with [0] is what signals a cache miss: the
                # IndexError below is treated as "nothing cached".
                cached = GeocoderCache.objects.filter(normalized_location=location)[0]
            except IndexError:
                pass
            else:
                result = Address.from_cache(cached)
                cache_hit = True

        if result is None:
            try:
                result = self._do_geocode(location)
            except AmbiguousResult, e:
                # If multiple results were found, check whether they have the
                # same point. If they all have the same point, don't raise the
                # AmbiguousResult exception -- just return the first one.
                #
                # An edge case is if result['point'] is None. This could happen
                # if the geocoder found locations, not points. In that case,
                # just raise the AmbiguousResult.
                result = e.choices[0]
                if result['point'] is None:
                    # Bare raise: re-raises the AmbiguousResult being handled.
                    raise
                for i in e.choices[1:]:
                    if i['point'] != result['point']:
                        raise
Esempio n. 6
0
 def save(self, *args, **kwargs):
     """Derive ``normalized_name`` from ``name``, then save the model.

     Ensures everything is normalized (uppercase, normalized whitespace).
     Doing this on the model so it happens regardless of whether data
     comes from the admin UI or a script or whatever.

     Any positional/keyword arguments (e.g. ``force_insert``, ``using``)
     are forwarded unchanged to the parent ``save()``, so that framework
     code which calls ``save()`` with options keeps working.
     """
     self.normalized_name = normalize(self.name)
     super(Suburb, self).save(*args, **kwargs)
Esempio n. 7
0
    def geocode(self, location):
        """
        Geocodes the given location, handling caching behind the scenes.

        Steps visible in this method:

        1. Normalize ``location``.
        2. If ``self.use_cache`` is true, take the first ``GeocoderCache``
           row with a matching ``normalized_location`` and build the result
           with ``Address.from_cache()``.
        3. Without a cached result, call ``self._do_geocode()``.
        4. If that raises AmbiguousResult, keep the first choice only when
           every choice has the same, non-None point; otherwise re-raise.

        NOTE(review): nothing is returned and ``cache_hit`` is not used in
        the lines shown; the method looks truncated in this excerpt --
        confirm against the full source.
        """
        location = normalize(location)
        result, cache_hit = None, False

        # Get the result (an Address instance), either from the cache or by
        # calling _do_geocode().
        if self.use_cache:
            try:
                # A miss surfaces as IndexError from the [0] lookup.
                cached = GeocoderCache.objects.filter(normalized_location=location)[0]
            except IndexError:
                pass
            else:
                result = Address.from_cache(cached)
                cache_hit = True

        if result is None:
            try:
                result = self._do_geocode(location)
            except AmbiguousResult, e:
                # If multiple results were found, check whether they have the
                # same point. If they all have the same point, don't raise the
                # AmbiguousResult exception -- just return the first one.
                #
                # An edge case is if result['point'] is None. This could happen
                # if the geocoder found locations, not points. In that case,
                # just raise the AmbiguousResult.
                result = e.choices[0]
                if result['point'] is None:
                    # Re-raise the AmbiguousResult currently being handled.
                    raise
                for i in e.choices[1:]:
                    if i['point'] != result['point']:
                        raise
Esempio n. 8
0
    def save(self, name_field):
        """Create a Location for every accepted feature in ``self.layer``.

        ``name_field`` is the feature attribute that holds the location
        name. Features are first collected (skipping those rejected by
        ``self.should_create_location()``), then processed in name order so
        that ``display_order`` follows the alphabetical position.

        For every location, ``populate_ni_loc()`` is called whether the row
        was created or already existed. Progress is written to stderr when
        ``self.verbose`` is true.

        Returns the number of Location rows that were newly created.
        """
        verbose = self.verbose
        source = self.source

        # Pass 1: turn each feature into the lookup fields for a Location.
        candidates = []
        for feature in self.layer:
            name = feature.get(name_field)
            shape = feature.geom.transform(4326, True).geos
            shape = flatten_geomcollection(ensure_valid(shape, name))
            candidate = {
                'name': name,
                'slug': slugify(name),
                'location_type': self.get_location_type(feature),
                'location': shape,
                'city': self.metro_name,
                'source': source,
                'is_public': True,
            }
            if self.should_create_location(candidate):
                candidates.append(candidate)

        # Pass 2: get-or-create each location in alphabetical order.
        created_count = 0
        ordered = sorted(candidates, key=lambda c: c['name'])
        for position, candidate in enumerate(ordered):
            kwargs = dict(candidate)
            kwargs['defaults'] = {
                'creation_date': self.now,
                'last_mod_date': self.now,
                'display_order': position,
                'normalized_name': normalize(candidate['name']),
                'area': candidate['location'].transform(3395, True).area,
            }
            try:
                loc, created = Location.objects.get_or_create(**kwargs)
            except IntegrityError:
                # Usually this means two towns with the same slug.
                # Try to fix that by appending a counter to the slug.
                taken_slug = kwargs['slug']
                clashes = Location.objects.filter(slug=taken_slug).count()
                if not clashes:
                    # Not a slug clash after all: let the error propagate.
                    raise
                fresh_slug = slugify('%s-%s' % (taken_slug, clashes + 1))
                if verbose:
                    print >> sys.stderr, "Munged slug %s to %s to make it unique" % (taken_slug, fresh_slug)
                kwargs['slug'] = fresh_slug
                loc, created = Location.objects.get_or_create(**kwargs)

            if created:
                created_count += 1

            if verbose:
                print >> sys.stderr, '%s %s %s' % (created and 'Created' or 'Already had', self.location_type.name, loc)
                sys.stderr.write('Populating newsitem locations ... ')
            populate_ni_loc(loc)
            if verbose:
                sys.stderr.write('done.\n')
        return created_count
Esempio n. 9
0
 def get_canonical(self, name):
     """
     Returns the 'correct' or canonical spelling of the given place name.

     The name is normalized first. If a record with that normalized name
     exists, the ``normalized_name`` of its ``place`` is returned.
     Otherwise the normalized form of the given name is returned (note:
     normalized, not the raw input).
     """
     # Normalize before entering the ``try`` so the fallback value is
     # always bound when the handler runs, and so the ``try`` only guards
     # the lookup that can actually raise DoesNotExist.
     normalized_name = normalize(name)
     try:
         return self.get(normalized_name=normalized_name).place.normalized_name
     except self.model.DoesNotExist:
         return normalized_name
Esempio n. 10
0
def full_geocode(query, search_places=True):
    """
    Tries the full geocoding stack on the given query (a string):
        * Normalizes whitespace/capitalization
        * Searches the Misspelling table to correct location misspellings
        * Searches the Location table
        * Failing that, searches the Place table (if search_places is True)
        * Failing that, uses a SmartGeocoder to parse this as an address
        * Failing that, raises whichever error is raised by the geocoder --
          except AmbiguousResult, in which case all possible results are
          returned

    Returns a dictionary of {type, result, ambiguous}, where ambiguous is True
    or False and type can be:
        * 'location' -- in which case result is a Location object.
        * 'place' -- in which case result is a Place object. (This is only
          possible if search_places is True.)
        * 'address' -- in which case result is an Address object as returned
          by geocoder.geocode().
        * 'block' -- in which case result is a list of Block objects.

    If ambiguous is True, result will be a list of objects.

    NOTE(review): the code visible here ends with the AmbiguousResult
    handler; the successful-address return and the 'block' case are not
    shown in this excerpt -- confirm against the full source.
    """
    query = normalize(query)

    # First, try correcting the spelling ("LAKEVIEW" -> "LAKE VIEW").
    # NOTE(review): only DoesNotExist is handled for the .get() calls below;
    # presumably the looked-up columns are unique -- verify.
    try:
        miss = Misspelling.objects.get(incorrect=query)
    except Misspelling.DoesNotExist:
        pass
    else:
        query = miss.correct

    # Search the Location table.
    try:
        loc = Location.objects.get(normalized_name=query)
    except Location.DoesNotExist:
        pass
    else:
        return {'type': 'location', 'result': loc, 'ambiguous': False}

    # Search the Place table, for stuff like "Sears Tower".
    if search_places:
        places = Place.objects.filter(normalized_name=query)
        # Exactly one match is an unambiguous place; several matches are
        # all handed back with ambiguous=True; none falls through.
        if len(places) == 1:
            return {'type': 'place', 'result': places[0], 'ambiguous': False}
        elif len(places) > 1:
            return {'type': 'place', 'result': places, 'ambiguous': True}

    # Try geocoding this as an address.
    geocoder = SmartGeocoder()
    try:
        result = geocoder.geocode(query)
    except AmbiguousResult, e:
        # Hand every candidate back to the caller instead of failing.
        return {'type': 'address', 'result': e.choices, 'ambiguous': True}
Esempio n. 11
0
def full_geocode(query, search_places=True):
    """
    Tries the full geocoding stack on the given query (a string):
        * Normalizes whitespace/capitalization
        * Searches the Misspelling table to correct location misspellings
        * Searches the Location table
        * Failing that, searches the Place table (if search_places is True)
        * Failing that, uses a SmartGeocoder to parse this as an address
        * Failing that, raises whichever error is raised by the geocoder --
          except AmbiguousResult, in which case all possible results are
          returned

    Returns a dictionary of {type, result, ambiguous}, where ambiguous is True
    or False and type can be:
        * 'location' -- in which case result is a Location object.
        * 'place' -- in which case result is a Place object. (This is only
          possible if search_places is True.)
        * 'address' -- in which case result is an Address object as returned
          by geocoder.geocode().
        * 'block' -- in which case result is a list of Block objects.

    If ambiguous is True, result will be a list of objects.

    NOTE(review): this excerpt stops at the AmbiguousResult handler, so the
    return for a successfully geocoded address and anything producing the
    'block' type are not visible here -- confirm against the full source.
    """
    query = normalize(query)

    # First, try correcting the spelling ("LAKEVIEW" -> "LAKE VIEW").
    # A missing Misspelling row simply leaves the query untouched.
    try:
        miss = Misspelling.objects.get(incorrect=query)
    except Misspelling.DoesNotExist:
        pass
    else:
        query = miss.correct

    # Search the Location table.
    try:
        loc = Location.objects.get(normalized_name=query)
    except Location.DoesNotExist:
        pass
    else:
        return {'type': 'location', 'result': loc, 'ambiguous': False}

    # Search the Place table, for stuff like "Sears Tower".
    if search_places:
        places = Place.objects.filter(normalized_name=query)
        # One match -> that Place; more than one -> all of them, flagged
        # ambiguous; zero -> fall through to address geocoding.
        if len(places) == 1:
            return {'type': 'place', 'result': places[0], 'ambiguous': False}
        elif len(places) > 1:
            return {'type': 'place', 'result': places, 'ambiguous': True}

    # Try geocoding this as an address.
    geocoder = SmartGeocoder()
    try:
        result = geocoder.geocode(query)
    except AmbiguousResult, e:
        # Return every candidate rather than propagating the exception.
        return {'type': 'address', 'result': e.choices, 'ambiguous': True}
Esempio n. 12
0
    def create_location(self, name, location_type, geom, display_order=0):
        """Get or create a single Location and populate its news items.

        Parameters:
            name: display name; also the source of the slug and of
                ``normalized_name``.
            location_type: either an int id or an object with an ``id``.
            geom: the geometry; objects with a ``geos`` attribute are
                unwrapped first. A geometry without an SRID is assigned
                4326 in place; one with another SRID is transformed to 4326.
            display_order: stored on newly created rows only.

        Returns None when ``self.should_create_location()`` rejects the
        location; otherwise the ``created`` flag from ``get_or_create()``.

        Raises IntegrityError if creation fails and no existing row has
        the same slug.
        """
        source = self.source
        if hasattr(geom, 'geos'):
            geom = geom.geos
        if geom.srid is None:
            geom.srid = 4326
        elif geom.srid != 4326:
            geom = geom.transform(4326, True)
        geom = ensure_valid(geom, name)
        geom = flatten_geomcollection(geom)
        if not isinstance(location_type, int):
            location_type = location_type.id
        kwargs = dict(
            name=name,
            slug=slugify(name),
            location=geom,
            location_type_id=location_type,
            city=self.metro_name,
            source=source,
            is_public=True,
        )
        if not self.should_create_location(kwargs):
            return
        # ``defaults`` are only applied when a new row is created; they are
        # not part of the lookup.
        kwargs['defaults'] = {
            'creation_date': self.now,
            'last_mod_date': self.now,
            'display_order': display_order,
            'normalized_name': normalize(name),
            'area': geom.transform(3395, True).area,
            }
        try:
            loc, created = Location.objects.get_or_create(**kwargs)
        except IntegrityError:
            # Usually this means two towns with the same slug.
            # Try to fix that.
            slug = kwargs['slug']
            existing = Location.objects.filter(slug=slug).count()
            if existing:
                slug = slugify('%s-%s' % (slug, existing + 1))
                # Logger arguments are passed lazily so the message is only
                # formatted when the record is actually emitted.
                logger.info("Munged slug %s to %s to make it unique", kwargs['slug'], slug)
                kwargs['slug'] = slug
                loc, created = Location.objects.get_or_create(**kwargs)
            else:
                raise

        logger.info('%s %s %s', created and 'Created' or 'Already had', self.location_type.name, loc)
        logger.info('Populating newsitem locations ... ')
        populate_ni_loc(loc)
        logger.info('done.\n')

        return created
Esempio n. 13
0
def add_location(name, wkt, loc_type, source='UNKNOWN'):
    geom = fromstr(wkt, srid=4326)
    name = name.strip().title()
    loc, created = Location.objects.get_or_create(
        name=name,
        slug=slugify(name),
        normalized_name=normalize(name),
        location_type=loc_type,
        location=geom,
        display_order=0,
        city=get_metro()['city_name'].upper(),
        source=source)
    print '%s %s %s' % (created and 'Created' or 'Found', loc_type.name, name)
    return loc
Esempio n. 14
0
    def save(self, *args, **kwargs):
        """Clean up the address parts, derive ``street``, then save.

        ``suffix``, ``state`` and ``city`` are upper-cased and stripped when
        set. ``street`` is the normalized ``pretty_name`` with a trailing
        " <SUFFIX>" removed, if a suffix is set.

        Any positional/keyword arguments (e.g. ``force_insert``, ``using``)
        are forwarded unchanged to the parent ``save()``.
        """
        if self.suffix:
            self.suffix = self.suffix.upper().strip()
        if self.state:
            self.state = self.state.upper().strip()
        if self.city:
            # TODO: validate that there's a matching metro setting?
            # (or Location object, if the metro is multi-city)?
            self.city = self.city.upper().strip()

        self.street = normalize(self.pretty_name)
        if self.suffix:
            # The suffix is data, not a pattern: escape it so characters
            # such as "." or "(" cannot change or break the regex. It is
            # already upper-cased above.
            self.street = re.sub(r" %s$" % re.escape(self.suffix), "", self.street)
        super(Street, self).save(*args, **kwargs)
Esempio n. 15
0
    def save(self, force_insert=False, force_update=False, using=None):
        """Clean up the address parts, derive ``street``, then save.

        ``suffix``, ``state`` and ``city`` are upper-cased and stripped when
        set. ``street`` is the normalized ``pretty_name`` with a trailing
        " <SUFFIX>" removed, if a suffix is set. The three options are
        passed straight through to the parent ``save()``.
        """
        if self.suffix:
            self.suffix = self.suffix.upper().strip()
        if self.state:
            self.state = self.state.upper().strip()
        if self.city:
            # TODO: validate that there's a matching metro setting?
            # (or Location object, if the metro is multi-city)?
            self.city = self.city.upper().strip()

        self.street = normalize(self.pretty_name)
        if self.suffix:
            # The suffix is data, not a pattern: escape it so characters
            # such as "." or "(" cannot change or break the regex. It is
            # already upper-cased above.
            self.street = re.sub(r' %s$' % re.escape(self.suffix), '', self.street)
        super(Street, self).save(force_insert=force_insert, force_update=force_update, using=using)
Esempio n. 16
0
def add_location(name, wkt, loc_type, source='UNKNOWN'):
    geom = fromstr(wkt, srid=4326)
    loc, created = Location.objects.get_or_create(
        name=name,
        slug=slugify(name),
        normalized_name=normalize(name),
        location_type=loc_type,
        location=geom,
        centroid=geom.centroid,
        display_order=0,
        city=get_metro()['city_name'].upper(),
        source=source
    )
    print '%s %s %s' % (created and 'Created' or 'Found', loc_type.name, name)
    return loc
Esempio n. 17
0
    def save(self, name_field='name', source='UNKNOWN', verbose=True):
        """Create a Location for every accepted feature in ``self.layer``.

        Parameters:
            name_field: feature attribute holding the location name.
            source: stored in each Location's ``source`` field.
            verbose: when true, progress is written to stderr.

        Features rejected by ``self.should_create_location()`` are skipped.
        An invalid geometry is replaced by ``geom.buffer(0.0)``; if it is
        still invalid a warning is printed and it is used anyway.
        Locations are processed in name order, and ``populate_ni_loc()`` is
        called for each one, created or not.

        Returns the number of Location rows that were newly created.
        """
        locs = []
        for feature in self.layer:
            if not self.should_create_location(feature):
                continue

            name = feature.get(name_field)
            geom = feature.geom.transform(4326, True).geos
            if not geom.valid:
                geom = geom.buffer(0.0)
                if not geom.valid:
                    print >> sys.stderr, 'Warning: invalid geometry: %s' % name
            # ``display_order`` is deliberately NOT part of these lookup
            # fields: it is assigned per row via ``defaults`` below. Having
            # it here as 0 made get_or_create() search for display_order=0
            # and so miss rows previously stored with another order.
            fields = dict(
                name=name,
                normalized_name=normalize(name),
                slug=slugify(name),
                location_type=self.get_location_type(feature),
                location=geom,
                centroid=geom.centroid,
                city=self.metro_name,
                source=source,
                area=geom.transform(3395, True).area,
                is_public=True,
            )
            locs.append(fields)
        num_created = 0
        for i, loc_fields in enumerate(sorted(locs, key=lambda h: h['name'])):
            # ``defaults`` are applied only when a new row is created.
            kwargs = dict(loc_fields,
                          defaults={
                              'creation_date': self.now,
                              'last_mod_date': self.now,
                              'display_order': i
                          })
            loc, created = Location.objects.get_or_create(**kwargs)
            if created:
                num_created += 1
            if verbose:
                print >> sys.stderr, '%s %s %s' % (
                    created and 'Created'
                    or 'Already had', self.location_type.name, loc)
            if verbose:
                sys.stderr.write('Populating newsitem locations ... ')
            populate_ni_loc(loc)
            if verbose:
                sys.stderr.write('done.\n')
        return num_created
    def import_county(self):
        """Replace all 'County' locations with the one matching ``self.county``.

        The 'counties' LocationType is fetched (or created if missing), all
        existing Locations of that type are deleted, and the county
        shapefile is scanned for the feature whose 'GEOID10' equals
        ``self.county``.

        Returns the newly created Location, or None when no feature matched.
        """
        type_attrs = dict(
            name='County',
            plural_name='Counties',
            slug='counties',
            is_browsable=True,
            is_significant=False,
            scope=self.metro_name,
        )
        try:
            county_type = LocationType.objects.get(slug=type_attrs['slug'])
        except LocationType.DoesNotExist:
            county_type = LocationType.objects.create(**type_attrs)

        # Start from a clean slate for this location type.
        Location.objects.filter(location_type=county_type).delete()

        county_info = self.datafiles['county']
        shapefile_path = '%s/%s.shp' % (self.zip_dir, county_info['file_name'])
        county_layer = DataSource(shapefile_path)[0]
        now = datetime.datetime.now()

        for feature in county_layer:
            if feature.get('GEOID10') != self.county:
                continue
            # Only the first matching feature is imported.
            name = feature.get(county_info['name_field'])
            shape = feature.geom.transform(4326, True).geos
            shape = flatten_geomcollection(ensure_valid(shape, name))
            return Location.objects.create(
                name=name,
                slug=slugify(name),
                location_type=county_type,
                location=shape,
                city=self.metro_name,
                is_public=True,
                creation_date=now,
                last_mod_date=now,
                display_order=0,
                normalized_name=normalize(name),
                area=shape.transform(3395, True).area,
            )
        return None
Esempio n. 19
0
    def create_location(self, name, location_type, geom, display_order=0):
        """Get or create a single Location and populate its news items.

        ``location_type`` may be an int id or an object with an ``id``.
        ``geom`` is passed through ``geos_with_projection(geom, 4326)``,
        ``ensure_valid()`` and ``flatten_geomcollection()`` before use.
        ``display_order`` is stored on newly created rows only.

        Returns None when ``self.should_create_location()`` rejects the
        location; otherwise the ``created`` flag from ``get_or_create()``.
        Raises IntegrityError if creation fails and no existing row has the
        same slug.
        """
        source = self.source
        shape = geos_with_projection(geom, 4326)
        shape = flatten_geomcollection(ensure_valid(shape, name))

        if isinstance(location_type, int):
            type_id = location_type
        else:
            type_id = location_type.id

        kwargs = {
            'name': name,
            'slug': slugify(name),
            'location': shape,
            'location_type_id': type_id,
            'city': self.metro_name,
            'source': source,
            'is_public': True,
        }
        if not self.should_create_location(kwargs):
            return

        # Values used only when a new row has to be created.
        kwargs['defaults'] = dict(
            creation_date=self.now,
            last_mod_date=self.now,
            display_order=display_order,
            normalized_name=normalize(name),
            area=shape.transform(3395, True).area,
        )
        try:
            loc, created = Location.objects.get_or_create(**kwargs)
        except IntegrityError:
            # Usually this means two towns with the same slug.
            # Try to fix that by appending a counter to the slug.
            taken_slug = kwargs['slug']
            clashes = Location.objects.filter(slug=taken_slug).count()
            if not clashes:
                # Not a slug clash after all: let the error propagate.
                raise
            fresh_slug = slugify('%s-%s' % (taken_slug, clashes + 1))
            logger.info("Munged slug %s to %s to make it unique" % (taken_slug, fresh_slug))
            kwargs['slug'] = fresh_slug
            loc, created = Location.objects.get_or_create(**kwargs)

        outcome = created and 'Created' or 'Already had'
        logger.info('%s %s %s' % (outcome, self.location_type.name, loc))
        logger.info('Populating newsitem locations ... ')
        populate_ni_loc(loc)
        logger.info('done.\n')

        return created
Esempio n. 20
0
    def geocode(self, location):
        """
        Geocodes the given location, handling caching behind the scenes.

        The location string is normalized first. When ``self.use_cache`` is
        true, the first ``GeocoderCache`` row whose ``normalized_location``
        matches is converted with ``Address.from_cache()``. If that yields
        nothing, ``self._do_geocode()`` is called.

        Raises AmbiguousResult (re-raised from ``_do_geocode()``) when the
        first choice has no point, or when any other choice has a different
        point than the first one.

        NOTE(review): the code visible here stops after the ambiguity
        handling -- no return statement or cache write is shown, and
        ``cache_hit`` is never read. Presumably the remainder of the method
        is missing from this excerpt; confirm against the full source.
        """
        location = normalize(location)
        result, cache_hit = None, False

        # Get the result (an Address instance), either from the cache or by
        # calling _do_geocode().
        # TODO: Why does this not use the normal Django caching
        # framework?
        # Defer import to avoid cyclical imports.
        from ebpub.geocoder.models import GeocoderCache
        if self.use_cache:
            try:
                # Indexing with [0] is what signals a cache miss: the
                # IndexError below is treated as "nothing cached".
                cached = GeocoderCache.objects.filter(
                    normalized_location=location)[0]
            except IndexError:
                pass
            else:
                logger.debug('GeocoderCache HIT for %r' % location)
                result = Address.from_cache(cached)
                cache_hit = True

        if result is None:
            try:
                result = self._do_geocode(location)
            except AmbiguousResult, e:
                # If multiple results were found, check whether they have the
                # same point. If they all have the same point, don't raise the
                # AmbiguousResult exception -- just return the first one.
                #
                # An edge case is if result['point'] is None. This could happen
                # if the geocoder found locations, not points. In that case,
                # just raise the AmbiguousResult.
                result = e.choices[0]
                if result['point'] is None:
                    # Bare raise: re-raises the AmbiguousResult being handled.
                    raise
                for i in e.choices[1:]:
                    if i['point'] != result['point']:
                        raise
                # Reached only when every choice shares the first one's point.
                logger.debug('Got ambiguous results but all had same point, '
                             'returning the first')
 def handle(self, **options):
     shapefile = self.download_file()
     now = datetime.datetime.now()
     metro_name = get_metro()['metro_name'].upper()
     # get or create City location type
     type_data = {'name': 'City', 'plural_name': 'Cities', 'slug': 'cities',
                  'is_browsable': True, 'is_significant': True,
                  'scope': metro_name}
     try:
         type_ = LocationType.objects.get(slug=type_data['slug'])
     except LocationType.DoesNotExist:
         type_ = LocationType.objects.create(**type_data)
     # start with a fresh list of cities
     Location.objects.filter(location_type=type_).delete()
     # build list of cities
     locations = {}
     layer = DataSource(shapefile)[0]
     for feature in layer:
         name = self.clean_name(feature['Name'])
         # convert to 4326
         geom = feature.geom.transform(4326, True).geos
         if name not in locations:
             locations[name] = {
                 'name': name,
                 'slug': slugify(name),
                 'location_type': type_,
                 'city': metro_name,
                 'source': 'Columbus County GIS data',
                 'is_public': True,
                 'creation_date': now,
                 'last_mod_date': now,
                 'display_order': 0,
                 'normalized_name': normalize(name),
                 'location': [],
             }
         location = locations[name]
         location['location'].append(geom)
     # create city locations
     for name, location in locations.iteritems():
         location['location'] = make_multi(location['location'])
         Location.objects.create(**location)
     print 'Imported %d locations' % type_.location_set.count()
Esempio n. 22
0
    def geocode(self, location):
        """
        Geocodes the given location, handling caching behind the scenes.

        Steps visible in this method:

        1. Normalize ``location``.
        2. If ``self.use_cache`` is true, take the first ``GeocoderCache``
           row with a matching ``normalized_location`` and build the result
           with ``Address.from_cache()``.
        3. Without a cached result, call ``self._do_geocode()``.
        4. If that raises AmbiguousResult, keep the first choice only when
           every choice has the same, non-None point; otherwise re-raise.

        NOTE(review): nothing is returned and ``cache_hit`` is not used in
        the lines shown; the method looks truncated in this excerpt --
        confirm against the full source.
        """
        location = normalize(location)
        result, cache_hit = None, False

        # Get the result (an Address instance), either from the cache or by
        # calling _do_geocode().
        # TODO: Why does this not use the normal Django caching
        # framework?
        # Defer import to avoid cyclical imports.
        from ebpub.geocoder.models import GeocoderCache
        if self.use_cache:
            try:
                # A miss surfaces as IndexError from the [0] lookup.
                cached = GeocoderCache.objects.filter(normalized_location=location)[0]
            except IndexError:
                pass
            else:
                logger.debug('GeocoderCache HIT for %r' % location)
                result = Address.from_cache(cached)
                cache_hit = True

        if result is None:
            try:
                result = self._do_geocode(location)
            except AmbiguousResult, e:
                # If multiple results were found, check whether they have the
                # same point. If they all have the same point, don't raise the
                # AmbiguousResult exception -- just return the first one.
                #
                # An edge case is if result['point'] is None. This could happen
                # if the geocoder found locations, not points. In that case,
                # just raise the AmbiguousResult.
                result = e.choices[0]
                if result['point'] is None:
                    # Re-raise the AmbiguousResult currently being handled.
                    raise
                for i in e.choices[1:]:
                    if i['point'] != result['point']:
                        raise
                # Reached only when every choice shares the first one's point.
                logger.debug('Got ambiguous results but all had same point, '
                             'returning the first')
Esempio n. 23
0
    def save(self, name_field='name', source='UNKNOWN', verbose=True):
        """Create a Location for every accepted feature in ``self.layer``.

        Parameters:
            name_field: feature attribute holding the location name.
            source: stored in each Location's ``source`` field.
            verbose: when true, progress is written to stderr.

        Features rejected by ``self.should_create_location()`` are skipped.
        An invalid geometry is replaced by ``geom.buffer(0.0)``; if it is
        still invalid a warning is printed and it is used anyway.
        Locations are processed in name order, and ``populate_ni_loc()`` is
        called for each one, created or not.

        Returns the number of Location rows that were newly created.
        """
        locs = []
        for feature in self.layer:
            if not self.should_create_location(feature):
                continue

            name = feature.get(name_field)
            geom = feature.geom.transform(4326, True).geos
            if not geom.valid:
                geom = geom.buffer(0.0)
                if not geom.valid:
                    print >> sys.stderr, 'Warning: invalid geometry: %s' % name
            # ``display_order`` is deliberately NOT part of these lookup
            # fields: it is assigned per row via ``defaults`` below. Having
            # it here as 0 made get_or_create() search for display_order=0
            # and so miss rows previously stored with another order.
            fields = dict(
                name = name,
                normalized_name = normalize(name),
                slug = slugify(name),
                location_type = self.get_location_type(feature),
                location = geom,
                centroid = geom.centroid,
                city = self.metro_name,
                source = source,
                area = geom.transform(3395, True).area,
                is_public = True,
            )
            locs.append(fields)
        num_created = 0
        for i, loc_fields in enumerate(sorted(locs, key=lambda h: h['name'])):
            # ``defaults`` are applied only when a new row is created.
            kwargs = dict(loc_fields, defaults={'creation_date': self.now, 'last_mod_date': self.now, 'display_order': i})
            loc, created = Location.objects.get_or_create(**kwargs)
            if created:
                num_created += 1
            if verbose:
                print >> sys.stderr, '%s %s %s' % (created and 'Created' or 'Already had', self.location_type.name, loc)
            if verbose:
                sys.stderr.write('Populating newsitem locations ... ')
            populate_ni_loc(loc)
            if verbose:
                sys.stderr.write('done.\n')
        return num_created
Esempio n. 24
0
 def save(self, name_field="name", source="UNKNOWN", verbose=True):
     """Create a neighborhood Location for every feature in ``self.layer``.

     Parameters:
         name_field: feature attribute holding the neighborhood name.
         source: stored in each Location's ``source`` field.
         verbose: when true, progress is written to stderr.

     An invalid geometry is replaced by ``geom.buffer(0.0)``; if it is
     still invalid a warning is printed and it is used anyway.
     Neighborhoods are processed in name order, and ``populate_ni_loc()``
     is called for each one, created or not.

     Returns the number of Location rows that were newly created.
     """
     hoods = []
     for feature in self.layer:
         name = feature.get(name_field)
         geom = feature.geom.transform(4326, True).geos
         if not geom.valid:
             geom = geom.buffer(0.0)
             if not geom.valid:
                 print >>sys.stderr, "Warning: invalid geometry: %s" % name
         # ``display_order`` is deliberately NOT part of these lookup
         # fields: it is assigned per row via ``defaults`` below. Having
         # it here as 0 made get_or_create() search for display_order=0
         # and so miss rows previously stored with another order.
         fields = dict(
             name=name,
             normalized_name=normalize(name),
             slug=slugify(name),
             location_type=self.location_type,
             location=geom,
             centroid=geom.centroid,
             city=self.metro_name,
             source=source,
             area=geom.transform(3395, True).area,
             is_public=True,
         )
         hoods.append(fields)
     num_created = 0
     for i, hood_fields in enumerate(sorted(hoods, key=lambda h: h["name"])):
         # ``defaults`` are applied only when a new row is created.
         kwargs = dict(
             hood_fields, defaults={"creation_date": self.now, "last_mod_date": self.now, "display_order": i}
         )
         hood, created = Location.objects.get_or_create(**kwargs)
         if created:
             num_created += 1
         if verbose:
             print >>sys.stderr, "%s neighborhood %s" % (created and "Created" or "Already had", hood)
         if verbose:
             sys.stderr.write("Populating newsitem locations ... ")
         populate_ni_loc(hood)
         if verbose:
             sys.stderr.write("done.\n")
     return num_created
def auto_locations(paragraph_list, default_city=''):
    """
    Finds and geocodes every distinct address mentioned in paragraph_list.

    Returns a tuple (result, report). result is a list of tuples
    (address, point, excerpt, block), one for each distinct geocoded
    address; report is a '; '-separated string listing the addresses that
    were skipped or could not be geocoded.

    excerpt is the paragraph the address came from, or the output of
    smart_excerpt() when the paragraph is longer than 300 characters.

    If default_city is given, it is added to the geocoding attempts for
    each detected address.
    """
    found, notes = [], []
    seen_addresses = set()
    geocoder = SmartGeocoder()
    for para in paragraph_list:
        for addy, city in parse_addresses(para):
            # Addresses whose city is a known suburb are reported, not geocoded.
            if city and Suburb.objects.filter(normalized_name=normalize(city)).count():
                notes.append('got suburb "%s, %s"' % (addy, city))
                continue

            # Candidate strings, most specific first: the parsed city (when
            # it differs from the default), then the default city, then the
            # bare address.
            candidates = []
            if city and city.lower() != default_city.lower():
                candidates.append('%s, %s' % (addy, city))
            if default_city:
                candidates.append('%s, %s' % (addy, default_city))
            candidates.append(addy)

            geocoded = None
            for candidate in candidates:
                try:
                    geocoded = geocoder.geocode(candidate)
                except AmbiguousResult:
                    notes.append('got ambiguous address "%s"' % candidate)
                    # Stop here: the remaining candidates are less specific,
                    # so they can only be more ambiguous -- or match the
                    # wrong place. E.g. with addy='100 Broadway',
                    # city='Manhattan', default_city='Brooklyn', an
                    # ambiguous Manhattan result must not fall back to
                    # Brooklyn.
                    break
                except (DoesNotExist, InvalidBlockButValidStreet):
                    notes.append('got nonexistent address "%s"' % candidate)
                except ParsingError:
                    notes.append('got parsing error "%s"' % candidate)
                else:
                    break

            # Nothing geocoded, or this address was already recorded.
            if geocoded is None or geocoded['address'] in seen_addresses:
                continue

            excerpt = para
            if len(para) > 300:
                try:
                    excerpt = smart_excerpt(para, addy)
                except ValueError:
                    pass  # keep the whole paragraph
            found.append((addy, geocoded['point'], excerpt, geocoded['block']))
            seen_addresses.add(geocoded['address'])
    return (found, '; '.join(notes))
Esempio n. 26
0
def auto_locations(paragraph_list, default_city=''):
    """
    Detects the addresses in a list of strings and geocodes each unique one.

    The return value is a tuple (result, report): result holds one
    (address, point, excerpt, block) tuple per distinct geocoded address,
    and report is a string ('; '-joined) describing what was skipped or
    failed along the way.

    For paragraphs longer than 300 characters the excerpt comes from
    smart_excerpt(); otherwise (or if that raises ValueError) it is the
    whole paragraph.

    If default_city is given, it is used as an extra geocoding attempt for
    each detected address.
    """
    matches = []
    messages = []
    known = set()
    geocoder = SmartGeocoder()
    for para in paragraph_list:
        for addy, city in parse_addresses(para):
            # A city that is a known suburb means the address is skipped.
            if city:
                suburbs = Suburb.objects.filter(
                    normalized_name=normalize(city))
                if suburbs.count():
                    messages.append('got suburb "%s, %s"' % (addy, city))
                    continue

            # Build the attempts from most to least specific: explicit
            # city (unless it equals the default), default city, bare
            # address.
            to_try = []
            if city and city.lower() != default_city.lower():
                to_try.append('%s, %s' % (addy, city))
            if default_city:
                to_try.append('%s, %s' % (addy, default_city))
            to_try.append(addy)

            hit = None
            for text in to_try:
                try:
                    hit = geocoder.geocode(text)
                except AmbiguousResult:
                    messages.append('got ambiguous address "%s"' % text)
                    # Later attempts are less specific, so they would be
                    # even more ambiguous and might resolve to the wrong
                    # place (e.g. an ambiguous '100 Broadway, Manhattan'
                    # must not fall back to default_city 'Brooklyn').
                    break
                except (DoesNotExist, InvalidBlockButValidStreet):
                    messages.append('got nonexistent address "%s"' % text)
                except ParsingError:
                    messages.append('got parsing error "%s"' % text)
                else:
                    break

            if hit is None:
                continue  # None of the attempts could be geocoded.
            if hit['address'] in known:
                continue  # Already recorded this address.

            excerpt = para
            if len(para) > 300:
                try:
                    excerpt = smart_excerpt(para, addy)
                except ValueError:
                    excerpt = para
            matches.append((addy, hit['point'], excerpt, hit['block']))
            known.add(hit['address'])
    return (matches, '; '.join(messages))
Esempio n. 27
0
 def save(self, *args, **kwargs):
     """Fill in ``normalized_name`` when it is empty, then save the Place.

     Positional and keyword arguments are passed straight through to the
     parent ``save()``, so callers that supply save options (for example
     ``force_insert``) work with this override. Calling ``save()`` with no
     arguments behaves exactly as before.
     """
     if not self.normalized_name:
         # Imported locally rather than at module level -- presumably to
         # avoid a circular import; confirm before moving it.
         from ebpub.geocoder.parser.parsing import normalize
         self.normalized_name = normalize(self.pretty_name)
     super(Place, self).save(*args, **kwargs)
Esempio n. 28
0
                # phew!
                validated_rows.append(
                    [pretty_name, address, point, place_url, synonyms])

        except csv.Error, e:
            message = "Stopped on line %d: %s" % (rows.line_num, e)
            context['errors'].append(message)
            return self._show_import_csv_results(request, context)
        except Exception, e:
            message = "Stopped on line %d: %s" % (rows.line_num, e)
            context['errors'].append(message)
            return self._show_import_csv_results(request, context)

        # wonderful, now do something...
        for pretty_name, address, point, place_url, synonyms in validated_rows:
            normalized_name = normalize(pretty_name)

            try:
                place = Place.objects.get(normalized_name=normalized_name,
                                          location__distance_lte=(point,
                                                                  D(m=1)))
                created = False
            except Place.DoesNotExist:
                place = Place(normalized_name=normalized_name)
                created = True

            try:
                place.pretty_name = pretty_name
                place.address = address
                place.location = point
                place.url = place_url
Esempio n. 29
0
 def save(self, *args, **kwargs):
     """Refresh the instance's ``normalized_name`` and save the form.

     The normalized name is always recomputed from the instance's
     ``pretty_name``. All arguments are forwarded to the parent form's
     ``save()`` and its return value is returned unchanged.
     """
     place = self.instance
     place.normalized_name = normalize(place.pretty_name)
     return super(PlaceAdminForm, self).save(*args, **kwargs)
Esempio n. 30
0
def populate_suburbs(suburb_list):
    """Create a Suburb for each name in suburb_list.

    Each Suburb is stored with the given name and its normalize()d form.
    """
    for suburb_name in suburb_list:
        fields = {
            'name': suburb_name,
            'normalized_name': normalize(suburb_name),
        }
        Suburb.objects.create(**fields)
Esempio n. 31
0
 def save(self, *args, **kwargs):
     """Save the Place, deriving ``normalized_name`` first if it is blank.

     Any positional or keyword arguments are forwarded unchanged to the
     parent ``save()``, so callers that pass save options (such as
     ``force_insert``) no longer hit a TypeError on this override. A plain
     ``save()`` call behaves as it did before.
     """
     if not self.normalized_name:
         # Function-scope import kept as in the original -- presumably it
         # sidesteps a circular import; verify before hoisting it.
         from ebpub.geocoder.parser.parsing import normalize
         self.normalized_name = normalize(self.pretty_name)
     super(Place, self).save(*args, **kwargs)
Esempio n. 32
0
 def save(self, force_insert=False, force_update=False, using=None):
     """Save the synonym, filling in ``normalized_name`` if it is empty.

     When ``normalized_name`` is falsy it is set to the normalized
     ``pretty_name``. The three save options are handed to the parent
     ``save()`` as keyword arguments.
     """
     if not self.normalized_name:
         self.normalized_name = normalize(self.pretty_name)
     save_options = {
         'force_insert': force_insert,
         'force_update': force_update,
         'using': using,
     }
     super(PlaceSynonym, self).save(**save_options)
Esempio n. 33
0
                # phew!
                validated_rows.append([pretty_name, address, point, place_url, synonyms])

        except csv.Error, e:
            message = "Stopped on line %d: %s" % (rows.line_num, e)
            context['errors'].append(message)
            return self._show_import_csv_results(request, context)
        except Exception, e:
            message = "Stopped on line %d: %s" % (rows.line_num, e)
            context['errors'].append(message)
            return self._show_import_csv_results(request, context)
        
        
        # wonderful, now do something...
        for pretty_name, address, point, place_url, synonyms in validated_rows: 
            normalized_name = normalize(pretty_name)
            
            try: 
                place = Place.objects.get(normalized_name=normalized_name,
                                          location__distance_lte=(point, D(m=1)))
                created = False
            except Place.DoesNotExist:
                place = Place(normalized_name=normalized_name)
                created = True
            
            try:
                place.pretty_name = pretty_name
                place.address = address
                place.location = point
                place.url = place_url
                place.place_type = place_type
Esempio n. 34
0
 def save(self, *args, **kwargs):
     """Recompute ``normalized_name`` on the bound instance, then save.

     The value is derived from the instance's ``pretty_name`` on every
     call. Arguments are passed through to the parent ``save()``, whose
     result is returned.
     """
     instance = self.instance
     normalized = normalize(instance.pretty_name)
     instance.normalized_name = normalized
     return super(PlaceAdminForm, self).save(*args, **kwargs)
Esempio n. 35
0
def search(request, schema_slug=''):
    """Performs a location search and redirects to the address/xy page.

    The query is read from the ``q`` GET parameter. Resolution is tried in
    this order, and the first step that produces a response wins:

    1. ``full_geocode()`` -- ambiguous results render a "did you mean" or
       invalid-block page; a location, block or intersection result
       redirects to that object's URL.
    2. An exact ``SearchSpecialCase`` match on the normalized query.
    3. If the query looks like a ZIP code, a page listing the public
       ZIP-code Locations (when there are any).
    4. The generic search error page.

    If ``schema_slug`` is given, the schema's URL (minus its trailing
    character) is prefixed to redirects; an unknown slug raises Http404.
    With ``?type=alert`` the redirect targets each object's ``alert_url()``
    instead of its ``url()``.
    """
    # Check whether a schema was provided.
    if schema_slug:
        try:
            schema = get_schema_manager(request).get(slug=schema_slug)
        except Schema.DoesNotExist:
            raise Http404('Schema does not exist')
        url_prefix = schema.url()[:-1]
    else:
        schema = None
        url_prefix = ''

    # Get the query.
    q = request.GET.get('q', '').strip()
    if not q:
        return HttpResponseRedirect(url_prefix + '/') # TODO: Do something better than redirecting.

    # For /search/?type=alert, we redirect results to the alert page, not the
    # place page.
    if request.GET.get('type', '') == 'alert':
        url_method = 'alert_url'
    else:
        url_method = 'url'

    # Try to geocode it using full_geocode().
    try:
        result = full_geocode(q, search_places=False)
    except Exception:
        # Geocoding is best-effort: any ordinary failure just falls through
        # to the fallbacks below. Catching Exception (rather than using a
        # bare except) lets KeyboardInterrupt and SystemExit propagate.
        # TODO: Narrow this to the geocoder's own exception types.
        pass
    else:
        if result['ambiguous']:
            if result['type'] == 'block':
                return eb_render(request, 'db/search_invalid_block.html', {
                    'query': q,
                    'choices': result['result'],
                    'street_name': result['street_name'],
                    'block_number': result['block_number']
                })
            else:
                return eb_render(request, 'db/did_you_mean.html', {'query': q, 'choices': result['result']})
        elif result['type'] == 'location':
            return HttpResponseRedirect(url_prefix + getattr(result['result'], url_method)())
        elif result['type'] == 'address':
            # Block
            if result['result']['block']:
                return HttpResponseRedirect(url_prefix + getattr(result['result']['block'], url_method)())
            # Intersection
            try:
                intersection = Intersection.objects.get(id=result['result']['intersection_id'])
            except Intersection.DoesNotExist:
                # No usable intersection: continue with the fallbacks below.
                pass
            else:
                return HttpResponseRedirect(url_prefix + getattr(intersection, url_method)())

    # Failing the geocoding, look in the special-case table.
    try:
        special_case = SearchSpecialCase.objects.get(query=normalize(q))
    except SearchSpecialCase.DoesNotExist:
        pass
    else:
        if special_case.redirect_to:
            return HttpResponseRedirect(special_case.redirect_to)
        else:
            return eb_render(request, 'db/search_special_case.html', {'query': q, 'special_case': special_case})

    # Failing that, display a list of ZIP codes if this looks like a ZIP
    # (five digits, optionally followed by a dash and four more).
    if re.search(r'^\s*\d{5}(?:-\d{4})?\s*$', q):
        z_list = Location.objects.filter(location_type__slug='zipcodes', is_public=True).select_related().order_by('name')
        if z_list:
            return eb_render(request, 'db/search_error_zip_list.html', {'query': q, 'zipcode_list': z_list})

    # Failing all of that, display the search error page.
    lt_list = LocationType.objects.filter(is_significant=True).order_by('name')
    return eb_render(request, 'db/search_error.html', {'query': q, 'locationtype_list': lt_list})
Esempio n. 36
0
 def save(self, *args, **kwargs):
     """Save the synonym, filling in ``normalized_name`` if it is empty.

     When ``normalized_name`` is falsy it is set to the normalized
     ``pretty_name``. Positional and keyword arguments are forwarded to the
     parent ``save()`` so that callers passing save options (for example
     ``force_insert``) work with this override; a bare ``save()`` call
     behaves as before.
     """
     if not self.normalized_name:
         self.normalized_name = normalize(self.pretty_name)
     super(PlaceSynonym, self).save(*args, **kwargs)