Example #1
0
def build_geocode_service(provider, api_key=None, cache=0):
    """Return a geocoder instance for ``provider``, memoised per cache size.

    Instances are kept in ``CACHED_GEOCODER`` under ``(provider, cache)``.
    On a miss, every entry previously stored for the same provider is
    discarded before a new instance is built and stored.

    :param provider: service name handed to ``get_geocoder_for_service``.
    :param api_key: key passed positionally to the geocoder class when its
        ``__init__`` signature has an ``api_key`` parameter.
    :param cache: ``maxsize`` given to ``functools.lru_cache`` when wrapping
        the instance's ``geocode`` method.
    :raises GeocodeException: the class accepts ``api_key`` but none (or a
        falsy one) was supplied.
    """
    lookup_key = (provider, cache)
    if lookup_key in CACHED_GEOCODER:
        return CACHED_GEOCODER[lookup_key]

    # Forget instances of this provider that were built for another cache size.
    stale_keys = [key for key in CACHED_GEOCODER if key[0] == provider]
    for key in stale_keys:
        del CACHED_GEOCODER[key]

    service_cls = get_geocoder_for_service(provider)
    init_params = inspect.signature(service_cls.__init__).parameters

    if 'api_key' not in init_params:
        service = service_cls(user_agent="pygisapi")
    elif api_key:
        service = service_cls(api_key, user_agent="pygisapi")
    else:
        raise GeocodeException("Provider {} needs an api key".format(provider))

    # Only wrap when geocode does not already expose a ``__wrapped__`` target.
    if not hasattr(service.geocode, '__wrapped__'):
        memoised_geocode = functools.lru_cache(maxsize=cache)(service.geocode)
        setattr(service, 'geocode', memoised_geocode)

    CACHED_GEOCODER[lookup_key] = service
    return service
Example #2
0
 def __init__(self, options, columns):
   """Pick the geopy geocoder named by ``options['service']`` and build it.

   The service defaults to ``'googlev3'``. GoogleV3 is given
   ``options['api_key']``, ArcGIS is given ``options['username']`` and
   ``options['password']``; any other geocoder is built without arguments.
   """
   super(_Geocode, self).__init__(options, columns, srid=4326)
   self.service = options.get('service', 'googlev3')
   geocoder_cls = geopy.get_geocoder_for_service(self.service)
   # Collect the constructor arguments first, then instantiate in one place.
   if geocoder_cls == geopy.geocoders.googlev3.GoogleV3:
     init_kwargs = {'api_key': options.get('api_key')}
   elif geocoder_cls == geopy.geocoders.arcgis.ArcGIS:
     init_kwargs = {
       'username': options.get('username'),
       'password': options.get('password'),
     }
   else:
     init_kwargs = {}
   self.geocoder = geocoder_cls(**init_kwargs)
Example #3
0
 def __init__(self, options, columns):
   """Set up the geocoder selected through the ``service`` option.

   ``options['service']`` (default ``'googlev3'``) is resolved to a geopy
   geocoder class. Credentials are read from ``options`` only for the two
   classes handled explicitly: ``api_key`` for GoogleV3, ``username`` and
   ``password`` for ArcGIS. Every other class is called with no arguments.
   """
   super(_Geocode, self).__init__(options, columns, srid=4326)
   self.service = options.get('service', 'googlev3')
   service_cls = geopy.get_geocoder_for_service(self.service)
   credentials = {}
   if service_cls == geopy.geocoders.googlev3.GoogleV3:
     credentials['api_key'] = options.get('api_key')
   elif service_cls == geopy.geocoders.arcgis.ArcGIS:
     credentials['username'] = options.get('username')
     credentials['password'] = options.get('password')
   self.geocoder = service_cls(**credentials)
Example #4
0
def load_geopy_geolocator(service):
    '''
    Configure geopy's default options and build a geocoder

    Sets ``geopy.geocoders.options.default_user_agent`` and
    ``default_timeout`` (10) before instantiating the geocoder class
    without arguments.

    :param str service: name of service corresponding to
        geocoder

    :return geopy.Geocoder:
    '''
    defaults = geopy.geocoders.options
    defaults.default_user_agent = 'asn1/[email protected]'
    defaults.default_timeout = 10
    return geopy.get_geocoder_for_service(service)()
def geocodeBatch(locator):
    """Geocode the rows of a CSV file and save an augmented copy.

    The input path is ``sys.argv[1]``. ``sys.argv[2]`` names the column
    holding the address; ``sys.argv[3]`` .. ``sys.argv[6]`` optionally name
    further columns whose values are appended to the query, separated by
    ``", "``. Each data row gains six columns (``service``,
    ``geocodeinput``, ``geocodedlocation``, ``latitude``, ``longitude``,
    ``geocodedraw``) and the result is written to
    ``<input basename>_<locator>Geocoded.csv``.

    :param str locator: geopy service name passed to
        ``geopy.get_geocoder_for_service``; it is also stored in every row
        and used in the output file name.
    :raises IndexError: if ``sys.argv[1]`` / ``sys.argv[2]`` are missing or
        the input file is empty.
    :raises ValueError: if a requested column name is not in the header.
    """
    # eg: geoloc = geopy.geocoders.ArcGIS() # see geocoders/__init__.py for service names
    geoloc = geopy.get_geocoder_for_service(locator)()
    inputfname = os.path.splitext(os.path.basename(sys.argv[1]))[0]

    with open(sys.argv[1], 'r') as f:
        inputf = list(csv.reader(f))  # whole file in memory; row 0 is the header

    totaln = len(inputf) - 1
    ffname = inputf[0]  # header row, extended in place with the result columns
    ffname.extend([
        'service',  # holder for geocoding service name
        'geocodeinput',
        'geocodedlocation',
        'latitude',
        'longitude',
        'geocodedraw',
    ])

    # Resolve the address columns once instead of once per row: one required
    # column name plus up to four optional ones from the command line.
    address_fields = [sys.argv[2]] + sys.argv[3:7]
    address_columns = [ffname.index(name) for name in address_fields]

    cnterror = 0
    for rownum, row in enumerate(inputf[1:], 1):
        geocodeinput = ", ".join(row[col] for col in address_columns)
        row.append(locator)  # keep the name of the geocoding service used
        row.append(geocodeinput)  # keep address/location string to be geocoded
        try:
            location = geoloc.geocode(geocodeinput, timeout=10)
            # Build all four values before touching the row so a failure
            # half-way cannot leave the row with misaligned columns.
            # NOTE(review): .encode() yields bytes; on Python 3 csv.writer
            # will emit the b'...' repr. Kept as-is because the target
            # Python version is not visible here.
            geocoded = [
                location.address.encode('utf-8'),  # geocoded location
                location.latitude,  # lat/y
                location.longitude,  # lon/x
                location.raw,  # all location object info (json)
            ]
        except Exception:
            # Best effort: any failure (including a None result) yields
            # blank placeholders, but Ctrl-C / SystemExit still propagate.
            geocoded = ['', '', '', '']
            cnterror += 1
            print(locator + ' geocoding ' + str(rownum) + ' of ' +
                  str(totaln) +
                  ' - unsuccessful - most likely time-out')
        else:
            print(locator + ' geocoding ' + str(rownum) + ' of ' +
                  str(totaln))  # tell the progress..
        row.extend(geocoded)

    success = totaln - cnterror
    # A header-only file has no data rows; avoid dividing by zero.
    ratesuccess = round(success / float(totaln) * 100, 1) if totaln else 0.0
    print('Done! ' + str(success) +
          ' cases were successfully geocoded (' + str(ratesuccess) +
          '% success)')

    # save the result/updated list in CSV format
    with open(inputfname + '_' + locator + 'Geocoded.csv', 'w') as f2:
        w = csv.writer(f2, delimiter=',')
        for row in inputf:
            w.writerow(row)
def geocodeBatch(locator):
    """Geocode the rows of a CSV file and save an augmented copy.

    The input path is ``sys.argv[1]``. ``sys.argv[2]`` names the column
    holding the address; ``sys.argv[3]`` .. ``sys.argv[6]`` optionally name
    further columns whose values are appended to the query, separated by
    ``", "``. Each data row gains six columns (``service``,
    ``geocodeinput``, ``geocodedlocation``, ``latitude``, ``longitude``,
    ``geocodedraw``) and the result is written to
    ``<input basename>_<locator>Geocoded.csv``.

    :param str locator: geopy service name passed to
        ``geopy.get_geocoder_for_service``; it is also stored in every row
        and used in the output file name.
    :raises IndexError: if ``sys.argv[1]`` / ``sys.argv[2]`` are missing or
        the input file is empty.
    :raises ValueError: if a requested column name is not in the header.
    """
    # eg: geoloc = geopy.geocoders.ArcGIS() # see geocoders/__init__.py for service names
    geoloc = geopy.get_geocoder_for_service(locator)()
    inputfname = os.path.splitext(os.path.basename(sys.argv[1]))[0]

    with open(sys.argv[1], 'r') as f:
        inputf = list(csv.reader(f))  # convert csv input to list

    totaln = len(inputf) - 1
    ffname = inputf[0]  # collect names of columns/vars
    # add names for geocoding result vars
    ffname.extend([
        'service',  # holder for geocoding service name
        'geocodeinput',
        'geocodedlocation',
        'latitude',
        'longitude',
        'geocodedraw',
    ])

    # One required address column plus up to four optional ones; look the
    # positions up once rather than for every row.
    address_fields = [sys.argv[2]] + sys.argv[3:7]
    address_columns = [ffname.index(name) for name in address_fields]

    cnterror = 0
    for rownum, row in enumerate(inputf[1:], 1):
        geocodeinput = ", ".join(row[col] for col in address_columns)
        row.append(locator)  # keep the name of the geocoding service used
        row.append(geocodeinput)  # keep address/location string to be geocoded
        try:
            location = geoloc.geocode(geocodeinput, timeout=10)
            # Gather the result columns first so an error cannot leave the
            # row partially filled.
            # NOTE(review): .encode() yields bytes; on Python 3 csv.writer
            # will emit the b'...' repr. Kept as-is because the target
            # Python version is not visible here.
            geocoded = [
                location.address.encode('utf-8'),  # keep geocoded location
                location.latitude,  # keep lat/y
                location.longitude,  # keep lon/x
                location.raw,  # keep all location object info (json)
            ]
        except Exception:
            # Placeholders for the four result columns; KeyboardInterrupt
            # and SystemExit are deliberately not caught.
            geocoded = ['', '', '', '']
            cnterror += 1
            print(locator+' geocoding '+str(rownum)+' of '+str(totaln)+' - unsuccessful - most likely time-out')
        else:
            print(locator+' geocoding '+str(rownum)+' of '+str(totaln))  # tell the progress..
        row.extend(geocoded)

    success = totaln - cnterror
    # Guard against a header-only input, which has zero data rows.
    ratesuccess = round(success/float(totaln)*100, 1) if totaln else 0.0
    print('Done! '+str(success)+' cases were successfully geocoded ('+str(ratesuccess)+'% success)')

    # save the result/updated list in CSV format
    with open(inputfname+'_'+locator+'Geocoded.csv', 'w') as f2:
        w = csv.writer(f2, delimiter=',')
        for row in inputf:
            w.writerow(row)
    def handle(self, *args, **options):
        """Import markers from CSV files, geocoding each row along the way.

        Every positional argument is a CSV path. Rows are read with the
        ``dialect``, ``delimiter`` and ``quotechar`` options, echoed to
        stdout and, unless ``dry_run`` is set, geocoded and stored with one
        ``bulk_create`` per file. Rows numbered below ``start_at`` are
        skipped.

        Raises ``CommandError`` when no file is given, when building the
        geocoder raises ``GeopyError``, or when a row cannot be geocoded.
        """
        if not args:
            raise CommandError("Expected CSV filename to import")

        try:
            geocoder = get_geocoder_for_service(options['geocoder'])()
        except GeopyError as e:
            raise CommandError(str(e))

        dry_run = options['dry_run']
        start_at = options['start_at'] or 0
        emit = self.stdout.write

        for filename in args:
            # Not passing the utf-8 codec to codecs.open()
            # the file is opened in ascii, and unicodecsv performs the conversion.
            with codecs.open(filename, 'rb') as f:
                csv_data = unicodecsv.DictReader(
                    f,
                    dialect=options['dialect'],
                    delimiter=options['delimiter'],
                    quotechar=options['quotechar'])
                first = True
                marker_data = []
                for row_num, row in enumerate(csv_data, 1):
                    if row_num < start_at:
                        continue

                    # Only the first processed row must have every field set.
                    empty_ok = not first

                    # Title is parsed and printed first, for easy debugging.
                    title = _format_field(options, 'title', row,
                                          allow_empty=empty_ok)

                    if not first:
                        emit('----')
                    emit(u"Row:         {0}".format(row_num))
                    emit(u"Name:        {0}".format(title))

                    # Parse the rest
                    geocode = _format_field(options, 'geocode', row,
                                            allow_empty=empty_ok)
                    description = _format_field(options, 'description', row,
                                                allow_html=True,
                                                allow_empty=empty_ok)
                    group_id = _format_field(options, 'group', row,
                                             allow_html=False,
                                             allow_empty=empty_ok)
                    image = _format_field(options, 'image', row,
                                          allow_empty=True)

                    group = _get_group(group_id)

                    if not dry_run:
                        # Pause between real geocoder requests (300ms).
                        if not first:
                            time.sleep(0.3)

                        try:
                            location = geocoder.geocode(geocode)
                        except GeopyError as e:
                            raise CommandError(str(e))
                        if not location:
                            raise CommandError(
                                "Unable to geocode: {0}".format(geocode))

                    emit(u"Group:       {0}".format(group))
                    emit(u"Geocode:     {0}".format(geocode))
                    if dry_run:
                        emit(u"Location:    (not determined for dry-run)")
                    else:
                        emit(u"Location:    ({0}, {1}) {2}".format(
                            location.latitude, location.longitude, location))
                    emit(u"Image:       {0}".format(image))
                    emit(u"Description:\n{0}".format(description))
                    first = False

                    if dry_run:
                        continue

                    coordinates = [location.latitude, location.longitude]
                    marker_data.append(
                        Marker(
                            title=title,
                            image=image or '',
                            description=description,
                            group=group,
                            location=coordinates,
                        ))

                if not dry_run:
                    emit('----')
                    emit(u"Writing objects..")

                    with transaction.atomic():
                        Marker.objects.bulk_create(marker_data)

                    emit(u"Done")
Example #8
0
def geocode_csv(infile, **kwargs):
    """
    Geocode entities from a provided input csv file and write results to an
    output csv file.

    Return a dictionary containing error, success and total count of geocoded
    rows.

    **Example:**

    Basic usage::

        >>> from giokoda.utils import geocode_csv
        >>> geocode_csv('input.csv')

    This will try to geocode the `'input.csv'` file and write output to
    `'input.csv-geocoded-<service>.csv'`, where `<service>` is the name of
    the geocoding service used.

    **Parameters:**

    `infile` *(filepath/str)*: path to a csv file to geocode

    `**kwargs`: Optional and arbitrary keyword arguments
      `outfile` (filepath/str): path to file to write output csv

      `service` *(str)*: default: `DEFAULT_GEOCODER`. Name of a geocoding
      service to use. This can be a name of any geocoding service accepted
      by geopy.

      `query_column` *(str)*: Name of a column containing text to geocode.
      When neither `query_column` nor `query_columns` is given, `'name'` is
      used.

      `query_columns` *(list)*: default: `[]`. A list of columns
      to be combined in order to produce a text to geocode. The list is
      copied; the caller's object is never modified.

      `service_kwargs` *(dict)*: Optional keyword arguments for initialization
      of geocoding service. They are merged over a copy of the service's
      entry in `GEOCODERS`.

      `delimiter` *(str)*: default: `','`, A one-character string used to
      separate fields in a csv file.

      `quotechar` *(str)*: default: `'"'`, A one-character string used to
      quote fields containing special characters in a csv file, such as
      the delimiter or quotechar, or which contain new-line characters.

    **Returns:**
      A dictionary of total success and error count::

        {
            'total': 0,
            'success': 0,
            'error': 0
        }

    **Raises:**
      `TypeError` if `query_columns` is given and is not a list.
    """
    # Collect parameters
    service = kwargs.get('service', DEFAULT_GEOCODER)
    outfile = kwargs.get('outfile', '%s-geocoded-%s.csv' % (infile, service))
    query_column = kwargs.get('query_column')
    query_columns = kwargs.get('query_columns', [])
    if query_columns and not isinstance(query_columns, list):
        raise TypeError('A value for `query_columns` must be a list')
    # Work on a copy: appending below must not grow the caller's list.
    query_columns = list(query_columns or [])
    if query_column:
        query_columns.append(query_column)
    elif not query_columns:
        query_columns.append('name')
    # Copy the configured defaults: update() and pop() below would otherwise
    # edit the shared GEOCODERS entry and strip credentials for later calls.
    service_kwargs = dict(GEOCODERS.get(service, GEOCODERS[DEFAULT_GEOCODER]))
    service_kwargs.update(kwargs.get('service_kwargs', {}))
    delimiter = kwargs.get('delimiter', ',')
    quotechar = kwargs.get('quotechar', '"')
    # Get geocoder class
    Geocoder = get_geocoder_for_service(service)
    # Try to catch mandatory arguments, usually these are for authentication.
    # Instantiate geocoder
    if 'api_key' in service_kwargs:
        geocoder = Geocoder(service_kwargs.pop('api_key'), **service_kwargs)
    elif 'username' in service_kwargs and 'password' in service_kwargs:
        geocoder = Geocoder(username=service_kwargs.pop('username'),
                            password=service_kwargs.pop('password'),
                            **service_kwargs)
    elif 'auth_id' in service_kwargs and 'auth_token' in service_kwargs:
        geocoder = Geocoder(auth_id=service_kwargs.pop('auth_id'),
                            auth_token=service_kwargs.pop('auth_token'),
                            **service_kwargs)
    elif 'consumer_key' in service_kwargs and \
            'consumer_secret' in service_kwargs:
        geocoder = Geocoder(
            consumer_key=service_kwargs.pop('consumer_key'),
            consumer_secret=service_kwargs.pop('consumer_secret'),
            **service_kwargs)
    else:
        geocoder = Geocoder(**service_kwargs)

    first_row = True
    successful = 0
    total = 0
    errors = 0
    # Context managers guarantee both files are flushed and closed, even
    # when a row raises outside the per-row try block.
    with open(infile, 'r') as source, open(outfile, 'w') as target:
        incsv = csv.DictReader(source,
                               delimiter=delimiter,
                               quotechar=quotechar)
        writer = csv.writer(target,
                            delimiter=delimiter,
                            quotechar=quotechar)
        # Geocode each row
        for row in incsv:
            total += 1
            sorted_row = {'latitude': '', 'longitude': ''}
            for key, value in sorted(row.items()):
                sorted_row[key] = value
            try:
                # Join the non-empty query column values into one string.
                parts = (sorted_row.get(column) for column in query_columns)
                query = ', '.join(part for part in parts if part)
                if query:
                    location = geocoder.geocode(query)
                    if location and location.latitude and location.longitude:
                        sorted_row['latitude'] = location.latitude
                        sorted_row['longitude'] = location.longitude
                        successful += 1
            except Exception as e:
                # Best effort: report the failing row and carry on.
                errors += 1
                sys.stdout.write('\n\033[91m%s\033[0m\n' % e)
                pprint(sorted_row)
            if first_row:
                # write header
                writer.writerow(list(sorted_row.keys()))
                first_row = False
            # Write row
            writer.writerow(list(sorted_row.values()))
    return {'total': total, 'success': successful, 'error': errors}
Example #9
0
 def geocoder(self):
     """Build a `geopy.geocoders.Geocoder` instance for this service.

     The class is looked up from ``self.name`` and called with
     ``self._config`` expanded as keyword arguments.
     """
     geocoder_cls = geopy.get_geocoder_for_service(self.name)
     return geocoder_cls(**self._config)
    def handle(self, *args, **options):
        """Read markers from the given CSV files and store them.

        Each positional argument is a CSV path, parsed with the ``dialect``,
        ``delimiter`` and ``quotechar`` options. Every row from ``start_at``
        onwards is printed to stdout; unless ``dry_run`` is set it is also
        geocoded, and all markers of a file are saved in one ``bulk_create``.

        Raises ``CommandError`` when no file is given, when building the
        geocoder raises ``GeopyError``, or when a row cannot be geocoded.
        """
        if not args:
            raise CommandError("Expected CSV filename to import")

        try:
            geocoder = get_geocoder_for_service(options['geocoder'])()
        except GeopyError as e:
            raise CommandError(str(e))

        dry_run = options['dry_run']
        start_at = options['start_at'] or 0
        out = self.stdout.write

        for filename in args:
            # Not passing the utf-8 codec to codecs.open()
            # the file is opened in ascii, and unicodecsv performs the conversion.
            with codecs.open(filename, 'rb') as f:
                csv_data = unicodecsv.DictReader(
                    f,
                    dialect=options['dialect'],
                    delimiter=options['delimiter'],
                    quotechar=options['quotechar'],
                )
                first = True
                marker_data = []
                for row_num, row in enumerate(csv_data, 1):
                    if row_num < start_at:
                        continue

                    # Empty fields are tolerated on every row but the first one processed.
                    lenient = not first

                    # Print first results immediately, for easy debugging
                    title = _format_field(options, 'title', row, allow_empty=lenient)

                    if not first:
                        out('----')
                    out(u"Row:         {0}".format(row_num))
                    out(u"Name:        {0}".format(title))

                    # Parse the rest
                    geocode = _format_field(options, 'geocode', row, allow_empty=lenient)
                    description = _format_field(options, 'description', row, allow_html=True, allow_empty=lenient)
                    group_id = _format_field(options, 'group', row, allow_html=False, allow_empty=lenient)
                    image = _format_field(options, 'image', row, allow_empty=True)

                    group = _get_group(group_id)

                    if not dry_run:
                        # Wait 300ms between consecutive geocoder requests.
                        if not first:
                            time.sleep(0.3)

                        try:
                            location = geocoder.geocode(geocode)
                        except GeopyError as e:
                            raise CommandError(str(e))
                        if not location:
                            raise CommandError("Unable to geocode: {0}".format(geocode))

                    out(u"Group:       {0}".format(group))
                    out(u"Geocode:     {0}".format(geocode))
                    if dry_run:
                        out(u"Location:    (not determined for dry-run)")
                    else:
                        out(u"Location:    ({0}, {1}) {2}".format(location.latitude, location.longitude, location))
                    out(u"Image:       {0}".format(image))
                    out(u"Description:\n{0}".format(description))
                    first = False

                    if dry_run:
                        continue

                    marker = Marker(
                        title=title,
                        image=image or '',
                        description=description,
                        group=group,
                        location=[location.latitude, location.longitude],
                    )
                    marker_data.append(marker)

                if not dry_run:
                    out('----')
                    out(u"Writing objects..")

                    with transaction.atomic():
                        Marker.objects.bulk_create(marker_data)

                    out(u"Done")