def build_geocode_service(provider, api_key=None, cache=0):
    """Return a geocoder instance for ``provider``, reusing a cached one if any.

    Instances are stored in ``CACHED_GEOCODER`` under the key
    ``(provider, cache)``.  On a miss, every existing entry for the same
    provider is removed before the new instance is created and stored.

    :param provider: service name handed to ``get_geocoder_for_service``.
    :param api_key: passed positionally to geocoder classes whose
        ``__init__`` declares an ``api_key`` parameter.
    :param cache: ``maxsize`` of the ``functools.lru_cache`` wrapped around
        the instance's ``geocode`` method.
    :raises GeocodeException: the class declares ``api_key`` but a falsy
        ``api_key`` was supplied.
    """
    wanted = (provider, cache)
    if wanted in CACHED_GEOCODER:
        return CACHED_GEOCODER[wanted]

    # Drop instances of this provider that were built with another cache size.
    stale_keys = [key for key in CACHED_GEOCODER if key[0] == provider]
    for key in stale_keys:
        del CACHED_GEOCODER[key]

    service_cls = get_geocoder_for_service(provider)
    init_params = inspect.signature(service_cls.__init__).parameters
    if 'api_key' in init_params:
        if not api_key:
            raise GeocodeException(
                "Provider {} needs an api key".format(provider))
        service = service_cls(api_key, user_agent="pygisapi")
    else:
        service = service_cls(user_agent="pygisapi")

    # An lru_cache wrapper exposes ``__wrapped__``; wrap only when absent.
    if not hasattr(service.geocode, '__wrapped__'):
        service.geocode = functools.lru_cache(maxsize=cache)(service.geocode)

    CACHED_GEOCODER[wanted] = service
    return service
def __init__(self, options, columns):
    """Initialise the base class and build the geopy geocoder to use.

    The service name is read from ``options['service']`` (default
    ``'googlev3'``).  GoogleV3 receives ``api_key`` from ``options``, ArcGIS
    receives ``username`` and ``password``; any other geocoder class is
    instantiated without arguments.
    """
    super(_Geocode, self).__init__(options, columns, srid=4326)
    self.service = options.get('service', 'googlev3')
    geocoder_cls = geopy.get_geocoder_for_service(self.service)

    # Collect the constructor arguments per service, then build once.
    if geocoder_cls == geopy.geocoders.googlev3.GoogleV3:
        init_kwargs = {'api_key': options.get('api_key')}
    elif geocoder_cls == geopy.geocoders.arcgis.ArcGIS:
        init_kwargs = {
            'username': options.get('username'),
            'password': options.get('password'),
        }
    else:
        init_kwargs = {}

    self.geocoder = geocoder_cls(**init_kwargs)
def load_geopy_geolocator(service):
    '''
    Configure geopy's global defaults and build a geocoder for ``service``.

    Sets the module-wide default user agent and a default timeout of 10 on
    ``geopy.geocoders.options`` before instantiating the geocoder.

    :param str service: service name handed to
        ``geopy.get_geocoder_for_service``
    :return: instance of the matching geocoder class, created with no
        arguments
    '''
    defaults = geopy.geocoders.options
    defaults.default_user_agent = 'asn1/[email protected]'
    defaults.default_timeout = 10
    return geopy.get_geocoder_for_service(service)()
def geocodeBatch(locator):
    """Geocode the addresses of a CSV file and write an augmented copy.

    Command-line arguments are read directly from ``sys.argv``:

    * ``sys.argv[1]`` -- path of the input CSV (first row is the header);
    * ``sys.argv[2]`` -- name of the address column (required);
    * ``sys.argv[3]`` .. ``sys.argv[6]`` -- optional further column names
      whose values are appended, separated by ``", "``, to the query.

    Six columns are appended to every row: ``service``, ``geocodeinput``,
    ``geocodedlocation``, ``latitude``, ``longitude`` and ``geocodedraw``.
    Rows that cannot be geocoded get empty strings in the last four.  The
    result is written to ``<input basename>_<locator>Geocoded.csv`` in the
    current working directory.

    :param str locator: geopy service name, resolved with
        ``geopy.get_geocoder_for_service``.
    """
    # eg: geoloc = geopy.geocoders.ArcGIS()
    # see geocoders/__init__.py for service names
    geoloc = geopy.get_geocoder_for_service(locator)()

    inputfname = os.path.splitext(os.path.basename(sys.argv[1]))[0]
    with open(sys.argv[1], 'r') as f:
        inputf = list(csv.reader(f))  # convert csv input to list
    totaln = len(inputf) - 1
    ffname = inputf[0]  # header row: names of columns/vars
    # add names for geocoding result vars
    ffname.extend(['service', 'geocodeinput', 'geocodedlocation',
                   'latitude', 'longitude', 'geocodedraw'])

    # Resolve the address columns once instead of on every row.
    # sys.argv[2] is mandatory, sys.argv[3..6] are optional extras.
    addressfields = [sys.argv[2]] + sys.argv[3:7]
    addresscols = [ffname.index(name) for name in addressfields]

    # loop through the data rows, collect info and send to a geocoder
    cnterror = 0
    for rownum, row in enumerate(inputf[1:], 1):
        geocodeinput = ', '.join(row[col] for col in addresscols)
        row.append(locator)  # keep the name of the geocoding service used
        row.append(geocodeinput)  # keep address/location string
        try:
            location = geoloc.geocode(geocodeinput, timeout=10)
            # geocode() returns None when nothing is found; the attribute
            # access below then raises and the row is counted as failed.
            address = location.address
            if sys.version_info[0] < 3:
                # Python 2's csv module needs byte strings; on Python 3
                # encoding would write a literal "b'...'" into the file.
                address = address.encode('utf-8')
            result = [address, location.latitude, location.longitude,
                      location.raw]
        except Exception:
            # Exception (not a bare except) so Ctrl-C still stops the batch.
            result = ['', '', '', '']  # place holders keep columns aligned
            cnterror += 1
            print(locator + ' geocoding ' + str(rownum) + ' of ' +
                  str(totaln) + ' - unsuccessful - most likely time-out')
        else:
            print(locator + ' geocoding ' + str(rownum) + ' of ' +
                  str(totaln))  # tell the progress..
        # Append all four result cells at once so a failure part-way
        # through can never leave a row with misaligned columns.
        row.extend(result)

    nsuccess = totaln - cnterror
    # A header-only file has no data rows; avoid dividing by zero.
    ratesuccess = round(nsuccess / float(totaln) * 100, 1) if totaln else 0.0
    print('Done! ' + str(nsuccess) + ' cases were successfully geocoded (' +
          str(ratesuccess) + '% success)')

    # save the result/updated list in CSV format
    with open(inputfname + '_' + locator + 'Geocoded.csv', 'w') as f2:
        w = csv.writer(f2, delimiter=',')
        w.writerows(inputf)
def geocodeBatch(locator):
    """Geocode the addresses of a CSV file and write an augmented copy.

    Command-line arguments are read directly from ``sys.argv``:

    * ``sys.argv[1]`` -- path of the input CSV (first row is the header);
    * ``sys.argv[2]`` -- name of the address column (required);
    * ``sys.argv[3]`` .. ``sys.argv[6]`` -- optional further column names
      whose values are appended, separated by ``", "``, to the query.

    Six columns are appended to every row: ``service``, ``geocodeinput``,
    ``geocodedlocation``, ``latitude``, ``longitude`` and ``geocodedraw``.
    Rows that cannot be geocoded get empty strings in the last four.  The
    result is written to ``<input basename>_<locator>Geocoded.csv`` in the
    current working directory.

    :param str locator: geopy service name, resolved with
        ``geopy.get_geocoder_for_service``.
    """
    # eg: geoloc = geopy.geocoders.ArcGIS()
    # see geocoders/__init__.py for service names
    geoloc = geopy.get_geocoder_for_service(locator)()

    inputfname = os.path.splitext(os.path.basename(sys.argv[1]))[0]
    with open(sys.argv[1], 'r') as f:
        inputf = list(csv.reader(f))  # convert csv input to list
    totaln = len(inputf) - 1
    ffname = inputf[0]  # header row: names of columns/vars
    # add names for geocoding result vars
    ffname.extend(['service', 'geocodeinput', 'geocodedlocation',
                   'latitude', 'longitude', 'geocodedraw'])

    # Resolve the address columns once instead of on every row.
    # sys.argv[2] is mandatory, sys.argv[3..6] are optional extras.
    addressfields = [sys.argv[2]] + sys.argv[3:7]
    addresscols = [ffname.index(name) for name in addressfields]

    # loop through the data rows, collect info and send to a geocoder
    cnterror = 0
    for rownum, row in enumerate(inputf[1:], 1):
        geocodeinput = ', '.join(row[col] for col in addresscols)
        row.append(locator)  # keep the name of the geocoding service used
        row.append(geocodeinput)  # keep address/location string
        try:
            location = geoloc.geocode(geocodeinput, timeout=10)
            # geocode() returns None when nothing is found; the attribute
            # access below then raises and the row is counted as failed.
            address = location.address
            if sys.version_info[0] < 3:
                # Python 2's csv module needs byte strings; on Python 3
                # encoding would write a literal "b'...'" into the file.
                address = address.encode('utf-8')
            result = [address, location.latitude, location.longitude,
                      location.raw]
        except Exception:
            # Exception (not a bare except) so Ctrl-C still stops the batch.
            result = ['', '', '', '']  # place holders keep columns aligned
            cnterror += 1
            print(locator + ' geocoding ' + str(rownum) + ' of ' +
                  str(totaln) + ' - unsuccessful - most likely time-out')
        else:
            print(locator + ' geocoding ' + str(rownum) + ' of ' +
                  str(totaln))  # tell the progress..
        # Append all four result cells at once so a failure part-way
        # through can never leave a row with misaligned columns.
        row.extend(result)

    nsuccess = totaln - cnterror
    # A header-only file has no data rows; avoid dividing by zero.
    ratesuccess = round(nsuccess / float(totaln) * 100, 1) if totaln else 0.0
    print('Done! ' + str(nsuccess) + ' cases were successfully geocoded (' +
          str(ratesuccess) + '% success)')

    # save the result/updated list in CSV format
    with open(inputfname + '_' + locator + 'Geocoded.csv', 'w') as f2:
        w = csv.writer(f2, delimiter=',')
        w.writerows(inputf)
def handle(self, *args, **options):
    """Import markers from the CSV files named in ``args``.

    ``options`` supplies ``geocoder`` (geopy service name), ``dry_run``,
    ``start_at`` (rows numbered below it are skipped) and the CSV
    ``dialect``/``delimiter``/``quotechar``; it is also passed on to
    ``_format_field``.  Each processed row is echoed to ``self.stdout``.
    In a dry run nothing is geocoded or stored; otherwise the markers of
    each file are saved with one ``bulk_create`` inside a transaction.

    Raises ``CommandError`` when no file name is given, when the geocoder
    cannot be created, or when geocoding fails or finds nothing.
    """
    if not args:
        raise CommandError("Expected CSV filename to import")

    try:
        geocoder_cls = get_geocoder_for_service(options['geocoder'])
        geocoder = geocoder_cls()
    except GeopyError as e:
        raise CommandError(str(e))

    dry_run = options['dry_run']
    start_at = options['start_at'] or 0

    for filename in args:
        # No utf-8 codec is passed to codecs.open(): the file is opened
        # in ascii, and unicodecsv performs the conversion.
        with codecs.open(filename, 'rb') as f:
            reader = unicodecsv.DictReader(
                f,
                dialect=options['dialect'],
                delimiter=options['delimiter'],
                quotechar=options['quotechar'],
            )
            markers = []
            first = True

            for row_num, row in enumerate(reader, 1):
                if row_num < start_at:
                    continue

                # Empty values are rejected only on the first processed row.
                allow_empty = not first

                # The title is parsed and printed immediately, which makes
                # a failing row easy to identify.
                title = _format_field(options, 'title', row,
                                      allow_empty=allow_empty)
                if not first:
                    self.stdout.write('----')
                self.stdout.write(u"Row: {0}".format(row_num))
                self.stdout.write(u"Name: {0}".format(title))

                # Remaining fields.
                geocode = _format_field(options, 'geocode', row,
                                        allow_empty=allow_empty)
                description = _format_field(options, 'description', row,
                                            allow_html=True,
                                            allow_empty=allow_empty)
                group_id = _format_field(options, 'group', row,
                                         allow_html=False,
                                         allow_empty=allow_empty)
                image = _format_field(options, 'image', row,
                                      allow_empty=True)
                group = _get_group(group_id)

                if not dry_run:
                    # Pause between lookups to stay under the rate limit.
                    if not first:
                        time.sleep(0.3)  # 300ms
                    try:
                        location = geocoder.geocode(geocode)
                    except GeopyError as e:
                        raise CommandError(str(e))
                    if not location:
                        raise CommandError(
                            "Unable to geocode: {0}".format(geocode))

                self.stdout.write(u"Group: {0}".format(group))
                self.stdout.write(u"Geocode: {0}".format(geocode))
                if dry_run:
                    location_line = u"Location: (not determined for dry-run)"
                else:
                    location_line = u"Location: ({0}, {1}) {2}".format(
                        location.latitude, location.longitude, location)
                self.stdout.write(location_line)
                self.stdout.write(u"Image: {0}".format(image))
                self.stdout.write(u"Description:\n{0}".format(description))

                first = False
                if not dry_run:
                    marker = Marker(
                        title=title,
                        image=image or '',
                        description=description,
                        group=group,
                        location=[location.latitude, location.longitude],
                    )
                    markers.append(marker)

            # Nothing is persisted in a dry run.
            if not dry_run:
                self.stdout.write('----')
                self.stdout.write(u"Writing objects..")
                with transaction.atomic():
                    Marker.objects.bulk_create(markers)
                self.stdout.write(u"Done")
def geocode_csv(infile, **kwargs):
    """
    Geocode entities from a provided input csv file and write results to an
    output csv file. Return a dictionary containing error, success and total
    count of geocoded rows.

    **Example:**

    Basic usage::

        >>> from giokoda.utils import geocode_csv
        >>> geocode_csv('input.csv')

    This will try to geocode the `'input.csv'` file and write output to
    `'input.csv-geocoded-<service>.csv'`, where `<service>` is the name of
    the geocoding service in use.

    **Parameters:**

    `infile` *(filepath/str)*: path to a csv file to geocode

    `**kwargs`: Optional and arbitrary keyword arguments

        `outfile` *(filepath/str)*: path to file to write output csv.
        Default: `'<infile>-geocoded-<service>.csv'`.

        `service` *(str)*: default: `DEFAULT_GEOCODER`. Name of a
        geocoding service to use. This can be a name of any geocoding
        service accepted by geopy.

        `query_column` *(str)*: Name of a column containing text to
        geocode. It is used in addition to `query_columns` when both are
        given; when neither is given, the column `'name'` is used.

        `query_columns` *(list)*: default: `[]`. A list of columns to be
        combined, in order, to produce the text to geocode. The list
        passed in is not modified.

        `service_kwargs` *(dict)*: Optional keyword arguments for
        initialization of the geocoding service. They override the
        defaults configured in `GEOCODERS` for this call only.

        `delimiter` *(str)*: default: `','`, A one-character string used
        to separate fields in a csv file.

        `quotechar` *(str)*: default: `'"'`, A one-character string used
        to quote fields containing special characters in a csv file, such
        as the delimiter or quotechar, or which contain new-line
        characters.

    **Returns:**

    A dictionary of total success and error count::

        {
            'total': 0,
            'success': 0,
            'error': 0
        }

    Rows whose query is empty, or for which the service returns no
    location, count towards `total` only.

    **Raises:**

    `TypeError` if a non-empty `query_columns` is not a list.
    """
    # Collect parameters
    service = kwargs.get('service', DEFAULT_GEOCODER)
    outfile = kwargs.get('outfile', '%s-geocoded-%s.csv' % (infile, service))
    query_column = kwargs.get('query_column')
    query_columns = kwargs.get('query_columns', [])
    if query_columns and not isinstance(query_columns, list):
        raise TypeError('A value for `query_columns` must be a list')
    # Work on a copy so the caller's list is never modified.
    query_columns = list(query_columns or [])
    if query_column:
        query_columns.append(query_column)
    elif not query_columns:
        query_columns.append('name')
    # Copy the configured defaults: the update() and pop() calls below would
    # otherwise alter the shared GEOCODERS entry and strip its credentials
    # for every later call.
    service_kwargs = dict(GEOCODERS.get(service, GEOCODERS[DEFAULT_GEOCODER]))
    service_kwargs.update(kwargs.get('service_kwargs', {}))
    delimiter = kwargs.get('delimiter', ',')
    quotechar = kwargs.get('quotechar', '"')

    # Get geocoder class
    Geocoder = get_geocoder_for_service(service)

    # Try to catch mandatory arguments, usually these are for authentication.
    # Instantiate geocoder
    if 'api_key' in service_kwargs:
        geocoder = Geocoder(service_kwargs.pop('api_key'), **service_kwargs)
    elif 'username' in service_kwargs and 'password' in service_kwargs:
        geocoder = Geocoder(username=service_kwargs.pop('username'),
                            password=service_kwargs.pop('password'),
                            **service_kwargs)
    elif 'auth_id' in service_kwargs and 'auth_token' in service_kwargs:
        geocoder = Geocoder(auth_id=service_kwargs.pop('auth_id'),
                            auth_token=service_kwargs.pop('auth_token'),
                            **service_kwargs)
    elif 'consumer_key' in service_kwargs and \
            'consumer_secret' in service_kwargs:
        geocoder = Geocoder(
            consumer_key=service_kwargs.pop('consumer_key'),
            consumer_secret=service_kwargs.pop('consumer_secret'),
            **service_kwargs)
    else:
        geocoder = Geocoder(**service_kwargs)

    successful = 0
    total = 0
    errors = 0
    # Context managers guarantee both files are flushed and closed, even
    # when an unexpected error interrupts the loop.
    with open(infile, 'r') as in_fh, open(outfile, 'w') as out_fh:
        # Read csv
        incsv = csv.DictReader(in_fh, delimiter=delimiter,
                               quotechar=quotechar)
        # Initialize csv writer
        writer = csv.writer(out_fh, delimiter=delimiter,
                            quotechar=quotechar)

        # Geocode each row
        first_row = True
        for row in incsv:
            total += 1
            # Result columns come first, then the input columns by name.
            sorted_row = {'latitude': '', 'longitude': ''}
            for key, value in sorted(row.items()):
                sorted_row[key] = value
            try:
                parts = [sorted_row[column] for column in query_columns
                         if sorted_row.get(column)]
                query = ', '.join(parts)
                if query:
                    location = geocoder.geocode(query)
                    # Compare with None rather than truthiness: 0.0 is a
                    # valid latitude (equator) or longitude (prime
                    # meridian).
                    if (location
                            and location.latitude is not None
                            and location.longitude is not None):
                        sorted_row['latitude'] = location.latitude
                        sorted_row['longitude'] = location.longitude
                        successful += 1
            except Exception as e:
                # Best effort: report the failing row and carry on.
                errors += 1
                sys.stdout.write('\n\033[91m%s\033[0m\n' % e)
                pprint(sorted_row)
            if first_row:
                # write header
                writer.writerow(list(sorted_row.keys()))
                first_row = False
            # Write row
            writer.writerow(list(sorted_row.values()))
    return {'total': total, 'success': successful, 'error': errors}
def geocoder(self):
    """Build the `geopy.geocoders.Geocoder` instance for this service.

    The class registered in geopy under ``self.name`` is looked up and
    instantiated with ``self._config`` as keyword arguments; every call
    creates a new instance.
    """
    geocoder_cls = geopy.get_geocoder_for_service(self.name)
    settings = self._config
    return geocoder_cls(**settings)
def handle(self, *args, **options):
    """Import markers from the CSV files named in ``args``.

    ``options`` supplies ``geocoder`` (geopy service name), ``dry_run``,
    ``start_at`` (rows numbered below it are skipped) and the CSV
    ``dialect``/``delimiter``/``quotechar``; it is also passed on to
    ``_format_field``.  Each processed row is echoed to ``self.stdout``.
    In a dry run nothing is geocoded or stored; otherwise the markers of
    each file are saved with one ``bulk_create`` inside a transaction.

    Raises ``CommandError`` when no file name is given, when the geocoder
    cannot be created, or when geocoding fails or finds nothing.
    """
    if not args:
        raise CommandError("Expected CSV filename to import")

    try:
        geocoder_cls = get_geocoder_for_service(options['geocoder'])
        geocoder = geocoder_cls()
    except GeopyError as e:
        raise CommandError(str(e))

    dry_run = options['dry_run']
    start_at = options['start_at'] or 0

    for filename in args:
        # No utf-8 codec is passed to codecs.open(): the file is opened
        # in ascii, and unicodecsv performs the conversion.
        with codecs.open(filename, 'rb') as f:
            reader = unicodecsv.DictReader(
                f,
                dialect=options['dialect'],
                delimiter=options['delimiter'],
                quotechar=options['quotechar'],
            )
            markers = []
            first = True

            for row_num, row in enumerate(reader, 1):
                if row_num < start_at:
                    continue

                # Empty values are rejected only on the first processed row.
                allow_empty = not first

                # The title is parsed and printed immediately, which makes
                # a failing row easy to identify.
                title = _format_field(options, 'title', row,
                                      allow_empty=allow_empty)
                if not first:
                    self.stdout.write('----')
                self.stdout.write(u"Row: {0}".format(row_num))
                self.stdout.write(u"Name: {0}".format(title))

                # Remaining fields.
                geocode = _format_field(options, 'geocode', row,
                                        allow_empty=allow_empty)
                description = _format_field(options, 'description', row,
                                            allow_html=True,
                                            allow_empty=allow_empty)
                group_id = _format_field(options, 'group', row,
                                         allow_html=False,
                                         allow_empty=allow_empty)
                image = _format_field(options, 'image', row,
                                      allow_empty=True)
                group = _get_group(group_id)

                if not dry_run:
                    # Pause between lookups to stay under the rate limit.
                    if not first:
                        time.sleep(0.3)  # 300ms
                    try:
                        location = geocoder.geocode(geocode)
                    except GeopyError as e:
                        raise CommandError(str(e))
                    if not location:
                        raise CommandError(
                            "Unable to geocode: {0}".format(geocode))

                self.stdout.write(u"Group: {0}".format(group))
                self.stdout.write(u"Geocode: {0}".format(geocode))
                if dry_run:
                    location_line = u"Location: (not determined for dry-run)"
                else:
                    location_line = u"Location: ({0}, {1}) {2}".format(
                        location.latitude, location.longitude, location)
                self.stdout.write(location_line)
                self.stdout.write(u"Image: {0}".format(image))
                self.stdout.write(u"Description:\n{0}".format(description))

                first = False
                if not dry_run:
                    marker = Marker(
                        title=title,
                        image=image or '',
                        description=description,
                        group=group,
                        location=[location.latitude, location.longitude],
                    )
                    markers.append(marker)

            # Nothing is persisted in a dry run.
            if not dry_run:
                self.stdout.write('----')
                self.stdout.write(u"Writing objects..")
                with transaction.atomic():
                    Marker.objects.bulk_create(markers)
                self.stdout.write(u"Done")