def unify_geo_data(input_string):
    """Return unified geographic data for a free-form address string.

    The input is lower-cased, every fragment matching the per-country
    patterns in ``street2_searcher`` is removed, non-ASCII characters are
    dropped and the remaining text is handed to ``geocode``.  When several
    candidates come back, the index returned by ``mostequivalent`` selects
    one of them.  The display name of the chosen candidate is then parsed
    with the first pattern of ``address_re`` that matches.

    Sample inputs::

        "Av. rivadavia 9858, buenos aires, argentina"
        "VICTORIA OCAMPO 360 PISO PB, CAPITAL FEDERAL,CAPITAL FEDERAL,Argentina"
        "PELLEGRINI 255,,,SANTA ROSA,LA PAMPA,Argentina"

    :param input_string: free-form address text.
    :returns: one of

        * a dict with the keys ``country``, ``zip``, ``state``, ``city``,
          ``nbhd``, ``number``, ``street``, ``street2`` (the ``building``
          group of the matching pattern), ``latitud`` and ``longitud``
          (both floats);
        * ``{'error': <message>}`` when ``geocode`` returns ``None``,
          returns an empty answer, or raises ``GeocoderTimedOut``;
        * ``False`` when no pattern in ``address_re`` yields any data.
    """
    input_string = input_string.lower()

    # Remove spurious data (per-country patterns) from the search text.
    # The removed fragments are not part of the returned data: 'street2'
    # is taken from the 'building' group of the address pattern below.
    for country, patterns in street2_searcher.items():
        if country in input_string:
            for rexp in patterns:
                input_string = rexp.sub('', input_string)
    input_string = input_string.encode('ascii', 'ignore')

    # Search data in the geographic database.  Only the geocoder call can
    # raise GeocoderTimedOut, so it is the only statement inside the try.
    # NOTE(review): both string arguments of _st read as a single space in
    # this file -- confirm that is what the helper expects.
    try:
        _gc = geocode(_st(input_string, " ", " "))
    except GeocoderTimedOut:
        return {'error': 'Connection timeout'}
    if _gc is None:
        return {'error': 'No geocoding service available'}
    if not _gc:
        return {'error': 'No answer'}

    if len(_gc) > 1:
        # Candidates are mappings (they are indexed by 'display_name', 'lat'
        # and 'lon' below), so compare on their display names instead of
        # tuple-unpacking each candidate.
        names = [candidate['display_name'] for candidate in _gc]
        _gc = _gc[mostequivalent(names, input_string)]
    else:
        _gc = _gc[0]

    place, lat, lng = _gc['display_name'], _gc['lat'], _gc['lon']
    _logger.debug("PLACE: %s", place)

    # The first pattern that matches the display name wins.  ``result`` is
    # pre-set so that an empty ``address_re`` does not leave it unbound.
    result = None
    for _re in address_re:
        match = _re.search(place)
        if match:
            result = match.groupdict()
            break
        _logger.debug("IGNORE: %s", _re)

    if not result:
        return False

    _logger.debug("RESULT: %s", result)

    return {
        'country': result['country'],
        'zip': result['zip'],
        'state': result['state'],
        'city': result['city'],
        'nbhd': result['nbhd'],
        'number': result['number'],
        'street': result['street'],
        'street2': result['building'],
        'latitud': float(lat),
        'longitud': float(lng),
    }
def unify_geo_data(input_string):
    """Return unified geographic data for a free-form address string.

    The input is lower-cased, fragments matching the per-country patterns in
    ``street2_searcher`` are moved out of the search text into ``street2``,
    non-ASCII characters are dropped and the rest is handed to ``geocode``.
    The returned place name is split on commas into (at most) four tokens:
    address, city, state and country; missing leading tokens are padded with
    empty strings.  The address token is finally split into street and
    number.

    Sample inputs::

        "Av. rivadavia 9858, buenos aires, argentina"
        "VICTORIA OCAMPO 360 PISO PB,CAPITAL FEDERAL,CAPITAL FEDERAL,Argentina"
        "PELLEGRINI 255,,,SANTA ROSA,LA PAMPA,Argentina"

    :param input_string: free-form address text.
    :returns: dict with the keys ``city``, ``state``, ``country``,
        ``latitud``, ``longitud``, ``street``, ``street2``, ``number`` and
        ``zip`` (always ``''``).
    :raises RuntimeError: when the place name has more than four
        comma-separated tokens.
    """
    input_string = input_string.lower()

    # Remove spurious data from the search text and store it in street2.
    street2 = []
    for country, patterns in street2_searcher.items():
        if country in input_string:
            for rexp in patterns:
                match = rexp.search(input_string)
                if match:
                    street2.append(','.join(match.groups()))
                    input_string = rexp.sub('', input_string)
    street2 = ','.join(street2)
    input_string = input_string.encode('ascii', 'ignore')

    # Search data in the geographic database.
    # NOTE(review): both string arguments of _st read as a single space in
    # this file -- confirm that is what the helper expects.
    try:
        place, (lat, lng) = geocode(_st(input_string, " ", " "))
    except ValueError:
        # Presumably raised when the query is ambiguous -- TODO confirm.
        # Ask for every candidate and keep the one mostequivalent selects.
        # NOTE(review): this retry sends the text without the _st() pass.
        places = list(geocode(input_string, exactly_one=False))
        names = [name for name, _coords in places]
        place, (lat, lng) = places[mostequivalent(names, input_string)]

    # Ordering data: left-pad so that the last three tokens are always
    # city, state and country.
    tokens = [token.strip() for token in place.split(',')]
    tokens = [u''] * (4 - len(tokens)) + tokens
    if len(tokens) != 4:
        raise RuntimeError('Exists more than 4 tokens in the place.')
    address, city, state, country = tokens

    data = {
        'city': city,
        'state': state,
        'country': country,
        'latitud': lat,
        'longitud': lng,
    }

    # Split address data.
    if data['country'] in ('Argentina',):
        # Expected address shape: "<street> <number>".
        match = re.search(r'^\s*(.*)\s+(\d+)\s*$', address)
        if match is not None:
            street, number = match.groups()
        else:
            # The address token is not "<street> <number>": take the street
            # from the first comma-separated chunk of the query instead.
            street = input_string.split(',')[0]
            match = re.search(r'(.*)\s+(\d+)', street)
            if match is not None:
                street, number = match.groups()
            else:
                number = ''
            # NOTE(review): the address token is reused as the city whenever
            # it is not a street address; confirm this belongs at this level
            # and not only in the "no number found" case above.
            data['city'] = address
    else:
        # Expected address shape elsewhere: "<number> <street>".
        match = re.search(r'(\d*)\s+(.*)', address)
        if match is not None:
            number, street = match.groups()
        else:
            # No whitespace in the address token: keep it whole as the
            # street instead of failing on a missing match.
            number, street = '', address

    data['street'] = street.strip()
    data['street2'] = street2.strip()
    data['number'] = number.strip()

    # The zip lookup (search_zip) is disabled, so zip is always empty.
    data['zip'] = ''

    return data
def unify_geo_data(input_string):
    """Return unified geographic data for a free-form address string.

    Steps performed:

    1. Lower-case the input and move every fragment matching the
       per-country patterns of ``street2_searcher`` into ``street2``.
    2. Drop non-ASCII characters and query ``geocode``.
    3. Split the returned place name on commas into address, city, state
       and country (left-padded with empty strings up to four tokens).
    4. Split the address token into street and number.

    Sample inputs::

        "gral paz 9858, general san martin, buenos aires, argentina"
        "AV. DEL LIBERTADOR 767 PISO 1,VICENTE LOPEZ,BUENOS AIRES,Argentina"

    :param input_string: free-form address text.
    :returns: dict with the keys ``city``, ``state``, ``country``,
        ``latitud``, ``longitud``, ``street``, ``street2``, ``number`` and
        ``zip`` (always ``''``).
    :raises RuntimeError: when the place name has more than four
        comma-separated tokens.
    """
    input_string = input_string.lower()

    # Remove spurious data from the search text and store it in street2.
    extras = []
    for country, patterns in street2_searcher.items():
        if country in input_string:
            for rexp in patterns:
                found = rexp.search(input_string)
                if found:
                    extras.append(','.join(found.groups()))
                    input_string = rexp.sub('', input_string)
    street2 = ','.join(extras)
    input_string = input_string.encode('ascii', 'ignore')

    # Search data in the geographic database.
    # NOTE(review): both string arguments of _st read as a single space in
    # this file -- confirm that is what the helper expects.
    try:
        place, (lat, lng) = geocode(_st(input_string, " ", " "))
    except ValueError:
        # Presumably raised when the query is ambiguous -- TODO confirm.
        # Fetch every candidate and keep the one mostequivalent selects.
        # NOTE(review): this retry sends the text without the _st() pass.
        places = list(geocode(input_string, exactly_one=False))
        names = [name for name, _coords in places]
        place, (lat, lng) = places[mostequivalent(names, input_string)]

    # Ordering data: left-pad so that the last three tokens are always
    # city, state and country.
    tokens = [token.strip() for token in place.split(',')]
    tokens = [u''] * (4 - len(tokens)) + tokens
    if len(tokens) != 4:
        raise RuntimeError('Exists more than 4 tokens in the place.')
    address, city, state, country = tokens

    data = {
        'city': city,
        'state': state,
        'country': country,
        'latitud': lat,
        'longitud': lng,
    }

    # Split address data.
    if data['country'] in ('Argentina',):
        # Expected address shape: "<street> <number>".
        found = re.search(r'^\s*(.*)\s+(\d+)\s*$', address)
        if found is not None:
            street, number = found.groups()
        else:
            # The address token is not "<street> <number>": take the street
            # from the first comma-separated chunk of the query instead.
            street = input_string.split(',')[0]
            found = re.search(r'(.*)\s+(\d+)', street)
            if found is not None:
                street, number = found.groups()
            else:
                number = ''
            # NOTE(review): the address token is reused as the city whenever
            # it is not a street address; confirm this belongs at this level
            # and not only in the "no number found" case above.
            data['city'] = address
    else:
        # Expected address shape elsewhere: "<number> <street>".
        found = re.search(r'(\d*)\s+(.*)', address)
        if found is not None:
            number, street = found.groups()
        else:
            # No whitespace in the address token: keep it whole as the
            # street instead of failing on a missing match.
            number, street = '', address

    data['street'] = street.strip()
    data['street2'] = street2.strip()
    data['number'] = number.strip()

    # The zip lookup (search_zip) is disabled, so zip is always empty.
    data['zip'] = ''

    return data
def unify_geo_data(input_string):
    """Return unified geographic data for a free-form address string.

    The input is lower-cased, every fragment matching the per-country
    patterns in ``street2_searcher`` is removed, non-ASCII characters are
    dropped and the remaining text is handed to ``geocode``.  When several
    candidates come back, the index returned by ``mostequivalent`` selects
    one of them.  The display name of the chosen candidate is then parsed
    with the first pattern of ``address_re`` that matches.

    Sample inputs::

        "Av. rivadavia 9858, buenos aires, argentina"
        "AV. DEL LIBERTADOR 767 PISO 1, VICENTE LOPEZ,BUENOS AIRES,Argentina"
        "PELLEGRINI 255,,,SANTA ROSA,LA PAMPA,Argentina"

    :param input_string: free-form address text.
    :returns: either a dict with the keys ``country``, ``zip``, ``state``,
        ``city``, ``nbhd``, ``number``, ``street``, ``street2`` (the
        ``building`` group of the matching pattern), ``latitud`` and
        ``longitud`` (both floats), or ``{'error': <message>}`` when
        ``geocode`` returns ``None``, returns an empty answer, or raises
        ``GeocoderTimedOut``.
    :raises AssertionError: when no pattern in ``address_re`` matches the
        display name returned by the geocoder.
    """
    input_string = input_string.lower()

    # Remove spurious data (per-country patterns) from the search text.
    # The removed fragments are not part of the returned data: 'street2'
    # is taken from the 'building' group of the address pattern below.
    for country, patterns in street2_searcher.items():
        if country in input_string:
            for rexp in patterns:
                input_string = rexp.sub('', input_string)
    input_string = input_string.encode('ascii', 'ignore')

    # Search data in the geographic database.  Only the geocoder call can
    # raise GeocoderTimedOut, so it is the only statement inside the try.
    # NOTE(review): both string arguments of _st read as a single space in
    # this file -- confirm that is what the helper expects.
    try:
        _gc = geocode(_st(input_string, " ", " "))
    except GeocoderTimedOut:
        return {'error': 'Connection timeout'}
    if _gc is None:
        return {'error': 'No geocoding service available'}
    if not _gc:
        return {'error': 'No answer'}

    if len(_gc) > 1:
        # Candidates are mappings (they are indexed by 'display_name', 'lat'
        # and 'lon' below), so compare on their display names instead of
        # tuple-unpacking each candidate.
        names = [candidate['display_name'] for candidate in _gc]
        _gc = _gc[mostequivalent(names, input_string)]
    else:
        _gc = _gc[0]

    place, lat, lng = _gc['display_name'], _gc['lat'], _gc['lon']
    _logger.debug("PLACE: %s", place)

    # The first pattern that matches the display name wins.  ``result`` is
    # pre-set so that an empty ``address_re`` does not leave it unbound.
    result = None
    for _re in address_re:
        match = _re.search(place)
        if match:
            result = match.groupdict()
            break
        _logger.debug("IGNORE: %s", _re)

    # Raised explicitly (rather than via ``assert``) so the check is still
    # performed when Python runs with -O; exception type and message are
    # the same as before.
    if result is None:
        raise AssertionError("Geolocalization return wrong address.")

    _logger.debug("RESULT: %s", result)

    return {
        'country': result['country'],
        'zip': result['zip'],
        'state': result['state'],
        'city': result['city'],
        'nbhd': result['nbhd'],
        'number': result['number'],
        'street': result['street'],
        'street2': result['building'],
        'latitud': float(lat),
        'longitud': float(lng),
    }