Beispiel #1
0
def unify_geo_data(input_string):
    """
    Return unified geographic data for a free-form address string.

    The string is lower-cased; for every key of ``street2_searcher`` found
    in it, the fragments matched by that key's patterns are cut out, and
    what is left is handed to ``geocode``.  The ``display_name`` of the
    selected answer is parsed with the first pattern of ``address_re`` that
    matches it.

    Returns:
        * a dict with the keys ``country``, ``zip``, ``state``, ``city``,
          ``nbhd``, ``number``, ``street``, ``street2``, ``latitud`` and
          ``longitud`` on success;
        * ``{'error': message}`` when ``geocode`` returns ``None``, returns
          an empty answer or raises ``GeocoderTimedOut``;
        * ``False`` when no ``address_re`` pattern matches the place name.

    NOTE(review): the examples below need a live geocoding service and their
    expected dicts do not include the ``nbhd`` key returned here; treat them
    as illustrative until they are refreshed.

    >>> data = unify_geo_data("Av. rivadavia 9858, buenos aires, argentina")
    >>> data == {'latitud': -34.637979199999997, 'city': u'Buenos Aires',\
                 'country': u'Argentina', 'number': u'9858',\
                 'state': u'Capital Federal',\
                 'street': u'Av Rivadavia', 'street2': '',\
                 'longitud': -58.503058099999997, 'zip': 'C1407DZU'}
    True
    >>> data = unify_geo_data("gral paz 9858, general san martin, buenos aires, argentina")
    >>> data == {'latitud': -34.581238599999999, 'city': '', 'zip': '',\
                 'country': u'Argentina', 'number': u'9858',\
                 'state': u'Buenos Aires', 'street': u'Gral. Paz',\
                 'street2': '', 'longitud': -58.513873400000001}
    True
    >>> data = unify_geo_data("VICTORIA OCAMPO 360 PISO PB,CAPITAL FEDERAL,CAPITAL FEDERAL,Argentina")
    >>> data == {'latitud': -34.601967000000002, 'city': u'Buenos Aires',\
                 'zip': '', 'country': u'Argentina', 'street2': 'piso pb',\
                 'number': u'360', 'state': u'Capital Federal',\
                 'street': u'Victoria Ocampo', 'longitud': -58.364093699999998}
    True
    >>> data = unify_geo_data("AV. DEL LIBERTADOR 767 PISO 1,VICENTE LOPEZ,BUENOS AIRES,Argentina")
    >>> data == {'latitud': -34.527026800000002, 'city': u'Vicente L\xf3pez',\
                 'zip': u'1638', 'country': u'Argentina', 'street2': 'piso 1',\
                 'number': u'767', 'state': u'Buenos Aires Province',\
                 'street': u'Av Del Libertador Gral. San Martin',\
                 'longitud': -58.471314999999997}
    True
    >>> data = unify_geo_data("PELLEGRINI 255,,,SANTA ROSA,LA PAMPA,Argentina")
    >>> data == {'latitud': -36.619441799999997, 'city': u'Santa Rosa',\
                 'zip': u'L6300DRE', 'country': u'Argentina', 'street2': '',\
                 'number': u'255', 'state': u'La Pampa', \
                 'street': u'Pellegrini',\
                 'longitud': -64.292496499999999}
    True
    """
    input_string = input_string.lower()

    # Remove spurious data from the search string and collect it in street2.
    street2 = []
    for country in street2_searcher.keys():
        if country not in input_string:
            continue
        for rexp in street2_searcher[country]:
            match = rexp.search(input_string)
            if match:
                street2.append(','.join(match.groups()))
                input_string = rexp.sub('', input_string)
    # NOTE(review): the fragments collected here are never returned -- the
    # result's 'street2' is taken from the 'building' group below.  Confirm
    # whether that is intended.
    street2 = ','.join(street2)
    input_string = input_string.encode('ascii', 'ignore')

    # Search data in the geographic database.  Only the call that can time
    # out sits inside the try block.
    try:
        answers = geocode(_st(input_string, " ", " "))
    except GeocoderTimedOut:
        return {'error': 'Connection timeout'}
    if answers is None:
        return {'error': 'No geocoding service available'}
    if not answers:
        return {'error': 'No answer'}

    if len(answers) > 1:
        # Every answer is indexed by key below, so it cannot be unpacked as
        # a 2-tuple; rank the candidates by their display name instead.
        names = [answer['display_name'] for answer in answers]
        best = answers[mostequivalent(names, input_string)]
    else:
        best = answers[0]
    place, lat, lng = [best[k] for k in ['display_name', 'lat', 'lon']]
    _logger.debug("PLACE: %s" % place)

    # Start from None so an empty address_re falls through to the
    # "no match" return instead of raising NameError.
    result = None
    for _re in address_re:
        match = _re.search(place)
        if match:
            result = match.groupdict()
            break
        _logger.debug("IGNORE: %s" % _re)

    if not result:
        return False

    _logger.debug("RESULT: %s" % result)

    data = {
        'country': result['country'],
        'zip': result['zip'],
        'state': result['state'],
        'city': result['city'],
        'nbhd': result['nbhd'],
        'number': result['number'],
        'street': result['street'],
        'street2': result['building'],
        'latitud': float(lat),
        'longitud': float(lng),
    }
    return data
Beispiel #2
0
def unify_geo_data(input_string):
    """
    Return unified geographic data for a free-form address string.

    The string is lower-cased; for every key of ``street2_searcher`` found
    in it, the fragments matched by that key's patterns are cut out and
    kept as ``street2``.  The remainder is handed to ``geocode`` and the
    returned place name is split on commas into
    ``address, city, state, country`` (left-padded with empty strings when
    fewer than four tokens come back).

    Returns:
        A dict with the keys ``city``, ``state``, ``country``, ``latitud``,
        ``longitud``, ``street``, ``street2``, ``number`` and ``zip``.
        ``zip`` is always the empty string because the zip lookup is
        disabled.

    Raises:
        RuntimeError: when the place name has more than four tokens.

    NOTE(review): the examples below need a live geocoding service and some
    of them expect a non-empty ``zip``, which this function never returns;
    treat them as illustrative until they are refreshed.

    >>> data = unify_geo_data("Av. rivadavia 9858, buenos aires, argentina")
    >>> data == {'latitud': -34.637979199999997, 'city': u'Buenos Aires',\
                 'country': u'Argentina', 'number': u'9858',\
                 'state': u'Capital Federal',\
                 'street': u'Av Rivadavia', 'street2': '',\
                 'longitud': -58.503058099999997, 'zip': 'C1407DZU'}
    True
    >>> data = unify_geo_data("gral paz 9858, general san martin, buenos aires, argentina")
    >>> data == {'latitud': -34.581238599999999, 'city': '', 'zip': '',\
                 'country': u'Argentina', 'number': u'9858',\
                 'state': u'Buenos Aires', 'street': u'Gral. Paz',\
                 'street2': '', 'longitud': -58.513873400000001}
    True
    >>> data = unify_geo_data("VICTORIA OCAMPO 360 PISO PB,CAPITAL FEDERAL,CAPITAL FEDERAL,Argentina")
    >>> data == {'latitud': -34.601967000000002, 'city': u'Buenos Aires',\
                 'zip': '', 'country': u'Argentina', 'street2': 'piso pb',\
                 'number': u'360', 'state': u'Capital Federal',\
                 'street': u'Victoria Ocampo', 'longitud': -58.364093699999998}
    True
    >>> data = unify_geo_data("AV. DEL LIBERTADOR 767 PISO 1,VICENTE LOPEZ,BUENOS AIRES,Argentina")
    >>> data == {'latitud': -34.527026800000002, 'city': u'Vicente L\xf3pez',\
                 'zip': u'1638', 'country': u'Argentina', 'street2': 'piso 1',\
                 'number': u'767', 'state': u'Buenos Aires Province',\
                 'street': u'Av Del Libertador Gral. San Martin',\
                 'longitud': -58.471314999999997}
    True
    >>> data = unify_geo_data("PELLEGRINI 255,,,SANTA ROSA,LA PAMPA,Argentina")
    >>> data == {'latitud': -36.619441799999997, 'city': u'Santa Rosa',\
                 'zip': u'L6300DRE', 'country': u'Argentina', 'street2': '',\
                 'number': u'255', 'state': u'La Pampa', 'street': u'Pellegrini',\
                 'longitud': -64.292496499999999}
    True
    """
    input_string = input_string.lower()

    # Remove spurious data from the search string and store it in street2.
    street2 = []
    for country in street2_searcher.keys():
        if country not in input_string:
            continue
        for rexp in street2_searcher[country]:
            match = rexp.search(input_string)
            if match:
                street2.append(','.join(match.groups()))
                input_string = rexp.sub('', input_string)
    street2 = ','.join(street2)
    input_string = input_string.encode('ascii', 'ignore')

    # Search data in the geographic database.
    try:
        place, (lat, lng) = geocode(_st(input_string, " ", " "))
    except ValueError:
        # Presumably signals more than one answer -- TODO confirm against
        # the geocoder in use.  Ask for every candidate and keep the one
        # whose name is closest to the query.
        places = list(geocode(input_string, exactly_one=False))
        names = [name for name, _ in places]
        place, (lat, lng) = places[mostequivalent(names, input_string)]

    data = {}
    result = [token.strip() for token in place.split(',')]
    result = [u''] * (4 - len(result)) + result
    # Ordering data
    if len(result) != 4:
        raise RuntimeError('Exists more than 4 tokens in the place.')
    address, data['city'], data['state'], data['country'] = result
    data['latitud'] = lat
    data['longitud'] = lng

    # Split address data
    if data['country'] in ['Argentina']:
        # "<street> <number>" layout.
        s = re.search(r'^\s*(.*)\s+(\d+)\s*$', address)
        if s is not None:
            street, number = s.groups()
        else:
            # The first token carries no trailing number: treat it as the
            # city and recover the street from the query itself.
            street = input_string.split(',')[0]
            s = re.search(r'(.*)\s+(\d+)', street)
            if s is not None:
                street, number = s.groups()
            else:
                number = ''
            data['city'] = address
    else:
        # "<number> <street>" layout.
        # NOTE(review): the pattern is unanchored, so an address without a
        # leading number keeps only the text after its first whitespace.
        s = re.search(r'(\d*)\s+(.*)', address)
        if s is not None:
            number, street = s.groups()
        else:
            # No whitespace at all (e.g. an empty or one-word address):
            # keep the address as the street instead of failing on None.
            number, street = '', address
    data['street'] = street.strip()
    data['street2'] = street2.strip()
    data['number'] = number.strip()
    # The zip lookup (search_zip) is disabled; the key is kept so the
    # result shape stays stable.
    data['zip'] = ''
    return data
Beispiel #3
0
def unify_geo_data(input_string):
    """
    Return unified geographic data for a free-form address string.

    Steps: lower-case the string, cut out the fragments matched by the
    ``street2_searcher`` patterns of every key found in it (they become
    ``street2``), geocode the remainder, split the returned place name on
    commas into ``address, city, state, country`` and finally split the
    address into street and number.

    Returns:
        A dict with the keys ``city``, ``state``, ``country``, ``latitud``,
        ``longitud``, ``street``, ``street2``, ``number`` and ``zip``
        (``zip`` is always ``''``; the zip lookup is disabled).

    Raises:
        RuntimeError: when the place name has more than four tokens.

    NOTE(review): the examples below need a live geocoding service and some
    of them expect a non-empty ``zip``, which this function never returns;
    treat them as illustrative until they are refreshed.

    >>> data = unify_geo_data("Av. rivadavia 9858, buenos aires, argentina")
    >>> data == {'latitud': -34.637979199999997, 'city': u'Buenos Aires',\
                 'country': u'Argentina', 'number': u'9858',\
                 'state': u'Capital Federal',\
                 'street': u'Av Rivadavia', 'street2': '',\
                 'longitud': -58.503058099999997, 'zip': 'C1407DZU'}
    True
    >>> data = unify_geo_data("gral paz 9858, general san martin, buenos aires, argentina")
    >>> data == {'latitud': -34.581238599999999, 'city': '', 'zip': '',\
                 'country': u'Argentina', 'number': u'9858',\
                 'state': u'Buenos Aires', 'street': u'Gral. Paz',\
                 'street2': '', 'longitud': -58.513873400000001}
    True
    >>> data = unify_geo_data("VICTORIA OCAMPO 360 PISO PB,CAPITAL FEDERAL,CAPITAL FEDERAL,Argentina")
    >>> data == {'latitud': -34.601967000000002, 'city': u'Buenos Aires',\
                 'zip': '', 'country': u'Argentina', 'street2': 'piso pb',\
                 'number': u'360', 'state': u'Capital Federal',\
                 'street': u'Victoria Ocampo', 'longitud': -58.364093699999998}
    True
    >>> data = unify_geo_data("AV. DEL LIBERTADOR 767 PISO 1,VICENTE LOPEZ,BUENOS AIRES,Argentina")
    >>> data == {'latitud': -34.527026800000002, 'city': u'Vicente L\xf3pez',\
                 'zip': u'1638', 'country': u'Argentina', 'street2': 'piso 1',\
                 'number': u'767', 'state': u'Buenos Aires Province',\
                 'street': u'Av Del Libertador Gral. San Martin',\
                 'longitud': -58.471314999999997}
    True
    >>> data = unify_geo_data("PELLEGRINI 255,,,SANTA ROSA,LA PAMPA,Argentina")
    >>> data == {'latitud': -36.619441799999997, 'city': u'Santa Rosa',\
                 'zip': u'L6300DRE', 'country': u'Argentina', 'street2': '',\
                 'number': u'255', 'state': u'La Pampa', 'street': u'Pellegrini',\
                 'longitud': -64.292496499999999}
    True
    """
    query = input_string.lower()

    # Remove spurious data from the search string and store it in street2.
    extras = []
    for country in street2_searcher.keys():
        if country in query:
            for rexp in street2_searcher[country]:
                found = rexp.search(query)
                if found:
                    extras.append(','.join(found.groups()))
                    query = rexp.sub('', query)
    street2 = ','.join(extras)
    query = query.encode('ascii', 'ignore')

    # Search data in the geographic database.
    try:
        place, (lat, lng) = geocode(_st(query, " ", " "))
    except ValueError:
        # Presumably signals more than one answer -- TODO confirm against
        # the geocoder in use.  Fetch all candidates and keep the closest.
        places = list(geocode(query, exactly_one=False))
        chosen = mostequivalent([name for name, _ in places], query)
        place, (lat, lng) = places[chosen]

    tokens = [token.strip() for token in place.split(',')]
    tokens = [u''] * (4 - len(tokens)) + tokens
    # Ordering data
    if len(tokens) != 4:
        raise RuntimeError('Exists more than 4 tokens in the place.')
    address, city, state, country = tokens

    street, number, address_is_city = _split_street_number(
        address, country, query)
    if address_is_city:
        city = address

    return {
        'city': city,
        'state': state,
        'country': country,
        'latitud': lat,
        'longitud': lng,
        'street': street.strip(),
        'street2': street2.strip(),
        'number': number.strip(),
        # The zip lookup (search_zip) is disabled; the key is kept so the
        # result shape stays stable.
        'zip': '',
    }


def _split_street_number(address, country, query):
    """Split *address* into ``(street, number, address_is_city)``.

    For Argentina the number is expected after the street; when *address*
    has no trailing number, the street is recovered from the first
    comma-separated part of *query* and the flag tells the caller that
    *address* should be used as the city.  For any other country the number
    is expected before the street.
    """
    if country in ['Argentina']:
        found = re.search(r'^\s*(.*)\s+(\d+)\s*$', address)
        if found is not None:
            street, number = found.groups()
            return street, number, False
        street = query.split(',')[0]
        found = re.search(r'(.*)\s+(\d+)', street)
        if found is not None:
            street, number = found.groups()
        else:
            number = ''
        return street, number, True

    # NOTE(review): the pattern is unanchored, so an address without a
    # leading number keeps only the text after its first whitespace.
    found = re.search(r'(\d*)\s+(.*)', address)
    if found is None:
        # No whitespace at all (e.g. an empty or one-word address): keep the
        # address as the street instead of failing on None.
        return address, '', False
    number, street = found.groups()
    return street, number, False
Beispiel #4
0
def unify_geo_data(input_string):
    """
    Return unified geographic data for a free-form address string.

    The string is lower-cased; for every key of ``street2_searcher`` found
    in it, the fragments matched by that key's patterns are cut out, and
    what is left is handed to ``geocode``.  The ``display_name`` of the
    selected answer is parsed with the first pattern of ``address_re`` that
    matches it.

    Returns:
        * a dict with the keys ``country``, ``zip``, ``state``, ``city``,
          ``nbhd``, ``number``, ``street``, ``street2``, ``latitud`` and
          ``longitud`` on success;
        * ``{'error': message}`` when ``geocode`` returns ``None``, returns
          an empty answer or raises ``GeocoderTimedOut``.

    Raises:
        AssertionError: when no ``address_re`` pattern matches the place
            name.

    NOTE(review): the examples below need a live geocoding service and their
    expected dicts do not include the ``nbhd`` key returned here; treat them
    as illustrative until they are refreshed.

    >>> data = unify_geo_data("Av. rivadavia 9858, buenos aires, argentina")
    >>> data == {'latitud': -34.637979199999997, 'city': u'Buenos Aires',\
                 'country': u'Argentina', 'number': u'9858',\
                 'state': u'Capital Federal',\
                 'street': u'Av Rivadavia', 'street2': '',\
                 'longitud': -58.503058099999997, 'zip': 'C1407DZU'}
    True
    >>> data = unify_geo_data("gral paz 9858, general san martin, buenos aires, argentina")
    >>> data == {'latitud': -34.581238599999999, 'city': '', 'zip': '',\
                 'country': u'Argentina', 'number': u'9858',\
                 'state': u'Buenos Aires', 'street': u'Gral. Paz',\
                 'street2': '', 'longitud': -58.513873400000001}
    True
    >>> data = unify_geo_data("VICTORIA OCAMPO 360 PISO PB,CAPITAL FEDERAL,CAPITAL FEDERAL,Argentina")
    >>> data == {'latitud': -34.601967000000002, 'city': u'Buenos Aires',\
                 'zip': '', 'country': u'Argentina', 'street2': 'piso pb',\
                 'number': u'360', 'state': u'Capital Federal',\
                 'street': u'Victoria Ocampo', 'longitud': -58.364093699999998}
    True
    >>> data = unify_geo_data("AV. DEL LIBERTADOR 767 PISO 1,VICENTE LOPEZ,BUENOS AIRES,Argentina")
    >>> data == {'latitud': -34.527026800000002, 'city': u'Vicente L\xf3pez',\
                 'zip': u'1638', 'country': u'Argentina', 'street2': 'piso 1',\
                 'number': u'767', 'state': u'Buenos Aires Province',\
                 'street': u'Av Del Libertador Gral. San Martin',\
                 'longitud': -58.471314999999997}
    True
    >>> data = unify_geo_data("PELLEGRINI 255,,,SANTA ROSA,LA PAMPA,Argentina")
    >>> data == {'latitud': -36.619441799999997, 'city': u'Santa Rosa',\
                 'zip': u'L6300DRE', 'country': u'Argentina', 'street2': '',\
                 'number': u'255', 'state': u'La Pampa', \
                 'street': u'Pellegrini',\
                 'longitud': -64.292496499999999}
    True
    """
    input_string = input_string.lower()

    # Remove spurious data from the search string and collect it in street2.
    street2 = []
    for country in street2_searcher.keys():
        if country in input_string:
            for rexp in street2_searcher[country]:
                match = rexp.search(input_string)
                if match:
                    street2.append(','.join(match.groups()))
                    input_string = rexp.sub('', input_string)
    # NOTE(review): the fragments collected here are never returned -- the
    # result's 'street2' is taken from the 'building' group below.  Confirm
    # whether that is intended.
    street2 = ','.join(street2)
    input_string = input_string.encode('ascii', 'ignore')

    # Search data in the geographic database.
    try:
        candidates = geocode(_st(input_string, " ", " "))
    except GeocoderTimedOut:
        return {'error': 'Connection timeout'}
    if candidates is None:
        return {'error': 'No geocoding service available'}
    if not candidates:
        return {'error': 'No answer'}

    chosen = 0
    if len(candidates) > 1:
        # Every candidate is indexed by key below, so it cannot be unpacked
        # as a 2-tuple; rank the candidates by their display name instead.
        chosen = mostequivalent(
            [candidate['display_name'] for candidate in candidates],
            input_string)
    picked = candidates[chosen]
    place, lat, lng = [picked[k] for k in ['display_name', 'lat', 'lon']]
    _logger.debug("PLACE: %s" % place)

    for _re in address_re:
        match = _re.search(place)
        if match:
            result = match.groupdict()
            break
        _logger.debug("IGNORE: %s" % _re)
    else:
        # Raised explicitly so the check survives "python -O" and also
        # covers an empty address_re; the exception type is unchanged.
        raise AssertionError("Geolocalization return wrong address.")

    _logger.debug("RESULT: %s" % result)

    data = {
        'country': result['country'],
        'zip': result['zip'],
        'state': result['state'],
        'city': result['city'],
        'nbhd': result['nbhd'],
        'number': result['number'],
        'street': result['street'],
        'street2': result['building'],
        'latitud': float(lat),
        'longitud': float(lng),
    }
    return data