예제 #1
0
def geolocate(url):
    """
    Look up geolocation data for a url and store it in MongoDB.

    The url is resolved with ``url_to_ip`` and the address is looked up in
    the first configured source: the GeoIP2 web service, the configured
    local database, or the bundled GeoLite2 City database. The public,
    non-dict attributes of the result are collected into a document. An
    identical document already present in ``db.geolocation`` is reused,
    otherwise a new one is inserted.

    Any failure during the lookup is stored as ``{'_error': <message>}``
    instead of being raised, so the function stays best-effort.

    :param url: url to locate
    :return: ``_id`` of the matching existing document as stored, or the
        inserted id converted to ``str`` for a newly inserted document
    """
    from geoip2.database import Reader
    from geoip2.webservice import Client

    default_database_path = (
        '/opt/project/worker/utils/geoloc_databases/GeoLite2-City.mmdb'
    )

    geolocation_data = dict()

    try:
        ip = url_to_ip(url)

        response = None

        if config.GEOIP2_WEB_SERVICE_USER_ID and config.GEOIP2_WEB_SERVICE_LICENSE_KEY \
                and config.GEOIP2_WEB_SERVICE_TYPE:
            client = Client(config.GEOIP2_WEB_SERVICE_USER_ID,
                            config.GEOIP2_WEB_SERVICE_LICENSE_KEY)

            if config.GEOIP2_WEB_SERVICE_TYPE == 'country':
                response = client.country(ip).country
            elif config.GEOIP2_WEB_SERVICE_TYPE == 'city':
                response = client.city(ip).city
            elif config.GEOIP2_WEB_SERVICE_TYPE == 'insights':
                # The insights model exposes city/country/... records but no
                # ``insights`` attribute, so the whole model is used here.
                response = client.insights(ip)
        elif config.GEOIP2_DATABASE_PATH and config.GEOIP2_DATABASE_TYPE:
            # The context manager closes the database file after the lookup.
            with Reader(config.GEOIP2_DATABASE_PATH) as reader:
                if config.GEOIP2_DATABASE_TYPE == 'country':
                    response = reader.country(ip).country
                elif config.GEOIP2_DATABASE_TYPE == 'city':
                    response = reader.city(ip).city
        else:
            with Reader(default_database_path) as reader:
                response = reader.city(ip)

        for name in dir(response):
            if name.startswith('_'):
                continue
            value = getattr(response, name)
            if isinstance(value, dict):
                continue
            # Record objects are stored through their attribute dict; plain
            # values (str, int, None, ...) have no ``__dict__`` and are
            # stored as they are.
            geolocation_data[name] = getattr(value, '__dict__', value)

    except Exception as error:
        # Best-effort: every lookup failure (GeoIP2 errors, resolution
        # errors, ...) is recorded rather than raised. Unlike a ``return``
        # inside ``finally``, this does not swallow KeyboardInterrupt or
        # SystemExit, and it never leaves a partial document behind.
        geolocation_data = {'_error': str(error)}

    if not geolocation_data:
        # An empty filter would match an arbitrary stored document.
        geolocation_data = {'_error': 'no geolocation data'}

    duplicate = db.geolocation.find_one(geolocation_data)

    if duplicate:
        # NOTE(review): this id is returned as stored, while a new id is
        # returned as ``str`` - confirm callers before unifying the types.
        return duplicate['_id']

    geolocation_id = db.geolocation.insert_one(geolocation_data)

    return str(geolocation_id.inserted_id)
예제 #2
0
def get_country(ip):
    """
    Return the country name for an IP address, or "NOT_FOUND".

    :param ip: IP address to look up in the database at ``giLite2db_path``
    :return: the country name, or "NOT_FOUND" when the address is not in the
        database or the matching record has no country name
    """
    from geoip2.database import Reader
    from geoip2.errors import AddressNotFoundError
    try:
        # As far as I understand, Spark cannot create a broadcast version of
        # the database.Reader object, so it has to be created on every call
        # of this function, although some optimisation may happen inside
        # udf(). Because a reader is opened per call, it is closed again
        # right after the lookup so file handles do not accumulate.
        with Reader(giLite2db_path) as reader:
            response = reader.country(ip)
        country = response.country.name
        return country if country else "NOT_FOUND"
    except AddressNotFoundError:
        return "NOT_FOUND"
예제 #3
0
def _get_countries(addresses: List[str], reader: Reader) -> List[str]:
    """Return the distinct location names found for the given IP addresses.

    Each address is looked up with ``reader.country``. The country name is
    used when present, otherwise the continent name. Addresses that raise
    ``AddressNotFoundError`` or yield neither name are skipped.

    The result is built from a set, so its order is not defined.
    """
    found = set()
    for address in addresses:
        try:
            lookup = reader.country(address)
            label = lookup.country.name or lookup.continent.name
        except AddressNotFoundError:
            # TODO: Add entry specifying that at least one location has not been found
            continue
        if label:
            found.add(label)
    return list(found)
예제 #4
0
def _get_countries(addresses: Iterable[str],
                   reader: Reader) -> Tuple[List[str], List[str]]:
    """Resolve IP addresses to location names and report the failures.

    Each address is looked up with ``reader.country``. The country name is
    used when present, otherwise the continent name. An address counts as
    unresolved when the lookup raises ``AddressNotFoundError`` or yields
    neither name.

    Returns a pair ``(locations, unresolved_ips)``. Both lists are built
    from sets, so they hold no duplicates and their order is not defined.
    """
    resolved = set()
    missing = set()
    for address in addresses:
        try:
            record = reader.country(address)
            place = record.country.name or record.continent.name
        except AddressNotFoundError:
            place = None
        if place:
            resolved.add(place)
        else:
            missing.add(address)
    return list(resolved), list(missing)
예제 #5
0
파일: utiles.py 프로젝트: ltdicai/tdc-tp2
def obtener_pais(ip):
    """Return a location label for an IP address.

    :param ip: IP address as a string, or None; surrounding whitespace is
        ignored
    :return: "Unknown" when ``ip`` is None, the database cannot be opened or
        the lookup fails; "Local" for addresses starting with "10." or
        "192.168."; otherwise the country name from GeoLite2-Country.mmdb,
        falling back to the continent name when the record has no country
        name
    """
    if ip is None:
        return "Unknown"

    # Strip once so that padded input is classified the same way it is
    # looked up.
    ip = ip.strip()
    if ip.startswith(("10.", "192.168.")):
        # Private addresses need no database access.
        return "Local"

    try:
        country_database = Reader("GeoLite2-Country.mmdb")
    except Exception:
        # Best-effort: a missing or unreadable database yields "Unknown".
        return "Unknown"

    try:
        response = country_database.country(ip)
        if response.country.name:
            return response.country.name
        else:
            return response.continent.name
    except Exception as exc:
        print(exc)
        return "Unknown"
    finally:
        # A reader is opened per call, so release it on every path.
        country_database.close()