def geolocate(url):
    """
    Find data about url in a geolocation source and store it in MongoDB.

    The lookup source is chosen from ``config``:

    * the GeoIP2 web service, when the user id, license key and service
      type are all set;
    * otherwise the configured local database, when its path and type are
      both set;
    * otherwise the bundled GeoLite2-City database.

    Any failure while resolving or looking up the address is stored as an
    ``{'_error': message}`` document instead of geolocation data, so this
    function does not raise for lookup problems.

    :param url: url to locate
    :return: geolocation_id -- the ``_id`` of an identical document that is
        already stored, or the string form of the newly inserted id
    """
    from geoip2.database import Reader
    from geoip2.webservice import Client

    geolocation_data = dict()
    try:
        ip = url_to_ip(url)
        response = None
        if config.GEOIP2_WEB_SERVICE_USER_ID and config.GEOIP2_WEB_SERVICE_LICENSE_KEY \
                and config.GEOIP2_WEB_SERVICE_TYPE:
            client = Client(config.GEOIP2_WEB_SERVICE_USER_ID,
                            config.GEOIP2_WEB_SERVICE_LICENSE_KEY)
            if config.GEOIP2_WEB_SERVICE_TYPE == 'country':
                response = client.country(ip).country
            elif config.GEOIP2_WEB_SERVICE_TYPE == 'city':
                response = client.city(ip).city
            elif config.GEOIP2_WEB_SERVICE_TYPE == 'insights':
                # NOTE(review): confirm that the insights response really
                # exposes an ``insights`` attribute; if it does not, this
                # branch always ends up as an ``_error`` document.
                response = client.insights(ip).insights
        elif config.GEOIP2_DATABASE_PATH and config.GEOIP2_DATABASE_TYPE:
            # The reader holds the database file open; close it as soon as
            # the lookup is done instead of leaking one handle per call.
            with Reader(config.GEOIP2_DATABASE_PATH) as reader:
                if config.GEOIP2_DATABASE_TYPE == 'country':
                    response = reader.country(ip).country
                elif config.GEOIP2_DATABASE_TYPE == 'city':
                    response = reader.city(ip).city
        else:
            with Reader(
                '/opt/project/worker/utils/geoloc_databases/GeoLite2-City.mmdb'
            ) as reader:
                response = reader.city(ip)

        for name in dir(response):
            if name.startswith('_'):
                continue
            value = getattr(response, name)
            if isinstance(value, dict):
                continue
            # Nested records are flattened to their attribute dict. Plain
            # values (strings, numbers, None), which appear when the
            # response is a single record, have no ``__dict__`` and are
            # stored as they are instead of aborting the whole lookup.
            geolocation_data[name] = getattr(value, '__dict__', value)
    except Exception as error:
        # Covers GeoIP2Error / HTTPError from the lookup as well as
        # resolution and database-file problems. Previously only the geoip2
        # errors were recorded; everything else was silently discarded by a
        # ``return`` inside ``finally``, leaving a partial or empty filter
        # for the query below.
        geolocation_data = {'_error': str(error)}

    # NOTE(review): if no attribute was collected (e.g. an unrecognised
    # service/database type leaves ``response`` as None) the filter is empty
    # and matches any stored document -- confirm whether that is intended.
    duplicate = db.geolocation.find_one(geolocation_data)
    if duplicate:
        return duplicate['_id']
    geolocation_id = db.geolocation.insert_one(geolocation_data)
    return str(geolocation_id.inserted_id)
def get_country(ip):
    """
    Return the country name for an IP address.

    The lookup uses the GeoIP2 database located at ``giLite2db_path``.

    :param ip: IP address to look up
    :return: the country name, or ``"NOT_FOUND"`` when the address is not in
        the database or the matching record has no country name
    """
    from geoip2.database import Reader
    from geoip2.errors import AddressNotFoundError

    try:
        # As far as I understand, Spark is not able to create a broadcast
        # version of the database.Reader object, so it has to be created on
        # every call of this function (although some optimisation may happen
        # inside udf()).
        # Because a reader is opened per call, it is also closed per call;
        # otherwise every invocation leaves a database handle open.
        with Reader(giLite2db_path) as reader:
            response = reader.country(ip)
        return response.country.name or "NOT_FOUND"
    except AddressNotFoundError:
        return "NOT_FOUND"
def _get_countries(addresses: List[str], reader: Reader) -> List[str]: res = set() for ip in addresses: try: geoip_result = reader.country(ip) this_result = geoip_result.country.name if not this_result: this_result = geoip_result.continent.name if not this_result: raise AddressNotFoundError res.add(this_result) except AddressNotFoundError: # TODO: Add entry specifying that at least one location has not been found continue return list(res)
def _get_countries(addresses: Iterable[str], reader: Reader) -> Tuple[List[str], List[str]]: locations = set() unresolved_ips = set() for ip in addresses: try: geoip_result = reader.country(ip) this_result = geoip_result.country.name if not this_result: this_result = geoip_result.continent.name if not this_result: raise AddressNotFoundError locations.add(this_result) except AddressNotFoundError: unresolved_ips.add(ip) continue return list(locations), list(unresolved_ips)
def obtener_pais(ip):
    """
    Return the country (or continent) name for an IP address.

    Results:

    * ``"Unknown"`` when ``ip`` is None, when the ``GeoLite2-Country.mmdb``
      database cannot be opened, or when the lookup fails;
    * ``"Local"`` for addresses starting with ``10.`` or ``192.168.``; these
      are answered without opening the database;
    * otherwise the country name, falling back to the continent name when
      the record has no country name.

    :param ip: IP address as a string (surrounding whitespace is ignored),
        or None
    :return: location name as described above
    """
    if ip is None:
        return "Unknown"

    # Strip once so the prefix check and the lookup see the same value.
    ip = ip.strip()
    if ip.startswith(("10.", "192.168.")):
        return "Local"

    try:
        country_database = Reader("GeoLite2-Country.mmdb")
    except Exception:
        # Best effort: a missing or unreadable database means "Unknown".
        return "Unknown"

    try:
        response = country_database.country(ip)
        if response.country.name:
            return response.country.name
        return response.continent.name
    except Exception as exc:
        print(exc)
        return "Unknown"
    finally:
        # A reader is opened per call, so release it per call as well.
        country_database.close()