def geolocate(url):
    """
    Finds data about url in geolocation database and transfers it mongodb

    :param url: url to lacate
    :return: geolocation_id
    """
    from geoip2.database import Reader
    from geoip2.webservice import Client
    from geoip2.errors import GeoIP2Error, HTTPError

    geolocation_data = dict()

    try:
        ip = url_to_ip(url)

        response = None

        if config.GEOIP2_WEB_SERVICE_USER_ID and config.GEOIP2_WEB_SERVICE_LICENSE_KEY \
                and config.GEOIP2_WEB_SERVICE_TYPE:
            client = Client(config.GEOIP2_WEB_SERVICE_USER_ID,
                            config.GEOIP2_WEB_SERVICE_LICENSE_KEY)

            # Keep the full model object; the flattening loop below reads
            # its record attributes (city, country, location, ...).
            if config.GEOIP2_WEB_SERVICE_TYPE == 'country':
                response = client.country(ip)
            elif config.GEOIP2_WEB_SERVICE_TYPE == 'city':
                response = client.city(ip)
            elif config.GEOIP2_WEB_SERVICE_TYPE == 'insights':
                response = client.insights(ip)
        elif config.GEOIP2_DATABASE_PATH and config.GEOIP2_DATABASE_TYPE:
            reader = Reader(config.GEOIP2_DATABASE_PATH)

            if config.GEOIP2_DATABASE_TYPE == 'country':
                response = reader.country(ip)
            elif config.GEOIP2_DATABASE_TYPE == 'city':
                response = reader.city(ip)
        else:
            reader = Reader(
                '/opt/project/worker/utils/geoloc_databases/GeoLite2-City.mmdb'
            )
            response = reader.city(ip)

        for name in dir(response):
            value = getattr(response, name)
            if not name.startswith('_') and not isinstance(value, dict):
                geolocation_data[name] = value.__dict__

    except (GeoIP2Error, HTTPError) as error:
        geolocation_data = {'_error': str(error)}

    finally:
        duplicate = db.geolocation.find_one(geolocation_data)

        if duplicate:
            return duplicate['_id']

        geolocation_id = db.geolocation.insert_one(geolocation_data)

        return str(geolocation_id.inserted_id)
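
For reference, the local-database fallback above boils down to a few lines. A minimal, hedged sketch (the database path and sample IP are assumptions, not part of the original):

from geoip2.database import Reader
from geoip2.errors import AddressNotFoundError

with Reader('./GeoLite2-City.mmdb') as reader:  # hypothetical local path
    try:
        response = reader.city('128.101.101.101')  # sample public IP
        print(response.country.iso_code, response.city.name)
    except AddressNotFoundError:
        print('IP not found in the database')
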
Example #2
class GeoIP:
    def __init__(self, db='/home/insane/PycharmProjects/octopus/GeoLite2-City.mmdb'):
        self.r = Reader(db)

    def get_continent_code(self, ip):
        return self.r.city(ip).continent.code

    def get_longitude(self, ip):
        return self.r.city(ip).location.longitude

    def get_latitude(self, ip):
        return self.r.city(ip).location.latitude

    def get_iso_code(self, ip):
        return self.r.city(ip).country.iso_code
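
A short usage sketch for the class above (the database path is an assumption; any local GeoLite2-City.mmdb works):

geo = GeoIP(db='./GeoLite2-City.mmdb')  # hypothetical path
ip = '128.101.101.101'  # sample public IP
print(geo.get_iso_code(ip), geo.get_continent_code(ip))
print(geo.get_latitude(ip), geo.get_longitude(ip))
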
Example #3
def main():
    args = docopt(__doc__)
    out = csv.writer(sys.stdout)
    reader = Reader(args['-f'])
    try:
        ips = [x.strip() for x in open(args['IPFILE'], 'r')]
    except FileNotFoundError:
        if not args['-q']:
            print("IP file {} count not be opened, interpreting as single IP!".format(args['IPFILE']))
        ips = [args['IPFILE'].strip()]

    for ip in ips:
        try:
            resp = reader.city(ip)
        except ValueError:
            if not args['-q']:
                print("{} is not an IP address, skipping!".format(ip), file=sys.stderr)
            continue
        row = []
        if args['-i']:
            row.append(ip)
        if args['-c']:
            row.append(resp.country.names.get('en', '-'))
        row.append(resp.country.iso_code)
        try:
            row.append(resp.city.names['en'])
        except KeyError:
            row.append('-')

        out.writerow(row)
Example #4
def set_mirror_country(mirror_info: Dict[AnyStr, Union[Dict, AnyStr]]) -> None:
    """
    Set country by IP of a mirror
    :param mirror_info: Dict with info about a mirror
    """

    mirror_name = mirror_info['name']
    try:
        ip = socket.gethostbyname(mirror_name)
    except socket.gaierror:
        logger.error('Can\'t get IP of mirror %s', mirror_name)
        mirror_info['country'] = 'Unknown'
        return
    db = Reader(GEOPIP_DB)
    logger.info('Set country for mirror "%s"', mirror_name)
    try:
        match = db.city(ip)  # type: City
        mirror_info['country'] = match.country.name
    except AddressNotFoundError:
        logger.warning(
            'GeoIP db does not have information about IP "%s"',
            ip,
        )
        mirror_info['country'] = 'Unknown'
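
A hedged usage sketch, assuming GEOPIP_DB points at a local GeoLite2-City.mmdb and logger is configured:

mirror_info = {'name': 'mirror.example.com'}  # hypothetical mirror hostname
set_mirror_country(mirror_info)
print(mirror_info['country'])  # country name, or 'Unknown' on failure
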
Example #5
 def get_location(cls, ip=None):
     if not ip:
         ip = cls.get_ip()
     try:
         city_client = Reader("./utils/geoip2/GeoLite2-City.mmdb")
         response = city_client.city(ip)
         if response.city.name:
             location = ",".join(
                 [response.country.iso_code, response.city.name])
         else:
             location = ",".join([response.country.iso_code, ""])
     except Exception as e:
         print("fail to get location with geoip2: %s" % str(e))
         try:
             api_key = 'at_IW99hSbVb4uxQq1SbaoIanDbulTbU'
             api_url = 'https://geo.ipify.org/api/v1?'
             url = api_url + 'apiKey=' + api_key + '&ipAddress=' + ip
             temp_region = loads(
                 urlopen(url).read().decode('utf8'))["location"]
             try:
                 location = ",".join(
                     [temp_region["country"], temp_region["city"]])
             except (KeyError, ValueError):
                 location = temp_region
         except URLError:
             location = "network error"
     return location
Example #6
def get_location_for_ip(ip_addr):

    try:
        reader = Reader(
            os.path.join(GEOLOCATION_DATABASE_PATH, GEOLOCATION_DATABASE_CITY))
        return reader.city(ip_addr)
    except Exception as e:
        logger.warn("Failed retrieving geoip info for %s: %s" %
                    (ip_addr, str(e)))
        return None
Example #7
def fetch_geoip(ip_address, language=None):
    # Prepare response object
    response = {}

    # Get hostname from IP address, pass if fail
    try:
        response['ip_address'] = ip_address
        response['hostname'] = socket.gethostbyaddr(ip_address)[0]
    except Exception:
        pass

    # Load GeoLite2 City database
    geoip = Reader(path.join(config.MMDB_PATH, "GeoLite2-City.mmdb"))

    # Try to fetch data and build response, otherwise raise exception
    try:
        data = geoip.city(ip_address)

        # geoip.city
        response['city'] = {
            "name": data.city.name,
            "id": data.city.geoname_id,
            "region": data.subdivisions.most_specific.name,
            "region_code": data.subdivisions.most_specific.iso_code
        }

        # geoip.country
        response['country'] = {
            "name": data.country.name,
            "iso_code": data.country.iso_code,
            "continent": data.continent.name,
            "continent_code": data.continent.code,
            "is_eu": data.country.is_in_european_union
        }

        # geoip.location
        response['location'] = {
            "accuracy_radius": data.location.accuracy_radius,
            "zip": data.postal.code,
            "latitude": data.location.latitude,
            "longitude": data.location.longitude,
            "timezone": data.location.time_zone
        }
    except AddressNotFoundError:
        geoip.close()  # close the reader before propagating the error
        raise

    # Close database instances
    geoip.close()

    # Load GeoLite2 ASN database (optional)
    response['asn'] = fetch_asn(ip_address)

    # Return built response object
    return response
Example #8
 def find_location(ip):
     reader = Reader(TannerConfig.get('DATA', 'geo_db'))
     try:
         location = reader.city(ip)
         info = dict(
             country=location.country.name,
             country_code=location.country.iso_code,
             city=location.city.name,
             zip_code=location.postal.code,
         )
     except geoip2.errors.AddressNotFoundError:
         info = "NA"  # When IP doesn't exist in the db, set info as "NA - Not Available"
     return info
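
Treating find_location as a standalone helper, a hedged usage sketch (assumes TannerConfig maps ('DATA', 'geo_db') to a local GeoLite2-City.mmdb):

info = find_location('128.101.101.101')  # sample public IP
print(info if info == "NA" else info['country'])
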
Example #10
 def ip2city_py(ip):
     global reader
     if reader is None:
         # assume all work nodes have mmdb installed in the following path
         reader = Reader(
             "/home/spark/spark-2.4.5-bin-hadoop2.7/maxmind/GeoLite2-City.mmdb"
         )
     try:
         response = reader.city(ip)
         city = response.city.name
         if city is None:
             return None
         return city
     except Exception:
         return None
Example #11
    def ip2city_py(ip):
        global reader
        if reader is None:
            # assume all work nodes have mmdb installed in the following path
            reader = Reader(
                "/home/cloudera/Desktop/spark_sql_101/maxmind/GeoLite2-City.mmdb"
            )
        try:
            response = reader.city(ip)
            city = response.city.name
            if city is None:
                return None
            return city

        except Exception:
            return None
Example #12
def statics_city():
    """Aggregate visitor counts by region."""
    city = {}
    geoip2_reader = Reader("GeoLite2-City.mmdb")
    for _ip, _cnt in statics_all()["vistors"].items():
        _city_name = "Unknown"
        try:
            _city = geoip2_reader.city(_ip)
            # _city_name = "{}/{}".format(_city.country.names.get("en","").encode("utf8"),_city.city.names.get("en","").encode("utf8"))
            _city_name = "{}/{}".format(_city.country.names.get("zh-CN", ""),
                                        _city.city.names.get("zh-CN", ""))
        except Exception as e:
            print(e)
        city.setdefault(_city_name, 0)
        city[_city_name] += _cnt
    return city
Example #13
class MaxMindCityDatabase(BaseDriver):
    """
    MaxMind City Database
    """
    def __init__(self, database):
        """
        Constructor
        :param database:    Path to the city database
        """
        self._reader = Reader(database)

        # Close reader when app closes down
        atexit.register(lambda: self._reader.close())

    def insights(self, ip):
        """
        Look up city-level insights for an IP address.
        :param ip:  The IP address
        :return:    Insights
        :rtype:     geoip2.models.City
        """
        return self._reader.city(ip)
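
A hedged usage sketch for the driver above (assumes BaseDriver and the module's imports are in scope; the database path is an assumption):

db = MaxMindCityDatabase('./GeoLite2-City.mmdb')  # hypothetical path
print(db.insights('128.101.101.101').city.name)  # sample public IP
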
Example #14
def extract_location(line):
    global reader

    if reader is None:
        reader = Reader(
            "/home/spark/spark-2.4.5-bin-hadoop2.7/maxmind/GeoLite2-City.mmdb")

    match = pattern.match(line)

    if match:
        ip = match.group(1)
        response = reader.city(ip)
        country = response.country.name
        city = response.city.name

        if city is None:
            return country
        else:
            return "{},{}".format(country, city)

    else:
        return "InvalidLogFound"
Example #15
def get_lng_lat(ip_address):
    """
    Takes an IPv4 or IPv6 address and returns a 2-tuple of (longitude, latitude).
    """
    coord = None

    if ip_address and is_public_ip_addr(ip_address):

        city_db_path = get_city_db_path()
        db_reader = Reader(city_db_path)

        try:
            result = db_reader.city(ip_address)
            coord = (result.location.longitude, result.location.latitude)

        except AddressNotFoundError:
            _LOGGER.warning('The address %s is not in the GeoLite2 database.',
                            ip_address)

        finally:
            db_reader.close()

    return coord
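
A quick check of the helper above (assumes is_public_ip_addr and get_city_db_path are available in scope):

coord = get_lng_lat('128.101.101.101')  # sample public IP
if coord:
    lng, lat = coord
    print('lng={}, lat={}'.format(lng, lat))
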
Example #16
class Handler(GeoipService.ContextIface):
    def __init__(self, city_db_path):
        # maxmind docs recommend reusing a Reader across requests
        self.reader = Reader(city_db_path)

    def is_healthy(self, context):
        return True

    def get_country(self, context, ip_address):
        # TODO: add anonymous info (tor/ec2/rackspace/etc)
        try:
            response = self.reader.city(ip_address)
        except AddressNotFoundError:
            country_code = ""
            country_name = ""
        else:
            country_code = response.country.iso_code
            country_name = response.country.name

        return GeoIpRecord(
            country_code=country_code,
            country_name=country_name,
        )
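
A hedged sketch of exercising the handler outside its RPC framework; get_country never reads context, so None is passed here:

handler = Handler('./GeoLite2-City.mmdb')  # hypothetical path
record = handler.get_country(None, '128.101.101.101')
print(record.country_code, record.country_name)
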
Example #17
def add_location_info(record):
    global reader
    if reader is None:
        reader = Reader(
            "/home/cloudera/spark-2.4.4-bin-hadoop2.6/maxmind/GeoLite2-City.mmdb"
        )

    ip = record.split(",")[13]

    try:
        response = reader.city(ip)
        country = response.country.name
        city = response.city.name
        latitude = response.location.latitude
        longitude = response.location.longitude
        qk = quadkey.from_geo((latitude, longitude), 15)
        acc_num_good_records.add(1)
        return "{},{},{},{},{},{}".format(record, country, city, latitude,
                                          longitude, qk.key)

    except Exception:
        acc_num_bad_records.add(1)
        return "-----"
Example #18
 def get_location_by_remote_service(cls, ip=None):
     if not ip:
         ip = cls.get_ip()
     if ip.find("error") >= 0 or ip.find(".") < 0:
         location = "ip data error"
     else:
         try:
             city_client = Reader("./utils/geoip2/GeoLite2-City.mmdb")
             response = city_client.city(ip)
             location = {
                 "country": response.country.names["zh-CN"],
                 "subdivision": response.subdivisions.most_specific.names["zh-CN"],
                 "city": response.city.names["zh-CN"],
                 "accuracy_radius": response.location.accuracy_radius,
                 "latitude": response.location.latitude,
                 "longitude": response.location.longitude,
                 "time_zone": response.location.time_zone
             }
         except Exception as e:
             print("fail to get location with geoip2: %s" % str(e))
             try:
                 api_key = 'at_IW99hSbVb4uxQq1SbaoIanDbulTbU'
                 api_url = 'https://geo.ipify.org/api/v1?'
                 url = api_url + 'apiKey=' + api_key + '&ipAddress=' + ip
                 location = loads(
                     urlopen(url).read().decode('utf8'))["location"]
             except URLError:
                 location = "network error"
     return location
Example #19
class GeoLite(Database):
    """ru, en, de languages, country, continent, city"""
    def __init__(self, path):
        super().__init__(path)
        self._database = Reader(path)

    def _get_main_information(self, location):
        if location:
            return location.city.names.get('ru', '')
        else:
            return ''

    def get_data(self, ips, proc_num, return_dict):
        result = []

        for ip in ips:
            try:
                location = self._database.city(Database.int2ip(ip))
            except AddressNotFoundError:
                location = None

            result.append(self._get_main_information(location))

        return_dict[proc_num] = result
Example #20
def city():
    geoip2_reader = Reader('GeoLite2-City.mmdb')
    # Reader.city() requires an IP address argument; a sample address is shown
    geoip2_reader.city('128.101.101.101')
Example #21
class GeoIPHandler(object):
    def __init__(self, data_folder, maxmind_license_key):
        self.data_folder = data_folder
        self.maxmind_license_key = maxmind_license_key
        self.dbfile = abspath(join(self.data_folder, 'GeoLite2-City.mmdb'))
        self.logger = getLogger()
        self.reader = None
        self.reader_manager(action='open')

        self.logger.info('Opening persistent connection to the MaxMind DB...')

    def reader_manager(self, action=None):
        if action == 'open':
            try:
                self.reader = Reader(self.dbfile)
            except FileNotFoundError:
                self.logger.error("Could not find MaxMind DB! Downloading!")
                result_status = self.download()
                if result_status:
                    self.logger.error(
                        "Could not download MaxMind DB! You may need to manually install it."
                    )
                    exit(1)
                else:
                    self.reader = Reader(self.dbfile)
        else:
            self.reader.close()

    def lookup(self, ipaddress):
        ip = ipaddress
        self.logger.debug(
            'Getting lat/long for Tautulli stream using ip with last octet ending in %s',
            ip.split('.')[-1:][0])
        return self.reader.city(ip)

    def update(self):
        today = date.today()

        try:
            dbdate = date.fromtimestamp(stat(self.dbfile).st_mtime)
            db_next_update = date.fromtimestamp(stat(
                self.dbfile).st_mtime) + timedelta(days=30)

        except FileNotFoundError:
            self.logger.error("Could not find MaxMind DB as: %s", self.dbfile)
            self.download()
            dbdate = date.fromtimestamp(stat(self.dbfile).st_mtime)
            db_next_update = date.fromtimestamp(stat(
                self.dbfile).st_mtime) + timedelta(days=30)

        if db_next_update < today:
            self.logger.info("Newer MaxMind DB available, Updating...")
            self.logger.debug(
                "MaxMind DB date %s, DB updates after: %s, Today: %s", dbdate,
                db_next_update, today)
            self.reader_manager(action='close')
            self.download()
            self.reader_manager(action='open')
        else:
            db_days_update = db_next_update - today
            self.logger.debug("MaxMind DB will update in %s days",
                              abs(db_days_update.days))
            self.logger.debug(
                "MaxMind DB date %s, DB updates after: %s, Today: %s", dbdate,
                db_next_update, today)

    def download(self):
        tar_dbfile = abspath(join(self.data_folder, 'GeoLite2-City.tar.gz'))
        maxmind_url = (
            'https://download.maxmind.com/app/geoip_download?edition_id=GeoLite2-City'
            f'&suffix=tar.gz&license_key={self.maxmind_license_key}')
        downloaded = False

        retry_counter = 0

        while not downloaded:
            self.logger.info('Downloading GeoLite2 DB from MaxMind...')
            try:
                urlretrieve(maxmind_url, tar_dbfile)
                downloaded = True
            except HTTPError as e:
                # HTTPError subclasses URLError, so it must be caught first
                if e.code == 401:
                    self.logger.error(
                        "Your MaxMind license key is incorrect! Check your config: %s",
                        e)
                    result_status = 1
                    return result_status
                else:
                    self.logger.error(
                        "Problem downloading new MaxMind DB... Trying again: %s",
                        e)
                    sleep(2)
                    retry_counter += 1

                if retry_counter >= 3:
                    self.logger.error(
                        "Retried downloading the new MaxMind DB 3 times and failed... Aborting!"
                    )
                    result_status = 1
                    return result_status
            except URLError as e:
                self.logger.error("Problem downloading new MaxMind DB: %s", e)
                result_status = 1
                return result_status
        try:
            remove(self.dbfile)
        except FileNotFoundError:
            self.logger.warning(
                "Cannot remove MaxMind DB as it does not exist!")

        self.logger.debug("Opening MaxMind tar file : %s", tar_dbfile)

        tar = taropen(tar_dbfile, 'r:gz')

        for files in tar.getmembers():
            if 'GeoLite2-City.mmdb' in files.name:
                self.logger.debug('"GeoLite2-City.mmdb" FOUND in tar file')
                files.name = basename(files.name)
                tar.extract(files, self.data_folder)
                self.logger.debug('%s has been extracted to %s', files,
                                  self.data_folder)
        tar.close()
        try:
            remove(tar_dbfile)
            self.logger.debug('Removed the MaxMind DB tar file.')
        except FileNotFoundError:
            self.logger.warning(
                "Cannot remove MaxMind DB TAR file as it does not exist!")
Example #22
        .getOrCreate()

    # (1) Define a normal Python function and match arguments to your UDF
    reader = None

    def ip2city_py(ip):
        global reader
        if reader is None:
            # assume all work nodes have mmdb installed in the following path
            reader = Reader("/home/spark/dataaccess_log_analysis/maxmind/GeoLite2-City.mmdb")
            # alternative local path: "./GeoLite2-City.mmdb"
        try:
            response = reader.city(ip)
            city = response.city.name
            if city is None:
                return None
            return city
        except Exception:
            return None

    # (2) Register UDF function
    ip2city = udf(ip2city_py, StringType())

    # Use it
    page_view = spark.read.csv("hdfs://master/user/spark/spark_sql_101/page_views/data",
                               sep="\t",
                               schema="logtime string, userid int, ip string, page string, \
                                      ref string, os string, os_ver string, agent string")
Example #23
        stat_all['vistors'][_ip] += _cnt

    for _status, _cnt in _stat['status'].items():
        stat_all['status'].setdefault(_status, 0)
        stat_all['status'][_status] += _cnt

# Aggregate visitor counts by region
stat_all['city'] = {}

from geoip2.database import Reader

geoip2_reader = Reader('GeoLite2-City.mmdb')
for _ip, _cnt in stat_all['vistors'].items():
    _city_name = 'unknown'
    try:
        _city = geoip2_reader.city(_ip)
        _city_name = '{}/{}'.format(_city.country.names.get('en', ''),
                                    _city.city.names.get('en', ''))
        #_city_name = '{}/{}'.format(_city.country.names.get('zh-CN', ''), _city.city.names.get('zh-CN', ''))
    except Exception as e:
        print(e)
    stat_all['city'].setdefault(_city_name, 0)
    stat_all['city'][_city_name] += _cnt

geoip2_reader.close()

# Print the results

print('=' * 70)
print('|1. Overview{:>55}|'.format(''))
Example #24
def logparse(
        log_path, influxdb_host, influxdb_port, influxdb_database, influxdb_user, influxdb_user_pass, influxdb_retention,
        influxdb_shard, geo_measurement, log_measurement, send_nginx_logs, geoip_db_path, inode):
    # Preparing variables and params
    ips = {}
    geohash_fields = {}
    geohash_tags = {}
    log_data_fields = {}
    log_data_tags = {}
    nginx_log = {}
    hostname = uname()[1]
    client = InfluxDBClient(
        host=influxdb_host, port=influxdb_port, username=influxdb_user, password=influxdb_user_pass, database=influxdb_database)

    try:
        logging.debug('Testing InfluxDB connection')
        version = client.request('ping', expected_response_code=204).headers['X-Influxdb-Version']
        logging.debug(f'Influxdb version: {version}')
    except ConnectionError as e:
        logging.critical('Error testing connection to InfluxDB. Please check your url/hostname.\n'
                         f'Error: {e}'
                        )
        exit(1)

    try:
        databases = [db['name'] for db in client.get_list_database()]
        if influxdb_database in databases:    
            logging.debug(f'Found database: {influxdb_database}')
    except InfluxDBClientError as e:
        logging.critical('Error getting database list! Please check your InfluxDB configuration.\n'
                         f'Error: {e}'
                        )
        exit(1)

    if influxdb_database not in databases:
        logging.info(f'Creating database: {influxdb_database}')
        client.create_database(influxdb_database)

        retention_policies = [policy['name'] for policy in client.get_list_retention_policies(database=influxdb_database)]
        if f'{influxdb_database} {influxdb_retention}-{influxdb_shard}' not in retention_policies:
            logging.info(f'Creating {influxdb_database} retention policy ({influxdb_retention}-{influxdb_shard})')
            client.create_retention_policy(name=f'{influxdb_database} {influxdb_retention}-{influxdb_shard}', duration=influxdb_retention, replication='1',
                                                database=influxdb_database, default=True, shard_duration=influxdb_shard)

    re_ipv4 = IPV4_NGINX_LOG_LINE
    re_ipv6 = IPV6_NGINX_LOG_LINE

    gi = Reader(geoip_db_path)

    if send_nginx_logs in ('true', 'True'):
        send_logs = True
    else:
        send_logs = False
        re_ipv4 = IPV4_REGEX
        re_ipv6 = IPV6_REGEX
        logging.info('SEND_NGINX_LOGS set to false')
    # if not regex_tester(log_path,3):
    #     if send_logs:
    #         re_ipv4 = IPV4_REGEX
    #         re_ipv6 = IPV6_REGEX
    #         send_logs = False
    #         logging.warning('NGINX log metrics disabled! Double check your NGINX custom log format..')

    # Main loop to parse access.log file in tailf style with sending metrics.
    with open(log_path, 'r') as log_file:
        logging.info('Starting log parsing')
        str_results = stat(log_path)
        st_size = str_results[6]
        log_file.seek(st_size)
        while True:
            geo_metrics = []
            log_metrics = []
            where = log_file.tell()
            line = log_file.readline()
            inodenew = stat(log_path).st_ino
            if inode != inodenew:
                break
            if not line:
                sleep(1)
                log_file.seek(where)
            else:
                if re_ipv4.match(line):
                    m = re_ipv4.match(line)
                    ip = m.group(1)
                    log = re_ipv4
                elif re_ipv6.match(line):
                    m = re_ipv6.match(line)
                    ip = m.group(1)
                    log = re_ipv6
                else:
                    logging.warning('Failed to match regex that previously matched!? Skipping this line!\n'
                                    'If you think the regex should have matched the line, please share the log line below on https://discord.gg/HSPa4cz or Github: https://github.com/gilbN/geoip2influx\n'
                                    f'Line: {line}'
                                   )
                    continue
                if ipadd(ip).iptype() == 'PUBLIC' and ip:
                    info = gi.city(ip)
                    if info is not None:
                        geohash = encode(info.location.latitude, info.location.longitude)
                        geohash_fields['count'] = 1
                        geohash_tags['geohash'] = geohash
                        geohash_tags['ip'] = ip
                        geohash_tags['host'] = hostname
                        geohash_tags['country_code'] = info.country.iso_code
                        geohash_tags['country_name'] = info.country.name
                        geohash_tags['state'] = info.subdivisions.most_specific.name
                        geohash_tags['state_code'] = info.subdivisions.most_specific.iso_code
                        geohash_tags['city'] = info.city.name
                        geohash_tags['postal_code'] = info.postal.code
                        geohash_tags['latitude'] = info.location.latitude
                        geohash_tags['longitude'] = info.location.longitude
                        ips['tags'] = geohash_tags
                        ips['fields'] = geohash_fields
                        ips['measurement'] = geo_measurement
                        geo_metrics.append(ips)
                        logging.debug(f'Geo metrics: {geo_metrics}')
                        try:
                            client.write_points(geo_metrics)
                        except (InfluxDBServerError, ConnectionError) as e:
                            logging.error('Error writing data to InfluxDB! Check your database!\n'
                                          f'Error: {e}'
                                         )

                if send_logs:
                    data = search(log, line)
                    if ipadd(ip).iptype() == 'PUBLIC' and ip:
                        info = gi.city(ip)
                        if info is not None:
                            datadict = data.groupdict()
                            log_data_fields['count'] = 1
                            log_data_fields['bytes_sent'] = int(datadict['bytes_sent'])
                            log_data_fields['request_time'] = float(datadict['request_time'])
                            if datadict['connect_time'] == '-':
                                log_data_fields['connect_time'] = 0.0
                            else:
                                log_data_fields['connect_time'] = float(datadict['connect_time'])
                            log_data_tags['ip'] = datadict['ipaddress']
                            log_data_tags['datetime'] = datetime.strptime(datadict['dateandtime'], '%d/%b/%Y:%H:%M:%S %z')
                            log_data_tags['remote_user'] = datadict['remote_user']
                            log_data_tags['method'] = datadict['method']
                            log_data_tags['referrer'] = datadict['referrer']
                            log_data_tags['host'] = datadict['host']
                            log_data_tags['http_version'] = datadict['http_version']
                            log_data_tags['status_code'] = datadict['status_code']
                            log_data_tags['bytes_sent'] = datadict['bytes_sent']
                            log_data_tags['url'] = datadict['url']
                            log_data_tags['user_agent'] = datadict['user_agent']
                            log_data_tags['request_time'] = datadict['request_time']
                            log_data_tags['connect_time'] = datadict['connect_time']
                            log_data_tags['city'] = datadict['city']
                            log_data_tags['country_code'] = datadict['country_code']
                            log_data_tags['country_name'] = info.country.name
                            nginx_log['tags'] = log_data_tags
                            nginx_log['fields'] = log_data_fields
                            nginx_log['measurement'] = log_measurement
                            log_metrics.append(nginx_log)
                            logging.debug(f'NGINX log metrics: {log_metrics}')
                            try:
                                client.write_points(log_metrics)
                            except (InfluxDBServerError, InfluxDBClientError, ConnectionError) as e:
                                logging.error('Error writing data to InfluxDB! Check your database!\n'
                                            f'Error: {e}'
                                            )
Example #25
            flatten(current[k], new_key, result)
    else:
        result[key] = current
    return result


for message in consumer:
    # message value and key are raw bytes -- decode if necessary!
    # e.g., for unicode: `message.value.decode('utf-8')`
    # print ("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
    #                                  message.offset, message.key,
    #                                 message.value))

    data = json.loads(message.value)
    try:
        res = mmdb.city(data['dest_ip'])
        asn = asndb.asn(data['dest_ip'])
        data['dest_latitude'] = res.location.latitude
        data['dest_longitude'] = res.location.longitude
        data['dest_country_name'] = res.country.names['en']
        data['dest_asn'] = asn.autonomous_system_number
        data['dest_autonomous_system'] = asn.autonomous_system_organization
    except Exception:
        data['dest_latitude'] = 0.0
        data['dest_longitude'] = 0.0
        data['dest_country_name'] = 'unknown'
        data['dest_asn'] = 'unknown'
        data['dest_autonomous_system'] = 'unknown'
    try:
        res = mmdb.city(data['src_ip'])
        asn = asndb.asn(data['src_ip'])
Example #26
class GeoIPReader:
    """Slim wrapper around GeoIP API"""
    def __init__(self):
        self.__reader: Optional[Reader] = None
        self.__last_mtime: float = 0.0
        self.__open()

    def __open(self):
        """Get GeoIP Reader, if configured, otherwise none"""
        path = CONFIG.y("geoip")
        if path == "" or not path:
            return
        try:
            self.__reader = Reader(path)
            self.__last_mtime = stat(path).st_mtime
            LOGGER.info("Loaded GeoIP database", last_write=self.__last_mtime)
        except OSError as exc:
            LOGGER.warning("Failed to load GeoIP database", exc=exc)

    def __check_expired(self):
        """Check if the geoip database has been opened longer than 8 hours,
        and re-open it, as it will probably will have been re-downloaded"""
        now = time()
        diff = datetime.fromtimestamp(now) - datetime.fromtimestamp(
            self.__last_mtime)
        diff_hours = diff.total_seconds() // 3600
        if diff_hours >= 8:
            LOGGER.info("GeoIP databased loaded too long, re-opening",
                        diff=diff)
            self.__open()

    @property
    def enabled(self) -> bool:
        """Check if GeoIP is enabled"""
        return bool(self.__reader)

    def city(self, ip_address: str) -> Optional[City]:
        """Wrapper for Reader.city"""
        with Hub.current.start_span(
                op="authentik.events.geo.city",
                description=ip_address,
        ):
            if not self.enabled:
                return None
            self.__check_expired()
            try:
                return self.__reader.city(ip_address)
            except (GeoIP2Error, ValueError):
                return None

    def city_dict(self, ip_address: str) -> Optional[GeoIPDict]:
        """Wrapper for self.city that returns a dict"""
        city = self.city(ip_address)
        if not city:
            return None
        city_dict: GeoIPDict = {
            "continent": city.continent.code,
            "country": city.country.iso_code,
            "lat": city.location.latitude,
            "long": city.location.longitude,
            "city": "",
        }
        if city.city.name:
            city_dict["city"] = city.city.name
        return city_dict
Example #27
class NginxLogParser():
    POLLING_PERIOD = 3
    RE_IPV4 = compile(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})')
    RE_IPV6 = compile(
        r'(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))'
    )  # NOQA
    RE_LOGIPV4 = compile(
        r'(?P<ipaddress>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - (?P<remote_user>.+) \[(?P<dateandtime>\d{2}\/[A-Z]{1}[a-z]{2}\/\d{4}:\d{2}:\d{2}:\d{2} ((\+|\-)\d{4}))\](["](?P<method>.+)) (?P<referrer>.+) ((?P<http_version>HTTP\/[1-3]\.[0-9])["]) (?P<status_code>\d{3}) (?P<bytes_sent>\d{1,99})(["](?P<url>(\-)|(.+))["]) (?P<host>.+) (["](?P<user_agent>.+)["])(["](?P<request_time>.+)["]) (["](?P<connect_time>.+)["])',
        IGNORECASE)  # NOQA
    RE_LOGIPV6 = compile(
        r'(?P<ipaddress>(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))) - (?P<remote_user>.+) \[(?P<dateandtime>\d{2}\/[A-Z]{1}[a-z]{2}\/\d{4}:\d{2}:\d{2}:\d{2} ((\+|\-)\d{4}))\](["](?P<method>.+)) (?P<referrer>.+) ((?P<http_version>HTTP\/[1-3]\.[0-9])["]) (?P<status_code>\d{3}) (?P<bytes_sent>\d{1,99})(["](?P<url>(\-)|(.+))["]) (?P<host>.+) (["](?P<user_agent>.+)["])(["](?P<request_time>.+)["]) (["](?P<connect_time>.+)["])',
        IGNORECASE)  # NOQA

    DEFAULT_LOG_PATH = '/config/log/nginx/access.log'
    DEFAULT_INFLUX_HOST = 'localhost'
    DEFAULT_INFLUX_HOST_PORT = '8086'
    DEFAULT_INFLUX_DATABASE = 'geoip2influx'
    DEFAULT_INFLUX_USER = '******'
    DEFAULT_INFLUX_PASS = '******'
    DEFAULT_INFLUX_RETENTION = '7d'
    DEFAULT_INFLUX_SHARD = '1d'
    DEFAULT_GEO_MEASUREMENT = 'geoip2influx'
    DEFAULT_LOG_MEASUREMENT = 'nginx_access_logs'
    DEFAULT_SEND_NGINX_LOGS = 'true'

    def __init__(
        self,
        geoip_db_path='/config/geoip2db/GeoLite2-City.mmdb',
        log_path=DEFAULT_LOG_PATH,
        influxdb_host=DEFAULT_INFLUX_HOST,
        influxdb_port=DEFAULT_INFLUX_HOST_PORT,
        influxdb_database=DEFAULT_INFLUX_DATABASE,
        influxdb_user=DEFAULT_INFLUX_USER,
        influxdb_user_pass=DEFAULT_INFLUX_PASS,
        influxdb_retention=DEFAULT_INFLUX_RETENTION,
        influxdb_shard=DEFAULT_INFLUX_SHARD,
        geo_measurement=DEFAULT_GEO_MEASUREMENT,
        log_measurement=DEFAULT_LOG_MEASUREMENT,
        send_nginx_logs=DEFAULT_SEND_NGINX_LOGS,
    ):
        self.geoip_db_path = geoip_db_path
        self.log_path = log_path
        self.influxdb_host = influxdb_host
        self.influxdb_port = influxdb_port
        self.influxdb_database = influxdb_database
        self.influxdb_user = influxdb_user
        self.influxdb_user_pass = influxdb_user_pass
        self.influxdb_retention = influxdb_retention
        self.influxdb_shard = influxdb_shard
        self.geo_measurement = geo_measurement
        self.log_measurement = log_measurement
        self.send_nginx_logs = send_nginx_logs
        self.geoip_db_path = geoip_db_path
        logging.debug('Log parser config:' +
                      f'\n geoip_db_path      :: {self.geoip_db_path}' +
                      f'\n log_path           :: {self.log_path}' +
                      f'\n influxdb_host      :: {self.influxdb_host}' +
                      f'\n influxdb_port      :: {self.influxdb_port}' +
                      f'\n influxdb_database  :: {self.influxdb_database}' +
                      f'\n influxdb_retention :: {self.influxdb_retention}' +
                      f'\n influxdb_shard     :: {self.influxdb_shard}' +
                      f'\n influxdb_user      :: {self.influxdb_user}' +
                      f'\n influxdb_user_pass :: {self.influxdb_user_pass}' +
                      f'\n geo_measurement    :: {self.geo_measurement}' +
                      f'\n log_measurement    :: {self.log_measurement}' +
                      f'\n send_nginx_logs    :: {self.send_nginx_logs}')
        self.influxdb = self.init_influxdb()
        self.geoip = Reader(self.geoip_db_path)
        self.hostname = uname()[1]

    @classmethod
    def from_env(cls):
        # Getting params from envs
        return cls(
            log_path=env.get('NGINX_LOG_PATH', cls.DEFAULT_LOG_PATH),
            influxdb_host=env.get('INFLUX_HOST', cls.DEFAULT_INFLUX_HOST),
            influxdb_port=env.get('INFLUX_HOST_PORT',
                                  cls.DEFAULT_INFLUX_HOST_PORT),
            influxdb_database=env.get('INFLUX_DATABASE',
                                      cls.DEFAULT_INFLUX_DATABASE),
            influxdb_user=env.get('INFLUX_USER', cls.DEFAULT_INFLUX_USER),
            influxdb_user_pass=env.get('INFLUX_PASS', cls.DEFAULT_INFLUX_PASS),
            influxdb_retention=env.get('INFLUX_RETENTION',
                                       cls.DEFAULT_INFLUX_RETENTION),
            influxdb_shard=env.get('INFLUX_SHARD', cls.DEFAULT_INFLUX_SHARD),
            geo_measurement=env.get('GEO_MEASUREMENT',
                                    cls.DEFAULT_GEO_MEASUREMENT),
            log_measurement=env.get('LOG_MEASUREMENT',
                                    cls.DEFAULT_LOG_MEASUREMENT),
            send_nginx_logs=env.get('SEND_NGINX_LOGS',
                                    cls.DEFAULT_SEND_NGINX_LOGS),
        )

    def regex_tester(self, N=3):
        """Verify the regex to use on log file.

        Try to parse the last N lines of the log file. wait up to 1 min for a valid log.
        If no enriched log can be parsed, only extract the ip, which assu,e default nginx log format.
        """
        time_out = time() + 60
        while True:
            assert N >= 0
            pos = N + 1
            lines = []
            with open(self.log_path) as f:
                while len(lines) <= N:
                    try:
                        f.seek(-pos, 2)
                    except IOError:
                        f.seek(0)
                        break
                    finally:
                        lines = list(f)
                    pos *= 2
            log_lines = lines[-N:]
            for line in log_lines:
                if self.RE_IPV4.match(line):
                    if self.RE_LOGIPV4.match(line):
                        logging.debug(
                            f'Regex is matching {self.log_path} continuing...')
                        return True
                if self.RE_IPV6.match(line):
                    if self.RE_LOGIPV6.match(line):
                        logging.debug(
                            f'Regex is matching {self.log_path} continuing...')
                        return True
                else:
                    logging.debug(f'Testing regex on: {self.log_path}')
                    sleep(2)
            if time() > time_out:
                logging.warning(f'Failed to match regex on: {self.log_path}')
                break

    def file_exists(self):
        """ Verify the log file and geoip db validity."""
        time_out = time() + 30
        while True:
            file_list = [self.log_path, self.geoip_db_path]
            if not exists(self.log_path):
                logging.warning((f'File: {self.log_path} not found...'))
                sleep(1)
            if not exists(self.geoip_db_path):
                logging.warning((f'File: {self.geoip_db_path} not found...'))
                sleep(1)
            if all([isfile(f) for f in file_list]):
                for f in file_list:
                    logging.debug(f'Found: {f}')
                return True
            if time() > time_out:
                if not exists(self.geoip_db_path) and not exists(
                        self.log_path):
                    logging.critical(
                        f"Can't find: {self.geoip_db_path} or {self.log_path} exiting!"
                    )
                    break
                elif not exists(self.geoip_db_path):
                    logging.critical(
                        f"Can't find: {self.geoip_db_path}, exiting!")
                    break
                elif not exists(self.log_path):
                    logging.critical(f"Can't find: {self.log_path}, exiting!")
                    break

    def init_influxdb(self):
        client = InfluxDBClient(
            host=self.influxdb_host,
            port=self.influxdb_port,
            username=self.influxdb_user,
            password=self.influxdb_user_pass,
            database=self.influxdb_database,
        )

        try:
            logging.debug('Testing InfluxDB connection')
            version = client.request(
                'ping',
                expected_response_code=204).headers['X-Influxdb-Version']
            logging.debug(f'Influxdb version: {version}')
        except ConnectionError as e:
            logging.critical(
                f'Error testing connection to InfluxDB. Please check your url/hostname.\nError: {e}'
            )
            raise

        try:
            databases = [db['name'] for db in client.get_list_database()]
            if self.influxdb_database in databases:
                logging.debug(f'Found database: {self.influxdb_database}')
        except InfluxDBClientError as e:
            logging.critical(
                f'Error getting database list! Please check your InfluxDB configuration.\nError: {e}'
            )
            raise

        if self.influxdb_database not in databases:
            logging.info(f'Creating database: {self.influxdb_database}')
            client.create_database(self.influxdb_database)

            retention_policies = [
                policy['name']
                for policy in client.get_list_retention_policies(
                    database=self.influxdb_database)
            ]
            if f'{self.influxdb_database} {self.influxdb_retention}-{self.influxdb_shard}' not in retention_policies:
                logging.info(
                    f'Creating {self.influxdb_database} retention policy ({self.influxdb_retention}-{self.influxdb_shard})'
                )
                client.create_retention_policy(
                    name=
                    f'{self.influxdb_database} {self.influxdb_retention}-{self.influxdb_shard}',
                    duration=self.influxdb_retention,
                    replication='1',
                    database=self.influxdb_database,
                    default=True,
                    shard_duration=self.influxdb_shard)
        return client

    def store_geo_metric(self, ip, geo_info, log_data):
        geo_metrics = []
        geohash = encode(geo_info.location.latitude,
                         geo_info.location.longitude)
        geohash_fields = {'count': 1}

        geohash_tags = {}
        geohash_tags['geohash'] = geohash
        geohash_tags['ip'] = ip
        geohash_tags['host'] = self.hostname
        geohash_tags['country_code'] = geo_info.country.iso_code
        geohash_tags['country_name'] = geo_info.country.name
        geohash_tags['state'] = geo_info.subdivisions.most_specific.name
        geohash_tags[
            'state_code'] = geo_info.subdivisions.most_specific.iso_code
        geohash_tags['city'] = geo_info.city.name
        geohash_tags['postal_code'] = geo_info.postal.code
        geohash_tags['latitude'] = geo_info.location.latitude
        geohash_tags['longitude'] = geo_info.location.longitude
        geo_metrics = [{
            'tags': geohash_tags,
            'fields': geohash_fields,
            'measurement': self.geo_measurement,
        }]
        logging.debug(f'Geo metrics: {geo_metrics}')
        try:
            self.influxdb.write_points(geo_metrics)
        except (InfluxDBServerError, ConnectionError) as e:
            logging.error(
                f'Error writing data to InfluxDB! Check your database!\nError: {e}'
            )

    def store_log_metric(self, ip, geo_info, log_data):
        log_data_fields = {
            'count': 1,
            'bytes_sent': int(log_data['bytes_sent']),
            'request_time': float(log_data['request_time']),
        }
        # If several connection times are provided, use the last one
        log_data['connect_time'] = log_data['connect_time'].split(',')[-1]
        if log_data['connect_time'] == '-':
            log_data_fields['connect_time'] = 0.0
        else:
            log_data_fields['connect_time'] = float(log_data['connect_time'])

        log_data_tags = {}
        log_data_tags['ip'] = log_data['ipaddress']
        log_data_tags['datetime'] = datetime.strptime(log_data['dateandtime'],
                                                      '%d/%b/%Y:%H:%M:%S %z')
        log_data_tags['remote_user'] = log_data['remote_user']
        log_data_tags['method'] = log_data['method']
        log_data_tags['referrer'] = log_data['referrer']
        log_data_tags['host'] = log_data['host']
        log_data_tags['http_version'] = log_data['http_version']
        log_data_tags['status_code'] = log_data['status_code']
        log_data_tags['bytes_sent'] = log_data['bytes_sent']
        log_data_tags['url'] = log_data['url']
        log_data_tags['user_agent'] = log_data['user_agent']
        log_data_tags['request_time'] = log_data['request_time']
        log_data_tags['connect_time'] = log_data['connect_time']
        log_data_tags['city'] = geo_info.city.name if geo_info else "-"
        log_data_tags[
            'country_code'] = geo_info.country.iso_code if geo_info else "-"
        log_data_tags[
            'country_name'] = geo_info.country.name if geo_info else "-"
        log_metrics = [{
            'tags': log_data_tags,
            'fields': log_data_fields,
            'measurement': self.log_measurement,
        }]
        logging.debug(f'NGINX log metrics: {log_metrics}')
        try:
            self.influxdb.write_points(log_metrics)
        except (InfluxDBServerError, InfluxDBClientError,
                ConnectionError) as e:
            logging.error(
                f'Error writing data to InfluxDB! Check your database!\nError: {e}'
            )

    def logparse(self):
        inode = stat(self.log_path).st_ino

        # Determine whether to use enriched or basic log parsing
        send_logs = self.send_nginx_logs.lower() == 'true'
        if not self.regex_tester() and send_logs:
            send_logs = False
            logging.warning(
                'NGINX log metrics disabled! Double check your NGINX custom log format..'
            )
        if send_logs:
            re_log_ipv4 = self.RE_LOGIPV4
            re_log_ipv6 = self.RE_LOGIPV6
        else:
            re_log_ipv4 = self.RE_IPV4
            re_log_ipv6 = self.RE_IPV6

        # Main loop to parse access.log file in tailf style with sending metrics.
        with open(self.log_path, 'r') as log_file:
            logging.info('Starting log parsing')
            str_results = stat(self.log_path)
            st_size = str_results[6]
            log_file.seek(st_size)

            # Keep waiting for new logs
            while True:
                where = log_file.tell()
                line = log_file.readline()
                inodenew = stat(self.log_path).st_ino
                if inode != inodenew:
                    # File has changed, we need to reload it, exit this parsing loop
                    break
                if not line:
                    # No new data, wait for a bit
                    sleep(self.POLLING_PERIOD)
                    log_file.seek(where)
                else:
                    re_match = re_log_ipv4.match(line)
                    if not re_match:
                        re_match = re_log_ipv6.match(line)
                    if not re_match:
                        logging.warning(
                            'Failed to match regex that previously matched!? Skipping this line!\n'
                            'Please share the log line below on Discord or Github!\n'
                            f'Line: {line}')
                        continue
                    log_data = re_match.groupdict()
                    ip = log_data.get('ipaddress', re_match.group(1))
                    if ipadd(ip).iptype() == 'PUBLIC' and ip:
                        geo_info = self.geoip.city(ip)
                        if geo_info:
                            self.store_geo_metric(ip, geo_info, log_data)
                        if send_logs:
                            self.store_log_metric(ip, geo_info, log_data)
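
A hedged sketch of wiring the parser up; from_env reads the INFLUX_*/NGINX_LOG_PATH environment variables, and both the log file and GeoIP database must exist:

parser = NginxLogParser.from_env()
if parser.file_exists():  # waits for the log file and GeoIP DB to appear
    parser.logparse()     # tail the log and ship metrics to InfluxDB
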
Example #28
def logparse(log_path, influxdb_host, influxdb_port, influxdb_database,
             influxdb_user, influxdb_user_pass, influxdb_retention,
             influxdb_shard, geo_measurement, log_measurement, send_nginx_logs,
             geoip_db_path, inode):
    # Preparing variables and params
    ips = {}
    geohash_fields = {}
    geohash_tags = {}
    log_data_fields = {}
    log_data_tags = {}
    nginx_log = {}
    hostname = uname()[1]
    client = InfluxDBClient(host=influxdb_host,
                            port=influxdb_port,
                            username=influxdb_user,
                            password=influxdb_user_pass,
                            database=influxdb_database)

    try:
        logging.debug('Testing InfluxDB connection')
        version = client.request(
            'ping', expected_response_code=204).headers['X-Influxdb-Version']
        logging.debug(f'Influxdb version: {version}')
    except ConnectionError as e:
        logging.critical(
            'Error testing connection to InfluxDB. Please check your url/hostname.\n'
            f'Error: {e}')
        exit(1)

    try:
        databases = [db['name'] for db in client.get_list_database()]
        if influxdb_database in databases:
            logging.debug(f'Found database: {influxdb_database}')
    except InfluxDBClientError as e:
        logging.critical(
            'Error getting database list! Please check your InfluxDB configuration.\n'
            f'Error: {e}')
        exit(1)

    if influxdb_database not in databases:
        logging.info(f'Creating database: {influxdb_database}')
        client.create_database(influxdb_database)

        retention_policies = [
            policy['name'] for policy in client.get_list_retention_policies(
                database=influxdb_database)
        ]
        if f'{influxdb_database} {influxdb_retention}-{influxdb_shard}' not in retention_policies:
            logging.info(
                f'Creating {influxdb_database} retention policy ({influxdb_retention}-{influxdb_shard})'
            )
            client.create_retention_policy(
                name=
                f'{influxdb_database} {influxdb_retention}-{influxdb_shard}',
                duration=influxdb_retention,
                replication='1',
                database=influxdb_database,
                default=True,
                shard_duration=influxdb_shard)

    re_ipv4 = compile(
        r'(?P<ipaddress>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - (?P<remote_user>.+) \[(?P<dateandtime>\d{2}\/[A-Z]{1}[a-z]{2}\/\d{4}:\d{2}:\d{2}:\d{2} ((\+|\-)\d{4}))\](["](?P<method>.+)) (?P<referrer>.+) ((?P<http_version>HTTP\/[1-3]\.[0-9])["]) (?P<status_code>\d{3}) (?P<bytes_sent>\d{1,99})(["](?P<url>(\-)|(.+))["]) (?P<host>.+) (["](?P<user_agent>.+)["])(["](?P<request_time>.+)["]) (["](?P<connect_time>.+)["])(["](?P<city>.+)["]) (["](?P<country_code>.+)["])',
        IGNORECASE)  # NOQA
    re_ipv6 = compile(
        r'(?P<ipaddress>(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))) - (?P<remote_user>.+) \[(?P<dateandtime>\d{2}\/[A-Z]{1}[a-z]{2}\/\d{4}:\d{2}:\d{2}:\d{2} ((\+|\-)\d{4}))\](["](?P<method>.+)) (?P<referrer>.+) ((?P<http_version>HTTP\/[1-3]\.[0-9])["]) (?P<status_code>\d{3}) (?P<bytes_sent>\d{1,99})(["](?P<url>(\-)|(.+))["]) (?P<host>.+) (["](?P<user_agent>.+)["])(["](?P<request_time>.+)["]) (["](?P<connect_time>.+)["])(["](?P<city>.+)["]) (["](?P<country_code>.+)["])',
        IGNORECASE)  # NOQA

    gi = Reader(geoip_db_path)

    if send_nginx_logs in ('true', 'True'):
        send_logs = True
    else:
        send_logs = False
        re_ipv4 = compile(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})')
        re_ipv6 = compile(
            r'(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))'
        )  # NOQA
        logging.info('SEND_NGINX_LOGS set to false')
    if not regex_tester(log_path, 3):
        if send_logs:
            re_ipv4 = compile(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})')
            re_ipv6 = compile(
                r'(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))'
            )  # NOQA
            send_logs = False
            logging.warning(
                'NGINX log metrics disabled! Double-check your NGINX custom log format.'
            )

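    # From here on, send_logs is only True if the full-format regex actually
    # matched a sample of the log; otherwise the patterns above have been
    # swapped for plain IP extraction.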
    # Main loop: parse the access log tail -f style and send metrics.
    with open(log_path, 'r') as log_file:
        logging.info('Starting log parsing')
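        # Jump to the current end of the file so only newly appended lines are
        # parsed (tail -f behaviour).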
        stat_results = stat(log_path)
        st_size = stat_results.st_size
        log_file.seek(st_size)
        while True:
            geo_metrics = []
            log_metrics = []
            where = log_file.tell()
            line = log_file.readline()
            inodenew = stat(log_path).st_ino
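            # A changed inode means the log was rotated or replaced underneath
            # us; stop reading from the stale file handle.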
            if inode != inodenew:
                break
            if not line:
                sleep(1)
                log_file.seek(where)
            else:
                if re_ipv4.match(line):
                    m = re_ipv4.match(line)
                    ip = m.group(1)
                    log = re_ipv4
                elif re_ipv6.match(line):
                    m = re_ipv6.match(line)
                    ip = m.group(1)
                    log = re_ipv6
                else:
                    logging.warning(
                        'Failed to match a regex that previously matched! Skipping this line.\n'
                        'If you think the regex should have matched the line, please share the log line below on https://discord.gg/HSPa4cz or GitHub: https://github.com/gilbN/geoip2influx\n'
                        f'Line: {line}')
                    continue
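                # iptype() labels the address class (e.g. 'PUBLIC', 'PRIVATE');
                # only monitored types are geolocated.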
                ip_type = ipadd(ip).iptype()
                if ip_type in monitored_ip_types and ip:
                    info = gi.city(ip)
                    if info:
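                        # Geohash the coordinates so downstream map panels can
                        # bucket nearby IPs into single points.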
                        geohash = encode(info.location.latitude,
                                         info.location.longitude)
                        geohash_fields['count'] = 1
                        geohash_tags['geohash'] = geohash
                        geohash_tags['ip'] = ip
                        geohash_tags['host'] = hostname
                        geohash_tags['country_code'] = info.country.iso_code
                        geohash_tags['country_name'] = info.country.name
                        geohash_tags['state'] = info.subdivisions.most_specific.name or "-"
                        geohash_tags['state_code'] = info.subdivisions.most_specific.iso_code or "-"
                        geohash_tags['city'] = info.city.name or "-"
                        geohash_tags['postal_code'] = info.postal.code or "-"
                        geohash_tags['latitude'] = info.location.latitude or "-"
                        geohash_tags['longitude'] = info.location.longitude or "-"
                        ips['tags'] = geohash_tags
                        ips['fields'] = geohash_fields
                        ips['measurement'] = geo_measurement
                        geo_metrics.append(ips)
                        logging.debug(f'Geo metrics: {geo_metrics}')
                        try:
                            client.write_points(geo_metrics)
                        except (InfluxDBServerError, ConnectionError) as e:
                            logging.error(
                                'Error writing data to InfluxDB! Check your database!\n'
                                f'Error: {e}')
                else:
                    logging.debug(f"Incorrect IP type: {ip_type}")
                if send_logs:
                    data = search(log, line)
                    if ip_type in monitored_ip_types and ip:
                        info = gi.city(ip)
                        if info:
                            datadict = data.groupdict()
                            log_data_fields['count'] = 1
                            log_data_fields['bytes_sent'] = int(
                                datadict['bytes_sent'])
                            log_data_fields['request_time'] = float(
                                datadict['request_time'])
                            try:
                                log_data_fields['connect_time'] = float(
                                    datadict['connect_time']
                                ) if datadict['connect_time'] != '-' else 0.0
                            except ValueError:
                                log_data_fields['connect_time'] = str(
                                    datadict['connect_time'])
                            log_data_tags['ip'] = datadict['ipaddress']
                            log_data_tags['datetime'] = datetime.strptime(
                                datadict['dateandtime'],
                                '%d/%b/%Y:%H:%M:%S %z')
                            log_data_tags['remote_user'] = datadict[
                                'remote_user']
                            log_data_tags['method'] = datadict['method']
                            log_data_tags['referrer'] = datadict['referrer']
                            log_data_tags['host'] = datadict['host']
                            log_data_tags['http_version'] = datadict[
                                'http_version']
                            log_data_tags['status_code'] = datadict[
                                'status_code']
                            log_data_tags['bytes_sent'] = datadict[
                                'bytes_sent']
                            log_data_tags['url'] = datadict['url']
                            log_data_tags['user_agent'] = datadict[
                                'user_agent']
                            log_data_tags['request_time'] = datadict[
                                'request_time']
                            log_data_tags['connect_time'] = datadict[
                                'connect_time']
                            log_data_tags['city'] = datadict['city']
                            log_data_tags['country_code'] = datadict[
                                'country_code']
                            log_data_tags['country_name'] = info.country.name
                            nginx_log['tags'] = log_data_tags
                            nginx_log['fields'] = log_data_fields
                            nginx_log['measurement'] = log_measurement
                            log_metrics.append(nginx_log)
                            logging.debug(f'NGINX log metrics: {log_metrics}')
                            try:
                                client.write_points(log_metrics)
                            except (InfluxDBServerError, InfluxDBClientError,
                                    ConnectionError) as e:
                                logging.error(
                                    'Error writing data to InfluxDB! Check your database!\n'
                                    f'Error: {e}')
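
The tailing loop above is worth isolating. Below is a minimal, self-contained sketch of the same tail -f technique, assuming an existing 'access.log'; the path and the generator name are illustrative, not part of the original script:

from os import stat
from time import sleep


def tail(path):
    """Yield lines appended to path, stopping when the file is rotated."""
    with open(path, 'r') as log_file:
        log_file.seek(stat(path).st_size)  # skip historical lines
        inode = stat(path).st_ino
        while True:
            where = log_file.tell()
            line = log_file.readline()
            if stat(path).st_ino != inode:
                # Inode changed: logrotate replaced the file; reopen upstream.
                return
            if not line:
                sleep(1)              # nothing new yet; wait and retry
                log_file.seek(where)  # rewind any partial read
            else:
                yield line


for line in tail('access.log'):
    print(line, end='')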
Exemple #29
class GeoIP2(GeoIPOO):

    reader = None

    def __init__(self, path_to_db):
        from geoip2.database import Reader

        try:
            self.reader = Reader(path_to_db)
        except Exception:
            # Leave reader as None if the database cannot be opened;
            # every lookup will then fall back to its default.
            self.reader = None

        self.cache = {}

    def data(self, ip):
        try:
            return self.cache[ip]
        except KeyError:
            try:
                response = self.reader.city(ip)
            except Exception:
                # Covers AddressNotFoundError, malformed IPs, and a reader
                # that failed to open.
                return None

            self.cache[ip] = response
            return response

    def _field(self, ip, getter, default):
        """Look up ip and apply getter, falling back to default on a miss."""
        rsp = self.data(ip)
        if rsp is None:
            return default
        value = getter(rsp)
        return value if value is not None else default

    def country(self, ip, default=None):
        return self._field(ip, lambda r: r.country.iso_code, default)

    def country_name(self, ip, default=None):
        return self._field(ip, lambda r: r.country.name, default)

    def region_name(self, ip, default=None):
        return self._field(ip, lambda r: r.subdivisions.most_specific.name, default)

    def city_name(self, ip, default=None):
        return self._field(ip, lambda r: r.city.name, default)

    def postal_code(self, ip, default=None):
        return self._field(ip, lambda r: r.postal.code, default)

    def latitude(self, ip, default=None):
        return self._field(ip, lambda r: r.location.latitude, default)

    def longitude(self, ip, default=None):
        return self._field(ip, lambda r: r.location.longitude, default)

    def close(self):
        if self.reader is not None:
            self.reader.close()
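
A hedged usage sketch of the wrapper above (the database path and IP address are placeholders; a GeoLite2-City.mmdb must exist at that path for real lookups):

geo = GeoIP2('/path/to/GeoLite2-City.mmdb')
print(geo.country('203.0.113.7', default='??'))         # ISO code or '??'
print(geo.city_name('203.0.113.7', default='unknown'))
geo.close()

Because responses are cached per instance, repeated lookups of the same IP hit the in-memory dict instead of the database file.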