def logparse(LOGPATH, INFLUXHOST, INFLUXPORT, INFLUXDBDB, INFLUXUSER, INFLUXUSERPASS, MEASUREMENT, GEOIPDB, INODE, HOSTNAME):  # NOQA
    # Preparing variables and params
    IPS = {}
    COUNT = {}
    GEOHASH = {}
    # HOSTNAME = os.uname()[1]
    CLIENT = InfluxDBClient(host=INFLUXHOST, port=INFLUXPORT, username=INFLUXUSER, password=INFLUXUSERPASS, database=INFLUXDBDB)  # NOQA
    re_IPV4 = re.compile(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})')
    re_IPV6 = re.compile(
        r'(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))')  # NOQA
    GI = geoip2.database.Reader(GEOIPDB)
    # Main loop to parse access.log file in tailf style with sending metrics
    with open(LOGPATH, "r") as FILE:
        STR_RESULTS = os.stat(LOGPATH)
        ST_SIZE = STR_RESULTS[6]  # st_size: start tailing from the current end of the file
        FILE.seek(ST_SIZE)
        while True:
            METRICS = []
            WHERE = FILE.tell()
            LINE = FILE.readline()
            INODENEW = os.stat(LOGPATH).st_ino
            if INODE != INODENEW:
                # Log file was rotated; exit so the caller can reopen it
                break
            if not LINE:
                time.sleep(1)
                FILE.seek(WHERE)
            else:
                if re_IPV4.match(LINE):
                    m = re_IPV4.match(LINE)
                    IP = m.group(1)
                elif re_IPV6.match(LINE):
                    m = re_IPV6.match(LINE)
                    IP = m.group(1)
                else:
                    # No IP at the start of the line: skip it rather than
                    # reuse a stale IP from a previous iteration
                    continue
                if ipadd(IP).iptype() == 'PUBLIC' and IP:
                    INFO = GI.city(IP)
                    if INFO is not None:
                        HASH = Geohash.encode(INFO.location.latitude, INFO.location.longitude)  # NOQA
                        COUNT['count'] = 1
                        GEOHASH['geohash'] = HASH
                        GEOHASH['host'] = HOSTNAME
                        GEOHASH['country_code'] = INFO.country.iso_code
                        IPS['tags'] = GEOHASH
                        IPS['fields'] = COUNT
                        IPS['measurement'] = MEASUREMENT
                        METRICS.append(IPS)
                        # Sending json data to InfluxDB
                        CLIENT.write_points(METRICS)
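# A minimal invocation sketch for the legacy signature above, assuming the
# surrounding module already imports os, re, time, Geohash, geoip2.database,
# InfluxDBClient and ipadd (an IP-address helper such as IPy's IP class).
# The environment-variable names and default paths below are illustrative
# assumptions, not the project's documented settings.
if __name__ == '__main__':
    LOGPATH = os.getenv('NGINX_LOG_PATH', '/config/log/nginx/access.log')  # assumed env var
    logparse(
        LOGPATH,
        os.getenv('INFLUX_HOST', 'localhost'),
        os.getenv('INFLUX_HOST_PORT', '8086'),
        os.getenv('INFLUX_DATABASE', 'geoip2influx'),
        os.getenv('INFLUX_USER', 'root'),
        os.getenv('INFLUX_PASS', 'root'),
        os.getenv('INFLUX_MEASUREMENT', 'geoip2influx'),
        os.getenv('GEOIP_DB_PATH', '/config/geoip2db/GeoLite2-City.mmdb'),
        os.stat(LOGPATH).st_ino,  # inode used by logparse to detect rotation
        os.uname()[1])            # hostname tag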
def logparse(self):
    inode = stat(self.log_path).st_ino
    # Determine whether to use enriched or basic log parsing
    send_logs = self.send_nginx_logs.lower() == 'true'
    if not self.regex_tester() and send_logs:
        send_logs = False
        logging.warning('NGINX log metrics disabled! Double check your NGINX custom log format..')
    if send_logs:
        re_log_ipv4 = self.RE_LOGIPV4
        re_log_ipv6 = self.RE_LOGIPV6
    else:
        re_log_ipv4 = self.RE_IPV4
        re_log_ipv6 = self.RE_IPV6
    # Main loop to parse access.log file in tailf style with sending metrics.
    with open(self.log_path, 'r') as log_file:
        logging.info('Starting log parsing')
        str_results = stat(self.log_path)
        st_size = str_results[6]
        log_file.seek(st_size)
        # Keep waiting for new logs
        while True:
            where = log_file.tell()
            line = log_file.readline()
            inodenew = stat(self.log_path).st_ino
            if inode != inodenew:
                # File has changed, we need to reload it, exit this parsing loop
                break
            if not line:
                # No new data, wait for a bit
                sleep(self.POLLING_PERIOD)
                log_file.seek(where)
            else:
                re_match = re_log_ipv4.match(line)
                if not re_match:
                    re_match = re_log_ipv6.match(line)
                if not re_match:
                    logging.warning(
                        'Failed to match regex that previously matched!? Skipping this line!\n'
                        'Please share the log line below on Discord or Github!\n'
                        f'Line: {line}')
                    continue
                log_data = re_match.groupdict()
                ip = log_data.get('ipaddress', re_match.group(1))
                if ipadd(ip).iptype() == 'PUBLIC' and ip:
                    geo_info = self.geoip.city(ip)
                    if geo_info:
                        self.store_geo_metric(ip, geo_info, log_data)
                        if send_logs:
                            self.store_log_metric(ip, geo_info, log_data)
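# The method above is written against a host class that is not shown in this
# section. This skeleton is a hedged sketch of the minimal interface it relies
# on: the attribute and method names mirror the calls made in logparse, but the
# class name, defaults, and bodies are assumptions, not the project's real
# implementation.
class NginxLogParserSketch:
    POLLING_PERIOD = 0.5  # seconds to sleep when no new line is available (assumed default)

    def __init__(self, log_path, geoip_reader):
        self.log_path = log_path        # path to the NGINX access.log being tailed
        self.send_nginx_logs = 'true'   # 'true' enables the enriched log metrics
        self.geoip = geoip_reader       # a geoip2.database.Reader instance
        # Compiled patterns: plain IP matchers and full custom-log-format matchers.
        self.RE_IPV4 = self.RE_IPV6 = None
        self.RE_LOGIPV4 = self.RE_LOGIPV6 = None

    def regex_tester(self):
        """Return True if recent log lines match the custom log format."""
        raise NotImplementedError

    def store_geo_metric(self, ip, geo_info, log_data):
        """Build and write the geohash/count point for one request."""
        raise NotImplementedError

    def store_log_metric(self, ip, geo_info, log_data):
        """Build and write the enriched NGINX log point for one request."""
        raise NotImplementedError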
def logparse(log_path, influxdb_host, influxdb_port, influxdb_database,
             influxdb_user, influxdb_user_pass, influxdb_retention,
             influxdb_shard, geo_measurement, log_measurement,
             send_nginx_logs, geoip_db_path, inode):
    # Preparing variables and params
    ips = {}
    geohash_fields = {}
    geohash_tags = {}
    log_data_fields = {}
    log_data_tags = {}
    nginx_log = {}
    hostname = uname()[1]
    client = InfluxDBClient(host=influxdb_host, port=influxdb_port,
                            username=influxdb_user, password=influxdb_user_pass,
                            database=influxdb_database)
    try:
        logging.debug('Testing InfluxDB connection')
        version = client.request('ping', expected_response_code=204).headers['X-Influxdb-Version']
        logging.debug(f'Influxdb version: {version}')
    except ConnectionError as e:
        logging.critical('Error testing connection to InfluxDB. Please check your url/hostname.\n'
                         f'Error: {e}')
        exit(1)
    try:
        databases = [db['name'] for db in client.get_list_database()]
        if influxdb_database in databases:
            logging.debug(f'Found database: {influxdb_database}')
    except InfluxDBClientError as e:
        logging.critical('Error getting database list! Please check your InfluxDB configuration.\n'
                         f'Error: {e}')
        exit(1)
    if influxdb_database not in databases:
        logging.info(f'Creating database: {influxdb_database}')
        client.create_database(influxdb_database)

    retention_policies = [policy['name'] for policy in
                          client.get_list_retention_policies(database=influxdb_database)]
    if f'{influxdb_database} {influxdb_retention}-{influxdb_shard}' not in retention_policies:
        logging.info(f'Creating {influxdb_database} retention policy ({influxdb_retention}-{influxdb_shard})')
        client.create_retention_policy(
            name=f'{influxdb_database} {influxdb_retention}-{influxdb_shard}',
            duration=influxdb_retention,
            replication='1',
            database=influxdb_database,
            default=True,
            shard_duration=influxdb_shard)

    # Full custom-log-format matchers (used when NGINX log metrics are enabled)
    re_ipv4 = compile(
        r'(?P<ipaddress>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - (?P<remote_user>.+) \[(?P<dateandtime>\d{2}\/[A-Z]{1}[a-z]{2}\/\d{4}:\d{2}:\d{2}:\d{2} ((\+|\-)\d{4}))\](["](?P<method>.+)) (?P<referrer>.+) ((?P<http_version>HTTP\/[1-3]\.[0-9])["]) (?P<status_code>\d{3}) (?P<bytes_sent>\d{1,99})(["](?P<url>(\-)|(.+))["]) (?P<host>.+) (["](?P<user_agent>.+)["])(["](?P<request_time>.+)["]) (["](?P<connect_time>.+)["])(["](?P<city>.+)["]) (["](?P<country_code>.+)["])',
        IGNORECASE)  # NOQA
    re_ipv6 = compile(
        r'(?P<ipaddress>(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))) - (?P<remote_user>.+) \[(?P<dateandtime>\d{2}\/[A-Z]{1}[a-z]{2}\/\d{4}:\d{2}:\d{2}:\d{2} ((\+|\-)\d{4}))\](["](?P<method>.+)) (?P<referrer>.+) ((?P<http_version>HTTP\/[1-3]\.[0-9])["]) (?P<status_code>\d{3}) (?P<bytes_sent>\d{1,99})(["](?P<url>(\-)|(.+))["]) (?P<host>.+) (["](?P<user_agent>.+)["])(["](?P<request_time>.+)["]) (["](?P<connect_time>.+)["])(["](?P<city>.+)["]) (["](?P<country_code>.+)["])',
        IGNORECASE)  # NOQA
    gi = Reader(geoip_db_path)

    if send_nginx_logs in ('true', 'True'):
        send_logs = True
    else:
        # Fall back to plain IP matchers when NGINX log metrics are disabled
        send_logs = False
        re_ipv4 = compile(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})')
        re_ipv6 = compile(
            r'(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))')  # NOQA
        logging.info('SEND_NGINX_LOGS set to false')
    if not regex_tester(log_path, 3):
        if send_logs:
            re_ipv4 = compile(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})')
            re_ipv6 = compile(
                r'(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))')  # NOQA
            send_logs = False
            logging.warning('NGINX log metrics disabled! Double check your NGINX custom log format..')

    # Main loop to parse access.log file in tailf style with sending metrics.
    with open(log_path, 'r') as log_file:
        logging.info('Starting log parsing')
        str_results = stat(log_path)
        st_size = str_results[6]  # st_size: start tailing from the current end of the file
        log_file.seek(st_size)
        while True:
            geo_metrics = []
            log_metrics = []
            where = log_file.tell()
            line = log_file.readline()
            inodenew = stat(log_path).st_ino
            if inode != inodenew:
                # Log file was rotated; exit so the caller can reopen it
                break
            if not line:
                sleep(1)
                log_file.seek(where)
            else:
                if re_ipv4.match(line):
                    m = re_ipv4.match(line)
                    ip = m.group(1)
                    log = re_ipv4
                elif re_ipv6.match(line):
                    m = re_ipv6.match(line)
                    ip = m.group(1)
                    log = re_ipv6
                else:
                    logging.warning(
                        'Failed to match regex that previously matched!? Skipping this line!\n'
                        'If you think the regex should have matched the line, please share the log line below on https://discord.gg/HSPa4cz or Github: https://github.com/gilbN/geoip2influx\n'
                        f'Line: {line}')
                    continue
                ip_type = ipadd(ip).iptype()
                if ip_type in monitored_ip_types and ip:
                    info = gi.city(ip)
                    if info:
                        geohash = encode(info.location.latitude, info.location.longitude)
                        geohash_fields['count'] = 1
                        geohash_tags['geohash'] = geohash
                        geohash_tags['ip'] = ip
                        geohash_tags['host'] = hostname
                        geohash_tags['country_code'] = info.country.iso_code
                        geohash_tags['country_name'] = info.country.name
                        geohash_tags['state'] = info.subdivisions.most_specific.name if info.subdivisions.most_specific.name else "-"
                        geohash_tags['state_code'] = info.subdivisions.most_specific.iso_code if info.subdivisions.most_specific.iso_code else "-"
                        geohash_tags['city'] = info.city.name if info.city.name else "-"
                        geohash_tags['postal_code'] = info.postal.code if info.postal.code else "-"
                        geohash_tags['latitude'] = info.location.latitude if info.location.latitude else "-"
                        geohash_tags['longitude'] = info.location.longitude if info.location.longitude else "-"
                        ips['tags'] = geohash_tags
                        ips['fields'] = geohash_fields
                        ips['measurement'] = geo_measurement
                        geo_metrics.append(ips)
                        logging.debug(f'Geo metrics: {geo_metrics}')
                        try:
                            client.write_points(geo_metrics)
                        except (InfluxDBServerError, ConnectionError) as e:
                            logging.error('Error writing data to InfluxDB! Check your database!\n'
                                          f'Error: {e}')
                else:
                    logging.debug(f'Incorrect IP type: {ip_type}')
                if send_logs:
                    data = search(log, line)
                    if ip_type in monitored_ip_types and ip:
                        info = gi.city(ip)
                        if info:
                            datadict = data.groupdict()
                            log_data_fields['count'] = 1
                            log_data_fields['bytes_sent'] = int(datadict['bytes_sent'])
                            log_data_fields['request_time'] = float(datadict['request_time'])
                            try:
                                log_data_fields['connect_time'] = float(datadict['connect_time']) if datadict['connect_time'] != '-' else 0.0
                            except ValueError:
                                log_data_fields['connect_time'] = str(datadict['connect_time'])
                            log_data_tags['ip'] = datadict['ipaddress']
                            log_data_tags['datetime'] = datetime.strptime(datadict['dateandtime'], '%d/%b/%Y:%H:%M:%S %z')
                            log_data_tags['remote_user'] = datadict['remote_user']
                            log_data_tags['method'] = datadict['method']
                            log_data_tags['referrer'] = datadict['referrer']
                            log_data_tags['host'] = datadict['host']
                            log_data_tags['http_version'] = datadict['http_version']
                            log_data_tags['status_code'] = datadict['status_code']
                            log_data_tags['bytes_sent'] = datadict['bytes_sent']
                            log_data_tags['url'] = datadict['url']
                            log_data_tags['user_agent'] = datadict['user_agent']
                            log_data_tags['request_time'] = datadict['request_time']
                            log_data_tags['connect_time'] = datadict['connect_time']
                            log_data_tags['city'] = datadict['city']
                            log_data_tags['country_code'] = datadict['country_code']
                            log_data_tags['country_name'] = info.country.name
                            nginx_log['tags'] = log_data_tags
                            nginx_log['fields'] = log_data_fields
                            nginx_log['measurement'] = log_measurement
                            log_metrics.append(nginx_log)
                            logging.debug(f'NGINX log metrics: {log_metrics}')
                            try:
                                client.write_points(log_metrics)
                            except (InfluxDBServerError, InfluxDBClientError, ConnectionError) as e:
                                logging.error('Error writing data to InfluxDB! Check your database!\n'
                                              f'Error: {e}')
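# The function above returns only when the log file's inode changes (i.e. on
# rotation), so a caller is expected to re-enter it with the fresh inode. A
# hedged supervisor-loop sketch under that assumption; the env-var names and
# the literal connection/retention settings are placeholders for illustration.
from os import environ, stat

if __name__ == '__main__':
    log_path = environ.get('NGINX_LOG_PATH', '/config/log/nginx/access.log')  # assumed env var
    while True:
        inode = stat(log_path).st_ino
        logparse(
            log_path, 'localhost', 8086, 'geoip2influx', 'root', 'root',
            '30d', '2d', 'geoip2influx', 'nginx_access_logs',
            environ.get('SEND_NGINX_LOGS', 'true'),
            '/config/geoip2db/GeoLite2-City.mmdb', inode)
        # logparse broke out of its tail loop: the file was rotated, so loop
        # around, re-stat the new file, and start tailing it from the end.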
def logparse(log_path, influxdb_host, influxdb_port, influxdb_database,
             influxdb_user, influxdb_user_pass, influxdb_retention,
             influxdb_shard, geo_measurement, log_measurement,
             send_nginx_logs, geoip_db_path, inode):
    # Preparing variables and params
    ips = {}
    geohash_fields = {}
    geohash_tags = {}
    log_data_fields = {}
    log_data_tags = {}
    nginx_log = {}
    hostname = uname()[1]
    client = InfluxDBClient(host=influxdb_host, port=influxdb_port,
                            username=influxdb_user, password=influxdb_user_pass,
                            database=influxdb_database)
    try:
        logging.debug('Testing InfluxDB connection')
        version = client.request('ping', expected_response_code=204).headers['X-Influxdb-Version']
        logging.debug(f'Influxdb version: {version}')
    except ConnectionError as e:
        logging.critical('Error testing connection to InfluxDB. Please check your url/hostname.\n'
                         f'Error: {e}')
        exit(1)
    try:
        databases = [db['name'] for db in client.get_list_database()]
        if influxdb_database in databases:
            logging.debug(f'Found database: {influxdb_database}')
    except InfluxDBClientError as e:
        logging.critical('Error getting database list! Please check your InfluxDB configuration.\n'
                         f'Error: {e}')
        exit(1)
    if influxdb_database not in databases:
        logging.info(f'Creating database: {influxdb_database}')
        client.create_database(influxdb_database)

    retention_policies = [policy['name'] for policy in
                          client.get_list_retention_policies(database=influxdb_database)]
    if f'{influxdb_database} {influxdb_retention}-{influxdb_shard}' not in retention_policies:
        logging.info(f'Creating {influxdb_database} retention policy ({influxdb_retention}-{influxdb_shard})')
        client.create_retention_policy(
            name=f'{influxdb_database} {influxdb_retention}-{influxdb_shard}',
            duration=influxdb_retention,
            replication='1',
            database=influxdb_database,
            default=True,
            shard_duration=influxdb_shard)

    # Module-level pattern constants (defined elsewhere in the module)
    re_ipv4 = IPV4_NGINX_LOG_LINE
    re_ipv6 = IPV6_NGINX_LOG_LINE
    gi = Reader(geoip_db_path)

    if send_nginx_logs in ('true', 'True'):
        send_logs = True
    else:
        # Fall back to plain IP matchers when NGINX log metrics are disabled
        send_logs = False
        re_ipv4 = IPV4_REGEX
        re_ipv6 = IPV6_REGEX
        logging.info('SEND_NGINX_LOGS set to false')
    # if not regex_tester(log_path, 3):
    #     if send_logs:
    #         re_ipv4 = IPV4_REGEX
    #         re_ipv6 = IPV6_REGEX
    #         send_logs = False
    #         logging.warning('NGINX log metrics disabled! Double check your NGINX custom log format..')

    # Main loop to parse access.log file in tailf style with sending metrics.
    with open(log_path, 'r') as log_file:
        logging.info('Starting log parsing')
        str_results = stat(log_path)
        st_size = str_results[6]  # st_size: start tailing from the current end of the file
        log_file.seek(st_size)
        while True:
            geo_metrics = []
            log_metrics = []
            where = log_file.tell()
            line = log_file.readline()
            inodenew = stat(log_path).st_ino
            if inode != inodenew:
                # Log file was rotated; exit so the caller can reopen it
                break
            if not line:
                sleep(1)
                log_file.seek(where)
            else:
                if re_ipv4.match(line):
                    m = re_ipv4.match(line)
                    ip = m.group(1)
                    log = re_ipv4
                elif re_ipv6.match(line):
                    m = re_ipv6.match(line)
                    ip = m.group(1)
                    log = re_ipv6
                else:
                    logging.warning(
                        'Failed to match regex that previously matched!? Skipping this line!\n'
                        'If you think the regex should have matched the line, please share the log line below on https://discord.gg/HSPa4cz or Github: https://github.com/gilbN/geoip2influx\n'
                        f'Line: {line}')
                    continue
                if ipadd(ip).iptype() == 'PUBLIC' and ip:
                    info = gi.city(ip)
                    if info is not None:
                        geohash = encode(info.location.latitude, info.location.longitude)
                        geohash_fields['count'] = 1
                        geohash_tags['geohash'] = geohash
                        geohash_tags['ip'] = ip
                        geohash_tags['host'] = hostname
                        geohash_tags['country_code'] = info.country.iso_code
                        geohash_tags['country_name'] = info.country.name
                        geohash_tags['state'] = info.subdivisions.most_specific.name
                        geohash_tags['state_code'] = info.subdivisions.most_specific.iso_code
                        geohash_tags['city'] = info.city.name
                        geohash_tags['postal_code'] = info.postal.code
                        geohash_tags['latitude'] = info.location.latitude
                        geohash_tags['longitude'] = info.location.longitude
                        ips['tags'] = geohash_tags
                        ips['fields'] = geohash_fields
                        ips['measurement'] = geo_measurement
                        geo_metrics.append(ips)
                        logging.debug(f'Geo metrics: {geo_metrics}')
                        try:
                            client.write_points(geo_metrics)
                        except (InfluxDBServerError, ConnectionError) as e:
                            logging.error('Error writing data to InfluxDB! Check your database!\n'
                                          f'Error: {e}')
                if send_logs:
                    data = search(log, line)
                    if ipadd(ip).iptype() == 'PUBLIC' and ip:
                        info = gi.city(ip)
                        if info is not None:
                            datadict = data.groupdict()
                            log_data_fields['count'] = 1
                            log_data_fields['bytes_sent'] = int(datadict['bytes_sent'])
                            log_data_fields['request_time'] = float(datadict['request_time'])
                            if datadict['connect_time'] == '-':
                                log_data_fields['connect_time'] = 0.0
                            else:
                                log_data_fields['connect_time'] = float(datadict['connect_time'])
                            log_data_tags['ip'] = datadict['ipaddress']
                            log_data_tags['datetime'] = datetime.strptime(datadict['dateandtime'], '%d/%b/%Y:%H:%M:%S %z')
                            log_data_tags['remote_user'] = datadict['remote_user']
                            log_data_tags['method'] = datadict['method']
                            log_data_tags['referrer'] = datadict['referrer']
                            log_data_tags['host'] = datadict['host']
                            log_data_tags['http_version'] = datadict['http_version']
                            log_data_tags['status_code'] = datadict['status_code']
                            log_data_tags['bytes_sent'] = datadict['bytes_sent']
                            log_data_tags['url'] = datadict['url']
                            log_data_tags['user_agent'] = datadict['user_agent']
                            log_data_tags['request_time'] = datadict['request_time']
                            log_data_tags['connect_time'] = datadict['connect_time']
                            log_data_tags['city'] = datadict['city']
                            log_data_tags['country_code'] = datadict['country_code']
                            log_data_tags['country_name'] = info.country.name
                            nginx_log['tags'] = log_data_tags
                            nginx_log['fields'] = log_data_fields
                            nginx_log['measurement'] = log_measurement
                            log_metrics.append(nginx_log)
                            logging.debug(f'NGINX log metrics: {log_metrics}')
                            try:
                                client.write_points(log_metrics)
                            except (InfluxDBServerError, InfluxDBClientError, ConnectionError) as e:
                                logging.error('Error writing data to InfluxDB! Check your database!\n'
                                              f'Error: {e}')
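# The variant above references IPV4_REGEX, IPV6_REGEX, IPV4_NGINX_LOG_LINE and
# IPV6_NGINX_LOG_LINE as module-level constants that this section does not
# show. A hedged sketch of how they could be defined, reusing the exact
# patterns spelled out in the earlier version of this function; the helper
# names IPV6_ALTERNATION and NGINX_LOG_TAIL are hypothetical, introduced here
# only to avoid repeating the long patterns.
from re import compile, IGNORECASE

# The IPv6 alternation, copied verbatim from the earlier version.
IPV6_ALTERNATION = r'([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])'  # NOQA

# Plain address matchers (group(1) captures the whole address).
IPV4_REGEX = compile(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})')
IPV6_REGEX = compile(f'({IPV6_ALTERNATION})')

# Shared tail of the custom-log-format pattern: remote user, date, request,
# status, bytes, url, host, user agent, timings, city, and country code.
NGINX_LOG_TAIL = (
    r' - (?P<remote_user>.+) \[(?P<dateandtime>\d{2}\/[A-Z]{1}[a-z]{2}\/\d{4}'
    r':\d{2}:\d{2}:\d{2} ((\+|\-)\d{4}))\](["](?P<method>.+)) (?P<referrer>.+) '
    r'((?P<http_version>HTTP\/[1-3]\.[0-9])["]) (?P<status_code>\d{3}) '
    r'(?P<bytes_sent>\d{1,99})(["](?P<url>(\-)|(.+))["]) (?P<host>.+) '
    r'(["](?P<user_agent>.+)["])(["](?P<request_time>.+)["]) '
    r'(["](?P<connect_time>.+)["])(["](?P<city>.+)["]) (["](?P<country_code>.+)["])'
)
IPV4_NGINX_LOG_LINE = compile(r'(?P<ipaddress>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' + NGINX_LOG_TAIL, IGNORECASE)
IPV6_NGINX_LOG_LINE = compile(f'(?P<ipaddress>({IPV6_ALTERNATION}))' + NGINX_LOG_TAIL, IGNORECASE)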