def test_lightweight_parse_includes(self):
    """get_structure() must report every included file and directory, for both fixtures."""
    # --- simple fixture ---
    parser = NginxConfigParser(simple_config)
    found_files, found_dirs = parser.get_structure()

    expected_files = [
        '/amplify/test/fixtures/nginx/simple/conf.d/something.conf',
        '/amplify/test/fixtures/nginx/simple/mime.types',
        '/amplify/test/fixtures/nginx/simple/nginx.conf',
    ]
    # order-independent comparison: structure dicts carry no ordering guarantee
    assert_that(found_files.keys(), contains_inanyorder(*expected_files))

    expected_dirs = [
        '/amplify/test/fixtures/nginx/simple/',
        '/amplify/test/fixtures/nginx/simple/conf.d/',
    ]
    assert_that(found_dirs.keys(), contains_inanyorder(*expected_dirs))

    # --- includes fixture ---
    parser = NginxConfigParser(includes_config)
    found_files, found_dirs = parser.get_structure()

    expected_files = [
        '/amplify/test/fixtures/nginx/includes/conf.d/something.conf',
        '/amplify/test/fixtures/nginx/includes/mime.types',
        '/amplify/test/fixtures/nginx/includes/conf.d/additional.conf',
        '/amplify/test/fixtures/nginx/includes/conf.d/include.conf',
        '/amplify/test/fixtures/nginx/includes/nginx.conf',
    ]
    assert_that(found_files.keys(), contains_inanyorder(*expected_files))

    expected_dirs = [
        '/amplify/test/fixtures/nginx/includes/',
        '/amplify/test/fixtures/nginx/includes/conf.d/',
    ]
    assert_that(found_dirs.keys(), contains_inanyorder(*expected_dirs))
def test_lightweight_parse_includes(self):
    """
    get_structure() must report every included file and directory, for both fixtures.

    BUG FIX: the original compared files.keys()/directories.keys() with equal_to()
    against a fixed-order list; dict key order is arbitrary, so the test could fail
    spuriously.  Compare sorted lists instead (order-independent, no new imports).
    """
    # simple
    cfg = NginxConfigParser(simple_config)
    files, directories = cfg.get_structure()
    assert_that(sorted(files.keys()), equal_to(sorted([
        '/amplify/test/fixtures/nginx/simple/conf.d/something.conf',
        '/amplify/test/fixtures/nginx/simple/mime.types',
        '/amplify/test/fixtures/nginx/simple/nginx.conf'
    ])))
    assert_that(sorted(directories.keys()), equal_to(sorted([
        '/amplify/test/fixtures/nginx/simple/',
        '/amplify/test/fixtures/nginx/simple/conf.d/'
    ])))

    # includes
    cfg = NginxConfigParser(includes_config)
    files, directories = cfg.get_structure()
    assert_that(sorted(files.keys()), equal_to(sorted([
        '/amplify/test/fixtures/nginx/includes/conf.d/something.conf',
        '/amplify/test/fixtures/nginx/includes/mime.types',
        '/amplify/test/fixtures/nginx/includes/conf.d/additional.conf',
        '/amplify/test/fixtures/nginx/includes/conf.d/include.conf',
        '/amplify/test/fixtures/nginx/includes/nginx.conf'
    ])))
    assert_that(sorted(directories.keys()), equal_to(sorted([
        '/amplify/test/fixtures/nginx/includes/',
        '/amplify/test/fixtures/nginx/includes/conf.d/'
    ])))
def test_lightweight_parse_includes_permissions(self):
    """
    Checks that we get file permissions during lightweight parsing

    BUG FIX: 0777 is Python-2-only octal syntax (a SyntaxError on Python 3);
    0o777 is accepted by both Python 2.6+ and Python 3.
    """
    def expected_meta(path):
        # size/mtime/permissions exactly as the parser is expected to report them
        return {
            'size': os.path.getsize(path),
            'mtime': int(os.path.getmtime(path)),
            'permissions': oct(os.stat(path).st_mode & 0o777)
        }

    cfg = NginxConfigParser(simple_config)
    files, directories = cfg.get_structure()

    test_file = '/amplify/test/fixtures/nginx/simple/conf.d/something.conf'
    assert_that(files[test_file], equal_to(expected_meta(test_file)))

    test_directory = '/amplify/test/fixtures/nginx/simple/conf.d/'
    assert_that(directories[test_directory], equal_to(expected_meta(test_directory)))
def test_lightweight_parse_includes_permissions(self):
    """
    Checks that we get file permissions during lightweight parsing

    BUG FIX: 0777 is Python-2-only octal syntax (a SyntaxError on Python 3);
    0o777 is accepted by both Python 2.6+ and Python 3.
    """
    def expected_meta(path):
        # size/mtime/permissions exactly as the parser is expected to report them
        return {
            'size': os.path.getsize(path),
            'mtime': int(os.path.getmtime(path)),
            'permissions': oct(os.stat(path).st_mode & 0o777)
        }

    cfg = NginxConfigParser(simple_config)
    files, directories = cfg.get_structure()

    test_file = '/amplify/test/fixtures/nginx/simple/conf.d/something.conf'
    assert_that(files[test_file], equal_to(expected_meta(test_file)))

    test_directory = '/amplify/test/fixtures/nginx/simple/conf.d/'
    assert_that(directories[test_directory], equal_to(expected_meta(test_directory)))
class NginxConfig(object):
    """
    Nginx config representation **for a running NGINX instance**

    Main tasks:
    - find all log formats
    - find all access logs
    - find all error logs
    - find stub_status url
    """

    def __init__(self, filename, binary=None, prefix=None):
        self.filename = filename        # path to the root nginx.conf
        self.binary = binary            # path to the nginx binary (for nginx -t / -V)
        self.prefix = prefix            # nginx --prefix, used to resolve relative log paths
        self.log_formats = {}
        self.access_logs = {}
        self.error_logs = {}
        self.test_errors = []
        self.tree = {}
        self.files = {}
        self.directories = {}
        self.directory_map = {}
        self.subtree = []
        self.ssl_certificates = {}
        self.parser_ssl_certificates = []
        self.parser_errors = []
        self.stub_status_urls = []
        self.plus_status_external_urls = []
        self.plus_status_internal_urls = []
        self.api_external_urls = []
        self.api_internal_urls = []
        self.parser = None              # lazily created NginxConfigParser
        self.wait_until = 0

    def _setup_parser(self):
        # (re)create the raw parser for self.filename
        self.parser = NginxConfigParser(filename=self.filename)

    def _teardown_parser(self):
        # drop the parser reference so its (potentially large) state can be freed
        self.parser = None

    def full_parse(self, include_ssl_certs=True):
        """
        Parses the whole config tree and collects logs, formats and status urls.

        :param include_ssl_certs: bool - also parse ssl certificate files
        """
        context.log.debug('parsing full tree of %s' % self.filename)

        # parse raw data
        try:
            self._setup_parser()
            self.parser.parse(include_ssl_certs=include_ssl_certs)
            self._handle_parse()
        except Exception as e:
            context.log.error('failed to parse config at %s (due to %s)' % (self.filename, e.__class__.__name__))
            context.log.debug('additional info:', exc_info=True)
            self._setup_parser()  # Re-init parser to discard partial data (if any)

        # Post-handling

        # try to add logs from nginx -V configure options
        self.add_configured_variable_logs()

        # try to locate and use default logs (PREFIX/logs/*)
        self.add_default_logs()

        # Go through log files and apply exclude rules (log files are added during ._collect_data())
        self._exclude_logs()

        # try to read from each log file to check if it can be parsed
        self._check_logs()

        # dump access log files, access log formats, and error log files to the debug log
        context.log.debug(
            'parsed log formats, access logs, and error logs:' +
            '\nlog formats: ' + json.dumps(self.log_formats, indent=4, sort_keys=True) +
            '\naccess logs: ' + json.dumps(self.access_logs, indent=4, sort_keys=True) +
            '\nerror logs: ' + json.dumps(self.error_logs, indent=4, sort_keys=True))

    def _handle_parse(self):
        """Copies everything we need off the parser, then tears it down and collects data."""
        self.tree = self.parser.tree
        self.files = self.parser.files
        self.directories = self.parser.directories
        self.directory_map = self.parser.directory_map
        self.subtree = self.parser.simplify()
        self.ssl_certificates = {}  # gets populated in run_ssl_analysis()
        self.parser_ssl_certificates = self.parser.ssl_certificates
        self.parser_errors = self.parser.errors

        # now that we have all the things we need from parser, we can tear it down
        self._teardown_parser()

        # clear url values in the config that can/will be used to find metrics
        # do this now because self._collect_data() will repopulate the lists
        self.stub_status_urls = []
        self.plus_status_external_urls = []
        self.plus_status_internal_urls = []
        self.api_external_urls = []
        self.api_internal_urls = []

        # go through and collect all logical data
        self._collect_data(self.subtree)

    def collect_structure(self, include_ssl_certs=False):
        """
        Goes through all files (light-parsed includes) and collects their mtime

        :param include_ssl_certs: bool - include ssl certs or not
        :return: {} - dict of files
        """
        # if self.parser is None, set it up
        if self.parser is None:
            self._setup_parser()

        files, directories = self.parser.get_structure(include_ssl_certs=include_ssl_certs)
        context.log.debug('found %s files for %s' % (len(files.keys()), self.filename))
        context.log.debug('found %s directories for %s' % (len(directories.keys()), self.filename))

        # always teardown the parser
        self._teardown_parser()

        return files, directories

    def total_size(self):
        """
        Returns the total size of a config tree

        :return: int size in bytes
        """
        return sum(data['size'] for data in self.files.itervalues())

    def _collect_data(self, block, ctx=None):
        """
        Searches needed data in config's tree

        :param block: list of statement dicts to parse
        :param ctx: dict with context
        """
        ctx = ctx if ctx is not None else {}

        def usable_log_args(args):
            # a log directive is usable unless it is "off" or its path uses nginx variables
            # (if=... arguments are allowed to contain '$')
            is_disabled = not args or args[0] == 'off'
            uses_variable = any('$' in arg for arg in args if not arg.startswith('if='))
            return not is_disabled and not uses_variable

        for stmt in block:
            directive = stmt['directive']
            args = stmt['args']

            if directive == 'error_log' and usable_log_args(args):
                path = args[0].replace('"', '').replace("'", '')
                # if not syslog, assume it is a file...if not starts with '/' assume relative path
                if not path.startswith('syslog') and not path.startswith('/'):
                    path = os.path.join(self.prefix, path)
                if path not in self.error_logs:
                    if len(args) > 1 and args[1] in ERROR_LOG_LEVELS:
                        self.error_logs[path] = {'log_level': args[1]}
                    else:
                        self.error_logs[path] = {'log_level': 'error'}  # nginx default log level

            elif directive == 'access_log' and usable_log_args(args):
                path = args[0].replace('"', '').replace("'", '')
                # if not syslog, assume it is a file...if not starts with '/' assume relative path
                if not path.startswith('syslog') and not path.startswith('/'):
                    path = os.path.join(self.prefix, path)
                format = args[1] if len(args) > 1 else None
                self.access_logs[path] = {'log_format': format}

            elif directive == 'log_format':
                name, strings = args[0], args[1:]
                # disregard the (optional) escape parameter
                if len(strings) > 1 and strings[0].startswith('escape='):
                    strings.pop(0)
                self.log_formats[name] = ''.join(x.encode('utf-8').decode('string_escape') for x in strings)

            elif directive == 'server' and 'upstream' not in ctx:
                # collect all listen values; nginx defaults to 80/8000 when none are given
                listens = []
                for inner_stmt in stmt['block']:
                    if inner_stmt['directive'] == 'listen':
                        listens.append(inner_stmt['args'][0])
                if not listens:
                    listens += ['80', '8000']

                ip_port = []
                for listen in listens:
                    try:
                        ip_port.append(self._parse_listen(listen))
                    except:
                        context.log.error('failed to parse bad ipv6 listen directive: %s' % listen)
                        context.log.debug('additional info:', exc_info=True)

                server_ctx = dict(ctx, ip_port=ip_port)

                # first server_name wins
                for inner_stmt in stmt['block']:
                    if inner_stmt['directive'] == 'server_name':
                        server_ctx['server_name'] = inner_stmt['args'][0]
                        break

                # schema is derived from the first listen directive
                for inner_stmt in stmt['block']:
                    if inner_stmt['directive'] == 'listen':
                        server_ctx['server_schema'] = 'https' if 'ssl' in inner_stmt['args'] else 'http'
                        break

                self._collect_data(stmt['block'], ctx=server_ctx)

            elif directive == 'upstream':
                upstream = args[0]
                upstream_ctx = dict(ctx, upstream=upstream)
                self._collect_data(stmt['block'], ctx=upstream_ctx)

            elif directive == 'location':
                location = ' '.join(map(_enquote, args))
                location_ctx = dict(ctx, location=location)
                self._collect_data(stmt['block'], ctx=location_ctx)

            elif directive == 'stub_status' and 'ip_port' in ctx:
                for url in self._status_url(ctx):
                    if url not in self.stub_status_urls:
                        self.stub_status_urls.append(url)

            elif (directive == 'status' or self._is_plus_dashboard(stmt, ctx)) and 'ip_port' in ctx:
                # use different url builders for external and internal urls
                for url in self._status_url(ctx, server_preferred=True):
                    if url not in self.plus_status_external_urls:
                        self.plus_status_external_urls.append(url)

                # for internal (agent) usage local ip address is a better choice,
                # because the external url might not be accessible from a host
                for url in self._status_url(ctx, server_preferred=False):
                    if url not in self.plus_status_internal_urls:
                        self.plus_status_internal_urls.append(url)

            elif directive == 'api' and 'ip_port' in ctx:
                # use different url builders for external and internal urls
                for url in self._status_url(ctx, server_preferred=True):
                    if url not in self.api_external_urls:
                        self.api_external_urls.append(url)

                # for internal (agent) usage local ip address is a better choice,
                # because the external url might not be accessible from a host
                for url in self._status_url(ctx, server_preferred=False):
                    if url not in self.api_internal_urls:
                        self.api_internal_urls.append(url)

            elif 'block' in stmt:
                # any other block directive (http, events, if, ...) - just recurse
                self._collect_data(stmt['block'], ctx=ctx)

    @staticmethod
    def _is_plus_dashboard(stmt, ctx):
        """
        Now that the `status` directive is deprecated this method is used to
        determine plus dashboard urls.  It does so by checking to see if the
        config follows the conventional pattern for including the plus dashboard:

            location = /dashboard.html {
                root /usr/share/nginx/html;
            }

        Obviously this is not perfect, but it's the best we can do now that the
        `status` directive is gone.
        """
        correct_directive = stmt['directive'] == 'root'
        correct_arguments = stmt['args'] == ['/usr/share/nginx/html']
        correct_location = ctx.get('location', '/').endswith('dashboard.html')
        return correct_directive and correct_arguments and correct_location

    @staticmethod
    def _status_url(ctx, server_preferred=False):
        """
        Creates stub/plus status url based on context

        :param ctx: {} of current parsing context
        :param server_preferred: bool - use server_name instead of listen
        :return: generator of urls
        """
        location = ctx.get('location', '/')

        # remove all modifiers
        location_parts = location.split(' ')
        final_location_part = location_parts[-1]

        # generate a random string that will fit regex location
        if location.startswith('~'):
            try:
                exact_location = rstr.xeger(final_location_part)

                # check that regex location has / and add it
                if not exact_location.startswith('/'):
                    exact_location = '/%s' % exact_location
            except:
                context.log.debug('bad regex location: %s' % final_location_part)
                exact_location = None
        else:
            exact_location = final_location_part

            # if an exact location doesn't have / that's not a working location, we should not use it
            if not exact_location.startswith('/'):
                context.log.debug('bad exact location: %s' % final_location_part)
                exact_location = None

        if exact_location:
            for ip_port in ctx.get('ip_port'):
                address, port = ip_port

                if server_preferred and 'server_name' in ctx:
                    address = ctx['server_name']

                schema = 'http'
                if 'server_schema' in ctx:
                    schema = ctx['server_schema']

                yield '%s://%s:%s%s' % (schema, address, port, exact_location)

    def run_test(self):
        """
        Tests the configuration using nginx -t
        Saves event info if syntax check was not successful

        :return: float run time in seconds
        """
        start_time = time.time()
        context.log.info('running %s -t -c %s' % (self.binary, self.filename))
        if self.binary:
            try:
                _, nginx_t_err = subp.call("sudo %s -t -c %s" % (self.binary, self.filename), check=False)
                for line in nginx_t_err:
                    if 'syntax is' in line and 'syntax is ok' not in line:
                        self.test_errors.append(line)
            except Exception as e:
                exception_name = e.__class__.__name__
                context.log.error('failed to %s -t -c %s due to %s' % (self.binary, self.filename, exception_name))
                context.log.debug('additional info:', exc_info=True)

        end_time = time.time()
        return end_time - start_time

    def checksum(self):
        """
        Calculates total checksum of all config files, certificates and permissions

        :return: str checksum
        """
        checksums = []
        for file_path, file_data in self.files.iteritems():
            checksums.append(hashlib.sha256(open(file_path).read()).hexdigest())
            checksums.append(file_data['permissions'])
            checksums.append(str(file_data['mtime']))
        for dir_data in self.directories.itervalues():
            checksums.append(dir_data['permissions'])
            checksums.append(str(dir_data['mtime']))
        for cert in self.ssl_certificates.iterkeys():
            checksums.append(hashlib.sha256(open(cert).read()).hexdigest())
        return hashlib.sha256('.'.join(checksums)).hexdigest()

    def _parse_listen(self, listen):
        """
        Parses listen directive value and return ip:port string, like *:80 and so on

        :param listen: str raw listen
        :return: (address, port) tuple
        """
        if '[' in listen:
            # ipv6
            parts = filter(len, listen.rsplit(']', 1))
            address = '%s]' % parts[0]
            port = '80' if len(parts) == 1 else parts[1].split(':')[1]
        else:
            # ipv4
            parts = filter(len, listen.rsplit(':', 1))
            if len(parts) == 1 and parts[0].isdigit():
                address, port = '*', parts[0]
            elif len(parts) == 1:
                address, port = parts[0], '80'
            else:
                address, port = parts

        # standardize address so the agent can poll it locally
        if address in ('*', '0.0.0.0'):
            address = '127.0.0.1'
        elif address == '[::]':
            address = '[::1]'

        return address, port

    def add_configured_variable_logs(self):
        """
        Get logs configured through nginx -V options and try to find access and error logs
        This happens only if nginx access and error logs are not configured in nginx.conf
        """
        if self.binary is not None and (len(self.access_logs) < 1 or len(self.error_logs) < 1):
            try:
                v_options = nginx_v(self.binary)
                configure = v_options['configure']

                # adding access or error logs from options only if they are empty
                # BUG FIX: the None check must run BEFORE os.path.isfile() -
                # isfile(None) raises TypeError when the configure option is absent
                if len(self.access_logs) < 1:
                    access_log_path = configure.get('http-log-path')
                    if access_log_path is not None and os.path.isfile(access_log_path):
                        self.access_logs[access_log_path] = {'log_format': None}

                if len(self.error_logs) < 1:
                    error_log_path = configure.get('error-log-path')
                    if error_log_path is not None and os.path.isfile(error_log_path):
                        self.error_logs[error_log_path] = {'log_level': 'error'}
            except Exception as e:
                exception_name = e.__class__.__name__
                context.log.error('failed to get configured variables from %s -V due to %s'
                                  % (self.binary, exception_name))
                context.log.debug('additional info:', exc_info=True)

    def add_default_logs(self):
        """
        By default nginx uses logs placed in --prefix/logs/ directory
        This method tries to find and add them
        """
        access_log_path = '%s/logs/access.log' % self.prefix
        if os.path.isfile(access_log_path) and access_log_path not in self.access_logs:
            self.access_logs[access_log_path] = {'log_format': None}

        error_log_path = '%s/logs/error.log' % self.prefix
        if os.path.isfile(error_log_path) and error_log_path not in self.error_logs:
            self.error_logs[error_log_path] = {'log_level': 'error'}

    def run_ssl_analysis(self):
        """
        Iterate over a list of ssl_certificate definitions and run ssl_analysis
        to construct a dictionary with ssl_certificate value paired with results
        of ssl_analysis.

        :return: float run time
        """
        if not self.parser_ssl_certificates:
            return

        start_time = time.time()

        for cert_filename in set(self.parser_ssl_certificates):
            ssl_analysis_result = ssl_analysis(cert_filename)
            if ssl_analysis_result:
                self.ssl_certificates[cert_filename] = ssl_analysis_result

        end_time = time.time()
        return end_time - start_time

    def _exclude_logs(self):
        """
        Iterate through log file stores and remove ones that match exclude rules.
        """
        # Take comma-separated string of pathname patterns and separate them into individual patterns
        exclude_rules = context.app_config.get('nginx', {}).get('exclude_logs', '').split(',')

        for rule in [x for x in exclude_rules if x]:  # skip potentially empty rules due to improper formatting
            # access logs
            for excluded_file in glib(self.access_logs.keys(), rule):
                del self.access_logs[excluded_file]

            # error logs
            for excluded_file in glib(self.error_logs.keys(), rule):
                del self.error_logs[excluded_file]

    def _check_logs(self):
        """
        Iterate through log file stores and add permissions and if it is readable to the log data
        """
        for logs in (self.access_logs, self.error_logs):
            # syslog destinations are not files and cannot be stat'ed/opened
            for log_name in filter(lambda name: not name.startswith('syslog'), logs):
                info = get_filesystem_info(log_name)
                logs[log_name]['permissions'] = info['permissions']
                try:
                    with open(log_name, 'r'):
                        pass
                except:
                    logs[log_name]['readable'] = False
                else:
                    logs[log_name]['readable'] = True
class NginxConfig(object):
    """
    Nginx config representation
    Parses configs with all includes, etc

    Main tasks:
    - find all log formats
    - find all access logs
    - find all error logs
    - find stub_status url
    """

    def __init__(self, filename, binary=None, prefix=None):
        self.filename = filename        # path to the root nginx.conf
        self.binary = binary            # path to the nginx binary (for nginx -t)
        self.prefix = prefix            # nginx --prefix, used to resolve relative log paths
        self.log_formats = {}
        self.access_logs = {}
        self.error_logs = {}
        self.test_errors = []
        self.tree = {}
        self.files = {}
        self.directories = {}
        self.directory_map = {}
        self.index = []
        self.subtree = {}
        self.ssl_certificates = {}
        self.parser_ssl_certificates = []
        self.parser_errors = []
        self.stub_status_urls = []
        self.plus_status_external_urls = []
        self.plus_status_internal_urls = []
        self.api_external_urls = []
        self.api_internal_urls = []
        self.parser = None              # lazily created NginxConfigParser
        self.wait_until = 0

    def _setup_parser(self):
        # (re)create the raw parser for self.filename
        self.parser = NginxConfigParser(filename=self.filename)

    def _teardown_parser(self):
        # drop the parser reference so its (potentially large) state can be freed
        self.parser = None

    def full_parse(self):
        """Parses the whole config tree and collects logs, formats and status urls."""
        context.log.debug('parsing full tree of %s' % self.filename)

        # parse raw data
        try:
            self._setup_parser()
            self.parser.parse()
            self._handle_parse()
        except Exception as e:
            context.log.error('failed to parse config at %s (due to %s)' % (self.filename, e.__class__.__name__))
            context.log.debug('additional info:', exc_info=True)
            self._setup_parser()  # Re-init parser to discard partial data (if any)

        # Post-handling

        # try to locate and use default logs (PREFIX/logs/*)
        self.add_default_logs()

        # Go through log files and apply exclude rules (log files are added during .__collect_data())
        self._exclude_logs()

        # try to read from each log file to check if it can be parsed
        self._check_logs()

        # dump access log files, access log formats, and error log files to the debug log
        context.log.debug(
            'parsed log formats, access logs, and error logs:' +
            '\nlog formats: ' + json.dumps(self.log_formats, indent=4, sort_keys=True) +
            '\naccess logs: ' + json.dumps(self.access_logs, indent=4, sort_keys=True) +
            '\nerror logs: ' + json.dumps(self.error_logs, indent=4, sort_keys=True)
        )

    def _handle_parse(self):
        """Copies everything we need off the parser, then tears it down and collects data."""
        self.tree = self.parser.tree
        self.files = self.parser.files
        self.directories = self.parser.directories
        self.directory_map = self.parser.directory_map
        self.index = self.parser.index
        self.subtree = self.parser.simplify()
        self.parser_ssl_certificates = self.parser.ssl_certificates
        self.parser_errors = self.parser.errors

        # now that we have all the things we need from parser, we can tear it down
        self._teardown_parser()

        # go through and collect all logical data
        self.__collect_data(subtree=self.subtree)

    def collect_structure(self, include_ssl_certs=False):
        """
        Goes through all files (light-parsed includes) and collects their mtime

        :param include_ssl_certs: bool - include ssl certs or not
        :return: {} - dict of files
        """
        # if self.parser is None, set it up
        if self.parser is None:
            self._setup_parser()

        files, directories = self.parser.get_structure(include_ssl_certs=include_ssl_certs)
        context.log.debug('found %s files for %s' % (len(files.keys()), self.filename))
        context.log.debug('found %s directories for %s' % (len(directories.keys()), self.filename))

        # always teardown the parser
        self._teardown_parser()

        return files, directories

    def total_size(self):
        """
        Returns the total size of a config tree

        :return: int size in bytes
        """
        return sum(data['size'] for data in self.files.itervalues())

    def __collect_data(self, subtree=None, ctx=None):
        """
        Searches needed data in config's tree

        :param subtree: dict with tree to parse
        :param ctx: dict with context
        """
        ctx = ctx if ctx is not None else {}
        subtree = subtree if subtree is not None else {}

        for key, value in subtree.iteritems():
            if key == 'error_log':
                error_logs = value if isinstance(value, list) else [value]
                for er_log_definition in error_logs:
                    if er_log_definition == 'off':
                        continue

                    split_er_log_definition = er_log_definition.split(' ')
                    log_name = split_er_log_definition[0]
                    log_level = split_er_log_definition[-1] \
                        if split_er_log_definition[-1] in ERROR_LOG_LEVELS else 'error'  # nginx default log level

                    log_name = re.sub('[\'"]', '', log_name)  # remove all ' and "

                    # if not syslog, assume it is a file...if not starts with '/' assume relative path
                    if not log_name.startswith('syslog') and not log_name.startswith('/'):
                        log_name = '%s/%s' % (self.prefix, log_name)

                    if log_name not in self.error_logs:
                        self.error_logs[log_name] = {'log_level': log_level}

            elif key == 'access_log':
                access_logs = value if isinstance(value, list) else [value]
                for ac_log_definition in access_logs:
                    if ac_log_definition == 'off':
                        continue

                    parts = filter(len, ac_log_definition.split(' '))
                    log_format = None if len(parts) == 1 else parts[1]
                    log_name = parts[0]
                    log_name = re.sub('[\'"]', '', log_name)  # remove all ' and "

                    # if not syslog, assume it is a file...if not starts with '/' assume relative path
                    if not log_name.startswith('syslog') and not log_name.startswith('/'):
                        log_name = '%s/%s' % (self.prefix, log_name)

                    self.access_logs[log_name] = {'log_format': log_format}

            elif key == 'log_format':
                for k, v in value.iteritems():
                    self.log_formats[k] = v

            elif key == 'server' and isinstance(value, list) and 'upstream' not in ctx:
                for server in value:
                    # normalize listen to a list; nginx defaults to 80/8000 when absent
                    listen = server.get('listen')
                    if listen is None:
                        listen = ['80', '8000']
                    elif not isinstance(listen, list):
                        listen = [listen]

                    ip_port = []
                    for item in listen:
                        listen_first_part = item.split(' ')[0]
                        try:
                            addr, port = self.__parse_listen(listen_first_part)
                            # standardize address so the agent can poll it locally
                            if addr in ('*', '0.0.0.0'):
                                addr = '127.0.0.1'
                            elif addr == '[::]':
                                addr = '[::1]'
                            ip_port.append((addr, port))
                        except:
                            context.log.error('failed to parse bad ipv6 listen directive: %s' % listen_first_part)
                            context.log.debug('additional info:', exc_info=True)

                    server_ctx = dict(ctx, ip_port=ip_port)
                    if 'server_name' in server:
                        server_ctx['server_name'] = server.get('server_name')

                    self.__collect_data(subtree=server, ctx=server_ctx)

            elif key == 'upstream':
                for upstream, upstream_info in value.iteritems():
                    upstream_ctx = dict(ctx, upstream=upstream)
                    self.__collect_data(subtree=upstream_info, ctx=upstream_ctx)

            elif key == 'location':
                for location, location_info in value.iteritems():
                    location_ctx = dict(ctx, location=location)
                    self.__collect_data(subtree=location_info, ctx=location_ctx)

            elif key == 'stub_status' and ctx and 'ip_port' in ctx:
                for url in self.__status_url(ctx):
                    if url not in self.stub_status_urls:
                        self.stub_status_urls.append(url)

            elif key == 'status' and ctx and 'ip_port' in ctx:
                # use different url builders for external and internal urls
                for url in self.__status_url(ctx, server_preferred=True):
                    if url not in self.plus_status_external_urls:
                        self.plus_status_external_urls.append(url)

                # for internal (agent) usage local ip address is a better choice,
                # because the external url might not be accessible from a host
                for url in self.__status_url(ctx, server_preferred=False):
                    if url not in self.plus_status_internal_urls:
                        self.plus_status_internal_urls.append(url)

            elif key == 'api' and ctx and 'ip_port' in ctx:
                # use different url builders for external and internal urls
                # BUG FIX: membership was checked against plus_status_*_urls while
                # appending to api_*_urls, so the dedup guard never matched and
                # api url lists could accumulate duplicates
                for url in self.__status_url(ctx, server_preferred=True):
                    if url not in self.api_external_urls:
                        self.api_external_urls.append(url)

                # for internal (agent) usage local ip address is a better choice,
                # because the external url might not be accessible from a host
                for url in self.__status_url(ctx, server_preferred=False):
                    if url not in self.api_internal_urls:
                        self.api_internal_urls.append(url)

            elif isinstance(value, dict):
                self.__collect_data(subtree=value, ctx=ctx)

            elif isinstance(value, list):
                for next_subtree in value:
                    if isinstance(next_subtree, dict):
                        self.__collect_data(subtree=next_subtree, ctx=ctx)

    @staticmethod
    def __status_url(ctx, server_preferred=False):
        """
        Creates stub/plus status url based on context

        :param ctx: {} of current parsing context
        :param server_preferred: bool - use server_name instead of listen
        :return: [] of urls
        """
        results = []
        location = ctx.get('location', '/')

        # remove all modifiers
        location_parts = location.split(' ')
        final_location_part = location_parts[-1]

        # generate a random string that will fit regex location
        if location.startswith('~'):
            try:
                exact_location = rstr.xeger(final_location_part)

                # check that regex location has / and add it
                if not exact_location.startswith('/'):
                    exact_location = '/%s' % exact_location
            except:
                context.log.debug('bad regex location: %s' % final_location_part)
                exact_location = None
        else:
            exact_location = final_location_part

            # if an exact location doesn't have / that's not a working location, we should not use it
            if not exact_location.startswith('/'):
                context.log.debug('bad exact location: %s' % final_location_part)
                exact_location = None

        if exact_location:
            for ip_port in ctx.get('ip_port'):
                address, port = ip_port

                if server_preferred and 'server_name' in ctx:
                    if isinstance(ctx['server_name'], list):
                        address = ctx['server_name'][0].split(' ')[0]
                    else:
                        address = ctx['server_name'].split(' ')[0]

                results.append('%s:%s%s' % (address, port, exact_location))

        return results

    def run_test(self):
        """
        Tests the configuration using nginx -t
        Saves event info if syntax check was not successful

        :return: float run time in seconds
        """
        start_time = time.time()
        context.log.info('running %s -t -c %s' % (self.binary, self.filename))
        if self.binary:
            try:
                _, nginx_t_err = subp.call("%s -t -c %s" % (self.binary, self.filename), check=False)
                for line in nginx_t_err:
                    if 'syntax is' in line and 'syntax is ok' not in line:
                        self.test_errors.append(line)
            except Exception as e:
                exception_name = e.__class__.__name__
                context.log.error('failed to %s -t -c %s due to %s' % (self.binary, self.filename, exception_name))
                context.log.debug('additional info:', exc_info=True)

        end_time = time.time()
        return end_time - start_time

    def checksum(self):
        """
        Calculates total checksum of all config files, certificates and permissions

        :return: str checksum
        """
        checksums = []
        for file_path, file_data in self.files.iteritems():
            checksums.append(hashlib.sha256(open(file_path).read()).hexdigest())
            checksums.append(file_data['permissions'])
            checksums.append(str(file_data['mtime']))
        for dir_data in self.directories.itervalues():
            checksums.append(dir_data['permissions'])
            checksums.append(str(dir_data['mtime']))
        for cert in self.ssl_certificates.iterkeys():
            checksums.append(hashlib.sha256(open(cert).read()).hexdigest())
        return hashlib.sha256('.'.join(checksums)).hexdigest()

    def __parse_listen(self, listen):
        """
        Parses listen directive value and return ip:port string, like *:80 and so on

        :param listen: str raw listen
        :return: (address, port) tuple
        """
        if '[' in listen:
            # ipv6
            addr_port_parts = filter(len, listen.rsplit(']', 1))
            address = '%s]' % addr_port_parts[0]

            if len(addr_port_parts) == 1:  # only address specified, add default 80
                return address, '80'
            else:  # get port
                bracket, port = addr_port_parts[1].split(':')
                return address, port
        else:
            # ipv4
            addr_port_parts = filter(len, listen.rsplit(':', 1))

            if len(addr_port_parts) == 1:
                # can be address or port only
                is_port = addr_port_parts[0].isdigit()
                if is_port:  # port!
                    port = addr_port_parts[0]
                    return '*', port
                else:  # it was address only, add default 80
                    address = addr_port_parts[0]
                    return address, '80'
            else:
                address, port = addr_port_parts
                return address, port

    def add_default_logs(self):
        """
        By default nginx uses logs placed in --prefix/logs/ directory
        This method tries to find and add them
        """
        access_log_path = '%s/logs/access.log' % self.prefix
        if os.path.isfile(access_log_path) and access_log_path not in self.access_logs:
            self.access_logs[access_log_path] = {'log_format': None}

        error_log_path = '%s/logs/error.log' % self.prefix
        if os.path.isfile(error_log_path) and error_log_path not in self.error_logs:
            self.error_logs[error_log_path] = {'log_level': 'error'}

    def run_ssl_analysis(self):
        """
        Iterate over a list of ssl_certificate definitions and run ssl_analysis
        to construct a dictionary with ssl_certificate value paired with results
        of ssl_analysis.

        :return: float run time
        """
        if not self.parser_ssl_certificates:
            return

        start_time = time.time()

        for cert_filename in set(self.parser_ssl_certificates):
            ssl_analysis_result = ssl_analysis(cert_filename)
            if ssl_analysis_result:
                self.ssl_certificates[cert_filename] = ssl_analysis_result

        end_time = time.time()
        return end_time - start_time

    def _exclude_logs(self):
        """
        Iterate through log file stores and remove ones that match exclude rules.
        """
        # Take comma-separated string of pathname patterns and separate them into individual patterns
        exclude_rules = context.app_config.get('nginx', {}).get('exclude_logs', '').split(',')

        for rule in [x for x in exclude_rules if x]:  # skip potentially empty rules due to improper formatting
            # access logs
            for excluded_file in glib(self.access_logs.keys(), rule):
                del self.access_logs[excluded_file]

            # error logs
            for excluded_file in glib(self.error_logs.keys(), rule):
                del self.error_logs[excluded_file]

    def _check_logs(self):
        """
        Iterate through log file stores and add permissions and if it is readable to the log data
        """
        for logs in (self.access_logs, self.error_logs):
            # syslog destinations are not files and cannot be stat'ed/opened
            for log_name in filter(lambda name: not name.startswith('syslog'), logs):
                info = get_filesystem_info(log_name)
                logs[log_name]['permissions'] = info['permissions']
                try:
                    with open(log_name, 'r'):
                        pass
                except:
                    logs[log_name]['readable'] = False
                else:
                    logs[log_name]['readable'] = True
class NginxConfig(object):
    """
    Nginx config representation
    Parses configs with all includes, etc

    Main tasks:
    - find all log formats
    - find all access logs
    - find all error logs
    - find stub_status url
    """

    def __init__(self, filename, binary=None, prefix=None):
        """
        :param filename: str path to the root nginx config
        :param binary: str path to the nginx binary (used by run_test)
        :param prefix: str nginx --prefix, used to resolve relative log paths
        """
        self.filename = filename
        self.binary = binary
        self.prefix = prefix
        self.log_formats = {}
        self.access_logs = {}
        self.error_logs = {}
        self.test_errors = []
        self.tree = {}
        self.files = {}
        self.directories = {}
        self.directory_map = {}
        self.index = []
        self.ssl_certificates = {}
        self.parser_errors = []
        self.stub_status_urls = []
        self.plus_status_external_urls = []
        self.plus_status_internal_urls = []
        self.parser = NginxConfigParser(filename)
        self.wait_until = 0

    def full_parse(self):
        """
        Parses the whole config tree and collects logical data
        (log formats, access/error logs, status urls)
        """
        context.log.debug('parsing full tree of %s' % self.filename)

        # parse raw data
        try:
            self.parser.parse()
            self._handle_parse()
        except Exception as e:
            context.log.error('failed to parse config at %s (due to %s)' % (self.filename, e.__class__.__name__))
            context.log.debug('additional info:', exc_info=True)
            self.parser = NginxConfigParser(self.filename)  # Re-init parser to discard partial data (if any)

        # Post-handling
        # try to locate and use default logs (PREFIX/logs/*)
        self.add_default_logs()

        # Go through log files and apply exclude rules (log files are added during .__collect_data())
        self._exclude_logs()

    def _handle_parse(self):
        """
        Copies raw parser results onto self and walks the simplified tree
        """
        self.tree = self.parser.tree
        self.files = self.parser.files
        self.directories = self.parser.directories
        self.directory_map = self.parser.directory_map
        self.index = self.parser.index
        self.parser_errors = self.parser.errors

        # go through and collect all logical data
        self.__collect_data(subtree=self.parser.simplify())

    def collect_structure(self, include_ssl_certs=False):
        """
        Goes through all files (light-parsed includes) and collects their mtime

        :param include_ssl_certs: bool - include ssl certs or not
        :return: (files dict, directories dict) tuple
        """
        files, directories = self.parser.get_structure(include_ssl_certs=include_ssl_certs)
        context.log.debug('found %s files for %s' % (len(files.keys()), self.filename))
        context.log.debug('found %s directories for %s' % (len(directories.keys()), self.filename))
        return files, directories

    def total_size(self):
        """
        Returns the total size of a config tree
        :return: int size in bytes
        """
        return sum(file_data['size'] for file_data in self.files.itervalues())

    def __collect_data(self, subtree=None, ctx=None):
        """
        Searches needed data in config's tree (recursive)

        :param subtree: dict with tree to parse
        :param ctx: dict with context (ip_port, server_name, upstream, location)
        """
        ctx = ctx if ctx is not None else {}
        subtree = subtree if subtree is not None else {}

        for key, value in subtree.iteritems():
            if key == 'error_log':
                error_logs = value if isinstance(value, list) else [value]
                for er_log_definition in error_logs:
                    if er_log_definition == 'off':
                        continue

                    split_er_log_definition = er_log_definition.split(' ')
                    log_name = split_er_log_definition[0]
                    log_level = split_er_log_definition[-1] \
                        if split_er_log_definition[-1] in ERROR_LOG_LEVELS else 'error'  # nginx default log level

                    log_name = re.sub('[\'"]', '', log_name)  # remove all ' and "

                    # if not syslog, assume it is a file...if not starts with '/' assume relative path
                    if not log_name.startswith('syslog') and not log_name.startswith('/'):
                        log_name = '%s/%s' % (self.prefix, log_name)

                    if log_name not in self.error_logs:
                        self.error_logs[log_name] = log_level
            elif key == 'access_log':
                access_logs = value if isinstance(value, list) else [value]
                for ac_log_definition in access_logs:
                    if ac_log_definition == 'off':
                        continue

                    parts = filter(len, ac_log_definition.split(' '))
                    log_format = None if len(parts) == 1 else parts[1]
                    log_name = parts[0]
                    log_name = re.sub('[\'"]', '', log_name)  # remove all ' and "

                    # if not syslog, assume it is a file...if not starts with '/' assume relative path
                    if not log_name.startswith('syslog') and not log_name.startswith('/'):
                        log_name = '%s/%s' % (self.prefix, log_name)

                    self.access_logs[log_name] = log_format
            elif key == 'log_format':
                for k, v in value.iteritems():
                    self.log_formats[k] = v
            elif key == 'server' and isinstance(value, list) and 'upstream' not in ctx:
                for server in value:
                    current_ctx = copy.copy(ctx)

                    if server.get('listen') is None:
                        # if no listens specified, then use default *:80 and *:8000
                        listen = ['80', '8000']
                    else:
                        listen = server.get('listen')
                    listen = listen if isinstance(listen, list) else [listen]

                    ctx['ip_port'] = []
                    for item in listen:
                        listen_first_part = item.split(' ')[0]
                        try:
                            addr, port = self.__parse_listen(listen_first_part)
                            # normalize wildcard/any addresses to loopback so the
                            # collected urls are actually reachable from the agent
                            if addr in ('*', '0.0.0.0'):
                                addr = '127.0.0.1'
                            elif addr == '[::]':
                                addr = '[::1]'
                            ctx['ip_port'].append((addr, port))
                        except Exception:
                            context.log.error('failed to parse bad ipv6 listen directive: %s' % listen_first_part)
                            context.log.debug('additional info:', exc_info=True)

                    if 'server_name' in server:
                        ctx['server_name'] = server.get('server_name')

                    self.__collect_data(subtree=server, ctx=ctx)
                    ctx = current_ctx
            elif key == 'upstream':
                for upstream, upstream_info in value.iteritems():
                    current_ctx = copy.copy(ctx)
                    ctx['upstream'] = upstream
                    self.__collect_data(subtree=upstream_info, ctx=ctx)
                    ctx = current_ctx
            elif key == 'location':
                for location, location_info in value.iteritems():
                    current_ctx = copy.copy(ctx)
                    ctx['location'] = location
                    self.__collect_data(subtree=location_info, ctx=ctx)
                    ctx = current_ctx
            elif key == 'stub_status' and ctx and 'ip_port' in ctx:
                for url in self.__status_url(ctx):
                    if url not in self.stub_status_urls:
                        self.stub_status_urls.append(url)
            elif key == 'status' and ctx and 'ip_port' in ctx:
                # use different url builders for external and internal urls
                for url in self.__status_url(ctx, server_preferred=True):
                    if url not in self.plus_status_external_urls:
                        self.plus_status_external_urls.append(url)

                # for internal (agent) usage local ip address is a better choice,
                # because the external url might not be accessible from a host
                for url in self.__status_url(ctx, server_preferred=False):
                    if url not in self.plus_status_internal_urls:
                        self.plus_status_internal_urls.append(url)
            elif isinstance(value, dict):
                self.__collect_data(subtree=value, ctx=ctx)
            elif isinstance(value, list):
                for next_subtree in value:
                    if isinstance(next_subtree, dict):
                        self.__collect_data(subtree=next_subtree, ctx=ctx)

    @staticmethod
    def __status_url(ctx, server_preferred=False):
        """
        Creates stub/plus status url based on context

        :param ctx: {} of current parsing context
        :param server_preferred: bool - use server_name instead of listen
        :return: [] of urls
        """
        results = []
        location = ctx.get('location', '/')

        # remove all modifiers
        location_parts = location.split(' ')
        final_location_part = location_parts[-1]

        # generate a random sting that will fit regex location
        if location.startswith('~'):
            try:
                exact_location = rstr.xeger(final_location_part)

                # check that regex location has / and add it
                if not exact_location.startswith('/'):
                    exact_location = '/%s' % exact_location
            except Exception:
                # was a bare "except:" — xeger can fail on unsupported regexes,
                # but SystemExit/KeyboardInterrupt should still propagate
                context.log.debug('bad regex location: %s' % final_location_part)
                exact_location = None
        else:
            exact_location = final_location_part

            # if an exact location doesn't have / that's not a working location, we should not use it
            if not exact_location.startswith('/'):
                context.log.debug('bad exact location: %s' % final_location_part)
                exact_location = None

        if exact_location:
            for ip_port in ctx.get('ip_port'):
                address, port = ip_port

                if server_preferred and 'server_name' in ctx:
                    if isinstance(ctx['server_name'], list):
                        address = ctx['server_name'][0].split(' ')[0]
                    else:
                        address = ctx['server_name'].split(' ')[0]

                results.append('%s:%s%s' % (address, port, exact_location))

        return results

    def run_test(self):
        """
        Tests the configuration using nginx -t
        Saves event info if syntax check was not successful

        :return: float run time
        """
        start_time = time.time()
        context.log.info('running %s -t -c %s' % (self.binary, self.filename))
        if self.binary:
            try:
                _, nginx_t_err = subp.call("%s -t -c %s" % (self.binary, self.filename), check=False)
                for line in nginx_t_err:
                    if 'syntax is' in line and 'syntax is ok' not in line:
                        self.test_errors.append(line)
            except Exception as e:
                exception_name = e.__class__.__name__
                context.log.error('failed to %s -t -c %s due to %s' % (self.binary, self.filename, exception_name))
                context.log.debug('additional info:', exc_info=True)
        end_time = time.time()
        return end_time - start_time

    def checksum(self):
        """
        Calculates total checksum of all config files, certificates and permissions

        :return: str checksum
        """
        checksums = []
        for file_path, file_data in self.files.iteritems():
            # "with" closes each config file handle deterministically
            # (was open(...).read(), which leaked the handle until GC)
            with open(file_path) as config_file:
                checksums.append(hashlib.sha256(config_file.read()).hexdigest())
            checksums.append(file_data['permissions'])
            checksums.append(str(file_data['mtime']))
        for dir_data in self.directories.itervalues():
            checksums.append(dir_data['permissions'])
            checksums.append(str(dir_data['mtime']))
        for cert in self.ssl_certificates.iterkeys():
            with open(cert) as cert_file:
                checksums.append(hashlib.sha256(cert_file.read()).hexdigest())
        return hashlib.sha256('.'.join(checksums)).hexdigest()

    def __parse_listen(self, listen):
        """
        Parses a listen directive value and returns an (address, port) pair
        of strings, e.g. ('*', '80')

        :param listen: str raw listen value (address part, without parameters)
        :return: (str, str) - address and port
        """
        if '[' in listen:
            # ipv6
            addr_port_parts = filter(len, listen.rsplit(']', 1))
            address = '%s]' % addr_port_parts[0]

            if len(addr_port_parts) == 1:
                # only address specified, add default 80
                return address, '80'
            else:
                # get port (text after the closing bracket looks like ":8080")
                bracket, port = addr_port_parts[1].split(':')
                return address, port
        else:
            # ipv4
            addr_port_parts = filter(len, listen.rsplit(':', 1))

            if len(addr_port_parts) == 1:
                # can be address or port only
                is_port = addr_port_parts[0].isdigit()
                if is_port:
                    # port!
                    port = addr_port_parts[0]
                    return '*', port
                else:
                    # it was address only, add default 80
                    address = addr_port_parts[0]
                    return address, '80'
            else:
                address, port = addr_port_parts
                return address, port

    def add_default_logs(self):
        """
        By default nginx uses logs placed in --prefix/logs/ directory
        This method tries to find and add them
        """
        access_log_path = '%s/logs/access.log' % self.prefix
        if os.path.isfile(access_log_path) and access_log_path not in self.access_logs:
            self.access_logs[access_log_path] = None

        error_log_path = '%s/logs/error.log' % self.prefix
        if os.path.isfile(error_log_path) and error_log_path not in self.error_logs:
            self.error_logs[error_log_path] = 'error'

    def run_ssl_analysis(self):
        """
        Iterate over a list of ssl_certificate definitions and run ssl_analysis
        to construct a dictionary with ssl_certificate value paired with
        results of ssl_analysis.

        :return: float run time (None if there are no certificates)
        """
        if not self.parser.ssl_certificates:
            return

        start_time = time.time()
        for cert_filename in set(self.parser.ssl_certificates):
            # skip certificates that were already analyzed earlier
            if cert_filename not in self.ssl_certificates:
                ssl_analysis_result = ssl_analysis(cert_filename)
                if ssl_analysis_result:
                    self.ssl_certificates[cert_filename] = ssl_analysis_result
        end_time = time.time()
        return end_time - start_time

    def _exclude_logs(self):
        """
        Iterate through log file stores and remove ones that match exclude rules.
        """
        # Take comma-separated string of pathname patterns and separate them into individual patterns
        exclude_rules = context.app_config.get('nginx', {}).get('exclude_logs', '').split(',')

        for rule in [x for x in exclude_rules if x]:  # skip potentially empty rules due to improper formatting
            # access logs
            for excluded_file in glib(self.access_logs.keys(), rule):
                del self.access_logs[excluded_file]

            # error logs
            for excluded_file in glib(self.error_logs.keys(), rule):
                del self.error_logs[excluded_file]