예제 #1
0
    def test_lightweight_parse_includes(self):
        """
        Checks that lightweight parsing discovers every file and directory
        reachable from the root config, including files pulled in via
        include directives.
        """
        # simple config: root config plus its direct includes
        expected_simple_files = [
            '/amplify/test/fixtures/nginx/simple/conf.d/something.conf',
            '/amplify/test/fixtures/nginx/simple/mime.types',
            '/amplify/test/fixtures/nginx/simple/nginx.conf',
        ]
        expected_simple_dirs = [
            '/amplify/test/fixtures/nginx/simple/',
            '/amplify/test/fixtures/nginx/simple/conf.d/',
        ]
        files, directories = NginxConfigParser(simple_config).get_structure()
        assert_that(files.keys(), contains_inanyorder(*expected_simple_files))
        assert_that(directories.keys(),
                    contains_inanyorder(*expected_simple_dirs))

        # includes config: extra conf.d files reached through includes
        expected_include_files = [
            '/amplify/test/fixtures/nginx/includes/conf.d/something.conf',
            '/amplify/test/fixtures/nginx/includes/mime.types',
            '/amplify/test/fixtures/nginx/includes/conf.d/additional.conf',
            '/amplify/test/fixtures/nginx/includes/conf.d/include.conf',
            '/amplify/test/fixtures/nginx/includes/nginx.conf',
        ]
        expected_include_dirs = [
            '/amplify/test/fixtures/nginx/includes/',
            '/amplify/test/fixtures/nginx/includes/conf.d/',
        ]
        files, directories = NginxConfigParser(includes_config).get_structure()
        assert_that(files.keys(), contains_inanyorder(*expected_include_files))
        assert_that(directories.keys(),
                    contains_inanyorder(*expected_include_dirs))
예제 #2
0
    def test_lightweight_parse_includes(self):
        """
        Checks that lightweight parsing discovers every file and directory
        reachable from the root config.

        dict.keys() ordering is arbitrary, so both sides are sorted before
        comparison instead of relying on insertion order (the previous
        equal_to-on-raw-keys() comparison was order-dependent and flaky).
        """
        # simple
        cfg = NginxConfigParser(simple_config)
        files, directories = cfg.get_structure()
        assert_that(sorted(files.keys()), equal_to(sorted([
            '/amplify/test/fixtures/nginx/simple/conf.d/something.conf',
            '/amplify/test/fixtures/nginx/simple/mime.types',
            '/amplify/test/fixtures/nginx/simple/nginx.conf'
        ])))
        assert_that(sorted(directories.keys()), equal_to(sorted([
            '/amplify/test/fixtures/nginx/simple/',
            '/amplify/test/fixtures/nginx/simple/conf.d/'
        ])))

        # includes
        cfg = NginxConfigParser(includes_config)
        files, directories = cfg.get_structure()
        assert_that(sorted(files.keys()), equal_to(sorted([
            '/amplify/test/fixtures/nginx/includes/conf.d/something.conf',
            '/amplify/test/fixtures/nginx/includes/mime.types',
            '/amplify/test/fixtures/nginx/includes/conf.d/additional.conf',
            '/amplify/test/fixtures/nginx/includes/conf.d/include.conf',
            '/amplify/test/fixtures/nginx/includes/nginx.conf'
        ])))
        assert_that(sorted(directories.keys()), equal_to(sorted([
            '/amplify/test/fixtures/nginx/includes/',
            '/amplify/test/fixtures/nginx/includes/conf.d/'
        ])))
예제 #3
0
    def test_lightweight_parse_includes_permissions(self):
        """
        Checks that we get file permissions during lightweight parsing
        """
        cfg = NginxConfigParser(simple_config)
        files, directories = cfg.get_structure()

        test_file = '/amplify/test/fixtures/nginx/simple/conf.d/something.conf'
        size = os.path.getsize(test_file)
        mtime = int(os.path.getmtime(test_file))
        # 0o777 masks off everything but the permission bits; same value as
        # the py2-only literal 0777 but also valid on python 3
        permissions = oct(os.stat(test_file).st_mode & 0o777)

        assert_that(
            files[test_file],
            equal_to({
                'size': size,
                'mtime': mtime,
                'permissions': permissions
            }))

        test_directory = '/amplify/test/fixtures/nginx/simple/conf.d/'
        size = os.path.getsize(test_directory)
        mtime = int(os.path.getmtime(test_directory))
        permissions = oct(os.stat(test_directory).st_mode & 0o777)

        assert_that(
            directories[test_directory],
            equal_to({
                'size': size,
                'mtime': mtime,
                'permissions': permissions
            }))
예제 #4
0
    def test_lightweight_parse_includes_permissions(self):
        """
        Checks that we get file permissions during lightweight parsing
        """
        cfg = NginxConfigParser(simple_config)
        files, directories = cfg.get_structure()

        test_file = '/amplify/test/fixtures/nginx/simple/conf.d/something.conf'
        size = os.path.getsize(test_file)
        mtime = int(os.path.getmtime(test_file))
        # 0o777 masks off everything but the permission bits; same value as
        # the py2-only literal 0777 but also valid on python 3
        permissions = oct(os.stat(test_file).st_mode & 0o777)

        assert_that(
            files[test_file],
            equal_to({'size': size, 'mtime': mtime, 'permissions': permissions})
        )

        test_directory = '/amplify/test/fixtures/nginx/simple/conf.d/'
        size = os.path.getsize(test_directory)
        mtime = int(os.path.getmtime(test_directory))
        permissions = oct(os.stat(test_directory).st_mode & 0o777)

        assert_that(
            directories[test_directory],
            equal_to({'size': size, 'mtime': mtime, 'permissions': permissions})
        )
예제 #5
0
class NginxConfig(object):
    """
    Nginx config representation **for a running NGINX instance**

    Main tasks:
    - find all log formats
    - find all access logs
    - find all error logs
    - find stub_status url
    """
    def __init__(self, filename, binary=None, prefix=None):
        # :param filename: str - path to the root config file
        # :param binary: str - path to the nginx binary (for `nginx -t` / `-V`)
        # :param prefix: str - nginx prefix dir, used to resolve relative paths
        self.filename = filename
        self.binary = binary
        self.prefix = prefix
        self.log_formats = {}
        self.access_logs = {}
        self.error_logs = {}
        self.test_errors = []
        self.tree = {}
        self.files = {}
        self.directories = {}
        self.directory_map = {}
        self.subtree = []
        self.ssl_certificates = {}
        self.parser_ssl_certificates = []
        self.parser_errors = []
        self.stub_status_urls = []
        self.plus_status_external_urls = []
        self.plus_status_internal_urls = []
        self.api_external_urls = []
        self.api_internal_urls = []
        self.parser = None
        self.wait_until = 0

    def _setup_parser(self):
        # (re)create a parser bound to the root config; also used to discard
        # partial state after a failed parse
        self.parser = NginxConfigParser(filename=self.filename)

    def _teardown_parser(self):
        # drop the parser reference so its (potentially large) tree can be GC'd
        self.parser = None

    def full_parse(self, include_ssl_certs=True):
        """
        Parses the full config tree and collects logs, formats and status urls.

        :param include_ssl_certs: bool - also parse ssl certificate data
        """
        context.log.debug('parsing full tree of %s' % self.filename)

        # parse raw data
        try:
            self._setup_parser()
            self.parser.parse(include_ssl_certs=include_ssl_certs)
            self._handle_parse()
        except Exception as e:
            context.log.error('failed to parse config at %s (due to %s)' %
                              (self.filename, e.__class__.__name__))
            context.log.debug('additional info:', exc_info=True)
            self._setup_parser(
            )  # Re-init parser to discard partial data (if any)

        # Post-handling
        # try to add logs from nginx -V configure options
        self.add_configured_variable_logs()

        # try to locate and use default logs (PREFIX/logs/*)
        self.add_default_logs()

        # Go through log files and apply exclude rules (log files are added during .__colect_data()
        self._exclude_logs()

        # try to read from each log file to check if it can be parsed
        self._check_logs()

        # dump access log files, access log formats, and error log files to the debug log
        context.log.debug(
            'parsed log formats, access logs, and error logs:' +
            '\nlog formats: ' +
            json.dumps(self.log_formats, indent=4, sort_keys=True) +
            '\naccess logs: ' +
            json.dumps(self.access_logs, indent=4, sort_keys=True) +
            '\nerror logs: ' +
            json.dumps(self.error_logs, indent=4, sort_keys=True))

    def _handle_parse(self):
        # copy everything we need out of the parser before tearing it down
        self.tree = self.parser.tree
        self.files = self.parser.files
        self.directories = self.parser.directories
        self.directory_map = self.parser.directory_map
        self.subtree = self.parser.simplify()
        self.ssl_certificates = {}  # gets populated in run_ssl_analysis()
        self.parser_ssl_certificates = self.parser.ssl_certificates
        self.parser_errors = self.parser.errors

        # now that we have all the things we need from parser, we can tear it down
        self._teardown_parser()

        # clear url values in the config that can/will be used to find metrics
        # do this now because self._collect_data() will repopulate the lists
        self.stub_status_urls = []
        self.plus_status_external_urls = []
        self.plus_status_internal_urls = []
        self.api_external_urls = []
        self.api_internal_urls = []

        # go through and collect all logical data
        self._collect_data(self.subtree)

    def collect_structure(self, include_ssl_certs=False):
        """
        Goes through all files (light-parsed includes) and collects their mtime

        :param include_ssl_certs: bool - include ssl certs  or not
        :return: {} - dict of files
        """
        # if self.parser is None, set it up
        if self.parser is None:
            self._setup_parser()

        files, directories = self.parser.get_structure(
            include_ssl_certs=include_ssl_certs)
        context.log.debug('found %s files for %s' %
                          (len(files.keys()), self.filename))
        context.log.debug('found %s directories for %s' %
                          (len(directories.keys()), self.filename))

        # always teardown the parser
        self._teardown_parser()

        return files, directories

    def total_size(self):
        """
        Returns the total size of a config tree
        :return: int size in bytes
        """
        return sum(data['size'] for data in self.files.itervalues())

    def _collect_data(self, block, ctx=None):
        """
        Searches needed data in config's tree

        :param block: list of statement dicts to parse
        :param ctx: dict with context
        """
        ctx = ctx if ctx is not None else {}

        def usable_log_args(args):
            # a log directive is unusable if it's off or its path contains
            # runtime variables (except the `if=` flag, which may contain `$`)
            is_disabled = not args or args[0] == 'off'
            uses_variable = any('$' in arg for arg in args
                                if not arg.startswith('if='))
            return not is_disabled and not uses_variable

        for stmt in block:
            directive = stmt['directive']
            args = stmt['args']

            if directive == 'error_log' and usable_log_args(args):
                path = args[0].replace('"', '').replace("'", '')
                # if not syslog, assume it is a file...if not starts with '/' assume relative path
                if not path.startswith('syslog') and not path.startswith('/'):
                    path = os.path.join(self.prefix, path)

                if path not in self.error_logs:
                    if len(args) > 1 and args[1] in ERROR_LOG_LEVELS:
                        self.error_logs[path] = {'log_level': args[1]}
                    else:
                        # nginx default log level
                        self.error_logs[path] = {'log_level': 'error'}

            elif directive == 'access_log' and usable_log_args(args):
                path = args[0].replace('"', '').replace("'", '')
                # if not syslog, assume it is a file...if not starts with '/' assume relative path
                if not path.startswith('syslog') and not path.startswith('/'):
                    path = os.path.join(self.prefix, path)

                format = args[1] if len(args) > 1 else None
                self.access_logs[path] = {'log_format': format}

            elif directive == 'log_format':
                name, strings = args[0], args[1:]

                # disregard the (optional) escape parameter
                if len(strings) > 1 and strings[0].startswith('escape='):
                    strings.pop(0)

                self.log_formats[name] = ''.join(
                    x.encode('utf-8').decode('string_escape') for x in strings)

            elif directive == 'server' and 'upstream' not in ctx:
                listens = []
                for inner_stmt in stmt['block']:
                    if inner_stmt['directive'] == 'listen':
                        listens.append(inner_stmt['args'][0])

                # nginx defaults when no listen directive is present
                if not listens:
                    listens += ['80', '8000']

                ip_port = []
                for listen in listens:
                    try:
                        ip_port.append(self._parse_listen(listen))
                    except:
                        context.log.error(
                            'failed to parse bad ipv6 listen directive: %s' %
                            listen)
                        context.log.debug('additional info:', exc_info=True)

                server_ctx = dict(ctx, ip_port=ip_port)
                for inner_stmt in stmt['block']:
                    if inner_stmt['directive'] == 'server_name':
                        server_ctx['server_name'] = inner_stmt['args'][0]
                        break

                for inner_stmt in stmt['block']:
                    if inner_stmt['directive'] == 'listen':
                        server_ctx[
                            'server_schema'] = 'https' if 'ssl' in inner_stmt[
                                'args'] else 'http'
                        break

                self._collect_data(stmt['block'], ctx=server_ctx)

            elif directive == 'upstream':
                upstream = args[0]
                upstream_ctx = dict(ctx, upstream=upstream)
                self._collect_data(stmt['block'], ctx=upstream_ctx)

            elif directive == 'location':
                location = ' '.join(map(_enquote, args))
                location_ctx = dict(ctx, location=location)
                self._collect_data(stmt['block'], ctx=location_ctx)

            elif directive == 'stub_status' and 'ip_port' in ctx:
                for url in self._status_url(ctx):
                    if url not in self.stub_status_urls:
                        self.stub_status_urls.append(url)

            elif (directive == 'status'
                  or self._is_plus_dashboard(stmt, ctx)) and 'ip_port' in ctx:
                # use different url builders for external and internal urls
                for url in self._status_url(ctx, server_preferred=True):
                    if url not in self.plus_status_external_urls:
                        self.plus_status_external_urls.append(url)

                # for internal (agent) usage local ip address is a better choice,
                # because the external url might not be accessible from a host
                for url in self._status_url(ctx, server_preferred=False):
                    if url not in self.plus_status_internal_urls:
                        self.plus_status_internal_urls.append(url)

            elif directive == 'api' and 'ip_port' in ctx:
                # use different url builders for external and internal urls
                for url in self._status_url(ctx, server_preferred=True):
                    if url not in self.api_external_urls:
                        self.api_external_urls.append(url)

                # for internal (agent) usage local ip address is a better choice,
                # because the external url might not be accessible from a host
                for url in self._status_url(ctx, server_preferred=False):
                    if url not in self.api_internal_urls:
                        self.api_internal_urls.append(url)

            elif 'block' in stmt:
                # recurse into any other block directive (http, events, if, ...)
                self._collect_data(stmt['block'], ctx=ctx)

    @staticmethod
    def _is_plus_dashboard(stmt, ctx):
        """
        Now that the `status` directive is deprecated this method is used to determine
        plus dashboard urls. It does so by checking to see if the config follows the
        conventional pattern for including the plus dashboard:
            location = /dashboard.html {
                root /usr/share/nginx/html;
            }
        Obviously this is not perfect, but it's the best we can do now that the `status`
        directive is gone.
        """
        correct_directive = stmt['directive'] == 'root'
        correct_arguments = stmt['args'] == ['/usr/share/nginx/html']
        correct_location = ctx.get('location', '/').endswith('dashboard.html')
        return correct_directive and correct_arguments and correct_location

    @staticmethod
    def _status_url(ctx, server_preferred=False):
        """
        Creates stub/plus status url based on context

        :param ctx: {} of current parsing context
        :param server_preferred: bool - use server_name instead of listen
        :return: [] of urls
        """
        location = ctx.get('location', '/')

        # remove all modifiers
        location_parts = location.split(' ')
        final_location_part = location_parts[-1]

        # generate a random sting that will fit regex location
        if location.startswith('~'):
            try:
                exact_location = rstr.xeger(final_location_part)

                # check that regex location has / and add it
                if not exact_location.startswith('/'):
                    exact_location = '/%s' % exact_location
            except:
                context.log.debug('bad regex location: %s' %
                                  final_location_part)
                exact_location = None
        else:
            exact_location = final_location_part

            # if an exact location doesn't have / that's not a working location, we should not use it
            if not exact_location.startswith('/'):
                context.log.debug('bad exact location: %s' %
                                  final_location_part)
                exact_location = None

        if exact_location:
            for ip_port in ctx.get('ip_port'):
                address, port = ip_port
                if server_preferred and 'server_name' in ctx:
                    address = ctx['server_name']

                schema = 'http'
                if 'server_schema' in ctx:
                    schema = ctx['server_schema']

                yield '%s://%s:%s%s' % (schema, address, port, exact_location)

    def run_test(self):
        """
        Tests the configuration using nginx -t
        Saves event info if syntax check was not successful

        :return: float - run time in seconds
        """
        start_time = time.time()
        if self.binary:
            # log the command only when there is actually a binary to run
            # (previously this logged "running None -t -c ..." for None binary)
            context.log.info('running %s -t -c %s' %
                             (self.binary, self.filename))
            try:
                _, nginx_t_err = subp.call("sudo %s -t -c %s" %
                                           (self.binary, self.filename),
                                           check=False)
                for line in nginx_t_err:
                    if 'syntax is' in line and 'syntax is ok' not in line:
                        self.test_errors.append(line)
            except Exception as e:
                exception_name = e.__class__.__name__
                context.log.error('failed to %s -t -c %s due to %s' %
                                  (self.binary, self.filename, exception_name))
                context.log.debug('additional info:', exc_info=True)
        end_time = time.time()
        return end_time - start_time

    def checksum(self):
        """
        Calculates total checksum of all config files, certificates and permissions

        :return: str checksum
        """
        checksums = []
        for file_path, file_data in self.files.iteritems():
            # use a context manager so the file handle is always closed
            # (previously open(...).read() leaked the handle)
            with open(file_path) as f:
                checksums.append(hashlib.sha256(f.read()).hexdigest())
            checksums.append(file_data['permissions'])
            checksums.append(str(file_data['mtime']))
        for dir_data in self.directories.itervalues():
            checksums.append(dir_data['permissions'])
            checksums.append(str(dir_data['mtime']))
        for cert in self.ssl_certificates.iterkeys():
            with open(cert) as f:
                checksums.append(hashlib.sha256(f.read()).hexdigest())
        return hashlib.sha256('.'.join(checksums)).hexdigest()

    def _parse_listen(self, listen):
        """
        Parses listen directive value and return ip:port string, like *:80 and so on

        :param listen: str raw listen
        :return: str ip:port
        """
        if '[' in listen:
            # ipv6 - split on the closing bracket; list() keeps this working
            # on python 3 where filter() returns an iterator
            parts = list(filter(len, listen.rsplit(']', 1)))
            address = '%s]' % parts[0]
            port = '80' if len(parts) == 1 else parts[1].split(':')[1]
        else:
            # ipv4
            parts = list(filter(len, listen.rsplit(':', 1)))
            if len(parts) == 1 and parts[0].isdigit():
                address, port = '*', parts[0]
            elif len(parts) == 1:
                address, port = parts[0], '80'
            else:
                address, port = parts

        # standardize address: wildcard binds are reachable via loopback
        if address in ('*', '0.0.0.0'):
            address = '127.0.0.1'
        elif address == '[::]':
            address = '[::1]'

        return address, port

    def add_configured_variable_logs(self):
        """
        Get logs configured through nginx -V options and try to find access and error logs
        This happens only if nginx access and error logs are not configured in nginx.conf
        """
        if self.binary is not None and (len(self.access_logs) < 1
                                        or len(self.error_logs) < 1):
            try:
                v_options = nginx_v(self.binary)
                configure = v_options['configure']
                # adding access or error logs from options only if they are empty
                if len(self.access_logs) < 1:
                    access_log_path = configure.get('http-log-path')
                    # check for None BEFORE calling isfile - isfile(None)
                    # raises TypeError, defeating the guard
                    if access_log_path is not None and os.path.isfile(
                            access_log_path):
                        self.access_logs[access_log_path] = {
                            'log_format': None
                        }
                if len(self.error_logs) < 1:
                    error_log_path = configure.get('error-log-path')
                    if error_log_path is not None and os.path.isfile(
                            error_log_path):
                        self.error_logs[error_log_path] = {
                            'log_level': 'error'
                        }
            except Exception as e:
                exception_name = e.__class__.__name__
                context.log.error(
                    'failed to get configured variables from %s -V due to %s' %
                    (self.binary, exception_name))
                context.log.debug('additional info:', exc_info=True)

    def add_default_logs(self):
        """
        By default nginx uses logs placed in --prefix/logs/ directory
        This method tries to find and add them
        """
        access_log_path = '%s/logs/access.log' % self.prefix
        if os.path.isfile(
                access_log_path) and access_log_path not in self.access_logs:
            self.access_logs[access_log_path] = {'log_format': None}

        error_log_path = '%s/logs/error.log' % self.prefix
        if os.path.isfile(
                error_log_path) and error_log_path not in self.error_logs:
            self.error_logs[error_log_path] = {'log_level': 'error'}

    def run_ssl_analysis(self):
        """
        Iterate over a list of ssl_certificate definitions and run ssl_analysis to construct a dictionary with
        ssl_certificate value paired with results fo ssl_analysis.

        :return: float run time
        """
        if not self.parser_ssl_certificates:
            return

        start_time = time.time()

        for cert_filename in set(self.parser_ssl_certificates):
            ssl_analysis_result = ssl_analysis(cert_filename)
            if ssl_analysis_result:
                self.ssl_certificates[cert_filename] = ssl_analysis_result

        end_time = time.time()
        return end_time - start_time

    def _exclude_logs(self):
        """
        Iterate through log file stores and remove ones that match exclude rules.
        """
        # Take comma-separated string of pathname patterns and separate them into individual patterns
        exclude_rules = context.app_config.get('nginx',
                                               {}).get('exclude_logs',
                                                       '').split(',')

        for rule in [
                x for x in exclude_rules if x
        ]:  # skip potentially empty rules due to improper formatting
            # access logs
            for excluded_file in glib(self.access_logs.keys(), rule):
                del self.access_logs[excluded_file]

            # error logs
            for excluded_file in glib(self.error_logs.keys(), rule):
                del self.error_logs[excluded_file]

    def _check_logs(self):
        """
        Iterate through log file stores and add permissions and if it is readable to the log data
        """
        for logs in (self.access_logs, self.error_logs):
            # syslog destinations are not files - skip them
            for log_name in filter(lambda name: not name.startswith('syslog'),
                                   logs):

                info = get_filesystem_info(log_name)
                logs[log_name]['permissions'] = info['permissions']

                try:
                    with open(log_name, 'r'):
                        pass
                except:
                    logs[log_name]['readable'] = False
                else:
                    logs[log_name]['readable'] = True
예제 #6
0
class NginxConfig(object):
    """
    Nginx config representation
    Parses configs with all includes, etc

    Main tasks:
    - find all log formats
    - find all access logs
    - find all error logs
    - find stub_status url
    """

    def __init__(self, filename, binary=None, prefix=None):
        self.filename = filename
        self.binary = binary
        self.prefix = prefix
        self.log_formats = {}
        self.access_logs = {}
        self.error_logs = {}
        self.test_errors = []
        self.tree = {}
        self.files = {}
        self.directories = {}
        self.directory_map = {}
        self.index = []
        self.subtree = {}
        self.ssl_certificates = {}
        self.parser_ssl_certificates = []
        self.parser_errors = []
        self.stub_status_urls = []
        self.plus_status_external_urls = []
        self.plus_status_internal_urls = []
        self.api_external_urls = []
        self.api_internal_urls = []
        self.parser = None
        self.wait_until = 0

    def _setup_parser(self):
        # (Re)create a parser bound to the root config file; also used after a
        # failed parse to discard any partial parser state.
        self.parser = NginxConfigParser(filename=self.filename)

    def _teardown_parser(self):
        # Drop the parser reference so its (potentially large) tree can be
        # garbage-collected once the needed data has been copied out.
        self.parser = None

    def full_parse(self):
        """
        Fully parses the config tree and collects log files, log formats
        and status urls from it. Parse failures are logged and leave the
        object with a fresh (empty) parser rather than partial data.
        """
        context.log.debug('parsing full tree of %s' % self.filename)

        # parse raw data
        try:
            self._setup_parser()
            self.parser.parse()
            self._handle_parse()
        except Exception as exc:
            context.log.error('failed to parse config at %s (due to %s)' % (self.filename, exc.__class__.__name__))
            context.log.debug('additional info:', exc_info=True)
            self._setup_parser()  # Re-init parser to discard partial data (if any)

        # Post-handling

        # try to locate and use default logs (PREFIX/logs/*)
        self.add_default_logs()

        # Go through log files and apply exclude rules (log files are added during .__colect_data()
        self._exclude_logs()

        # try to read from each log file to check if it can be parsed
        self._check_logs()

        # dump access log files, access log formats, and error log files to the debug log
        dump = 'parsed log formats, access logs, and error logs:'
        dump += '\nlog formats: ' + json.dumps(self.log_formats, indent=4, sort_keys=True)
        dump += '\naccess logs: ' + json.dumps(self.access_logs, indent=4, sort_keys=True)
        dump += '\nerror logs: ' + json.dumps(self.error_logs, indent=4, sort_keys=True)
        context.log.debug(dump)

    def _handle_parse(self):
        # Copy everything we need out of the parser; this must happen before
        # _teardown_parser() below, and simplify() must be called while the
        # parser still holds its tree.
        self.tree = self.parser.tree
        self.files = self.parser.files
        self.directories = self.parser.directories
        self.directory_map = self.parser.directory_map
        self.index = self.parser.index
        self.subtree = self.parser.simplify()
        self.parser_ssl_certificates = self.parser.ssl_certificates
        self.parser_errors = self.parser.errors

        # now that we have all the things we need from parser, we can tear it down
        self._teardown_parser()

        # go through and collect all logical data
        self.__collect_data(subtree=self.subtree)

    def collect_structure(self, include_ssl_certs=False):
        """
        Light parse: walks all include files and returns their filesystem
        metadata (mtime etc.) without building the full tree.

        :param include_ssl_certs: bool - include ssl certs  or not
        :return: (dict, dict) - files and directories
        """
        # lazily create a parser for this run if one isn't already set up
        if self.parser is None:
            self._setup_parser()

        structure = self.parser.get_structure(include_ssl_certs=include_ssl_certs)
        files, directories = structure

        context.log.debug('found %s files for %s' % (len(files.keys()), self.filename))
        context.log.debug('found %s directories for %s' % (len(directories.keys()), self.filename))

        # always teardown the parser
        self._teardown_parser()

        return files, directories

    def total_size(self):
        """
        Returns the total size of a config tree
        :return: int size in bytes
        """
        return sum(data['size'] for data in self.files.itervalues())

    def __collect_data(self, subtree=None, ctx=None):
        """
        Searches needed data in config's tree

        :param subtree: dict with tree to parse
        :param ctx: dict with context
        """
        ctx = ctx if ctx is not None else {}
        subtree = subtree if subtree is not None else {}

        for key, value in subtree.iteritems():
            if key == 'error_log':
                error_logs = value if isinstance(value, list) else [value]
                for er_log_definition in error_logs:
                    if er_log_definition == 'off':
                        continue

                    split_er_log_definition = er_log_definition.split(' ')
                    log_name = split_er_log_definition[0]
                    log_level = split_er_log_definition[-1] \
                        if split_er_log_definition[-1] in ERROR_LOG_LEVELS else 'error'  # nginx default log level
                    log_name = re.sub('[\'"]', '', log_name)  # remove all ' and "

                    # if not syslog, assume it is a file...if not starts with '/' assume relative path
                    if not log_name.startswith('syslog') and not log_name.startswith('/'):
                        log_name = '%s/%s' % (self.prefix, log_name)

                    if log_name not in self.error_logs:
                        self.error_logs[log_name] = {'log_level': log_level}

            elif key == 'access_log':
                access_logs = value if isinstance(value, list) else [value]
                for ac_log_definition in access_logs:
                    if ac_log_definition == 'off':
                        continue

                    parts = filter(len, ac_log_definition.split(' '))
                    log_format = None if len(parts) == 1 else parts[1]
                    log_name = parts[0]
                    log_name = re.sub('[\'"]', '', log_name)  # remove all ' and "

                    # if not syslog, assume it is a file...if not starts with '/' assume relative path
                    if not log_name.startswith('syslog') and not log_name.startswith('/'):
                        log_name = '%s/%s' % (self.prefix, log_name)

                    self.access_logs[log_name] = {'log_format': log_format}

            elif key == 'log_format':
                for k, v in value.iteritems():
                    self.log_formats[k] = v

            elif key == 'server' and isinstance(value, list) and 'upstream' not in ctx:
                for server in value:
                    listen = server.get('listen')
                    if listen is None:
                        listen = ['80', '8000']
                    elif not isinstance(listen, list):
                        listen = [listen]

                    ip_port = []
                    for item in listen:
                        listen_first_part = item.split(' ')[0]
                        try:
                            addr, port = self.__parse_listen(listen_first_part)
                            if addr in ('*', '0.0.0.0'):
                                addr = '127.0.0.1'
                            elif addr == '[::]':
                                addr = '[::1]'
                            ip_port.append((addr, port))
                        except:
                            context.log.error('failed to parse bad ipv6 listen directive: %s' % listen_first_part)
                            context.log.debug('additional info:', exc_info=True)

                    server_ctx = dict(ctx, ip_port=ip_port)
                    if 'server_name' in server:
                        server_ctx['server_name'] = server.get('server_name')

                    self.__collect_data(subtree=server, ctx=server_ctx)

            elif key == 'upstream':
                for upstream, upstream_info in value.iteritems():
                    upstream_ctx = dict(ctx, upstream=upstream)
                    self.__collect_data(subtree=upstream_info, ctx=upstream_ctx)

            elif key == 'location':
                for location, location_info in value.iteritems():
                    location_ctx = dict(ctx, location=location)
                    self.__collect_data(subtree=location_info, ctx=location_ctx)

            elif key == 'stub_status' and ctx and 'ip_port' in ctx:
                for url in self.__status_url(ctx):
                    if url not in self.stub_status_urls:
                        self.stub_status_urls.append(url)

            elif key == 'status' and ctx and 'ip_port' in ctx:
                # use different url builders for external and internal urls
                for url in self.__status_url(ctx, server_preferred=True):
                    if url not in self.plus_status_external_urls:
                        self.plus_status_external_urls.append(url)

                # for internal (agent) usage local ip address is a better choice,
                # because the external url might not be accessible from a host
                for url in self.__status_url(ctx, server_preferred=False):
                    if url not in self.plus_status_internal_urls:
                        self.plus_status_internal_urls.append(url)

            elif key == 'api' and ctx and 'ip_port' in ctx:
                # use different url builders for external and internal urls
                for url in self.__status_url(ctx, server_preferred=True):
                    if url not in self.plus_status_external_urls:
                        self.api_external_urls.append(url)

                # for internal (agent) usage local ip address is a better choice,
                # because the external url might not be accessible from a host
                for url in self.__status_url(ctx, server_preferred=False):
                    if url not in self.plus_status_internal_urls:
                        self.api_internal_urls.append(url)

            elif isinstance(value, dict):
                self.__collect_data(subtree=value, ctx=ctx)

            elif isinstance(value, list):
                for next_subtree in value:
                    if isinstance(next_subtree, dict):
                        self.__collect_data(subtree=next_subtree, ctx=ctx)

    @staticmethod
    def __status_url(ctx, server_preferred=False):
        """
        Builds stub_status/plus status urls from the current parsing context.

        :param ctx: {} of current parsing context
        :param server_preferred: bool - prefer server_name over the listen address
        :return: [] of urls (empty if the location is unusable)
        """
        location = ctx.get('location', '/')

        # drop any location modifiers, keep only the final token
        raw_location = location.split(' ')[-1]

        if location.startswith('~'):
            # regex location - generate a random string that matches the pattern
            try:
                exact_location = rstr.xeger(raw_location)
                # a usable location always starts with /
                if not exact_location.startswith('/'):
                    exact_location = '/%s' % exact_location
            except:
                context.log.debug('bad regex location: %s' % raw_location)
                exact_location = None
        else:
            exact_location = raw_location
            # an exact location without a leading / is not a working location
            if not exact_location.startswith('/'):
                context.log.debug('bad exact location: %s' % raw_location)
                exact_location = None

        if not exact_location:
            return []

        urls = []
        for address, port in ctx.get('ip_port'):
            if server_preferred and 'server_name' in ctx:
                server_name = ctx['server_name']
                if isinstance(server_name, list):
                    server_name = server_name[0]
                address = server_name.split(' ')[0]
            urls.append('%s:%s%s' % (address, port, exact_location))
        return urls

    def run_test(self):
        """
        Runs "nginx -t" against the current config and collects any syntax
        errors it reports into self.test_errors.

        :return: float - elapsed seconds
        """
        started = time.time()
        context.log.info('running %s -t -c %s' % (self.binary, self.filename))
        if self.binary:
            try:
                _, stderr_lines = subp.call("%s -t -c %s" % (self.binary, self.filename), check=False)
                # keep only lines that report a syntax problem
                self.test_errors.extend(
                    line for line in stderr_lines
                    if 'syntax is' in line and 'syntax is ok' not in line
                )
            except Exception as e:
                context.log.error('failed to %s -t -c %s due to %s' % (self.binary, self.filename, e.__class__.__name__))
                context.log.debug('additional info:', exc_info=True)
        return time.time() - started

    def checksum(self):
        """
        Calculates total checksum of all config files, certificates and permissions

        :return: str - sha256 hex digest
        """
        checksums = []
        for file_path, file_data in self.files.iteritems():
            # context manager closes the handle deterministically
            # (the previous open(...).read() relied on GC for cleanup)
            with open(file_path) as f:
                checksums.append(hashlib.sha256(f.read()).hexdigest())
            checksums.append(file_data['permissions'])
            checksums.append(str(file_data['mtime']))
        for dir_data in self.directories.itervalues():
            checksums.append(dir_data['permissions'])
            checksums.append(str(dir_data['mtime']))
        for cert in self.ssl_certificates.iterkeys():
            with open(cert) as f:
                checksums.append(hashlib.sha256(f.read()).hexdigest())
        return hashlib.sha256('.'.join(checksums)).hexdigest()

    def __parse_listen(self, listen):
        """
        Parses a listen directive value into its address and port parts.

        :param listen: str - raw listen value (e.g. "*:80", "[::]:8000", "8080")
        :return: (str address, str port) tuple
        """
        if '[' in listen:
            # ipv6 - split on the closing bracket so colons inside the
            # address are not mistaken for the port separator.
            # list comprehension instead of filter(): filter() returns an
            # iterator on py3, which breaks the len()/indexing below
            addr_port_parts = [part for part in listen.rsplit(']', 1) if part]
            address = '%s]' % addr_port_parts[0]

            if len(addr_port_parts) == 1:  # only address specified, add default 80
                return address, '80'
            else:  # remainder looks like ":port"
                _, port = addr_port_parts[1].split(':')
                return address, port
        else:
            # ipv4
            addr_port_parts = [part for part in listen.rsplit(':', 1) if part]

            if len(addr_port_parts) == 1:
                # single part: either a bare port or a bare address
                if addr_port_parts[0].isdigit():  # port!
                    return '*', addr_port_parts[0]
                else:  # it was address only, add default 80
                    return addr_port_parts[0], '80'
            else:
                address, port = addr_port_parts
                return address, port

    def add_default_logs(self):
        """
        Nginx falls back to logs under --prefix/logs/ by default,
        so register those files too when they actually exist on disk.
        """
        default_access = '%s/logs/access.log' % self.prefix
        if default_access not in self.access_logs and os.path.isfile(default_access):
            self.access_logs[default_access] = {'log_format': None}

        default_error = '%s/logs/error.log' % self.prefix
        if default_error not in self.error_logs and os.path.isfile(default_error):
            self.error_logs[default_error] = {'log_level': 'error'}

    def run_ssl_analysis(self):
        """
        Runs ssl_analysis for every unique ssl_certificate found by the
        parser and stores the results keyed by certificate path.

        :return: float run time (None if there were no certificates)
        """
        if not self.parser_ssl_certificates:
            return

        started = time.time()
        for cert_filename in set(self.parser_ssl_certificates):
            analysis = ssl_analysis(cert_filename)
            if analysis:
                self.ssl_certificates[cert_filename] = analysis
        return time.time() - started

    def _exclude_logs(self):
        """
        Removes collected log files that match any of the configured
        "exclude_logs" pathname patterns.
        """
        # exclude_logs is a comma-separated string of glob-like patterns;
        # filter(None, ...) drops empty entries caused by sloppy formatting
        raw_rules = context.app_config.get('nginx', {}).get('exclude_logs', '')
        for rule in filter(None, raw_rules.split(',')):
            for store in (self.access_logs, self.error_logs):
                for excluded_file in glib(store.keys(), rule):
                    del store[excluded_file]

    def _check_logs(self):
        """
        Annotates every collected non-syslog log file with its filesystem
        permissions and whether the agent can actually open it for reading.
        """
        for logs in (self.access_logs, self.error_logs):
            for log_name in logs:
                if log_name.startswith('syslog'):
                    continue

                logs[log_name]['permissions'] = get_filesystem_info(log_name)['permissions']

                readable = True
                try:
                    handle = open(log_name, 'r')
                except:
                    readable = False
                else:
                    handle.close()
                logs[log_name]['readable'] = readable
Example #7
class NginxConfig(object):
    """
    Nginx config representation
    Parses configs with all includes, etc

    Main tasks:
    - find all log formats
    - find all access logs
    - find all error logs
    - find stub_status url
    """

    def __init__(self, filename, binary=None, prefix=None):
        """
        :param filename: str - path to the main nginx config file
        :param binary: str - path to the nginx binary (used by run_test), optional
        :param prefix: str - nginx prefix path, used to resolve relative log paths
        """
        self.filename = filename
        self.binary = binary
        self.prefix = prefix
        self.log_formats = {}       # log format name -> format string
        self.access_logs = {}       # access log path -> log format name (or None)
        self.error_logs = {}        # error log path -> log level
        self.test_errors = []       # syntax errors collected by run_test()
        self.tree = {}              # parsed config tree (copied from the parser)
        self.files = {}             # file path -> meta info (size, permissions, mtime)
        self.directories = {}       # directory path -> meta info (permissions, mtime)
        self.directory_map = {}
        self.index = []
        self.ssl_certificates = {}  # cert path -> ssl_analysis() results
        self.parser_errors = []
        self.stub_status_urls = []
        self.plus_status_external_urls = []
        self.plus_status_internal_urls = []
        self.parser = NginxConfigParser(filename)
        self.wait_until = 0

    def full_parse(self):
        """
        Parses the whole config tree, collects logical data from it and
        applies post-processing (default logs, exclude rules).
        """
        context.log.debug('parsing full tree of %s' % self.filename)

        try:
            self.parser.parse()
            self._handle_parse()
        except Exception as e:
            context.log.error('failed to parse config at %s (due to %s)' % (self.filename, e.__class__.__name__))
            context.log.debug('additional info:', exc_info=True)
            # re-init the parser to discard any partially parsed data
            self.parser = NginxConfigParser(self.filename)

        # try to locate and use default logs (PREFIX/logs/*)
        self.add_default_logs()

        # apply the exclude rules to log files found by __collect_data()
        self._exclude_logs()

    def _handle_parse(self):
        """
        Copies the raw parse results from the parser, then walks the
        simplified tree to collect logical data (logs, statuses, etc).
        """
        parser = self.parser
        self.tree = parser.tree
        self.files = parser.files
        self.directories = parser.directories
        self.directory_map = parser.directory_map
        self.index = parser.index
        self.parser_errors = parser.errors

        # go through and collect all logical data
        self.__collect_data(subtree=parser.simplify())

    def collect_structure(self, include_ssl_certs=False):
        """
        Goes through all files (light-parsed includes) and collects their
        meta information (mtime etc).

        :param include_ssl_certs: bool - include ssl certs or not
        :return: (files dict, directories dict) tuple
        """
        files, directories = self.parser.get_structure(include_ssl_certs=include_ssl_certs)
        context.log.debug('found %s files for %s' % (len(files), self.filename))
        context.log.debug('found %s directories for %s' % (len(directories), self.filename))
        return files, directories

    def total_size(self):
        """
        Returns the total size of a config tree
        :return: int size in bytes
        """
        return sum(file_data['size'] for file_data in self.files.itervalues())

    def __collect_data(self, subtree=None, ctx=None):
        """
        Searches needed data in config's tree

        Recursively walks the simplified config tree and populates:
        self.error_logs, self.access_logs, self.log_formats,
        self.stub_status_urls and self.plus_status_*_urls.

        :param subtree: dict with tree to parse
        :param ctx: dict with context (carries ip_port / server_name /
                    upstream / location keys while descending into blocks)
        """
        ctx = ctx if ctx is not None else {}
        subtree = subtree if subtree is not None else {}

        for key, value in subtree.iteritems():
            if key == 'error_log':
                # a directive may occur several times -> normalize to a list
                error_logs = value if isinstance(value, list) else [value]
                for er_log_definition in error_logs:
                    if er_log_definition == 'off':
                        continue

                    split_er_log_definition = er_log_definition.split(' ')
                    log_name = split_er_log_definition[0]
                    log_level = split_er_log_definition[-1] \
                        if split_er_log_definition[-1] in ERROR_LOG_LEVELS else 'error'  # nginx default log level
                    log_name = re.sub('[\'"]', '', log_name)  # remove all ' and "

                    # if not syslog, assume it is a file...if not starts with '/' assume relative path
                    if not log_name.startswith('syslog') and not log_name.startswith('/'):
                        log_name = '%s/%s' % (self.prefix, log_name)

                    if log_name not in self.error_logs:
                        self.error_logs[log_name] = log_level
            elif key == 'access_log':
                access_logs = value if isinstance(value, list) else [value]
                for ac_log_definition in access_logs:
                    if ac_log_definition == 'off':
                        continue

                    # second token (if any) is the log_format name
                    parts = filter(len, ac_log_definition.split(' '))
                    log_format = None if len(parts) == 1 else parts[1]
                    log_name = parts[0]
                    log_name = re.sub('[\'"]', '', log_name)  # remove all ' and "

                    # if not syslog, assume it is a file...if not starts with '/' assume relative path
                    if not log_name.startswith('syslog') and not log_name.startswith('/'):
                        log_name = '%s/%s' % (self.prefix, log_name)

                    self.access_logs[log_name] = log_format
            elif key == 'log_format':
                for k, v in value.iteritems():
                    self.log_formats[k] = v
            elif key == 'server' and isinstance(value, list) and 'upstream' not in ctx:
                for server in value:

                    # remember the outer ctx -- it is mutated below and
                    # restored after recursing into this server block
                    current_ctx = copy.copy(ctx)
                    if server.get('listen') is None:
                        # if no listens specified, then use default *:80 and *:8000
                        listen = ['80', '8000']
                    else:
                        listen = server.get('listen')
                    listen = listen if isinstance(listen, list) else [listen]

                    ctx['ip_port'] = []
                    for item in listen:
                        listen_first_part = item.split(' ')[0]
                        try:
                            addr, port = self.__parse_listen(listen_first_part)
                            # rewrite wildcard/any addresses to local ones the
                            # agent can actually connect to
                            if addr in ('*', '0.0.0.0'):
                                addr = '127.0.0.1'
                            elif addr == '[::]':
                                addr = '[::1]'
                            ctx['ip_port'].append((addr, port))
                        except Exception as e:
                            context.log.error('failed to parse bad ipv6 listen directive: %s' % listen_first_part)
                            context.log.debug('additional info:', exc_info=True)

                    if 'server_name' in server:
                        ctx['server_name'] = server.get('server_name')

                    self.__collect_data(subtree=server, ctx=ctx)
                    ctx = current_ctx
            elif key == 'upstream':
                for upstream, upstream_info in value.iteritems():
                    current_ctx = copy.copy(ctx)
                    ctx['upstream'] = upstream
                    self.__collect_data(subtree=upstream_info, ctx=ctx)
                    ctx = current_ctx
            elif key == 'location':
                for location, location_info in value.iteritems():
                    current_ctx = copy.copy(ctx)
                    ctx['location'] = location
                    self.__collect_data(subtree=location_info, ctx=ctx)
                    ctx = current_ctx
            elif key == 'stub_status' and ctx and 'ip_port' in ctx:
                for url in self.__status_url(ctx):
                    if url not in self.stub_status_urls:
                        self.stub_status_urls.append(url)
            elif key == 'status' and ctx and 'ip_port' in ctx:
                # use different url builders for external and internal urls
                for url in self.__status_url(ctx, server_preferred=True):
                    if url not in self.plus_status_external_urls:
                        self.plus_status_external_urls.append(url)

                # for internal (agent) usage local ip address is a better choice,
                # because the external url might not be accessible from a host
                for url in self.__status_url(ctx, server_preferred=False):
                    if url not in self.plus_status_internal_urls:
                        self.plus_status_internal_urls.append(url)
            elif isinstance(value, dict):
                # unknown block -- descend looking for nested directives
                self.__collect_data(subtree=value, ctx=ctx)
            elif isinstance(value, list):
                for next_subtree in value:
                    if isinstance(next_subtree, dict):
                        self.__collect_data(subtree=next_subtree, ctx=ctx)

    @staticmethod
    def __status_url(ctx, server_preferred=False):
        """
        Creates stub/plus status url based on context

        :param ctx: {} of current parsing context
        :param server_preferred: bool - use server_name instead of listen
        :return: [] of urls
        """
        results = []
        location = ctx.get('location', '/')

        # remove all modifiers
        location_parts = location.split(' ')
        final_location_part = location_parts[-1]

        # generate a random sting that will fit regex location
        if location.startswith('~'):
            try:
                exact_location = rstr.xeger(final_location_part)

                # check that regex location has / and add it
                if not exact_location.startswith('/'):
                    exact_location = '/%s' % exact_location
            except:
                context.log.debug('bad regex location: %s' % final_location_part)
                exact_location = None
        else:
            exact_location = final_location_part

            # if an exact location doesn't have / that's not a working location, we should not use it
            if not exact_location.startswith('/'):
                context.log.debug('bad exact location: %s' % final_location_part)
                exact_location = None

        if exact_location:
            for ip_port in ctx.get('ip_port'):
                address, port = ip_port
                if server_preferred and 'server_name' in ctx:
                    if isinstance(ctx['server_name'], list):
                        address = ctx['server_name'][0].split(' ')[0]
                    else:
                        address = ctx['server_name'].split(' ')[0]

                results.append('%s:%s%s' % (address, port, exact_location))

        return results

    def run_test(self):
        """
        Runs "nginx -t" against the current config and collects any syntax
        errors it reports into self.test_errors.

        :return: float - elapsed seconds
        """
        started = time.time()
        context.log.info('running %s -t -c %s' % (self.binary, self.filename))
        if self.binary:
            try:
                _, stderr_lines = subp.call("%s -t -c %s" % (self.binary, self.filename), check=False)
                # keep only lines that report a syntax problem
                self.test_errors.extend(
                    line for line in stderr_lines
                    if 'syntax is' in line and 'syntax is ok' not in line
                )
            except Exception as e:
                context.log.error('failed to %s -t -c %s due to %s' % (self.binary, self.filename, e.__class__.__name__))
                context.log.debug('additional info:', exc_info=True)
        return time.time() - started

    def checksum(self):
        """
        Calculates total checksum of all config files, certificates and permissions

        :return: str checksum
        """
        checksums = []
        for file_path, file_data in self.files.iteritems():
            checksums.append(hashlib.sha256(open(file_path).read()).hexdigest())
            checksums.append(file_data['permissions'])
            checksums.append(str(file_data['mtime']))
        for dir_data in self.directories.itervalues():
            checksums.append(dir_data['permissions'])
            checksums.append(str(dir_data['mtime']))
        for cert in self.ssl_certificates.iterkeys():
            checksums.append(hashlib.sha256(open(cert).read()).hexdigest())
        return hashlib.sha256('.'.join(checksums)).hexdigest()

    def __parse_listen(self, listen):
        """
        Parses listen directive value and return ip:port string, like *:80 and so on

        :param listen: str raw listen
        :return: str ip:port
        """
        if '[' in listen:
            # ipv6
            addr_port_parts = filter(len, listen.rsplit(']', 1))
            address = '%s]' % addr_port_parts[0]

            if len(addr_port_parts) == 1:  # only address specified, add default 80
                return address, '80'
            else:  # get port
                bracket, port = addr_port_parts[1].split(':')
                return address, port
        else:
            # ipv4
            addr_port_parts = filter(len, listen.rsplit(':', 1))

            if len(addr_port_parts) == 1:
                # can be address or port only
                is_port = addr_port_parts[0].isdigit()
                if is_port:  # port!
                    port = addr_port_parts[0]
                    return '*', port
                else:  # it was address only, add default 80
                    address = addr_port_parts[0]
                    return address, '80'
            else:
                address, port = addr_port_parts
                return address, port

    def add_default_logs(self):
        """
        By default nginx uses logs placed in --prefix/logs/ directory
        This method tries to find and add them
        """
        access_log_path = '%s/logs/access.log' % self.prefix
        if os.path.isfile(access_log_path) and access_log_path not in self.access_logs:
            self.access_logs[access_log_path] = None

        error_log_path = '%s/logs/error.log' % self.prefix
        if os.path.isfile(error_log_path) and error_log_path not in self.error_logs:
            self.error_logs[error_log_path] = 'error'

    def run_ssl_analysis(self):
        """
        Runs ssl_analysis for every certificate found by the parser that has
        not been analysed yet and stores the results keyed by cert path.

        :return: float run time (None if there were no certificates)
        """
        if not self.parser.ssl_certificates:
            return

        started = time.time()
        for cert_filename in set(self.parser.ssl_certificates):
            if cert_filename in self.ssl_certificates:
                continue  # already analysed on a previous run
            analysis = ssl_analysis(cert_filename)
            if analysis:
                self.ssl_certificates[cert_filename] = analysis
        return time.time() - started

    def _exclude_logs(self):
        """
        Removes collected log files that match any of the configured
        "exclude_logs" pathname patterns.
        """
        # exclude_logs is a comma-separated string of glob-like patterns;
        # filter(None, ...) drops empty entries caused by sloppy formatting
        raw_rules = context.app_config.get('nginx', {}).get('exclude_logs', '')
        for rule in filter(None, raw_rules.split(',')):
            for store in (self.access_logs, self.error_logs):
                for excluded_file in glib(store.keys(), rule):
                    del store[excluded_file]