Esempio n. 1
0
    def test_parse_line_split_upstream_log_format_empty_upstreams(self):
        """
        A line whose upstream fields are all empty ("-") must still be split
        completely, yielding a non-None value for every key of the format.
        """
        log_format = (
            '$remote_addr - $remote_user [$time_local] '
            '"$request" $status $body_bytes_sent "$http_referer" "$http_user_agent" '
            'rt=$request_time cs=$upstream_cache_status ut="$upstream_response_time"'
        )
        line = (
            '1.2.3.4 - - [22/Jan/2010:19:34:21 +0300] "GET /foo/ HTTP/1.1" 200 11078 '
            '"http://www.rambler.ru/" "Mozilla/5.0 (Windows; U; Windows NT 5.1" rt=0.010 cs=- ut="-"'
        )

        decomposed = decompose_format(log_format, full=True)
        keys, _, non_key_patterns, first_value_is_key = decomposed

        results = parse_line_split(
            line,
            keys=keys,
            non_key_patterns=non_key_patterns,
            first_value_is_key=first_value_is_key
        )
        assert_that(results, not_none())

        # every key declared by the format has to be present with a value
        for expected_key in keys:
            assert_that(results, has_item(expected_key))
            assert_that(results[expected_key], not_none())

        # the trailing fields prove the line was consumed up to its end
        empty_upstream_fields = ('upstream_response_time', 'upstream_cache_status')
        for field in empty_upstream_fields:
            assert_that(results[field], equal_to('-'))
Esempio n. 2
0
    def test_parse_line_split(self):
        """
        A line in the combined format must be split into a non-None value for
        every key, including the final quoted user agent.
        """
        user_agent = 'python-requests/2.2.1 CPython/2.7.6 Linux/3.13.0-48-generic'
        line = (
            '127.0.0.1 - - [02/Jul/2015:14:49:48 +0000] "GET /basic_status HTTP/1.1" 200 110 "-" '
            '"python-requests/2.2.1 CPython/2.7.6 Linux/3.13.0-48-generic"'
        )

        decomposed = decompose_format(COMBINED_FORMAT, full=True)
        keys, _, non_key_patterns, first_value_is_key = decomposed

        results = parse_line_split(
            line,
            keys=keys,
            non_key_patterns=non_key_patterns,
            first_value_is_key=first_value_is_key
        )
        assert_that(results, not_none())

        # every key declared by the format has to be present with a value
        for expected_key in keys:
            assert_that(results, has_item(expected_key))
            assert_that(results[expected_key], not_none())

        # the last field proves the line was consumed up to its end
        assert_that(results['http_user_agent'], equal_to(user_agent))
Esempio n. 3
0
    def parse(self, line):
        """
        Parse a log line and post-process its special fields.

        The line is split with ``parse_line_split`` (split mechanic rather
        than trie matching directly); every key of the format is then cast
        with its configured converter. Additionally:

        * keys ending in ``_time`` are stored as a list of floats (omitted
          entirely when the value is empty, ``-`` or has no usable number);
        * keys listed in ``self.comma_separated_keys`` are stored as lists;
        * ``request`` is split into ``request_method``, ``request_uri`` and
          ``server_protocol``; if that fails, or the method is shorter than
          three characters, ``malformed`` is set to True.

        :param line: log line
        :return: dict with parsed info, or None if the line could not be
                 split with the configured format
        """
        # parse the line
        parsed = parse_line_split(
            line,
            keys=self.keys,
            non_key_patterns=self.non_key_patterns,
            first_value_is_key=self.first_value_is_key
        )

        if not parsed:
            context.default_log.debug(
                'could not parse line "%s" with format "%s"' %
                (line, self.raw_format))
            return None

        result = {'malformed': False}

        for key in self.keys:
            if key in self.common_variables:
                func = self.common_variables[key][1]
            else:
                func = self.default_variable[1]

            try:
                value = func(parsed[key])
            # for example gzip ratio can be '-' and float
            except ValueError:  # couldn't cast log value
                value = 0
            except KeyError:  # something went wrong with line parsing
                context.default_log.warn(
                    'failed to find expected log variable "%s" in access '
                    'log line, skipping' % key)
                context.default_log.debug('additional info:')
                context.default_log.debug(
                    'keys: %s\nformat: "%s"\nline:"%s"' %
                    (self.keys, self.raw_format, line))
                # really skip the key: without this, `value` is either
                # undefined or still holds the previous key's value
                continue

            # a failed cast leaves a plain number here, which cannot be
            # split like the raw string values handled below
            splittable = hasattr(value, 'replace')
            is_time_var = key.endswith('_time')

            # time variables should be parsed to array of float;
            # skip empty vars
            if is_time_var and splittable and value not in ('', '-'):
                times = []
                for token in value.replace(' ', '').split(','):
                    try:
                        seconds = float(token)
                    except ValueError:
                        # a single non-numeric entry (e.g. '-') must not
                        # abort parsing of the whole line
                        continue
                    # workaround for an old nginx bug with time. ask lonerr@ for details
                    if seconds > 10000000:
                        continue
                    times.append(seconds)
                if times:
                    result[key] = times

            # Handle comma separated keys
            if key in self.comma_separated_keys:
                if splittable and ',' in value:
                    # remove spaces and split values into list
                    result[key] = value.replace(' ', '').split(',')
                else:
                    result[key] = [value]

            # time variables are only ever stored in their list form
            if key not in result and not is_time_var:
                result[key] = value

        if 'request' in result:
            try:
                method, uri, proto = result['request'].split(' ')
            except (ValueError, AttributeError):
                # wrong number of parts, or the value is not a string
                result['malformed'] = True
            else:
                result['request_method'] = method
                result['request_uri'] = uri
                result['server_protocol'] = proto
                if len(method) < 3:
                    result['malformed'] = True

        return result