Example #1
    def _get_format_expression(self, lexer: JavaTokenizer):
        token_type, token = lexer.peek()
        if token_type != 'var':
            self.log.info(f'Unexpected first token: {token_type} ({token})')

        # Collect every token up to (but not including) the opening parenthesis;
        # together they form the callee expression of the format call.
        stack = []
        while not lexer.eof():
            token_type, token = lexer.peek()
            if token_type == 'punc' and token == '(':
                return stack
            stack.append(token)
            lexer.next()
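
All of these snippets rely on a Stream / JavaTokenizer pair whose implementation is not shown here. As a point of reference, the following is a minimal sketch of the interface the examples imply: peek() and next() returning (token_type, token) tuples with the types 'str', 'num', 'var', 'op' and 'punc', plus eof() and is_unary_ops(). The tokenization rules below are inferred from usage and are an assumption, not the original implementation.

import re
from typing import Optional, Tuple


class Stream:
    # Plain character stream; the examples read lexer.input.s and
    # lexer.input.pos, so both stay public attributes.
    def __init__(self, s: str):
        self.s = s
        self.pos = 0


class JavaTokenizer:
    # Rough tokenizer sketch producing (token_type, token) pairs with the
    # types used in the examples: 'str', 'num', 'var', 'op', 'punc'.
    _token_re = re.compile(
        r'\s*(?:(?P<str>"(?:\\.|[^"\\])*")'
        r'|(?P<num>\d+(?:\.\d+)?[fFdDlL]?)'
        r'|(?P<var>[A-Za-z_$][\w$]*)'
        r'|(?P<op>\+\+|--|[+\-*/%!<>=.&|?:])'
        r'|(?P<punc>[(),;{}\[\]]))')

    def __init__(self, input_stream: Stream):
        self.input = input_stream
        self._peeked: Optional[Tuple[str, str]] = None
        self._peek_end = input_stream.pos

    def eof(self) -> bool:
        return self.peek() is None

    def peek(self) -> Optional[Tuple[str, str]]:
        if self._peeked is None:
            match = self._token_re.match(self.input.s, self.input.pos)
            if match is None:
                return None
            kind = match.lastgroup
            token = match.group(kind)
            if kind == 'str':
                token = token[1:-1]  # drop the surrounding quotes
            self._peeked = (kind, token)
            self._peek_end = match.end()
        return self._peeked

    def next(self) -> Optional[Tuple[str, str]]:
        token = self.peek()
        if token is not None:
            self.input.pos = self._peek_end
            self._peeked = None
        return token

    def is_unary_ops(self, token: str) -> bool:
        return token in ('!', '-', '+', '++', '--')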
Example #2
    def _read_variable(self, lexer: JavaTokenizer):
        stack = []
        variable_name = []
        previous_was_var = False
        while not lexer.eof():
            token_type, token = lexer.peek()
            # A top-level ',' or '+' terminates a simple variable reference
            if token_type == 'punc' and token == ',' and not stack:
                return 'simple', variable_name, None
            elif token_type == 'op' and token == '+' and not stack:
                return 'simple', variable_name, None
            elif token_type == 'var':
                previous_was_var = True

            # Function Call
            elif token_type == 'punc' and token == '(' and previous_was_var:
                previous_was_var = False
                func_type, default_args = self._map_function(variable_name[-1])

                # If it is another formatting call, follow it
                if func_type in self._processing_map:
                    _new_stream = Stream(lexer.input.s[lexer.input.pos - 1:])
                    _new_lexer = JavaTokenizer(_new_stream)
                    args = self._count_arguments(_new_lexer)
                    param_mapping = self._create_params_mapping(
                        default_args, args)

                    func = self._processing_map[func_type]
                    msg, variables = func(self, lexer, param_mapping)
                    return 'nested', msg, variables
                # Otherwise treat it as a normal expression bracket
                else:
                    stack.append('(')

            # Bracket expression
            elif token_type == 'punc' and token == '(':
                previous_was_var = False
                stack.append('(')
            elif token_type == 'punc' and token == ')':
                previous_was_var = False
                # A closing bracket without a matching opening one is not ours, so we hand back control
                if not stack:
                    return 'simple', variable_name, None
                # In this case it is our bracket and we pop it
                else:
                    stack.pop()
            else:
                previous_was_var = False
            variable_name.append(token)
            lexer.next()
        raise ValueError('Unexpected EOF')
Example #3
    def _count_arguments(self, lexer: JavaTokenizer):
        previous_token_type = None
        previous_token = None
        # Advance to the opening parenthesis of the call
        while not lexer.eof():
            token_type, token = lexer.peek()
            if token_type == 'punc' and token == '(':
                previous_token_type = token_type
                previous_token = token
                lexer.next()
                break
            lexer.next()
        if lexer.eof():
            raise ValueError('Input does not contain a function call')

        # Count top-level commas inside the call; nested parentheses go on the
        # stack so that commas inside them are not counted.
        stack = []
        argument_count = 1
        while not lexer.eof():
            token_type, token = lexer.peek()

            if token_type == 'punc' and token == ')' and not stack:
                # ')' directly after the opening '(' means an empty argument list
                if previous_token_type == 'punc' and previous_token == '(':
                    return 0
                return argument_count
            elif token_type == 'punc' and token == '(':
                stack.append('(')
            elif token_type == 'punc' and token == ')':
                stack.pop()
            elif token_type == 'punc' and token == ',' and not stack:
                argument_count += 1
            previous_token_type = token_type
            previous_token = token
            lexer.next()
        return argument_count
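
For a quick sanity check, this is how _count_arguments behaves on two tiny inputs, using the tokenizer sketch above; parser is a hypothetical stand-in for an instance of the surrounding class and is not a name from the original code.

# Hypothetical driver; `parser` is an instance of the class these methods belong to.
lexer = JavaTokenizer(Stream('String.format("%s -> %s", src, dst)'))
print(parser._count_arguments(lexer))  # 3: the format string plus two variables

lexer = JavaTokenizer(Stream('withoutArgs()'))
print(parser._count_arguments(lexer))  # 0: ')' directly follows '('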
Example #4
    def _parse(self, inp) -> Tuple[str, List[str]]:
        character_stream = Stream(inp)
        lexer = JavaTokenizer(character_stream)

        log_string = ""
        arguments = []
        argument_mode = False
        first_token = True

        # We basically have two modes: parse string concatenation and parse arguments.
        # If we find a String.format we know what to look for; if we don't, we assume
        # that the first occurrence of ',' delimits the concatenated string from the
        # arguments that belong to it.

        while not lexer.eof():
            current_type, current_token = lexer.peek()
            if current_type == 'str':
                if first_token:
                    argument_mode = True
                log_string += current_token
                lexer.next()
            elif current_type == 'op' and current_token == '+':
                lexer.next()
                current_type, current_token = lexer.peek()
                if current_type == 'str':
                    log_string += current_token
                    lexer.next()
                elif current_type == 'op' and not lexer.is_unary_ops(
                        current_token):
                    raise ValueError(
                        f'Operator {current_token} may not follow a +')
                elif current_type == 'op':
                    lexer.next()
                elif current_type == 'punc' and not current_token == '(':
                    raise ValueError(f'"{current_token}" may not follow a +')
                elif current_type == 'punc' and current_token == '(':
                    hints, _, string_only = self._read_expression(lexer)
                    # String-only expressions receive no special treatment here.
                    if argument_mode:
                        log_string += '{}'
                    arguments.append(hints[0])

                elif current_type == 'var':
                    variable = self._read_var(lexer)
                    if argument_mode:
                        log_string += '{}'
                    arguments.append(variable)
            elif current_type == 'punc' and current_token == ',':
                argument_mode = False
                lexer.next()
            elif current_type == 'op' and lexer.is_unary_ops(current_token):
                lexer.next()
            elif current_type == 'var':
                _, expression, _ = self._read_expression(lexer)
                if 'String.format' in expression:
                    expression = expression.replace("String.format(", '')
                    expression = expression[:expression.rindex(')')]
                    return self._parse(expression)
                if argument_mode:
                    log_string += '{}'
                else:
                    arguments.append(expression)
            elif current_type == 'num':
                dtype, _ = self._check_number(current_token)
                hint = '{!Integer}' if dtype == 'int' else '{!Float}'
                if argument_mode:
                    log_string += '{}'
                arguments.append(hint)
                lexer.next()
            elif current_type == 'punc' and current_token == '(':
                hints, output, string_only = self._read_expression(lexer)
                if string_only:
                    stream = JavaTokenizer(Stream(output))
                    constructed_token = ""
                    while not stream.eof():
                        if (token := stream.next())[0] == 'str':
                            constructed_token += token[1]
                    log_string += constructed_token
                elif argument_mode:
                    log_string += '{}'
                else:
                    arguments.append(hints[0])
            else:
                print(f'Weird behavior for token {current_token}<{current_type}>')
                lexer.next()
        return log_string, arguments
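
To make the intended output shape concrete, here is a hedged end-to-end example of the (template, arguments) pair _parse is expected to return for a plain concatenated log statement; it again uses the tokenizer sketch above and the hypothetical parser instance.

# Hypothetical driver; `parser` is an instance of the surrounding class.
template, arguments = parser._parse(
    '"Listening on port " + port + ", host " + host')
# Expected roughly:
#   template  == 'Listening on port {}, host {}'
#   arguments == ['port', 'host']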
Example #5
    def _parse_format(self, lexer: JavaTokenizer, params: List[str]):

        if not params:
            raise ValueError(
                "Trying to parse format without argument. Aborting...")

        param_offset = 0
        param_type = params[param_offset]
        message = ''
        variables = []
        statement_stack = []
        while not lexer.eof():
            token_type, token = lexer.peek()

            # Advance argument
            if token_type == 'punc' and token == ',' and param_type != '...':
                param_offset = self._increase_index(param_offset, len(params))
                param_type = params[param_offset]

            # Skip this argument entirely, e.g. when it is the log-level argument.
            elif param_type == 'skip':
                pass

            # New expression
            elif token_type == 'punc' and token == '(':
                statement_stack.append(token)
            # Closing expression
            elif token_type == 'punc' and token == ')':
                statement_stack.pop()
                # No expressions left
                if not statement_stack:
                    break

            # String literal
            elif token_type == 'str' and param_type == '...':
                variables.append(token)
            elif token_type == 'str':
                message += self._parse_format_string(token)

            elif token_type == 'num' and param_type == 'str':
                message += str(token)
            elif token_type == 'num' and param_type == '...':
                variables.append(token)

            # Variable
            elif token_type == 'var' or (token_type == 'op'
                                         and lexer.is_unary_ops(token)):

                # Read the variable; it is either a plain variable or a nested formatting call
                var_type, tokens, arguments = self._read_variable(lexer)
                if var_type == 'simple':
                    variables.append(''.join(tokens))
                    if param_type == 'str':
                        message += '{}'
                if var_type == 'nested':
                    message += tokens
                    variables.append(arguments)

            # Operator '+' on string
            elif param_type == 'str' and token_type == 'op' and token == '+':
                lexer.next()
                token_type, token = lexer.peek()
                if token_type == 'str':
                    message += token
                elif token_type == 'var':
                    tmp_mode, tmp_message, tmp_variables = self._read_variable(
                        lexer)

                    if tmp_mode == 'simple' and tmp_message:
                        message += '{}'
                        variables.append(''.join(tmp_message))
                    elif tmp_mode == 'nested':
                        # Nested formatting calls after a '+' are not handled here.
                        pass

            lexer.next()
        self._parse_format_string(message)
        return message, variables
Example #6
 def _read_expression(self, lexer: JavaTokenizer):
     # Walk the expression, collecting a type hint for every variable or number
     # and tracking whether it consists of string literals only.
     value_hint = []
     stack = []
     string_only = True
     original_string = ""
     while not lexer.eof():
         next_type, next_token = lexer.peek()
         if next_type == 'str':
             original_string += f'"{next_token}"'
         else:
             original_string += next_token
         if next_type == 'punc' and next_token == '(':
             lexer.next()
             stack.append(True)
         elif next_type == 'punc' and next_token == ')':
             lexer.next()
             stack.pop()
         elif not stack and (next_type == 'punc' and next_token == ','):
             break
         elif not stack and (next_type == 'punc' and next_token == ';'):
             break
         elif next_type == 'var':
             value_hint.append(next_token)
             lexer.next()
             string_only = False
         elif next_type == 'num':
             dtype, value = self._check_number(next_token)
             value_hint.append(dtype)
             lexer.next()
             string_only = False
         elif next_type == 'str':
             lexer.next()
         else:
             lexer.next()
             string_only = False
     # print(f'Expression parsed, I would suggest it be: {value_hint}')
     return value_hint, original_string, string_only
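
For orientation, a hedged sketch of what _read_expression returns for two small inputs, assuming the tokenizer sketch above, the hypothetical parser instance, and that _check_number classifies a plain integer literal as 'int'.

# Hypothetical driver; `parser` is an instance of the surrounding class.
hints, original, string_only = parser._read_expression(
    JavaTokenizer(Stream('(count + 1)')))
# Expected roughly: hints == ['count', 'int'], original == '(count+1)', string_only is False

hints, original, string_only = parser._read_expression(
    JavaTokenizer(Stream('("prefix")')))
# Expected roughly: hints == [], original == '("prefix")', string_only is True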
Example #7
 def _read_var(self, lexer: JavaTokenizer):
     initial_type, var_tokens = lexer.peek()
     if initial_type != 'var':
         raise ValueError(
             'Called _read_var on a stream that\'s not pointing to a var')
      # Accumulate the variable access token by token; a top-level '+' or ','
      # ends it, and the stack tracks parentheses so separators inside a call
      # do not terminate it early.
      stack = []
     current_type, current_token = lexer.next()
     while not lexer.eof():
         next_type, next_token = lexer.peek()
         if current_type == 'var' and (next_type == 'op'
                                       and next_token == '.'):
             var_tokens += next_token
             lexer.next()
         elif not stack and (current_type == 'var' and
                             (next_type == 'op' and next_token == '+')):
             break
         elif not stack and (current_type == 'var' and
                             (next_type == 'punc' and next_token == ',')):
             break
         elif next_type == 'punc' and next_token == '(':
             lexer.next()
             stack.append(True)
         elif next_type == 'punc' and next_token == ')':
             lexer.next()
             stack.pop()
          else:
              var_tokens += next_token
              lexer.next()
     return var_tokens