def _parse_new(self, inp: str):
    character_stream = Stream(inp)
    lexer = JavaTokenizer(character_stream)
    mode, message, variables = self._read_variable(lexer)
    if mode == 'simple':
        return '', []
    elif mode == 'nested':
        return message.strip(), self._flatten(variables)
    else:
        # No recognised mode: log and fall through, so the caller receives None
        self.log.warning(
            f'General parsing problem [error evaluating first expression] on <{inp}>'
        )
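# Hedged, standalone sketch of the flattening step used above. _flatten is not
# shown in this section, so the helper below only illustrates an assumed
# contract (collapse arbitrarily nested lists of variable hints into a flat
# list); it is not the repository's implementation, and its name is made up.
def _flatten_sketch(nested):
    """Collapse nested lists of variable hints into a flat list.

    >>> _flatten_sketch(['count', ['{!Integer}', ['user']]])
    ['count', '{!Integer}', 'user']
    """
    if not isinstance(nested, list):
        return [nested]
    flat = []
    for item in nested:
        flat.extend(_flatten_sketch(item))
    return flat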
def _read_variable(self, lexer: JavaTokenizer):
    stack = []
    variable_name = []
    previous_was_var = False
    while not lexer.eof():
        token_type, token = lexer.peek()
        if token_type == 'punc' and token == ',' and not stack:
            return 'simple', variable_name, None
        elif token_type == 'op' and token == '+' and not stack:
            return 'simple', variable_name, None
        elif token_type == 'var':
            previous_was_var = True
        # Function call
        elif token_type == 'punc' and token == '(' and previous_was_var:
            previous_was_var = False
            func_type, default_args = self._map_function(variable_name[-1])
            # If it is another formatting call, follow it
            if func_type in self._processing_map:
                _new_stream = Stream(lexer.input.s[lexer.input.pos - 1:])
                _new_lexer = JavaTokenizer(_new_stream)
                args = self._count_arguments(_new_lexer)
                param_mapping = self._create_params_mapping(default_args, args)
                func = self._processing_map[func_type]
                msg, variables = func(self, lexer, param_mapping)
                return 'nested', msg, variables
            # Otherwise treat it as an ordinary bracketed expression
            else:
                stack.append('(')
        # Bracketed expression
        elif token_type == 'punc' and token == '(':
            previous_was_var = False
            stack.append('(')
        elif token_type == 'punc' and token == ')':
            previous_was_var = False
            # A closing bracket without a matching opening one is not ours,
            # so hand control back to the caller
            if not stack:
                return 'simple', variable_name, None
            # Otherwise it is our bracket and we pop it
            else:
                stack.pop()
        else:
            previous_was_var = False
            variable_name.append(token)
        lexer.next()
    raise ValueError('Unexpected EOF')
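# Standalone sketch of the depth-tracking idea in _read_variable: a ',' or '+'
# only terminates the expression when the bracket stack is empty, and an
# unmatched ')' signals that the expression has ended. The helper below works
# on a plain list of (type, text) token pairs instead of the repository's
# JavaTokenizer, so the function name and token shapes are illustrative only.
def _first_expression_tokens(tokens):
    """Collect the tokens of the first expression, stopping at a top-level
    ',' or '+' or at an unmatched ')'.

    >>> _first_expression_tokens([('var', 'user'), ('punc', '.'),
    ...                           ('var', 'getName'), ('punc', '('),
    ...                           ('punc', ')'), ('op', '+'), ('str', 'tail')])
    ['user', '.', 'getName']
    """
    depth = 0
    collected = []
    for token_type, token in tokens:
        if token_type == 'punc' and token == '(':
            depth += 1                      # opening bracket: one level deeper
        elif token_type == 'punc' and token == ')':
            if depth == 0:                  # not our bracket: expression ends here
                break
            depth -= 1
        elif depth == 0 and ((token_type == 'punc' and token == ',')
                             or (token_type == 'op' and token == '+')):
            break                           # top-level delimiter ends the expression
        else:
            collected.append(token)
    return collected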
def _parse(self, inp) -> Tuple[str, List[str]]:
    character_stream = Stream(inp)
    lexer = JavaTokenizer(character_stream)
    log_string = ""
    arguments = []
    argument_mode = False
    first_token = True
    # We basically have two modes: parse string concatenation and parse arguments.
    # If we find a String.format we know what to look for. If we don't, we assume
    # that the first occurrence of ',' is the delimiter between the string
    # concatenation and the arguments for that string.
    while not lexer.eof():
        current_type, current_token = lexer.peek()
        if current_type == 'str':
            if first_token:
                argument_mode = True
            log_string += current_token
            lexer.next()
        elif current_type == 'op' and current_token == '+':
            lexer.next()
            current_type, current_token = lexer.peek()
            if current_type == 'str':
                log_string += current_token
                lexer.next()
            elif current_type == 'op' and not lexer.is_unary_ops(current_token):
                raise ValueError(f'Operator {current_token} may not follow a +')
            elif current_type == 'op':
                lexer.next()
            elif current_type == 'punc' and not current_token == '(':
                raise ValueError(f'"{current_token}" may not follow a +')
            elif current_type == 'punc' and current_token == '(':
                hints, _, string_only = self._read_expression(lexer)
                if string_only:
                    # string-only expressions get no special handling in this branch
                    pass
                if argument_mode:
                    log_string += '{}'
                    arguments.append(hints[0])
                else:
                    arguments.append(hints[0])
            elif current_type == 'var':
                variable = self._read_var(lexer)
                if argument_mode:
                    log_string += '{}'
                    arguments.append(variable)
                else:
                    arguments.append(variable)
        elif current_type == 'punc' and current_token == ',':
            argument_mode = False
            lexer.next()
        elif current_type == 'op' and lexer.is_unary_ops(current_token):
            lexer.next()
        elif current_type == 'var':
            _, expression, _ = self._read_expression(lexer)
            if 'String.format' in expression:
                expression = expression.replace("String.format(", '')
                expression = expression[:expression.rindex(')')]
                tmp = self._parse(expression)
                return tmp
            # Not a String.format call: treat the expression like any other variable
            if argument_mode:
                log_string += '{}'
            else:
                arguments.append(expression)
        elif current_type == 'num':
            dtype, value = self._check_number(current_token)
            if argument_mode:
                log_string += '{}'
                arguments.append('{!Integer}' if dtype == 'int' else '{!Float}')
            else:
                arguments.append('{!Integer}' if dtype == 'int' else '{!Float}')
            lexer.next()
        elif current_type == 'punc' and current_token == '(':
            hints, output, string_only = self._read_expression(lexer)
            if string_only:
                stream = JavaTokenizer(Stream(output))
                constructed_token = ""
                while not stream.eof():
                    if (token := stream.next())[0] == 'str':
                        constructed_token += token[1]
                log_string += constructed_token
            elif argument_mode:
                log_string += '{}'
            else:
                arguments.append(hints[0])
        else:
            print(f'Weird behavior for token {current_token}<{current_type}>')
            lexer.next()
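# Hedged sketch of the (template, arguments) contract that _parse returns: a
# log string with '{}' placeholders plus a list of variable names or type
# hints such as '{!Integer}'. The walk below is heavily simplified (it ignores
# operators, argument_mode, and nested calls), the function name is made up,
# and the token list is hand-built rather than produced by JavaTokenizer.
def _parse_concatenation_sketch(tokens):
    """Turn a token list for a string concatenation into (template, arguments).

    >>> _parse_concatenation_sketch([('str', 'Deleted '), ('op', '+'),
    ...                              ('var', 'count'), ('op', '+'),
    ...                              ('str', ' rows')])
    ('Deleted {} rows', ['count'])
    """
    template = ''
    arguments = []
    for token_type, token in tokens:
        if token_type == 'str':
            template += token               # literal text goes straight into the template
        elif token_type == 'var':
            template += '{}'                # variables become placeholders ...
            arguments.append(token)         # ... and are recorded as arguments
        # operators such as '+' carry no content and are skipped
    return template, arguments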