def parse_ifdef_block(expression, defines, coord_until_here):
    """
    Given an IfDefStatement and the defines, converts the statement.tokens
    into a list of tokens that can be analyzed after processing the #ifdef
    statement. `coord_until_here` is used to compute the position of errors.
    """
    assert isinstance(expression, IfDefStatement)
    tokens = expression.tokens

    try:
        ifdef_i = rindex(tokens, Preprocessor('#ifdef'))
        is_ifdef = True
    except ValueError:
        ifdef_i = rindex(tokens, Preprocessor('#ifndef'))
        is_ifdef = False
    try:
        else_i = rindex(tokens, Preprocessor('#else'))
    except ValueError:
        else_i = None
    endif_i = rindex(tokens, Preprocessor('#endif'))

    try:
        # if there is an #ifdef statement before #endif, the remaining tokens are transferred to it
        nested_if_def = next(i for i, x in enumerate(tokens)
                             if type(x) == IfDefStatement and i < endif_i)
    except StopIteration:
        nested_if_def = None

    variable, eol_i = get_ifdef_variable(tokens, ifdef_i, coord_until_here)
    is_def = (variable in defines)

    replacing_expression = []
    if is_def and is_ifdef or not is_def and not is_ifdef:
        if else_i is None:
            to = endif_i
            if nested_if_def is not None and nested_if_def < endif_i:
                to = nested_if_def + 1
            replacing_expression = tokens[eol_i:to]
        else:
            to = else_i
            if nested_if_def is not None and nested_if_def < else_i:
                to = nested_if_def + 1
            replacing_expression = tokens[eol_i:to]
    elif else_i is not None:
        replacing_expression = tokens[else_i + 1:endif_i]

    try:
        # if there is an #ifdef statement after the #endif, the remaining tokens are transferred to it
        if_def_i = next(i for i, x in enumerate(tokens)
                        if type(x) == IfDefStatement and i > endif_i)
        next_if_def = tokens[if_def_i]
        replacing_expression.append(next_if_def)

        remaining_tokens = tokens[if_def_i + 1:]
        next_if_def.tokens.extend(remaining_tokens)
        expression._tokens = tokens[:if_def_i + 1]
    except StopIteration:
        replacing_expression += tokens[endif_i + 1:]

    return replacing_expression
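# A sketch of the reduction above (illustrative, not executed): for the token
# stream of
#     #ifdef A
#     x = 1;
#     #else
#     x = 2;
#     #endif
# with 'A' in `defines`, `replacing_expression` holds the tokens between the
# end of the #ifdef line and #else (those of `x = 1;`); with 'A' undefined, it
# holds the tokens between #else and #endif (those of `x = 2;`). Tokens after
# #endif are appended, or handed to a trailing nested #ifdef when one exists.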
def identify_token(token):
    """
    The function that converts a token from tokenize to a BaseType.
    """
    if isinstance(token, (Comment, String)):
        return token
    if token == ' ':
        return Space()
    if token == '\t':
        return Tab()
    if token == '\\\n':
        return BrokenEndOfLine()
    if token in ('(', ')', '[', ']', '{', '}', ',', ';'):
        return ParserKeyword(token)
    if token in ('\n', '\r\n'):
        return EndOfLine(token)
    if token in ('true', 'false'):
        return Boolean(token == 'true')
    try:
        return Number(int(token))
    except ValueError:
        pass
    try:
        return Number(float(token))
    except ValueError:
        pass
    if token in PREPROCESSORS:
        return Preprocessor(token)
    if token.lower() in NAMESPACES:
        return Namespace(token)
    elif token.lower() in KEYWORDS:
        return Keyword(token)
    else:
        return Variable(token)
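# How raw lexemes map to types (illustrative):
#     identify_token('true')  -> Boolean(True)
#     identify_token('3.14')  -> Number(3.14)
#     identify_token(';')     -> ParserKeyword(';')
#     identify_token('\r\n')  -> EndOfLine('\r\n')
#     identify_token('myVar') -> Variable('myVar'), assuming 'myvar' is in
#                                neither NAMESPACES nor KEYWORDS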
def replace_in_expression(expression, args, arg_indexes, all_tokens):
    """
    Recursively replaces matches of `args` in expression (a list of Types).
    """
    replacing_expression = []
    commands = {'#': False, '##': False}
    for token in expression:
        if token in (Preprocessor('#'), Preprocessor('##')):
            commands[token.value] = True
            continue
        if isinstance(token, Statement):
            new_expression = replace_in_expression(token.content, args,
                                                   arg_indexes, all_tokens)
            new_token = Statement(new_expression, ending=token.ending,
                                  parenthesis=token.parenthesis)
        else:
            for arg, arg_index in zip(args, arg_indexes):
                if str(token) == arg:
                    new_token = all_tokens[arg_index]
                    break
            else:
                new_token = token
        if commands['#']:
            new_token = preprocessor_stringify(new_token,
                                               is_variable=new_token != token)
            commands['#'] = False
        replacing_expression.append(new_token)

    if commands['##']:
        # re-parse the whole statement if a concatenation occurred
        # todo: any errors will report the wrong coordinate
        replacing_expression = parse("".join(
            str(t) for t in replacing_expression)).tokens
    return replacing_expression
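# Substitution sketch (illustrative): with a define such as
#     #define DOUBLE(x) (x + x)
# a use `DOUBLE(1)` reaches this function with args=['x'] and `arg_indexes`
# pointing at the token `1`, so every token spelled 'x' in the define's
# expression is swapped for `1`. A `#` stringifies the next substituted token;
# a `##` marks the expression for re-parsing so concatenated lexemes fuse.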
def _analyze_define(tokens):
    assert tokens[0] == Preprocessor('#define')

    valid_indexes = [i for i in range(len(tokens))
                     if not isinstance(tokens[i], ParserType)]

    if len(valid_indexes) < 2:
        raise SQFParserError(get_coord(str(tokens[0])),
                             '#define needs at least one argument')

    variable = str(tokens[valid_indexes[1]])
    if len(valid_indexes) == 2:
        return DefineStatement(tokens, variable)
    elif len(valid_indexes) >= 3 and valid_indexes[1] + 1 == valid_indexes[2] and \
            isinstance(tokens[valid_indexes[2]], Statement) and \
            tokens[valid_indexes[2]].parenthesis:
        args = str(tokens[valid_indexes[2]])[1:-1].split(',')
        remaining = tokens[valid_indexes[3]:]
        return DefineStatement(tokens, variable, remaining, args=args)
    elif len(valid_indexes) >= 3:
        remaining = tokens[valid_indexes[2]:]
        return DefineStatement(tokens, variable, remaining)
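# The three #define shapes accepted above (illustrative):
#     #define X            -> DefineStatement(tokens, 'X')
#     #define X 1 + 1      -> DefineStatement(tokens, 'X', remaining)
#     #define X(a,b) a+b   -> DefineStatement(tokens, 'X', remaining, args=['a', 'b'])
# The argument form requires the parenthesized Statement to sit immediately
# after the name (valid_indexes[1] + 1 == valid_indexes[2]), i.e. no space
# between the name and '('.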
def execute_single(self, statement):
    assert isinstance(statement, Statement)

    outcome = Nothing()
    outcome.position = statement.position

    base_tokens = []
    for token in statement.tokens:
        if not statement.is_base_token(token):
            self.execute_other(token)
        else:
            base_tokens.append(token)

    if not base_tokens:
        return outcome

    # operations that cannot evaluate the value of all base_tokens
    if type(base_tokens[0]) == DefineStatement:
        return base_tokens[0]
    elif base_tokens[0] == Preprocessor("#include"):
        if len(base_tokens) != 2:
            exception = SQFParserError(base_tokens[0].position,
                                       "#include requires one argument")
            self.exception(exception)
        elif type(self.execute_token(base_tokens[1])) != String:
            exception = SQFParserError(base_tokens[0].position,
                                       "#include first argument must be a string")
            self.exception(exception)
        return outcome
    elif isinstance(base_tokens[0], Keyword) and base_tokens[0].value in PREPROCESSORS:
        # remaining preprocessors are ignored
        return outcome
    elif len(base_tokens) == 2 and base_tokens[0] == Keyword('private'):
        # the rhs may be a variable, so we cannot get its value
        rhs = self.execute_token(base_tokens[1])
        if isinstance(rhs, String):
            self.add_privates([rhs])
        elif isinstance(rhs, Array):
            value = self.value(rhs)
            if value.is_undefined:
                self.exception(SQFWarning(
                    base_tokens[0].position,
                    'Obfuscated statement. Consider explicitly setting what is private.'))
            else:
                self.add_privates(value)
        elif isinstance(rhs, Variable):
            var = String('"' + rhs.name + '"')
            var.position = rhs.position
            self.add_privates([var])
            outcome = PrivateType(rhs)
            outcome.position = rhs.position
            self.privates.add(outcome)
        else:
            self.exception(SQFParserError(base_tokens[0].position,
                                          '`private` used incorrectly'))
        return outcome
    # assignment operator
    elif len(base_tokens) == 3 and base_tokens[1] == Keyword('='):
        lhs = self.execute_token(base_tokens[0])
        if isinstance(lhs, PrivateType):
            self.privates.remove(lhs)
            lhs = lhs.variable
        else:
            lhs = self.get_variable(base_tokens[0])

        if not isinstance(lhs, Variable):
            self.exception(SQFParserError(
                base_tokens[0].position,
                'lhs of assignment operator must be a variable'))
        else:
            # if rhs_v is code that calls `lhs` (recursion), assume lhs is anything (and not Nothing)
            scope = self.get_scope(lhs.name)
            if lhs.name not in scope or isinstance(scope[lhs.name], Nothing):
                scope[lhs.name] = Anything()

            rhs_v = self.value(base_tokens[2])
            self.assign(lhs, rhs_v)
            if not statement.ending:
                outcome = rhs_v
        return outcome
    # A variable can only be evaluated if we need its value, so we do not
    # call its value until the very end.
    elif len(base_tokens) == 1 and type(base_tokens[0]) in (Variable, Array):
        return self.execute_token(base_tokens[0])
    # heuristic for defines (which are thus syntactically correct):
    # - the token is a Keyword
    # - its string starts with an upper-cased character
    elif len(base_tokens) == 1 and type(base_tokens[0]) == Keyword and \
            str(base_tokens[0])[0].isupper():
        outcome = Variable(str(base_tokens[0]))
        outcome.position = base_tokens[0].position
        return outcome
    elif is_undefined_define(base_tokens):
        # get all arguments and compute their values to analyze them
        if isinstance(base_tokens[1].base_tokens[0], Statement):
            sub_tokens = base_tokens[1].base_tokens[0].base_tokens
        else:
            sub_tokens = base_tokens[0]
        for sub_token in sub_tokens:
            self.value(sub_token)
        # finally, build the outcome
        outcome = Anything()
        outcome.position = base_tokens[0].position
        return outcome

    # evaluate all the base_tokens, trying to obtain their values
    values = []
    tokens = []
    for token in base_tokens:
        t = self.execute_token(token)
        v = self.value(t)
        tokens.append(t)
        values.append(v)

    # try to find a match for any expression, both typed and un-typed
    case_found = None
    possible_expressions = values_to_expressions(values, EXPRESSIONS_MAP, EXPRESSIONS)
    for case in possible_expressions:
        if case.is_signature_match(values):  # match first occurrence
            case_found = case
            break
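# Outcomes of the special cases above, by example (illustrative SQF snippets):
#     private "_x";   -> the String is registered via add_privates
#     private _x;     -> _x is wrapped in a PrivateType and tracked in self.privates
#     _x = 1;         -> the lhs must resolve to a Variable; the rhs value is
#                        assigned and becomes the outcome when the statement
#                        has no ending
#     MY_MACRO(1)     -> if is_undefined_define matches (an assumption about
#                        its heuristic), the arguments are still analyzed and
#                        the outcome is Anything()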
def parse_block(all_tokens, analyze_tokens, start=0, initial_lvls=None,
                stop_statement='both', defines=None):
    if not initial_lvls:
        initial_lvls = _LEVELS
    if defines is None:
        defines = defaultdict(dict)
    lvls = initial_lvls.copy()

    statements = []
    tokens = []
    i = start

    if not all_tokens:
        return Statement([]), 0

    while i < len(all_tokens):
        token = all_tokens[i]

        # begin #ifdef controls
        if lvls['ifdef'] and token in OPEN_PARENTHESIS:
            lvls['ifdef_open_close'] += 1

        stop = False
        if token in (Preprocessor('#ifdef'), Preprocessor('#ifndef')):
            stop = True
            lvls['ifdef'] += 1
            expression, size = parse_block(all_tokens, _analyze_simple, i + 1,
                                           lvls, stop_statement, defines=defines)
            lvls['ifdef'] -= 1
            if lvls['ifdef'] == 0:
                assert isinstance(expression, IfDefStatement)
                replacing_expression = parse_ifdef_block(
                    expression, defines, get_coord(all_tokens[:i - 1]))

                new_all_tokens = sqf.base_type.get_all_tokens(tokens + replacing_expression)
                result, _ = parse_block(new_all_tokens, analyze_tokens, 0, None,
                                        stop_statement, defines=defines)

                expression.prepend(tokens)
                expression = IfDefResult(expression, result.tokens)
                statements.append(expression)
                len_expression = len(expression.get_all_tokens())
                i += len_expression - len(tokens) - 1
                tokens = []
            else:
                tokens.append(expression)
                i += size + 1
        # finish ifdef
        elif is_finish_ifdef_condition(tokens, lvls) and \
                (is_end_statement(token, stop_statement) or
                 is_finish_ifdef_parenthesis(token, lvls)) or \
                lvls['ifdef'] > 1 and token == Preprocessor('#endif'):
            if token != EndOfFile() and token not in CLOSE_PARENTHESIS:
                tokens.append(token)
            if_def = finish_ifdef(tokens, all_tokens, start, statements)
            return if_def, i - start
        # parse during ifdef
        elif lvls['ifdef'] != 0:
            stop = True
            tokens.append(token)

        # end ifdef controls
        if lvls['ifdef'] and token in (STOP_KEYWORDS['single'] + CLOSE_PARENTHESIS):
            lvls['ifdef_open_close'] -= 1
            if lvls['ifdef_open_close'] < 0:
                lvls['ifdef_open_close'] = 0

        if stop:
            pass
        # try to match a #define and get the arguments
        elif str(token) in defines:  # is a define
            stop, define_statement, arg_indexes = find_match_if_def(
                all_tokens, i, defines, token)

            if stop:
                arg_number = len(define_statement.args)
                # tokens taken by the macro call: the name, the surrounding
                # parentheses, and the arguments with their separating commas
                extra_tokens_to_move = 1 + 2 * (arg_number != 0) + \
                    2 * arg_number - 1 * (arg_number != 0)

                replaced_expression = all_tokens[i:i + extra_tokens_to_move]

                # the `all_tokens` after replacement
                replacing_expression = replace_in_expression(
                    define_statement.expression, define_statement.args,
                    arg_indexes, all_tokens)
                new_all_tokens = all_tokens[:i - len(tokens)] + tokens + \
                    replacing_expression + all_tokens[i + extra_tokens_to_move:]
                new_start = i - len(tokens)

                expression, size = parse_block(new_all_tokens, analyze_tokens,
                                               new_start, lvls, stop_statement,
                                               defines=defines)

                # the all_tokens of the statement before replacement
                original_tokens_taken = len(replaced_expression) - \
                    len(replacing_expression) + size
                original_tokens = all_tokens[i - len(tokens):
                                             i - len(tokens) + original_tokens_taken]

                if isinstance(expression, Statement):
                    expression = expression.content[0]

                if type(original_tokens[-1]) in (EndOfLine, Comment, EndOfFile):
                    del original_tokens[-1]
                    original_tokens_taken -= 1

                expression = DefineResult(original_tokens, define_statement, expression)
                statements.append(expression)

                i += original_tokens_taken - len(tokens) - 1
                tokens = []

        if stop:
            pass
        elif token == ParserKeyword('['):
            lvls['[]'] += 1
            expression, size = parse_block(all_tokens, analyze_tokens, i + 1, lvls,
                                           stop_statement='single', defines=defines)
            lvls['[]'] -= 1
            tokens.append(expression)
            i += size + 1
        elif token == ParserKeyword('('):
            lvls['()'] += 1
            expression, size = parse_block(all_tokens, analyze_tokens, i + 1, lvls,
                                           stop_statement, defines=defines)
            lvls['()'] -= 1
            tokens.append(expression)
            i += size + 1
        elif token == ParserKeyword('{'):
            lvls['{}'] += 1
            expression, size = parse_block(all_tokens, analyze_tokens, i + 1, lvls,
                                           stop_statement, defines=defines)
            lvls['{}'] -= 1
            tokens.append(expression)
            i += size + 1
        elif token == ParserKeyword(']'):
            if lvls['[]'] == 0:
                raise SQFParenthesisError(
                    get_coord(all_tokens[:i]),
                    'Trying to close square brackets that were never opened.')
            if statements:
                if isinstance(statements[0], DefineResult):
                    statements[0]._tokens = [Array(_analyze_array(
                        statements[0]._tokens, analyze_tokens, all_tokens[:i]))]
                    return statements[0], i - start
                else:
                    raise SQFParserError(
                        get_coord(all_tokens[:i]),
                        'A statement %s cannot be in an array' % Statement(statements))
            return Array(_analyze_array(tokens, analyze_tokens, all_tokens[:i])), i - start
        elif token == ParserKeyword(')'):
            if lvls['()'] == 0:
                raise SQFParenthesisError(
                    get_coord(all_tokens[:i]),
                    'Trying to close parenthesis that was never opened.')
            if tokens:
                statements.append(analyze_tokens(tokens))
            return Statement(statements, parenthesis=True), i - start
        elif token == ParserKeyword('}'):
            if lvls['{}'] == 0:
                raise SQFParenthesisError(
                    get_coord(all_tokens[:i]),
                    'Trying to close braces that were never opened.')
            if tokens:
                statements.append(analyze_tokens(tokens))
            return Code(statements), i - start
        # end of statement when not in preprocessor states
        elif all(lvls[lvl_type] == 0 for lvl_type in ('#define', '#include')) and \
                is_end_statement(token, stop_statement):
            if type(token) != EndOfFile:
                tokens.append(token)
            if tokens:
                statements.append(analyze_tokens(tokens))
            tokens = []
        elif token in (Preprocessor('#define'), Preprocessor('#include')):
            # notice that `token` is ignored here; it is picked up at the end
            if tokens:
                # a pre-processor starts a new statement
                statements.append(analyze_tokens(tokens))
                tokens = []
            lvls[token.value] += 1
            expression, size = parse_block(all_tokens, analyze_tokens, i + 1, lvls,
                                           stop_statement, defines=defines)
            lvls[token.value] -= 1
            statements.append(expression)
            i += size
        elif token == Keyword('#') and lvls['#define'] != 0:
            # the # sqf command is superseded by the preprocessor's stringification command
            tokens.append(Preprocessor('#'))
        elif type(token) in (EndOfLine, Comment, EndOfFile) and \
                any(lvls[x] != 0 for x in {'#define', '#include'}):
            # pick the token that triggered the statement
            tokens.insert(0, all_tokens[start - 1])

            if tokens[0] == Preprocessor('#define'):
                define_statement = _analyze_define(tokens)
                defines[define_statement.variable_name][len(define_statement.args)] = \
                    define_statement
                statements.append(define_statement)
            else:
                statements.append(analyze_tokens(tokens))

            return Statement(statements), i - start
        elif type(token) != EndOfFile:
            tokens.append(token)
        i += 1

    if is_finish_ifdef_condition(tokens, lvls):
        return finish_ifdef(tokens, all_tokens, start, statements), i - start

    for lvl_type in ('[]', '()', '{}', 'ifdef'):
        if lvls[lvl_type] != 0:
            message = 'Parenthesis "%s" not closed' % lvl_type[0]
            if lvl_type == 'ifdef':
                message = '#ifdef statement not closed'
            raise SQFParenthesisError(get_coord(all_tokens[:start - 1]), message)

    if tokens:
        statements.append(analyze_tokens(tokens))

    return Statement(statements), i - start
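# Entry-point sketch (an assumption: this module's top-level `parse`, used in
# replace_in_expression above, tokenizes the source and delegates here):
#     result = parse('#define DOUBLE(x) (x + x)\nDOUBLE(1)')
# The #define branch recurses until the EndOfLine token, where _analyze_define
# registers the macro in `defines`; the later use of DOUBLE is then expanded
# through replace_in_expression and wrapped in a DefineResult.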
def is_finish_ifdef_condition(tokens, lvls):
    return lvls['ifdef'] == sum(1 for token in tokens
                                if token == Preprocessor('#endif')) > 0 and \
        lvls['ifdef_open_close'] == 0
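# Note the chained comparison above: `lvls['ifdef'] == count > 0` reads as
# `lvls['ifdef'] == count and count > 0`, i.e. every open #ifdef level has met
# a #endif in `tokens`, at least one #endif was seen, and no bracket opened
# inside the #ifdef block remains open.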