def raw(self, expected: []):
    """Append an EOF marker token and check the raw token types.

    A ``CommonToken`` of type ``Token.EOF`` with the text ``"<eof-token>"``
    is appended to ``self.tokens`` (the list therefore grows by one entry on
    every call).  The whole list is then converted with
    ``self.tokens_to_types`` and compared against *expected* through
    ``self.assertEqual``.

    Returns ``self`` so calls can be chained.
    """
    eof_marker = CommonToken(type=Token.EOF)
    eof_marker.text = "<eof-token>"
    self.tokens.append(eof_marker)

    self.assertEqual(expected, self.tokens_to_types(self.tokens), "raw tokens")
    return self
def makeNumber(parent: ParserRuleContext, n: int):
    """Build a synthetic number subtree holding the integer *n*.

    The returned node does not originate from the parser; it is meant for
    rewriting rules that need a literal which never appeared in the token
    stream.  For example, "a-b" is rewritten to "Plus[a, Times[-1, b]]" in
    exitPlusOp(), which needs a node for "-1".

    The NumberContext that is returned has its parent chain pointing at
    *parent*, but nothing is added to *parent*'s children here.
    """
    # Nesting, innermost first:
    #   CommonToken -> (token node) -> NumberBaseTenContext
    #     -> NumberContext -> parent
    digits = CommonToken(type=FoxySheepParser.DIGITS)
    digits._text = str(n)

    expr_ctx = FoxySheepParser.ExprContext(None, parent=parent)
    number_ctx = FoxySheepParser.NumberContext(None, expr_ctx)

    literal_ctx = FoxySheepParser.NumberLiteralContext(None, parent=number_ctx)
    base_ten_ctx = FoxySheepParser.NumberBaseTenContext(None, literal_ctx)
    base_ten_ctx.addTokenNode(digits)

    addChild(number_ctx, base_ten_ctx)
    return number_ctx
def nextToken(self):
    """Return the next token, translating indentation into INDENT/DEDENT.

    A token stashed in ``self.pending_token`` by a previous call is served
    first; otherwise the wrapped lexer is asked for one.  ``S_INDENT`` tokens
    are compared against ``self.last_num_spaces`` by the length of their text:
    equal width is skipped, a smaller width yields a synthetic ``DEDENT`` and
    a larger width yields a synthetic ``INDENT``.  At ``EOF`` a final
    ``DEDENT`` is emitted when the previous token was a ``B_BREAK`` and the
    last recorded indentation is greater than zero.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source; confirm that the ``S_INDENT``/``EOF``
    handling is meant to apply to pending tokens as well.
    """
    # consume pending token if it's there.
    if self.pending_token is not None:
        t = self.pending_token
        self.pending_token = None
    else:
        t = super(YAMLLexerWrapper, self).nextToken()
    if t.type == YAMLLexer.S_INDENT:
        num_spaces = len(t.text)
        if num_spaces == self.last_num_spaces:
            # skip to next token
            t = self.nextToken()
        elif num_spaces < self.last_num_spaces:
            # Emit DEDENT now and replay the indent token on the next call.
            self.pending_token = t
            t = CommonToken(type=YAMLParser.DEDENT)
        elif num_spaces > self.last_num_spaces:
            t = CommonToken(type=YAMLParser.INDENT)
        else:
            # The three comparisons above are exhaustive for integers.
            assert False
        self.last_num_spaces = num_spaces
    elif t.type == Token.EOF:
        if self.last_token.type == YAMLLexer.B_BREAK:
            if self.last_num_spaces > 0:
                # Emit one DEDENT and hand out the EOF on the next call.
                self.pending_token = t
                t = CommonToken(type=YAMLParser.DEDENT)
    # Remember what was returned so the EOF branch can inspect it later.
    self.last_token = t
    return t
def createToken(self, type_, text="", length=0):
    """Build a default-channel token anchored at the current token start.

    The token's start index is ``self._tokenStartCharIndex`` and its stop
    index is that value plus *length*.  *text* is assigned as the token text.
    """
    begin = self._tokenStartCharIndex
    token = CommonToken(
        self._tokenFactorySourcePair,
        type_,
        self.DEFAULT_TOKEN_CHANNEL,
        begin,
        begin + length,
    )
    token.text = text
    return token
def addToken(self, prefix: str, s: str, token_type: TT):
    """Append a token for *s* to ``self.builder`` and advance ``self.pos``.

    The token's type is ``token_type.value`` and its text is ``prefix + s``.
    Its column is the value of ``self.pos`` before advancing and its line is
    ``self.lineNo``.  ``self.pos`` moves forward by ``len(s)`` only; the
    prefix does not contribute.
    """
    new_token = CommonToken(type=token_type.value)
    new_token.text = prefix + s
    new_token.line = self.lineNo
    # Column must be captured before the position is advanced.
    new_token.column = self.pos

    self.pos += len(s)
    self.builder.append(new_token)
def create(self, source, type, text, channel, start, stop, line, column):
    """Create a ``CommonToken`` and fill in its position and text.

    When *text* is given it is used as the token text.  Otherwise, if
    ``self.copyText`` is set and ``source[1]`` is not ``None``, the text is
    copied from ``source[1].getText(start, stop)``.  In every other case the
    token text is left untouched.
    """
    token = CommonToken(source, type, channel, start, stop)
    token.line, token.column = line, column

    if text is None:
        if self.copyText and source[1] is not None:
            token.text = source[1].getText(start, stop)
    else:
        token.text = text
    return token
def emit_type(self, token_type, channel=Token.DEFAULT_CHANNEL, text=""):
    """Create a synthetic token ending at the current input position and emit it.

    Args:
        token_type: Token type for the new token.
        channel: Channel to place the token on (default channel if omitted).
        text: Text assigned to the token; its length determines the span.

    Returns:
        The token that was passed to ``self.emitToken``.
    """
    char_index = self.getCharIndex()
    # ANTLR token stop indexes are inclusive, while getCharIndex() is the
    # index of the next character to consume, so the last character of the
    # token is at char_index - 1.  For empty text this yields stop < start,
    # which is how ANTLR represents zero-width tokens.
    token = CommonToken(
        self._tokenFactorySourcePair,
        token_type,
        channel,
        char_index - len(text),
        char_index - 1,
    )
    token.line = self.line
    token.column = self.column
    token.text = text
    self.emitToken(token)
    return token
def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
    """Error-listener hook that emits an invalid-type token at the error position.

    A zero-offset token (start and stop both equal to the recognizer's
    current token start index) of type ``Token.INVALID_TYPE`` is created on
    the default channel, stamped with the recognizer's token start
    line/column, and emitted through ``recognizer.emitToken``.  Before
    emitting, the recognizer's ``_type`` is set to
    ``Token.MIN_USER_TOKEN_TYPE``.

    The *offendingSymbol*, *line*, *column*, *msg* and *e* arguments are not
    used.
    """
    origin = recognizer._tokenStartCharIndex
    bad_token = CommonToken(
        source=(recognizer, recognizer._input),
        type=Token.INVALID_TYPE,
        channel=Token.DEFAULT_CHANNEL,
        start=origin,
        stop=origin,
    )
    bad_token.line = recognizer._tokenStartLine
    bad_token.column = recognizer._tokenStartColumn

    recognizer._type = Token.MIN_USER_TOKEN_TYPE
    recognizer.emitToken(bad_token)
def commonToken(self, _type, text):
    """Create a default-channel token that ends just before the current char index.

    The stop index is ``self.getCharIndex() - 1``.  The start index is
    ``len(text) - 1`` positions before the stop, or equal to the stop when
    *text* is empty.  *text* is used only for its length; it is not assigned
    to the token.
    """
    last = self.getCharIndex() - 1
    # len 0 -> first == last; len n >= 1 -> first == last - n + 1
    first = last - max(len(text) - 1, 0)
    return CommonToken(
        self._tokenFactorySourcePair,
        _type,
        self.DEFAULT_TOKEN_CHANNEL,
        first,
        last,
    )
def common_token(self, _type, text):
    """Create a default-channel token sized after *text*.

    Args:
        _type: Token type for the new token.
        text: Text the token stands for.  Only its length is used, to place
            the start index; it is not assigned to the token.

    Returns:
        A ``CommonToken`` whose stop index is ``self.getCharIndex() - 1`` and
        whose start index is ``len(text) - 1`` positions earlier (or equal to
        the stop index when *text* is empty).
    """
    from antlr4.Token import CommonToken

    stop = self.getCharIndex() - 1
    # Size the span from the *text* argument.  The previous code measured
    # ``self.text`` (the lexer's current match) and ignored the argument, so
    # the span of a synthetic token depended on whatever had just been lexed.
    start = stop - len(text) + 1 if text else stop
    return CommonToken(
        self._tokenFactorySourcePair,
        _type,
        Lexer.DEFAULT_TOKEN_CHANNEL,
        start,
        stop,
    )
def commonToken(self, type: int, text: str):
    """Create a default-channel token that ends just before the current char index.

    Args:
        type: Token type for the new token.
        text: Text the token stands for.  Only its length is used, to place
            the start index; it is not assigned to the token.

    Returns:
        A ``CommonToken`` whose stop index is ``self.getCharIndex() - 1``.
        The start index is ``stop - len(text) + 1``, or ``stop`` itself when
        *text* is empty.
    """
    stop: int = self.getCharIndex() - 1
    # For empty text the token must collapse onto the stop position.  The
    # previous default of 0 made such tokens span from the very beginning of
    # the input up to ``stop``.
    start: int = stop - len(text) + 1 if text else stop
    return CommonToken(
        self._tokenFactorySourcePair,
        type,
        self.DEFAULT_TOKEN_CHANNEL,
        start,
        stop,
    )
def exitPlusOp(self, ctx: FoxySheepParser.PlusOpContext): """PlusOp[expr1,expr2] We have to treat PlusOp special, because we have to keep the operators intact, and only plus and minus (not PlusMinus or MinusPlus) are flat. The situation is complicated by the fact that Mathematica parses "a-b" as "Plus[a, Times[-1, b]]". We Rewrite the parse tree, inserting the Times context and changing BINARYMINUS to BINARYPLUS.""" # If the op isn't Plus or Minus, nothing to do. if ctx.BINARYMINUS() is None and ctx.BINARYPLUS() is None: return # Since ANTLR4 parses this operator as left associative, we only # need to check the left hand side expr. rhs = ctx.getChild(2) # If the operator of the PlusOp is BINARYMINUS, we rewrite the tree as # "Plus[lhs, Times[-1, rhs]]". Note that if rhs is TIMES, we have to # keep that TIMES flat. if ctx.BINARYMINUS() is not None: # Construct Times, or obtain it from the rhs. times = None if isinstance(rhs, FoxySheepParser.TimesContext): times = rhs else: # If rhs is already a times, keep it flat. times = FoxySheepParser.TimesContext( None, FoxySheepParser.ExprContext(None)) ctx.children.remove(rhs) adopt(ctx, times) adopt(times, rhs) # Add "-1" as the first child of Times. addChild(times, makeNumber(times, -1), 0) # Finally, we have to change operator to BINARYPLUS. plustoken = CommonToken(type=FoxySheepParser.BINARYPLUS) plustoken.text = '+' plus = TerminalNodeImpl(plustoken) # Replace minus token with plus. ctx.children[1] = plus plus.parentCtx = ctx # Flatten flatten(ctx)
def __emit_token_type_on_channel(self, token_type: int, channel: int, text: str) -> None:
    """Emit a synthetic token of *token_type* on *channel* carrying *text*.

    The token ends at ``self.getCharIndex() - 1`` and starts ``len(text)``
    characters before ``self.getCharIndex()``.  Its line and column are taken
    from ``self.line`` and ``self.column``, and it is handed to
    ``self.emitToken``.
    """
    end_exclusive: int = self.getCharIndex()
    synthetic: CommonToken = CommonToken(
        self._tokenFactorySourcePair,
        token_type,
        channel,
        end_exclusive - len(text),
        end_exclusive - 1,
    )
    synthetic.line = self.line
    synthetic.column = self.column
    synthetic.text = text
    self.emitToken(synthetic)
def validate_common_token(py_tok: CommonToken, cpp_tok: CommonToken):
    """Assert that two tokens are of the same type and agree field by field.

    Both arguments must have exactly the same Python type.  If *py_tok* is
    ``None`` (and therefore *cpp_tok* is too) nothing else is checked.
    Otherwise ``type``, ``channel``, ``start``, ``stop``, ``tokenIndex``,
    ``line``, ``column`` and ``text`` are compared in that order, and
    ``cpp_tok.getInputStream()`` must be an ``InputStream``.

    Raises:
        AssertionError: on the first mismatch.
    """
    assert type(py_tok) == type(cpp_tok)
    if py_tok is None:
        return

    compared_fields = (
        "type",
        "channel",
        "start",
        "stop",
        "tokenIndex",
        "line",
        "column",
        "text",
    )
    for field_name in compared_fields:
        assert getattr(py_tok, field_name) == getattr(cpp_tok, field_name)

    assert isinstance(cpp_tok.getInputStream(), InputStream)
def main(argv):
    """Parse the file named by ``argv[1]`` and experiment with editing its tokens.

    The file is lexed and parsed with ``CPP14Lexer``/``CPP14Parser``, the tree
    is walked with a ``Tracker1`` listener, and the token stream text is
    printed before and after the token list is modified in place.
    """
    sai = StaticAnalysisInfo()
    input = FileStream(argv[1])
    lexer = CPP14Lexer(input)
    stream = CommonTokenStream(lexer)
    parser = CPP14Parser(stream)
    tree = parser.translationunit()
    tracker = Tracker1(sai, stream)
    walker = ParseTreeWalker()
    walker.walk(tracker, tree)
    # Text of the stream as parsed.
    print(stream.getText())
    # Overwrite the text of the first token.
    stream.tokens[0].text = "test"
    # Take a group of leading tokens, add a single-space token, and splice
    # the group back into the stream at index 2.
    tmp_tok = stream.getTokens(0,3)
    space = CommonToken()
    space.text = ' '
    tmp_tok.append(space)
    stream.tokens[2:2] = tmp_tok
    #help(stream.tokens[0])
    #tmp_tok = stream.tokens[2].clone()
    #stream.tokens.insert(2, tmp_tok)
    #ttt = CommonToken()
    #ttt.text = '???'
    #stream.tokens.insert(2, ttt)
    # Text of the stream after the edits above.
    print(stream.getText())
    print("---------")
    #lll = stream.getTokens(0,5)
    #print(lll)
    #print(stream.tokens[0].getTokenSource().text)
    #print(tracker.rewrite.getTokenStream().getText())
    # NOTE(review): the triple quote below opens a string literal whose
    # closing delimiter is not visible in this part of the file.
    '''
def insertLeft(self, type, text):
    """Insert a new token before the current node and return an editor for it.

    The new token takes ``type.value`` as its type and *text* as its text;
    its line and token index are copied from the token held by
    ``self._dllTokens``.  It is inserted into ``self._dllAll`` at
    ``self._dllTokens`` and the resulting node is wrapped in a
    ``TokenEditor``.
    """
    anchor = self._dllTokens.value

    fresh = CommonToken()
    fresh.type = type.value
    fresh.text = text
    fresh.line = anchor.line
    fresh.tokenIndex = anchor.tokenIndex

    node = self._dllAll.insert(fresh, self._dllTokens)
    return TokenEditor(node, self._dllAll)
def create(self, source, type, text, channel, start, stop, line, column):
    """Build a ``CommonToken`` with position info and, where available, text.

    An explicit *text* always wins.  Without it, the text is copied from
    ``source[1].getText(start, stop)`` provided ``self.copyText`` is set and
    ``source[1]`` is not ``None``; otherwise no text is assigned.
    """
    result = CommonToken(source, type, channel, start, stop)
    result.line = line
    result.column = column

    if text is not None:
        result.text = text
        return result

    if self.copyText and source[1] is not None:
        result.text = source[1].getText(start, stop)
    return result
def nextToken(self):
    """Return the next token, adjusting it for PHP-specific lexing rules.

    On top of the base lexer's token this method:

    * pops the lexer mode(s) at a PHP end tag, turns ``</script>`` into a
      ``ScriptClose`` token, and otherwise supplies a synthetic ``;`` when
      the statement before the end tag was not already terminated;
    * tracks ``language="php"`` HTML attributes via ``_htmlNameText`` and
      ``_phpScript``;
    * recognises the end of heredoc/nowdoc text and replaces it with a
      semicolon-carrying token;
    * remembers the type of the last token emitted in PHP mode outside the
      hidden channel in ``_prevTokenType``.
    """
    token = super(PhpBaseLexer, self).nextToken()

    if token.type in (self.PHPEnd, self.PHPEndSingleLineComment):
        if self._mode == self.SingleLineCommentMode:
            # SingleLineCommentMode for such allowed syntax:
            # // <?php echo "Hello world"; // comment ?>
            self.popMode()
        self.popMode()

        if token.text == "</script>":
            self._phpScript = False
            token.type = self.ScriptClose
        else:
            # Add semicolon to the end of statement if it is absent.
            # For example: <?php echo "Hello world" ?>
            if self._prevTokenType in (
                self.SemiColon,
                self.Colon,
                self.OpenCurlyBracket,
                self.CloseCurlyBracket,
            ):
                # Statement already terminated: drop the end tag.
                token = super(PhpBaseLexer, self).nextToken()
            else:
                token = CommonToken(type=self.SemiColon)
                token.text = ';'
    elif token.type == self.HtmlName:
        self._htmlNameText = token.text
    elif token.type == self.HtmlDoubleQuoteString:
        if token.text == "php" and self._htmlNameText == "language":
            self._phpScript = True
    elif self._mode == self.HereDoc:
        # Heredoc and Nowdoc syntax support:
        # http://php.net/manual/en/language.types.string.php#language.types.string.syntax.heredoc
        if token.type in (self.StartHereDoc, self.StartNowDoc):
            # Strip the leading "<<<" and any quotes around the identifier.
            self._heredocIdentifier = token.text[3:].strip().replace("'", "")
        if token.type == self.HereDocText:
            if self.CheckHeredocEnd(token.text):
                self.popMode()
                heredoc_identifier = self.GetHeredocEnd(token.text)
                if token.text.strip().endswith(';'):
                    text = heredoc_identifier + ";\n"
                    token = CommonToken(type=self.SemiColon)
                    token.text = text
                else:
                    token = super(PhpBaseLexer, self).nextToken()
                    token.text = heredoc_identifier + "\n;"
    elif self._mode == self.PHP:
        # Only tokens outside the hidden channel count as the "previous"
        # token.  The earlier check recorded hidden-channel tokens only, so
        # _prevTokenType could never match SemiColon/Colon/brackets and a
        # semicolon was injected before every end tag.
        if self._channel != self.HIDDEN:
            self._prevTokenType = token.type

    return token
def insertRight(self, type, text):
    """Insert a new token after the current node and return an editor for it.

    The new token takes ``type.value`` as its type and *text* as its text;
    its line and token index are copied from the token held by
    ``self._dllTokens``.  If the current node has a truthy ``next`` node the
    token is inserted into ``self._dllAll`` at that node, otherwise it is
    appended.  The resulting node is wrapped in a ``TokenEditor``.
    """
    anchor = self._dllTokens.value

    fresh = CommonToken()
    fresh.type = type.value
    fresh.text = text
    fresh.line = anchor.line
    fresh.tokenIndex = anchor.tokenIndex

    successor = self._dllTokens.next
    node = (
        self._dllAll.insert(fresh, successor)
        if successor
        else self._dllAll.append(fresh)
    )
    return TokenEditor(node, self._dllAll)
def nextToken(self) -> Token:
    """Return the next token, merging a type-16 token and a following "." when they form a known abbreviation.

    Tokens waiting in ``self.queue`` are served first.  Otherwise a token is
    read from the base lexer; if its type is 16, one more token is read.
    When that second token has type 19 and text ".", and the first token's
    text plus "." is found in ``self.abbreviations``, a single token of
    type 1 covering both is returned.  In every other case the second token
    is queued for the next call and the first one is returned unchanged.
    """
    if not self.queue.empty():
        return self.queue.get(block=False)

    current: Token = super(TurkishLexer, self).nextToken()
    if current.type != 16:
        return current

    following: Token = super(TurkishLexer, self).nextToken()
    if following.type == 19 and following.text == ".":
        abbrev = current.text + "."
        if self.abbreviations and abbrev in self.abbreviations:
            merged = CommonToken(type=1)
            merged.text = abbrev
            merged.start = current.start
            merged.stop = following.stop
            merged.tokenIndex = current.tokenIndex
            merged.column = current.column
            merged.line = current.line
            return merged

    # Not an abbreviation: keep the look-ahead token for the next call.
    self.queue.put_nowait(following)
    return current
def get_injected_token(self, copy_from: CommonToken, token_type_str):
    """Return a clone of *copy_from* whose text is *token_type_str*.

    The clone is obtained through ``copy_from.clone()``; *copy_from* itself
    is not assigned to.
    """
    injected = copy_from.clone()
    injected.text = token_type_str
    return injected
def commonToken(self, type, text, indent=0):
    """Create a default-channel token ending *indent* characters before the last consumed one.

    The stop index is ``self.getCharIndex() - 1 - indent``.  For non-empty
    *text* the start index is ``stop - len(text) + 1``; for empty *text* it
    equals the stop index.  *text* is used only for its length.
    """
    stop = self.getCharIndex() - 1 - indent
    if text:
        start = stop - len(text) + 1
    else:
        start = stop
    return CommonToken(
        self._tokenFactorySourcePair,
        type,
        super().DEFAULT_TOKEN_CHANNEL,
        start,
        stop,
    )
def commonToken(self, mytype, text):
    """Create a default-channel token that ends just before the current char index.

    The stop index is ``self.getCharIndex() - 1``; the start index is
    ``stop - len(text) + 1``, or the stop index itself when *text* has
    length zero.  *text* is used only for its length.
    """
    last = self.getCharIndex() - 1
    first = last
    if len(text) != 0:
        first = last - len(text) + 1
    return CommonToken(
        self._tokenFactorySourcePair,
        mytype,
        super().DEFAULT_TOKEN_CHANNEL,
        first,
        last,
    )
def createThin(self, type, text):
    """Return a ``CommonToken`` carrying only a type and a text.

    No source, channel or position arguments are passed to the constructor.
    """
    thin = CommonToken(type=type)
    thin.text = text
    return thin
def createThin(self, type: int, text: str):
    """Create a minimal token: only *type* and *text* are set explicitly."""
    token = CommonToken(type=type)
    token.text = text
    return token
def deriveToken(self, token: CommonToken, type: int):
    """Return a clone of *token* with its type replaced by *type*.

    The clone comes from ``token.clone()``; *token* itself is not assigned to.
    """
    derived = token.clone()
    derived.type = type
    return derived
def createThin(self, type: int, text: str):
    """Build a bare ``CommonToken`` of the given *type* and assign *text* to it."""
    bare_token = CommonToken(type=type)
    bare_token.text = text
    return bare_token
def commonToken(self, _type, text):
    """Create a token that ends just before the current char index.

    The stop index is ``self.getCharIndex() - 1``.  The start index equals
    the stop index when *text* is the empty string, and
    ``stop - len(text) + 1`` otherwise.  No channel is passed, so the
    constructor's default applies; *text* is used only for its length.
    """
    stop = self.getCharIndex() - 1
    if text == "":
        start = stop
    else:
        start = stop - len(text) + 1
    return CommonToken(
        self._tokenFactorySourcePair,
        _type,
        start=start,
        stop=stop,
    )