# Requires the antlr4-python3-runtime package plus a JavaLexer generated by
# ANTLR4 from a Java grammar; adjust the JavaLexer import to match where the
# generated module lives.
from antlr4 import InputStream, CommonTokenStream
from JavaLexer import JavaLexer


def java_tokenize(line):
    """Tokenize a line of Java source and return the token texts as a list."""
    # stream = InputStream(line.decode('utf-8', 'ignore'))  # old Python 2 path
    stream = InputStream(line)
    lexer = JavaLexer(stream)
    tokens = CommonTokenStream(lexer)
    tokens.fetch(100000)  # buffer up to 100000 tokens from the lexer
    # token_list = set(token.text for token in tokens.tokens)  # earlier: unique texts only
    token_list = [token.text for token in tokens.tokens]
    return token_list  # alternatively, return tokens.tokens for the Token objects
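

# A minimal usage sketch, not part of the original source: the sample input
# below is illustrative. Exact output depends on the grammar the JavaLexer was
# generated from (whitespace may be skipped or kept on the HIDDEN channel, and
# the runtime typically appends an <EOF> token to the buffer).
if __name__ == '__main__':
    print(java_tokenize('int x = a + 1;'))
    # e.g. ['int', 'x', '=', 'a', '+', '1', ';', '<EOF>'] with a grammar that skips whitespace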