Example #1
def findMain(code):
    """
    Look for the existence of if __name__ == '__main__'
    Documentation: https://docs.python.org/2/tutorial/modules.html in 6.1.1
    """
    lexer = PythonLexer()

    # pygments.lex returns a generator, so lex twice to scan for each variant
    tokens_1 = pygments.lex(code, lexer)
    tokens_2 = pygments.lex(code, lexer)
    
    sequence_1 = [(Token.Keyword, '^if$'),
                  (Token.Name, '^__name__$'),
                  (Token.Operator, '^==$'),
                  (Token.Literal.String.Double, '^__main__$'),
                  (Token.Punctuation, '^:$')]

    sequence_2 = [(Token.Keyword, '^if$'),
                  (Token.Name, '^__name__$'),
                  (Token.Operator, '^==$'),
                  (Token.Literal.String.Single, '^__main__$'),
                  (Token.Punctuation, '^:$')]

    mainIdiom = PythonIdiom('ifNameMain')

    lineNum = _findSeqInTokens(sequence_1, tokens_1)
    if lineNum < 0:
        lineNum = _findSeqInTokens(sequence_2, tokens_2)
    if lineNum > 0:
        mainIdiom.addNew(lineNum)
    log("If name main found in lines: " + str(mainIdiom.getLines()))
    return mainIdiom
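The helper functions used throughout these idiom-finder examples (_findSeqInTokens, _sameToken, _getNewLines, PythonIdiom, log) are not shown. A hypothetical sketch of the three token helpers, assuming _findSeqInTokens returns the 1-based line of the first match and a negative number otherwise:

import re

def _sameToken(token, expected):
    # token is a (ttype, value) pair from pygments.lex;
    # expected is a (ttype, regex) pair like the sequences above
    ttype, value = token
    expType, pattern = expected
    return ttype in expType and re.match(pattern, value) is not None

def _getNewLines(token):
    # Number of source lines this token spans
    return token[1].count('\n')

def _findSeqInTokens(sequence, tokens):
    # Return the 1-based line where `sequence` first matches, else -1.
    # Whitespace-only tokens are skipped so sequences can match across spaces.
    lineNumber, matchLine, idx = 1, -1, 0
    for token in tokens:
        lineNumber += _getNewLines(token)
        if not token[1].strip():
            continue
        if _sameToken(token, sequence[idx]):
            if idx == 0:
                matchLine = lineNumber
            idx += 1
            if idx == len(sequence):
                return matchLine
        else:
            idx = 0
    return -1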
Example #2
def test_bare_class_handler():
    from pygments import lex, format
    from pygments.formatters import HtmlFormatter
    from pygments.lexers import PythonLexer
    try:
        lex('test\n', PythonLexer)
    except TypeError as e:
        assert 'lex() argument must be a lexer instance' in str(e)
    else:
        assert False, 'nothing raised'
    try:
        format([], HtmlFormatter)
    except TypeError as e:
        assert 'format() argument must be a formatter instance' in str(e)
    else:
        assert False, 'nothing raised'
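For contrast, a minimal sketch of the correct calls, which pass instances rather than classes:

from pygments import lex, format
from pygments.formatters import HtmlFormatter
from pygments.lexers import PythonLexer

tokens = list(lex('test\n', PythonLexer()))   # lexer instance, not the class
html = format(tokens, HtmlFormatter())        # formatter instance, not the class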
Example #3
    def BuildTags(self, buff, lexer):
        """
        @param buff: code buffer
        @param lexer: xml lexer
        @return: taglib.DocStruct instance for the given buff
        """
        rtags = taglib.DocStruct()
        rtags.SetElementDescription(self.TAG_ID, '/')
        line_count = 0
        current_line = []
        code_lines = []

        # Parse the file into tokens and values
        for ttype, value in lex(buff.read(), lexer):
            if '\n' in value:
                if len(current_line) > 0:
                    code_lines.append((line_count, current_line))
                current_line = []
                line_count += value.count('\n')
                continue
            if ttype == Token.Name.Tag and len(value) > 1:
                current_line.append((ttype, value))
        docroot = self.Parse(code_lines)
        if docroot is not None:
            rtags.AddElement(self.TAG_ID, docroot)
        return rtags
Example #4
def highlightMultiSource(codeLexerTuples, multiSourceFormatter, outfile=None):
    """
    main function to create formatted output based on tuples of code and
    related metadata (lexing information and title to display)
    """
    if not isinstance(codeLexerTuples, tuple):
        raise TypeError(
            "first highlight() argument must be a tuple of codeLexerTuples"
        )
    if not isinstance(multiSourceFormatter, Formatter):
        raise TypeError(
            "second highlight() argument must be a MultiSourceFormatter"
        )

    tokensList = []
    for codeLexerTuple in codeLexerTuples:
        tokensList.append(lex(codeLexerTuple.code, codeLexerTuple.lexer))
        multiSourceFormatter.titles.append(codeLexerTuple.title)
    if not outfile:
        realoutfile = BytesIO() if multiSourceFormatter.encoding else StringIO()
        multiSourceFormatter.format(tokensList, realoutfile)
        return realoutfile.getvalue()
    else:
        multiSourceFormatter.format(tokensList, outfile)
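The .code/.lexer/.title attributes accessed above suggest a namedtuple; a hypothetical usage sketch, assuming some Formatter subclass plays the role of MultiSourceFormatter:

from collections import namedtuple
from pygments.lexers import PythonLexer, CLexer

# Hypothetical record type matching the attributes used above
CodeLexerTuple = namedtuple('CodeLexerTuple', 'code lexer title')

output = highlightMultiSource(
    (CodeLexerTuple('print("hi")\n', PythonLexer(), 'Python version'),
     CodeLexerTuple('puts("hi");\n', CLexer(), 'C version')),
    multiSourceFormatter)  # assumed: an instance of a MultiSourceFormatter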
Example #5
def style_ansi(raw_code, lang=None):
    """ actual code hilite """
    lexer = None
    if lang:
        try:
            lexer = get_lexer_by_name(lang)
        except ValueError:
            print(col(R, 'Lexer for %s not found' % lang))
    if not lexer:
        try:
            if guess_lexer:
                lexer = pyg_guess_lexer(raw_code)
        except Exception:
            pass
    if not lexer:
        lexer = get_lexer_by_name(def_lexer)
    tokens = lex(raw_code, lexer)
    cod = []
    for t, v in tokens:
        if not v:
            continue
        _col = code_hl_tokens.get(t)
        if _col:
            cod.append(col(v, _col))
        else:
            cod.append(v)
    return ''.join(cod)
Example #6
def findBadUseImport(code):
    """
    Find when use from foo import *
    Documentation: http://python.net/~goodger/projects/pycon/2007/idiomatic/handout.html#importing
                   https://docs.python.org/2/howto/doanddont.html#from-module-import
    """
    sequence = [(Token.Keyword.Namespace, '^from$'),
                (Token.Name.Namespace, '.*'),
                (Token.Keyword.Namespace, '^import$'),
                (Token.Operator, r'\*')]
    lexer = PythonLexer()
    lexer.add_filter('tokenmerge')
    tokens = pygments.lex(code, lexer)
    badUseImport = PythonIdiom('badImport')

    lineNumber = 1
    while True:
        lineAux = _findSeqInTokens(sequence, tokens)
        if lineAux < 0:
            break
        lineNumber += lineAux - 1
        badUseImport.addNew(lineNumber)
    log("badUseImport found in lines {0}".format(badUseImport.getLines()))

    return badUseImport
Example #7
    def __init__(self, disassembly, lexer=lexer, msg=None):

        self.lines = []
        if isinstance(disassembly, list):
            self.lines = disassembly
        elif disassembly:
            line = []
            if msg:
                current_function = msg.rsplit(None, 1)[-1][:-1]
            else:
                current_function = None
            with currentfunctiontfilter.current_function(current_function):
                for ttype, value in pygments.lex(disassembly, lexer):
                    if '\n' in value:
                        self.lines.append(DisassemblyLine(line))
                        line = []
                    else:
                        line.append((ttype, value))

        self.linenos = {}
        for i, line in enumerate(self.lines):
            self.linenos[line.address] = line, i

        self.lexer = lexer
        self.msg = msg
Example #8
    def lex(self, code, lex):
        """Return tokenified code.

        Return a list of tuples (scope, word) where word is the word to be
        printed and scope the scope name representing the context.

        :param str code: Code to tokenify.
        :param lex: Lexer to use.
        :return:
        """
        if lex is None:
            if not isinstance(code, str):
                # if no suitable lexer is found, return decoded code
                code = code.decode("utf-8")
            return (("global", code),)

        words = pygments.lex(code, lex)

        scopes = []
        for word in words:
            token = word[0]
            scope = "global"

            if token in self.token_map:
                scope = self.token_map[token]

            scopes.append((scope, word[1]))
        return scopes
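The token_map consulted above is not shown; the same bucketing can be reproduced standalone with a dict keyed by pygments token types (the mapping here is illustrative, not from the original source):

import pygments
from pygments.lexers import PythonLexer
from pygments.token import Token

token_map = {Token.Keyword: 'keyword', Token.Comment.Single: 'comment'}
scopes = [(token_map.get(ttype, 'global'), text)
          for ttype, text in pygments.lex('# hi\npass\n', PythonLexer())]
# e.g. [('comment', '# hi'), ('global', '\n'), ('keyword', 'pass'), ('global', '\n')]
# (exact whitespace token values vary by pygments version)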
Example #9
def main():
    arguments = docopt(
        __doc__.format(program=docstring_format_dict),
        version=f'{docstring_format_dict["human_format"]} 2.0',
        options_first=True
        )

    lexer = BibtexLexer()
    lexer.add_filter( RaiseOnErrorTokenFilter() )
    #lexer.add_filter( TokenMergeFilter() )
    lexer.add_filter( KeywordCaseFilter(case='lower') )
    
    for fname in arguments['<file>']:
        # get bibtex source
        with open(fname, 'r') as f:
            code = f.read()

        # now lex the code
        for idx, item in enumerate(pygments.lex(code, lexer)):
            tokentype, tokenvalue = item[0], item[1]
            
            # if tokentype in frozenset([Token.Text.Whitespace, Token.Punctuation]):
            #     continue
            print(  "{0:>5}\t{1[0]!s:<25}\t{1[1]!r}".format(idx, item),
                    file=sys.stdout )
Example #10
    def _lexContents(self):
        # We add a space in front because otherwise the lexer will discard
        # everything up to the first token, meaning that we lose the potentially
        # empty first lines and mess up the matching. With the space, we force
        # the lexer to process the initial \n, and we just skip the space token
        tokens = list(pygments.lex(" "+self._document.documentText(), pygments.lexers.PythonLexer()))
        self._document.beginTransaction()
        current_line_num = 1
        meta = []

        # Skip the space token
        for token in tokens[1:]:
            ttype, string = token

            meta.extend([ttype]*len(string))

            if string.endswith('\n'):
                self._document.deleteCharMeta((current_line_num, 1),
                                              self._document.lineLength(current_line_num),
                                              CharMeta.LexerToken)
                self._document.updateCharMeta((current_line_num, 1), {CharMeta.LexerToken: meta})
                current_line_num += 1
                meta = []

        self._document.endTransaction()
Example #11
    def filename(self, value):
        "Set the file being displayed by the view"
        if self._filename != value:
            self.code.delete('1.0', END)
            with open(value) as code:
                all_content = code.read()
                if self.lexer:
                    lexer = self.lexer
                else:
                    lexer = guess_lexer_for_filename(value, all_content, stripnl=False)
                for token, content in lex(all_content, lexer):
                    self.code.insert(END, content, str(token))

            # Now update the text for the linenumbers
            end_index = self.code.index(END)
            line_count = int(end_index.split('.')[0])
            lineNumbers = '\n'.join('%5d' % i for i in range(1, line_count))
            self.lines.config(state=NORMAL)
            self.lines.delete('1.0', END)
            self.lines.insert('1.0', lineNumbers)
            self.lines.config(state=DISABLED)

            # Store the new filename, and clear any current line
            self._filename = value
            self._line = None
Example #12
 def SyntexHighlight(self, event=None):
     from tkinter.font import Font
     for tag in self.tag_names():
         self.tag_delete(tag)
     self.mark_set("range_start", "1.0")
     data = self._get_value()
     self.tag_configure("Token.Comment", foreground="#F00")
     bolder = Font(family=self.app.cnf['font'][0])
     bolder.config(size=self.app.cnf['font'][1]-2)
     bolder.config(weight="bold")
     for token, content in lex(data, PythonLexer()):
         self.mark_set("range_end", "range_start + %dc" % len(content))
         self.tag_add(str(token), "range_start", "range_end")
         self.mark_set("range_start", "range_end")
     self.tag_config("Token.Comment.Single", foreground="#F00")
     self.tag_config("Token.Literal.String.Doc", foreground="#F00")
     for tag in self.tag_names():
         if 'Token.Keyword' == tag:
             self.tag_config(tag, foreground="#008", font=bolder)
         elif 'Token.Keyword.Namespace' == tag:
             self.tag_config(tag, foreground="#00F", font=bolder)
         elif 'Token.Name.Class' in tag:
             self.tag_config(tag, foreground="#F30", background='#AFA')
         elif 'Token.Name.Function' in tag:
             self.tag_config(tag, foreground="#A3A", background='#FFA')
         elif 'Token.Literal' in tag:
             self.tag_config(tag, foreground="#6A0")
         elif 'Token.Operator' in tag:
             self.tag_config(tag, foreground="#A3A")
     print(self.tag_names())
Example #13
        def _generator():
            lexer = self.create_lexer()
            raw = pygments.lex(code_region.text, lexer)

            row, col = 0, 0
            begin_column = False
            for type, value in raw:
                tok = Tok(type, value, row, col, begin_column)
                if '\n' in value:
                    # Pygments doesn't necessarily split spaces and newlines into
                    # separate tokens, so we do it ourselves, ensuring there is always
                    # a token at column 0 of every row
                    spaces = value.split('\n')
                    begin_column = False
                    if spaces[0]:
                        yield Tok(type, spaces[0], row, col)
                        col += len(spaces[0])
                    for sp in spaces[1:]:
                        yield Tok(Token.Text, '\n', row, col)
                        row += 1
                        col = 0
                        if not sp:
                            continue
                        yield Tok(Token.Text, sp, row, col)
                        begin_column = True
                        col += len(sp)
                    continue
                if tok.is_whitespace():
                    if len(value) > 1:
                        begin_column = True
                else:
                    begin_column = False
                col += len(value)
                yield tok
Example #14
def check(*expected):
    text = ''.join(i[1] for i in expected)
    md_lexer = MarkdownLexer()
    md_lexer.add_filter('raiseonerror')
    md_lexer.add_filter('tokenmerge')
    result = list(pygments.lex(text, md_lexer))
    assert result == list(expected)
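A hedged usage sketch: the assert round-trips only when lexing the concatenated values reproduces exactly the expected pairs, which the tokenmerge filter makes feasible for plain text (token types may differ across pygments versions):

from pygments.token import Token

check((Token.Text, 'just some plain text\n'))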
Example #15
    def __init__(self, pdf, code, lexer):
        self.pdf = pdf
        fname, fstyle, fsize = self.pdf.theme["code-font"]

        self.pdf.set_font(fname, fstyle, fsize)
        style = pygments.styles.get_style_by_name("emacs")
        style = dict(style)
        for token, text in pygments.lex(code["code"], lexer):
            token_style = style[token]

            if token_style["color"]:
                r, g, b = map(ord, token_style["color"].decode("hex"))
            else:
                r, g, b = (0, 0, 0)
            self.pdf.set_text_color(r, g, b)

            if token_style["bold"] and token_style["italic"]:
                self.pdf.set_font(fname, "BI", fsize)
            elif token_style["bold"]:
                self.pdf.set_font(fname, "B", fsize)
            elif token_style["italic"]:
                self.pdf.set_font(fname, "I", fsize)
            else:
                self.pdf.set_font(fname, "", fsize)

            height = pdf.theme["code-height"]
            self.pdf.write(height, text)
Example #16
def findMagicMethods(code):
    """
    Search magic methods in the code and returns a list of how many have
    been found, what kind of dificult it has and wich where
    Documentation: http://www.rafekettler.com/magicmethods.html
                   Python Pocket Reference page 88
    """
    lexer = PythonLexer()
    tokens = pygments.lex(code, lexer)
    lineNumber = 1
    methodsFound = []
    methodsIdiom1 = PythonIdiom('idiomMethods1')
    methodsIdiom2 = PythonIdiom('idiomMethods2')
    methodsIdiom3 = PythonIdiom('idiomMethods3')

    for ttype, word in tokens:
        lineNumber += _getNewLines((ttype, word))
        if ttype is Token.Name.Function:
            if word in magicMethods_1:
                methodsIdiom1.addNew(lineNumber, otherInfo={'method': word})
                methodsFound.append(word)
            elif word in magicMethods_2:
                methodsIdiom2.addNew(lineNumber, otherInfo={'method': word})
                methodsFound.append(word)
            elif word in magicMethods_3:
                methodsIdiom3.addNew(lineNumber, otherInfo={'method': word})
                methodsFound.append(word)

    log("MagicMethods: %s" % str(methodsFound))
    return [methodsIdiom1, methodsIdiom2, methodsIdiom3]
Example #17
def basicStructure(code):
    lexer = PythonLexer()
    lexer.add_filter('tokenmerge')
    tokens = pygments.lex(code, lexer)
    for token in tokens:
        print(token)
Example #18
def checkBadLoopCollect(code):
    """
    Look for bad loop like 'for i in range(len(list))'
    Documentation: https://youtu.be/OSGv2VnC0go?t=4m47s
    """
    sequence = [(Token.Keyword, '^for$'),
                (Token.Name, r'^\w+$'),
                (Token.Operator.Word, '^in$'),
                (Token.Name.Builtin, '^range$|^xrange$'),
                (Token.Punctuation, r'^\($'),
                (Token.Name.Builtin, '^len$'),
                (Token.Punctuation, r'^\($'),
                (Token.Name, r'^\w+$')]
    lexer = PythonLexer()
    lexer.add_filter('tokenmerge')
    tokens = pygments.lex(code, lexer)
    badLoopCollectIdiom = PythonIdiom('badLoop')

    lineNumber = 1
    while True:
        lineAux = _findSeqInTokens(sequence, tokens)
        if lineAux < 0:
            break
        lineNumber += lineAux - 1
        badLoopCollectIdiom.addNew(lineNumber)
    log("badLoopCollect found in lines {0}".format(badLoopCollectIdiom.getLines()))

    return badLoopCollectIdiom
Example #19
 def getCodeStyleChunks(self, node):
     assert node.tag == 'code'
     lang = node.attrib.get('lang','python')
     #@TODO: error handling if lang is bad
     lexer = pygments.lexers.get_lexer_by_name(lang)
     for tok, text in pygments.lex(e2txt(node), lexer):
         yield [[tok]], text
Example #20
def checkNotRange(code):
    """
    Check if there is: for xx in [0,1,2] instead of for xxx in (x)range
    Documentation: https://youtu.be/OSGv2VnC0go?t=3m4s
    """
    sequence = [(Token.Keyword, '^for$'),
                (Token.Name, '^\w+$'),
                (Token.Operator.Word, '^in$'),
                (Token.Punctuation, '^\[$'),
                (Token.Literal.Number.Integer, '^\d$')]

    lexer = PythonLexer()
    lexer.add_filter('tokenmerge')
    tokens = pygments.lex(code, lexer)
    notRangeIdiom = PythonIdiom('notRange')

    lineNumber = 1
    while True:
        lineAux = _findSeqInTokens(sequence, tokens)
        if lineAux < 0:
            break
        lineNumber += lineAux - 1
        notRangeIdiom.addNew(lineNumber)
    log("badForIn found in lines {0}".format(notRangeIdiom.getLines()))
    return notRangeIdiom
Example #21
def findDocstring(code):
    """Find the use of documentation in the functions, classes or script
    Documentation: https://www.python.org/dev/peps/pep-0257/
    """
    lexer = PythonLexer()
    lexer.add_filter('tokenmerge')

    classDefToken = (Token.Keyword, '^class$')
    functDefToken = (Token.Keyword, '^def$')
    tokens = pygments.lex(code, lexer)

    docIdiom = PythonIdiom('docstring')
    docstringFound = defaultdict(int)
    typeDoc = 'module'
    lineNumber = 1

    for ttype, word in tokens:
        if _sameToken((ttype, word), classDefToken):
            typeDoc = 'class'
        elif _sameToken((ttype, word), functDefToken):
            typeDoc = 'function'
        elif ttype == Token.Literal.String.Doc:
            docstringFound[typeDoc] += 1
            docIdiom.addNew(lineNumber)
        lineNumber += _getNewLines((ttype, word))

    for typeDoc in docstringFound:
        log("type %s: %d found" % (typeDoc, docstringFound[typeDoc]))
    log('DocString found in lines: ' + str(docIdiom.getLines()))
    return docIdiom
Example #22
def lex(code_lines, lexername):
    from sys import stderr
    try:
        from pygments.lexers import get_lexer_by_name
        from pygments import lex
    except ImportError:
        print('For lexer support please install extras: pip install sourcemap-tool[lexer]', file=stderr)
        exit(1)

    # TODO: join lexemes with trailing space, remove comment lexemes
    lexer = get_lexer_by_name(lexername)
    tokens = lex(''.join(code_lines), lexer)
    result = []
    line = []
    for _, text in tokens:
        parts = text.split('\n')
        if len(parts) > 1: # multiline token
            first = True
            for part in parts:
                if not first:
                    result.append(line)
                    line = []
                first = False
                if len(part) > 0:
                    line.append(len(part))
        else:
            if len(text) > 0:
                line.append(len(text))
    if line:
        result.append(line)
    return result
Example #23
def scan_source(fp, args):
    # print("scanning: %r" % fp)

    global filepath

    filepath = fp
    filepath_base = os.path.basename(filepath)

    #print(highlight(code, CLexer(), RawTokenFormatter()).decode('utf-8'))
    with open(filepath, 'r', encoding="utf-8") as fh:
        code = fh.read()

    tokens[:] = []
    line = 1

    for ttype, text in lex(code, CLexer()):
        tokens.append(TokStore(ttype, text, line))
        line += text.count("\n")

    col = 0  # track line length
    index_line_start = 0

    for i, tok in enumerate(tokens):
        #print(tok.type, tok.text)
        if tok.type == Token.Keyword:
            if tok.text in {"switch", "while", "if", "for"}:
                item_range = extract_statement_if(i)
                if item_range is not None:
                    blender_check_kw_if(item_range[0], i, item_range[1])
            elif tok.text == "else":
                blender_check_kw_else(i)
        elif tok.type == Token.Punctuation:
            if tok.text == ",":
                blender_check_comma(i)
        elif tok.type == Token.Operator:
            # we check these in pairs, only want first
            if tokens[i - 1].type != Token.Operator:
                op, index_kw_end = extract_operator(i)
                blender_check_operator(i, index_kw_end, op)
        elif tok.type in Token.Comment:
            doxyfn = None
            if "\\file" in tok.text:
                doxyfn = tok.text.split("\\file", 1)[1].strip().split()[0]
            elif "@file" in tok.text:
                doxyfn = tok.text.split("@file", 1)[1].strip().split()[0]

            if doxyfn is not None:
                doxyfn_base = os.path.basename(doxyfn)
                if doxyfn_base != filepath_base:
                    warning("doxygen filename mismatch %s != %s" % (doxyfn_base, filepath_base), i, i)

        # ensure line length
        if (not args.no_length_check) and tok.type == Token.Text and tok.text == "\n":
            # check line len
            blender_check_linelength(index_line_start, i - 1, col)

            col = 0
            index_line_start = i + 1
        else:
            col += len(tok.text.expandtabs(TAB_SIZE))
Example #24
def parse(s, l):
    ret_list = []
    start = 0
    for token in lex(s, l):
        color = determine_color(token[0])
        ret_list.append((start, token[1], color))
        start += len(token[1])
    print(ret_list)
Example #25
 def lex(self):
     # Get lexer for language (use text as fallback)
     try:
         lexer = get_lexer_by_name(self.language)
     except ValueError:
         # info: "no pygments lexer for %s, using 'text'"%self.language
         lexer = get_lexer_by_name('text')
     return pygments.lex(self.code, lexer)
Example #26
def findUpdateVariables1Line(code):
    """
    Look for lines of code like this: 'x, y = 0, 1' or 'x, y = y, x+y'
    """
    lexer = PythonLexer()
    tokens = pygments.lex(code, lexer)

    linesFound = []
    assignIdiom = PythonIdiom('assignOneLine')
    # Token variables
    nameToken = (Token.Name, r'^\w+$')
    equalToken = (Token.Operator, '^=$')
    newLineToken = (Token.Text, '\n')
    commaToken = (Token.Punctuation, '^,$')

    # To avoid mistakes, count the variables before/after the equal sign
    numVarPrevEqual = 0
    numVarPostEqual = 0
    numCommas = 0
    beforeEqual = True
    actualLine = ''
    ignoreLine = False
    lineNumber = 1

    for ttype, word in tokens:
        if not _ignoreStr(word):
            actualLine += word
        lineNumber += _getNewLines((ttype, word))
        if _sameToken((ttype, word), newLineToken):
            beforeEqual = True
            if numVarPrevEqual == numVarPostEqual and numVarPrevEqual > 1:
                if not ignoreLine:
                    linesFound.append(actualLine)
                    assignIdiom.addNew(lineNumber-1)  # -1 because it waits until the line finishes
            actualLine = ''
            numVarPrevEqual, numVarPostEqual, numCommas, ignoreLine = 0, 0, 0, False
            continue

        if ignoreLine:
            continue

        if _sameToken((ttype, word), equalToken):
            if not beforeEqual:
                ignoreLine = True
            beforeEqual = False
            numCommas = 0
        elif _sameToken((ttype, word), commaToken):
            numCommas += 1

        if beforeEqual:
            if _sameToken((ttype, word), nameToken) and (numCommas == numVarPrevEqual):
                numVarPrevEqual += 1
        else:
            if re.match(r'\w+', word) and (numCommas == numVarPostEqual):
                numVarPostEqual += 1
    log("Update in 1 line. Found: " + str(linesFound))
    log("Update in 1 line found in lines " + str(assignIdiom.getLines()))
    return assignIdiom
Example #27
 def show_it(string, tf, width=80):
     tf.reset(width)
     print('=' * 30)
      for t in lex(string, rst_lex):
          print(t)
     print('-' * 30)
     print(highlight(string, rst_lex, tf))
     return
Example #28
 def test_3(self):
     # Note that this will add a newline to the lexed output, since the
     # `ensurenl <http://pygments.org/docs/lexers/>`_ option is True by
     # default.
     lexer = get_lexer_by_name('python')
     token_iter = lex('', lexer)
     # Capture both group and string for help in debugging.
     token_group = list(_group_lexer_tokens(token_iter, True, False))
     assert token_group == [(_GROUP.whitespace, '\n')]
Example #29
    def test_1(self):
        test_py_code = '# A comment\nan_identifier\n'
        test_token_list = [(Token.Comment.Single, '# A comment'),
                           (Token.Text, '\n'),
                           (Token.Name, 'an_identifier'),
                           (Token.Text, '\n')]

        lexer = get_lexer_by_name('python')
        token_list = list( lex(test_py_code, lexer) )
        assert token_list == test_token_list
Example #30
    def _lex(self, lexer):

        """ Lex the document. """

        current_location = 0
        for token, text in pygments.lex(str(self.raw_text_document), lexer):
            stop_position = current_location + len(text)
            flat_slice = FlatSlice(current_location, stop_position)
            self.append(HighlightedTextFragment(flat_slice, token))
            current_location = stop_position
Example #31
 def highlight(self, block):
     """Method called on each block to highlight it content"""
     tokens = pygments.lex(block, self.python_lexer)
     if self.format_rst:
         from pygments.token import Token
         toks = []
         for token in tokens:
             if token[0] == Token.String.Doc and len(token[1]) > 6:
                 toks += pygments.lex(token[1][:3], self.python_lexer)
                 # parse doc string content by rst lexer
                 toks += pygments.lex(token[1][3:-3], self.rst_lexer)
                 toks += pygments.lex(token[1][-3:], self.python_lexer)
             elif token[0] == Token.Comment.Single:
                 toks.append((Token.Comment.Single, token[1][0]))
                 # parse comment content by rst lexer
                 # remove the extra newline added by rst lexer
                 toks += list(pygments.lex(token[1][1:], self.rst_lexer))[:-1]
             else:
                 toks.append(token)
         tokens = toks
     return pygments.format(tokens, self.formatter)
Example #32
def capture_comment(content, lexer, start):
    # look backward to capture the entire comment in case we are in the middle of a multiline comment
    comment_start = comment_end = start
    for line in reversed(content[:start]):
        ttypes = [t for t, _ in pygments.lex(line, lexer)]
        # if a line has no keyword, name or operator
        # and has a comment token we assume it is a part of the initial comment
        if is_a_comment_line_java(ttypes):
            comment_start -= 1
        else:
            break

    # look forward to capture the entire comment in case we are in the middle of a multiline comment
    for line in content[start:]:
        ttypes = [t for t, _ in pygments.lex(line, lexer)]
        if is_a_comment_line_java(ttypes):
            comment_end += 1
        else:
            break
    comment = content[comment_start:comment_end]
    return comment, comment_end
Example #33
def extract_code(start_lineno, file_name):
    with open(file_name.as_posix(), mode='r', encoding='iso-8859-1') as f:
        content = f.readlines()
    lexer = build_lexer('java')

    # content array is 0 index so need to shift down by 1
    start_lineno = max(0, start_lineno - 1)
    comment, comment_end = capture_comment(content, lexer, start_lineno)

    to_extract_content = content[comment_end:]
    code_end = 1
    heuristic = None
    block_count = 0
    for i, line in enumerate(to_extract_content):
        tokens = list(pygments.lex(line, lexer))
        should_stop, reason = Heuristic.should_stop_java(tokens)
        if should_stop and block_count == 0:
            heuristic = reason
            code_end = i
            break

    if heuristic in (Heuristic.CLOSE_PAREN, Heuristic.NEXT_COMMENT):
        code_end = min(code_end, len(to_extract_content))
        code = capture_code(code_end, lexer, to_extract_content)
        comment = strip_special_chars(comment)
    else:
    else:
        code_end = min(code_end + 5, len(to_extract_content))
        code = capture_code(code_end + 1, lexer, to_extract_content)
        comment = strip_special_chars(comment)

    # if "Close the server and confirm it saw what we expected." in comment:
    #     set_trace()
    # skipping comment and code are on the same line case
    # if not comment:
    #     if len(content) - 1 < start_lineno:
    #         print("Length of content is less than start_line {}".format(
    #             file_name.as_posix()))
    #         return None, None, None
    #     ttypes = [t for t, _ in pygments.lex(content[start_lineno], lexer)]
    #     if is_a_code_line(ttypes) and contains_a_comment(ttypes):
    #         line = content[start_lineno].split("//")
    #         if len(line) != 2:
    #             return None, None, None

    #         code, comment = line[:-1], line[-1]
    #         code = [w.strip() for w in code]
    #         comment = comment.strip().replace("\n", "\\n")

    return clean_comment(comment), clean_code(code), heuristic
Example #34
    def initial_highlight(self, *args):

        self.text.mark_set("range_start", "1.0")
        data = self.text.get("1.0", tk.END)
        for token, content in lex(data, self.lexer):
            self.text.mark_set("range_end", "range_start + %dc" % len(content))
            self.text.tag_add(str(token), "range_start", "range_end")
            self.text.mark_set("range_start", "range_end")
            
        self.previousContent = self.text.get("1.0", tk.END)
        self.syntax_theme_configuration()
Example #35
 def _render_highlighted_block(self, content, language):
     code = indent(content, " " * 2)
     lexer = get_lexer(language or "")
     if lexer:
         formatted_text = PygmentsTokens(
             pygments.lex(code=code, lexer=lexer))
     else:
         formatted_text = to_formatted_text(
             code,
             style="",
         )
     return formatted_text
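A minimal sketch of rendering the returned value, assuming prompt_toolkit (the source of PygmentsTokens and to_formatted_text used above):

from prompt_toolkit import print_formatted_text
from prompt_toolkit.styles import style_from_pygments_cls
from pygments.styles import get_style_by_name

# 'monokai' is an arbitrary style choice for the sketch
print_formatted_text(formatted_text, style=style_from_pygments_cls(get_style_by_name('monokai')))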
Example #36
def run_seq(seq):
    tokens = list(pygments.lex(seq, TypeScriptLexer()))
    ws, tokens = prep(tokens)
    # Set up tensors
    inputs = np.zeros(len(ws))
    outputs = np.zeros(len(ws))
    for i in range(len(ws)):
        inputs[i] = source_dict[
            ws[i]] if ws[i] in source_dict else source_dict["_UNKNOWN_"]
    N = len(inputs)
    if N > 4 * minibatch_size:
        return None
    inputs = scipy.sparse.csr_matrix(
        (np.ones(N, np.float32), (range(N), inputs)), shape=(N, vocab_size))
    outputs = scipy.sparse.csr_matrix(
        (np.ones(N, np.float32), (range(N), outputs)), shape=(N, num_labels))
    sIn = C.io.MinibatchSourceFromData(
        dict(xx=([inputs], C.layers.typing.Sequence[C.layers.typing.tensor]),
             yy=([outputs], C.layers.typing.Sequence[C.layers.typing.tensor])))
    mb = sIn.next_minibatch(N)
    data = {x: mb[sIn.streams['xx']], y: mb[sIn.streams['yy']]}

    enhance_data(data, enc)
    pred = dec.eval({x: data[x], t: data[t]})[0]

    with open(outp, 'w', encoding="utf-8") as f:
        ix = 0
        sep = chr(31)
        for tt, v in tokens:
            f.write("%s%s%s" % (v.replace("\t", "\\t").replace(
                "\n", "\\n").replace("\r", "\\r"), sep, str(tt)[6:]))
            print(v, end='')
            if v.strip() == '' or tt in Comment:
                f.write('\n')
                continue
            pr = pred[ix]
            ix += 1
            if v.strip() in keywords or not bool(regex.match(v.strip())):
                f.write('\n')
                continue
            r = [
                i[0] for i in sorted(
                    enumerate(pr), key=lambda x: x[1], reverse=True)
            ]
            guess = target_wl[r[0]]
            gs = [target_wl[r[ix]] for ix in range(5)]
            gs = [g[1:len(g) - 1] if g[0] == "$" else g for g in gs]
            if target_wl[r[0]] != "O":
                print(" : %s" % guess[1:len(guess) - 1], end='')
            for i in range(len(gs)):
                f.write("%s%s%s%.4f" % (sep, gs[i], sep, pr[r[i]]))
            f.write('\n')
    print()
Example #37
 def __init__(self, file_path, var_table=None):
      with open(file_path, "r") as f:
          self.program_text = f.read()
     self.tokens = list(lex(self.program_text, HexRaysCLexer()))
     # Maps a placeholder id to a dict of variable names
     self.var_table = dict()
     if var_table:
         with open(var_table, newline="") as tablefile:
             reader = csv.DictReader(tablefile,
                                     delimiter=",",
                                     quotechar="|")
             for row in reader:
                 self.var_table[row.pop("var_id")] = row
Example #38
 def _highlight(self, start_pos, text):
     """テキストをハイライトする."""
     self.text.mark_set('range_start', start_pos)
     for token, content in lex(text, PythonLexer()):
         self.text.mark_set(
             'range_end', 'range_start+{0}c'.format(len(content))
         )
         self.text.tag_add(str(token), 'range_start', 'range_end')
         self.text.mark_set('range_start', 'range_end')
          # save import, function, and class names for the completion list
         if str(token) in ('Token.Name.Namespace', 'Token.Name.Class', 'Token.Name.Function'):
             self.var_name_list.add(content)
Example #39
def test_bare_class_handler():
    from pygments import lex, format
    from pygments.formatter import Formatter
    from pygments.formatters import HtmlFormatter
    from pygments.lexer import RegexLexer
    from pygments.lexers import PythonLexer
    try:
        lex('test\n', PythonLexer)
    except TypeError as e:
        assert 'lex() argument must be a lexer instance' in str(e)
    else:
        assert False, 'nothing raised'
    try:
        format([], HtmlFormatter)
    except TypeError as e:
        assert 'format() argument must be a formatter instance' in str(e)
    else:
        assert False, 'nothing raised'

    # These cases should not trigger this heuristic.
    class BuggyLexer(RegexLexer):
        def get_tokens(self, text, extra_argument):
            pass

        tokens = {'root': []}

    try:
        list(lex('dummy', BuggyLexer()))
    except TypeError as e:
        assert 'lex() argument must be a lexer instance' not in str(e)
    else:
        assert False, 'no error raised by buggy lexer?'

    class BuggyFormatter(Formatter):
        def format(self, tokensource, outfile, extra_argument):
            pass

    try:
        format([], BuggyFormatter())
    except TypeError as e:
        assert 'format() argument must be a formatter instance' not in str(e)
    else:
        assert False, 'no error raised by buggy formatter?'
Example #40
def get_tokenization(lexedWoComments, lexer):
    tokenized_string = ''
    token_types = []
    curr_line_empty = True
    for t in lexedWoComments:
        token_type = str(t[0])
        token = t[1]
        token_stripped = token.strip()

        # Pygments will sometimes lex many tokens as one
        # This can occur with preprocessor directives and definitions in C
        # In this case, we need to lex that whole line
        num_tokens = len(token.split())
        if num_tokens > 1:
            # Need to manually lex each space-separated token on occasions
            # when pygments doesn't lex properly
            line_split = token.split()
            line_lexed = []
            for temp_token in line_split:
                token_lexed = list(lex(temp_token, lexer))
                for lexed in token_lexed:
                    if lexed[1] != "\n":
                        line_lexed.append(lexed)
            line_lexed.append((Token.Text, '\n'))
            line_code, line_types = get_tokenization(line_lexed, lexer)
            tokenized_string += line_code
            token_types += line_types
            curr_line_empty = True
            continue

        if '\n' in token:
            if curr_line_empty:
                if (t[0] != Token.Text and t[0] != Token.Comment.Preproc
                    ) and token_stripped != '':
                    tokenized_string += token_stripped + "\n"
                    token_types.append(token_type)
            else:
                tokenized_string += token_stripped + "\n"

                # Edge case for stray "\" in code
                if token_stripped == "\\":
                    token_types.append(token_type)
            curr_line_empty = True
        elif t[0] != Token.Text and len(token_stripped) > 0:
            curr_line_empty = False
            tokenized_string += token + ' '
            token_types.append(token_type)

    assert len(
        tokenized_string.split()) == len(token_types), "{0} != {1}".format(
            len(tokenized_string.split()), len(token_types))
    return tokenized_string, token_types
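A hedged usage sketch for get_tokenization, feeding it a C preprocessor directive, which pygments may lex as one multi-word token (imports assumed to match the function's references to Token and lex):

from pygments import lex
from pygments.lexers import get_lexer_by_name
from pygments.token import Token

lexer = get_lexer_by_name('c')
tokens = list(lex('#define BUF_SIZE 1024\nint x;\n', lexer))
code, types = get_tokenization(tokens, lexer)
assert len(code.split()) == len(types)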
Example #41
def _parse_led_config(file, matrix_cols, matrix_rows):
    """Return any 'raw' led/rgb matrix config
    """
    matrix_raw = []
    position_raw = []
    flags = []

    found_led_config = False
    bracket_count = 0
    section = 0
    for _type, value in lex(_preprocess_c_file(file), CLexer()):
        # Assume g_led_config..stuff..;
        if value == 'g_led_config':
            found_led_config = True
        elif value == ';':
            found_led_config = False
        elif found_led_config:
            # Assume bracket count hints to section of config we are within
            if value == '{':
                bracket_count += 1
                if bracket_count == 2:
                    section += 1
            elif value == '}':
                bracket_count -= 1
            else:
                # Assume any non whitespace value here is important enough to stash
                if _type in [
                        Token.Literal.Number.Integer,
                        Token.Literal.Number.Float, Token.Literal.Number.Hex,
                        Token.Name
                ]:
                    if section == 1 and bracket_count == 3:
                        matrix_raw.append(_coerce_led_token(_type, value))
                    if section == 2 and bracket_count == 3:
                        position_raw.append(_coerce_led_token(_type, value))
                    if section == 3 and bracket_count == 2:
                        flags.append(_coerce_led_token(_type, value))

    # Slightly better interim format
    matrix = list(_get_chunks(matrix_raw, matrix_cols))
    position = list(_get_chunks(position_raw, 2))
    matrix_indexes = list(filter(lambda x: x is not None, matrix_raw))

    # If we have not found anything - bail with no error
    if not section:
        return None

    # Throw any validation errors
    _validate_led_config(matrix, matrix_rows, matrix_indexes, position,
                         position_raw, flags)

    return (matrix, position, flags)
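The helpers above are not shown; a hypothetical sketch of _get_chunks and _coerce_led_token, assuming NO_LED placeholders map to None and numeric literals to numbers:

from itertools import islice

def _get_chunks(it, size):
    # Yield successive size-length tuples from an iterable
    it = iter(it)
    return iter(lambda: tuple(islice(it, size)), ())

def _coerce_led_token(_type, value):
    # Map lexer tokens to Python values (hypothetical reconstruction)
    if _type is Token.Name and value == 'NO_LED':
        return None
    if _type is Token.Literal.Number.Float:
        return float(value)
    return int(value, 0)  # base 0 handles both decimal and 0x hex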
Example #42
def preprocessFile(path, basePath, retainLine):
    """
    Perform preprocessing on the lexer.
    Parameters:
    -----------
    path
    basePath
    retainLine - do we keep the original line numbers or not
    Returns:
    -----------
    (curProject - The current project or corpora we are in
     curFile - The corresponding original file path
     lexedWoComments - the Pygments token list with preprocessing
     OR (Not yet implemented) Something for English?,
     language - the language of this lexer
     fileErrorCount - count of observed error tokens from Pygments)
    """
    if (True):  # TODO: check that this is a programming language
        with open(path, 'r') as f:
            fileContents = f.read()

        lexer = get_lexer_for_filename(path)
        tokens = lex(fileContents, lexer)  # returns a generator of tuples
        tokensList = list(tokens)
        language = languageForLexer(lexer)
        (curProject, curFile) = getProjectAndFilename(path, basePath)

        #Debug: what does the original token set look like
        #print(tokensList)
        #quit()

        if (retainLine):
            lexedWoComments = reduceToNewLine(tokensList, Token.Comment)
            lexedWoComments = reduceToNewLine(lexedWoComments,
                                              Token.Literal.String.Doc)
        else:
            # Strip comments and alter strings
            lexedWoComments = tokensExceptTokenType(tokensList, Token.Comment)
            lexedWoComments = tokensExceptTokenType(lexedWoComments,
                                                    Token.Literal.String.Doc)
        beforeError = len(lexedWoComments)
        # Remove things that didn't lex properly
        lexedWoComments = tokensExceptTokenType(lexedWoComments, Token.Error)
        fileErrorCount = beforeError - len(lexedWoComments)

        # Alter the pygments lexer types to be more comparable between our
        # languages
        lexedWoComments = fixTypes(lexedWoComments, language)
        lexedWoComments = convertNamespaceTokens(lexedWoComments, language)

    return (curProject, curFile, lexedWoComments, language, fileErrorCount)
Example #43
def format_line(fname, lineno, line, color=True, lexer=None, formatter=None):
    """Formats a trace line suitable for printing."""
    fname = min(fname, replace_home(fname), os.path.relpath(fname), key=len)
    if not color:
        return COLORLESS_LINE.format(fname=fname, lineno=lineno, line=line)
    cline = COLOR_LINE.format(fname=fname, lineno=lineno)
    if not HAVE_PYGMENTS:
        return cline + line
    # OK, so we have pygments
    tokens = pyghooks.partial_color_tokenize(cline)
    lexer = lexer or pyghooks.XonshLexer()
    tokens += pygments.lex(line, lexer=lexer)
    return tokens
Example #44
 def lex(self):
     # Get lexer for language (use text as fallback)
     try:
         if self.language and str(self.language).lower() != 'none':
             lexer = get_lexer_by_name(self.language.lower(),
                                       **self.custom_args)
         else:
             lexer = get_lexer_by_name('text', **self.custom_args)
     except ValueError:
         log.info("no pygments lexer for %s, using 'text'" % self.language)
          # what happens if pygments isn't present?
         lexer = get_lexer_by_name('text')
     return pygments.lex(self.code, lexer)
Example #45
 def lex(self):
     """Get lexer for language (use text as fallback)"""
     try:
          if self.language and unicode(self.language).lower() != 'none':
             lexer = get_lexer_by_name(self.language.lower(),
                                     **self.custom_args
                                     )
         else:
             lexer = get_lexer_by_name('text', **self.custom_args)
     except ValueError:
          # what happens if pygments isn't present?
         lexer = get_lexer_by_name('text')
     return pygments.lex(self.code, lexer)
Example #46
 def print_result(self, data=None):
     data = data or self.data
     if isinstance(data, dict):
         data = self._process_dict(data)
     elif isinstance(data, (list, tuple)):
         data = [
             v.decode('utf-8') if isinstance(v, bytes) else v for v in data
         ]
     elif isinstance(data, bytes):
         data = data.decode('utf-8')
     tokens = list(
         pygments.lex(json.dumps(data, indent=4), lexer=JsonLexer()))
     print_formatted_text(PygmentsTokens(tokens))
Example #47
 def parse(self, start='1.0'):
     data = self.get(start, 'end')
     while data and '\n' == data[0]:
         start = self.index('%s+1c' % start)
         data = data[1:]
     self.mark_set('range_start', start)
     for t in self._syntax_highlighting_tags:
         self.tag_remove(t, start, "range_start +%ic" % len(data))
     for token, content in lex(data, Python3Lexer()):
         self.mark_set("range_end", "range_start + %ic" % len(content))
         for t in token.split():
             self.tag_add(str(t), "range_start", "range_end")
         self.mark_set("range_start", "range_end")
Example #48
def display_bibs(labels, bibs):
    r"""
  Display a list of bib entries on screen with flying colors.

  Parameters
  ----------
  labels: List of Strings
     Header labels to show above each Bib() entry.
  bibs: List of Bib() objects
     BibTeX entries to display.

  Examples
  --------
  >>> import bibmanager.bib_manager as bm
  >>> e1 = '''@Misc{JonesEtal2001scipy,
         author = {Eric Jones and Travis Oliphant and Pearu Peterson},
         title  = {{SciPy}: Open source scientific tools for {Python}},
         year   = {2001},
       }'''
  >>> e2 = '''@Misc{Jones2001,
         author = {Eric Jones and Travis Oliphant and Pearu Peterson},
         title  = {SciPy: Open source scientific tools for Python},
         year   = {2001},
       }'''
  >>> bibs = [bm.Bib(e1), bm.Bib(e2)]
  >>> bm.display_bibs(["DATABASE:\n", "NEW:\n"], bibs)
  ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
  DATABASE:
  @Misc{JonesEtal2001scipy,
         author = {Eric Jones and Travis Oliphant and Pearu Peterson},
         title  = {{SciPy}: Open source scientific tools for {Python}},
         year   = {2001},
       }

  NEW:
  @Misc{Jones2001,
         author = {Eric Jones and Travis Oliphant and Pearu Peterson},
         title  = {SciPy: Open source scientific tools for Python},
         year   = {2001},
       }
  """
    style = prompt_toolkit.styles.style_from_pygments_cls(
        pygments.styles.get_style_by_name(cm.get('style')))
    if labels is None:
        labels = ["" for _ in bibs]
    tokens = [(Token.Comment, u.BANNER)]
    for label, bib in zip(labels, bibs):
        tokens += [(Token.Text, label)]
        tokens += list(pygments.lex(bib.content, lexer=BibTeXLexer()))
        tokens += [(Token.Text, "\n")]
    print_formatted_text(PygmentsTokens(tokens), end="", style=style)
Example #49
 def __iter__(self):
     """Parse self.code and yield "classified" tokens.
     """
     if self.lexer is None:
         yield ([], self.code)
         return
     tokens = pygments.lex(self.code, self.lexer)
     for tokentype, value in self.merge(tokens):
         if self.tokennames == 'long':  # long CSS class args
             classes = str(tokentype).lower().split('.')
         else:  # short CSS class args
             classes = [_get_ttype_class(tokentype)]
         classes = [cls for cls in classes if cls not in unstyled_tokens]
         yield (classes, value)
Example #50
def print_packets(path: list, nodes: dict) -> None:
    tokens = []
    for e in path[:-1]:
        node = nodes[e.dst]
        p = node.render()
        line = '{} = {}'.format(node.name.replace('-', '_'), repr(p))
        tokens.extend(list(pygments.lex(line, lexer=Python3Lexer())))

    # p = self.fuzz_node.render()
    node = nodes[path[-1].dst]
    p = node.render()
    line = '{} = {}'.format(node.name.replace('-', '_'), repr(p))

    print(pygments.highlight(line, Python3Lexer(), Terminal256Formatter(style='rrt')))
Example #51
def _findOneToken(tokenToFind, code):
    """
    Find a token in the code and returns a list of lines where it was found
    """
    lexer = PythonLexer()
    tokens = pygments.lex(code, lexer)
    lineNumber = 1
    whereFound = []

    for token in tokens:
        lineNumber += _getNewLines(token)
        if _sameToken(token, tokenToFind):
            whereFound.append(lineNumber)
    return whereFound
Example #52
 def get_tokens(file: str, lang: str) -> Counter:
     """
     Gather a Counter object of tokens in the file and their count.
     :param file: the path to the file.
     :param lang: the language of file.
     :return: a Counter object of items: token and count.
     """
     content = PygmentsParser.read_file(file)
     tokens = []
     for pair in pygments.lex(content, PygmentsParser.LEXERS[lang]):
         if any(pair[0] in sublist
                for sublist in PygmentsParser.TYPES[lang]):
             tokens.extend(list(Subtokenizer.process_token(pair[1])))
     return Counter(tokens)
Example #53
def convert_text(text: str, extension: str) -> List[ParsedToken]:
    extension = extension or 'java'
    try:
        lexer = get_lexer_by_name(extension)
    except ClassNotFound as err:
        logger.warning(err)
        lexer = guess_lexer(text)
    for token, value in lex(text, lexer):
        model_tokens = _convert(token, value)
        for mr in model_tokens:
            yield mr
Example #54
    def parse_string(self, s):
        """
        Parse string using lexer, if none exists
        return string with default text color
        """
        start = 0
        ret_list = []
        if self.lexer is None:
            return [(0, s, options['text_color'])]

        for token in lex(s, self.lexer):
            color = self.determine_color(token[0])
            ret_list.append((start, token[1], color))
            start += len(token[1])
        return ret_list
Example #55
 def highlight2(self, event=None):
     """Highlight the syntax of the current line"""
     text_widget = self.get_current()
     row = text_widget.index('insert').split('.')[0]
     self.remove_tags2(1)
     content = text_widget.get("1.0", 'end')
     # lines = content.split("\n")
     text_widget.mark_set("range_start", "1" + ".0")
     data = text_widget.get("1.0", "end")
     for token, content in lex(data, Python3Lexer()):
         text_widget.mark_set("range_end",
                              "range_start + %dc" % len(content))
         text_widget.tag_add(str(token), "range_start", "range_end")
         text_widget.mark_set("range_start", "range_end")
     self.tag_conf()
Example #56
def code_token(code_diff):
    # print(code_diff)
    # print('---------------------------------------')
    code_diff = code_prepare(code_diff)
    # print(code_diff)
    # print('==========================================')
    # print(lexers.guess_lexer(code_diff))
    lexer = lexers.get_lexer_by_name("java", stripall=True)
    tokens = list(pygments.lex(code_diff, lexer))
    # tokens = list(javalang.tokenizer.tokenize(code_diff))
    tokens_list = []
    for token in tokens:
        if str(token[0]) != 'Token.Text' and str(token[0]) != 'Token.Punctuation':
            tokens_list.append(token[1].lower())
    return tokens_list
Example #57
def findDecorators(code):
    """
    Look for decorators @
    Documentation: Python Pocket Reference page 67
    """
    decorators = PythonIdiom('decorator')
    lexer = PythonLexer()
    tokens = pygments.lex(code, lexer)
    lineNumber = 1
    for ttype, word in tokens:
        lineNumber += _getNewLines((ttype, word))
        if ttype is Token.Name.Decorator:
            decorators.addNew(lineNumber)
    log("Decorators found in lines: " + str(decorators.getLines()))
    return decorators
Example #58
 def python_lexer(self):
     for tag in self.text_area.tag_names():
         self.text_area.tag_delete(tag)
     self._set_text_tags()
     data = self.text_area.get("1.0", "end-1c")
     self.text_area.mark_set("range_start", "1.0")
     print("------------------")
     for token, content in lex(data, PythonLexer()):
         master_token = ".".join(str(token).split(".")[0:2])
         self.text_area.mark_set("range_end",
                                 "range_start + %dc" % len(content))
         self.text_area.tag_add(str(master_token), "range_start",
                                "range_end")
         print(token, len(content), content.encode())
         self.text_area.mark_set("range_start", "range_end")
Example #59
    def default_highlight(self):
        row = float(self.text.index(tk.INSERT))
        row = str(math.trunc(row))
        content = self.text.get("1.0", tk.END)
        lines = content.split("\n")

        if self.previousContent != content:
            self.text.mark_set("range_start", row + ".0")
            data = self.text.get(row + ".0", row + "." + str(len(lines[int(row) - 1])))

            for token, content in lex(data, self.lexer):
                self.text.mark_set("range_end", "range_start + %dc" % len(content))
                self.text.tag_add(str(token), "range_start", "range_end")
                self.text.mark_set("range_start", "range_end")

        self.previousContent = self.text.get("1.0", tk.END)
Example #60
    def run(self):
        """
        Lexes the data to see what lexers can tokenize it.
        Any successful lexers are considered possible matches.
        """
        bad_tokens = (Token.Text, Token.Name, Token.Name.Other)
        tokens = [
            tok for tok, text in lex(self.data_string, self.lexer)
            if tok not in bad_tokens and text != ''
        ]
        token_count = len(tokens)

        # Errors mean we definitely didn't find the right language
        if Token.Error in tokens or token_count == 0:
            self.result = False
        else:
            self.result = token_count
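A hypothetical driver for the checker above, assuming the class is instantiated as LanguageChecker(data_string, lexer): score the text against several candidate lexers and keep the best-scoring match.

from pygments.lexers import PythonLexer, CLexer, RubyLexer

def guess_language(data_string, lexers=(PythonLexer(), CLexer(), RubyLexer())):
    # A higher token_count means more of the text was recognized as meaningful tokens
    best_name, best_score = None, 0
    for lexer in lexers:
        checker = LanguageChecker(data_string, lexer)  # hypothetical constructor
        checker.run()
        if checker.result and checker.result > best_score:
            best_name, best_score = lexer.name, checker.result
    return best_name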