Example #1
    def get_tokens_unprocessed(self, text):
        handling_preproc = False
        preproc_index = 0
        preproc_text = ''

        seen_name = False
        buffered_name_token = None
        buffered_spacing = []
        for index, token, value in CLexer.get_tokens_unprocessed(self, text):

            if token == Token.Comment.Preproc:
                if value == '#':
                    handling_preproc = True
                    preproc_index = index + 1
                    preproc_text = ''
                    yield index, token, value

                else:
                    preproc_text += value

            else:
                if handling_preproc:
                    handling_preproc = False
                    first_space = preproc_text.find(' ')
                    yield (preproc_index, Token.Comment.Preproc,
                           preproc_text[:first_space])
                    rest = preproc_text[first_space:]
                    rest_index = preproc_index + first_space
                    sublexer = CLexer()
                    for i, t, v in sublexer.get_tokens_unprocessed(rest):
                        # offset sublexer positions back into the original text
                        yield rest_index + i, t, v

                if token == Token.Name or token == Token.Keyword.Type:
                    if not seen_name:
                        seen_name = True
                        buffered_name_token = (index, token, value)
                    else:
                        seen_name = False
                        _index, _type, _value = buffered_name_token
                        buffered_name_token = None
                        yield _index, Token.Name.Other, _value
                        for i in buffered_spacing:
                            yield i
                        buffered_spacing = []
                        yield index, Token.Name.Variable, value

                else:
                    if seen_name:
                        if value == ' ' or value == '*':
                            buffered_spacing.append((index, token, value))
                            continue
                        seen_name = False
                        yield buffered_name_token
                        buffered_name_token = None
                        for i in buffered_spacing:
                            yield i
                        buffered_spacing = []

                    yield index, token, value
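
The override above only becomes useful once it is attached to a lexer subclass and fed to pygments.highlight. Below is a minimal, self-contained sketch of that wiring; the class name and the deliberately simplified retagging rule are illustrative, not taken from the example above.

from pygments import highlight
from pygments.lexers import CLexer
from pygments.formatters import TerminalFormatter
from pygments.token import Token

class VariableAwareCLexer(CLexer):
    """Hypothetical subclass; the retagging is much simpler than the buffering above."""
    name = 'C (variable-aware)'

    def get_tokens_unprocessed(self, text):
        # Delegate to CLexer, then retag bare names as Name.Variable.
        for index, token, value in CLexer.get_tokens_unprocessed(self, text):
            if token is Token.Name:
                yield index, Token.Name.Variable, value
            else:
                yield index, token, value

print(highlight("int counter = 0;\n", VariableAwareCLexer(), TerminalFormatter()))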
Example #2
    def get_tokens_unprocessed(self, text):
        handling_preproc = False
        preproc_index = 0
        preproc_text = ''

        seen_name = False
        buffered_name_token = None
        buffered_spacing = []
        for index, token, value in CLexer.get_tokens_unprocessed(self, text):

            if token == Token.Comment.Preproc:
                if value == '#':
                    handling_preproc = True
                    preproc_index = index+1
                    preproc_text = ''
                    yield index, token, value

                else:
                    preproc_text += value

            else:
                if handling_preproc:
                    handling_preproc = False
                    first_space = preproc_text.find(' ')
                    yield preproc_index, Token.Comment.Preproc, preproc_text[:first_space]
                    rest = preproc_text[first_space:]
                    rest_index = preproc_index + first_space
                    sublexer = CLexer()
                    for i, t, v in sublexer.get_tokens_unprocessed(rest):
                        # offset sublexer positions back into the original text
                        yield rest_index + i, t, v

                if token == Token.Name or token == Token.Keyword.Type:
                    if not seen_name:
                        seen_name = True
                        buffered_name_token = (index, token, value)
                    else:
                        seen_name = False
                        _index, _type, _value = buffered_name_token
                        buffered_name_token = None
                        yield _index, Token.Name.Other, _value
                        for i in buffered_spacing:
                            yield i
                        buffered_spacing = []
                        yield index, Token.Name.Variable, value

                else:
                    if seen_name:
                        if value == ' ' or value == '*':
                            buffered_spacing.append((index,token,value))
                            continue
                        seen_name = False
                        yield buffered_name_token
                        buffered_name_token = None
                        for i in buffered_spacing:
                            yield i
                        buffered_spacing = []

                    yield index, token, value
Example #3
    def display(self, stype, mode='raw', oformat='term'):
        """
        Display output for a single match

        :param stype: name of the matched type
        :type stype: str
        :param mode: display mode
        :type mode: str
        :param oformat: format of output for color (term, html)
        :type oformat: str
        :return: a human readable string containing the result of the search
                 (matched line, context, file name, etc.)
        """
        f = open(self.file, 'r')
        lines = f.readlines()
        pmatch = lines[self.line - 1][self.column:self.columnend]
        ptype = "*"  # match is a pointer to struct
        if CocciMatch.ptype_regexp.search(lines[self.line - 1][self.columnend:]):
            ptype = ""
        output = ""
        if mode == 'color':
            output += "%s: l.%s -%d, l.%s +%d, %s %s%s\n" % (
                self.file, self.line, self.line - self.start_at, self.line,
                self.stop_at - self.line, stype, ptype, pmatch)
        for i in range(self.start_at - 1, min(self.stop_at, len(lines))):
            if mode == 'color':
                output += lines[i]
            elif mode == 'vim':
                output += "%s|%s| (%s %s%s): %s" % (self.file, i + 1, stype,
                                                    ptype, pmatch, lines[i])
            elif mode == 'emacs':
                output += "%s:%s: (%s %s%s): %s" % (self.file, i + 1, stype,
                                                    ptype, pmatch, lines[i])
            elif i == self.line - 1:
                output += "%s:%s (%s %s%s): %s" % (self.file, i + 1, stype,
                                                   ptype, pmatch, lines[i])
            else:
                output += "%s-%s %s - %s" % (self.file, i + 1, ' ' *
                                             (2 + len(stype + ptype + pmatch)),
                                             lines[i])
        f.close()
        if mode == 'color':
            if have_pygments:
                lexer = CLexer()
                lfilter = NameHighlightFilter(names=[pmatch])
                lexer.add_filter(lfilter)
                if oformat == "term":
                    return highlight(output, lexer, Terminal256Formatter())
                elif oformat == "html":
                    return highlight(output, lexer,
                                     HtmlFormatter(noclasses=True))
                else:
                    return output
        return output + self.trailer
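
The color branch above boils down to attaching a NameHighlightFilter for the matched identifier and re-rendering the buffered context. A standalone sketch of just that step, with placeholder input (the pmatch value is invented for illustration):

from pygments import highlight
from pygments.lexers import CLexer
from pygments.filters import NameHighlightFilter
from pygments.formatters import Terminal256Formatter

pmatch = "my_struct"  # placeholder for the identifier sliced out of the matched line
lexer = CLexer()
lexer.add_filter(NameHighlightFilter(names=[pmatch]))
print(highlight("struct foo *my_struct = get_foo();\n", lexer, Terminal256Formatter()))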
Example #4
    def color_line(self, line):
        """
        """
        lexer = CLexer()
        tokens = list(lexer.get_tokens(line))
        new_line = ""
        for t in tokens:
            ttype = t[0]
            ttext = str(t[1])
            if ttype == Token.Text:
                new_line += idaapi.COLSTR(ttext, idaapi.SCOLOR_INSN)

            elif ttype == Token.Text.Whitespace:
                new_line += idaapi.COLSTR(ttext, idaapi.SCOLOR_INSN)

            elif ttype == Token.Error:
                new_line += idaapi.COLSTR(ttext, idaapi.SCOLOR_ERROR)

            elif ttype == Token.Other:
                new_line += idaapi.COLSTR(ttext, idaapi.SCOLOR_DSTR)

            elif ttype == Token.Keyword:
                new_line += idaapi.COLSTR(ttext, idaapi.SCOLOR_KEYWORD)

            elif ttype == Token.Name:
                new_line += idaapi.COLSTR(ttext, idaapi.SCOLOR_LIBNAME)

            elif ttype == Token.Literal:
                new_line += idaapi.COLSTR(ttext, idaapi.SCOLOR_LOCNAME)

            elif ttype == Token.Literal.String:
                new_line += idaapi.COLSTR(ttext, idaapi.SCOLOR_STRING)

            elif ttype == Token.Literal.Number:
                new_line += idaapi.COLSTR(ttext, idaapi.SCOLOR_DNUM)

            elif ttype == Token.Operator:
                new_line += idaapi.COLSTR(ttext, idaapi.SCOLOR_ALTOP)

            elif ttype == Token.Punctuation:
                new_line += idaapi.COLSTR(ttext, idaapi.SCOLOR_SYMBOL)

            elif ttype == Token.Comment:
                new_line += idaapi.COLSTR(ttext, idaapi.SCOLOR_REGCMT)

            elif ttype == Token.Comment.Single:
                new_line += idaapi.COLSTR(ttext, idaapi.SCOLOR_REGCMT)

            elif ttype == Token.Generic:
                new_line += idaapi.COLSTR(ttext, idaapi.SCOLOR_CREFTAIL)

            else:
                new_line += idaapi.COLSTR(ttext, idaapi.SCOLOR_CREFTAIL)
        return new_line
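
One detail worth noting about the chain of == comparisons above: Pygments token types compare equal only to the exact same type, while the `in` operator tests hierarchy membership. Because the comparisons are exact, subtypes without their own branch (for example Token.Comment.Multiline or Token.Name.Function) end up in the final else. A quick illustration of the standard pygments.token behaviour:

from pygments.token import Token

# Exact equality matches only the exact token type...
assert Token.Literal.String != Token.Literal
# ...while `in` walks the hierarchy, so subtypes match their parents.
assert Token.Literal.String in Token.Literal
assert Token.Comment.Single in Token.Comment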
Example #5
    def display(self, stype, mode='raw', oformat='term'):
        """
        Display output for a single match

        :param stype: name of the matched type
        :type stype: str
        :param mode: display mode
        :type mode: str
        :param oformat: format of output for color (term, html)
        :type oformat: str
        :return: a human readable string containing the result of the search
                 (matched line, context, file name, etc.)
        """
        f = open(self.file, 'r')
        lines = f.readlines()
        pmatch = lines[self.line - 1][self.column:self.columnend]
        ptype = "*" # match is a pointer to struct
        if (CocciMatch.ptype_regexp.search(lines[self.line - 1][self.columnend:])):
            ptype = ""
        output = ""
        if mode == 'color':
            output += "%s: l.%s -%d, l.%s +%d, %s %s%s\n" % (self.file,
                 self.line, self.line - self.start_at, self.line,
                 self.stop_at - self.line, stype, ptype, pmatch)
        for i in range(self.start_at - 1, min(self.stop_at, len(lines))):
            if mode == 'color':
                output += lines[i]
            elif mode == 'vim':
                output += "%s|%s| (%s %s%s): %s" % (self.file, i + 1,
                stype, ptype, pmatch, lines[i])
            elif mode == 'emacs':
                output += "%s:%s: (%s %s%s): %s" % (self.file, i + 1,
                stype, ptype, pmatch, lines[i])
            elif i == self.line - 1:
                output += "%s:%s (%s %s%s): %s" % (self.file, i + 1,
                stype, ptype, pmatch, lines[i])
            else:
                output += "%s-%s %s - %s" % (self.file, i + 1,
                ' ' * (2 + len(stype + ptype + pmatch)), lines[i])
        f.close()
        if mode == 'color':
            if have_pygments:
                lexer = CLexer()
                lfilter = NameHighlightFilter(names=[pmatch])
                lexer.add_filter(lfilter)
                if oformat == "term":
                    return highlight(output, lexer, Terminal256Formatter())
                elif oformat == "html":
                    return highlight(output, lexer,
                        HtmlFormatter(noclasses=True))
                else:
                    return output
        return output + self.trailer
Example #6
    def __str__(self):
        out = ""

        if self._header is not None:
            for line in self._header.split("\n"):
                out += "\n" + " "*18 + "| " + highlight(line, CLexer(), Terminal256Formatter(style='monokai')).strip()

        out += "\n"
        out += "\n".join(str(self[item]) for item in self)

        if self._footer is not None:
            for line in self._footer.split("\n"):
                out += "\n" + " "*18 + "| " + highlight(line, CLexer(), Terminal256Formatter(style='monokai')).strip()
        return out.rstrip()
Example #7
    def __str__(self):

        s = ""

        # Are we on the first line of this decompiled output?
        first = True

        if self.src is not None:
            src = self.src.decode('latin-1')

            for line in src.split("\n"):

                if first:
                    # Can't do adjustments since we don't know the name
                    if self._file_name is None:
                        saddr = "{:18s}".format(hex(self.address))
                    else:
                        # Adjust offset to make sense with our current binary
                        saddr = "{:18s}".format(
                            hex(self._process.memory[
                                self._file_name.decode('latin-1') + ":" +
                                hex(self.address)].address))

                    if self.highlight is not None:
                        s += getattr(
                            colorama.Back, self.highlight
                        ) + saddr + colorama.Style.RESET_ALL + "| "
                    else:
                        s += saddr + "| "

                    s += highlight(
                        line, CLexer(),
                        Terminal256Formatter(style='monokai')).strip() + "\n"
                    first = False

                else:
                    saddr = " " * 18

                    if self.highlight is not None:
                        s += getattr(
                            colorama.Back, self.highlight
                        ) + saddr + colorama.Style.RESET_ALL + "| "
                    else:
                        s += saddr + "| "

                    s += highlight(
                        line, CLexer(),
                        Terminal256Formatter(style='monokai')).strip() + "\n"

        return s.strip()
Example #8
    def display(self, stype, mode='raw', oformat='term', before=0, after=0):
        """
        Display output for a single match

        :param stype: name of the matched type
        :type stype: str
        :param mode: display mode
        :type mode: str
        :param oformat: format of output for color (term, html)
        :type oformat: str
        :param before: number of lines to display before match
        :type before: int
        :param after: number of lines to display after match
        :type after: int
        """
        f = open(self.file, 'r')
        lines = f.readlines()
        pmatch = lines[self.line - 1][self.column:self.columnend]
        ptype = "*" # match is a pointer to struct
        if (CocciMatch.ptype_regexp.search(lines[self.line - 1][self.columnend:])):
            ptype = ""
        output = ""
        if mode == 'color':
            output += "%s: l.%s -%d, l.%s +%d, %s %s%s\n" % (self.file,
                 self.line, before, self.line, after, stype, ptype, pmatch)
        for i in range(int(self.line) - 1 - before, int(self.line) + after):
            if mode == 'color':
                output += lines[i]
            elif mode == 'vim':
                output += "%s|%s| (%s %s%s): %s" % (self.file, self.line,
                stype, ptype, pmatch, lines[i])
            elif mode == 'emacs':
                output += "%s:%s: (%s %s%s): %s" % (self.file, self.line,
                stype, ptype, pmatch, lines[i])
            else:
                output += "%s:%s (%s %s%s): %s" % (self.file, self.line,
                stype, ptype, pmatch, lines[i])
        f.close()
        if mode == 'color':
            if have_pygments:
                lexer = CLexer()
                lfilter = NameHighlightFilter(names=[pmatch])
                lexer.add_filter(lfilter)
                if oformat == "term":
                    return highlight(output, lexer, Terminal256Formatter())
                elif oformat == "html":
                    return highlight(output, lexer,
                        HtmlFormatter(noclasses=True))
                else:
                    return output
        return output
Example #9
    def build(self):
        root = BoxLayout(orientation="vertical", padding=5)

        butn = GridLayout(cols=3, size_hint=[1, .07])

        self.nameF = TextInput(text="main.c",
                               size_hint=[1, .1],
                               background_color=[1, 1, 1, .5])
        root.add_widget(self.nameF)

        buttonA = Button(text='Add File', on_press=self.add)
        butn.add_widget(buttonA)

        buttonC = Button(text='Compile File', on_press=self.compile)
        butn.add_widget(buttonC)

        buttonS = Button(text='Save File', on_press=self.save)
        butn.add_widget(buttonS)

        root.add_widget(butn)

        self.code = CodeInput(text="", lexer=CLexer())
        root.add_widget(self.code)

        self.check = TextInput(text="",
                               size_hint=[1, .3],
                               background_color=[1, 1, 1, .5])
        root.add_widget(self.check)

        return root
Example #10
 def render_output(self, code):
     lexer = CLexer()
     style = NativeStyle()
     style.background_color = BG_COLOR
     formatter = HtmlFormatter(full=True, style='native', noclasses=True)
     colored_code = highlight(code, lexer, formatter)
     show_html_report('{}.c'.format(self._function.name), colored_code)
Example #11
def show_output(bv, func_name, decompiled):
    lexer = CLexer()
    style = NativeStyle()
    style.background_color = '#272811'
    formatter = HtmlFormatter(full=True, style='native', noclasses=True)
    colored_code = highlight(decompiled, lexer, formatter)
    bv.show_html_report('Decompiled ' + func_name, colored_code)
Example #12
    def add_comment(self):
        """
        Add a comment to the selected line
        """
        print("GhIDA:: [DEBUG] add_comment called")
        colored_line = self.GetCurrentLine(notags=1)
        if not colored_line:
            idaapi.warning("Select a line")
            return False

        # Use pygments to parse the line to check if there are comments
        line = idaapi.tag_remove(colored_line)
        lexer = CLexer()
        tokens = list(lexer.get_tokens(line))
        text = ""
        text_comment = ""
        for t in tokens:
            ttype = t[0]
            ttext = str(t[1])
            if ttype == Token.Comment.Single:
                text_comment = ttext.replace('//', '').strip()
            else:
                text += ttext

        # Get the new comment
        comment = gl.display_comment_form(text_comment)
        if not comment or len(comment) == 0:
            return False
        comment = comment.replace("//", "").replace("\n", " ")
        comment = comment.strip()

        # Create the new text
        full_comment = "    // %s" % comment
        text = text.rstrip()
        new_text = text + full_comment
        text_colored = self.color_line(new_text)

        num_line = self.GetLineNo()
        self.EditLine(num_line, text_colored)
        self.RefreshCurrent()

        # Add comment to cache
        COMMENTS_CACHE.add_comment_to_cache(self.__ea, num_line, full_comment)

        print("GhIDA:: [DEBUG] Added comment to #line: %d (%s)" %
              (num_line, new_text))
        return
Example #13
def main(srcFile, jsonFile):
    src = readFrom(srcFile)
    srcHtml = highlight(src, CLexer(stripall=False), HtmlFormatter())
    srcHtml = addLineNumbers(src, srcHtml)
    srcJson = readFrom(jsonFile)
    tplt = Template(readFrom(tgtTplt))
    tgt = tplt.substitute(srcHtml=srcHtml, srcJson=srcJson)
    writeTo(tgtFile, tgt)
Example #14
    def testC(self):
        """ Does the CompletionLexer work for C/C++?
        """
        lexer = CompletionLexer(CLexer())
        self.assertEquals(lexer.get_context("foo.bar"), [ "foo", "bar" ])
        self.assertEquals(lexer.get_context("foo->bar"), [ "foo", "bar" ])

        lexer = CompletionLexer(CppLexer())
        self.assertEquals(lexer.get_context("Foo::Bar"), [ "Foo", "Bar" ])
Example #15
 def write_html(self, fd):
     source = open(self.path, 'r')
     code = source.read()
     lexer = CLexer()
     formatter = FunctionHtmlFormatter(self.lines,
                                       full=True,
                                       linenos='inline')
     fd.write(highlight(code, lexer, formatter))
     source.close()
Example #16
def get_highlighted_cl_code(text):
    try:
        from pygments import highlight
    except ImportError:
        return text
    else:
        from pygments.lexers import CLexer
        from pygments.formatters import TerminalFormatter

        return highlight(text, CLexer(), TerminalFormatter())
Example #17
def get_highlighted_code(text, python=False):
    try:
        from pygments import highlight
    except ImportError:
        return text
    else:
        from pygments.lexers import CLexer, PythonLexer
        from pygments.formatters import TerminalFormatter

        return highlight(text, CLexer() if not python else PythonLexer(),
                         TerminalFormatter())
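
A short usage sketch of the helper above (ANSI-escaped output when Pygments is installed, the raw text otherwise):

print(get_highlighted_code("int answer = 42;"))          # rendered with CLexer
print(get_highlighted_code("answer = 42", python=True))  # rendered with PythonLexer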
Example #18
class CLexerTest(unittest.TestCase):
    def setUp(self):
        self.lexer = CLexer()

    def testNumbers(self):
        code = '42 23.42 23. .42 023 0xdeadbeef 23e+42 42e-23'
        wanted = []
        for item in zip([
                Number.Integer, Number.Float, Number.Float, Number.Float,
                Number.Oct, Number.Hex, Number.Float, Number.Float
        ], code.split()):
            wanted.append(item)
            wanted.append((Text, ' '))
        wanted = [(Text, '')] + wanted[:-1] + [(Text, '\n')]
        self.assertEqual(list(self.lexer.get_tokens(code)), wanted)
Example #19
def code(listing):
    # {{ ''' foo() \n bar() ''' | code }}
    # format this with external
    proc = Popen(["clang-format"], stdout=PIPE, stdin=PIPE)
    result = proc.communicate(input=bytearray(listing, 'utf-8'))

    if proc.returncode != 0:
        raise Exception("build.py fails to call clang-format for listing: " + listing)

    from pygments import highlight
    from pygments.lexers import CLexer
    from pygments.formatters import HtmlFormatter

    fmtd = highlight(result[0].decode(), CLexer(), HtmlFormatter())
    return "%s\n" % fmtd
Example #20
class CLexerTest(unittest.TestCase):

    def setUp(self):
        self.lexer = CLexer()

    def testNumbers(self):
        code = '42 23.42 23. .42 023 0xdeadbeef 23e+42 42e-23'
        wanted = []
        for item in zip([Number.Integer, Number.Float, Number.Float,
                         Number.Float, Number.Oct, Number.Hex,
                         Number.Float, Number.Float], code.split()):
            wanted.append(item)
            wanted.append((Text, ' '))
        wanted = [(Text, '')] + wanted[:-1] + [(Text, '\n')]
        self.assertEqual(list(self.lexer.get_tokens(code)), wanted)
Example #21
def rewrite(fs, codes, out):
    with open(out, "w") as fd:
        (static_symbols, rewriting_plan) = prepare_rewritting(fs, codes)
        for (pn, code) in codes:
            # reschedule static symbols
            rewritten = highlight(code, CLexer(),
                                  TokenRewritter(pn, rewriting_plan))
            for l in rewritten.splitlines():
                fd.write(l.encode("utf8"))
                fd.write("\n")

            fd.write("/" + "*" * 60 + "/\n")
            fd.write("/* %s */\n" % pn)
            fd.write("/" + "*" * 60 + "/\n")

    return (static_symbols, rewriting_plan)
Example #22
def htmlHighlight(code, lang):
    """Apply HTML formatting to highlight code fragment.

    :code:  Source code
    :lang:  Language of source code
    """
    lexer = None
    if lang == 'C':
        lexer = CLexer()
    elif lang in ('Fortran77', 'Fortran95'):
        lexer = FortranLexer()

    if lexer:
        code  = highlight(code, lexer, HtmlFormatter()).replace('<pre>', '<pre>\n')
        lines = [ '<li>%s</li>' % l for l in code.split('\n') if l[:4] != "<div" and l[:5]!="</pre" ]
        return '<pre class="prettyprint linenums"><ol class="highlight linenums">%s</ol></pre>' % ''.join(lines) # absolutely NO blank spaces!
    else:
        return code
Example #23
def prepare_rewritting(target, codes):
    global FS_FORCE_REWRITE

    def _to_canonical(pn, sym):
        base = os.path.basename(pn)
        return sym + "_" + base.replace(".", "_").replace("-", "_")

    # get static symbols
    static_symbols = {}
    for (pn, code) in codes:
        formatter = StaticDecl()
        highlight(code, CLexer(), formatter)

        print("> %-50s: %d" % (pn, len(formatter.table)))
        static_symbols[pn] = formatter.table

    # check collisions
    rewriting_plan = defaultdict(dict)
    for (pivot_pn, pivot_tbl) in static_symbols.iteritems():
        # rewrite if collapsed
        for sym in pivot_tbl:
            for (target_pn, target_tbl) in static_symbols.iteritems():
                if pivot_pn == target_pn:
                    continue
                if sym in target_tbl:
                    print("> %s collaposed with %s & %s" %
                          (sym, pivot_pn, target_pn))
                    rewriting_plan[pivot_pn][sym] = _to_canonical(
                        pivot_pn, sym)

        # update pivot_tbl to minimize rewriting
        for (sym, new_sym) in rewriting_plan[pivot_pn].iteritems():
            pivot_tbl.remove(sym)
            pivot_tbl.add(new_sym)

    # manual rewriting (e.g., collision in headers)
    for sym in FS_FORCE_REWRITE.get(target, []):
        print("> manually include %s" % sym)
        for (pivot_pn, pivot_tbl) in static_symbols.iteritems():
            rewriting_plan[pivot_pn][sym] = _to_canonical(pivot_pn, sym)

    return (static_symbols, rewriting_plan)
Example #24
    def __init__(self, logger, src):
        self.logger = logger

        self.tokens = lex(src, CLexer())

        # Current token line number.
        self.cur_line_numb = 1
        # Current token start offset within current line.
        self.cur_start_offset = 0

        # List of entities (each represented as kind, line number, start and end offsets) to be highlighted.
        self.highlights = list()

        # Workaround for leading "\n" characters at the beginning of the source file that do not become tokens.
        self.initial_new_lines_numb = 0
        for c in src:
            if c == '\n':
                self.initial_new_lines_numb += 1
            else:
                break
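
For reference, pygments.lex (used for self.tokens above) yields (token type, value) pairs, without the start offsets that get_tokens_unprocessed would provide. A small sketch of what the stream looks like:

from pygments import lex
from pygments.lexers import CLexer

for ttype, value in lex("int x;\n", CLexer()):
    print(ttype, repr(value))
# Typically Token.Keyword.Type 'int', Token.Text ' ', Token.Name 'x',
# Token.Punctuation ';' and a trailing newline token (exact types vary by Pygments version).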
Example #25
def catFile(file_name, base_path, report_path):
    """
    Create an HTML page for a source file.
    """

    file_path = os.path.join(base_path, file_name)
    with open(file_path, 'r') as f:
        lines = f.readlines()

    with tmpHtmlFile(report_path) as report_f:
        html_name = report_f.name
        tpl = Template(NEWFILE_TEMPLATE)
        try:
            text = ''.join(lines).decode('utf8').encode('ascii', 'ignore')
        except:
            text = ''.join(lines).decode('latin').encode('ascii', 'ignore')

        report_f.write(
            tpl.render(file_name=file_name,
                       code=highlight(text, CLexer(), HtmlFormatter())))

    return html_name, calcSignature(''.join([line for line in lines]))
Example #26
class CLexerTest(unittest.TestCase):

    def setUp(self):
        self.lexer = CLexer()

    def testNumbers(self):
        code = '42 23.42 23. .42 023 0xdeadbeef 23e+42 42e-23'
        wanted = []
        for item in zip([Number.Integer, Number.Float, Number.Float,
                         Number.Float, Number.Oct, Number.Hex,
                         Number.Float, Number.Float], code.split()):
            wanted.append(item)
            wanted.append((Text, ' '))
        wanted = [(Text, '')] + wanted[:-1] + [(Text, '\n')]
        self.assertEqual(list(self.lexer.get_tokens(code)), wanted)

    def testSwitch(self):
        fragment = u'''\
        int main()
        {
            switch (0)
            {
                case 0:
                default:
                    ;
            }
        }
        '''
        expected = [
            (Token.Text, u''),
            (Token.Keyword.Type, u'int'),
            (Token.Text, u' '),
            (Token.Name.Function, u'main'),
            (Token.Text, u''),
            (Token.Punctuation, u'('),
            (Token.Punctuation, u')'),
            (Token.Text, u'\n'),
            (Token.Text, u''),
            (Token.Punctuation, u'{'),
            (Token.Text, u'\n'),
            (Token.Text, u'    '),
            (Token.Keyword, u'switch'),
            (Token.Text, u' '),
            (Token.Punctuation, u'('),
            (Token.Literal.Number.Integer, u'0'),
            (Token.Punctuation, u')'),
            (Token.Text, u'\n'),
            (Token.Text, u'    '),
            (Token.Punctuation, u'{'),
            (Token.Text, u'\n'),
            (Token.Text, u'        '),
            (Token.Keyword, u'case'),
            (Token.Text, u' '),
            (Token.Literal.Number.Integer, u'0'),
            (Token.Operator, u':'),
            (Token.Text, u'\n'),
            (Token.Text, u'        '),
            (Token.Keyword, u'default'),
            (Token.Operator, u':'),
            (Token.Text, u'\n'),
            (Token.Text, u'            '),
            (Token.Punctuation, u';'),
            (Token.Text, u'\n'),
            (Token.Text, u'    '),
            (Token.Punctuation, u'}'),
            (Token.Text, u'\n'),
            (Token.Punctuation, u'}'),
            (Token.Text, u'\n'),
            (Token.Text, u''),
        ]
        self.assertEqual(expected, list(self.lexer.get_tokens(textwrap.dedent(fragment))))

    def testSwitchSpaceBeforeColon(self):
        fragment = u'''\
        int main()
        {
            switch (0)
            {
                case 0 :
                default :
                    ;
            }
        }
        '''
        expected = [
            (Token.Text, u''),
            (Token.Keyword.Type, u'int'),
            (Token.Text, u' '),
            (Token.Name.Function, u'main'),
            (Token.Text, u''),
            (Token.Punctuation, u'('),
            (Token.Punctuation, u')'),
            (Token.Text, u'\n'),
            (Token.Text, u''),
            (Token.Punctuation, u'{'),
            (Token.Text, u'\n'),
            (Token.Text, u'    '),
            (Token.Keyword, u'switch'),
            (Token.Text, u' '),
            (Token.Punctuation, u'('),
            (Token.Literal.Number.Integer, u'0'),
            (Token.Punctuation, u')'),
            (Token.Text, u'\n'),
            (Token.Text, u'    '),
            (Token.Punctuation, u'{'),
            (Token.Text, u'\n'),
            (Token.Text, u'        '),
            (Token.Keyword, u'case'),
            (Token.Text, u' '),
            (Token.Literal.Number.Integer, u'0'),
            (Token.Text, u' '),
            (Token.Operator, u':'),
            (Token.Text, u'\n'),
            (Token.Text, u'        '),
            (Token.Keyword, u'default'),
            (Token.Text, u' '),
            (Token.Operator, u':'),
            (Token.Text, u'\n'),
            (Token.Text, u'            '),
            (Token.Punctuation, u';'),
            (Token.Text, u'\n'),
            (Token.Text, u'    '),
            (Token.Punctuation, u'}'),
            (Token.Text, u'\n'),
            (Token.Punctuation, u'}'),
            (Token.Text, u'\n'),
            (Token.Text, u''),
        ]
        self.assertEqual(expected, list(self.lexer.get_tokens(textwrap.dedent(fragment))))

    def testLabel(self):
        fragment = u'''\
        int main()
        {
        foo:
          goto foo;
        }
        '''
        expected = [
            (Token.Text, u''),
            (Token.Keyword.Type, u'int'),
            (Token.Text, u' '),
            (Token.Name.Function, u'main'),
            (Token.Text, u''),
            (Token.Punctuation, u'('),
            (Token.Punctuation, u')'),
            (Token.Text, u'\n'),
            (Token.Text, u''),
            (Token.Punctuation, u'{'),
            (Token.Text, u'\n'),
            (Token.Name.Label, u'foo'),
            (Token.Punctuation, u':'),
            (Token.Text, u'\n'),
            (Token.Text, u'  '),
            (Token.Keyword, u'goto'),
            (Token.Text, u' '),
            (Token.Name, u'foo'),
            (Token.Punctuation, u';'),
            (Token.Text, u'\n'),
            (Token.Punctuation, u'}'),
            (Token.Text, u'\n'),
            (Token.Text, u''),
        ]
        self.assertEqual(expected, list(self.lexer.get_tokens(textwrap.dedent(fragment))))

    def testLabelSpaceBeforeColon(self):
        fragment = u'''\
        int main()
        {
        foo :
          goto foo;
        }
        '''
        expected = [
            (Token.Text, u''),
            (Token.Keyword.Type, u'int'),
            (Token.Text, u' '),
            (Token.Name.Function, u'main'),
            (Token.Text, u''),
            (Token.Punctuation, u'('),
            (Token.Punctuation, u')'),
            (Token.Text, u'\n'),
            (Token.Text, u''),
            (Token.Punctuation, u'{'),
            (Token.Text, u'\n'),
            (Token.Name.Label, u'foo'),
            (Token.Text, u' '),
            (Token.Punctuation, u':'),
            (Token.Text, u'\n'),
            (Token.Text, u'  '),
            (Token.Keyword, u'goto'),
            (Token.Text, u' '),
            (Token.Name, u'foo'),
            (Token.Punctuation, u';'),
            (Token.Text, u'\n'),
            (Token.Punctuation, u'}'),
            (Token.Text, u'\n'),
            (Token.Text, u''),
        ]
        self.assertEqual(expected, list(self.lexer.get_tokens(textwrap.dedent(fragment))))

    def testLabelFollowedByStatement(self):
        fragment = u'''\
        int main()
        {
        foo:return 0;
          goto foo;
        }
        '''
        expected = [
            (Token.Text, u''),
            (Token.Keyword.Type, u'int'),
            (Token.Text, u' '),
            (Token.Name.Function, u'main'),
            (Token.Text, u''),
            (Token.Punctuation, u'('),
            (Token.Punctuation, u')'),
            (Token.Text, u'\n'),
            (Token.Text, u''),
            (Token.Punctuation, u'{'),
            (Token.Text, u'\n'),
            (Token.Name.Label, u'foo'),
            (Token.Punctuation, u':'),
            (Token.Keyword, u'return'),
            (Token.Text, u' '),
            (Token.Literal.Number.Integer, u'0'),
            (Token.Punctuation, u';'),
            (Token.Text, u'\n'),
            (Token.Text, u'  '),
            (Token.Keyword, u'goto'),
            (Token.Text, u' '),
            (Token.Name, u'foo'),
            (Token.Punctuation, u';'),
            (Token.Text, u'\n'),
            (Token.Punctuation, u'}'),
            (Token.Text, u'\n'),
            (Token.Text, u''),
        ]
        self.assertEqual(expected, list(self.lexer.get_tokens(textwrap.dedent(fragment))))
Example #27
class CLexerTest(unittest.TestCase):
    def setUp(self):
        self.lexer = CLexer()

    def testNumbers(self):
        code = "42 23.42 23. .42 023 0xdeadbeef 23e+42 42e-23"
        wanted = []
        for item in zip(
            [
                Number.Integer,
                Number.Float,
                Number.Float,
                Number.Float,
                Number.Oct,
                Number.Hex,
                Number.Float,
                Number.Float,
            ],
            code.split(),
        ):
            wanted.append(item)
            wanted.append((Text, " "))
        wanted = wanted[:-1] + [(Text, "\n")]
        self.assertEqual(list(self.lexer.get_tokens(code)), wanted)

    def testSwitch(self):
        fragment = u"""\
        int main()
        {
            switch (0)
            {
                case 0:
                default:
                    ;
            }
        }
        """
        tokens = [
            (Token.Keyword.Type, u"int"),
            (Token.Text, u" "),
            (Token.Name.Function, u"main"),
            (Token.Punctuation, u"("),
            (Token.Punctuation, u")"),
            (Token.Text, u"\n"),
            (Token.Punctuation, u"{"),
            (Token.Text, u"\n"),
            (Token.Text, u"    "),
            (Token.Keyword, u"switch"),
            (Token.Text, u" "),
            (Token.Punctuation, u"("),
            (Token.Literal.Number.Integer, u"0"),
            (Token.Punctuation, u")"),
            (Token.Text, u"\n"),
            (Token.Text, u"    "),
            (Token.Punctuation, u"{"),
            (Token.Text, u"\n"),
            (Token.Text, u"        "),
            (Token.Keyword, u"case"),
            (Token.Text, u" "),
            (Token.Literal.Number.Integer, u"0"),
            (Token.Operator, u":"),
            (Token.Text, u"\n"),
            (Token.Text, u"        "),
            (Token.Keyword, u"default"),
            (Token.Operator, u":"),
            (Token.Text, u"\n"),
            (Token.Text, u"            "),
            (Token.Punctuation, u";"),
            (Token.Text, u"\n"),
            (Token.Text, u"    "),
            (Token.Punctuation, u"}"),
            (Token.Text, u"\n"),
            (Token.Punctuation, u"}"),
            (Token.Text, u"\n"),
        ]
        self.assertEqual(tokens, list(self.lexer.get_tokens(textwrap.dedent(fragment))))

    def testSwitchSpaceBeforeColon(self):
        fragment = u"""\
        int main()
        {
            switch (0)
            {
                case 0 :
                default :
                    ;
            }
        }
        """
        tokens = [
            (Token.Keyword.Type, u"int"),
            (Token.Text, u" "),
            (Token.Name.Function, u"main"),
            (Token.Punctuation, u"("),
            (Token.Punctuation, u")"),
            (Token.Text, u"\n"),
            (Token.Punctuation, u"{"),
            (Token.Text, u"\n"),
            (Token.Text, u"    "),
            (Token.Keyword, u"switch"),
            (Token.Text, u" "),
            (Token.Punctuation, u"("),
            (Token.Literal.Number.Integer, u"0"),
            (Token.Punctuation, u")"),
            (Token.Text, u"\n"),
            (Token.Text, u"    "),
            (Token.Punctuation, u"{"),
            (Token.Text, u"\n"),
            (Token.Text, u"        "),
            (Token.Keyword, u"case"),
            (Token.Text, u" "),
            (Token.Literal.Number.Integer, u"0"),
            (Token.Text, u" "),
            (Token.Operator, u":"),
            (Token.Text, u"\n"),
            (Token.Text, u"        "),
            (Token.Keyword, u"default"),
            (Token.Text, u" "),
            (Token.Operator, u":"),
            (Token.Text, u"\n"),
            (Token.Text, u"            "),
            (Token.Punctuation, u";"),
            (Token.Text, u"\n"),
            (Token.Text, u"    "),
            (Token.Punctuation, u"}"),
            (Token.Text, u"\n"),
            (Token.Punctuation, u"}"),
            (Token.Text, u"\n"),
        ]
        self.assertEqual(tokens, list(self.lexer.get_tokens(textwrap.dedent(fragment))))

    def testLabel(self):
        fragment = u"""\
        int main()
        {
        foo:
          goto foo;
        }
        """
        tokens = [
            (Token.Keyword.Type, u"int"),
            (Token.Text, u" "),
            (Token.Name.Function, u"main"),
            (Token.Punctuation, u"("),
            (Token.Punctuation, u")"),
            (Token.Text, u"\n"),
            (Token.Punctuation, u"{"),
            (Token.Text, u"\n"),
            (Token.Name.Label, u"foo"),
            (Token.Punctuation, u":"),
            (Token.Text, u"\n"),
            (Token.Text, u"  "),
            (Token.Keyword, u"goto"),
            (Token.Text, u" "),
            (Token.Name, u"foo"),
            (Token.Punctuation, u";"),
            (Token.Text, u"\n"),
            (Token.Punctuation, u"}"),
            (Token.Text, u"\n"),
        ]
        self.assertEqual(tokens, list(self.lexer.get_tokens(textwrap.dedent(fragment))))

    def testLabelSpaceBeforeColon(self):
        fragment = u"""\
        int main()
        {
        foo :
          goto foo;
        }
        """
        tokens = [
            (Token.Keyword.Type, u"int"),
            (Token.Text, u" "),
            (Token.Name.Function, u"main"),
            (Token.Punctuation, u"("),
            (Token.Punctuation, u")"),
            (Token.Text, u"\n"),
            (Token.Punctuation, u"{"),
            (Token.Text, u"\n"),
            (Token.Name.Label, u"foo"),
            (Token.Text, u" "),
            (Token.Punctuation, u":"),
            (Token.Text, u"\n"),
            (Token.Text, u"  "),
            (Token.Keyword, u"goto"),
            (Token.Text, u" "),
            (Token.Name, u"foo"),
            (Token.Punctuation, u";"),
            (Token.Text, u"\n"),
            (Token.Punctuation, u"}"),
            (Token.Text, u"\n"),
        ]
        self.assertEqual(tokens, list(self.lexer.get_tokens(textwrap.dedent(fragment))))

    def testLabelFollowedByStatement(self):
        fragment = u"""\
        int main()
        {
        foo:return 0;
          goto foo;
        }
        """
        tokens = [
            (Token.Keyword.Type, u"int"),
            (Token.Text, u" "),
            (Token.Name.Function, u"main"),
            (Token.Punctuation, u"("),
            (Token.Punctuation, u")"),
            (Token.Text, u"\n"),
            (Token.Punctuation, u"{"),
            (Token.Text, u"\n"),
            (Token.Name.Label, u"foo"),
            (Token.Punctuation, u":"),
            (Token.Keyword, u"return"),
            (Token.Text, u" "),
            (Token.Literal.Number.Integer, u"0"),
            (Token.Punctuation, u";"),
            (Token.Text, u"\n"),
            (Token.Text, u"  "),
            (Token.Keyword, u"goto"),
            (Token.Text, u" "),
            (Token.Name, u"foo"),
            (Token.Punctuation, u";"),
            (Token.Text, u"\n"),
            (Token.Punctuation, u"}"),
            (Token.Text, u"\n"),
        ]
        self.assertEqual(tokens, list(self.lexer.get_tokens(textwrap.dedent(fragment))))
Example #28
    # For type annotation
    from typing import Any, Dict  # NOQA
    from pygments.formatter import Formatter  # NOQA
    from sphinx.util.typing import unicode  # NOQA


logger = logging.getLogger(__name__)

lexers = {
    'none': TextLexer(stripnl=False),
    'python': PythonLexer(stripnl=False),
    'python3': Python3Lexer(stripnl=False),
    'pycon': PythonConsoleLexer(stripnl=False),
    'pycon3': PythonConsoleLexer(python3=True, stripnl=False),
    'rest': RstLexer(stripnl=False),
    'c': CLexer(stripnl=False),
}  # type: Dict[unicode, Lexer]
for _lexer in lexers.values():
    _lexer.add_filter('raiseonerror')


escape_hl_chars = {ord(u'\\'): u'\\PYGZbs{}',
                   ord(u'{'): u'\\PYGZob{}',
                   ord(u'}'): u'\\PYGZcb{}'}

# used if Pygments is available
# use textcomp quote to get a true single quote
_LATEX_ADD_STYLES = r'''
\renewcommand\PYGZsq{\textquotesingle}
'''
Example #29
 def setUp(self):
     self.lexer = CLexer()
Example #30
        background_color = '#eeffcc'
        default_style = ''

        styles = FriendlyStyle.styles
        styles.update({
            Generic.Output: 'italic #333',
            Comment: 'italic #408090',
        })

    lexers = defaultdict(
        TextLexer,
        none=TextLexer(),
        python=PythonLexer(),
        pycon=PythonConsoleLexer(),
        rest=RstLexer(),
        c=CLexer(),
    )
    for _lexer in lexers.values():
        _lexer.add_filter('raiseonerror')

    fmter = HtmlFormatter(style=PythonDocStyle)


def highlight_block(source, lang):
    if not pygments:
        return '<pre>' + cgi.escape(source) + '</pre>\n'
    if lang == 'python':
        if source.startswith('>>>'):
            lexer = lexers['pycon']
        else:
            lexer = lexers['python']
Example #31
 def setUp(self):
     self.lexer = CLexer()
Example #32
from style import NightOwl
from pygments.formatters.html import HtmlFormatter
from pygments import highlight
from pygments.lexers import CLexer, MarkdownLexer

if __name__ == "__main__":
    samples = {
        "samples/sample.c": CLexer(),
        "samples/sample.md": MarkdownLexer()
    }

    formatter = HtmlFormatter(style=NightOwl)
    formatter.style.background_color = '#011627'

    with open('test.html', 'w') as out_file:
        out_file.truncate()
        out_file.write(
            "<html><head><link rel='stylesheet' href='base.css'><style>{}</style></head><body>"
            .format(formatter.get_style_defs('div.highlight pre')))

        # sorted() works on Python 2 and 3; dict.keys() has no sort() on Python 3
        files = sorted(samples.keys())

        for key in files:
            with open(key, 'r') as sample_file:
                out_file.write(
                    highlight(sample_file.read(), samples[key], formatter))
        out_file.write("</body></html>")
Example #33
if False:
    # For type annotation
    from typing import Any, Dict  # NOQA
    from pygments.formatter import Formatter  # NOQA

logger = logging.getLogger(__name__)

lexers = dict(
    none=TextLexer(stripnl=False),
    python=PythonLexer(stripnl=False),
    python3=Python3Lexer(stripnl=False),
    pycon=PythonConsoleLexer(stripnl=False),
    pycon3=PythonConsoleLexer(python3=True, stripnl=False),
    rest=RstLexer(stripnl=False),
    c=CLexer(stripnl=False),
)  # type: Dict[unicode, Lexer]
for _lexer in lexers.values():
    _lexer.add_filter('raiseonerror')

escape_hl_chars = {
    ord(u'\\'): u'\\PYGZbs{}',
    ord(u'{'): u'\\PYGZob{}',
    ord(u'}'): u'\\PYGZcb{}'
}

# used if Pygments is available
# use textcomp quote to get a true single quote
_LATEX_ADD_STYLES = r'''
\renewcommand\PYGZsq{\textquotesingle}
'''
Example #34
def scan_source(fp, args):
    # print("scanning: %r" % fp)

    global filepath

    is_cpp = fp.endswith((".cpp", ".cxx"))

    filepath = fp

    #if "displist.c" not in filepath:
    #    return

    filepath_base = os.path.basename(filepath)

    #print(highlight(code, CLexer(), RawTokenFormatter()).decode('utf-8'))
    code = open(filepath, 'r', encoding="utf-8").read()

    quick_check_indentation(code)
    # return

    del tokens[:]
    line = 1

    for ttype, text in lex(code, CLexer()):
        if text:
            tokens.append(TokStore(ttype, text, line))
            line += text.count("\n")

    col = 0  # track line length
    index_line_start = 0

    for i, tok in enumerate(tokens):
        #print(tok.type, tok.text)
        if tok.type == Token.Keyword:
            if tok.text in {"switch", "while", "if", "for"}:
                item_range = extract_statement_if(i)
                if item_range is not None:
                    blender_check_kw_if(item_range[0], i, item_range[1])
            elif tok.text == "else":
                blender_check_kw_else(i)
            elif tok.text == "sizeof":
                blender_check_kw_sizeof(i)
        elif tok.type == Token.Punctuation:
            if tok.text == ",":
                blender_check_comma(i)
            elif tok.text == ".":
                blender_check_period(i)
            elif tok.text == "[":
                # note, we're quite relaxed about this but
                # disallow 'foo ['
                if tokens[i - 1].text.isspace():
                    if is_cpp and tokens[i + 1].text == "]":
                        # c++ can do delete []
                        pass
                    else:
                        warning("space before '['", i, i)
            elif tok.text == "(":
                # check if this is a cast, eg:
                #  (char), (char **), (float (*)[3])
                item_range = extract_cast(i)
                if item_range is not None:
                    blender_check_cast(item_range[0], item_range[1])
            elif tok.text == "{":
                # check previous character is either a '{' or whitespace.
                if (tokens[i - 1].line
                        == tok.line) and not (tokens[i - 1].text.isspace()
                                              or tokens[i - 1].text == "{"):
                    warning("no space before '{'", i, i)

                blender_check_function_definition(i)

        elif tok.type == Token.Operator:
            # we check these in pairs, only want first
            if tokens[i - 1].type != Token.Operator:
                op, index_kw_end = extract_operator(i)
                blender_check_operator(i, index_kw_end, op, is_cpp)
        elif tok.type in Token.Comment:
            doxyfn = None
            if "\\file" in tok.text:
                doxyfn = tok.text.split("\\file", 1)[1].strip().split()[0]
            elif "@file" in tok.text:
                doxyfn = tok.text.split("@file", 1)[1].strip().split()[0]

            if doxyfn is not None:
                doxyfn_base = os.path.basename(doxyfn)
                if doxyfn_base != filepath_base:
                    warning(
                        "doxygen filename mismatch %s != %s" %
                        (doxyfn_base, filepath_base), i, i)

        # ensure line length
        if (not args.no_length_check and
                tok.type == Token.Text and tok.text == "\n"):
            # check line len
            blender_check_linelength(index_line_start, i - 1, col)

            col = 0
            index_line_start = i + 1
        else:
            col += len(tok.text.expandtabs(TAB_SIZE))
Example #35
def scan_source(fp, args):
    # print("scanning: %r" % fp)

    global filepath

    is_cpp = fp.endswith((".cpp", ".cxx"))

    filepath = fp
    filepath_base = os.path.basename(filepath)

    #print(highlight(code, CLexer(), RawTokenFormatter()).decode('utf-8'))
    code = open(filepath, 'r', encoding="utf-8").read()

    quick_check_indentation(code)
    # return

    del tokens[:]
    line = 1

    for ttype, text in lex(code, CLexer()):
        tokens.append(TokStore(ttype, text, line))
        line += text.count("\n")

    col = 0  # track line length
    index_line_start = 0

    for i, tok in enumerate(tokens):
        #print(tok.type, tok.text)
        if tok.type == Token.Keyword:
            if tok.text in {"switch", "while", "if", "for"}:
                item_range = extract_statement_if(i)
                if item_range is not None:
                    blender_check_kw_if(item_range[0], i, item_range[1])
            elif tok.text == "else":
                blender_check_kw_else(i)
        elif tok.type == Token.Punctuation:
            if tok.text == ",":
                blender_check_comma(i)
        elif tok.type == Token.Operator:
            # we check these in pairs, only want first
            if tokens[i - 1].type != Token.Operator:
                op, index_kw_end = extract_operator(i)
                blender_check_operator(i, index_kw_end, op, is_cpp)
        elif tok.type in Token.Comment:
            doxyfn = None
            if "\\file" in tok.text:
                doxyfn = tok.text.split("\\file", 1)[1].strip().split()[0]
            elif "@file" in tok.text:
                doxyfn = tok.text.split("@file", 1)[1].strip().split()[0]

            if doxyfn is not None:
                doxyfn_base = os.path.basename(doxyfn)
                if doxyfn_base != filepath_base:
                    warning(
                        "doxygen filename mismatch %s != %s" %
                        (doxyfn_base, filepath_base), i, i)

        # ensure line length
        if (not args.no_length_check and
                tok.type == Token.Text and tok.text == "\n"):
            # check line len
            blender_check_linelength(index_line_start, i - 1, col)

            col = 0
            index_line_start = i + 1
        else:
            col += len(tok.text.expandtabs(TAB_SIZE))
Example #36
 def get_tokens_unprocessed(self, text):
     for index, token, value in CLexer.get_tokens_unprocessed(self, text):
         if token is Name and value in self.EXTRA_TYPES:
             yield index, Keyword.Type, value
         else:
             yield index, token, value
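
This mirrors the token-stream modification pattern from the Pygments lexer documentation. The enclosing class is not shown in the snippet, so the sketch below reconstructs a plausible one; the class name and the EXTRA_TYPES contents are invented for illustration.

from pygments.lexers import CLexer
from pygments.token import Name, Keyword

class CustomCLexer(CLexer):
    name = 'C (project typedefs)'
    # Illustrative set; a real subclass would list the project's own typedef names.
    EXTRA_TYPES = {'u8', 'u16', 'u32', 'u64'}

    def get_tokens_unprocessed(self, text):
        # Delegate to CLexer and promote known names to Keyword.Type.
        for index, token, value in CLexer.get_tokens_unprocessed(self, text):
            if token is Name and value in self.EXTRA_TYPES:
                yield index, Keyword.Type, value
            else:
                yield index, token, value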
Example #37
 def load_c_syntax(self):
     self.master.lexer = CLexer()
     self.master.initial_highlight()
Example #38
 def code_to_graphviz_html(code):
     style = get_style_by_name('default')
     return highlight(
         code,
         CLexer(),  # FIXME
         GraphvizHtmlFormatter(style))