Example #1
0
def try_lexer(name, filename=None):
    """ Try getting a pygments lexer by name.
        None is returned if no lexer can be found by that name,
        unless 'filename' is given. If 'filename' is given the lexer
        is guessed by file name.
        Ultimately returns None on failure.
    """
    def lexer_from_filename():
        # File-name lookup makes no sense for stdin input.
        if filename_is_stdin(filename):
            return None
        try:
            return lexers.get_lexer_for_filename(filename)
        except pygments.util.ClassNotFound:
            return None

    if not name:
        # No lexer name at all; the file name is the only hope.
        return lexer_from_filename()

    try:
        # Successful lexer by name.
        return lexers.get_lexer_by_name(name)
    except pygments.util.ClassNotFound:
        # Bad name; fall back to the file name.
        return lexer_from_filename()
Example #2
0
    def iscode(self):
        """Return True when pygments knows a lexer for this file's name."""
        if pygments is None:
            # pygments is optional; without it nothing counts as code.
            return False
        try:
            get_lexer_for_filename(self.filename)
        except ClassNotFound:
            return False
        return True
Example #3
0
def get_renderer(full_path):
    """Pick a renderer for *full_path* by file type.

    Notebooks get nb_renderer; anything pygments can lex gets
    pygments_renderer; everything else falls back to raw_renderer.
    (The original had an unreachable trailing ``return raw_renderer``
    after the if/else in which every path already returned.)
    """
    if full_path.endswith('.ipynb'):
        return nb_renderer
    try:
        get_lexer_for_filename(full_path)
    except ClassNotFound:
        # No lexer registered for this extension: serve it raw.
        return raw_renderer
    return pygments_renderer
Example #4
0
def pygments_lexer(filename):
    """Best-effort pygments lexer lookup for *filename*.

    Returns None when pygments is unavailable or no lexer matches.
    '.recipe' files are retried as Python source.
    """
    try:
        from pygments.lexers import get_lexer_for_filename
        from pygments.util import ClassNotFound
    except ImportError:
        # pygments is an optional dependency here.
        return None
    # Recipes are Python source under another extension, so retry as .py.
    candidates = [filename]
    if filename.lower().endswith('.recipe'):
        candidates.append('a.py')
    for candidate in candidates:
        try:
            return get_lexer_for_filename(candidate)
        except ClassNotFound:
            continue
    return None
Example #5
0
def get_lexers():
    '''Debug helper (Python 2): print every registered pygments lexer.

    Prints each lexer tuple from get_all_lexers(), then probes lookup
    for the '.xsd' extension, then prints the total lexer count.
    '''
    import pygments.lexers as t
    count = 0 
    for item in t.get_all_lexers():
        print item
        count += 1
    try:
        # Probe lookup by file name.
        # NOTE(review): the bare except below prints 'found' precisely when
        # NO lexer is found -- the message looks inverted; confirm intent.
        print t.get_lexer_for_filename('this.xsd')
    except:
        print 'found'
    print 'NUMBER OF LEXERS: %s'%count
Example #6
0
def is_parsable(filename):
    u'''Check whether the file can be parsed, judging by its name.

    :param str filename: file name
    :return: whether a pygments lexer is registered for it
    :rtype: bool
    '''
    try:
        lexers.get_lexer_for_filename(filename)
    except util.ClassNotFound:
        return False
    return True
Example #7
0
 def render_listing(in_name, out_name):
     """Render a syntax-highlighted source listing to *out_name*.

     NOTE(review): relies on enclosing-scope names (utils, kw, self, f) --
     presumably a closure inside a site-build task; confirm 'f' is the
     intended anchor source. The original bare ``except:`` is narrowed to
     ClassNotFound, the only lookup failure pygments raises here.
     """
     with open(in_name, 'r') as fd:
         try:
             lexer = get_lexer_for_filename(in_name)
         except ClassNotFound:
             # Unknown extension: fall back to plain text.
             lexer = TextLexer()
         code = highlight(fd.read(), lexer,
             HtmlFormatter(cssclass='code',
                 linenos="table",
                 nowrap=False,
                 lineanchors=utils.slugify(f),
                 anchorlinenos=True))
     title = os.path.basename(in_name)
     crumbs = out_name.split(os.sep)[1:-1] + [title]
     # Build '..' relative links for each breadcrumb level, then '.' for
     # the current folder and '#' for the file itself.
     paths = ['/'.join(['..'] * (len(crumbs) - 2 - i)) for i in
         range(len(crumbs[:-2]))] + ['.', '#']
     context = {
         'code': code,
         'title': title,
         'crumbs': zip(paths, crumbs),
         'lang': kw['default_lang'],
         'description': title,
         }
     self.site.render_template('listing.tmpl', out_name, context)
Example #8
0
def colorize(language, title, text):
    """Colorize the text syntax.

    Guess the language of the text and colorize it.

    Returns a tuple containing the colorized text and the language name.
    """
    formatter = HtmlFormatter(
        linenos=True, style=PygmentsStyle, noclasses=True, nobackground=True)

    def _render(lexer):
        # All code paths return the same (html, language-name) pair.
        return highlight(text, lexer, formatter), lexer.name

    # 1) explicit language name
    try:
        return _render(get_lexer_by_name(language.lower()))
    except LexerNotFound:
        pass
    # 2) file name (the title)
    try:
        return _render(get_lexer_for_filename(title.lower()))
    except LexerNotFound:
        pass
    # 3) content sniffing, kept only when reasonably confident
    try:
        guessed = guess_lexer(text)
        if guessed.analyse_text(text) > .3:
            return _render(guessed)
    except LexerNotFound:
        pass
    # 4) plain-text fallback
    return _render(get_lexer_by_name('text'))
    def set_lexer_from_filename(self, filename):
        """
        Change the lexer based on the filename (actually only the extension is
        needed)

        :param filename: Filename or extension
        :return: True when a lexer was found, False when falling back to
            plain text.
        """
        if filename.endswith("~"):
            # Strip editor-backup suffix so the real extension is used.
            filename = filename[:-1]
        try:
            self._lexer = get_lexer_for_filename(filename)
        except (ClassNotFound, ImportError):
            # ClassNotFound: no lexer registered for this extension.
            # ImportError: a broken pygments plugin; the editor must not
            # crash. Both handlers were previously duplicated verbatim.
            _logger().warning('failed to get lexer from filename: %s, using '
                              'plain text instead...', filename)
            self._lexer = TextLexer()
            return False
        _logger().debug('lexer for filename (%s): %r', filename, self._lexer)
        return True
Example #10
0
def get_used_tokens(filename) :
  """
  Get all the tokens that are referenced in this file
  """
  # Read the whole file once. The previous version joined the file's line
  # iterator with '\n', which doubled every newline (each line already ends
  # with one), and it leaked the file handle.
  with open(filename, 'r') as source:
    code = source.read()
  lexer = lexers.get_lexer_for_filename(filename)
  seen = set([])

  used_tokens = []
  declared_tokens = set([])

  token_iter = lexer.get_tokens(code)
  filtered_iter = (tok for tok in token_iter if tok[1].strip())

  prev_iter, curr_iter, next_iter = itertools.tee(filtered_iter, 3)

  # Offset the iterators so they act as a (prev, curr, next) window.
  # we miss the first token here... whatever
  next(curr_iter)
  next(next_iter)
  next(next_iter)

  for (prev, curr, next_tok) in itertools.izip(prev_iter, curr_iter, next_iter) :
    # Skip member accesses like 'a.B' (current token follows a '.').
    if str(prev[0]) in ['Token.Operator', 'Token.Keyword.Type'] and prev[1] == '.' :
      continue
    # Skip names immediately bound with '=>'.
    if (next_tok[1]) == u'=>' :
      continue
    if str(curr[0]) in ['Token.Name.Class', 'Token.Keyword.Type'] and curr[1][0].isupper() :
      if is_declaration(prev[1]) :
        declared_tokens.add(curr[1])
      elif curr not in seen :
        seen.add(curr)
        used_tokens.append(curr[1].split(".")[0])

  return [t for t in used_tokens if t not in declared_tokens]
Example #11
0
def get_file_lexer(filename, content):
    """ Try to get a lexer by file extension, guess by content if that fails.
    """
    # Pygments sometimes returns a weird lexer for .txt files.
    if filename.lower().endswith('.txt'):
        lexer = lexers.get_lexer_by_name('text')
        debug('Lexer forced by extension: {:>20} -> {}'.format(
            lexer.name,
            filename,
        ))
        return lexer

    try:
        lexer = lexers.get_lexer_for_filename(filename)
        msg = 'Lexer chosen by file name: {:>20} -> {}'
    except ClassNotFound:
        try:
            # Guess by content.
            lexer = lexers.guess_lexer(content)
            msg = 'Lexer guessed by content:  {:>20} -> {}'
        except ClassNotFound:
            # Fall back to default lexer.
            lexer = lexers.get_lexer_by_name(DEFAULT_LEXER)
            msg = 'Lexer set to default:      {:>20} -> {}'
    debug(msg.format(lexer.name, filename))
    return lexer
Example #12
0
def pipp_code(context, src, code, lexer, docss):
    """Syntax-highlight a source file or inline code block to HTML.

    XSLT extension function. *src* names a file relative to the input
    root (registered as a build dependency); otherwise *code* is
    highlighted inline. *lexer* optionally forces the pygments lexer
    name; when *docss* is '1' the pygments stylesheet is written next
    to the output and linked from the result.

    Raises when no lexer is given for inline code. The two previously
    leaked file handles are now context-managed.
    """
    ctx = context.processor.extensionParams[(NAMESPACE, 'context')]

    src = Conversions.StringValue(src)
    if src:
        abs_src = ctx.abs_in_path(src)
        ctx.add_depends(abs_src[len(ctx.in_root):])
        fname = os.path.basename(src)
        with open(abs_src) as source:
            code = source.read()
    else:
        fname = 'inline-code'
        code = Conversions.StringValue(code)

    lexer = Conversions.StringValue(lexer)
    if lexer:
        lexer = get_lexer_by_name(lexer)
    elif src:
        lexer = get_lexer_for_filename(fname)
    else:
        raise Exception('The lexer must be explicitly specified for inline code blocks')

    formatter = HtmlFormatter(cssclass="source")
    result = highlight(code, lexer, formatter)
    if Conversions.StringValue(docss) == '1':
        result = '<link rel="stylesheet" href="%s.css"/>' % fname + result
        with open(ctx.abs_out_path(ctx.abs_in_path(fname + '.css')), 'w') as css:
            css.write(formatter.get_style_defs())

    return result
Example #13
0
 def render_listing(in_name, out_name, folders=[], files=[]):
     """Render a source listing page (or an empty directory page).

     NOTE(review): relies on enclosing-scope names (utils, kw, self, f) --
     presumably a closure inside a site-build task; confirm 'f' is the
     intended anchor source. The mutable default arguments are only read,
     never mutated, and are kept for interface compatibility. The original
     bare ``except:`` is narrowed to ClassNotFound.
     """
     if in_name:
         with open(in_name, 'r') as fd:
             try:
                 lexer = get_lexer_for_filename(in_name)
             except ClassNotFound:
                 # Unknown extension: fall back to plain text.
                 lexer = TextLexer()
             code = highlight(fd.read(), lexer,
                              HtmlFormatter(cssclass='code',
                                            linenos="table", nowrap=False,
                                            lineanchors=utils.slugify(f),
                                            anchorlinenos=True))
         title = os.path.basename(in_name)
     else:
         code = ''
         title = ''
     crumbs = utils.get_crumbs(os.path.relpath(out_name,
                                               kw['output_folder']),
                               is_file=True)
     context = {
         'code': code,
         'title': title,
         'crumbs': crumbs,
         'lang': kw['default_lang'],
         'folders': folders,
         'files': files,
         'description': title,
     }
     self.site.render_template('listing.tmpl', out_name,
                               context)
Example #14
0
def display(sphinx_id):
    """Flask view: render one indexed source document.

    Looks the document up by id, highlights it with pygments when a
    lexer exists for its path, and caches the (slow) highlight output
    keyed on an adler32 checksum of the text.
    """
    db = oursql.connect(user='******', passwd='codesearch',
                        db='codesearch')

    cursor = db.cursor(oursql.DictCursor)
    cursor.execute('SELECT project, path, text FROM documents WHERE id = ?',
                   (sphinx_id,))
    sourcecode = cursor.fetchone()
    if sourcecode is None:
        flask.abort(404)

    title = posixpath.join(sourcecode['project'], sourcecode['path'])

    try:
        lexer = get_lexer_for_filename(sourcecode['path'])
    except ClassNotFound:
        # Syntax highlighting not supported.'
        return flask.render_template(
            'display.html', title=title,
            code=u'<pre>{}</pre>'.format(sourcecode['text']))

    # Highlighting large files can be a slow operation. This is a candidate
    # for caching, keyed on the file content's checksum.
    key = json.dumps(['HIGHLIGHT', zlib.adler32(sourcecode['text'])])
    code = cache.get(key)
    if code is None:
        code = highlight(sourcecode['text'], lexer, HtmlFormatter())
        cache.set(key, code)

    return flask.render_template('display.html', title=title,
                                 code=code)
Example #15
0
def pcat(filename, target='ipython'):
    """Pretty-print a file (or URL) with pygments.

    Renders rich HTML when *target* is 'ipython' and IPython is
    available; on any rendering failure -- or for any other target --
    falls back to ANSI terminal output.

    The original used ``assert target == 'ipython'`` inside the try
    block as dispatch: asserts are stripped under ``-O`` and the caught
    AssertionError was printed as if it were an error. Replaced with an
    explicit ``if``.
    """
    code = read_file_or_url(filename)

    HTML_TEMPLATE = """<style>
    {}
    </style>
    {}
    """

    from pygments.lexers import get_lexer_for_filename
    lexer = get_lexer_for_filename(filename, stripall=True)

    from pygments.formatters import HtmlFormatter, TerminalFormatter
    from pygments import highlight

    if target == 'ipython':
        try:
            from IPython.display import HTML, display
            formatter = HtmlFormatter(linenos=True, cssclass="source")
            html_code = highlight(code, lexer, formatter)
            css = formatter.get_style_defs()
            return HTML(HTML_TEMPLATE.format(css, html_code))
        except Exception as e:
            # Best-effort: report the problem, then fall back to terminal.
            print(e)

    formatter = TerminalFormatter()
    output = highlight(code, lexer, formatter)
    print(output)
Example #16
0
    def __init__(self, text, syntax=None, name=None):
        """Store de-indented *text* and derive a highlighting syntax.

        NOTE(review): when *name* is falsy, the ``elif self.name`` below
        assumes the attribute already exists (e.g. a class-level default),
        otherwise it raises AttributeError; likewise ``self.syntax`` may be
        unset when it is read further down. Confirm class-level defaults.
        """
        self.text = strip_indents(text)
        lexer = None

        if name:
            self.name = name

        if syntax is not None:
            self.syntax = syntax.lower()
        elif self.name:
            try:
                lexer = get_lexer_for_filename(self.name)
            except ClassNotFound:
                pass
            else:
                # The first alias is pygments' canonical short name.
                self.syntax = lexer.aliases[0]

        if self.syntax == "markdown":
            self.html = markdown(self.text)
        else:
            try:
                # Reuse the filename-derived lexer when we already have one.
                lexer = lexer or get_lexer_by_name(self.syntax)
            except ClassNotFound:
                # do nothing - if html is empty then description is a raw text
                pass
            else:
                self.html = highlight(self.text, lexer, formater)
Example #17
0
def _lexer_for_filename(filename):
    """Return a Pygments lexer suitable for a file based on its extension.

    Return None if one can't be determined.

    """
    if filename.endswith(('.js', '.jsm')):
        # Use a custom lexer for js/jsm files to highlight prepocessor
        # directives
        return JavascriptPreprocLexer()
    if filename == 'moz.build':
        return PythonLexer()

    # Lex .h files as C++ so occurrences of "class" and such get colored;
    # Pygments expects .H, .hxx, etc. This is okay even for uses of
    # keywords that would be invalid in C++, like 'int class = 3;'.
    # Also we can syntax highlight XUL as XML, and IDL/WebIDL as CPP.
    if filename.endswith(('.h', '.idl', '.webidl', '.tcc', '.tpp')):
        lookup_name = 'dummy.cpp'
    elif filename.endswith(('.xul', '.svg')):
        lookup_name = 'dummy.xml'
    else:
        lookup_name = filename
    try:
        return get_lexer_for_filename(lookup_name)
    except ClassNotFound:
        return None
def test_example_files():
    """Test generator: yield a lexer check for every example file."""
    testdir = os.path.dirname(__file__)
    examples = os.path.join(testdir, 'examplefiles')
    outdir = os.path.join(examples, 'output')
    if STORE_OUTPUT and not os.path.isdir(outdir):
        os.makedirs(outdir)
    for fn in os.listdir(examples):
        # Skip hidden files and editor leftovers.
        if fn.startswith('.') or fn.endswith('#'):
            continue

        absfn = os.path.join(examples, fn)
        if not os.path.isfile(absfn):
            continue

        try:
            lx = get_lexer_for_filename(absfn)
        except ClassNotFound:
            # Fall back to the <lexer>_filename override convention.
            if "_" not in fn:
                raise AssertionError('file %r has no registered extension, '
                                     'nor is of the form <lexer>_filename '
                                     'for overriding, thus no lexer found.'
                                    % fn)
            try:
                name, rest = fn.split("_", 1)
                lx = get_lexer_by_name(name)
            except ClassNotFound:
                raise AssertionError('no lexer found for file %r' % fn)
        yield check_lexer, lx, absfn, os.path.join(outdir, fn)
Example #19
0
def main(fn, lexer=None):
    """Lex file *fn* and report the first Error token (Python 2 script).

    Returns 1 when an Error token is hit, 0 otherwise. NOTE(review):
    catching ValueError around the lexer lookups works only because
    pygments' ClassNotFound subclasses ValueError -- confirm. Relies on
    module-level globals 'showall' and 'num'.
    """
    if lexer is not None:
        lx = get_lexer_by_name(lexer)
    else:
        try:
            lx = get_lexer_for_filename(os.path.basename(fn))
        except ValueError:
            # No lexer for the extension; try the <lexer>_name convention.
            try:
                name, rest = fn.split('_', 1)
                lx = get_lexer_by_name(name)
            except ValueError:
                raise AssertionError('no lexer found for file %r' % fn)
    debug_lexer = False
    # does not work for e.g. ExtendedRegexLexers
    if lx.__class__.__bases__ == (RegexLexer,):
        # Swap in a debugging base class so the state stack is recorded.
        lx.__class__.__bases__ = (DebuggingRegexLexer,)
        debug_lexer = True
    lno = 1
    text = file(fn, 'U').read()
    text = text.strip('\n') + '\n'
    text = text.decode('latin1')
    tokens = []
    states = []

    def show_token(tok, state):
        # Print one (value, type) pair, column-aligned; also the lexer
        # state when the debugging base class is active.
        reprs = map(repr, tok)
        print '   ' + reprs[1] + ' ' + ' ' * (29-len(reprs[1])) + reprs[0],
        if debug_lexer:
            print ' ' + ' ' * (29-len(reprs[0])) + repr(state),
        print

    for type, val in lx.get_tokens(text):
        lno += val.count('\n')
        if type == Error:
            print 'Error parsing', fn, 'on line', lno
            print 'Previous tokens' + (debug_lexer and ' and states' or '') + ':'
            if showall:
                for tok, state in zip(tokens, states):
                    show_token(tok, state)
            else:
                # Show only the last 'num' tokens before the error.
                for i in range(len(tokens) - num, len(tokens)):
                    show_token(tokens[i], states[i])
            print 'Error token:'
            l = len(repr(val))
            print '   ' + repr(val),
            if debug_lexer and hasattr(lx, 'statestack'):
                print ' ' * (60-l) + repr(lx.statestack),
            print
            print
            return 1
        tokens.append((type,val))
        if debug_lexer:
            if hasattr(lx, 'statestack'):
                states.append(lx.statestack[:])
            else:
                states.append(None)
    if showall:
        for tok, state in zip(tokens, states):
            show_token(tok, state)
    return 0
Example #20
0
def get_sphinx_data(sphinx_id):
    """Fetch one indexed sourcecode document and highlight it.

    Returns a dict with the rendered body, file path and display URL;
    aborts with 404 when the id is unknown.
    """
    query = Search(indexes=['sourcecode'], config=BaseSearchConfig)
    results = query.filter(id__eq=sphinx_id).ask()
    items = results['result']['items']
    if len(items) == 0:
        flask.abort(404)

    filename = items[0]['path']

    if not os.path.isfile(filename):
        return filename, 'File not found. Please resphinx_id.'

    with open(filename) as f:
        code = f.read()

    try:
        # This is the line that throws the exception.
        lexer = get_lexer_for_filename(filename)
    except ClassNotFound:
        # Syntax highlighting not supported.'
        result = '<pre>{}</pre>'.format(code)
    else:
        result = highlight(code, lexer, HtmlFormatter(noclasses=True))

    return {'body': result,
            'path': filename,
            'url': flask.url_for('display', sphinx_id=sphinx_id)}
Example #21
0
  def refreshSource(self, process = None):
    """Redraw the source pane.

    When *process* is given, re-reads the selected frame's file and line
    from the debugger and reloads the pane content; then re-renders with
    breakpoint markers for the current file.
    """
    (self.height, self.width) = self.win.getmaxyx()

    if process is not None:
      loc = process.GetSelectedThread().GetSelectedFrame().GetLineEntry()
      f = loc.GetFileSpec()
      self.pc_line = loc.GetLine()

      if not f.IsValid():
        self.win.addstr(0, 0, "Invalid source file")
        return

      self.filename = f.GetFilename()
      path = os.path.join(f.GetDirectory(), self.filename)
      self.setTitle(path)
      self.content = self.getContent(path)
      self.updateViewline()

    # Nothing to draw until some file has been loaded.
    if self.filename is None:
      return

    if self.formatter is not None:
      # A formatter implies highlighting is enabled; fetch a matching lexer.
      # NOTE(review): get_lexer_for_filename raises ClassNotFound for
      # unknown extensions -- unhandled here; confirm callers guard this.
      from pygments.lexers import get_lexer_for_filename
      self.lexer = get_lexer_for_filename(self.filename)

    bps = [] if not self.filename in self.breakpoints else self.breakpoints[self.filename]
    self.win.erase()
    if self.content:
      self.formatContent(self.content, self.pc_line, bps)
Example #22
0
    def tokenize(self):
        """
        Tokenizes the input file.

        Yields (tokentype, val) pairs, where val is a string.

        The concatenation of all val strings is equal to the input file's
        content.

        NOTE(review): pygments normalizes trailing newlines, so the
        concatenation claim above may not hold byte-for-byte -- confirm.
        """
        # contains all namespaces and other '{' tokens
        self.stack = []
        # current line number
        self.lineno = 1

        # we're using the pygments lexer (mainly because that was the first
        # google hit for 'python c++ lexer', and it's fairly awesome to use)

        lexer = get_lexer_for_filename('.cpp')

        with open(self.filename) as infile:
            code = infile.read()

        for token, val in lexer.get_tokens(code):
            # NOTE(review): despite the original "ignore whitespaces"
            # comment, nothing is skipped -- every token is yielded.
            yield token, val
            self.lineno += val.count('\n')
Example #23
0
    def highlight_file(self, linenos=True, style='default'):
        """ Highlight the input file, and return HTML as a string. """
        # Lexer by file name first; content-based guessing happens after
        # the file has actually been read.
        try:
            lexer = lexers.get_lexer_for_filename(self.input_file)
        except pygments.util.ClassNotFound:
            lexer = None

        try:
            formatter = formatters.HtmlFormatter(
                linenos=linenos,
                style=style,
                full=True)
        except pygments.util.ClassNotFound:
            logging.error("\nInvalid style name: {}\nExpecting one of:\n \
                {}".format(style, "\n    ".join(sorted(styles.STYLE_MAP))))
            sys.exit(1)

        try:
            with open(self.input_file, "r") as f:
                content = f.read()
                try:
                    lexer = lexer or lexers.guess_lexer(content)
                except pygments.util.ClassNotFound:
                    # No lexer could be guessed either: plain text it is.
                    lexer = lexers.get_lexer_by_name("text")
        except EnvironmentError as exread:
            logging.error("\nUnable to read file: {}\n{}".format(
                self.input_file, exread))
            sys.exit(2)

        return pygments.highlight(content, lexer, formatter)
Example #24
0
def doc(path):
    """Gathers the documentation.

    Markdown files are rendered to sanitized HTML; '.less' files are
    lexed as SCSS; everything else is highlighted by file name, falling
    back to plain text. Fixes: file handle is now context-managed, the
    lexer is no longer computed for the markdown path that discarded it,
    and the bare ``except`` is narrowed to Exception.
    """
    with open(path, 'r') as file_:
        content = file_.read()

    # Markdown never needs a lexer, so decide that first.
    if path.endswith('.md'):
        return markdown.markdown(
            bleach.clean(content),
            extensions=['markdown.extensions.nl2br', 'markdown.extensions.toc']
        )

    if path.endswith('.less'):
        # Pygments has no LESS lexer; SCSS is the closest match.
        lexer = ScssLexer(stripall=True)
    else:
        try:
            lexer = get_lexer_for_filename(path, stripall=True)
        except Exception:
            # Best-effort fallback, but no longer swallows
            # SystemExit/KeyboardInterrupt.
            lexer = TextLexer(stripall=True)

    formatter = HtmlFormatter(
        linenos=True,
        cssclass='codehilight',
        noclobber_cssfile=True,
        title=path[path.rfind(os.sep)+1:]
    )
    return "<div class='table-responsive codehilight'>"+highlight(content, lexer, formatter)+"</div>"
Example #25
0
 def __init__(self, raw, request, **kw):
     """Pick a pygments lexer for this content (MoinMoin parser).

     Invoked either for a {{{ }}} block (kw['format_args'] names the
     lexer) or for an attachment (kw['filename'], or -- for an older
     moin -- the caller's local variable 'filename'). Falls back to the
     plain-text lexer whenever lookup fails.
     """
     self.raw = raw
     self.req = request
     if "format_args" in kw:
         # called from a {{{ }}} block
         try:
             self.lexer = get_lexer_by_name(kw['format_args'].strip())
         except ClassNotFound:
             self.lexer = textlexer
         return
     if "filename" in kw:
         # called for an attachment
         filename = kw['filename']
     else:
         # called for an attachment by an older moin
         # HACK: find out the filename by peeking into the execution
         #       frame which might not always work
         try:
             frame = sys._getframe(1)
             filename = frame.f_locals['filename']
         except:
             # Bare except is deliberate here: any failure of the frame
             # hack must degrade to a generic text file name.
             filename = 'x.txt'
     try:
         self.lexer = get_lexer_for_filename(filename)
     except ClassNotFound:
         self.lexer = textlexer
Example #26
0
def get_line_types(repo, repo_uri, rev, path):
    """Returns an array, where each item means a line of code.
       Each item is labled 'code', 'comment' or 'empty'"""

    #profiler_start("Processing LineTypes for revision %s:%s", (self.rev, self.file_path))
    uri = os.path.join(repo_uri, path) # concat repo_uri and file_path for full path
    file_content = _get_file_content(repo, uri, rev)  # get file_content

    # Guard clause: nothing to classify without content.
    if file_content is None or file_content == '':
        printerr("[get_line_types] Error: No file content for " + str(rev) + ":" + str(path) + " found! Skipping.")
        return None

    try:
        lexer = get_lexer_for_filename(path)
    except ClassNotFound:
        try:
            printdbg("[get_line_types] Guessing lexer for" + str(rev) + ":" + str(path) + ".")
            lexer = guess_lexer(file_content)
        except ClassNotFound:
            printdbg("[get_line_types] No guess or lexer found for " + str(rev) + ":" + str(path) + ". Using TextLexer instead.")
            lexer = TextLexer()

    if isinstance(lexer, NemerleLexer):
        # this lexer is broken and yield an unstoppable process
        # see https://bitbucket.org/birkenfeld/pygments-main/issue/706/nemerle-lexer-ends-in-an-infinite-loop
        lexer = TextLexer()

    # Not shure if this should be skipped, when the language uses off-side rules (e.g. python,
    # see http://en.wikipedia.org/wiki/Off-side_rule for list)
    stripped_code = _strip_lines(file_content)
    lexer_output = _iterate_lexer_output(lexer.get_tokens(stripped_code))
    return _comment_empty_or_code(lexer_output).split("\n")
Example #27
0
    def _get_lexer(self, filename):
        """Return the pygments lexer for *filename*.

        Propagates ClassNotFound unchanged; the previous
        ``except ClassNotFound: raise`` handler was a no-op and has
        been removed.
        """
        return get_lexer_for_filename(filename)
Example #28
0
def highlightCode(code, fileName):
    """Return *code* rendered as HTML, lexed by *fileName*'s extension.

    Falls back to plain text when pygments has no matching lexer.
    """
    try:
        lexer = get_lexer_for_filename(fileName)
    except ClassNotFound:
        lexer = TextLexer()
    return highlight(code, lexer, HtmlFormatter())
Example #29
0
def code(title):
    """
    Return syntax highlighted LaTeX.

    *title* is e.g. "code somefile.py"; the file is resolved relative to
    the directory of the global ``source_file``. Uses pygments when
    importable, otherwise wraps the content in a plain lstlisting block.
    Fixes: the file handle is now closed even when highlighting raises,
    and the bare ``except`` is narrowed to Exception.
    """
    filename = title.split(' ')[1]

    # open the code file relative from the yml file path
    with open(os.path.join(os.path.dirname(os.path.abspath(source_file)),
                           filename)) as f:
        out = "\n\\begin{frame}[fragile,t]"
        out += "\n\t\\frametitle{Code: \"%s\"}" % filename

        try:
            from pygments import highlight
            from pygments.lexers import get_lexer_for_filename, get_lexer_by_name
            from pygments.formatters import LatexFormatter

            try:
                lexer = get_lexer_for_filename(filename)
            except Exception:
                # Any lookup problem falls back to plain text.
                lexer = get_lexer_by_name('text')
            out += "\n%s\n" % highlight(f.read(), lexer, LatexFormatter(linenos=True))
        except ImportError:
            # pygments unavailable: emit a verbatim listing instead.
            out += "\n\t\\begin{lstlisting}\n"
            out += f.read()
            out += "\n\t\end{lstlisting}"

    out += "\n\end{frame}"
    return out
Example #30
0
    def process_text_to_dict(self, input_text):
        """Split *input_text* into idiopidae sections and highlight each.

        Returns an OrderedDict mapping section name to formatted lines.
        """
        composer = Composer()
        builder = idiopidae.parser.parse('Document', input_text + "\n\0")

        ext = self.artifact.input_ext
        # List any file extensions which don't map neatly to lexers.
        special_lexers = {
            '.pycon': PythonConsoleLexer,
            '.rbcon': RubyConsoleLexer,
            '.json': JavascriptLexer,
            '.dexy': JavascriptLexer,
        }
        if ext in special_lexers:
            lexer = special_lexers[ext]()
        else:
            lexer = get_lexer_for_filename("input_text%s" % ext)
        formatter = get_formatter_for_filename(self.artifact.filename(),
                                               lineanchors='l')

        output_dict = OrderedDict()
        for i, s in enumerate(builder.sections):
            lines = builder.statements[i]['lines']
            output_dict[s] = composer.format(lines, lexer, formatter)

        return output_dict
Example #31
0
def highlight(text,
              extension=None,
              formatter=HtmlFormatter,
              **formatter_options):
    """Highlight *text*, choosing a lexer from *extension* or content.

    A fake 'foo.<ext>' name lets pygments resolve by extension; when no
    lexer is registered for it, the text itself is sniffed.
    """
    try:
        lexer = get_lexer_for_filename('foo.{}'.format(extension))
    except ClassNotFound:
        lexer = find_best_lexer(text)
    return pygment_highlight(text, lexer, formatter(**formatter_options))
Example #32
0
def get_lexer_from_filename(name):
    """Return the pygments lexer name for *name*, or '' with a warning."""
    try:
        return get_lexer_for_filename(name).name
    except ClassNotFound:
        console.print(
            'WARNING: Could not determine correct lexer for this file!',
            style='yellow')
        return ''
Example #33
0
def highlight_and_paginate_content(file_name, content):
    """Page *content* through the pager, syntax-highlighted for the terminal.

    Uses a 256-color formatter when the terminal supports it. The bare
    ``except`` is narrowed to Exception (the raw fallback is kept, but
    SystemExit/KeyboardInterrupt are no longer swallowed), and the dead
    ``lexer = None`` pre-assignment is gone.
    """
    try:
        lexer = get_lexer_for_filename(file_name)
    except Exception:
        lexer = RawTokenLexer()
    if get_terminal_color_support() == 256:
        formatter = Terminal256Formatter()
    else:
        formatter = TerminalFormatter()
    paginate(highlight(content, lexer, formatter))
Example #34
0
 def __init__(self, filename):
     """
     init by getting a lexer for file name
     If none exist set lexer to dummy which will be
     caught in parse
     """
     try:
         self.lexer = get_lexer_for_filename(filename)
     except Exception:
         # Deliberate best-effort fallback, but Exception is enough --
         # the previous bare except also swallowed SystemExit and
         # KeyboardInterrupt.
         self.lexer = None
Example #35
0
    def guess_lexer_for_filename(filename):
        """Look up a pygments lexer for *filename*, defaulting to plain text."""
        from pygments.lexers import get_lexer_for_filename
        from pygments.util import ClassNotFound

        try:
            return get_lexer_for_filename(filename)
        except ClassNotFound:
            # Unknown extension: fall back to the text lexer.
            from pygments.lexers.special import TextLexer
            return TextLexer()
Example #36
0
 def _get_lexer(self):
     """Pick a lexer: explicit language first, then file name, then content.

     Imports stay local so pygments is only loaded when actually used.
     """
     if self.language:
         from pygments.lexers import get_lexer_by_name
         return get_lexer_by_name(self.language)
     if self.filename:
         from pygments.lexers import get_lexer_for_filename
         return get_lexer_for_filename(self.filename)
     from pygments.lexers import guess_lexer
     return guess_lexer(self.data)
def print_lines(console_printer, file_dict, sourcerange):
    """
    Prints the lines between the current and the result line. If needed
    they will be shortened.
    :param console_printer: Object to print messages on the console.
    :param file_dict:       A dictionary containing all files as values with
                            filenames as key.
    :param sourcerange:     The SourceRange object referring to the related
                            lines to print.
    """
    no_color = not console_printer.print_colored

    # The lexer depends only on the file name, so build and configure it
    # once instead of re-creating it (and re-attaching the whitespace
    # filter) on every iteration of the line loop.
    try:
        lexer = get_lexer_for_filename(sourcerange.file)
    except ClassNotFound:
        lexer = TextLexer()
    lexer.add_filter(
        VisibleWhitespaceFilter(spaces=True,
                                tabs=True,
                                tabsize=SpacingHelper.DEFAULT_TAB_WIDTH))

    for i in range(sourcerange.start.line, sourcerange.end.line + 1):
        # Print affected file's line number in the sidebar.
        console_printer.print(format_lines(lines='', line_nr=i, symbol='['),
                              color=FILE_LINES_COLOR,
                              end='')

        line = file_dict[sourcerange.file][i - 1].rstrip('\n')
        # highlight_text() combines lexer and formatter to output a
        # ``str`` object.
        printed_chars = 0
        if i == sourcerange.start.line and sourcerange.start.column:
            # Text before the source range start column.
            console_printer.print(highlight_text(
                no_color, line[:sourcerange.start.column - 1],
                BackgroundMessageStyle, lexer),
                                  end='')

            printed_chars = sourcerange.start.column - 1

        if i == sourcerange.end.line and sourcerange.end.column:
            # Inside the range, then the remainder of the line.
            console_printer.print(highlight_text(
                no_color, line[printed_chars:sourcerange.end.column - 1],
                BackgroundSourceRangeStyle, lexer),
                                  end='')

            console_printer.print(highlight_text(
                no_color, line[sourcerange.end.column - 1:],
                BackgroundSourceRangeStyle, lexer),
                                  end='')
            console_printer.print('')
        else:
            console_printer.print(highlight_text(no_color,
                                                 line[printed_chars:],
                                                 BackgroundMessageStyle,
                                                 lexer),
                                  end='')
            console_printer.print('')
Example #38
0
 def render_listing(in_name,
                    out_name,
                    input_folder,
                    output_folder,
                    folders=None,
                    files=None):
     """Render a highlighted source listing (or a folder index) page.

     :param in_name: path of the source file to highlight, or a falsy
                     value to render a folder index instead.
     :param out_name: output HTML file path.
     :param folders: directory names for the index (defaults to []).
     :param files: file names for the index (defaults to []).

     NOTE(review): ``self`` is a free variable here — this function is
     presumably defined inside a method whose instance it captures.
     """
     # Fix the classic mutable-default-argument pitfall: the previous
     # ``folders=[], files=[]`` lists were shared across all calls.
     folders = folders if folders is not None else []
     files = files if files is not None else []
     if in_name:
         with open(in_name, 'r') as fd:
             try:
                 lexer = get_lexer_for_filename(in_name)
             except Exception:
                 # Narrowed from a bare ``except`` so KeyboardInterrupt
                 # is no longer swallowed; unknown file types still fall
                 # back to plain text.
                 lexer = TextLexer()
             code = highlight(
                 fd.read(), lexer,
                 HtmlFormatter(cssclass='code',
                               linenos="table",
                               nowrap=False,
                               lineanchors=utils.slugify(in_name,
                                                         force=True),
                               anchorlinenos=True))
         # the pygments highlighter uses <div class="codehilite"><pre>
         # for code.  We switch it to reST's <pre class="code">.
         code = CODERE.sub('<pre class="code literal-block">\\1</pre>',
                           code)
         title = os.path.basename(in_name)
     else:
         code = ''
         title = os.path.split(os.path.dirname(out_name))[1]
     crumbs = utils.get_crumbs(os.path.relpath(
         out_name, self.kw['output_folder']),
                               is_file=True)
     permalink = self.site.link(
         'listing',
         os.path.join(
             input_folder,
             os.path.relpath(
                 out_name[:-5],  # remove '.html'
                 os.path.join(self.kw['output_folder'],
                              output_folder))))
     if self.site.config['COPY_SOURCES']:
         source_link = permalink[:-5]  # remove '.html'
     else:
         source_link = None
     context = {
         'code': code,
         'title': title,
         'crumbs': crumbs,
         'permalink': permalink,
         'lang': self.kw['default_lang'],
         'folders': natsort.natsorted(folders),
         'files': natsort.natsorted(files),
         'description': title,
         'source_link': source_link,
     }
     self.site.render_template('listing.tmpl', out_name, context)
Example #39
0
def highlight_url(request):
    """Fetch a generated source file and return it syntax-highlighted.

    Some stack frames point at files generated during the build, which
    aren't available anywhere by default; the build captures them and
    pushes them to S3 (bug 1389217 and friends).  This view pulls the
    document from an allowed host, highlights it, and serves HTML.

    NOTE(willkg): The output of pygments has CSS in the page, but no JS.
    """
    url = request.GET.get("url")
    if not url:
        return http.HttpResponseBadRequest("No url specified.")

    parsed_url = urlparse(url)

    # Only fetch documents from hosts and schemes we explicitly trust.
    if parsed_url.netloc not in ALLOWED_SOURCE_HOSTS:
        return http.HttpResponseForbidden("Document at disallowed host.")
    if parsed_url.scheme not in ALLOWED_SCHEMES:
        return http.HttpResponseForbidden("Document at disallowed scheme.")

    response = requests.get(url)
    if response.status_code != 200:
        return http.HttpResponseNotFound("Document at URL does not exist.")

    source_name = parsed_url.path.split("/")[-1]
    if source_name.endswith(".h"):
        # Pygments will default to C which we don't want, so override it here.
        lexer = CppLexer()
    else:
        lexer = get_lexer_for_filename(source_name)

    hl_lines = []
    line_param = request.GET.get("line")
    if line_param:
        try:
            hl_lines = [int(line_param)]
        except ValueError:
            pass

    formatter = HtmlFormatter(full=True,
                              title=parsed_url.path,
                              linenos="table",
                              lineanchors="L",
                              hl_lines=hl_lines)
    return http.HttpResponse(highlight(response.text, lexer, formatter),
                             content_type="text/html")
Example #40
0
def preview_cheat(cheat):
    """Return the contents of a cheat file highlighted for the terminal.

    Unknown file types fall back to the plain-text lexer.
    """
    filepath = CHEAT_DIRECTORY + cheat
    with open(filepath, "r") as f:
        file_content = f.read()
    try:
        lexer = lexers.get_lexer_for_filename(filepath, stripnl=False, stripall=False)
    except ClassNotFound:
        lexer = lexers.get_lexer_by_name("text", stripnl=False, stripall=False)
    formatter = formatters.TerminalFormatter(bg="dark")  # dark or light
    return highlight(file_content, lexer, formatter)
Example #41
0
def highlight_data(code: str, filename: str, aliases: Optional[Dict[str, str]] = None) -> Optional[str]:
    """Highlight *code* with a lexer derived from *filename*.

    ``aliases`` may remap the file extension before lexer lookup.
    Returns None when pygments has no lexer for the (possibly remapped)
    file name.
    """
    if aliases:
        stem, extension = os.path.splitext(filename)
        mapped = aliases.get(extension[1:])
        if mapped is not None:
            filename = stem + '.' + mapped
    try:
        lexer = get_lexer_for_filename(filename, stripnl=False)
    except ClassNotFound:
        return None
    return cast(str, highlight(code, lexer, formatter))
Example #42
0
def prettyprint(path):
    """Render the file at *path* as syntax-highlighted IPython HTML.

    Falls back to a custom lexer guess when pygments has no lexer for
    the file name.
    """
    try:
        lexer = get_lexer_for_filename(path)
    except ClassNotFound:
        lexer = _custom_guess(path)

    # Use a context manager so the file handle is closed deterministically;
    # the original ``open(path).read()`` leaked it until GC.
    with open(path) as source_file:
        content = source_file.read()
    formatter = HtmlFormatter()
    return IPython.display.HTML('<style type="text/css">{}</style>{}'.format(
        formatter.get_style_defs('.highlight'),
        highlight(content, lexer, formatter)))
Example #43
0
def snippet_api(request):
    """Create a Snippet from POSTed data and return it in the requested
    format.

    Expected POST fields: ``content`` (required), ``lexer`` or
    ``filename`` (at least one required), ``expires`` (optional),
    ``format`` (optional, defaults to 'default').
    """
    content = request.POST.get('content', '').strip()
    lexer = request.POST.get('lexer', LEXER_DEFAULT).strip()
    filename = request.POST.get('filename', '').strip()
    expires = request.POST.get('expires', '').strip()
    format = request.POST.get('format', 'default').strip()

    if not content:
        return HttpResponseBadRequest('No content given')

    # We need at least a lexer or a filename
    if not lexer and not filename:
        return HttpResponseBadRequest('No lexer or filename given. Unable to '
            'determine a highlight. Valid lexers are: %s' % ', '.join(LEXER_KEYS))

    # A lexer is given, check if its valid at all
    if lexer and lexer not in LEXER_KEYS:
        return HttpResponseBadRequest('Invalid lexer "%s" given. Valid lexers are: %s' % (
            lexer, ', '.join(LEXER_KEYS)))

    # No lexer is given, but we have a filename, try to get the lexer out of it.
    # In case Pygments cannot determine the lexer of the filename, we fallback
    # to 'plain' code.
    if not lexer and filename:
        try:
            lexer_cls = get_lexer_for_filename(filename)
            lexer = lexer_cls.aliases[0]
        except (ClassNotFound, IndexError):
            lexer = PLAIN_CODE

    if expires:
        expire_options = [str(i) for i in dict(EXPIRE_CHOICES).keys()]
        # ``not in`` instead of the unidiomatic ``not expires in ...``.
        if expires not in expire_options:
            return HttpResponseBadRequest('Invalid expire choice "{}" given. '
                'Valid values are: {}'.format(expires, ', '.join(expire_options)))
        expires, expire_type = get_expire_values(expires)
    else:
        expires = datetime.datetime.now() + datetime.timedelta(seconds=60 * 60 * 24 * 30)
        expire_type = Snippet.EXPIRE_TIME

    s = Snippet.objects.create(
        content=content,
        lexer=lexer,
        expires=expires,
        expire_type=expire_type,
    )
    # objects.create() already persists the row; the former explicit
    # s.save() issued a redundant second UPDATE and has been removed.

    if format not in FORMAT_MAPPING:
        response = _format_default(s)
    else:
        response = FORMAT_MAPPING[format](s)

    return HttpResponse(response)
Example #44
0
File: views.py Project: RANUX/ASPTC
def getcode(request):
    """Return the highlighted source of the Solution referenced in POST.

    The stored file name embeds the solution id before the first dot
    (e.g. 'prog.py' -> 'prog_42.py'); the lexer is picked from the
    original file name.
    """
    solution = Solution.objects.get(id=request.POST['solution'])
    original_name = solution.file.name
    # Insert '_<id>' before the first '.' of the original name.
    dot_index = original_name.find('.')
    stored_name = (original_name[:dot_index]
                   + '_{}'.format(request.POST['solution'])
                   + original_name[dot_index:])
    solution_path = '{}/{}/{}'.format(MEDIA_ROOT, solution.user.login,
                                      stored_name)

    with open(solution_path) as file:
        solution_code = file.read()
    lexer_for_code = get_lexer_for_filename(solution.file.name)

    return HttpResponse(highlight(solution_code, lexer_for_code, HtmlFormatter()))
Example #45
0
def get_lexer_for_file(filename):
    """Return a lexer for *filename* with a Kivy-specific fallback.

    Unknown '.kv' files get the KivyLexer; any other unknown extension
    falls back to plain text.
    """
    try:
        return lexers.get_lexer_for_filename(filename)
    except lexers.ClassNotFound:
        if os.path.splitext(filename)[1] == '.kv':
            return KivyLexer()
        return lexers.TextLexer()
Example #46
0
    def _apply_pygments(self, data, filename):
        """Applies Pygments syntax-highlighting to a file's contents.

        The resulting HTML will be returned as a list of lines.
        """
        lexer = get_lexer_for_filename(filename, stripnl=False,
                                       encoding='utf-8')
        # 'codetagify' is the pygments filter that marks up code-tag
        # keywords (XXX/TODO/BUG/NOTE) inside comments.
        lexer.add_filter('codetagify')
        html = highlight(data, lexer, NoWrapperHtmlFormatter())
        return html.splitlines()
Example #47
0
    def html_src(self):
        """Return self.src highlighted as HTML, or a plain <pre> fallback.

        The lexer is guessed from the file name after mapping '.s' to
        '.asm' so pygments picks an assembler lexer.
        NOTE(review): str.replace substitutes the FIRST '.s' anywhere in
        the name, not only a trailing extension — confirm that's intended.
        """
        if not self.src:
            return ''

        try:
            return highlight(
                self.src,
                get_lexer_for_filename(self.file.name.replace('.s', '.asm')),
                HtmlFormatter())
        except Exception:
            # Narrowed from a bare ``except``; also HTML-escape the raw
            # source so the fallback cannot inject markup into the page.
            import html
            return '<pre>' + html.escape(self.src) + '</pre>'
Example #48
0
    def update_all(self):
        """
        Colorize all text in the widget.
        """
        try:
            lexer = get_lexer_for_filename(self.area.filename, '')
        except Exception:
            # No lexer could be resolved; leave the text unhighlighted.
            return
        self.tag_tokens(lexer, '1.0', 'end')
Example #49
0
    def test_render(self):
        """The line-anchor formatter must emit anchored, numbered HTML."""
        source = '#!/usr/bin/env python\nprint "Hello, world!"'

        html_formatter = utils.LineAnchorCodeHtmlFormatter(
            cssclass='codehilite', linenos='inline')
        python_lexer = get_lexer_for_filename("some.py", encoding='chardet')
        rendered = highlight(source, python_lexer, html_formatter)
        assert '<div class="codehilite">' in rendered
        assert '<div id="l1" class="code_block">' in rendered
        assert_in('<span class="lineno">1 </span>', rendered)
Example #50
0
    def update_cache(self):
        """Re-render self.text into self.text_cache as numbered HTML.

        Uses the file name when no explicit language is set; any lookup
        failure falls back to the plain-text lexer.
        """
        try:
            if self.language == "" and self.filename is not None:
                lexer = get_lexer_for_filename(self.filename, stripall=True)
            else:
                lexer = get_lexer_by_name(self.language, stripall=True)
        except pygments.util.ClassNotFound:
            lexer = get_lexer_by_name("text")

        formatter = HtmlFormatter(linenos=True)
        self.text_cache = highlight(self.text, lexer, formatter)
Example #51
0
    def format(self):
        """Return the file's contents highlighted for the terminal.

        The lexer is guessed from the file name first; if that fails and
        an explicit language is configured, it is used instead.

        Raises:
            Exception: when neither the file name nor the language
                yields a lexer.
        """
        try:
            self.lexer = get_lexer_for_filename(self.file)
        except Exception:
            # ``is not None`` replaces the unidiomatic ``!= None``;
            # the unused ``as e`` binding was dropped.
            if self.language is not None:
                self.lexer = get_lexer_by_name(self.language)
            else:
                raise Exception("no suitable lexer found")

        formatter = TerminalFormatter()

        return highlight(self._read(), self.lexer, formatter)
Example #52
0
    def highlighter_for_file(self, filename):
        """Return a callable mapping source code to a list of Markup'd
        highlighted lines.

        Falls back to NullHighlighting when no formatter is configured
        or when pygments has no lexer for *filename*.
        """
        if self.formatter is None:  # pragma: no cover
            return NullHighlighting.highlighter_for_file(filename)

        import pygments
        from pygments.lexers import get_lexer_for_filename
        from jinja2 import Markup
        try:
            lexer = get_lexer_for_filename(filename, None, stripnl=False)
        except pygments.util.ClassNotFound:  # pragma: no cover
            return NullHighlighting.highlighter_for_file(filename)

        def highlight_lines(code):
            rendered = pygments.highlight(code, lexer, self.formatter)
            return [Markup(line.rstrip()) for line in rendered.split("\n")]

        return highlight_lines
Example #53
0
def highlight_data(code, filename, aliases=None):
    """Highlight *code* with a lexer derived from *filename*.

    ``aliases`` may remap the file extension before lexer lookup.
    Returns None when no lexer matches.
    """
    if aliases:
        stem, extension = os.path.splitext(filename)
        mapped = aliases.get(extension[1:])
        if mapped is not None:
            filename = stem + '.' + mapped
    try:
        lexer = get_lexer_for_filename(filename, stripnl=False)
    except ClassNotFound:
        return None
    return highlight(code, lexer, formatter)
Example #54
0
def test_example_files():
    """Generator test: run every file under examplefiles/ through a lexer.

    Yields one ``check_lexer`` case per example file (nose-style
    generator test), then prints timing statistics from the global
    STATS dict — presumably filled in by ``check_lexer``; verify there.
    """
    global STATS
    STATS = {}
    outdir = os.path.join(TESTDIR, 'examplefiles', 'output')
    if STORE_OUTPUT and not os.path.isdir(outdir):
        os.makedirs(outdir)
    for fn in os.listdir(os.path.join(TESTDIR, 'examplefiles')):
        # Skip hidden files and editor backup files ('#' suffix).
        if fn.startswith('.') or fn.endswith('#'):
            continue

        absfn = os.path.join(TESTDIR, 'examplefiles', fn)
        if not os.path.isfile(absfn):
            continue

        # $TEST_EXT optionally restricts the run to one extension.
        extension = os.getenv('TEST_EXT')
        if extension and not absfn.endswith(extension):
            continue

        print(absfn)
        with open(absfn, 'rb') as f:
            code = f.read()
        # Example files should be UTF-8; latin1 (which cannot fail) is
        # the fallback for the rest.
        try:
            code = code.decode('utf-8')
        except UnicodeError:
            code = code.decode('latin1')

        lx = None
        # A '<lexer>_name' file name explicitly selects the lexer,
        # overriding extension-based lookup.
        if '_' in fn:
            try:
                lx = get_lexer_by_name(fn.split('_')[0])
            except ClassNotFound:
                pass
        if lx is None:
            try:
                lx = get_lexer_for_filename(absfn, code=code)
            except ClassNotFound:
                raise AssertionError('file %r has no registered extension, '
                                     'nor is of the form <lexer>_filename '
                                     'for overriding, thus no lexer found.'
                                     % fn)
        yield check_lexer, lx, fn

    # Report the N slowest files, by absolute time and per-character time.
    N = 7
    stats = list(STATS.items())
    stats.sort(key=lambda x: x[1][1])
    print('\nExample files that took longest absolute time:')
    for fn, t in stats[-N:]:
        print('%-30s  %6d chars  %8.2f ms  %7.3f ms/char' % ((fn,) + t))
    print()
    stats.sort(key=lambda x: x[1][2])
    print('\nExample files that took longest relative time:')
    for fn, t in stats[-N:]:
        print('%-30s  %6d chars  %8.2f ms  %7.3f ms/char' % ((fn,) + t))
Example #55
0
def get_language_for(filename, mimetype=None, default='text'):
    """Get language for filename and mimetype"""
    lexer = None
    # Prefer the mimetype when one is supplied; the original expressed
    # this with a raised ClassNotFound used as a goto.
    if mimetype is not None:
        try:
            lexer = get_lexer_for_mimetype(mimetype)
        except ClassNotFound:
            lexer = None
    if lexer is None:
        try:
            lexer = get_lexer_for_filename(filename)
        except ClassNotFound:
            return default
    return get_known_alias(lexer, default)
Example #56
0
def generate(inpath, outpath=None, style='autumn'):
    """Render *inpath* as a syntax-highlighted PNG and return its path.

    When *outpath* is omitted it is derived from *inpath* by replacing
    the extension with '.png'.
    """
    # BUG FIX: rsplit('.') split on EVERY dot, so 'pkg.name.py' produced
    # 'pkg.png'; limit the split to the final extension only.
    outpath = outpath or inpath.rsplit('.', 1)[0] + '.png'
    with open(inpath) as infile:
        with open(outpath, 'w') as outfile:
            lexer = get_lexer_for_filename(inpath)
            formatter = ImageFormatter(style=style,
                                       line_numbers=False,
                                       font_size=42,
                                       font_name='dejavu sans mono')
            highlight(infile.read(), lexer, formatter, outfile)
    resize(outpath)
    return outpath
Example #57
0
    def __getLexerByFileName(self, params_dict):
        """Resolve a lexer from the request's file parameter, falling
        back to the configured default lexer when none matches."""
        from pygments.lexers import ClassNotFound
        from pygments.lexers import get_lexer_for_filename

        fname = getFileName(params_dict[FILE_PARAM_NAME])
        try:
            return get_lexer_for_filename(fname, stripall=True)
        except ClassNotFound:
            return self.__getDefaultLexer()
Example #58
0
    def _sample_source(self, parts):
        """
        Return the HTML rendering of a source file.

        ``parts`` is the split request path; its last element must be of
        the form 'source--<dashed-path>.html'.  Raises TrackError for
        paths that do not refer to a known source file.
        """
        bn = parts[-1]
        if not bn.startswith('source--') or not bn.endswith('.html'):
            # The link isn't of the form that this plugin generated, and
            # therefore cannot refer to a valid source file.
            # BUG FIX: the message formerly interpolated ``path``, which
            # is not assigned until later, so the intended TrackError was
            # masked by a NameError.
            raise TrackError("Path doesn't refer to source file: %s" % bn)

        repos = self.env.get_repository()
        try:
            # Convert '-' back to '/', then drop the 8-char 'source//'
            # prefix and the '.html' suffix.
            sourcefn = parts[-1].replace('-', '/')[8:-5]

            suffix = os.path.splitext(sourcefn)[-1]
            if suffix not in gSourceSuffix:
                # File is not one of the types of sources we want to deal
                # with.  (Same undefined-``path`` fix as above.)
                raise TrackError("Path doesn't refer to source file: %s" %
                                 sourcefn)

            fullpath = '/'.join(parts[1:-1] + [sourcefn])
            path = '/trunk/pyobjc/' + parts[0] + '/Examples/' + fullpath

            node = repos.get_node(path)
            if node is None:
                raise TrackError("Couldn't find repository path: %s" % path)

            src = node.get_content().read()

            # Sibling sources in the same folder, marking the current one
            # as 'selected'.
            sources = []
            rootnode = repos.get_node(os.path.dirname(path))
            self._gather_sources(sources, rootnode, base=None)
            sources.sort(key=lambda x: x[-1])
            sources = [(a, b, (((b == parts[-1]) or None) and 'selected'))
                       for a, b in sources]

            lexer = get_lexer_for_filename(sourcefn)
            # BUG FIX: 'linenoes' was a typo; HtmlFormatter silently
            # ignores unknown options, so the setting never applied
            # (harmless since False is the default, but now explicit).
            formatter = HtmlFormatter(linenos=False, cssclass='source')
            body = highlight(src, lexer, formatter)
            style = formatter.get_style_defs()

            return dict(
                filename=sourcefn,
                zipname=os.path.basename(parts[-2]) + '.zip',
                body=body,
                style=style,
                sources=sources,
            )

        finally:
            repos.close()
Example #59
0
def filter_code(code, filename, language=None):
    """Tokenize and filter a code document. Replace variable names with
    V, function names with F, object names with O, and strings with S.
    Return the filtered document and a list of offsets indicating how
    many characters were removed by filtering at each index in the
    resulting document where filtering occured (this is used later to
    highlight the original code using plagiarism detection results on
    the filtered code)
    """
    try:
        if language is not None:
            lexer = lexers.get_lexer_by_name(language)
        else:
            lexer = lexers.get_lexer_for_filename(filename)
        tokens = lexer.get_tokens(code)
    except pygments.util.ClassNotFound:
        # (f-prefix dropped: the string has no placeholders.)
        logging.warning("(unknown) not tokenized: unknown file extension")
        return code, np.array([])

    # BUG FIX: ``lexer`` is an instance, so comparing it with ``==`` to
    # the TextLexer *class* was always False and the plaintext early
    # return never fired.
    if isinstance(lexer, pygments.lexers.TextLexer):
        logging.warning("did not tokenize plaintext file (unknown)")
        return code, np.array([])

    out_code = ""
    offset = 0
    offsets = [[0, 0]]
    for t in tokens:
        if t[0] == token.Name:
            out_code += "V"
            offsets.append([len(out_code) - 1, offset])
            offset += len(t[1]) - 1
        elif t[0] in token.Name.Function:
            out_code += "F"
            offsets.append([len(out_code) - 1, offset])
            offset += len(t[1]) - 1
        elif t[0] in token.Name.Class:
            out_code += "O"
            # BUG FIX: record the running ``offset`` like the other
            # branches; the raw token length was stored here by mistake.
            offsets.append([len(out_code) - 1, offset])
            offset += len(t[1]) - 1
        elif t[0] == token.Text or t[0] in token.Comment:
            offsets.append([len(out_code) - 1, offset])
            offset += len(t[1])
        elif t[0] in token.Literal.String:
            if t[1] == "'" or t[1] == '"':
                out_code += '"'
            else:
                out_code += "S"
                offsets.append([len(out_code) - 1, offset])
                offset += len(t[1]) - 1
        else:
            out_code += t[1]
    return out_code, np.array(offsets)
def preprocessFile(path, basePath, retainLine):
    """
    Perform preprocessing on the lexer.
    Parameters:
    -----------
    path
    basePath
    retainLine - do we keep the original line numbers or not
    Returns:
    -----------
    (curProject - The current project or corpora we are in
     curFile - The corresponding original file path
     lexedWoComments - the Pygments token list with preprocessing
     OR (Not yet implemented) Something for English?,
     language - the language of this lexer
     fileErrorCount - count of observed error tokens from Pygments)
    """
    # The former ``if (True):`` guard (a TODO placeholder for natural-
    # language input) and the unused ``components`` variable are gone.
    # A context manager now closes the file handle, which
    # ``''.join(open(path).readlines())`` leaked; read() is equivalent.
    with open(path, 'r') as source:
        fileContents = source.read()

    lexer = get_lexer_for_filename(path)
    tokens = lex(fileContents, lexer)  # returns a generator of tuples
    tokensList = list(tokens)
    language = languageForLexer(lexer)
    (curProject, curFile) = getProjectAndFilename(path, basePath)

    if retainLine:
        # Replace comments/docstrings with newlines to keep line numbers.
        lexedWoComments = reduceToNewLine(tokensList, Token.Comment)
        lexedWoComments = reduceToNewLine(lexedWoComments,
                                          Token.Literal.String.Doc)
    else:
        # Strip comments and alter strings
        lexedWoComments = tokensExceptTokenType(tokensList, Token.Comment)
        lexedWoComments = tokensExceptTokenType(lexedWoComments,
                                                Token.Literal.String.Doc)
    beforeError = len(lexedWoComments)
    # Remove things that didn't lex properly
    lexedWoComments = tokensExceptTokenType(lexedWoComments, Token.Error)
    fileErrorCount = beforeError - len(lexedWoComments)

    # Alter the pygments lexer types to be more comparable between our
    # languages
    lexedWoComments = fixTypes(lexedWoComments, language)
    lexedWoComments = convertNamespaceTokens(lexedWoComments, language)

    return (curProject, curFile, lexedWoComments, language, fileErrorCount)