Example #1
    def _add_trailing_semicolon_to_js(cls, string):
        # Giving up on this for now; reliably adding a trailing semicolon is not a trivial job.
        # The unreachable reference implementation is kept below.
        return string

        lexer = JavascriptLexer(stripnl=False, stripall=False, ensurenl=False)
        clean_string = []
        tokens_values = []
        for token_value in lexer.get_tokens(string + "\n"):
            tokens_values.append(token_value)
        need_semicolon = True
        for (token, value) in reversed(tokens_values):
            if value == "":
                continue
            if token == LexerToken.Text and value[0] == "\n":
                need_semicolon = True
            elif token == LexerToken.Punctuation:
                need_semicolon = False
            elif token == LexerToken.Comment.Single:
                pass
            elif token == LexerToken.Comment.Multiline:
                pass
            else:
                if need_semicolon:
                    need_semicolon = False
                    value += ";"
            clean_string.append(value)
        return "".join(reversed(clean_string))[:-1]
Example #2
 def _js_comments_endline2block(cls, string):
     lexer = JavascriptLexer(stripnl=False, stripall=False, ensurenl=False)
     string += "\n"
     string = "".join([(('/* ' + value.strip(' \t\n\r/') + ' */' +
                         ('' if value[-1] != '\n' else '\n'))
                        if token == LexerToken.Comment.Single else value)
                       for (token, value) in lexer.get_tokens(string)])
     return string[:-1]
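
The classmethod above assumes surrounding class machinery; a minimal standalone sketch of the same transformation (only pygments required, token handling mirrored from the snippet, helper name made up for illustration):

from pygments.lexers.javascript import JavascriptLexer
from pygments.token import Comment

def endline_comments_to_block(source):
    """Rewrite ``// foo`` comments as ``/* foo */`` using the JS token stream."""
    lexer = JavascriptLexer(stripnl=False, stripall=False, ensurenl=False)
    out = []
    for token, value in lexer.get_tokens(source + "\n"):
        if token is Comment.Single:
            text = "/* " + value.strip(" \t\n\r/") + " */"
            # Preserve the line break only if the comment token carried one.
            out.append(text + ("\n" if value.endswith("\n") else ""))
        else:
            out.append(value)
    return "".join(out)[:-1]

print(endline_comments_to_block("var x = 1;  // the answer"))
# -> var x = 1;  /* the answer */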
Example #3
    def createLexers(self):

        lex = {}
        lex['.c'] = CFamilyLexer()
        lex['.h'] = CFamilyLexer()
        lex['.cpp'] = CppLexer()
        lex['.hpp'] = CppLexer()
        lex['.css'] = CssLexer()
        lex['.sass'] = SassLexer()
        lex['.yaml'] = YamlLexer()
        lex['.yml'] = YamlLexer()
        lex['.json'] = JsonLexer()
        lex['.cs'] = CSharpLexer()
        lex['.fs'] = FSharpLexer()
        lex['.e'] = EiffelLexer()
        lex['.erl'] = ErlangLexer()
        lex['.hrl'] = ErlangLexer()
        lex['.es'] = ErlangLexer()
        lex['.f03'] = FortranLexer()
        lex['.f90'] = FortranLexer()
        lex['.F03'] = FortranLexer()
        lex['.F90'] = FortranLexer()
        lex['.go'] = GoLexer()
        lex['.hs'] = HaskellLexer()
        lex['.v'] = VerilogLexer()
        lex['.vhdl'] = VhdlLexer()
        lex['.vhd'] = VhdlLexer()
        lex['.html'] = HtmlLexer()
        lex['.htm'] = HtmlLexer()
        lex['.xhtml'] = HtmlLexer()
        lex['.xml'] = XmlLexer()
        lex['.js'] = JavascriptLexer()
        lex['.ts'] = TypeScriptLexer()
        lex['.coffee'] = CoffeeScriptLexer()
        lex['.java'] = JavaLexer()
        lex['.scala'] = ScalaLexer()
        lex['.kt'] = KotlinLexer()
        lex['.ktm'] = KotlinLexer()
        lex['.kts'] = KotlinLexer()
        lex['.lisp'] = CommonLispLexer()
        lex['make'] = MakefileLexer()
        lex['Make'] = MakefileLexer()
        lex['CMake'] = CMakeLexer()
        lex['cmake'] = CMakeLexer()
        lex['.m'] = MatlabLexer()
        lex['.mat'] = MatlabLexer()
        lex['.dpr'] = DelphiLexer()
        lex['.perl'] = PerlLexer()
        lex['.php'] = PhpLexer()
        lex['.pr'] = PrologLexer()
        lex['.py'] = Python3Lexer()
        lex['.rb'] = RubyLexer()
        lex['.sh'] = BashLexer()
        lex['.sql'] = MySqlLexer()
        lex['.mysql'] = MySqlLexer()
        lex['.tcl'] = TclLexer()
        lex['.awk'] = AwkLexer()

        return lex
Example #4
 def _highlight(self, start_pos, text):
     """テキストをハイライトする."""
     self.text.mark_set('range_start', start_pos)
     for token, content in lex(text, JavascriptLexer()):
         print(content, token)
         self.text.mark_set('range_end',
                            'range_start+{0}c'.format(len(content)))
         self.text.tag_add(str(token), 'range_start', 'range_end')
         self.text.mark_set('range_start', 'range_end')
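
For the tags added above to change anything visually, each token tag must also be configured on the Text widget. A minimal sketch, assuming arbitrary colours (a real palette would come from a pygments style):

from tkinter import Text
from pygments.token import Token

def configure_highlight_tags(text: Text) -> None:
    # Illustrative token -> colour mapping; tag names match str(token) used above.
    palette = {
        Token.Keyword: "#cc7832",
        Token.Name.Function: "#ffc66d",
        Token.Literal.String: "#6a8759",
        Token.Comment.Single: "#808080",
    }
    for token, color in palette.items():
        text.tag_configure(str(token), foreground=color)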
Example #5
def jsparser(path):
    print "Reading data..."
    minified_data = ['']
    label_data = []
    t1 = time.time()
    filecounter = 0
    excluded = {'test', 'tests', '__tests__', 'locale', 'locales', 'ngLocale'}
    point = JavascriptLexer()

    for root, dirs, files in os.walk(path, topdown=True):
        dirs[:] = [d for d in dirs if d not in excluded]  # exclude test directories
        for name in files:
            if name.endswith(".js"):
                blob = FileBlob(os.path.join(root, name))  # Linguist file checking
                if not (blob.is_binary or blob.is_generated):
                    filecounter += 1
                    with open(os.path.join(root, name)) as js_file:
                        data = js_file.read()
                        minidata = '\xff' + jsmin(data) + '\xfe'
                        labels = []
                        for token in point.get_tokens_unprocessed(minidata):
                            (index, label, seq) = token
                            for i in range(len(seq)):
                                labels.append(label)
                        minified_data.append(minidata)
                        label_data.append(labels)
    minified_data = ''.join(minified_data)

    t2 = time.time()
    print "Created the dataset in: %f milliseconds from %d files" % ((t2 - t1) * 1000., filecounter)

    chars = list(set(minified_data))
    data_size, vocab_size = len(minified_data), len(chars)
    print 'data has %d characters, %d unique.' % (data_size, vocab_size)

    return minified_data
Example #6
    def __init__(self, language="python"):
        """

        :param language: python, javascript, java or cpp
        """
        self.language = language
        if self.language == "python":
            self.lexer = PythonLexer()
        elif self.language == "javascript":
            self.lexer = JavascriptLexer()
        elif self.language == "cpp":
            self.lexer = CppLexer()
        elif self.language == "java":
            self.lexer = JavaLexer()
        else:
            raise NotImplementedError
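
A dict-based variant of the same dispatch, as a self-contained sketch with a usage line; the helper name make_lexer is made up for illustration:

from pygments.lexers import CppLexer, JavaLexer, JavascriptLexer, PythonLexer

LEXERS = {
    "python": PythonLexer,
    "javascript": JavascriptLexer,
    "cpp": CppLexer,
    "java": JavaLexer,
}

def make_lexer(language="python"):
    # Same behaviour as the constructor above: unknown languages are rejected.
    try:
        return LEXERS[language]()
    except KeyError:
        raise NotImplementedError(language)

for token, value in make_lexer("javascript").get_tokens("let x = 1;"):
    print(token, repr(value))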
Example #7
 def strip_comments_from_js(cls, string, flags=FLAG_STRIP_COMMENTS_JS):
     strip_comments = []
     if cls._check_flag(flags, cls.FLAG_STRIP_COMMENTS_JS_ENDLINE):
         strip_comments.append(LexerToken.Comment.Single)
     if cls._check_flag(flags, cls.FLAG_STRIP_COMMENTS_JS_BLOCK):
         strip_comments.append(LexerToken.Comment.Multiline)
     if strip_comments:
         lexer = JavascriptLexer(stripnl=False,
                                 stripall=False,
                                 ensurenl=False)
         string += "\n"
         string = cls._strip_comments(
             string, lexer, strip_comments,
             cls._check_flag(flags, cls.FLAG_STRIP_COMMENTS_JS_KEEP_FIRST))
         string = string[:-1]
     return string
Example #8
 def POST(self):
     data = web.input()
     code = data.code
     language = data.lang
     if language == 'python':
         from pygments.lexers.python import PythonLexer
         lexer = PythonLexer()
     elif language == 'php':
         from pygments.lexers.php import PhpLexer
         lexer = PhpLexer()
     elif language == 'java':
         from pygments.lexers.jvm import JavaLexer
         lexer = JavaLexer()
     elif language == 'javascript':
         from pygments.lexers.javascript import JavascriptLexer
         lexer = JavascriptLexer()
     elif language == 'html':
         from pygments.lexers.html import HtmlLexer
         lexer = HtmlLexer()
     elif language == 'cpp':
         from pygments.lexers.c_cpp import CppLexer
         lexer = CppLexer()
     elif language == 'shell':
         from pygments.lexers.shell import ShellSessionLexer
         lexer = ShellSessionLexer()
     elif language == 'matlab':
         from pygments.lexers.matlab import MatlabLexer
         lexer = MatlabLexer()
     elif language == 'ruby':
         from pygments.lexers.ruby import RubyLexer
         lexer = RubyLexer()
     elif language == 'r':
         from pygments.lexers.r import RConsoleLexer
         lexer = RConsoleLexer()
     elif language == 'lisp':
         from pygments.lexers.lisp import SchemeLexer
         lexer = SchemeLexer()
     elif language == 'go':
         from pygments.lexers.go import GoLexer
         lexer = GoLexer()
     formatter = html.HtmlFormatter(linenos=False,
                                    encoding='utf-8',
                                    nowrap=False)
     highlighted_snippet = highlight(code, lexer, formatter)
     #return highlighted_snippet
     #return render.submit()
     return render.result(highlighted_snippet)
Example #9
    def format(self, node: "Node") -> Text:
        """
        Formats the node into JSON5. If colors were specified in the
        constructor, then it's also where the coloration is added before being
        returned.
        """

        out = "\n".join(map(self._line, self._format(node)))
        formatter = None

        if self.colors:
            formatter = get_formatter_by_name(self.colors)

        if formatter:
            out = highlight(out, JavascriptLexer(), formatter)

        return out
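
Here self.colors is expected to be a pygments formatter name (e.g. 'terminal256'). A standalone sketch of the colouring step, with an illustrative formatter alias and sample text:

from pygments import highlight
from pygments.formatters import get_formatter_by_name
from pygments.lexers.javascript import JavascriptLexer

# JSON5 is close enough to JavaScript for this lexer to handle comments and bare keys.
sample = '{ answer: 42, tags: ["a", "b"], /* JSON5 allows comments */ }'
formatter = get_formatter_by_name("terminal256")
print(highlight(sample, JavascriptLexer(), formatter))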
Example #10
 def __init__(self, repl):
     self._repl = repl
     self._lexer = JavascriptLexer()
Example #11
    class FridaCompleter(Completer):
        def __init__(self, repl):
            self._repl = repl
            self._lexer = JavascriptLexer()

        def get_completions(self, document, complete_event):
            prefix = document.text_before_cursor

            magic = len(prefix) > 0 and prefix[0] == '%' and not any(
                map(lambda c: c.isspace(), prefix))

            tokens = list(self._lexer.get_tokens(prefix))[:-1]

            # 0.toString() is invalid syntax,
            # but pygments doesn't seem to know that
            for i in range(len(tokens) - 1):
                if tokens[i][0] == Token.Literal.Number.Integer \
                        and tokens[i + 1][0] == Token.Punctuation and tokens[i + 1][1] == '.':
                    tokens[i] = (Token.Literal.Number.Float,
                                 tokens[i][1] + tokens[i + 1][1])
                    del tokens[i + 1]

            before_dot = ''
            after_dot = ''
            encountered_dot = False
            for t in tokens[::-1]:
                if t[0] in Token.Name.subtypes:
                    before_dot = t[1] + before_dot
                elif t[0] == Token.Punctuation and t[1] == '.':
                    before_dot = '.' + before_dot
                    if not encountered_dot:
                        encountered_dot = True
                        after_dot = before_dot[1:]
                        before_dot = ''
                else:
                    if encountered_dot:
                        # The value/contents of the string, number or array doesn't matter,
                        # so we just use the simplest value with that type
                        if t[0] in Token.Literal.String.subtypes:
                            before_dot = '""' + before_dot
                        elif t[0] in Token.Literal.Number.subtypes:
                            before_dot = '0.0' + before_dot
                        elif t[0] == Token.Punctuation and t[1] == ']':
                            before_dot = '[]' + before_dot

                    break

            try:
                if encountered_dot:
                    if before_dot == "" or before_dot.endswith("."):
                        return
                    for key in self._get_keys("""\
                            (() => {
                                let o;
                                try {
                                    o = """ + before_dot + """;
                                } catch (e) {
                                    return [];
                                }

                                if (o === undefined || o === null)
                                    return [];

                                let k = Object.getOwnPropertyNames(o);

                                let p;
                                if (typeof o !== 'object')
                                    p = o.__proto__;
                                else
                                    p = Object.getPrototypeOf(o);
                                if (p !== null && p !== undefined)
                                    k = k.concat(Object.getOwnPropertyNames(p));

                                return k;
                            })();"""):
                        if self._pattern_matches(after_dot, key):
                            yield Completion(key, -len(after_dot))
                else:
                    if magic:
                        keys = self._repl._magic_command_args.keys()
                    else:
                        keys = self._get_keys(
                            "Object.getOwnPropertyNames(this)")
                    for key in keys:
                        if not self._pattern_matches(before_dot, key) or (
                                key.startswith('_') and before_dot == ''):
                            continue
                        yield Completion(key, -len(before_dot))
            except frida.InvalidOperationError:
                pass
            except frida.OperationCancelledError:
                pass
            except Exception as e:
                self._repl._print(e)

        def _get_keys(self, code):
            repl = self._repl
            with repl._reactor.io_cancellable:
                (t, value) = repl._evaluate(code)

            if t == 'error':
                return []

            return sorted(filter(self._is_valid_name, set(value)))

        def _is_valid_name(self, name):
            tokens = list(self._lexer.get_tokens(name))
            return len(tokens) == 2 and tokens[0][0] in Token.Name.subtypes

        def _pattern_matches(self, pattern, text):
            return re.search(re.escape(pattern), text, re.IGNORECASE) is not None
Example #12
def highlight_json_example(example_text: str) -> str:
    """Filter. Return a highlighted version of the provided JSON text."""
    return highlight(example_text, JavascriptLexer(), HtmlFormatter())
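
The docstring calls this a filter; a hedged sketch of wiring the function above into a Jinja2 environment (the environment setup is an assumption about the surrounding project):

from jinja2 import Environment

env = Environment()
env.filters["highlight_json_example"] = highlight_json_example
# In a template: {{ payload | highlight_json_example }}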
Example #13
from pygments.lexers.javascript import JavascriptLexer
from pygments.token import Token

from utils import build_labeled_model, temp

typoes = {
    Token.Literal.String.Regex: 'r',
    Token.Keyword: 'k',
    Token.Literal.String: 's',
    Token.Punctuation: 'p',
    Token.Literal.Number: 'n',
    Token.Operator: 'o',
    Token.Text: 'p',
    Token.Name: 'i'
}
point = JavascriptLexer()

# TODO: Add temperature
parser = argparse.ArgumentParser(
    description='Sample a trained model with labels')
parser.add_argument('filepath', help='filepath to model')
parser.add_argument('-s', '--seed', help='seed input', type=str, default='')
parser.add_argument('-t',
                    '--temperature',
                    help='set sampling temperature',
                    type=float,
                    default=0.85)
parser.add_argument('-l',
                    '--length',
                    help='set output length',
                    type=int,
                    default=1000)
Example #14
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('-d', '--data_source', type=str, required=True)
    p_args = parser.parse_args()

    db = TinyDB(os.path.abspath(p_args.data_source))

    eq_table = db.table('equivalence')
    mt_table = db.table('mutants')

    mutants = []

    for mutant in mt_table.all():
        if len(eq_table.search(
            (Query().name == mutant['name']) &
            (Query().compile_error == False)
        )) > 0:
            mutants.append(mutant['name'])

    valid_configurations = eq_table.search(Query().invalid_configuration == False)
    configurations = []
    operators = {}

    for configuration in valid_configurations:
        if configuration['name'] in mutants and not configuration['compile_error']:
            configurations.append(configuration)

    for conf in configurations:

        if conf['operator'] not in operators.keys():
            operators[conf['operator']] = {
                'operator': conf['operator'],
                'mutants_total': len(mt_table.search(Query().operator == conf['operator'])),
                'mutants': [],
                'mutants_equivalent': [],
                'mutants_not_equivalent': []
            }

        if conf['name'] not in operators[conf['operator']]['mutants']:
            operators[conf['operator']]['mutants'].append(conf['name'])

        if conf['useless']:
            if conf['name'] not in operators[conf['operator']]['mutants_equivalent']:
                operators[conf['operator']]['mutants_equivalent'].append(conf['name'])
        else:
            if conf['name'] not in operators[conf['operator']]['mutants_not_equivalent']:
                operators[conf['operator']]['mutants_not_equivalent'].append(conf['name'])

    partially_equivalent = 0
    totally_equivalent = 0
    totally_not_equivalent = 0

    for op_name in operators.keys():
        mutants_partially_equivalent = []
        mutants_totally_equivalent = []
        mutants_totally_not_equivalent = []

        op = operators[op_name]
        for mutant in op['mutants']:
            if mutant in op['mutants_not_equivalent'] and mutant in op['mutants_equivalent']:
                mutants_partially_equivalent.append(mutant)
            elif mutant in op['mutants_not_equivalent'] and mutant not in op['mutants_equivalent']:
                mutants_totally_not_equivalent.append(mutant)
            elif mutant not in op['mutants_not_equivalent'] and mutant in op['mutants_equivalent']:
                mutants_totally_equivalent.append(mutant)

        op['mutants_partially_equivalent'] = len(mutants_partially_equivalent)
        op['mutants_totally_not_equivalent'] = len(mutants_totally_not_equivalent)
        op['mutants_totally_equivalent'] = len(mutants_totally_equivalent)

        partially_equivalent += len(mutants_partially_equivalent)
        totally_equivalent += len(mutants_totally_equivalent)
        totally_not_equivalent += len(mutants_totally_not_equivalent)

        print(str(op['operator']) + ',' + str(op['mutants_total']) + ','
              + str(len(mutants_partially_equivalent)) + ', ,' +
              str(len(mutants_totally_equivalent)) + ', ,' +
              str(len(mutants_totally_not_equivalent)) + ', ,')

    print(highlight(
        code=json.dumps(operators, indent=2, sort_keys=True),
        lexer=JavascriptLexer(),
        formatter=TerminalFormatter()
    ))

    macros = len(db.search(Query().type == 'config')[0]['macros'])

    print(str(macros) + ',' + str(len(mt_table.all())) + ',' + str(partially_equivalent)
          + ', ,' + str(totally_equivalent) + ', ,' + str(totally_not_equivalent)
          + ', ,')

    print(len(eq_table.all()))
    print(len(eq_table.search((Query().compile_error == False))))
    print(len(eq_table.search((Query().compile_error == True))))

    mutants_with_compilation_error = []
    mutants_without_compilation_error = []
    mutants_without_compilation_error_2 = []
    mutants_without_compilation_error_3 = []

    for mutant in mt_table.all():
        if len(eq_table.search(
            (Query().name == mutant['name']) &
            (Query().compile_error == True) &
            (Query().invalid_configuration == False)
        )) > 0:
            mutants_with_compilation_error.append(mutant['name'])

        if len(eq_table.search(
                (Query().name == mutant['name']) &
                (Query().compile_error == True) &
                (Query().invalid_configuration == False)
        )) == 0:
            mutants_without_compilation_error.append(mutant['name'])

        if len(eq_table.search(
                (Query().name == mutant['name']) &
                (Query().compile_error == True) &
                (Query().invalid_configuration == True)
        )) > 0:
            mutants_without_compilation_error_2.append(mutant['name'])

            if len(eq_table.search(
                    (Query().name == mutant['name']) &
                    (Query().compile_error == False) &
                    (Query().invalid_configuration == True)
            )) > 0:
                mutants_without_compilation_error_3.append(mutant['name'])

    print("------------------")
    print(len(mutants_with_compilation_error))
    print(len(mutants_without_compilation_error))
    print(len(mutants_without_compilation_error_2))
    print(len(mutants_without_compilation_error_3))
    print(len(mt_table.all()))
Example #15
def jsoncolorize(value):
    return highlight(value, JavascriptLexer(), TerminalFormatter())
Example #16
def lexer():
    yield JavascriptLexer()
Example #17
def main():
    config = Config()

    parser = argparse.ArgumentParser()

    parser.add_argument('-s', '--source_file', type=str, required=True)
    parser.add_argument('-M', '--mutants_dir', type=str, required=True)
    parser.add_argument('-O', '--output_dir', type=str, required=True)
    parser.add_argument('-P', '--gcc_params', type=str, required=False)
    parser.add_argument('-I', '--includes', nargs='*', required=False)
    parser.add_argument('-D', '--defines', nargs='*', required=False)
    parser.add_argument('-U', '--undefines', nargs='*', required=False)
    parser.add_argument('-v', '--verbose', default=False, action='store_true')
    parser.add_argument('-E', '--debug', default=False, action='store_true')
    parser.add_argument('--disable-impact-analysis', default=False, action='store_true')
    parser.add_argument('--no-check-duplicates', default=False, action='store_true')

    p_args = parser.parse_args()

    config.output_dir = os.path.abspath(p_args.output_dir)
    config.mutants_dir = os.path.abspath(p_args.mutants_dir)
    config.source_file = os.path.abspath(p_args.source_file)
    gcc_params = ['-D' + a for a in p_args.defines] if p_args.defines is not None else []
    gcc_params += ['-U' + a for a in p_args.undefines] if p_args.undefines is not None else []
    config.include_dirs = [os.path.abspath(i) for i in p_args.includes] if p_args.includes is not None else []

    if os.path.exists(config.output_dir):
        shutil.rmtree(config.output_dir)

    os.makedirs(config.output_dir)

    impact_analysis_state = ImpactAnalysisRunner(
        config=config,
        disabled=p_args.disable_impact_analysis
    ).run()

    product_state = ProductState(impact_analysis_state=impact_analysis_state)

    product_generator = ProductGenerator(product_state=product_state,
                                         gcc_strategy=gcc_to_tce)

    if not product_generator.is_done():
        product_generator.generate(debug=p_args.debug, params=gcc_params)

    equivalence_res = EquivalenceChecker(product_state=product_state).run()

    operators = []

    for key in equivalence_res['operators'].keys():

        mutants = equivalence_res['operators'][key]['mutants']
        products_total = equivalence_res['operators'][key]['products_total']
        products_compiled = equivalence_res['operators'][key]['products_compiled']
        partially_useless = equivalence_res['operators'][key]['partially_useless']
        totally_useless = equivalence_res['operators'][key]['totally_useless']
        totally_useful = equivalence_res['operators'][key]['totally_useful']
        products_useless = equivalence_res['operators'][key]['products_useless']
        products_useful = equivalence_res['operators'][key]['products_useful']

        operators.append({
            '1_operator': key,
            '2_mutants_total': mutants,
            '3_mutants_partially_useless': partially_useless,
            '4_mutants_totally_useless': totally_useless,
            '5_mutants_totally_useful': totally_useful,
            '6_products_total': products_total,
            '7_products_compiled': products_compiled,
            '8_products_useless': products_useless,
            '9_products_useful': products_useful,
            '__csv': str(key) + ',' + str(mutants) + ',' +
            str(partially_useless) + ', ,' + str(totally_useless) + ', ,' +
            str(totally_useful) + ', ,' + str(products_total) + ',' +
            str(products_compiled) + ',' + str(products_useless) + ', ,' +
            str(products_useful) + ', ,',
        })

    output = {
        '_operators': operators,
        '1_macros': equivalence_res['macros'],
        '2_mutants_total': equivalence_res['total_mutants'],
        '3_mutants_partially_useless': equivalence_res['partially_useless'],
        '4_mutants_totally_useless': equivalence_res['totally_useless'],
        '5_mutants_totally_useful': equivalence_res['totally_useful'],
        '6_products_total': equivalence_res['products_total'],
        '7_products_compiled': equivalence_res['products_compiled'],
        '8_products_useless': equivalence_res['products_useless'],
        '9_products_useful': equivalence_res['products_useful'],
        '__csv': str(equivalence_res['macros']) + ',' + str(equivalence_res['total_mutants']) + ',' +
        str(equivalence_res['partially_useless']) + ', ,' + str(equivalence_res['totally_useless']) + ', ,' +
        str(equivalence_res['totally_useful']) + ', ,' + str(equivalence_res['products_total']) + ',' +
        str(equivalence_res['products_compiled']) + ',' + str(equivalence_res['products_useless']) + ', ,' +
        str(equivalence_res['products_useful']) + ', ,',
    }

    if p_args.verbose:
        print(highlight(
            code=json.dumps(output, indent=2, sort_keys=True),
            lexer=JavascriptLexer(),
            formatter=TerminalFormatter()
        ))

    output_file = os.path.join(config.output_dir, 'result.json')
    print('\nWriting results in %s...' % output_file)
    result = open(output_file, 'w')
    result.write(json.dumps(output, indent=2, sort_keys=True))
Example #18
def syntax_highlight_json(code):
    return Markup(
        highlight(code, JavascriptLexer(), HtmlFormatter(noclasses=True)))
Example #19
import argparse
import numpy as np
import pickle
import time

import jsbeautifier
from pygments.lexers.javascript import JavascriptLexer
from pygments.token import Token

from utils import build_labeled_model, temp

typoes = {Token.Literal.String.Regex: 'r', Token.Keyword: 'k', Token.Literal.String: 's',
          Token.Punctuation: 'p', Token.Literal.Number: 'n', Token.Operator: 'o', Token.Text: 'p',
          Token.Name: 'i'}
point = JavascriptLexer()

# TODO: Add temperature
parser = argparse.ArgumentParser(description='Sample a trained model with labels')
parser.add_argument('filepath', help='filepath to model')
parser.add_argument('-s', '--seed', help='seed input', type=str, default='')
parser.add_argument('-t', '--temperature', help='set sampling temperature', type=float, default=0.85)
parser.add_argument('-l', '--length', help='set output length', type=int, default=1000)
parser.add_argument('-p', '--project', help='load the test project', default='../data/github_test_chars')
args = parser.parse_args()

path = args.filepath
seed = args.seed
temperature = args.temperature
length = args.length
project_seed_path = args.project
numFilesToCreate = 100
Example #20
JAVA_LANG = "java"
PY_LANG = "python"
JS_LANG = "javascript"
JSON_LANG = "json"
GO_LANG = "go"

from pygments import highlight
from pygments.formatters.html import HtmlFormatter
from pygments.lexers.data import JsonLexer
from pygments.lexers.javascript import JavascriptLexer
from pygments.lexers.jvm import JavaLexer
from pygments.lexers.python import Python3Lexer
from pygments.lexers.go import GoLexer

highlighter = {
    JAVA_LANG: JavaLexer(),
    PY_LANG: Python3Lexer(),
    JS_LANG: JavascriptLexer(),
    JSON_LANG: JsonLexer(),
    GO_LANG: GoLexer()
}


def syntax_highlighter(generated_code, code_language):
    return highlight(generated_code, highlighter.get(code_language),
                     HtmlFormatter())
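
A quick usage check for the helper above; the output is an HTML fragment, and emitting the formatter's CSS is left to the caller:

if __name__ == "__main__":
    snippet = "function add(a, b) { return a + b; }"
    print(syntax_highlighter(snippet, JS_LANG))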
Example #21
# -- General configuration ------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.todo',
]

# Add Pygments lexers
lexers['JSON'] = JsonLexer()
lexers['Bash'] = BashLexer()
lexers['Javascript'] = JavascriptLexer()

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'

# The encoding of source files.
#source_encoding = 'utf-8-sig'

# The master toctree document.
master_doc = 'index'
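
This conf.py excerpt omits the imports it relies on; a hedged reconstruction of the usual ones (the exact module paths in the original project may differ):

from sphinx.highlighting import lexers
from pygments.lexers.data import JsonLexer
from pygments.lexers.javascript import JavascriptLexer
from pygments.lexers.shell import BashLexer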
Example #22
# number configurations
numfig = True
numfig_format = {
    'figure': '<b>Fig. %s:</b>',
    'code-block': '<b>Example %s:</b>',
    'table': '<b>Table %s:</b>',
    'section': '<b>§%s:</b>',
}

# languages
highlight_language = 'none'
lexers['bash'] = BashLexer()
lexers['console'] = BashLexer()
lexers['hcl'] = TerraformLexer()
lexers['javascript'] = JavascriptLexer()
lexers['json'] = JavascriptLexer()
lexers['php'] = PhpLexer(startinline=True, funcnamehighlighting=True)
lexers['php-annotations'] = PhpLexer(startinline=True,
                                     funcnamehighlighting=True)
lexers['python'] = PythonLexer()

#templates_path = ['_templates']
source_suffix = '.rst'
master_doc = 'index'
project = u'SimplePie NG'
copyright = u'2017 Ryan Parman'
version = '2.0'
html_title = 'SimplePie: User and Developer Guide'
html_short_title = 'SimplePie'
html_output_encoding = 'utf-8'
Example #23
from linguist.libs.file_blob import FileBlob
from pygments.lexers.javascript import JavascriptLexer
from pygments.token import Token

path = '/home/vasilis/Desktop/lodash-master'  # TODO: Use argparse to get that
# path = '/home/vasilis/Documents/projects/mbostock-d3-b516d77/src/geo'
print "Reading data..."

t1 = time.time()
minified_data = []
label_data = []
excluded = {'test', 'tests', '__tests__', 'locale', 'locales', 'ngLocale'}
typoes = {Token.Literal.String.Regex: 'r', Token.Keyword: 'k', Token.Literal.String: 's',
          Token.Punctuation: 'p', Token.Literal.Number: 'n', Token.Operator: 'o', Token.Text: 'p',
          Token.Name: 'i'}
point = JavascriptLexer()

for root, dirs, files in os.walk(path, topdown=True):
    dirs[:] = [d for d in dirs if d not in excluded]  # exclude test directories
    for name in files:
        if name.endswith(".js"):
            blob = FileBlob(os.path.join(root, name))  # Linguist file checking
            if not (blob.is_binary or blob.is_generated):
                with open(os.path.join(root, name)) as js_file:
                    minidata = jsmin(js_file.read())

                labels = []
                chars = []
                for (_, typo, seq) in point.get_tokens_unprocessed(minidata):
                    # print typo, seq
                    chars.append(seq)