def _add_trailing_semicolon_to_js(cls, string):
    # I give up on this issue; adding a trailing semicolon is not a trivial job.
    return string
    # Unreachable draft implementation kept below.
    lexer = JavascriptLexer(stripnl=False, stripall=False, ensurenl=False)
    clean_string = []
    tokens_values = []
    for token_value in lexer.get_tokens(string + "\n"):
        tokens_values.append(token_value)
    need_semicolon = True
    for (token, value) in reversed(tokens_values):
        if value == "":
            continue
        if token == LexerToken.Text and value[0] == "\n":
            need_semicolon = True
        elif token == LexerToken.Punctuation:
            need_semicolon = False
        elif token == LexerToken.Comment.Single:
            pass
        elif token == LexerToken.Comment.Multiline:
            pass
        else:
            if need_semicolon:
                need_semicolon = False
                value += ";"
        clean_string.append(value)
    return "".join(reversed(clean_string))[:-1]

def _js_comments_endline2block(cls, string):
    lexer = JavascriptLexer(stripnl=False, stripall=False, ensurenl=False)
    string += "\n"
    string = "".join([
        ('/* ' + value.strip(' \t\n\r/') + ' */' + ('' if value[-1] != '\n' else '\n'))
        if token == LexerToken.Comment.Single else value
        for (token, value) in lexer.get_tokens(string)
    ])
    return string[:-1]

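# A minimal standalone sketch of the same endline-to-block conversion, kept
# outside the class plumbing above (the helper name endline_comments_to_block
# is illustrative; only pygments is assumed):
from pygments.lexers.javascript import JavascriptLexer
from pygments.token import Comment


def endline_comments_to_block(source):
    lexer = JavascriptLexer(stripnl=False, stripall=False, ensurenl=False)
    out = []
    for token, value in lexer.get_tokens(source + "\n"):
        if token == Comment.Single:
            # The lexer may include the trailing newline in the comment value,
            # so preserve it outside the block comment.
            newline = "\n" if value.endswith("\n") else ""
            out.append("/* " + value.strip(" \t\n\r/") + " */" + newline)
        else:
            out.append(value)
    return "".join(out)[:-1]  # drop the newline added before lexing

# endline_comments_to_block("var x = 1; // answer") -> 'var x = 1; /* answer */'
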
def createLexers(self):
    lex = {}
    lex['.c'] = CFamilyLexer()
    lex['.h'] = CFamilyLexer()
    lex['.cpp'] = CppLexer()
    lex['.hpp'] = CppLexer()
    lex['.css'] = CssLexer()
    lex['.sass'] = SassLexer()
    lex['.yaml'] = YamlLexer()
    lex['.yml'] = YamlLexer()
    lex['.json'] = JsonLexer()
    lex['.cs'] = CSharpLexer()
    lex['.fs'] = FSharpLexer()
    lex['.e'] = EiffelLexer()
    lex['.erl'] = ErlangLexer()
    lex['.hrl'] = ErlangLexer()
    lex['.es'] = ErlangLexer()
    lex['.f03'] = FortranLexer()
    lex['.f90'] = FortranLexer()
    lex['.F03'] = FortranLexer()
    lex['.F90'] = FortranLexer()
    lex['.go'] = GoLexer()
    lex['.hs'] = HaskellLexer()
    lex['.v'] = VerilogLexer()
    lex['.vhdl'] = VhdlLexer()
    lex['.vhd'] = VhdlLexer()
    lex['.html'] = HtmlLexer()
    lex['.htm'] = HtmlLexer()
    lex['.xhtml'] = HtmlLexer()
    lex['.xml'] = XmlLexer()
    lex['.js'] = JavascriptLexer()
    lex['.ts'] = TypeScriptLexer()
    lex['.coffee'] = CoffeeScriptLexer()
    lex['.java'] = JavaLexer()
    lex['.scala'] = ScalaLexer()
    lex['.kt'] = KotlinLexer()
    lex['.ktm'] = KotlinLexer()
    lex['.kts'] = KotlinLexer()
    lex['.lisp'] = CommonLispLexer()
    lex['make'] = MakefileLexer()
    lex['Make'] = MakefileLexer()
    lex['CMake'] = CMakeLexer()
    lex['cmake'] = CMakeLexer()
    lex['.m'] = MatlabLexer()
    lex['.mat'] = MatlabLexer()
    lex['.dpr'] = DelphiLexer()
    lex['.perl'] = PerlLexer()
    lex['.php'] = PhpLexer()
    lex['.pr'] = PrologLexer()
    lex['.py'] = Python3Lexer()
    lex['.rb'] = RubyLexer()
    lex['.sh'] = BashLexer()
    lex['.sql'] = MySqlLexer()
    lex['.mysql'] = MySqlLexer()
    lex['.tcl'] = TclLexer()
    lex['.awk'] = AwkLexer()
    return lex

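# Hedged usage sketch for the extension -> lexer map returned above. The
# helper name pick_lexer and the TextLexer fallback are illustrative, not
# part of the original class; keys without a leading dot (e.g. 'make',
# 'CMake') would need a separate filename-based check.
import os
from pygments.lexers.special import TextLexer


def pick_lexer(lexer_map, filename):
    _, ext = os.path.splitext(filename)
    return lexer_map.get(ext, TextLexer())

# pick_lexer(self.createLexers(), 'app.js')  # -> a JavascriptLexer instance
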
def _highlight(self, start_pos, text):
    """Highlight the given text."""
    self.text.mark_set('range_start', start_pos)
    for token, content in lex(text, JavascriptLexer()):
        print(content, token)
        self.text.mark_set('range_end', 'range_start+{0}c'.format(len(content)))
        self.text.tag_add(str(token), 'range_start', 'range_end')
        self.text.mark_set('range_start', 'range_end')

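# The method above assumes that tags named after token types already carry
# colors on the Text widget. A hedged sketch of that setup for the same class
# (the method name _configure_tags and the 'default' style are assumptions):
from pygments.styles import get_style_by_name


def _configure_tags(self):
    style = get_style_by_name('default')
    for token, opts in style:
        if opts['color']:
            self.text.tag_configure(str(token), foreground='#' + opts['color'])
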
def jsparser(path):
    print "Reading data..."
    minified_data = ['']
    label_data = []
    t1 = time.time()
    filecounter = 0
    excluded = {'test', 'tests', '__tests__', 'locale', 'locales', 'ngLocale'}
    point = JavascriptLexer()
    for root, dirs, files in os.walk(path, topdown=True):
        dirs[:] = [d for d in dirs if d not in excluded]  # exclude test directories
        for name in files:
            if name.endswith(".js"):
                blob = FileBlob(os.path.join(root, name))  # Linguist file checking
                if not (blob.is_binary or blob.is_generated):
                    filecounter += 1
                    with open(os.path.join(root, name)) as js_file:
                        data = js_file.read()
                    minidata = '\xff' + jsmin(data) + '\xfe'
                    labels = []
                    for token in point.get_tokens_unprocessed(minidata):
                        (index, label, seq) = token
                        for i in range(len(seq)):
                            labels.append(label)
                    minified_data.append(minidata)
                    label_data.append(labels)
    minified_data = ''.join(minified_data)
    t2 = time.time()
    print "Created the dataset in: %f milliseconds from %d files" % ((t2 - t1) * 1000., filecounter)
    chars = list(set(minified_data))
    data_size, vocab_size = len(minified_data), len(chars)
    print 'data has %d characters, %d unique.' % (data_size, vocab_size)
    return minified_data

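# Hedged follow-up sketch, not part of the original file: a char-level model
# usually needs an index over the vocabulary computed from the returned data.
# data = jsparser('/path/to/js/project')
# chars = sorted(set(data))
# char_to_ix = {ch: i for i, ch in enumerate(chars)}
# ix_to_char = {i: ch for i, ch in enumerate(chars)}
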
def __init__(self, language="python"):
    """
    :param language: python, javascript, java or cpp
    """
    self.language = language
    if self.language == "python":
        self.lexer = PythonLexer()
    elif self.language == "javascript":
        self.lexer = JavascriptLexer()
    elif self.language == "cpp":
        self.lexer = CppLexer()
    elif self.language == "java":
        self.lexer = JavaLexer()
    else:
        raise NotImplementedError

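# Illustrative usage of the constructor above (the class name Tokenizer is a
# guess; only __init__ appears in the snippet):
# tokenizer = Tokenizer(language="javascript")
# tokens = list(tokenizer.lexer.get_tokens("const x = 1;"))  # (token_type, value) pairs
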
def strip_comments_from_js(cls, string, flags=FLAG_STRIP_COMMENTS_JS):
    strip_comments = []
    if cls._check_flag(flags, cls.FLAG_STRIP_COMMENTS_JS_ENDLINE):
        strip_comments.append(LexerToken.Comment.Single)
    if cls._check_flag(flags, cls.FLAG_STRIP_COMMENTS_JS_BLOCK):
        strip_comments.append(LexerToken.Comment.Multiline)
    if strip_comments:
        lexer = JavascriptLexer(stripnl=False, stripall=False, ensurenl=False)
        string += "\n"
        string = cls._strip_comments(
            string, lexer, strip_comments,
            cls._check_flag(flags, cls.FLAG_STRIP_COMMENTS_JS_KEEP_FIRST))
        string = string[:-1]
    return string

def POST(self):
    data = web.input()
    code = data.code
    language = data.lang
    if language == 'python':
        from pygments.lexers.python import PythonLexer
        lexer = PythonLexer()
    elif language == 'php':
        from pygments.lexers.php import PhpLexer
        lexer = PhpLexer()
    elif language == 'java':
        from pygments.lexers.jvm import JavaLexer
        lexer = JavaLexer()
    elif language == 'javascript':
        from pygments.lexers.javascript import JavascriptLexer
        lexer = JavascriptLexer()
    elif language == 'html':
        from pygments.lexers.html import HtmlLexer
        lexer = HtmlLexer()
    elif language == 'cpp':
        from pygments.lexers.c_cpp import CppLexer
        lexer = CppLexer()
    elif language == 'shell':
        from pygments.lexers.shell import ShellSessionLexer
        lexer = ShellSessionLexer()
    elif language == 'matlab':
        from pygments.lexers.matlab import MatlabLexer
        lexer = MatlabLexer()
    elif language == 'ruby':
        from pygments.lexers.ruby import RubyLexer
        lexer = RubyLexer()
    elif language == 'r':
        from pygments.lexers.r import RConsoleLexer
        lexer = RConsoleLexer()
    elif language == 'lisp':
        from pygments.lexers.lisp import SchemeLexer
        lexer = SchemeLexer()
    elif language == 'go':
        from pygments.lexers.go import GoLexer
        lexer = GoLexer()
    formatter = html.HtmlFormatter(linenos=False, encoding='utf-8', nowrap=False)
    hilighted_snippet = highlight(code, lexer, formatter)
    # return hilighted
    # return render.submit()
    return render.result(hilighted_snippet)

def format(self, node: "Node") -> Text:
    """
    Formats the node into JSON5. If colors were specified in the
    constructor, the output is colorized before being returned.
    """
    out = "\n".join(map(self._line, self._format(node)))
    formatter = None
    if self.colors:
        formatter = get_formatter_by_name(self.colors)
    if formatter:
        out = highlight(out, JavascriptLexer(), formatter)
    return out

def __init__(self, repl):
    self._repl = repl
    self._lexer = JavascriptLexer()

class FridaCompleter(Completer):
    def __init__(self, repl):
        self._repl = repl
        self._lexer = JavascriptLexer()

    def get_completions(self, document, complete_event):
        prefix = document.text_before_cursor
        magic = len(prefix) > 0 and prefix[0] == '%' and not any(map(lambda c: c.isspace(), prefix))
        tokens = list(self._lexer.get_tokens(prefix))[:-1]

        # 0.toString() is invalid syntax,
        # but pygments doesn't seem to know that
        for i in range(len(tokens) - 1):
            if tokens[i][0] == Token.Literal.Number.Integer \
                    and tokens[i + 1][0] == Token.Punctuation and tokens[i + 1][1] == '.':
                tokens[i] = (Token.Literal.Number.Float, tokens[i][1] + tokens[i + 1][1])
                del tokens[i + 1]

        before_dot = ''
        after_dot = ''
        encountered_dot = False
        for t in tokens[::-1]:
            if t[0] in Token.Name.subtypes:
                before_dot = t[1] + before_dot
            elif t[0] == Token.Punctuation and t[1] == '.':
                before_dot = '.' + before_dot
                if not encountered_dot:
                    encountered_dot = True
                    after_dot = before_dot[1:]
                    before_dot = ''
            else:
                if encountered_dot:
                    # The value/contents of the string, number or array doesn't matter,
                    # so we just use the simplest value with that type
                    if t[0] in Token.Literal.String.subtypes:
                        before_dot = '""' + before_dot
                    elif t[0] in Token.Literal.Number.subtypes:
                        before_dot = '0.0' + before_dot
                    elif t[0] == Token.Punctuation and t[1] == ']':
                        before_dot = '[]' + before_dot
                break

        try:
            if encountered_dot:
                if before_dot == "" or before_dot.endswith("."):
                    return
                for key in self._get_keys("""\
                        (() => {
                            let o;
                            try {
                                o = """ + before_dot + """;
                            } catch (e) {
                                return [];
                            }
                            if (o === undefined || o === null)
                                return [];
                            let k = Object.getOwnPropertyNames(o);
                            let p;
                            if (typeof o !== 'object')
                                p = o.__proto__;
                            else
                                p = Object.getPrototypeOf(o);
                            if (p !== null && p !== undefined)
                                k = k.concat(Object.getOwnPropertyNames(p));
                            return k;
                        })();"""):
                    if self._pattern_matches(after_dot, key):
                        yield Completion(key, -len(after_dot))
            else:
                if magic:
                    keys = self._repl._magic_command_args.keys()
                else:
                    keys = self._get_keys("Object.getOwnPropertyNames(this)")
                for key in keys:
                    if not self._pattern_matches(before_dot, key) or (key.startswith('_') and before_dot == ''):
                        continue
                    yield Completion(key, -len(before_dot))
        except frida.InvalidOperationError:
            pass
        except frida.OperationCancelledError:
            pass
        except Exception as e:
            self._repl._print(e)

    def _get_keys(self, code):
        repl = self._repl
        with repl._reactor.io_cancellable:
            (t, value) = repl._evaluate(code)
        if t == 'error':
            return []
        return sorted(filter(self._is_valid_name, set(value)))

    def _is_valid_name(self, name):
        tokens = list(self._lexer.get_tokens(name))
        return len(tokens) == 2 and tokens[0][0] in Token.Name.subtypes

    def _pattern_matches(self, pattern, text):
        return re.search(re.escape(pattern), text, re.IGNORECASE) is not None

def highlight_json_example(example_text: str) -> str:
    """Filter. Return a highlighted version of the provided JSON text."""
    return highlight(example_text, JavascriptLexer(), HtmlFormatter())

import argparse

from pygments.lexers.javascript import JavascriptLexer
from pygments.token import Token

from utils import build_labeled_model, temp

typoes = {
    Token.Literal.String.Regex: 'r',
    Token.Keyword: 'k',
    Token.Literal.String: 's',
    Token.Punctuation: 'p',
    Token.Literal.Number: 'n',
    Token.Operator: 'o',
    Token.Text: 'p',
    Token.Name: 'i'
}

point = JavascriptLexer()

# TODO: Add temperature
parser = argparse.ArgumentParser(description='Sample a trained model with labels')
parser.add_argument('filepath', help='filepath to model')
parser.add_argument('-s', '--seed', help='seed input', type=str, default='')
parser.add_argument('-t', '--temperature', help='set sampling temperature', type=float, default=0.85)
parser.add_argument('-l', '--length', help='set output length', type=int, default=1000)

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--data_source', type=str, required=True)
    p_args = parser.parse_args()

    db = TinyDB(os.path.abspath(p_args.data_source))
    eq_table = db.table('equivalence')
    mt_table = db.table('mutants')

    mutants = []
    for mutant in mt_table.all():
        if len(eq_table.search(
                (Query().name == mutant['name'])
                & (Query().compile_error == False)
        )) > 0:
            mutants.append(mutant['name'])

    valid_configurations = eq_table.search(Query().invalid_configuration == False)
    configurations = []
    operators = {}
    for configuration in valid_configurations:
        if configuration['name'] in mutants and not configuration['compile_error']:
            configurations.append(configuration)

    for conf in configurations:
        if conf['operator'] not in operators.keys():
            operators[conf['operator']] = {
                'operator': conf['operator'],
                'mutants_total': len(mt_table.search(Query().operator == conf['operator'])),
                'mutants': [],
                'mutants_equivalent': [],
                'mutants_not_equivalent': []
            }
        if conf['name'] not in operators[conf['operator']]['mutants']:
            operators[conf['operator']]['mutants'].append(conf['name'])
        if conf['useless']:
            if conf['name'] not in operators[conf['operator']]['mutants_equivalent']:
                operators[conf['operator']]['mutants_equivalent'].append(conf['name'])
        else:
            if conf['name'] not in operators[conf['operator']]['mutants_not_equivalent']:
                operators[conf['operator']]['mutants_not_equivalent'].append(conf['name'])

    partially_equivalent = 0
    totally_equivalent = 0
    totally_not_equivalent = 0
    for op_name in operators.keys():
        mutants_partially_equivalent = []
        mutants_totally_equivalent = []
        mutants_totally_not_equivalent = []
        op = operators[op_name]
        for mutant in op['mutants']:
            if mutant in op['mutants_not_equivalent'] and mutant in op['mutants_equivalent']:
                mutants_partially_equivalent.append(mutant)
            elif mutant in op['mutants_not_equivalent'] and mutant not in op['mutants_equivalent']:
                mutants_totally_not_equivalent.append(mutant)
            elif mutant not in op['mutants_not_equivalent'] and mutant in op['mutants_equivalent']:
                mutants_totally_equivalent.append(mutant)
        op['mutants_partially_equivalent'] = len(mutants_partially_equivalent)
        op['mutants_totally_not_equivalent'] = len(mutants_totally_not_equivalent)
        op['mutants_totally_equivalent'] = len(mutants_totally_equivalent)
        partially_equivalent += len(mutants_partially_equivalent)
        totally_equivalent += len(mutants_totally_equivalent)
        totally_not_equivalent += len(mutants_totally_not_equivalent)
        print(str(op['operator']) + ',' + str(op['mutants_total']) + ','
              + str(len(mutants_partially_equivalent)) + ', ,'
              + str(len(mutants_totally_equivalent)) + ', ,'
              + str(len(mutants_totally_not_equivalent)) + ', ,')

    print(highlight(
        code=json.dumps(operators, indent=2, sort_keys=True),
        lexer=JavascriptLexer(),
        formatter=TerminalFormatter()
    ))

    macros = len(db.search(Query().type == 'config')[0]['macros'])
    print(str(macros) + ',' + str(len(mt_table.all())) + ','
          + str(partially_equivalent) + ', ,'
          + str(totally_equivalent) + ', ,'
          + str(totally_not_equivalent) + ', ,')

    print(len(eq_table.all()))
    print(len(eq_table.search((Query().compile_error == False))))
    print(len(eq_table.search((Query().compile_error == True))))

    mutants_with_compilation_error = []
    mutants_without_compilation_error = []
    mutants_without_compilation_error_2 = []
    mutants_without_compilation_error_3 = []
    for mutant in mt_table.all():
        if len(eq_table.search(
                (Query().name == mutant['name'])
                & (Query().compile_error == True)
                & (Query().invalid_configuration == False)
        )) > 0:
            mutants_with_compilation_error.append(mutant['name'])
        if len(eq_table.search(
                (Query().name == mutant['name'])
                & (Query().compile_error == True)
                & (Query().invalid_configuration == False)
        )) == 0:
            mutants_without_compilation_error.append(mutant['name'])
        if len(eq_table.search(
                (Query().name == mutant['name'])
                & (Query().compile_error == True)
                & (Query().invalid_configuration == True)
        )) > 0:
            mutants_without_compilation_error_2.append(mutant['name'])
        if len(eq_table.search(
                (Query().name == mutant['name'])
                & (Query().compile_error == False)
                & (Query().invalid_configuration == True)
        )) > 0:
            mutants_without_compilation_error_3.append(mutant['name'])

    print("------------------")
    print(len(mutants_with_compilation_error))
    print(len(mutants_without_compilation_error))
    print(len(mutants_without_compilation_error_2))
    print(len(mutants_without_compilation_error_3))
    print(len(mt_table.all()))

def jsoncolorize(value):
    return highlight(value, JavascriptLexer(), TerminalFormatter())

def lexer():
    yield JavascriptLexer()

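# Hedged reading: the generator above looks like the body of a pytest fixture
# (the decorator is not part of the snippet). A usage sketch under that
# assumption, with renamed identifiers to avoid clashing with the original:
import pytest
from pygments.lexers.javascript import JavascriptLexer


@pytest.fixture
def js_lexer():
    yield JavascriptLexer()


def test_lexes_var_keyword(js_lexer):
    values = [value for _, value in js_lexer.get_tokens("var x = 1;")]
    assert "var" in values
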
def main():
    config = Config()

    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--source_file', type=str, required=True)
    parser.add_argument('-M', '--mutants_dir', type=str, required=True)
    parser.add_argument('-O', '--output_dir', type=str, required=True)
    parser.add_argument('-P', '--gcc_params', type=str, required=False)
    parser.add_argument('-I', '--includes', nargs='*', required=False)
    parser.add_argument('-D', '--defines', nargs='*', required=False)
    parser.add_argument('-U', '--undefines', nargs='*', required=False)
    parser.add_argument('-v', '--verbose', default=False, action='store_true')
    parser.add_argument('-E', '--debug', default=False, action='store_true')
    parser.add_argument('--disable-impact-analysis', default=False, action='store_true')
    parser.add_argument('--no-check-duplicates', default=False, action='store_true')
    p_args = parser.parse_args()

    config.output_dir = os.path.abspath(p_args.output_dir)
    config.mutants_dir = os.path.abspath(p_args.mutants_dir)
    config.source_file = os.path.abspath(p_args.source_file)

    gcc_params = ['-D' + a for a in p_args.defines] if p_args.defines is not None else []
    gcc_params += ['-U' + a for a in p_args.undefines] if p_args.undefines is not None else []
    config.include_dirs = [os.path.abspath(i) for i in p_args.includes] if p_args.includes is not None else []

    if os.path.exists(config.output_dir):
        shutil.rmtree(config.output_dir)
    os.makedirs(config.output_dir)

    impact_analysis_state = ImpactAnalysisRunner(
        config=config,
        disabled=p_args.disable_impact_analysis
    ).run()
    product_state = ProductState(impact_analysis_state=impact_analysis_state)
    product_generator = ProductGenerator(product_state=product_state, gcc_strategy=gcc_to_tce)
    if not product_generator.is_done():
        product_generator.generate(debug=p_args.debug, params=gcc_params)

    equivalence_res = EquivalenceChecker(product_state=product_state).run()

    operators = []
    for key in equivalence_res['operators'].keys():
        mutants = equivalence_res['operators'][key]['mutants']
        products_total = equivalence_res['operators'][key]['products_total']
        products_compiled = equivalence_res['operators'][key]['products_compiled']
        partially_useless = equivalence_res['operators'][key]['partially_useless']
        totally_useless = equivalence_res['operators'][key]['totally_useless']
        totally_useful = equivalence_res['operators'][key]['totally_useful']
        products_useless = equivalence_res['operators'][key]['products_useless']
        products_useful = equivalence_res['operators'][key]['products_useful']
        operators.append({
            '1_operator': key,
            '2_mutants_total': mutants,
            '3_mutants_partially_useless': partially_useless,
            '4_mutants_totally_useless': totally_useless,
            '5_mutants_totally_useful': totally_useful,
            '6_products_total': products_total,
            '7_products_compiled': products_compiled,
            '8_products_useless': products_useless,
            '9_products_useful': products_useful,
            '__csv': str(key) + ',' + str(mutants) + ',' + str(partially_useless) + ', ,'
                     + str(totally_useless) + ', ,' + str(totally_useful) + ', ,'
                     + str(products_total) + ',' + str(products_compiled) + ','
                     + str(products_useless) + ', ,' + str(products_useful) + ', ,',
        })

    output = {
        '_operators': operators,
        '1_macros': equivalence_res['macros'],
        '2_mutants_total': equivalence_res['total_mutants'],
        '3_mutants_partially_useless': equivalence_res['partially_useless'],
        '4_mutants_totally_useless': equivalence_res['totally_useless'],
        '5_mutants_totally_useful': equivalence_res['totally_useful'],
        '6_products_total': equivalence_res['products_total'],
        '7_products_compiled': equivalence_res['products_compiled'],
        '8_products_useless': equivalence_res['products_useless'],
        '9_products_useful': equivalence_res['products_useful'],
        '__csv': str(equivalence_res['macros']) + ',' + str(equivalence_res['total_mutants']) + ','
                 + str(equivalence_res['partially_useless']) + ', ,'
                 + str(equivalence_res['totally_useless']) + ', ,'
                 + str(equivalence_res['totally_useful']) + ', ,'
                 + str(equivalence_res['products_total']) + ','
                 + str(equivalence_res['products_compiled']) + ','
                 + str(equivalence_res['products_useless']) + ', ,'
                 + str(equivalence_res['products_useful']) + ', ,',
    }

    if p_args.verbose:
        print(highlight(
            code=json.dumps(output, indent=2, sort_keys=True),
            lexer=JavascriptLexer(),
            formatter=TerminalFormatter()
        ))

    output_file = os.path.join(config.output_dir, 'result.json')
    print('\nWriting results in %s...' % output_file)
    result = open(output_file, 'w')
    result.write(json.dumps(output, indent=2, sort_keys=True))

def syntax_highlight_json(code):
    return Markup(
        highlight(code, JavascriptLexer(), HtmlFormatter(noclasses=True)))

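# Alternative sketch (an assumption, not part of the original module): emit
# CSS classes instead of inline styles so the palette can live in a stylesheet.
from pygments import highlight
from pygments.formatters.html import HtmlFormatter
from pygments.lexers.javascript import JavascriptLexer


def syntax_highlight_json_with_classes(code):
    formatter = HtmlFormatter(cssclass='highlight')
    css = formatter.get_style_defs('.highlight')  # serve once per page
    return css, highlight(code, JavascriptLexer(), formatter)
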
import argparse
import numpy as np
import pickle
import time

import jsbeautifier
from pygments.lexers.javascript import JavascriptLexer
from pygments.token import Token

from utils import build_labeled_model, temp

typoes = {
    Token.Literal.String.Regex: 'r',
    Token.Keyword: 'k',
    Token.Literal.String: 's',
    Token.Punctuation: 'p',
    Token.Literal.Number: 'n',
    Token.Operator: 'o',
    Token.Text: 'p',
    Token.Name: 'i'
}

point = JavascriptLexer()

# TODO: Add temperature
parser = argparse.ArgumentParser(description='Sample a trained model with labels')
parser.add_argument('filepath', help='filepath to model')
parser.add_argument('-s', '--seed', help='seed input', type=str, default='')
parser.add_argument('-t', '--temperature', help='set sampling temperature', type=float, default=0.85)
parser.add_argument('-l', '--length', help='set output length', type=int, default=1000)
parser.add_argument('-p', '--project', help='load the test project', default='../data/github_test_chars')
args = parser.parse_args()

path = args.filepath
seed = args.seed
temperature = args.temperature
length = args.length
project_seed_path = args.project
numFilesToCreate = 100

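# Hedged sketch of how the typoes mapping is typically applied to a lexed
# stream (the helper name label_stream is an assumption): walk up the token
# hierarchy until a mapped type is found, then emit one label per character.
def label_stream(source):
    labels = []
    for _, token, seq in point.get_tokens_unprocessed(source):
        t = token
        while t not in typoes and t.parent is not None:
            t = t.parent
        labels.extend([typoes.get(t, 'p')] * len(seq))
    return labels
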
from pygments import highlight
from pygments.formatters.html import HtmlFormatter
from pygments.lexers.data import JsonLexer
from pygments.lexers.javascript import JavascriptLexer
from pygments.lexers.jvm import JavaLexer
from pygments.lexers.python import Python3Lexer
from pygments.lexers.go import GoLexer

JAVA_LANG = "java"
PY_LANG = "python"
JS_LANG = "javascript"
JSON_LANG = "json"
GO_LANG = "go"

highlighter = {
    JAVA_LANG: JavaLexer(),
    PY_LANG: Python3Lexer(),
    JS_LANG: JavascriptLexer(),
    JSON_LANG: JsonLexer(),
    GO_LANG: GoLexer()
}


def syntax_highlighter(generated_code, code_language):
    return highlight(generated_code, highlighter.get(code_language), HtmlFormatter())

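# Illustrative safer variant (an assumption, not part of the original module):
# highlighter.get() returns None for an unknown language, which would make
# highlight() fail, so fall back to a plain-text lexer.
from pygments.lexers.special import TextLexer


def syntax_highlighter_safe(generated_code, code_language):
    lexer = highlighter.get(code_language, TextLexer())
    return highlight(generated_code, lexer, HtmlFormatter())
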
# -- General configuration ------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.todo',
]

# Add Pygments lexers
lexers['JSON'] = JsonLexer()
lexers['Bash'] = BashLexer()
lexers['Javascript'] = JavascriptLexer()

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'

# The encoding of source files.
#source_encoding = 'utf-8-sig'

# The master toctree document.
master_doc = 'index'

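# Note: the lexer registrations above assume the top of this conf.py (not
# shown in the excerpt) imports the registry and the lexer classes, typically:
#   from sphinx.highlighting import lexers
#   from pygments.lexers.data import JsonLexer
#   from pygments.lexers.shell import BashLexer
#   from pygments.lexers.javascript import JavascriptLexer
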
# number configurations
numfig = True
numfig_format = {
    'figure': '<b>Fig. %s:</b>',
    'code-block': '<b>Example %s:</b>',
    'table': '<b>Table %s:</b>',
    'section': '<b>§%s:</b>',
}

# languages
highlight_language = 'none'
lexers['bash'] = BashLexer()
lexers['console'] = BashLexer()
lexers['hcl'] = TerraformLexer()
lexers['javascript'] = JavascriptLexer()
lexers['json'] = JavascriptLexer()
lexers['php'] = PhpLexer(startinline=True, funcnamehighlighting=True)
lexers['php-annotations'] = PhpLexer(startinline=True, funcnamehighlighting=True)
lexers['python'] = PythonLexer()

#templates_path = ['_templates']
source_suffix = '.rst'
master_doc = 'index'

project = u'SimplePie NG'
copyright = u'2017 Ryan Parman'
version = '2.0'

html_title = 'SimplePie: User and Developer Guide'
html_short_title = 'SimplePie'
html_output_encoding = 'utf-8'

from linguist.libs.file_blob import FileBlob
from pygments.lexers.javascript import JavascriptLexer
from pygments.token import Token

path = '/home/vasilis/Desktop/lodash-master'  # TODO: Use argparse to get that
# path = '/home/vasilis/Documents/projects/mbostock-d3-b516d77/src/geo'

print "Reading data..."
t1 = time.time()
minified_data = []
label_data = []
excluded = {'test', 'tests', '__tests__', 'locale', 'locales', 'ngLocale'}
typoes = {
    Token.Literal.String.Regex: 'r',
    Token.Keyword: 'k',
    Token.Literal.String: 's',
    Token.Punctuation: 'p',
    Token.Literal.Number: 'n',
    Token.Operator: 'o',
    Token.Text: 'p',
    Token.Name: 'i'
}
point = JavascriptLexer()

for root, dirs, files in os.walk(path, topdown=True):
    dirs[:] = [d for d in dirs if d not in excluded]  # exclude test directories
    for name in files:
        if name.endswith(".js"):
            blob = FileBlob(os.path.join(root, name))  # Linguist file checking
            if not (blob.is_binary or blob.is_generated):
                with open(os.path.join(root, name)) as js_file:
                    minidata = jsmin(js_file.read())
                labels = []
                chars = []
                for (_, typo, seq) in point.get_tokens_unprocessed(minidata):
                    # print typo, seq
                    chars.append(seq)