def export_data(self, api_calls: List[ApiCall]):
    test_file_header = """
To generate a sequence diagram, make sure that the API Title uses the following syntax

[ ActorA -> ActorB ] Get product details

The above will generate the following syntax

@startuml
ActorA -> ActorB: Get product details
@enduml
"""
    sorted_apis_by_sequence = sorted(api_calls,
                                     key=lambda a: a.sequence_number or 0)
    output = [
        self.__export_api_call(api_call)
        for api_call in sorted_apis_by_sequence
    ]
    combined_output = "\n".join(output)
    if not combined_output.strip():
        return highlight(test_file_header, JavaLexer(), HtmlFormatter())

    complete_text = "@startuml\n" + combined_output + "\n@enduml"
    return highlight(complete_text, JavaLexer(), HtmlFormatter())
def __init__(self, language="python"):
    """
    :param language: python, javascript, java or cpp
    """
    self.language = language
    if self.language == "python":
        self.lexer = PythonLexer()
    elif self.language == "javascript":
        self.lexer = JavascriptLexer()
    elif self.language == "cpp":
        self.lexer = CppLexer()
    elif self.language == "java":
        self.lexer = JavaLexer()
    else:
        raise NotImplementedError
def __export_api_call(self, api_call):
    last_exchange = app_settings.app_data_cache.get_last_exchange(api_call.id)
    api_test_case = app_settings.app_data_cache.get_api_test_case(api_call.id)
    doc = gen_function(api_call, last_exchange, api_test_case)
    return highlight(doc, JavaLexer(), HtmlFormatter())
def export_data(self, api_calls: List[ApiCall]):
    test_file_header = """
/*
Maven Dependencies

<dependency>
    <groupId>io.rest-assured</groupId>
    <artifactId>rest-assured</artifactId>
    <version>3.0.0</version>
    <scope>test</scope>
</dependency>

Hamcrest Matchers

<dependency>
    <groupId>org.hamcrest</groupId>
    <artifactId>hamcrest-all</artifactId>
    <version>1.3</version>
</dependency>

JSON Schema Validation

<dependency>
    <groupId>io.rest-assured</groupId>
    <artifactId>json-schema-validator</artifactId>
    <version>4.0.0</version>
</dependency>
*/

import static io.restassured.RestAssured.*;
import static io.restassured.matcher.RestAssuredMatchers.*;
import static org.hamcrest.Matchers.*;

class AirHttpTests {
"""
    test_file_footer = """
}
"""
    output = [self.__export_api_call(api_call) for api_call in api_calls]
    return (
        highlight(test_file_header, JavaLexer(), HtmlFormatter())
        + "<br/>"
        + "<br/>".join(output)
        + "<br/>"
        + highlight(test_file_footer, JavaLexer(), HtmlFormatter())
    )
def createLexers(self):
    # Map file extensions (and makefile name fragments) to Pygments lexers.
    lex = {}
    lex['.c'] = CFamilyLexer()
    lex['.h'] = CFamilyLexer()
    lex['.cpp'] = CppLexer()
    lex['.hpp'] = CppLexer()
    lex['.css'] = CssLexer()
    lex['.sass'] = SassLexer()
    lex['.yaml'] = YamlLexer()
    lex['.yml'] = YamlLexer()
    lex['.json'] = JsonLexer()
    lex['.cs'] = CSharpLexer()
    lex['.fs'] = FSharpLexer()
    lex['.e'] = EiffelLexer()
    lex['.erl'] = ErlangLexer()
    lex['.hrl'] = ErlangLexer()
    lex['.es'] = ErlangLexer()
    lex['.f03'] = FortranLexer()
    lex['.f90'] = FortranLexer()
    lex['.F03'] = FortranLexer()
    lex['.F90'] = FortranLexer()
    lex['.go'] = GoLexer()
    lex['.hs'] = HaskellLexer()
    lex['.v'] = VerilogLexer()
    lex['.vhdl'] = VhdlLexer()
    lex['.vhd'] = VhdlLexer()
    lex['.html'] = HtmlLexer()
    lex['.htm'] = HtmlLexer()
    lex['.xhtml'] = HtmlLexer()
    lex['.xml'] = XmlLexer()
    lex['.js'] = JavascriptLexer()
    lex['.ts'] = TypeScriptLexer()
    lex['.coffee'] = CoffeeScriptLexer()
    lex['.java'] = JavaLexer()
    lex['.scala'] = ScalaLexer()
    lex['.kt'] = KotlinLexer()
    lex['.ktm'] = KotlinLexer()
    lex['.kts'] = KotlinLexer()
    lex['.lisp'] = CommonLispLexer()
    lex['make'] = MakefileLexer()
    lex['Make'] = MakefileLexer()
    lex['CMake'] = CMakeLexer()
    lex['cmake'] = CMakeLexer()
    lex['.m'] = MatlabLexer()
    lex['.mat'] = MatlabLexer()
    lex['.dpr'] = DelphiLexer()
    lex['.perl'] = PerlLexer()
    lex['.php'] = PhpLexer()
    lex['.pr'] = PrologLexer()
    lex['.py'] = Python3Lexer()
    lex['.rb'] = RubyLexer()
    lex['.sh'] = BashLexer()
    lex['.sql'] = MySqlLexer()
    lex['.mysql'] = MySqlLexer()
    lex['.tcl'] = TclLexer()
    lex['.awk'] = AwkLexer()
    return lex
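# Usage sketch (illustrative; `editor` stands for a hypothetical instance of
# the class that defines createLexers above). Unknown extensions fall back to
# a plain-text lexer instead of raising KeyError.
import os
from pygments.lexers.special import TextLexer

lexers = editor.createLexers()
ext = os.path.splitext('Main.java')[1]
lexer = lexers.get(ext, TextLexer())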
def get_tokens_unprocessed(self, text):
    for index, token, value in JavaLexer.get_tokens_unprocessed(self, text):
        if value in self.xtendKeywords:
            yield index, Keyword, value
        elif token is Name.Attribute:
            yield index, Name.Builtin.Pseudo, value
        else:
            yield index, token, value
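# Context sketch (illustrative, not from the original sources): the override
# above lives in a JavaLexer subclass that defines `xtendKeywords`; a minimal
# hypothetical version can be dropped straight into pygments.highlight().
from pygments import highlight
from pygments.formatters.html import HtmlFormatter
from pygments.lexers.jvm import JavaLexer
from pygments.token import Keyword


class KeywordRemappingLexer(JavaLexer):
    xtendKeywords = {'def', 'val', 'var'}  # hypothetical keyword set

    def get_tokens_unprocessed(self, text):
        for index, token, value in JavaLexer.get_tokens_unprocessed(self, text):
            if value in self.xtendKeywords:
                # Re-tag matching identifiers as keywords before formatting
                yield index, Keyword, value
            else:
                yield index, token, value


print(highlight('val answer = 42;', KeywordRemappingLexer(), HtmlFormatter()))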
def colorize_decompiled_method(method):
    """Print a syntax-highlighted decompiled method to the terminal.

    Args:
        method: the decompiled Java source of the method, as a string
    """
    print(highlight(method,
                    formatter=TerminalFormatter(bg="dark"),
                    lexer=JavaLexer()))
def highlightCode(javaCode, lineGroups):
    """Highlight Java source, marking the given line groups, e.g.:

    highlight_groups = [
        (HIGHLIGHT_COLOR, hl_lines),
        (DEPRECATED_COLOR, deprecated_lines),
    ]
    """
    javaLexer = JavaLexer()
    formatter = DensityFormatter(linenos='inline', highlight_groups=lineGroups)
    return highlight(javaCode, javaLexer, formatter)
class ProgrammingLexer(object):
    """lexes a string with multiple programming lexers and returns tokens"""

    lexers = {
        'actionscript': ActionScript3Lexer(),
        'c': CLexer(),
        'cpp': CppLexer(),
        'cs': CSharpLexer(),
        'java': JavaLexer(),
        'javascript': JavascriptLexer(),
        'perl': PerlLexer(),
        'php': PhpLexer(startinline=True),
        'python': PythonLexer(),
        'ruby': RubyLexer(),
        'vb': VbNetLexer(),
    }

    matched_languages = []
    data = None

    def __init__(self, matched_langs, data_string):
        self.matched_languages = matched_langs
        self.data = data_string

    def lex(self):
        """
        For every possible matched language, we run a lexer to see if we can
        eliminate it as a possible match. If we detect errors, or have no
        lexer matches, we remove it from the list.

        :return: the list of lexer results
        :rtype: list
        """
        results = {}
        threads = []

        # Loop through each matched language that has a lexer
        for lexer_id, lexer in \
                [[lexid, lxr] for lexid, lxr in self.lexers.items()
                 if lexid in self.matched_languages]:
            # Create a thread for each lexer
            thread = ProgrammingLexerThread(lexer_id, lexer, self.data)
            thread.start()
            threads.append(thread)

        for thr in threads:
            thr.join()

        for thr in [th for th in threads if th.result]:
            results[thr.thread_id] = thr.result

        return results
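# Usage sketch (illustrative): candidate languages are narrowed by running
# each language's lexer over the input; ProgrammingLexerThread is assumed to
# come from the same module as ProgrammingLexer.
source = 'public class Foo { private int x = 1; }'
results = ProgrammingLexer(['java', 'python'], source).lex()
# `results` maps each language id whose lexer produced a usable result to
# that lexer's score.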
def __export_api_call(self, project_info, api_call):
    last_exchange = app_settings.app_data_cache.get_last_exchange(api_call.id)
    api_test_case = app_settings.app_data_cache.get_api_test_case(api_call.id)
    doc = f"""
// {api_call.title}
{gen_api_request_class(api_call, last_exchange, api_test_case)}
{gen_api_response_class(api_call, last_exchange, api_test_case)}
{gen_controller(api_call, last_exchange, api_test_case)}
{gen_test(api_call, last_exchange, api_test_case)}
"""
    return highlight(doc, JavaLexer(), HtmlFormatter())
def get_andgrocfg_code(request, sha256, foo):
    storage_path = get_andro_cfg_storage_path(sha256)
    file_path = f'{storage_path}/{foo}'
    out = default_storage.open(file_path).read()
    if file_path.endswith('.raw'):
        out_formatted = highlight(
            out, JavaLexer(),
            HtmlFormatter(style=U39bStyle, noclasses=True))
        return HttpResponse(out_formatted, content_type="text/html")
    elif file_path.endswith('.png'):
        return HttpResponse(out, content_type='image/png')
    else:
        return HttpResponse(out, content_type='image/bmp')
def post_process_content(org_docs, api_method):
    """
    Processes the content retrieved from mongo before presenting it to the
    users.

    :param org_docs: documents retrieved from mongo
    :param api_method: the fully qualified name of the target API method
    :return: the processed documents
    """
    docs = list(org_docs)
    for doc in docs:
        doc['content'] = highlight(doc['content'], JavaLexer(),
                                   HtmlFormatter(linenos=True))
        doc['also-calls'] = list(doc['calls'])
        doc['also-calls'].remove(api_method)
    return docs
def highlight_code(self):
    html_snippets = []
    if self.matched_line_number():
        snippet_cluster_lns = self.compute_lines_to_highlight(
            self.adjacent_line_numbers())
        snippets = []
        for snippet_cluster_ln in snippet_cluster_lns:
            snippet = []
            for n in snippet_cluster_ln:
                snippet.append(self.file_content_lines[n])
            start_line = min(snippet_cluster_ln)
            highlight_lines = map(lambda x: x - start_line + 1,
                                  self.matching_line_numbers)
            snippets.append(("\n".join(snippet), start_line, highlight_lines))
        html_snippets = [
            highlight(snippet[0], JavaLexer(),
                      LatexFormatter(linenos=True, linenostart=snippet[1]))
            for snippet in snippets
        ]
        self.code_snippets = [
            GitSearchItemSnippet(self.hl_snippet(snippet[0], snippet[1]),
                                 snippet[1])
            for snippet in snippets
        ]
    if not html_snippets:
        html_snippets.append(
            highlight(self.file_content, JavaLexer(),
                      HtmlFormatter(linenos=True, anchorlinenos=True)))
        self.code_snippets.append(
            GitSearchItemSnippet(self.hl_snippet(self.file_content, 0), 0))
    return "".join(html_snippets)
def pygmentize(code, linenos=False, tofile=False, outputfile=''):
    style = 'colorful'
    defstyles = 'overflow:auto;width:auto;'
    formatter = HtmlFormatter(style=style,
                              linenos=False,
                              noclasses=True,
                              cssclass='',
                              cssstyles=defstyles + get_default_style(),
                              prestyles='margin: 0')
    html = highlight(code, JavaLexer(), formatter)
    if linenos:
        html = insert_line_numbers(html)
    html = "<!-- Report generated with TPGen -->" + html
    return html
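# Usage sketch (illustrative; get_default_style and insert_line_numbers are
# assumed to be defined in the same module as pygmentize).
html_fragment = pygmentize('public class Hello {}', linenos=False)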
def POST(self):
    data = web.input()
    code = data.code
    language = data.lang
    if language == 'python':
        from pygments.lexers.python import PythonLexer
        lexer = PythonLexer()
    elif language == 'php':
        from pygments.lexers.php import PhpLexer
        lexer = PhpLexer()
    elif language == 'java':
        from pygments.lexers.jvm import JavaLexer
        lexer = JavaLexer()
    elif language == 'javascript':
        from pygments.lexers.javascript import JavascriptLexer
        lexer = JavascriptLexer()
    elif language == 'html':
        from pygments.lexers.html import HtmlLexer
        lexer = HtmlLexer()
    elif language == 'cpp':
        from pygments.lexers.c_cpp import CppLexer
        lexer = CppLexer()
    elif language == 'shell':
        from pygments.lexers.shell import ShellSessionLexer
        lexer = ShellSessionLexer()
    elif language == 'matlab':
        from pygments.lexers.matlab import MatlabLexer
        lexer = MatlabLexer()
    elif language == 'ruby':
        from pygments.lexers.ruby import RubyLexer
        lexer = RubyLexer()
    elif language == 'r':
        from pygments.lexers.r import RConsoleLexer
        lexer = RConsoleLexer()
    elif language == 'lisp':
        from pygments.lexers.lisp import SchemeLexer
        lexer = SchemeLexer()
    elif language == 'go':
        from pygments.lexers.go import GoLexer
        lexer = GoLexer()
    else:
        # Fall back to plain text so `lexer` is always bound.
        from pygments.lexers.special import TextLexer
        lexer = TextLexer()
    formatter = html.HtmlFormatter(linenos=False, encoding='utf-8',
                                   nowrap=False)
    highlighted_snippet = highlight(code, lexer, formatter)
    return render.result(highlighted_snippet)
def export_data(self, api_calls: List[ApiCall]):
    test_file_header = """
/**
The generated code is divided into different sections
1. SpringFox configuration
2. Spring API request class
3. Spring API response class
4. Spring API controller definition
5. Unit testing with MockMvc
**/
"""
    project_info = app_settings.app_data_reader.get_or_create_project_info()
    fox_config = gen_fox_config(project_info)
    output = [
        self.__export_api_call(project_info, api_call)
        for api_call in api_calls
    ]
    return highlight(test_file_header + fox_config, JavaLexer(),
                     HtmlFormatter()) + "<br/>".join(output)
def srcCodeParser(self):
    # Recursively collect all files with the .java extension
    srcCodeAddresses = glob.glob(str(self.src) + '/**/*.java', recursive=True)

    # Create a Java lexer instance
    java_lexer = JavaLexer()
    src_files = OrderedDict()

    # Parse each source file in turn
    for src_file in srcCodeAddresses:
        with open(src_file) as file:
            src = file.read()

        # Attributes for each part of a source file
        comments = ''
        classNames = []
        attributes = []
        methodNames = []
        variables = []

        # Source parsing
        parseTree = None
        try:
            parseTree = javalang.parse.parse(src)
            for path, node in parseTree.filter(
                    javalang.tree.VariableDeclarator):
                # The second-to-last item on the path is the declaration kind
                if isinstance(path[-2], javalang.tree.FieldDeclaration):
                    attributes.append(node.name)
                elif isinstance(path[-2], javalang.tree.VariableDeclaration):
                    variables.append(node.name)
        except Exception:
            pass

        # Trim the source file
        ind = False
        if parseTree:
            if parseTree.imports:
                # Strip everything up to and including the last import
                last_imp_path = parseTree.imports[-1].path
                src = src[src.index(last_imp_path) + len(last_imp_path) + 1:]
            elif parseTree.package:
                package_name = parseTree.package.name
                src = src[src.index(package_name) + len(package_name) + 1:]
            else:
                # There is neither an import nor a package declaration
                ind = True
        else:
            # No parse tree
            ind = True

        # Lexically tokenize the source file
        lexed_src = pygments.lex(src, java_lexer)
        for i, token in enumerate(lexed_src):
            # If it's a comment, append it to comments
            if token[0] in Token.Comment:
                # Drop a leading multiline comment (e.g. a license header)
                if ind and i == 0 and token[0] is Token.Comment.Multiline:
                    src = src[src.index(token[1]) + len(token[1]):]
                    continue
                comments += token[1]
            # If it's a class, add it to class names
            elif token[0] is Token.Name.Class:
                classNames.append(token[1])
            # If it's a function, add it to method names
            elif token[0] is Token.Name.Function:
                methodNames.append(token[1])

        # Get the package declaration, if any
        if parseTree and parseTree.package:
            package_name = parseTree.package.name
        else:
            package_name = None

        if self.name == 'aspectj':
            # Special case for the AspectJ dataset: key by relative path
            src_files[os.path.relpath(src_file, start=self.src)] = SourceCode(
                src, comments, classNames, attributes, methodNames, variables,
                [os.path.basename(src_file).split('.')[0]], package_name)
        else:
            # Prefix the file name with its package declaration when present
            if package_name:
                src_id = package_name + '.' + os.path.basename(src_file)
            else:
                src_id = os.path.basename(src_file)
            src_files[src_id] = SourceCode(
                src, comments, classNames, attributes, methodNames, variables,
                [os.path.basename(src_file).split('.')[0]], package_name)

    return src_files
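# Minimal sketch (illustrative) of the tokenization step above: pygments.lex
# yields (token_type, value) pairs, from which class and method names can be
# pulled out by token type.
import pygments
from pygments.lexers.jvm import JavaLexer
from pygments.token import Token

sample = 'public class Foo { void bar() {} }'
for ttype, value in pygments.lex(sample, JavaLexer()):
    if ttype is Token.Name.Class:
        print('class:', value)   # -> class: Foo
    elif ttype is Token.Name.Function:
        print('method:', value)  # -> method: bar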
JAVA_LANG = "java" PY_LANG = "python" JS_LANG = "javascript" JSON_LANG = "json" GO_LANG = "go" from pygments import highlight from pygments.formatters.html import HtmlFormatter from pygments.lexers.data import JsonLexer from pygments.lexers.javascript import JavascriptLexer from pygments.lexers.jvm import JavaLexer from pygments.lexers.python import Python3Lexer from pygments.lexers.go import GoLexer highlighter = { JAVA_LANG: JavaLexer(), PY_LANG: Python3Lexer(), JS_LANG: JavascriptLexer(), JSON_LANG: JsonLexer(), GO_LANG: GoLexer() } def syntax_highlighter(generated_code, code_language): return highlight(generated_code, highlighter.get(code_language), HtmlFormatter())
def setUpClass(cls):
    cls.lexer = JavaLexer()
class CodeChunkTokenizer():

    def __init__(self, language="python"):
        """
        :param language: python, javascript, java or cpp
        """
        self.language = language
        if self.language == "python":
            self.lexer = PythonLexer()
        elif self.language == "javascript":
            self.lexer = JavascriptLexer()
        elif self.language == "cpp":
            self.lexer = CppLexer()
        elif self.language == "java":
            self.lexer = JavaLexer()
        else:
            raise NotImplementedError

    def tokenize(self, code_lines, return_types=False, ignore_types=()):
        # if self.language == "python":
        #     return self._python_tokenize(code_lines, return_types=return_types,
        #                                  ignore_types=ignore_types)
        if self.language in ["python", "javascript", "cpp", "java"]:
            return self._pygment_tokenize(code_lines,
                                          return_types=return_types,
                                          ignore_types=ignore_types)
        else:
            raise NotImplementedError

    def _pygment_tokenize(self, code_lines, return_types=False,
                          ignore_types=()):
        """Tokenize with the configured Pygments lexer.

        :param code_lines: iterable of byte strings, one per source line
        :param return_types: also return the Pygments token types
        :param ignore_types: token types to filter out
        :return: the tokens, plus their types if requested
        """
        try:
            code = "".join([
                code_line.decode('ascii', errors='ignore')
                for code_line in code_lines
            ])
            types, tokens = list(
                zip(*[(ttype, token)
                      for ttype, token in self.lexer.get_tokens(code)
                      if not in_any(ttype, ignore_types)]))
            if return_types:
                return tokens, types
            else:
                return tokens
        except Exception as e:
            warnings.warn(str(e))
            return []

    def _python_tokenize(self, code_lines, return_types=False,
                         ignore_types=()):
        """Tokenize Python source with the stdlib tokenize module."""
        code = '\n'.join(code_lines)
        tokens = tokenize.generate_tokens(StringIO(code).readline)
        tokens_types = []
        try:
            # http://stackoverflow.com/questions/1769332/script-to-remove-python-comments-docstrings
            prev_toktype = tokenize.INDENT
            for tok in tokens:
                token_type = tok[0]
                token_string = tok[1]
                start_line, start_col = tok[2]
                # Skip ignored types
                if token_type in ignore_types:
                    pass
                # This series of conditionals removes docstrings:
                elif tokenize.COMMENT in ignore_types \
                        and token_type == tokenize.STRING:
                    if prev_toktype != tokenize.INDENT:
                        # This is likely a docstring; double-check we're not
                        # inside an operator:
                        if prev_toktype != tokenize.NEWLINE:
                            # Catch whole-module docstrings:
                            if start_col > 0:
                                # Unlabelled indentation means we're inside
                                # an operator
                                tokens_types.append((token_string, token_type))
                else:
                    tokens_types.append((token_string, token_type))
                prev_toktype = token_type
        except Exception as e:
            warnings.warn(str(e))
            return []
        tokens, types = list(zip(*tokens_types))
        if return_types:
            return tokens, list(map(tok_name.__getitem__, types))
        return tokens

    def batch_tokenize(self, code_line_chunks, ignore_types=(),
                       num_processes=cpu_count()):
        results = [None] * len(code_line_chunks)
        NUMBER_OF_PROCESSES = num_processes
        TASKS = [(do_task, (self, i, code_line_chunk, False, ignore_types))
                 for i, code_line_chunk in enumerate(code_line_chunks)]

        # Create queues
        task_queue = Queue()
        done_queue = Queue()
        processes_container = []

        # Start worker processes
        for i in range(NUMBER_OF_PROCESSES):
            p = Process(target=worker, args=(task_queue, done_queue))
            processes_container.append(p)
            p.start()

        # Submit tasks
        for task in TASKS:
            task_queue.put(task)

        # Collect results
        for _ in range(len(TASKS)):
            i, result = done_queue.get()
            results[i] = result

        # Tell child processes to stop
        for i in range(NUMBER_OF_PROCESSES):
            task_queue.put('STOP')
        for process in processes_container:
            process.terminate()

        return results
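# Usage sketch (illustrative): the tokenizer expects byte strings, matching
# the .decode('ascii', errors='ignore') call in _pygment_tokenize; the
# in_any() helper is assumed to come from the same module.
tokenizer = CodeChunkTokenizer(language="java")
tokens = tokenizer.tokenize([b'public class Foo {\n', b'}\n'])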
def __init__(self, **kwargs):
    super(JavaTokenizer, self).__init__(**kwargs)
    self._lexer = JavaLexer()
def highlight_file(path):
    file_content = read_file(path)
    return highlight(
        file_content, JavaLexer(),
        HtmlFormatter(linenos=True, anchorlinenos=True, lineanchors="foo"))
def highlight_code(self, search_count, rank, targetpath):
    html_snippets = []
    matched_line_numbers = self.matched_line_number()
    if matched_line_numbers:
        snippet_cluster_lns = self.compute_lines_to_highlight(
            self.adjacent_line_numbers())
        snippets = []

        # #################### Data provision ####################
        # snippet_cluster_lns     -> line clusters to be highlighted
        # self.file_content_lines -> the entire code
        pure_snippets_for_data_requirement = []

        for snippet_cluster_ln in snippet_cluster_lns:
            snippet = []
            for n in snippet_cluster_ln:
                snippet.append(self.file_content_lines[n])
                pure_snippets_for_data_requirement.append(
                    self.file_content_lines[n])
            start_line = min(snippet_cluster_ln)
            # end_line = max(snippet_cluster_ln)
            highlight_lines = map(lambda x: x - start_line + 1,
                                  self.matching_line_numbers)
            snippets.append(("\n".join(snippet), start_line, highlight_lines))

        # Data provision: for every Defects4J query, in rank order, write out
        # the highlighted line numbers plus the full code file.
        final_str = "\n".join(self.file_content_lines)
        original_project_list = [
            'knutwalker_google-closure-compiler', 'google_closure-compiler',
            'weitzj_closure-compiler', 'jfree_jfreechart-fse',
            'jfree_jfreechart', 'apache_commons-math', 'apache_commons-lang',
            'mockito_mockito', 'bryceguo_mockito'
        ]
        # Check for duplicate projects.
        if self.file_path.split('/')[6] not in original_project_list:
            purepath = targetpath[:-4]
            if not os.path.exists(purepath):
                os.makedirs(purepath)
            testcode_path = purepath + '_result_testcode'
            if not os.path.exists(testcode_path):
                os.makedirs(testcode_path)
            if self.file_path.split('/')[-1] == 'Test.java':
                # Filter out result files named exactly Test.java
                pass
            else:
                final_path = purepath + "/" + str(rank) + '_' + str(
                    "||".join(self.file_path.split('/')[6:]))
                write_file(final_path, str(final_str))
                write_file(final_path + "_", str(snippet_cluster_lns))
                print "*****************************", final_path, "is Done.."

                # Also look up the matching test code and save it alongside:
                # >> Re-scan the whole project path of each result file for
                #    files whose names contain the 'test' keyword around the
                #    current result file's name.
                # >> If a test file is found, copy it over.
                result_file_name = self.file_path.split('/')[-1]
                # Pure name of the java file (e.g., ABC.java -> ABC)
                result_pure_file_name = (
                    (self.file_path.split('/')[-1]).split('.'))[0]
                stopwords = ['A', 'a', 'test']
                javafiles = java_files_from_dir('/'.join(
                    self.file_path.split('/')[:7]))
                for javafile in javafiles:
                    if result_pure_file_name in stopwords:
                        continue
                    name_of_javafile = javafile.split('/')[-1].split('.')[0]
                    # If the file name contains both the result file's name
                    # and the keyword 'test' (e.g., xxxtest.java or
                    # testxxx.java)
                    if 'test' in name_of_javafile and \
                            result_pure_file_name in name_of_javafile:
                        content = read_file(javafile)
                        out_path = testcode_path + '/' + str(rank) + '_' + str(
                            '||'.join(javafile.split('/')[6:]))
                        write_file(out_path, content)
                        write_file('/Users/Falcon/Desktop/count.txt', out_path)
                        write_file('/Users/Falcon/Desktop/count.txt', javafile)
                        write_file('/Users/Falcon/Desktop/count.txt',
                                   '**********************')

        html_snippets = [
            highlight(snippet[0], JavaLexer(),
                      LatexFormatter(linenos=True, linenostart=snippet[1]))
            for snippet in snippets
        ]
        self.code_snippets = [
            GitSearchItemSnippet(self.hl_snippet(snippet[0], snippet[1]),
                                 snippet[1])
            for snippet in snippets
        ]
    if not html_snippets:
        html_snippets.append(
            highlight(self.file_content, JavaLexer(),
                      HtmlFormatter(linenos=True, anchorlinenos=True)))
        self.code_snippets.append(
            GitSearchItemSnippet(self.hl_snippet(self.file_content, 0), 0))
    return "".join(html_snippets)
def hl_snippet(self, source, start):
    return highlight(source, JavaLexer(),
                     HtmlFormatter(linenos=True,
                                   anchorlinenos=True,
                                   linenostart=start))