def process_diff_srcml(data, type):
    """Convert a diff snippet into processed srcML source text.

    The "shuangyinhao" placeholder is removed from ``data``, the first line
    is dropped, empty lines are discarded and a leading "+" or "-" is cut
    from every remaining line. The cleaned text is written to ``Temp.cs``
    (when ``type == 1``) or ``Temp.java`` (any other value), converted to
    ``Temp.xml`` by the external ``srcml`` command, parsed, flattened with
    ``iterate_recursive`` and finally passed through
    ``process_srcml_source_code``.

    :param data: diff text to process.
    :param type: ``1`` selects C#; anything else selects Java.
    :return: whatever ``process_srcml_source_code`` returns.
    """
    # Everything after the first line, with the placeholder already removed.
    diff_lines = data.replace("shuangyinhao", "").split("\n")[1:]
    cleaned = [
        line[1:] if line[0] in ("+", "-") else line
        for line in diff_lines
        if line
    ]
    new_data = "\n".join(cleaned)

    lang = "cs" if type == 1 else "java"
    name = "Temp." + lang
    with open(name, "w") as f:
        f.write(new_data)

    # srcml output is redirected by the shell into a fixed file name.
    os.system("srcml " + name + " > Temp.xml")

    tree = parse_tree("Temp.xml")
    root = tree.getroot()
    parent_map = get_parent_map(tree)
    flattened = iterate_recursive(root, "", parent_map)
    return process_srcml_source_code(flattened)
def process_diff_srcml(data, type):
    """Run a diff snippet through srcML and return the processed code.

    Steps, in order:

    1. strip the "shuangyinhao" placeholder and drop the first line;
    2. skip empty lines and remove a leading "+"/"-" from the others;
    3. write the text to ``Temp.cs`` (``type == 1``) or ``Temp.java``;
    4. call the ``srcml`` command, redirecting its output to ``Temp.xml``;
    5. parse ``Temp.xml``, flatten it with ``iterate_recursive`` and
       post-process the text with ``process_srcml_source_code``.

    :param data: diff text to process.
    :param type: ``1`` for C#, any other value for Java.
    :return: the result of ``process_srcml_source_code``.
    """
    stripped = data.replace("shuangyinhao", "")
    kept_lines = []
    for raw_line in stripped.split("\n")[1:]:
        if not raw_line:
            # Blank lines are dropped entirely.
            continue
        marker = raw_line[0]
        if marker == "+" or marker == "-":
            kept_lines.append(raw_line[1:])
            continue
        kept_lines.append(raw_line)
    source_text = "\n".join(kept_lines)

    if type == 1:
        name = "Temp." + "cs"
    else:
        name = "Temp." + "java"

    with open(name, "w") as handle:
        handle.write(source_text)

    command = "srcml " + name + " > Temp.xml"
    os.system(command)

    tree = parse_tree("Temp.xml")
    root = tree.getroot()
    parent_map = get_parent_map(tree)
    return process_srcml_source_code(iterate_recursive(root, "", parent_map))
def process_expression(data, type):
    """Flatten an expression through srcML into space-separated tokens.

    ``data`` is written verbatim to ``Temp.cs`` (``type == 1``) or
    ``Temp.java`` (otherwise), converted to ``Temp.xml`` with the ``srcml``
    command, parsed and flattened with ``iterate_recursive``. The flattened
    text is split on single spaces, each piece is stripped, empty pieces
    are dropped and the rest are re-joined with one space.

    :param data: source text of the expression.
    :param type: ``1`` for C#, any other value for Java.
    :return: the normalised, single-space-separated string.
    """
    lang = "cs" if type == 1 else "java"
    name = "Temp." + lang
    with open(name, "w") as out:
        out.write(data)

    os.system("srcml " + name + " > Temp.xml")

    tree = parse_tree("Temp.xml")
    root = tree.getroot()
    parent_map = get_parent_map(tree)
    flattened = iterate_recursive(root, "", parent_map)

    tokens = []
    for piece in flattened.split(" "):
        piece = piece.strip()
        if piece:
            tokens.append(piece)
    return " ".join(tokens)
def pre_process(file_path):
    """Flatten one srcML file and append the text to the processed-data tree.

    ``file_path`` is split on "/"; components 6 and 7 become the
    sub-directories and component 8 the file name under
    ``CURRENT_DIR/SRCML_PROCESSED_DATA_SPLIT_CAMEL_ALL_V3``. The file is
    parsed, flattened from the document root with ``iterate_recursive``,
    post-processed with ``process_srcml_source_code`` and appended (UTF-8,
    encoding errors ignored) to the target file.

    Any exception raised along the way is printed and swallowed; the
    function returns ``None``.

    :param file_path: "/"-separated path of the srcML file to process.
    """
    parts = file_path.split("/")
    try:
        tree = parse_tree(file_path)

        # 1 marks a C# input (path component 7 is "cs"), 0 anything else.
        # The flag is not consumed: a C#-specific branch (namespace ->
        # block lookup) used to depend on it but is disabled, so both
        # languages are flattened from the root.
        file_type = 1 if parts[7] == "cs" else 0

        parent_map = get_parent_map(tree)
        root = tree.getroot()
        processed_code = process_srcml_source_code(
            iterate_recursive(root, "", parent_map)
        )

        target_dir = os.path.join(
            CURRENT_DIR,
            "SRCML_PROCESSED_DATA_SPLIT_CAMEL_ALL_V3",
            parts[6],
            parts[7],
        )
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        new_path = os.path.join(target_dir, parts[8])
        # Append mode: repeated runs add to an existing output file.
        with codecs.open(new_path, "a", encoding="utf-8", errors="ignore") as sink:
            sink.write(processed_code)
    except Exception as e:
        print(e)
def process_expression(data, type):
    """Return the srcML-flattened form of ``data`` with normalised spacing.

    The text is written to ``Temp.cs`` when ``type == 1`` and to
    ``Temp.java`` otherwise, turned into ``Temp.xml`` by the ``srcml``
    command, parsed, and flattened with ``iterate_recursive``. Pieces
    obtained by splitting the flattened text on " " are stripped, empty
    ones are discarded, and the survivors are joined with a single space.

    :param data: source text of the expression.
    :param type: ``1`` selects C#; anything else selects Java.
    :return: the cleaned-up string.
    """
    if type == 1:
        name = "Temp." + "cs"
    else:
        name = "Temp." + "java"

    with open(name, "w") as handle:
        handle.write(data)

    command = "srcml " + name + " > Temp.xml"
    os.system(command)

    tree = parse_tree("Temp.xml")
    root = tree.getroot()
    parent_map = get_parent_map(tree)
    raw_text = iterate_recursive(root, "", parent_map)

    # Strip every space-delimited chunk, then keep only the non-empty ones.
    stripped_chunks = (chunk.strip() for chunk in raw_text.split(" "))
    return " ".join(chunk for chunk in stripped_chunks if chunk)
# NOTE(review): whitespace-collapsed, truncated script fragment (uses Python 2 print statements).
# Visible logic: after appending file_path to java_paths, every (cs_path, java_path) pair is
# compared by path component 8 (text before the first "."); for matching names both srcML
# files are parsed, parent maps are built and the C#/Java class nodes are looked up.
# The loop over the C# class node's children and the handler for the open `try` are cut off here.
java_paths.append(file_path) for cs_path in cs_paths: for java_path in java_paths: cs_splits = cs_path.split("/") java_splits = java_path.split("/") cs_file = cs_splits[8] java_file = java_splits[8] if cs_file.split(".")[0] == java_file.split(".")[0]: print "###################" print cs_file try: cs_tree = parse_tree(cs_path) cs_parent_map = get_parent_map(cs_tree) cs_tree_str = "" cs_root = cs_tree.getroot() java_tree = parse_tree(java_path) java_parent_map = get_parent_map(java_tree) java_tree_str = "" java_root = java_tree.getroot() cs_class_node = get_class_node_cs(cs_root) java_class_node = get_class_node_java(java_root) biggest_block_cs = None biggest_block_java = None for c in cs_class_node.getchildren():
# NOTE(review): whitespace-collapsed, truncated script fragment; the handler for the open `try` is not visible.
# Visible logic: file_path is added to cs_paths when its path component 7 is "cs". For each
# collected path, get_necessary_information_to_process_source_code(...) is unpacked into six
# names, the srcML tree is parsed, and cs_parent_map is then reassigned from get_parent_map(cs_tree).
# The child of the C# class node tagged "block" (after removing STUPID_URL from the tag) is kept in
# biggest_block_cs; find_biggest_block(...) feeds get_all_decl_stmt_from_block_global(...) and
# get_information_of_decl_stmts(...).
split = file_path.split("/") if split[7] == "cs": cs_paths.append(file_path) for cs_path in cs_paths: try: print( "---------------------------------------------------------------------------------" ) print(cs_path) cs_splits = cs_path.split("/") cs_file = cs_splits[8] cs_parent_map, cs_global_vars_mapping, cs_package_object_mapping, cs_object_method_mapping, cs_third_party_package_object_mapping_list, cs_third_party_object_method_mapping_list = get_necessary_information_to_process_source_code( cs_path, "cs", project) cs_tree = parse_tree(cs_path) cs_parent_map = get_parent_map(cs_tree) cs_tree_str = "" cs_root = cs_tree.getroot() cs_class_node = get_class_node_cs(cs_root) biggest_block_cs = None for c in cs_class_node.getchildren(): tag = c.tag.replace(STUPID_URL, "") if tag == "block": biggest_block_cs = c biggest_block = find_biggest_block(cs_class_node) decl_stmts_global = get_all_decl_stmt_from_block_global( biggest_block) global_vars_mapping = get_information_of_decl_stmts( decl_stmts_global)
# NOTE(review): whitespace-collapsed, truncated script fragment. Line breaks are lost, so the
# boundary between the inline "#" comments and live code below cannot be recovered with certainty.
# Visible logic: prints the running index; when count > 0 it prints java_path, splits it on a
# backslash, unpacks two values from get_necessary_information_to_process_source_code(...),
# parses the srcML tree, reassigns java_parent_map from get_parent_map(java_tree), extracts the
# imports and candidate SDK packages, and starts scanning the Java class node's children for a
# "block" tag. The body of that check and the handler for the open `try` are cut off here.
print("Index : " + str(count) + "-------------------------------------------------------------") if count > 0: try: # java_paths.append(file_path) # print("---------------------------------------------------------------------------------") print(java_path) java_splits = java_path.split("\\") java_parent_map, java_global_vars_mapping = get_necessary_information_to_process_source_code( java_path, lang, project) java_tree = parse_tree(java_path) java_parent_map = get_parent_map(java_tree) java_tree_str = "" java_root = java_tree.getroot() imports = extract_all_import(java_root, lang) # print(imports) candidate_sdk_packages = get_candidate_sdk_packages_from_import_list( imports, lang) # print(candidate_sdk_packages) java_class_node = get_class_node_java(java_root) biggest_block_java = None for j in java_class_node.getchildren(): tag = j.tag.replace(STUPID_URL, "") if tag == "block":
# NOTE(review): whitespace-collapsed, truncated script fragment (uses Python 2 print statements).
# Visible logic: with project "openjdk-8_temp" and lang "java", walks
# CURRENT_DIR/SRCML_DATA_2/<lang>/<project>, parses every file, and for each decoration in
# ["class", "interface"] collects matching nodes via iterate_to_get_node_with_type(...).
# Only the first collected node is inspected: its child tagged "block" (after removing STUPID_URL
# from the tag) is stored in biggest_block_java. The rest of the loop and the handler for the
# open `try` are cut off here.
project = "openjdk-8_temp" lang = "java" java_signatures = list() signature_path = "./SIGNATURE_DATA/" + project print "Signature path : " + signature_path for r, ds, files in os.walk( os.path.join(CURRENT_DIR, "SRCML_DATA_2", lang, project)): for file in files: file_path = os.path.join(r, file) print "Parsing file : " + file_path + " --------------------------" try: tree = parse_tree(file_path) root = tree.getroot() parent_map = get_parent_map(tree) decorations = ["class", "interface"] for decoration in decorations: java_class_nodes = list() java_class_nodes = iterate_to_get_node_with_type( root, decoration, java_class_nodes) biggest_block_java = None class_name = None if len(java_class_nodes) != 0: # print java_class_nodes[0].text for c in java_class_nodes[0].getchildren(): tag = c.tag.replace(STUPID_URL, "") if tag == "block": biggest_block_java = c
# NOTE(review): whitespace-collapsed, truncated script fragment. Line breaks are lost, so the
# boundary between the inline "#" comments and live code below cannot be recovered with certainty.
# Visible logic: walks CURRENT_DIR/SRCML_DATA_<i>/<lang>, incrementing count and printing the
# index for every file; only files with count > 1791377 are processed (presumably a resume point
# from an earlier run; confirm). Processing unpacks two values from
# get_necessary_information_to_process_source_code(...), parses the srcML tree, reassigns
# cs_parent_map from get_parent_map(cs_tree), extracts imports and candidate SDK packages, and
# stores the class node's "block" child in biggest_block_cs.
# The handler for the open `try` is cut off here.
for r,ds,files in os.walk(os.path.join(CURRENT_DIR,"SRCML_DATA_" + str(i),lang)): for file in files: cs_path = os.path.join(r,file) split = cs_path.split("/") count = count + 1 print("Index : " + str(count) + "-------------------------------------------------------------") if count > 1791377: try: # print("---------------------------------------------------------------------------------") print(cs_path) cs_splits = cs_path.split("/") cs_file = cs_splits[8] cs_parent_map, cs_global_vars_mapping = get_necessary_information_to_process_source_code(cs_path, lang, project) cs_tree = parse_tree(cs_path) cs_parent_map = get_parent_map(cs_tree) cs_tree_str = "" cs_root = cs_tree.getroot() imports = extract_all_import(cs_root,lang) # print(imports) candidate_sdk_packages = get_candidate_sdk_packages_from_import_list(imports,lang) # print(candidate_sdk_packages) cs_class_node = get_class_node_cs(cs_root) biggest_block_cs = None for c in cs_class_node.getchildren(): tag = c.tag.replace(STUPID_URL,"") if tag == "block": biggest_block_cs = c
# NOTE(review): whitespace-collapsed, truncated script fragment; the handler for the open `try` is not visible.
# Visible logic: walks CURRENT_DIR/SRCML_DATA/<project> and collects every file whose path
# component 7 (split on "/") is "java". For each collected path,
# get_necessary_information_to_process_source_code(...) is unpacked into six names, the srcML tree
# is parsed, and java_parent_map is then reassigned from get_parent_map(java_tree). The class
# node's "block" child is stored in biggest_block_java; find_biggest_block(...) feeds
# get_all_decl_stmt_from_block_global(...) and get_information_of_decl_stmts(...).
for r,ds,files in os.walk(os.path.join(CURRENT_DIR,"SRCML_DATA",project)): for file in files: file_path = os.path.join(r,file) split = file_path.split("/") if split[7] == "java": java_paths.append(file_path) for java_path in java_paths: try: print("--------------------") print(java_path) java_splits = java_path.split("/") java_file = java_splits[8] java_parent_map, java_global_vars_mapping, java_package_object_mapping, java_object_method_mapping, java_third_party_package_object_mapping_list, java_third_party_object_method_mapping_list = get_necessary_information_to_process_source_code(java_path, "java", project) java_tree = parse_tree(java_path) java_parent_map = get_parent_map(java_tree) java_tree_str = "" java_root = java_tree.getroot() java_class_node = get_class_node_java(java_root) biggest_block_java = None for j in java_class_node.getchildren(): tag = j.tag.replace(STUPID_URL,"") if tag == "block": biggest_block_java = j biggest_block = find_biggest_block(java_class_node) decl_stmts_global = get_all_decl_stmt_from_block_global(biggest_block) global_vars_mapping = get_information_of_decl_stmts(decl_stmts_global)
# NOTE(review): whitespace-collapsed, truncated script fragment (uses Python 2 print statements).
# Visible logic: with project "corefx" and lang "cs", walks
# CURRENT_DIR/SRCML_DATA_2/<lang>/<project>, parses every file, and for each decoration in
# ["class", "interface", "struct"] collects matching nodes via iterate_to_get_node_with_type(...).
# Only the first collected node is inspected: its "block" child is stored in biggest_block_cs.
# The handling of the "name" child and the handler for the open `try` are cut off here.
project = "corefx" lang = "cs" signature_path = "./SIGNATURE_DATA/" + project print "Signature path : " + signature_path for r,ds,files in os.walk(os.path.join(CURRENT_DIR,"SRCML_DATA_2",lang,project)): for file in files: file_path = os.path.join(r, file) print "Parsing file : " + file_path + " --------------------------" try: tree = parse_tree(file_path) root = tree.getroot() parent_map = get_parent_map(tree) decorations = ["class","interface","struct"] for decoration in decorations: cs_class_nodes = list() cs_class_nodes = iterate_to_get_node_with_type(root,decoration,cs_class_nodes) biggest_block_cs = None class_name = None if len(cs_class_nodes) != 0: for c in cs_class_nodes[0].getchildren(): tag = c.tag.replace(STUPID_URL,"") if tag == "block": biggest_block_cs = c if tag == "name":
# NOTE(review): whitespace-collapsed, truncated script fragment (uses Python 2 print statements).
# Visible logic: every (cs_path, java_path) pair is compared by path component 8 (text before
# the first "."); for matching names both srcML files are parsed, parent maps are built and the
# C#/Java class nodes are looked up. The pairing is a nested loop over both path lists.
# The loop over the C# class node's children and the handler for the open `try` are cut off here.
for cs_path in cs_paths: for java_path in java_paths: cs_splits = cs_path.split("/") java_splits = java_path.split("/") cs_file = cs_splits[8] java_file = java_splits[8] if cs_file.split(".")[0] == java_file.split(".")[0]: print "###################" print cs_file try: cs_tree = parse_tree(cs_path) cs_parent_map = get_parent_map(cs_tree) cs_tree_str = "" cs_root = cs_tree.getroot() java_tree = parse_tree(java_path) java_parent_map = get_parent_map(java_tree) java_tree_str = "" java_root = java_tree.getroot() cs_class_node = get_class_node_cs(cs_root) java_class_node = get_class_node_java(java_root) biggest_block_cs = None biggest_block_java = None for c in cs_class_node.getchildren():