def inconsistent_lib():
    """Group inconsistent projects per library, persist the grouping, and
    draw a horizontal bar chart of the 20 most affected libraries."""
    raw = read_json("../action-8-6.json")
    result = {}
    for proj_id, proj_data in raw.items():
        for jar in proj_data:
            # Keep only "<groupId>__fdse__<artifactId>" as the library key.
            lib_key = "__fdse__".join(jar.split("__fdse__")[0:2])
            projects = result.setdefault(lib_key, [])
            if proj_id not in projects:
                projects.append(proj_id)
    print(len(result))
    write_json_format(output_dir + "inconsistent.json", result)

    # Re-read what we just wrote and rank libraries by project count.
    datas = read_json(output_dir + "inconsistent.json")
    usage = {lib: len(projs) for lib, projs in datas.items()}
    ranked = sorted(usage.items(), key=lambda kv: kv[1], reverse=True)[:20]
    print(ranked)
    ranked = ranked[::-1]  # ascending order reads bottom-up on the barh plot
    print(ranked)
    values = [cnt for _, cnt in ranked]
    keys = [lib for lib, _ in ranked]
    draw_barh(keys, values, "The Number of Projects (#)")
def call_graph_preproess():
    """Normalize method names in every call-graph file and write the result
    to call_graph_preprocessed/.

    Constructor entries ("<init>") are rewritten via parse_init(); all other
    method names have '$' (inner-class separators) replaced by '.'.
    NOTE(review): name keeps the original's "preproess" typo because callers
    may reference it.
    """
    src_dir = "call_graph"
    for fname in os.listdir(src_dir):
        print(fname)
        call_graph = read_json(os.path.join(src_dir, fname))
        for java_file_obj in call_graph.values():
            for jar, jar_obj in java_file_obj.items():
                if jar == "module":
                    continue  # "module" is metadata, not a jar entry
                for new_jar_obj in jar_obj.values():
                    for api, api_obj in new_jar_obj.items():
                        renamed = {}
                        for method, callers in api_obj.items():
                            if "<init>" in method:
                                renamed[parse_init(method)] = callers
                            else:
                                renamed[method.replace("$", ".")] = callers
                        # Replacing the value of an existing key is safe
                        # while iterating items().
                        new_jar_obj[api] = renamed
        write_json("call_graph_preprocessed/" + fname, call_graph)
def get_lib_in_tongjiresult():
    """Extract, per project and per module, which version of each library is
    declared, and write the map to datas/tongji_libs.json.

    Aborts the whole process if the same library appears twice for one
    module (that would indicate inconsistent input data).
    """
    count = 0
    final = {}
    json_data = read_json("../tongjiresult-8-5.json")
    for file in json_data:
        print(file)
        new_proj = {}
        file_obj = json_data[file]
        for jar in file_obj:
            trees = file_obj[jar]
            for tree_id in trees:
                subtree = trees[tree_id]
                count += len(subtree)
                for entry in subtree:
                    # NOTE: "usePostion" (sic) is the key used throughout the
                    # data files — do not "fix" the spelling.
                    usePostion = entry["usePostion"]
                    version = entry["resolved_version"]
                    # Derive the module directory from the pom.xml path.
                    if not usePostion.endswith("/pom.xml"):
                        _module = usePostion.replace("pom.xml", "")
                    else:
                        _module = usePostion.replace("/pom.xml", "")
                    if _module in new_proj:
                        if jar in new_proj[_module]:
                            # Duplicate library for one module: dump and stop.
                            print(jar + " " + new_proj[_module][jar])
                            sys.exit(0)
                    else:
                        new_proj[_module] = {}
                    new_proj[_module][jar] = version
        final[file] = new_proj
    write_json("datas/tongji_libs.json", final)
def version2jar(version_lib):
    """Resolve a "<version>__fdse__<groupId>__fdse__<artifactId>__fdse__<type>
    [__fdse__<classifier>]" key to its jar file name.

    Results are cached in datas/lib_jar_pair.txt; a cache miss queries the
    database. Returns the jar package URL; exits the process (status 1) when
    the lookup fails.
    """
    lib_jar_pair = read_json("datas/lib_jar_pair.txt")
    # Fast path: answer already cached on disk.
    if version_lib in lib_jar_pair:
        return lib_jar_pair[version_lib]
    lib_array = version_lib.split("__fdse__")
    version = lib_array[0]
    groupId = lib_array[1]
    artifactId = lib_array[2]
    _type = lib_array[3]
    try:
        # SECURITY NOTE: SQL built by string concatenation; acceptable for
        # this offline research script, but use parameterized queries if the
        # inputs can ever be untrusted.
        sql = "SELECT id FROM library_versions WHERE group_str = '" + groupId + "' and name_str = '" + artifactId + "' and version = '" + version + "'"
        version_id = database.querydb(db, sql)[0][0]
        if len(lib_array) > 4:
            # Optional fifth component is the Maven classifier.
            classifier = lib_array[4]
            sql = "SELECT jar_package_url FROM version_types WHERE (version_id = " + str(version_id) + " or version_id2 = " + str(version_id) + ") and type = '" + _type + "' and classifier = '" + classifier + "'"
        else:
            sql = "SELECT jar_package_url FROM version_types WHERE (version_id = " + str(version_id) + " or version_id2 = " + str(version_id) + ") and type = '" + _type + "' and (classifier is null or classifier='')"
        jar_package_url = database.querydb(db, sql)[0][0]
        lib_jar_pair[version_lib] = jar_package_url
        print("jar2version: " + version_lib + " -> " + jar_package_url)
        write_json("datas/lib_jar_pair.txt", lib_jar_pair)
        return jar_package_url
    except Exception:
        # BUGFIX: was a bare `except:` (which also swallowed SystemExit and
        # KeyboardInterrupt) and exited with status 0 — i.e. reported success
        # on failure. Now exits non-zero.
        sys.stderr.write("Can't find jar name for : " + version_lib + "\n")
        sys.exit(1)
def false_consistent_lib():
    """Draw the 20 libraries most often involved in false-consistent usage.

    The one-off step that generated false_consistent.json (grouping project
    ids per library from ../action-ununified-8-6.json, same shape as the
    inconsistent.json generation) was already run and remains commented out
    in history; this function only reads the persisted result.
    """
    datas = read_json(output_dir + "false_consistent.json")
    usage = {lib: len(projs) for lib, projs in datas.items()}
    ranked = sorted(usage.items(), key=lambda kv: kv[1], reverse=True)[:20]
    print(ranked)
    ranked = ranked[::-1]  # ascending for bottom-up barh rendering
    print(ranked)
    values = [cnt for _, cnt in ranked]
    keys = [lib for lib, _ in ranked]
    draw_barh(keys, values, "The Number of Projects (#)")
def add_lib_pair():
    """Manually register one version->jar mapping in the on-disk cache.

    NOTE(review): another add_lib_pair is defined later in this file and
    shadows this one at import time.
    """
    pairs = read_json("datas/lib_jar_pair.txt")
    print(len(pairs))
    key = "2.0.0-DP.2__fdse__com.netflix.turbine__fdse__turbine-core__fdse__jar"
    pairs[key] = "turbine-core-2.0.0-DP.2.jar"
    print(len(pairs))
    write_json("datas/lib_jar_pair.txt", pairs)
def get_proj_dict():
    """Return a map of project id (as str) -> raw project name.

    NOTE(review): this function is re-defined later in the file with
    slightly different behavior (the later one rewrites "__fdse__" to "/");
    the later definition wins at import time.
    """
    entries = read_json("E:/data/200_plus.txt")
    return {str(entry["id"]): entry["name"] for entry in entries}
def json_to_format():
    """Pretty-print every preprocessed call-graph file into format/*.json."""
    src_dir = "call_graph_preprocessed"
    for fname in os.listdir(src_dir):
        payload = read_json(os.path.join(src_dir, fname))
        write_json_format("format/" + fname.replace(".txt", ".json"), payload)
def project_id2name():
    """Rewrite datas/large_images.json keys from project ids to names.

    Slashes in names are replaced by spaces (names double as file names).
    """
    proj_dict = get_proj_dict()
    data = read_json("datas/large_images.json")
    renamed = {}
    for proj_id, value in data.items():
        name = proj_dict[proj_id].replace("/", " ")
        print(name)
        renamed[name] = value
    write_json_format("datas/large_images.json", renamed)
def get_proj_dict():
    """Return a map of project id (str) -> project name, with the
    "__fdse__" separator folded back to "/".

    NOTE(review): this shadows an earlier get_proj_dict defined above that
    does not perform the "__fdse__" -> "/" replacement.
    """
    entries = read_json("E:/data/200_plus.txt")
    proj_dict = {}
    for entry in entries:
        proj_dict[str(entry["id"])] = entry["name"].replace("__fdse__", "/")
    return proj_dict
def check_modules():
    """Check how many modules derived from tongjiresult pom paths also
    appear in the externally collected modules_maven.json.

    NOTE(review): the bare `return` below makes everything after it dead
    code — this looks like a deliberately left debugging short-circuit;
    remove the `return` to run the full check.
    """
    hit = 0
    total = 0
    proj_dict = get_proj_dict()
    modules = read_json("C:/Users/yw/Desktop/modules_maven.json")
    print(len(modules))
    return
    # --- dead code below (see note above) ---
    json_data = read_json("../tongjiresult-7-17.json")
    for proj_id in json_data:
        print("+++++++++++++++++++++++++++++ " + proj_id)
        proj_name = proj_dict[proj_id.replace(".txt", "")]
        if proj_name not in modules:
            proj_modules = []
        else:
            proj_modules = modules[proj_name]
        proj_obj = json_data[proj_id]
        temp_modules = set()
        for jar in proj_obj:
            jar_obj = proj_obj[jar]
            for tree_id in jar_obj:
                subtree = jar_obj[tree_id]
                for entry in subtree:
                    use_position = entry["usePostion"]
                    # Derive the module directory from the pom.xml path.
                    if not use_position.endswith("/pom.xml"):
                        _module = use_position.replace("pom.xml", "")
                    else:
                        _module = use_position.replace("/pom.xml", "")
                    temp_modules.add(_module)
        for m in temp_modules:
            total += 1
            if m in proj_modules:
                hit += 1
            else:
                print(m)  # module we derived but the external list lacks
    print(total)
    print(hit)
def get_pair():
    """Build the "<version>__fdse__<jar-key>" -> jar file name mapping by
    querying the database for every library recorded in tongji_libs.json,
    and write it to lib_jar_pair.txt.

    Entries with unresolved property placeholders ("${...}") are skipped;
    lookup failures are logged to stderr and skipped.
    """
    lib_jar_pair = {}
    db = database.connectdb()
    libs = read_json("datas/tongji_libs.json")
    for proj in libs:
        proj_obj = libs[proj]
        for _module in proj_obj:
            module_obj = proj_obj[_module]
            for jar in module_obj:
                version = module_obj[jar]
                # Already resolved for another project/module — skip.
                if version + "__fdse__" + jar in lib_jar_pair:
                    continue
                # jar key layout: groupId__fdse__artifactId__fdse__type[__fdse__classifier]
                jar_array = jar.split("__fdse__")
                groupId = jar_array[0]
                artifactId = jar_array[1]
                _type = jar_array[2]
                classifier = None
                if len(jar_array) > 3:
                    classifier = jar_array[3]
                # Unresolved Maven property placeholder — cannot look up.
                if "${" in version:
                    continue
                # NOTE(review): SQL built by string concatenation; fine for
                # this offline script, not injection-safe in general.
                sql = "SELECT id FROM library_versions WHERE group_str = '" + groupId + "' and name_str = '" + artifactId + "' and version = '" + version + "'"
                library_info = database.querydb(db, sql)
                library_id = None
                if len(library_info) > 0:
                    library_id = library_info[0][0]
                else:
                    # Library version unknown to the DB: log and move on.
                    sys.stderr.write(groupId + " " + artifactId + " " + version + " library_info" + "\n")
                    sys.stderr.write(str(library_id) + "\n")
                    continue
                if classifier is None:
                    sql = "SELECT jar_package_url FROM version_types WHERE (version_id = " + str(
                        library_id) + " or version_id2 = " + str(
                            library_id) + ") and type = '" + _type + "' and classifier is null"
                else:
                    sql = "SELECT jar_package_url FROM version_types WHERE (version_id = " + str(
                        library_id) + " or version_id2 = " + str(
                            library_id) + ") and type = '" + _type + "' and classifier = '" + classifier + "'"
                jar_result = database.querydb(db, sql)
                if len(jar_result) > 0:
                    lib_jar_pair[version + "__fdse__" + jar] = jar_result[0][0]
                else:
                    # No jar artifact recorded for this (version, type, classifier).
                    sys.stderr.write(groupId + " " + artifactId + " " + version + " " + str(library_id))
    write_json("lib_jar_pair.txt", lib_jar_pair)
def maven_count():
    """Count the maven-type projects and save them to a separate file."""
    projects = read_json("E:/data/200_plus_with_type.txt")
    print(len(projects))
    maven_projs = [proj for proj in projects if proj["type"] == "maven"]
    count = len(maven_projs)
    print(count)
    write_json_format("E:/data/200_plus_maven.txt", maven_projs)
def get_version_of_jar(version):
    """Map a jar file name back to its library version string.

    Resolution order: the on-disk reverse cache, then the database, then a
    name-parsing fallback via jar2version().
    """
    lib_jar_pair_reverse = read_json("datas/lib_jar_pair_reverse.txt")
    if version in lib_jar_pair_reverse:
        # Cached value is "<version>__fdse__..."; the first component is
        # the version string.
        return lib_jar_pair_reverse[version].split("__fdse__")[0]
    try:
        # NOTE: SQL built by concatenation — acceptable for this offline
        # script, not injection-safe for untrusted input.
        sql = "SELECT version_id FROM version_types WHERE jar_package_url = '" + version + "'"
        version_id = database.querydb(db, sql)[0][0]
        sql = "SELECT version FROM library_versions WHERE id = " + str(version_id)
        new_version = database.querydb(db, sql)[0][0]
    except Exception:
        # BUGFIX: was a bare `except:`, which also caught SystemExit and
        # KeyboardInterrupt. A DB miss (IndexError on the empty result)
        # still falls back to parsing the jar name.
        new_version = jar2version(version)
    print("jar2version: " + version + " -> " + new_version)
    return new_version
def no_module_maven_projs():
    """Drop maven projects that declare no modules; persist the rest."""
    modules = read_json("C:/Users/yw/Desktop/modules_maven.json")
    print(len(modules))
    empty_count = 0
    kept = {}
    for name, mods in modules.items():
        if len(mods) == 0:
            empty_count += 1
        else:
            kept[name] = mods
    print(empty_count)
    print(len(kept))
    write_json_format("E:/data/multiversion/modules_maven.json", kept)
def get_all_modules():
    """Derive each project's module directories from its pom.xml paths and
    write them to datas/tongji_modules.json."""
    tongji = read_json("datas/tongji_libs.json")
    final = {}
    for proj_id, poms in tongji.items():
        print(proj_id)
        module_dirs = []
        for pom in poms.keys():
            # Strip the trailing pom.xml (with or without leading slash) to
            # obtain the module directory.
            if pom.endswith("/pom.xml"):
                module_dirs.append(pom.replace("/pom.xml", ""))
            else:
                module_dirs.append(pom.replace("pom.xml", ""))
        final[proj_id] = module_dirs
    write_json("datas/tongji_modules.json", final)
def ic_fc():
    """Bar chart of the 20 most pervasive inconsistent libraries.

    To plot the false-consistent set instead, read fc_ic["fc"] and change
    the y-axis label accordingly (as the original commented variant did).
    """
    fc_ic = read_json(output_dir + "meta-popular-fc-ic-lib.json")
    data = dict(fc_ic["ic"])
    xlabel = "Usage Number"
    ylabel = "Most Pervasive Inconsistent Libraries"
    ranked = sorted(data.items(), key=lambda kv: kv[1], reverse=True)[:20]
    ranked = ranked[::-1]  # ascending for bottom-up barh rendering
    values = [cnt for _, cnt in ranked]
    # Shorten keys to "groupId:artifactId".
    keys = [":".join(lib.split("__fdse__")[0:2]) for lib, _ in ranked]
    draw_barh(keys, values, xlabel, ylabel)
def ic_fc_dot():
    """Scatter plot of library count vs module count for the fc/ic entries
    in rq1-scatterdot.json, clipping outliers beyond (20, 200)."""
    entries = read_json(output_dir + "rq1-scatterdot.json")
    fc_keys, fc_values = [], []
    ic_keys, ic_values = [], []
    for entry in entries:
        # entry layout (from usage): [?, "fc"|"ic", lib_count, module_count]
        if entry[2] > 20 or entry[3] > 200:
            continue  # same clip applied to both series in the original
        if entry[1] == "fc":
            fc_values.append(entry[2])
            fc_keys.append(entry[3])
        else:
            ic_values.append(entry[2])
            ic_keys.append(entry[3])
    xlabel = "The Number of Modules (#)"
    ylabel = "The Number of Libraries (#)"
    draw_dots(fc_keys, fc_values, ic_keys, ic_values, xlabel, ylabel)
def add_lib_pair():
    """Manually seed the version->jar cache with one hand-resolved entry.

    NOTE(review): this re-defines the add_lib_pair declared earlier in the
    file; this later definition wins at import time.
    """
    pairs = read_json("datas/lib_jar_pair.txt")
    print(len(pairs))
    pairs["3.0.20100224__fdse__javax.servlet__fdse__servlet-api__fdse__jar"] = "servlet-api-3.0.20100224.jar"
    print("jar2version: 3.11.0-SNAPSHOT__fdse__com.squareup.okhttp__fdse__mockwebserver__fdse__jar -> mockwebserver-3.11.0-20180713.034253-175.jar")
    print(len(pairs))
    write_json("datas/lib_jar_pair.txt", pairs)


# Scratch notes kept from the original file: version pairs that could not be
# compared automatically, and jar-name -> version samples.
# uncomparable : 2.6.0-cdh5.12.2 2.6.0-cdh5.13.1 : 4600 : org.apache.hadoop__fdse__hadoop-aws__fdse__jar : 0
# uncomparable : 2.6.0-cdh5.10.1 2.6.0-cdh5.12.2 : 4600 : org.apache.hadoop__fdse__hadoop-client__fdse__jar : 0
# uncomparable : 2.6.0-cdh5.10.1 2.6.0-cdh5.12.2 : 4600 : org.apache.hadoop__fdse__hadoop-minicluster__fdse__jar : 0
# uncomparable : 4.10.3-cdh5.8.5 4.10.3-cdh5.5.6 : 4600 : org.apache.solr__fdse__solr-test-framework__fdse__jar : 0
# uncomparable : 3.4.5-cdh5.9.2 3.4.5-mapr-1503 : 4600 : org.apache.zookeeper__fdse__zookeeper__fdse__jar : 0
# uncomparable : 1.2.0-cdh5.14.0 1.2.0-cdh5.15.0 : 4600 : org.apache.hbase__fdse__hbase-testing-util__fdse__jar : 0
# uncomparable : 1.1.0-cdh5.4.11 1.1.0-cdh5.5.6 : 4600 : org.apache.hive.hcatalog__fdse__hive-hcatalog-streaming__fdse__jar : 0
# uncomparable : 1.7.6-cdh5.15.0 1.7.6-cdh5.14.0 : 4600 : org.apache.avro__fdse__avro__fdse__jar : 0
# uncomparable : 1.6.0-cdh5.12.2 1.6.0-cdh5.10.1 : 4600 : org.apache.flume__fdse__flume-ng-configuration__fdse__jar : 0
# uncomparable : 1.7.6-cdh5.15.0 1.7.6-cdh5.14.0 : 4600 : org.apache.avro__fdse__avro-mapred__fdse__jar__fdse__hadoop2 : 0
# hadoop-common-3.0.0-tests.jar 3.0.0 type=test-jar
# hadoop-common-3.1.0.jar 3.1.0
# httpclient-4.2.1-atlassian-2.jar 4.2.1-atlassian-2
# hadoop-common-3.0.0-beta1.jar 3.0.0-beta1
# hadoop-common-3.2.0-20180809.000209-1000.jar 3.2.0-SNAPSHOT
# hadoop-common-2.6.0-cdh5.7.0.jar 2.6.0-cdh5.7.0
# hadoop-common-2.7.3.2.6.1.0-129.jar 2.7.3.2.6.1.0-129
# spark-core_2.10-2.2.0.jar spark-core_2.10 2.2.0
# javax.ws.rs-api-2.0-m15.jar 2.0-m15
# guava-23.1-jre.jar 23.1-jre
# hbase-server-2.0.0-beta-1.jar 2.0.0-beta-1
# hbase-client-1.1.2.2.6.2.1-1.jar 1.1.2.2.6.2.1-1
# guava-23.0-android.jar 23.0-android
# print(compare("3.4.5-cdh5.9.2", "3.4.5-mapr-1503"))
# total_efforts = {"test1":131, "test4":43,"test3":531,"test2":43}
# min = min(total_efforts.values())
# print(min)
# min_list = [k for k, v in total_efforts.items() if v == min]
# print(min_list)
# print(jar2version("guava-23.0-android.jar"))
# add_lib_pair()
def proj_modules():
    """Plot, per bucket of per-project module count, how many projects are
    inconsistent, false consistent, and in total.

    Reads the pre-aggregated output_dir/module_proj_type.json whose entries
    map module-count -> [total, inconsistent, false_consistent], buckets the
    counts into 5-wide bins plus a ">100" overflow bin, drops empty bins,
    and draws a multi-bar chart.

    NOTE(review): the one-off steps that generated proj_modules.json and
    module_proj_type.json from the raw action files, and a variant that
    stacked the count *differences* instead of raw counts, were kept as
    large commented-out blocks in the original; condensed to this note.
    """
    module_proj_type = read_json(output_dir + "module_proj_type.json")
    # Sort numerically by module count (JSON keys are strings).
    module_proj_type = sorted(module_proj_type.items(), key=lambda d: int(d[0]))
    # 20 five-wide buckets ("1-5" ... "96-100") plus one overflow bucket.
    keys = [''] * 21
    inconsistent_data = [0] * 21
    false_consistent_data = [0] * 21
    total_data = [0] * 21
    for i in range(0, 20):
        start = i * 5
        end = i * 5 + 5
        keys[i] = str(start + 1) + "-" + str(end)
    keys[20] = '>100'
    for entry in module_proj_type:
        module_cnt = int(entry[0])
        proj_type_cnt = entry[1]  # [total, inconsistent, false_consistent]
        index = None
        if module_cnt > 100:
            index = 20  # overflow bucket
        else:
            index = module_cnt // 5
            index = int(round(index, 0))
            # Exact multiples of 5 belong to the previous bucket
            # (e.g. 5 -> "1-5", not "6-10").
            if module_cnt % 5 == 0:
                index -= 1
            if index < 0:
                index = 0
        inconsistent_data[index] += proj_type_cnt[1]
        false_consistent_data[index] += proj_type_cnt[2]
        total_data[index] += proj_type_cnt[0]
    # Remove buckets with no data; `tag` compensates for the index shift
    # caused by earlier pops.
    pop_indices = []
    for i in range(0, len(total_data)):
        if total_data[i] == 0 and inconsistent_data[
                i] == 0 and false_consistent_data[i] == 0:
            pop_indices.append(i)
    tag = 0
    for e in pop_indices:
        keys.pop(e - tag)
        inconsistent_data.pop(e - tag)
        false_consistent_data.pop(e - tag)
        total_data.pop(e - tag)
        tag += 1
    draw_mulitbar(keys, inconsistent_data, false_consistent_data, total_data,
                  'The Number of Modules in a Project (#)',
                  'The Number of Projects (#)')
def read_actions():
    """Combine the harmonization actions, the API-usage indices, and the
    ununified (false-consistent) data into one per-project JSON file under
    datas/pdf_data/ (and datas/pdf_data_unify/ for unify-only projects),
    ready for PDF report generation.

    NOTE(review): `good` below is never read in this function — it appears
    to be a kept list of project ids already processed successfully.
    """
    good = [
        3584, 3590, 1548, 526, 3600, 3088, 532, 2580, 21, 1556, 534, 3606,
        2075, 2590, 30, 32, 3105, 2594, 2087, 40, 1582, 572, 60, 4672, 3648,
        4161, 1602, 68, 73, 1098, 2123, 2635, 3068, 79, 1618, 83, 1107, 3070,
        1633, 610, 102, 2663, 4202, 3186, 1139, 118, 119, 633, 123, 1659, 125,
        638, 1665, 1156, 133, 136, 649, 4746, 5261, 3214, 3220, 660, 148,
        4247, 1691, 155, 672, 2209, 678, 3238, 3244, 3757, 5295, 1202, 1716,
        692, 1722, 700, 190, 4288, 707, 709, 3784, 1738, 4811, 3277, 209,
        5330, 5333, 1238, 217, 221, 1758, 1765, 2792, 1770, 236, 3822, 4846,
        751, 2808, 1784, 3323, 1792, 2307, 3333, 2826, 1806, 1809, 2834, 1819,
        5412, 3367, 3370, 2871, 1852, 1342, 2880, 1344, 2886, 1866, 1356,
        5456, 338, 3411, 853, 2902, 1879, 1878, 2405, 1383, 361, 2923, 369,
        1403, 383, 1416, 1419, 2450, 1430, 407, 2973, 1439, 2975, 5538, 2980,
        5033, 1964, 941, 1452, 1971, 1468, 3005, 1992, 4041, 4049, 2004, 477,
        3042, 486, 3568, 1520, 3062, 1528, 508, 2558
    ]
    total_index = read_json("datas/tongji_with_index.json")
    total_action = read_json("../action-8-6.json")
    total_ununified = read_json("../action-ununified-8-6.json")
    for proj_id in total_action:
        # jar-lib problem: this project's jars cannot be mapped to libraries.
        if proj_id == "1120":
            continue
        print(proj_id)
        proj_output = {}
        proj_multi_output = {}
        inconsistent_lib = 0
        modules_related = 0
        multilib_versions = 0
        for jar in total_action[proj_id]:
            inconsistent_lib += 1
            tree_actions = OrderedDict()
            tree_indices = OrderedDict()
            module_versions = []     # [module, resolved_version] pairs
            module_properties = []   # [module, propertyName] pairs
            desc_dict = None
            tree_numbers = {}        # tree_id -> [first module #, last module #]
            index = 0                # running module counter across subtrees
            for tree_id in total_action[proj_id][jar]:
                # "desc_dict" is metadata, not a subtree.
                if tree_id == "desc_dict":
                    desc_dict = total_action[proj_id][jar][tree_id]
                    continue
                subtree = total_action[proj_id][jar][tree_id]
                # The last subtree entry carries the recommended action.
                temp = subtree[-1]
                no_action = False
                if "action_update_define_version_value" not in temp:
                    sys.stderr.write(proj_id + " " + jar + " " + tree_id + "\n")
                    tree_actions[tree_id] = "no action"
                    no_action = True
                action_jar = None
                if not no_action:
                    action_pos = temp["action_update_define_pos"]
                    # Value is "<something>=<version>"; keep the version part.
                    action_version = temp[
                        "action_update_define_version_value"].split("=")[1]
                    action_jar = version2jar(action_version + "__fdse__" + jar)
                subtree_index = []
                subtree_number = None
                for i in range(0, len(subtree)):
                    # Skip the action entry itself and skipped entries.
                    if "action_update_define_version_value" in subtree[
                            i] or "skip" in subtree[i]:
                        continue
                    usePostion = subtree[i]["usePostion"]
                    _module = get_module(usePostion)
                    module_versions.append(
                        [_module, subtree[i]["resolved_version"]])
                    module_properties.append(
                        [_module, subtree[i]["propertyName"]])
                    index += 1
                    if subtree_number is None:
                        subtree_number = [index]  # first module number
                    subindex = total_index[proj_id][jar][tree_id][i]
                    # Sanity check: action data and index data must agree on
                    # the module; abort hard on mismatch.
                    if get_module(subindex["usePostion"]) != _module:
                        sys.stderr.write("module conflict: " + proj_id + " " +
                                         jar + " " + tree_id + " " + str(i) +
                                         "\n")
                        sys.exit(0)
                    if action_jar is None:
                        # No recommended version -> no effort figures.
                        subtree_index.append([_module, []])
                    elif "type" in subindex["index"] and subindex["index"][
                            "type"] == "no_api_use":
                        # Module never calls the library's API.
                        subtree_index.append([_module, [0, 0, 0, 0]])
                    else:
                        api_count = subindex["index"][action_jar]["api_count"]
                        modify_api_count = subindex["index"][action_jar][
                            "modify_api_count"]
                        delete_api_count = subindex["index"][action_jar][
                            "delete_api_count"]
                        # Counts may come as per-file lists; fold them into
                        # [file count, total count].
                        if type(api_count) == list:
                            api_count = [len(api_count), sum(api_count)]
                        if type(modify_api_count) == list:
                            modify_api_count = [
                                len(modify_api_count),
                                sum(modify_api_count)
                            ]
                        if type(delete_api_count) == list:
                            delete_api_count = [
                                len(delete_api_count),
                                sum(delete_api_count)
                            ]
                        # Stability = total API uses minus the change index.
                        stability = api_count[1] - subindex["index"][
                            action_jar]["index_value"] if type(
                                api_count) == list else api_count - subindex[
                                    "index"][action_jar]["index_value"]
                        subtree_index.append([
                            _module,
                            [
                                api_count, delete_api_count, modify_api_count,
                                stability
                            ]
                        ])
                    # The (commented-out) files-related section only ran for
                    # actionable subtrees; this guard is now a no-op.
                    if no_action:
                        continue
                if subtree_number is not None:
                    subtree_number.append(index)  # last module number
                tree_numbers[tree_id] = subtree_number
                tree_indices[tree_id] = subtree_index
                if not no_action:
                    tree_actions[tree_id] = {
                        "action_pos": action_pos,
                        "action_version": action_version
                    }
            modules_related += len(module_versions)
            # dict() collapses duplicate modules; count distinct versions.
            multilib_versions += len(set(dict(module_versions).values()))
            jar_output = {}
            jar_output["tree_numbers"] = tree_numbers
            jar_output["module_versions"] = module_versions
            jar_output["module_properties"] = module_properties
            jar_output["tree_actions"] = tree_actions
            jar_output["tree_indices"] = tree_indices
            jar_output["desc_dict"] = desc_dict
            proj_multi_output[jar] = jar_output
        proj_ununified_output = {}
        if proj_id in total_ununified:
            proj_ununified_output = read_ununified_proj(
                total_ununified[proj_id])
        proj_output["inconsistent_lib"] = inconsistent_lib
        proj_output["modules_related"] = modules_related
        proj_output["multilib_versions"] = multilib_versions
        proj_output["multilib_output"] = proj_multi_output
        proj_output["ununified_output"] = proj_ununified_output
        write_json("datas/pdf_data/" + proj_id + ".json", proj_output)
    # Projects with only false-consistent data get a unify-only output file.
    for proj_id in total_ununified:
        if not os.path.exists("datas/pdf_data/" + proj_id + ".json"):
            proj_output = {}
            proj_ununified_output = read_ununified_proj(
                total_ununified[proj_id])
            proj_output["ununified_output"] = proj_ununified_output
            write_json("datas/pdf_data_unify/" + proj_id + ".json",
                       proj_output)
def filter_projs():
    """Report how many projects have a pom-module count entry."""
    counts = read_json("C:/Users/yw/Desktop/pom_module_count.json")
    print(len(counts))
def filter_lib_in_callgraph():
    # Remove, from the raw call graphs, cases where the same library appears
    # with multiple versions under one pom, keeping only the version declared
    # in tongji_libs; also drop java files whose module cannot be matched.
    # (Translated from the original Chinese comment.)
    tongji_libs = read_json("datas/tongji_libs.json")
    db = database.connectdb()
    dir = "../buggyCallgraph-20190613-93/buggyCallgraph4"
    files = os.listdir(dir)
    for file in files:
        # Already filtered on a previous run — skip.
        if os.path.exists("call_graph/" + file):
            continue
        print(file)
        proj_id = file.replace(".txt", "")
        if proj_id not in tongji_libs:
            sys.stderr.write(proj_id + " not in tongji\n")
            continue
        new_file = {}
        content = read_json(os.path.join(dir, file))
        for path in content:
            # Match the java file path to one of the project's modules.
            _module = match_module(path, list(tongji_libs[proj_id].keys()))
            if _module is None:
                continue  # no owning module — drop the file
            obj = content[path]
            # temp_obj: lib_key -> {version -> jar file name}, grouping the
            # jars referenced by this file per library.
            temp_obj = {}
            for jar_name in obj:
                sql = "SELECT version_id,type,classifier FROM version_types WHERE jar_package_url = '" + jar_name + "'"
                type_info = database.querydb(db, sql)
                version_id = type_info[0][0]
                _type = type_info[0][1]
                classifier = type_info[0][2]
                sql = "SELECT group_str,name_str,version FROM library_versions WHERE id = " + str(
                    version_id)
                query_result = database.querydb(db, sql)
                version = query_result[0][2]
                lib_key = query_result[0][0] + "__fdse__" + query_result[0][
                    1] + "__fdse__" + _type
                if classifier is not None:
                    lib_key += "__fdse__" + classifier
                if lib_key in temp_obj:
                    if version not in temp_obj[lib_key]:
                        temp_obj[lib_key][version] = jar_name
                else:
                    temp_obj[lib_key] = {}
                    temp_obj[lib_key][version] = jar_name
            for key in temp_obj:
                value = temp_obj[key]
                if len(value) > 1:
                    # Multiple versions of one library in a single file:
                    # remove every version that differs from the one declared
                    # for this module in tongji_libs.
                    count = 0
                    for version in value:
                        if key in tongji_libs[proj_id][
                                _module] and version != tongji_libs[proj_id][
                                    _module][key]:
                            count += 1
                            obj.pop(value[version])
                    # Sanity check: after filtering, either nothing, all, or
                    # all-but-one version should have been removed.
                    if count != 0 and count != len(
                            value) and count != len(value) - 1:
                        print("@!!!!!!!!!!!!!!!!!!!!!!!")
            obj["module"] = _module
            new_file[path] = obj
        write_json("call_graph/" + file, new_file)
def data2pdf():
    """Render one PDF report per project from datas/pdf_data_unify/*.json
    using ReportLab, with sections for inconsistent and false-consistent
    libraries.

    NOTE(review): `error_proj` is never appended to (the try/except around
    doc.build was commented out), so the final print always shows [];
    `jar_box_style` is unused here; `output_dir` shadows the module-level
    output_dir used by other functions. `large_images` is read and written
    back at the end — presumably mutated indirectly (e.g. by add_image via
    a global); confirm before removing the round-trip.
    """
    proj_dict = get_proj_dict()
    # ReportLab table styles: grid + header background + vertical centering.
    table_style = [('GRID', (0, 0), (-1, -1), 0.3, colors.grey),
                   ('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey),
                   ('VALIGN', (0, 0), (-1, -1), 'MIDDLE')]
    jar_box_style = [('BOX', (0, 0), (-1, -1), 0.8, colors.darksalmon)]
    dir = "datas/pdf_data_unify"
    output_dir = "datas/pdf_unify/"
    large_images = read_json("datas/large_images.json")
    files = os.listdir(dir)
    error_proj = []
    for file in files:
        proj_name = proj_dict[file.replace(".json", "")].replace("/", " ")
        print(proj_name)
        # Skip projects whose PDF already exists.
        if os.path.exists(output_dir + proj_name + ".pdf"):
            continue
        print(file)
        proj_data = read_json(os.path.join(dir, file))
        story = []
        # Title.
        rpt_title = title("Project : " + proj_dict[file.replace(".json", "")],
                          1)
        story.append(Paragraph(rpt_title, normalStyle))
        # Definitions section.
        add_definition(story)
        # Overview section.
        inconsistent_lib_cnt = 0
        if "inconsistent_lib" in proj_data:
            inconsistent_lib_cnt = proj_data["inconsistent_lib"]
        add_overview(story, normalStyle, inconsistent_lib_cnt,
                     len(proj_data["ununified_output"]))
        # Section II: inconsistent libraries (if any).
        if "multilib_output" in proj_data:
            story.append(
                Paragraph(title('II. Inconsistent Libraries', 2), normalStyle))
            multi_data = proj_data["multilib_output"]
            multi_jar_cnt = 0
            for jar in multi_data:
                multi_jar_cnt += 1
                one_box = []
                # Library heading ("groupId:artifactId").
                one_box.append([
                    Paragraph(
                        title(
                            str(multi_jar_cnt) + '. ' +
                            ":".join(jar.split("__fdse__")[:2]), 3),
                        normalStyle)
                ])
                # Summary block.
                desc_dict = multi_data[jar]["desc_dict"]
                add_summary(
                    one_box, desc_dict, None, None,
                    ":".join(desc_dict["libraryname"].split("__fdse__")[:2]),
                    str(
                        len(
                            set(
                                dict(multi_data[jar]
                                     ["module_versions"]).values()))), True)
                # Multiple-versions table.
                multi_versions(one_box, table_style, multi_data[jar])
                one_box.append([
                    Paragraph(
                        '<para fontSize=13 face="Times" leading=17><b> Harmonization Recommendation</b></para>',
                        normalStyle)
                ])
                # One paragraph (and optionally image + efforts) per subtree.
                for tree_id in multi_data[jar]["tree_actions"]:
                    action_version = None if multi_data[jar]["tree_actions"][
                        tree_id] == "no action" else multi_data[jar][
                            "tree_actions"][tree_id]["action_version"]
                    action_pos = None if multi_data[jar]["tree_actions"][
                        tree_id] == "no action" else multi_data[jar][
                            "tree_actions"][tree_id]["action_pos"]
                    tree_indices = multi_data[jar]["tree_indices"][tree_id]
                    subtree_numbers = multi_data[jar]["tree_numbers"][tree_id]
                    modules_str = "The inconsistent library versions in Modules " + str(
                        subtree_numbers[0]) + "-" + str(subtree_numbers[1])
                    content = modules_str + ' can not be harmonized into a suitable version.' if multi_data[
                        jar]["tree_actions"][
                            tree_id] == "no action" else modules_str + ' are harmonized into version ' + action_version + '.'
                    text = '<para fontSize=12 align=left textColor = "darkblue" leading=13>' + content + '</para>'
                    one_box.append([Paragraph(text, normalStyle)])
                    one_box.append([
                        Paragraph('<para leading=6> <br/></para>', normalStyle)
                    ])
                    if multi_data[jar]["tree_actions"][tree_id] == "no action":
                        continue  # nothing to illustrate for this subtree
                    add_image(one_box, file, jar, tree_id)
                    # Migration-effort table.
                    add_efforts(one_box, table_style, tree_indices,
                                subtree_numbers)
                # Flatten the column of boxed cells into the story.
                story = story + list(np.transpose(one_box)[0])
                story.append(Paragraph('<para><br/></para>', normalStyle))
        # Section II or III: false consistent libraries (if any).
        ununified_data = proj_data["ununified_output"]
        if len(ununified_data) > 0:
            if "multilib_output" in proj_data:
                story.append(
                    Paragraph(title('III. False Consistent Libraries', 2),
                              normalStyle))
            else:
                story.append(
                    Paragraph(title('II. False Consistent Libraries', 2),
                              normalStyle))
            ununified_jar_cnt = 0
            for jar in ununified_data:
                ununified_jar_cnt += 1
                one_box = []
                # Library heading.
                one_box.append([
                    Paragraph(
                        title(
                            str(ununified_jar_cnt) + '. ' +
                            ":".join(jar.split("__fdse__")[:2]), 3),
                        normalStyle)
                ])
                # All modules share one version; take it from the first pair.
                unify_version = ununified_data[jar]["module_versions"][0][1]
                desc_dict = ununified_data[jar]["desc_dict"]
                add_summary(
                    one_box, desc_dict,
                    str(len(ununified_data[jar]["module_versions"])),
                    unify_version,
                    ":".join(desc_dict["libraryname"].split("__fdse__")[:2]),
                    None, False)
                # Module-versions table.
                add_ununified_module_version(one_box, table_style,
                                             ununified_data[jar])
                # One paragraph (and optionally image) per subtree.
                tree_numbers = ununified_data[jar]["tree_numbers"]
                for tree_id in tree_numbers:
                    if tree_id in ununified_data[jar][
                            "tree_actions"] and ununified_data[jar][
                                "tree_actions"][tree_id] == "no action":
                        text = '<para fontSize=12 align=left textColor = "darkblue" leading=12>The false consistent library versions in Modules ' + str(
                            tree_numbers[tree_id][0]
                        ) + "-" + str(
                            tree_numbers[tree_id][1]
                        ) + ' can not reference a common property on a local POM file.</para>'
                        one_box.append([Paragraph(text, normalStyle)])
                        one_box.append([
                            Paragraph('<para leading=6> <br/></para>',
                                      normalStyle)
                        ])
                        continue
                    content = "The false consistent library versions in Modules " + str(
                        tree_numbers[tree_id][0]) + "-" + str(
                            tree_numbers[tree_id][1])
                    text = '<para fontSize=12 align=left textColor = "darkblue" leading=10>' + content + '</para>'
                    one_box.append([Paragraph(text, normalStyle)])
                    one_box.append([
                        Paragraph('<para leading=6> <br/></para>', normalStyle)
                    ])
                    add_image(one_box, file, jar, tree_id)
                story = story + list(np.transpose(one_box)[0])
                story.append(
                    Paragraph('<para><br/><br/></para>', normalStyle))
        doc = SimpleDocTemplate(output_dir + proj_name + ".pdf")
        doc.build(story)
    print(error_proj)
    write_json_format("datas/large_images.json", large_images)
def lib_jar_pair_reverse():
    """Invert the library->jar mapping and persist the result.

    Loads datas/lib_jar_pair.txt (JSON), swaps every key/value pair
    (values are assumed unique — duplicate values would collapse into a
    single key), and writes the inverted mapping to
    datas/lib_jar_pair_reverse.txt.
    """
    forward = read_json("datas/lib_jar_pair.txt")
    inverted = dict((jar, lib) for lib, jar in forward.items())
    write_json("datas/lib_jar_pair_reverse.txt", inverted)
def get_one_to_recommend():
    """Compute one recommended version per dependency tree.

    Reads datas/tongji_with_index.json (produced by get_new_version_index),
    and for every project/lib/tree whose last entry already carries a
    "recommend_version" marker, intersects the candidate new jars of all
    entries and picks the version with the minimum total migration effort
    (sum of per-entry index values). Ties are broken by get_max_version.
    The chosen version (plus the per-version effort table when available)
    is appended to the subtree in place.

    NOTE(review): the updated json_data is never written back to disk in
    this function — confirm whether a caller persists it or a write call
    is missing.
    """
    # Hard-coded list of "well-behaved" project ids; only used by the
    # commented-out filter below.
    well_proj = [
        369, 941, 3186, 85, 2834, 136, 1602, 2808, 3370, 4672, 2004, 4746,
        633, 102, 707, 602, 2635, 217, 2450, 4041, 1120, 79, 5295, 1419,
        3600, 5456, 1156, 1416, 692, 1556, 21, 2405, 83, 3005, 1633, 2871,
        125, 190, 678, 3568, 1879, 3784, 3323, 3070, 1430, 2663, 526, 486,
        2980, 4049, 1618, 1964, 3105, 40, 1792, 2123, 3244, 2590, 32, 1992,
        3062, 1521, 5330, 1139, 4811, 2594, 1784, 3606, 1342, 3220, 508,
        1866, 361, 119, 4161, 1356, 118, 1722, 2826, 1971, 1852, 3042, 1098,
        649, 123, 1520, 221, 2307, 1738, 209, 660, 383, 4202, 3367, 2580,
        2087, 5033, 5261, 1548, 2973, 1439, 338, 532, 236, 1544, 1468, 1202,
        1665, 2923, 1423, 3238, 73, 2880, 3648, 1659, 534, 3757, 2081, 610,
        1758, 1383, 1691, 5538, 2975, 1403, 2886, 1238, 4288, 407, 3822,
        2075, 1452, 60, 477, 349, 816, 2558, 572, 5333, 1809, 1582, 4846,
        1716, 700, 3411, 4247, 853, 3214, 638, 1806, 1819, 68, 2792, 2902,
        148, 751, 3590, 3277, 1878, 133, 3333, 155, 1344, 30, 1765, 5412,
        3584, 3088, 672, 1517, 1770, 1464, 1286, 1528, 709, 2209, 1107, 3068
    ]
    # print(len(well_proj))
    # noindex = []
    json_data = read_json("datas/tongji_with_index.json")
    for project in json_data:
        # if int(project) in well_proj:
        #     continue
        # if project != "4600":
        #     continue
        print(project)
        project_obj = json_data[project]
        for lib in project_obj:
            trees = project_obj[lib]
            for tree_id in trees:
                recommend_version = None
                shared_new_jars = []   # one set of candidate new jars per entry
                old_jars = set()       # resolved versions currently in use
                subtree = trees[tree_id]
                temp = subtree[-1]
                # Trees without a trailing "recommend_version" marker are
                # skipped (presumably stamped by an earlier pass — verify).
                if "recommend_version" not in temp:
                    sys.stderr.write("no recommend_version: " + project +
                                     " " + lib + " " + tree_id + "\n")
                    continue
                no_index = False
                have_random = False
                total_efforts = None
                for entry in subtree:
                    if "index" in entry:  # an index is present, so include it
                        # have_index = True
                        resolved_version = entry["resolved_version"]
                        old_jars.add(resolved_version)
                        # Candidate new jars = the index keys minus the
                        # bookkeeping "type" key.
                        new_jars = set(entry["index"].keys())
                        if "type" in new_jars:
                            new_jars.remove("type")
                            if entry["index"]["type"] == "no_api_use":
                                # Entry uses no APIs — any version works.
                                have_random = True
                        if len(new_jars) > 0:
                            shared_new_jars.append(new_jars)
                    else:
                        no_index = True
                if no_index:
                    # # todo:
                    # if project not in noindex:
                    #     noindex.append(project)
                    #     print(project)
                    # sys.stderr.write("no index : " + project + " : " + lib + " : " + tree_id)
                    continue
                if len(shared_new_jars) > 0:
                    # Versions recommendable for every entry in this tree.
                    total = set.intersection(*shared_new_jars)
                    if len(total) > 0:
                        total_efforts = {}
                        # Accumulate migration effort per candidate version
                        # across all entries of the subtree.
                        for entry in subtree:
                            for one_new_jar in total:
                                one_new_jar_version = get_version_of_jar(
                                    one_new_jar)
                                if one_new_jar_version is None:
                                    sys.stderr.write(
                                        "parse to version error : " +
                                        one_new_jar)
                                    sys.exit(0)
                                if one_new_jar_version not in total_efforts:
                                    total_efforts[one_new_jar_version] = 0
                                # no_api_use entries contribute zero effort
                                # (initialized above, then skipped).
                                if "type" in entry["index"] and entry["index"][
                                        "type"] == "no_api_use":
                                    continue
                                total_efforts[one_new_jar_version] += entry[
                                    "index"][one_new_jar]["index_value"]
                        min_effort = min(total_efforts.values())
                        min_effort_versions = [
                            k for k, v in total_efforts.items()
                            if v == min_effort
                        ]
                        if len(min_effort_versions) > 1:
                            # Tie: prefer the highest version.
                            recommend_version = get_max_version(
                                min_effort_versions)
                            if recommend_version is None:
                                sys.stderr.write("uncomparable : " + project +
                                                 " : " + lib + " : " +
                                                 tree_id + "\n")
                                # sys.exit(0)
                        else:
                            recommend_version = min_effort_versions[0]
                    # TODO: there is no common recommendable version
                    else:
                        sys.stderr.write("public new version is none: " +
                                         project + " : " + lib + " : " +
                                         tree_id + "\n")
                        # sys.exit(0)
                else:
                    # TODO: any version is recommendable; choose among the
                    # versions already in use.
                    if have_random:
                        shared_new_jars = list(old_jars)
                        recommend_version = get_max_version(shared_new_jars)
                        if recommend_version is None:
                            sys.stderr.write("uncomparable : " + project +
                                             " : " + lib + " : " + tree_id +
                                             "\n")
                            # sys.exit(0)
                    else:
                        sys.stderr.write("no new jar : " + project + " : " +
                                         lib + " : " + tree_id + "\n")
                        sys.exit(0)
                if recommend_version is not None:
                    # if recommend_version.endswith(".jar"):
                    #     print(recommend_version)
                    #     recommend_version = get_version_of_jar(recommend_version)
                    #     if recommend_version is None:
                    #         sys.stderr.write("parse to version error")
                    #         sys.exit(0)
                    # Record the recommendation at the end of the subtree.
                    recommend_obj = {"recommend_version": recommend_version}
                    if total_efforts is not None:
                        recommend_obj["total_efforts"] = total_efforts
                    subtree.append(recommend_obj)
def get_new_version_index():
    """Attach a migration-effort index to every dependency-tree entry.

    For each project with a preprocessed call graph, matches every
    subtree entry's module against the call graph, and for each candidate
    new jar accumulates per-API call counts, deleted/modified API counts,
    and a weighted "index_value" effort score. The enriched structure is
    written to datas/tongji_with_index.json and later consumed by
    get_one_to_recommend.
    """
    lib_jar_pair = read_json("datas/lib_jar_pair.txt")
    json_data = read_json("../tongjiresult-8-5.json")
    # print(len(json_data))
    # return
    for project in json_data:
        # if project != "508":
        #     continue
        # if project != "751":
        #     continue
        # Projects without a preprocessed call graph are skipped entirely.
        if not os.path.exists("call_graph_preprocessed/" + project + ".txt"):
            # sys.stderr.write(project + "\n")
            continue
        # continue
        print(project)
        call_graph = read_json("call_graph_preprocessed/" + project + ".txt")
        # Per-java-file, per-API call counts (absolute path is machine-
        # specific — NOTE(review): consider making this configurable).
        proj_call_count = read_json(
            "E:/data/RQ1/api_call/total_with_count_preprocessed/" + project +
            ".txt")
        project_obj = json_data[project]
        for lib in project_obj:
            # print(lib)
            trees = project_obj[lib]
            # jar_result = {}
            for tree_id in trees:
                subtree = trees[tree_id]
                for entry in subtree:
                    usePostion = entry["usePostion"]
                    version = entry["resolved_version"]
                    # Derive the module path by stripping the pom.xml suffix.
                    if not usePostion.endswith("/pom.xml"):
                        _module = usePostion.replace("pom.xml", "")
                    else:
                        _module = usePostion.replace("/pom.xml", "")
                    lib_key = version + "__fdse__" + lib
                    # if lib_key in lib_jar_pair:
                    # Convert the lib key to a jar name.
                    # NOTE(review): unguarded lookup — raises KeyError when
                    # lib_key is missing from lib_jar_pair; the guard above
                    # was commented out. Confirm this is intentional.
                    jar_name = lib_jar_pair[lib_key]
                    index_obj = None
                    # Search the call graph for files of this module that
                    # reference this jar.
                    find_in_call_graph = False
                    for java_file in call_graph:
                        # print(java_file)
                        java_file_obj = call_graph[java_file]
                        # Module matches and the jar appears in this file.
                        if _module == java_file_obj[
                                "module"] and jar_name in java_file_obj:
                            jar_obj = java_file_obj[jar_name]
                            # TODO: the jar has no APIs recorded — no
                            # candidate jars to recommend from this file.
                            if len(jar_obj) == 0:
                                continue
                            if not find_in_call_graph:
                                index_obj = {}  # initialize on first match
                                find_in_call_graph = True
                            # # todo: jar has no APIs, no candidate jars to recommend
                            # if len(jar_obj) == 0:
                            #     if len(index_obj) == 0:
                            #         index_obj["type"] = 'no_new_jar'
                            #         new_dict = {"api_count": 0, "index_value": 0, "methods_in_modify_api": [],
                            #                     "delete_api_count": 0, "modify_api_count": 0}
                            #         index_obj[jar_name] = new_dict
                            for api in jar_obj:
                                api_obj = jar_obj[api]
                                # TODO: candidate new jars are empty, meaning
                                # there is no new version to recommend.
                                if len(api_obj) == 0:
                                    if len(index_obj) == 0:
                                        index_obj["type"] = 'no_new_jar'
                                        new_dict = {
                                            "api_count": 0,
                                            "index_value": 0,
                                            "methods_in_modify_api": [],
                                            "delete_api_count": 0,
                                            "modify_api_count": 0
                                        }
                                        index_obj[jar_name] = new_dict
                                        # index_obj[jar_name] = {"api_count": 0,"index_value": 0,"methods_in_modify_api": [],"delete_api_count": 0,"modify_api_count": 0},
                                    # NOTE(review): this access can KeyError
                                    # when index_obj is non-empty but has no
                                    # "type" key (i.e. real jars were already
                                    # recorded) — confirm that state cannot
                                    # occur here.
                                    elif index_obj["type"] != 'no_new_jar':
                                        sys.stderr.write(
                                            'no_new_jar type conflict : ' +
                                            java_file + "(" + jar_name + ")")
                                        sys.exit(0)
                                for new_jar in api_obj:
                                    if new_jar not in index_obj:
                                        # First sighting of this candidate
                                        # jar: initialize its counters.
                                        index_obj[new_jar] = {}
                                        # index_obj[new_jar]["api_count"] = 1
                                        # index_obj[new_jar]["api_count"] = proj_call_count[java_file][api]
                                        index_obj[new_jar]["api_count"] = [
                                            proj_call_count[java_file][api]
                                        ]
                                        index_obj[new_jar][
                                            "delete_api_count"] = []
                                        index_obj[new_jar][
                                            "modify_api_count"] = []
                                        index_obj[new_jar][
                                            "methods_in_modify_api"] = []
                                        index_obj[new_jar]["index_value"] = 0
                                    else:
                                        # index_obj[new_jar]["api_count"] += 1
                                        # index_obj[new_jar]["api_count"] += proj_call_count[java_file][api]
                                        index_obj[new_jar]["api_count"].append(
                                            proj_call_count[java_file][api])
                                    new_jar_obj = api_obj[new_jar]
                                    # TODO: the API cannot be found in the
                                    # new jar's method map.
                                    if api not in new_jar_obj:
                                        if len(new_jar_obj) == 0:
                                            # API absent entirely: count it
                                            # as deleted, weighted by its
                                            # call count.
                                            # index_obj[new_jar]["delete_api_count"] += 1
                                            # index_obj[new_jar]["index_value"] += 1
                                            # index_obj[new_jar]["delete_api_count"] += proj_call_count[java_file][api]
                                            index_obj[new_jar][
                                                "delete_api_count"].append(
                                                    proj_call_count[java_file]
                                                    [api])
                                            index_obj[new_jar][
                                                "index_value"] += proj_call_count[
                                                    java_file][api]
                                        else:
                                            sys.stderr.write('KeyError : ' +
                                                             java_file + "(" +
                                                             jar_name + ":" +
                                                             api + ")")
                                            sys.exit(0)
                                    elif new_jar_obj[api] == "jar not found" or new_jar_obj[api] == "class not found" or \
                                            new_jar_obj[api] == "method not found" or new_jar_obj[api] == "jdk method":
                                        # API resolves to a not-found/jdk
                                        # status: treated as deleted.
                                        # index_obj[new_jar]["delete_api_count"] += 1
                                        # index_obj[new_jar]["index_value"] += 1
                                        # index_obj[new_jar]["delete_api_count"] += proj_call_count[java_file][api]
                                        index_obj[new_jar][
                                            "delete_api_count"].append(
                                                proj_call_count[java_file]
                                                [api])
                                        index_obj[new_jar][
                                            "index_value"] += proj_call_count[
                                                java_file][api]
                                    else:
                                        # API exists: tally how many of the
                                        # methods reachable from it were
                                        # deleted / modified / added.
                                        total = 0
                                        delete = 0
                                        modify = 0
                                        add = 0
                                        for method in new_jar_obj:
                                            # if method != api:
                                            total += 1
                                            if new_jar_obj[method] == "jar not found" or new_jar_obj[
                                                    method] == "class not found" or new_jar_obj[
                                                        method] == "method not found" or new_jar_obj[
                                                            method] == "jdk method":
                                                delete += 1
                                            elif new_jar_obj[
                                                    method] == "modify":
                                                modify += 1
                                            elif new_jar_obj[method] == "add":
                                                add += 1
                                        if delete != 0 or modify != 0 or add != 0:
                                            # index_obj[new_jar]["modify_api_count"] += 1
                                            # index_obj[new_jar]["methods_in_modify_api"].append([total, delete, modify, add])
                                            # index_obj[new_jar]["modify_api_count"] += proj_call_count[java_file][api]
                                            index_obj[new_jar][
                                                "modify_api_count"].append(
                                                    proj_call_count[java_file]
                                                    [api])
                                            # One record per call so the stats
                                            # are weighted by call frequency.
                                            for i in range(
                                                    0,
                                                    proj_call_count[java_file]
                                                    [api]):
                                                index_obj[new_jar][
                                                    "methods_in_modify_api"].append(
                                                        [
                                                            total, delete,
                                                            modify, add
                                                        ])
                                        if total > 0:
                                            # Effort = changed fraction of the
                                            # API's method set, weighted by
                                            # how often the API is called.
                                            # index_obj[new_jar]["index_value"] += (delete + modify + add) / total
                                            index_obj[new_jar][
                                                "index_value"] += (
                                                    delete + modify + add
                                                ) / total * proj_call_count[
                                                    java_file][api]
                    # TODO: module and jar never matched — the call graph uses
                    # no API of this library, so record index = {"type": ...}.
                    if not find_in_call_graph:
                        index_obj = {}
                        index_obj["type"] = 'no_api_use'
                    entry["index"] = index_obj
    write_json_format("datas/tongji_with_index.json", json_data)