def letsStartSpectralAlgorithm(ways_type=None, dataset_type=None, family_type=None):
    """Run the spectral-clustering alignment pipeline on the two input networks.

    The function wipes ``../../Data/SpectralC``, loads both networks, splits
    each into ``num_clusters`` (4) spectral clusters, matches the clusters of
    the two networks with ``hungarian.Hungarian_algo`` and then, per matched
    pair, either clusters further (both clusters have at least 900 nodes) or
    writes the pair to GEXF files and scores it directly.  Finally
    ``generate_alignment.generateSpectralFinalScore`` is called.

    Args:
        ways_type: Path component below ``Constants.NAPA_PATH``; also
            forwarded to the scoring helpers.
        dataset_type: Second path component; forwarded likewise.
        family_type: Third path component; forwarded likewise.

        Each argument is optional.  When it is left as ``None`` the
        module-level variable of the same name is used, which is what the
        original zero-argument version of this function relied on.

    Raises:
        NameError: if an argument is omitted and no module-level variable of
            that name exists.
    """
    def _resolve(name, value):
        # An explicit argument wins; otherwise fall back to the module global
        # so existing ``letsStartSpectralAlgorithm()`` callers keep working.
        if value is not None:
            return value
        try:
            return globals()[name]
        except KeyError:
            raise NameError("name '%s' is not defined" % name)

    ways_type = _resolve("ways_type", ways_type)
    dataset_type = _resolve("dataset_type", dataset_type)
    family_type = _resolve("family_type", family_type)

    # Start from a clean working directory (argv list: no shell involved).
    subprocess.call(["rm", "-rf", "../../Data/SpectralC"])

    # Form networkx representation of both graphs.
    INPUT_FILE_1 = os.path.join(Constants.NAPA_PATH, ways_type, dataset_type,
                                family_type, Constants.INPUT_FILE_1_NAME)
    INPUT_FILE_2 = os.path.join(Constants.NAPA_PATH, ways_type, dataset_type,
                                family_type, Constants.INPUT_FILE_2_NAME)
    G1 = Utils.convertNetToGefx(INPUT_FILE_1 + Constants.NET_FORMAT)
    G2 = Utils.convertNetToGefx(INPUT_FILE_2 + Constants.NET_FORMAT)

    num_alignment_pairs = 0
    num_clusters = 4

    # Initial spectral clustering; the results are indexed by cluster id below.
    subgraphs1 = spectral_clustering.spectral_clustering(
        G1, Constants.INPUT_FILE_1_NAME, num_clusters)
    subgraphs2 = spectral_clustering.spectral_clustering(
        G2, Constants.INPUT_FILE_2_NAME, num_clusters)

    # Does SDF on those clusters. Does not need to do it on all clusters
    # because some might already be fixed.
    SDF_PATH = Utils.ComputeSpectralDistance(
        Constants.INPUT_FILE_1_NAME, Constants.INPUT_FILE_2_NAME, "SpectralC")

    # Build the cluster-to-cluster weight matrix and find the best matching
    # for the resulting bipartite graph.
    cluster_edge_weight_matrix = compute_cluster_param.find_cluster_spectraledges_SDF(
        SDF_PATH, num_clusters, num_clusters)
    best_cluster_pairs = hungarian.Hungarian_algo(cluster_edge_weight_matrix)

    for cluster1, cluster2 in best_cluster_pairs:
        newG1 = subgraphs1[cluster1]
        newG2 = subgraphs2[cluster2]

        if len(newG1.nodes()) >= 900 and len(newG2.nodes()) >= 900:
            # Both clusters are large: cluster them further.  The helper
            # returns the updated pair counter.
            num_alignment_pairs = heirarchical_clustering.heirarchical_clustering_spec(
                newG1, newG2, num_alignment_pairs, SDF_PATH, num_clusters,
                ways_type, dataset_type, family_type)
        else:
            # Small enough: write the pair out and score it directly.
            heirarchical_dir = "../../Data/SpectralC/IntermC"
            # The intermediate directory is recreated empty for every pair.
            if os.path.exists(heirarchical_dir):
                subprocess.call(["rm", "-rf", heirarchical_dir])
            os.makedirs(heirarchical_dir)

            subgraphpath1 = os.path.join(
                heirarchical_dir, "A_" + str(num_alignment_pairs) + ".gexf")
            subgraphpath2 = os.path.join(
                heirarchical_dir, "B_" + str(num_alignment_pairs) + ".gexf")
            nx.write_gexf(newG1, subgraphpath1)
            nx.write_gexf(newG2, subgraphpath2)

            generate_alignment.generate_spectralcluster_alignment_score(
                subgraphpath1, subgraphpath2, "SpectralC", "SDF",
                ways_type, dataset_type, family_type,
                num_clusters, num_alignment_pairs)
            num_alignment_pairs = num_alignment_pairs + 1

    # Give final output.
    generate_alignment.generateSpectralFinalScore(
        INPUT_FILE_1, INPUT_FILE_2, ways_type, dataset_type, family_type,
        num_clusters)
def generatekmeansFinalScore(networkPath1, networkPath2, ways_type, dataset_type, family_type, hyper_param):
    """Produce the whole-graph ("Prof") score and the combined final score.

    The function
      1. makes sure GEXF versions of both networks exist,
      2. runs the external command at ``Constants.GHOST_PATH`` on the two
         full graphs, teeing its output into the "Prof" result log,
      3. concatenates every ``.af`` file found in the score directory into a
         single alignment file, and
      4. passes that file to ``Utils.getEdgeCorrectness`` together with the
         "Ours" result log path.

    Args:
        networkPath1: Path of the first network without file extension.
        networkPath2: Path of the second network without file extension.
        ways_type, dataset_type, family_type: Used to name the result log
            directory.
        hyper_param: Appended (via ``str``) to the result log directory name.
    """
    SCORE_DIR = os.path.join("../../Data", "KmeansH", Constants.FINAL_SCORE_DIR,
                             "SDF", "A_B")

    # Check whether gexf file exists or not; convert from .net when missing.
    graph1Path = networkPath1 + Constants.GEXF_FORMAT
    graph2Path = networkPath2 + Constants.GEXF_FORMAT
    if not os.path.isfile(graph1Path):
        Utils.convertNetToGefx(networkPath1 + Constants.NET_FORMAT)
    if not os.path.isfile(graph2Path):
        Utils.convertNetToGefx(networkPath2 + Constants.NET_FORMAT)

    RESULT_LOG_DIR = os.path.join(
        "../..", Constants.FINAL_RESULT, "KmeansH",
        ways_type + "_" + dataset_type + "_" + family_type + "_" + "_Par_" + str(hyper_param))
    # ``tee`` below writes into this directory, so it has to exist
    # (generate_alignment_score creates it the same way).
    if not os.path.exists(RESULT_LOG_DIR):
        os.makedirs(RESULT_LOG_DIR)

    # Generate Prof score.
    RESULT_LOG_FILE = os.path.join(
        RESULT_LOG_DIR, Constants.PROF + "_" + Constants.RESULT_LOG_FILE)
    cfg_file = Utils.generateCfgFile(graph1Path, graph2Path,
                                     dumpDistances=False, dumpSignatures=False,
                                     sigs1=None, sigs2=None)
    cmd = Constants.GHOST_PATH + " -c " + cfg_file + " | tee " + RESULT_LOG_FILE
    os.system(cmd)

    # NOTE(review): SCORE_DIR is passed to ``rm`` as an extra operand without
    # ``-r``; for a directory rm refuses to delete it, so this effectively
    # only removes matching files from the current working directory.
    # Kept byte-identical -- confirm the intent.
    for pattern in ("*sdf", "*gz", "*af"):
        subprocess.call("rm " + pattern + " " + SCORE_DIR, shell=True)

    # Generate our final score: concatenate the various .af files.
    FINAL_SCORE_DIR = os.path.join(SCORE_DIR, Constants.FINAL_RESULT)
    if not os.path.exists(FINAL_SCORE_DIR):
        os.makedirs(FINAL_SCORE_DIR)

    alignment_chunks = []
    for entry in os.listdir(SCORE_DIR):
        if entry.endswith(".af"):
            with open(os.path.join(SCORE_DIR, entry)) as af_file:
                alignment_chunks.append(af_file.read())
    final_text = "".join(alignment_chunks)

    final_align_file_path = os.path.join(FINAL_SCORE_DIR, Constants.RESULT_FILE)
    # Close (and thereby flush) the file before it is handed to
    # getEdgeCorrectness by path; previously the handle stayed open and the
    # data could still be sitting in the write buffer.
    with open(final_align_file_path, 'w+') as align_file:
        align_file.write(final_text)

    RESULT_LOG_FILE = os.path.join(
        RESULT_LOG_DIR, Constants.OURS + "_" + Constants.RESULT_LOG_FILE)
    Utils.getEdgeCorrectness(graph1Path, graph2Path, final_align_file_path,
                             RESULT_LOG_FILE)

    for pattern in ("*sdf", "*gz", "*af"):
        subprocess.call("rm " + pattern + " " + SCORE_DIR, shell=True)
def letsStartKMeans(ways_type, dataset_type, family_type):
    """Run the k-means based alignment pipeline for 8 and for 20 clusters.

    For each cluster count both networks are clustered with
    ``kmeans_cluster.kmeans_cluster``, the spectral distance between the
    clusterings is computed, clusters are matched with
    ``hungarian.Hungarian_algo`` and the matched pairs are scored through
    ``generate_alignment.generate_alignment_score``.

    Args:
        ways_type: Path component below ``Constants.NAPA_PATH``; also
            forwarded to the scoring helper.
        dataset_type: Second path component; forwarded likewise.
        family_type: Third path component; forwarded likewise.
    """
    # Name under ../../Data used by every later step of this pipeline.
    cluster_algo_name = "Kmeans"

    # Start from a clean working directory.  The original removed
    # "../../Data/KMeans", whose capitalisation differs from the "Kmeans"
    # name used below, so on a case-sensitive filesystem nothing was cleaned.
    subprocess.call(["rm", "-rf", os.path.join("../../Data", cluster_algo_name)])

    # Form networkx representation of both graphs.
    INPUT_FILE_1 = os.path.join(Constants.NAPA_PATH, ways_type, dataset_type,
                                family_type, Constants.INPUT_FILE_1_NAME)
    INPUT_FILE_2 = os.path.join(Constants.NAPA_PATH, ways_type, dataset_type,
                                family_type, Constants.INPUT_FILE_2_NAME)
    G1 = Utils.convertNetToGefx(INPUT_FILE_1 + Constants.NET_FORMAT)
    G2 = Utils.convertNetToGefx(INPUT_FILE_2 + Constants.NET_FORMAT)

    # Run kmeans for each cluster count.
    for num_clusters in [8, 20]:
        kmeans_cluster.kmeans_cluster(G1, Constants.INPUT_FILE_1_NAME, num_clusters)
        kmeans_cluster.kmeans_cluster(G2, Constants.INPUT_FILE_2_NAME, num_clusters)

        SDF_PATH = Utils.ComputeSpectralDistance(
            Constants.INPUT_FILE_1_NAME, Constants.INPUT_FILE_2_NAME,
            cluster_algo_name)

        # Compute cluster parameters and find the best matching for our
        # bipartite graph.
        cluster_edge_weight_matrix = compute_cluster_param.find_cluster_edges_SDF(
            SDF_PATH, num_clusters, num_clusters)
        best_cluster_pairs = hungarian.Hungarian_algo(cluster_edge_weight_matrix)

        # Generate alignment score of our graphs.
        generate_alignment.generate_alignment_score(
            best_cluster_pairs, cluster_algo_name, "SDF",
            INPUT_FILE_1, INPUT_FILE_2,
            Constants.INPUT_FILE_1_NAME, Constants.INPUT_FILE_2_NAME,
            ways_type, dataset_type, family_type, num_clusters)
def getMCLFromFile(infile, outdir, mcl_param, clusterfile = './mcl.DAT'):
    """Cluster a network with the external MCL command and save each cluster.

    The command line is built from the ``Constants.MCL`` template, filled
    with ``infile``, ``clusterfile`` and ``mcl_param`` (in that order), and
    run through ``os.system``.  Every line of the resulting ``clusterfile``
    is split on whitespace into node names; the corresponding subgraph of
    the network is written as ``<graphname><index><GEXF_FORMAT>`` where
    ``index`` is the zero-based line number.

    Args:
        infile: Path to the network file to cluster.
        outdir: Base directory; output goes to ``<outdir>/MCL/<graphname>``.
        mcl_param: Third value substituted into the ``Constants.MCL`` template.
        clusterfile: File the cluster listing is read from afterwards.

    Returns:
        The directory the per-cluster GEXF files were written to.
    """
    os.system(Constants.MCL.format(infile, clusterfile, mcl_param))

    # Presumably a networkx graph: it is used via .subgraph() and the result
    # is passed to nx.write_gexf -- confirm against Utils.
    full_graph = Utils.convertNetToGefx(infile)

    # Graph name = file name up to its first dot.
    stem = os.path.basename(infile).split('.')[0]
    target_dir = os.path.join(outdir, 'MCL', stem)
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    with open(clusterfile) as listing:
        for index, row in enumerate(listing):
            members = row.split()
            cluster_subgraph = full_graph.subgraph(members)
            gexf_path = os.path.join(
                target_dir, stem + str(index) + Constants.GEXF_FORMAT)
            nx.write_gexf(cluster_subgraph, gexf_path)

    return target_dir
def generate_alignment_score(best_cluster_pairs, clusterAlgoName, distAlgoName, networkPath1, networkPath2, networkName1, networkName2, ways_type, dataset_type, family_type, hyper_param):
    """Score every matched cluster pair, then the combined alignment.

    For each ``(cluster1, cluster2)`` pair the external command at
    ``Constants.GHOST_PATH`` is run on the two cluster GEXF files and the
    ``*sdf`` / ``*gz`` / ``*af`` files it leaves in the current directory are
    moved into the score directory.  Afterwards all ``.af`` files in the
    score directory are concatenated into one alignment file, the command is
    run once more on the two full graphs ("Prof" score), and the concatenated
    alignment is passed to ``Utils.getEdgeCorrectness`` ("Ours" score).

    Args:
        best_cluster_pairs: Iterable of ``(cluster1, cluster2)`` ids.
        clusterAlgoName: Directory name under ``../../Data`` and under the
            result directory for the clustering algorithm in use.
        distAlgoName: Directory name for the distance measure in use.
        networkPath1, networkPath2: Paths of the full networks without file
            extension.
        networkName1, networkName2: Network names; used for the per-cluster
            file names and the score directory name.
        ways_type, dataset_type, family_type: Used to name the result log
            directory.
        hyper_param: Appended (via ``str``) to the result log directory name.
    """
    SCORE_DIR = os.path.join("../../Data", clusterAlgoName,
                             Constants.FINAL_SCORE_DIR, distAlgoName,
                             networkName1 + "_" + networkName2)
    if not os.path.exists(SCORE_DIR):
        os.makedirs(SCORE_DIR)

    RESULT_LOG_DIR = os.path.join(
        "../..", Constants.FINAL_RESULT, clusterAlgoName,
        ways_type + "_" + dataset_type + "_" + family_type + "_" + "_Par_" + str(hyper_param))
    if not os.path.exists(RESULT_LOG_DIR):
        os.makedirs(RESULT_LOG_DIR)

    for cluster1, cluster2 in best_cluster_pairs:
        RESULT_LOG_FILE = os.path.join(
            RESULT_LOG_DIR,
            str(cluster1) + "_" + str(cluster2) + "_" + Constants.RESULT_LOG_FILE)
        subgraph1Path = os.path.join(
            "../../Data", clusterAlgoName, networkName1,
            networkName1 + str(cluster1) + Constants.GEXF_FORMAT)
        subgraph2Path = os.path.join(
            "../../Data", clusterAlgoName, networkName2,
            networkName2 + str(cluster2) + Constants.GEXF_FORMAT)

        # Single-string form prints the same text under Python 2 and 3.
        print("%s %s" % (subgraph1Path, '-' * 10))
        print("%s %s" % (subgraph2Path, '-' * 10))

        cfg_file = Utils.generateCfgFile(subgraph1Path, subgraph2Path,
                                         dumpDistances=False, dumpSignatures=False,
                                         sigs1=None, sigs2=None)
        # The pipe to ``tee`` needs a shell, hence os.system.
        cmd = Constants.GHOST_PATH + " -c " + cfg_file + " | tee " + RESULT_LOG_FILE
        os.system(cmd)

        # Collect the output files left in the current directory.
        for pattern in ("*sdf", "*gz", "*af"):
            subprocess.call("mv " + pattern + " " + SCORE_DIR, shell=True)

    # Concatenate different cluster mapping into one.
    FINAL_SCORE_DIR = os.path.join(SCORE_DIR, Constants.FINAL_RESULT)
    if not os.path.exists(FINAL_SCORE_DIR):
        os.makedirs(FINAL_SCORE_DIR)

    alignment_chunks = []
    for entry in os.listdir(SCORE_DIR):
        if entry.endswith(".af"):
            with open(os.path.join(SCORE_DIR, entry)) as af_file:
                alignment_chunks.append(af_file.read())
    final_text = "".join(alignment_chunks)

    final_align_file_path = os.path.join(FINAL_SCORE_DIR, Constants.RESULT_FILE)
    # Close (and thereby flush) the file before it is handed to
    # getEdgeCorrectness by path further down; previously the handle stayed
    # open and the data could still be sitting in the write buffer.
    with open(final_align_file_path, 'w+') as align_file:
        align_file.write(final_text)

    # Check whether gexf file exists or not; convert from .net when missing.
    graph1Path = networkPath1 + Constants.GEXF_FORMAT
    graph2Path = networkPath2 + Constants.GEXF_FORMAT
    if not os.path.isfile(graph1Path):
        Utils.convertNetToGefx(networkPath1 + Constants.NET_FORMAT)
    if not os.path.isfile(graph2Path):
        Utils.convertNetToGefx(networkPath2 + Constants.NET_FORMAT)

    # Generate Prof score.
    RESULT_LOG_FILE = os.path.join(
        RESULT_LOG_DIR, Constants.PROF + "_" + Constants.RESULT_LOG_FILE)
    cfg_file = Utils.generateCfgFile(graph1Path, graph2Path,
                                     dumpDistances=False, dumpSignatures=False,
                                     sigs1=None, sigs2=None)
    cmd = Constants.GHOST_PATH + " -c " + cfg_file + " | tee " + RESULT_LOG_FILE
    os.system(cmd)

    # NOTE(review): SCORE_DIR is passed to ``rm`` as an extra operand without
    # ``-r``; for a directory rm refuses to delete it, so this effectively
    # only removes matching files from the current working directory.
    # Kept byte-identical -- confirm the intent.
    for pattern in ("*sdf", "*gz", "*af"):
        subprocess.call("rm " + pattern + " " + SCORE_DIR, shell=True)

    # Generate our Final score.
    RESULT_LOG_FILE = os.path.join(
        RESULT_LOG_DIR, Constants.OURS + "_" + Constants.RESULT_LOG_FILE)
    Utils.getEdgeCorrectness(graph1Path, graph2Path, final_align_file_path,
                             RESULT_LOG_FILE)

    for pattern in ("*sdf", "*gz", "*af"):
        subprocess.call("rm " + pattern + " " + SCORE_DIR, shell=True)