Ejemplo n.º 1
0
def letsStartSpectralAlgorithm():
    """Align two networks via spectral clustering and cluster matching.

    Steps, in order:
      1. Remove ``../../Data/SpectralC`` and load both input networks.
      2. Split each network into ``num_clusters`` spectral clusters.
      3. Compute the spectral distance data and derive a cluster-to-cluster
         weight matrix from it.
      4. Pair clusters with ``hungarian.Hungarian_algo``.
      5. For every pair: if both clusters have at least 900 nodes, hand them
         to the hierarchical clustering step; otherwise write them as GEXF
         files and score them directly.
      6. Generate the final score.

    NOTE(review): ``ways_type``, ``dataset_type`` and ``family_type`` are
    neither parameters nor locals of this function; presumably they are
    module-level names -- confirm against the rest of the module.
    """
    # Always start from a clean SpectralC working directory.
    subprocess.call("rm -rf ../../Data/SpectralC", shell=True)

    # Both networks live in the same directory; only the file name differs.
    network_dir = os.path.join(Constants.NAPA_PATH, ways_type, dataset_type, family_type)
    INPUT_FILE_1 = os.path.join(network_dir, Constants.INPUT_FILE_1_NAME)
    INPUT_FILE_2 = os.path.join(network_dir, Constants.INPUT_FILE_2_NAME)
    G1 = Utils.convertNetToGefx(INPUT_FILE_1 + Constants.NET_FORMAT)
    G2 = Utils.convertNetToGefx(INPUT_FILE_2 + Constants.NET_FORMAT)

    num_alignment_pairs = 0
    num_clusters = 4

    # Initial clustering of each network.
    subgraphs1 = spectral_clustering.spectral_clustering(G1, Constants.INPUT_FILE_1_NAME, num_clusters)
    subgraphs2 = spectral_clustering.spectral_clustering(G2, Constants.INPUT_FILE_2_NAME, num_clusters)

    # Spectral distance data for the clusters produced above.
    SDF_PATH = Utils.ComputeSpectralDistance(
        Constants.INPUT_FILE_1_NAME, Constants.INPUT_FILE_2_NAME, "SpectralC")

    # Best matching over the bipartite cluster graph.
    weight_matrix = compute_cluster_param.find_cluster_spectraledges_SDF(
        SDF_PATH, num_clusters, num_clusters)
    matched_pairs = hungarian.Hungarian_algo(weight_matrix)

    interm_dir = "../../Data/SpectralC/IntermC"
    for left_idx, right_idx in matched_pairs:
        left_graph = subgraphs1[left_idx]
        right_graph = subgraphs2[right_idx]

        if len(left_graph.nodes()) < 900 or len(right_graph.nodes()) < 900:
            # At least one side is small: score this pair directly.
            # The intermediate directory is recreated empty for every pair.
            if os.path.exists(interm_dir):
                subprocess.call("rm -rf "+interm_dir, shell=True)
            os.makedirs(interm_dir)

            pair_tag = str(num_alignment_pairs)
            left_path = os.path.join(interm_dir, "A_" + pair_tag + ".gexf")
            right_path = os.path.join(interm_dir, "B_" + pair_tag + ".gexf")
            nx.write_gexf(left_graph, left_path)
            nx.write_gexf(right_graph, right_path)
            generate_alignment.generate_spectralcluster_alignment_score(
                left_path, right_path, "SpectralC", "SDF",
                ways_type, dataset_type, family_type, num_clusters, num_alignment_pairs)
            num_alignment_pairs += 1
        else:
            # Both sides are large: cluster them further. The callee returns
            # the updated running pair counter.
            num_alignment_pairs = heirarchical_clustering.heirarchical_clustering_spec(
                left_graph, right_graph, num_alignment_pairs, SDF_PATH, num_clusters,
                ways_type, dataset_type, family_type)

    # Final output over the whole networks.
    generate_alignment.generateSpectralFinalScore(
        INPUT_FILE_1, INPUT_FILE_2, ways_type, dataset_type, family_type, num_clusters)
def generatekmeansFinalScore(networkPath1, networkPath2,ways_type,dataset_type, family_type, hyper_param ):
    """Produce the reference and merged final scores for a KmeansH run.

    Args:
        networkPath1: Path of the first network without its file extension.
        networkPath2: Path of the second network without its file extension.
        ways_type, dataset_type, family_type: Strings used to name the
            result-log directory.
        hyper_param: Value appended (via ``str``) to the result-log
            directory name.

    The function first runs the external tool at ``Constants.GHOST_PATH`` on
    the two whole networks and logs its output, then concatenates every
    ``.af`` file found in the score directory into one alignment file and
    passes that file to ``Utils.getEdgeCorrectness``.
    """
    SCORE_DIR = os.path.join("../../Data", "KmeansH", Constants.FINAL_SCORE_DIR, "SDF","A_B")

    # Make sure a GEXF version of each network exists.
    graph1Path = networkPath1 + Constants.GEXF_FORMAT
    graph2Path = networkPath2 + Constants.GEXF_FORMAT
    if not os.path.isfile(graph1Path):
        Utils.convertNetToGefx(networkPath1 + Constants.NET_FORMAT)
    if not os.path.isfile(graph2Path):
        Utils.convertNetToGefx(networkPath2 + Constants.NET_FORMAT)

    RESULT_LOG_DIR =  os.path.join("../..", Constants.FINAL_RESULT, "KmeansH" , ways_type+"_"+dataset_type+"_"+family_type+"_"+
                                   "_Par_" +str(hyper_param))
    # `tee` cannot create missing parent directories, so create the log
    # directory up front.
    if not os.path.exists(RESULT_LOG_DIR):
        os.makedirs(RESULT_LOG_DIR)

    # Generate Prof score
    RESULT_LOG_FILE = os.path.join(RESULT_LOG_DIR, Constants.PROF+"_"+Constants.RESULT_LOG_FILE)
    cfg_file = Utils.generateCfgFile (graph1Path, graph2Path, dumpDistances=False, dumpSignatures=False,
                     sigs1=None, sigs2=None)
    cmd = Constants.GHOST_PATH + " -c "+ cfg_file + " | tee "+ RESULT_LOG_FILE
    os.system(cmd)
    # Drop the files the run left in the current working directory.
    # NOTE(review): SCORE_DIR is also passed to `rm` as an operand; without
    # -r, rm does not remove a directory, so only the cwd globs are affected.
    # Confirm that this is the intent.
    subprocess.call("rm *sdf "+SCORE_DIR, shell=True)
    subprocess.call("rm *gz "+SCORE_DIR, shell=True)
    subprocess.call("rm *af "+SCORE_DIR, shell=True)

    # Generate our Final score: concatenate the per-cluster .af files.
    FINAL_SCORE_DIR = os.path.join(SCORE_DIR,Constants.FINAL_RESULT)
    if not os.path.exists(FINAL_SCORE_DIR):
        os.makedirs(FINAL_SCORE_DIR)
    alignment_parts = []
    for entry in os.listdir(SCORE_DIR):
        if entry.endswith(".af"):
            with open(os.path.join(SCORE_DIR, entry)) as part_file:
                alignment_parts.append(part_file.read())
    final_align_file_path = os.path.join(FINAL_SCORE_DIR,Constants.RESULT_FILE)
    # Close (and thereby flush) the merged file before its path is handed to
    # getEdgeCorrectness below; otherwise buffered text may not be on disk yet.
    with open(final_align_file_path, 'w') as merged_file:
        merged_file.write("".join(alignment_parts))

    RESULT_LOG_FILE = os.path.join(RESULT_LOG_DIR, Constants.OURS+"_"+Constants.RESULT_LOG_FILE)
    Utils.getEdgeCorrectness(graph1Path, graph2Path, final_align_file_path, RESULT_LOG_FILE)
    subprocess.call("rm *sdf "+SCORE_DIR, shell=True)
    subprocess.call("rm *gz "+SCORE_DIR, shell=True)
    subprocess.call("rm *af "+SCORE_DIR, shell=True)

Ejemplo n.º 3
0
def letsStartKMeans(ways_type, dataset_type, family_type):
    """Run the k-means based alignment for 8 and then 20 clusters.

    Args:
        ways_type, dataset_type, family_type: Path components (below
            ``Constants.NAPA_PATH``) locating the two input networks; they
            are also forwarded to the alignment scoring step.

    For each cluster count both networks are clustered, the spectral
    distance data is computed, clusters are paired with
    ``hungarian.Hungarian_algo`` and the pairs are scored.
    """
    # NOTE(review): this removes "KMeans" while the algorithm name used below
    # is "Kmeans" -- confirm which directory is meant to be cleaned.
    subprocess.call("rm -rf ../../Data/KMeans", shell=True)

    # Both networks share a directory; only the file name differs.
    network_dir = os.path.join(Constants.NAPA_PATH, ways_type, dataset_type, family_type)
    INPUT_FILE_1 = os.path.join(network_dir, Constants.INPUT_FILE_1_NAME)
    INPUT_FILE_2 = os.path.join(network_dir, Constants.INPUT_FILE_2_NAME)
    G1 = Utils.convertNetToGefx(INPUT_FILE_1 + Constants.NET_FORMAT)
    G2 = Utils.convertNetToGefx(INPUT_FILE_2 + Constants.NET_FORMAT)

    for num_clusters in [8, 20]:
        # Cluster each network independently.
        kmeans_cluster.kmeans_cluster(G1, Constants.INPUT_FILE_1_NAME, num_clusters)
        kmeans_cluster.kmeans_cluster(G2, Constants.INPUT_FILE_2_NAME, num_clusters)
        SDF_PATH = Utils.ComputeSpectralDistance(
            Constants.INPUT_FILE_1_NAME, Constants.INPUT_FILE_2_NAME, "Kmeans")

        # Best matching over the bipartite cluster graph.
        weight_matrix = compute_cluster_param.find_cluster_edges_SDF(
            SDF_PATH, num_clusters, num_clusters)
        matched_pairs = hungarian.Hungarian_algo(weight_matrix)

        # Score the matched cluster pairs and the whole networks.
        generate_alignment.generate_alignment_score(
            matched_pairs, "Kmeans", "SDF", INPUT_FILE_1, INPUT_FILE_2,
            Constants.INPUT_FILE_1_NAME, Constants.INPUT_FILE_2_NAME,
            ways_type, dataset_type, family_type, num_clusters)
Ejemplo n.º 4
0
def getMCLFromFile(infile, outdir, mcl_param, clusterfile='./mcl.DAT'):
    """Run the MCL command on a network and save each cluster as a GEXF file.

    Args:
        infile: Path of the network file to cluster.
        outdir: Base directory; output goes to ``<outdir>/MCL/<graphname>``.
        mcl_param: Value substituted into the ``Constants.MCL`` command
            template together with ``infile`` and ``clusterfile``.
        clusterfile: File the command is expected to write, one cluster per
            line with whitespace-separated node names.

    Returns:
        The directory the per-cluster GEXF files were written to.
    """
    os.system(Constants.MCL.format(infile, clusterfile, mcl_param))

    full_graph = Utils.convertNetToGefx(infile)
    # Graph name = input file name up to its first dot.
    graphname = os.path.basename(infile).split('.')[0]
    outdirname = os.path.join(outdir, 'MCL', graphname)
    if os.path.exists(outdirname) is False:
        os.makedirs(outdirname)

    with open(clusterfile) as cluster_lines:
        # Line k of the cluster file becomes <graphname><k><GEXF_FORMAT>.
        for index, line in enumerate(cluster_lines):
            members = line.split()
            target = os.path.join(outdirname, graphname + str(index) + Constants.GEXF_FORMAT)
            nx.write_gexf(full_graph.subgraph(members), target)
    return outdirname
def generate_alignment_score(best_cluster_pairs,clusterAlgoName, distAlgoName, networkPath1, networkPath2, networkName1, networkName2,
                             ways_type,dataset_type, family_type, hyper_param):
    """Score matched cluster pairs and the merged whole-network alignment.

    Args:
        best_cluster_pairs: Iterable of ``(cluster1, cluster2)`` pairs.
        clusterAlgoName: Directory name under ``../../Data`` that holds the
            per-cluster GEXF files and receives the score files.
        distAlgoName: Sub-directory name used inside the score directory.
        networkPath1, networkPath2: Paths of the two full networks without
            file extension.
        networkName1, networkName2: Network names used to build the cluster
            file names and the score directory name.
        ways_type, dataset_type, family_type: Strings used to name the
            result-log directory.
        hyper_param: Value appended (via ``str``) to the result-log
            directory name.

    For each pair the external tool at ``Constants.GHOST_PATH`` is run and
    its ``*sdf``/``*gz``/``*af`` outputs are moved into the score directory.
    All ``.af`` files there are then concatenated into one alignment file,
    the tool is run once on the full networks, and finally the merged
    alignment is passed to ``Utils.getEdgeCorrectness``.
    """
    SCORE_DIR = os.path.join("../../Data", clusterAlgoName, Constants.FINAL_SCORE_DIR, distAlgoName, networkName1+"_"+networkName2)
    if not os.path.exists(SCORE_DIR):
        os.makedirs(SCORE_DIR)
    RESULT_LOG_DIR =  os.path.join("../..", Constants.FINAL_RESULT, clusterAlgoName , ways_type+"_"+dataset_type+"_"+family_type+"_"+
                                   "_Par_" +str(hyper_param))
    if not os.path.exists(RESULT_LOG_DIR):
        os.makedirs(RESULT_LOG_DIR)

    for cluster1,cluster2 in best_cluster_pairs:
        RESULT_LOG_FILE = os.path.join(RESULT_LOG_DIR, str(cluster1)+"_"+str(cluster2)+"_"+Constants.RESULT_LOG_FILE)
        subgraph1Path = os.path.join("../../Data", clusterAlgoName, networkName1, networkName1+str(cluster1) + Constants.GEXF_FORMAT)
        subgraph2Path = os.path.join("../../Data", clusterAlgoName, networkName2, networkName2+str(cluster2)  +  Constants.GEXF_FORMAT)
        # Single-argument print call: same text on Python 2 and Python 3.
        print("%s %s" % (subgraph1Path, '-'*10))
        print("%s %s" % (subgraph2Path, '-'*10))
        cfg_file = Utils.generateCfgFile (subgraph1Path, subgraph2Path, dumpDistances=False, dumpSignatures=False,
                     sigs1=None, sigs2=None)
        cmd = Constants.GHOST_PATH + " -c "+ cfg_file + " | tee "+ RESULT_LOG_FILE
        os.system(cmd)
        # Collect this pair's outputs from the current working directory.
        subprocess.call("mv *sdf "+SCORE_DIR, shell=True)
        subprocess.call("mv *gz "+SCORE_DIR, shell=True)
        subprocess.call("mv *af "+SCORE_DIR, shell=True)

    # Concatenate different cluster mapping into one
    FINAL_SCORE_DIR = os.path.join(SCORE_DIR,Constants.FINAL_RESULT)
    if not os.path.exists(FINAL_SCORE_DIR):
        os.makedirs(FINAL_SCORE_DIR)
    alignment_parts = []
    for entry in os.listdir(SCORE_DIR):
        if entry.endswith(".af"):
            with open(os.path.join(SCORE_DIR, entry)) as part_file:
                alignment_parts.append(part_file.read())
    final_align_file_path = os.path.join(FINAL_SCORE_DIR,Constants.RESULT_FILE)
    # Close (and thereby flush) the merged file here; its path is handed to
    # getEdgeCorrectness further down and must be complete on disk by then.
    with open(final_align_file_path, 'w') as merged_file:
        merged_file.write("".join(alignment_parts))

    # Make sure a GEXF version of each full network exists.
    graph1Path = networkPath1 + Constants.GEXF_FORMAT
    graph2Path = networkPath2 + Constants.GEXF_FORMAT
    if not os.path.isfile(graph1Path):
        Utils.convertNetToGefx(networkPath1 + Constants.NET_FORMAT)
    if not os.path.isfile(graph2Path):
        Utils.convertNetToGefx(networkPath2 + Constants.NET_FORMAT)

    # Generate Prof score
    RESULT_LOG_FILE = os.path.join(RESULT_LOG_DIR, Constants.PROF+"_"+Constants.RESULT_LOG_FILE)
    cfg_file = Utils.generateCfgFile (graph1Path, graph2Path, dumpDistances=False, dumpSignatures=False,
                     sigs1=None, sigs2=None)
    cmd = Constants.GHOST_PATH + " -c "+ cfg_file + " | tee "+ RESULT_LOG_FILE
    os.system(cmd)
    # Drop the files the full-network run left in the working directory.
    # NOTE(review): SCORE_DIR is also passed to `rm` as an operand; without
    # -r, rm does not remove a directory, so only the cwd globs are affected.
    # Confirm that this is the intent.
    subprocess.call("rm *sdf "+SCORE_DIR, shell=True)
    subprocess.call("rm *gz "+SCORE_DIR, shell=True)
    subprocess.call("rm *af "+SCORE_DIR, shell=True)

    # Generate our Final score
    RESULT_LOG_FILE = os.path.join(RESULT_LOG_DIR, Constants.OURS+"_"+Constants.RESULT_LOG_FILE)
    Utils.getEdgeCorrectness(graph1Path, graph2Path, final_align_file_path, RESULT_LOG_FILE)
    subprocess.call("rm *sdf "+SCORE_DIR, shell=True)
    subprocess.call("rm *gz "+SCORE_DIR, shell=True)
    subprocess.call("rm *af "+SCORE_DIR, shell=True)