Example #1
    def __set_necessary_info(self):
        v_degree = list()
        v_index = list()
        v_partition = list()
        memberships = self.__partitions._membership

        for index in range(len(memberships)):
            if memberships[index] == self.__community_index_0:
                v_index.append(index)
                v_degree.append(self.__graph.degree(index))
                v_partition.append(0)
            elif memberships[index] == self.__community_index_1:
                v_index.append(index)
                v_degree.append(self.__graph.degree(index))
                v_partition.append(1)

        self.__degree_list = v_degree
        self.__vertex_list = v_index
        self.__vertex_part = v_partition

        # The merged community keeps the index self.__community_index_1.
        partition_expected = VertexClustering(
            graph=self.__partitions._graph,
            membership=list(self.__partitions._membership))
        for i in range(len(partition_expected._membership)):
            if partition_expected._membership[i] == self.__community_index_0:
                partition_expected._membership[i] = self.__community_index_1
        # Keep community indices contiguous: move the highest index into the
        # slot freed by the merge.
        for i in range(len(partition_expected._membership)):
            if partition_expected._membership[i] == partition_expected._len - 1:
                partition_expected._membership[i] = self.__community_index_0
        partition_expected._len -= 1
        # print(partition_expected._membership)
        self.__partitions_expected = partition_expected
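
Note: the example above rewrites the private _membership and _len fields of VertexClustering to merge two communities in place. A minimal sketch of the same relabeling through the public API only (merge_communities is a hypothetical helper, not part of python-igraph):

from igraph import VertexClustering

def merge_communities(parts, a, b):
    # Relabel every member of community a as community b.
    membership = [b if m == a else m for m in parts.membership]
    # Keep labels contiguous: move the highest old label into the freed slot a.
    top = len(parts) - 1
    if top != a:
        membership = [a if m == top else m for m in membership]
    return VertexClustering(parts.graph, membership)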
Example #2
    def __init__(self,
                 graph,
                 value=None,
                 cut=None,
                 partition=None,
                 partition2=None):
        """Initializes the cut.

        This should not be called directly, everything is taken care of by
        the functions that return cuts.
        """
        # Input validation
        if partition is None or cut is None:
            raise ValueError("partition and cut must be given")

        # Set up a membership vector, initialize parent class
        membership = [1] * graph.vcount()
        for vid in partition:
            membership[vid] = 0
        VertexClustering.__init__(self, graph, membership)

        if value is None:
            # Value of the cut not given, count the number of edges
            value = len(cut)
        self._value = float(value)

        self._partition = sorted(partition)
        self._cut = cut
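
As the docstring says, this constructor is not meant to be called directly. A minimal usage sketch, assuming python-igraph, where a Cut comes back from Graph.mincut():

from igraph import Graph

g = Graph.Famous("Zachary")
cut = g.mincut()              # returns a Cut, a VertexClustering subclass
print(cut.value)              # capacity of the cut (edge count when unweighted)
for side in cut:              # iterating yields the two vertex sets
    print(len(side))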
Example #3
    def _desc(self, i):
        parts = VertexClustering(self.graph, membership=self.initial_membership)
        learners = self.learning.learners

        vcount = self.graph.vcount()

        actions = [learner.action for learner in learners]
        cls_actions = Counter(actions)
        dis_actions = [round(c / vcount * 100, 2) for c in cls_actions.values()]  # avoid shadowing the parameter i
        dis_actions.sort(reverse=True)
        clu_actions = VertexClustering(self.graph, membership=actions)

        l_conformity = count_conformity(parts, actions, self.available_action)
        g_conformity = count_security_index(self.graph, clu_actions)
        l_diversity = count_diversity(parts, actions, len(cls_actions))
        g_diversity = count_diversity(parts, actions, self.available_action)
        avg_payoff = sum(self.learning.payoff) / len(self.learning.payoff)
        modularity = clu_actions.modularity

        result = dict()
        result['index'] = data_format(i, width=6)
        result['exist_action'] = data_format(len(cls_actions), width=4)
        result['ldiversity'] = data_format(l_diversity)
        result['gdiversity'] = data_format(g_diversity)
        result['lconformity'] = data_format(l_conformity)
        result['gconformity'] = data_format(g_conformity)
        result['avg_payoff'] = data_format(avg_payoff)
        result['modularity'] = data_format(modularity)
        result['action_dis'] = dis_actions

        logger.info(json.dumps(result))
Example #4
def desc(graph: Graph):
    parts = VertexClustering(graph, [int(i) for i in graph.vs['part']])
    print(graph.vcount(), graph.ecount())
    print(f"Part Num: {len(parts)}")
    print(f"Part Size: {[len(part) for part in parts]}")
    print(f"Modularity: {parts.modularity}")
    in_edges = 0
    for subgraph in parts.subgraphs():
        in_edges += subgraph.ecount()

    print(f"fraction: {in_edges / graph.ecount()}")
    print("Degree Distribution: ")
    print(graph.degree_distribution())
Example #5
def mergePartitions(clustering, g):
  modularity = clustering.modularity
  newMembership = clustering.membership
  improved = False
  clusterIndices = list(set(clustering.membership))
  for i in range(len(clusterIndices) - 1):
    for j in range(i + 1, len(clusterIndices)):
      #Check if the ith and jth clusters are neighbors, i.e. they have vertices that share an edge
      cxi = clusterIndices[i]
      cxj = clusterIndices[j]
      if isNeighboringCluster(clustering[cxi], clustering[cxj], g):
        #Merge the clusters and keep the merge only if the modularity value increases.
        #Use a list, not map(): VertexClustering needs a sequence and the
        #membership is iterated again on later passes.
        m = [cxi if x == cxj else x for x in newMembership] #Replaces cluster index j with i
        newClustering = igraph.clustering.VertexClustering(g, membership=m)
        if newClustering.modularity <= modularity:
          continue
        modularity = newClustering.modularity
        newMembership = m
        improved = True
  if not improved:
    #The modularity didn't increase, so return the original clustering.
    #(An identity check via id() is unreliable here because the membership
    #property returns a fresh copy on every access.)
    return clustering

  # Renumber cluster ids so that they are contiguous from 0.
  clusterIdMap = {}
  currentId = -1
  renumberedMembership = []
  for cid in newMembership:
    if cid not in clusterIdMap:
      currentId += 1
      clusterIdMap[cid] = currentId

    renumberedMembership.append(clusterIdMap[cid])

  return VertexClustering(g, membership=renumberedMembership)
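
A hedged usage sketch (isNeighboringCluster is assumed to be defined elsewhere in the same repository): start from any community detection result and let mergePartitions greedily combine neighboring clusters while modularity improves.

from igraph import Graph

g = Graph.Famous("Zachary")
initial = g.community_multilevel()      # any VertexClustering works as a start
merged = mergePartitions(initial, g)
print(initial.modularity, merged.modularity)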
Example #6
def id_communities(g, layout_list, eps=0.42, min_samples=10):
    layout_distance = spatial.distance.squareform(spatial.distance.pdist(layout_list))
    layout_similarity = 1 - (layout_distance / np.max(layout_distance))
    # eps and min_samples belong to the DBSCAN constructor, not to fit().
    community_idx_list = DBSCAN(eps=eps, min_samples=min_samples).fit(layout_similarity).labels_
    if -1 in community_idx_list:
        # Shift labels so that DBSCAN's noise label (-1) becomes cluster 0.
        community_idx_list = list(np.array(community_idx_list) + 1)
    community_idx_list = [int(x) for x in community_idx_list]
    vertex_clustering = VertexClustering(g, community_idx_list)
    return g, community_idx_list, vertex_clustering
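
The snippet above feeds the rows of the similarity matrix to DBSCAN as feature vectors. scikit-learn's DBSCAN can also consume a distance matrix directly via metric='precomputed'; a minimal alternative sketch (id_communities_precomputed is a hypothetical name, and eps then lives on the distance scale, so the 0.42 default would not carry over):

import numpy as np
from scipy import spatial
from sklearn.cluster import DBSCAN
from igraph import VertexClustering

def id_communities_precomputed(g, layout_list, eps=0.25, min_samples=10):
    dist = spatial.distance.squareform(spatial.distance.pdist(layout_list))
    dist = dist / np.max(dist)   # normalize distances into [0, 1]
    labels = DBSCAN(eps=eps, min_samples=min_samples,
                    metric='precomputed').fit(dist).labels_
    if -1 in labels:
        labels = labels + 1      # shift so DBSCAN's noise label (-1) becomes 0
    return VertexClustering(g, [int(x) for x in labels])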
Example #7
 def update_membership(self, membership):
     """
     update membership manual
     :param membership: list[int]
     :return:
     """
     self.membership = membership
     self.parts = VertexClustering(self.graph, self.membership)
     self._set_sorted_part_degree()
Example #8
def fast_resistance(graph):
    pyr = PyResistance.from_igraph_Graph(graph)
    parts, _ = pyr.apply_method()
    membership = [0] * graph.vcount()
    for num, part in enumerate(parts):
        for node in part:
            membership[node] = num

    return VertexClustering(graph, membership=membership)
Example #9
    def __set_necessary_info(self):
        v_degree = list()
        v_index = list()
        v_partition = list()
        memberships = self.__partitions._membership
        for index in range(len(memberships)):
            if memberships[index] == self.__community_index_0:
                v_index.append(index)
                v_degree.append(self.__graph.degree(index))
                v_partition.append(0)
            elif memberships[index] == self.__community_index_1:
                v_index.append(index)
                v_degree.append(self.__graph.degree(index))
                v_partition.append(1)

        self.__degree_list = v_degree
        self.__vertex_list = v_index
        self.__vertex_part = v_partition

        partition_expected = VertexClustering(
            graph=self.__partitions._graph,
            membership=list(self.__partitions._membership))
        for i in range(len(partition_expected._membership)):
            if partition_expected._membership[i] == self.__community_index_0:
                partition_expected._membership[i] = self.__community_index_1
        for i in range(len(partition_expected._membership)):
            if partition_expected._membership[i] == partition_expected._len - 1:
                partition_expected._membership[i] = self.__community_index_0
        partition_expected._len -= 1

        self.__partitions_expected = partition_expected

        # for i in range(0, 11):
        #    print(self.__partitions.subgraph(i).vcount())
        # for i in range(0, 10):
        #    print(self.__partitions_expected.subgraph(i).vcount())

        for index, part in enumerate(self.__partitions_expected):
            subgraph: Graph = self.__partitions_expected.subgraph(index)
            self.__partitions_expected_degree.append(2 * subgraph.ecount())
            self.__partitions_expected_volume.append(
                sum(self.__graph.degree(part)))
Example #10
    def __init__(self, graph, membership=None):
        self.graph = graph
        self.membership = membership if membership is not None else [
            int(i) for i in graph.vs['part']
        ]
        self.parts = VertexClustering(self.graph, self.membership)

        self.neighbors = None
        self.sorted_parts_degree = None

        self._preprocess()
Example #11
    def __init__(self, graph, value=None, cut=None, partition=None,
            partition2=None):
        """Initializes the cut.

        This should not be called directly, everything is taken care of by
        the functions that return cuts.
        """
        # Input validation
        if partition is None or cut is None:
            raise ValueError("partition and cut must be given")

        # Set up a membership vector, initialize parent class
        membership = [1] * graph.vcount()
        for vid in partition:
            membership[vid] = 0
        VertexClustering.__init__(self, graph, membership)

        if value is None:
            # Value of the cut not given, count the number of edges
            value = len(cut)
        self._value = float(value)

        self._partition = sorted(partition)
        self._cut = cut
Example #12
def id_communities(g, coordinates):
    layout_distance = spatial.distance.squareform(
        spatial.distance.pdist(coordinates))
    max_distance = np.max(layout_distance)
    print('max_distance: %s' % max_distance)
    layout_distance = layout_distance / max_distance
    mean_distance = np.mean(layout_distance)
    print('mean_distance: %s' % mean_distance)
    eps = mean_distance / .85
    min_samples = int(round(4.5 / mean_distance))  # DBSCAN expects an integer
    print('eps: %s' % eps)
    print('min_samples: %s' % min_samples)

    layout_similarity = 1 - layout_distance
    # eps and min_samples belong to the DBSCAN constructor, not to fit().
    community_idx_list = DBSCAN(eps=eps,
                                min_samples=min_samples).fit(layout_similarity).labels_
    if -1 in community_idx_list:
        print('-1 in community_idx_list')
        community_idx_list = list(np.array(community_idx_list) + 1)
    community_idx_list = [int(x) for x in community_idx_list]
    print(set(community_idx_list))
    return VertexClustering(g, community_idx_list)
Example #13
def subgraphMining(g, pattern, ig, clustering, frequentPatterns):
  clustering = mergePartitions(clustering, ig)
  gMeasure = len(clustering)
  if gMeasure < 2: #TODO: Remove hardcoded threshold
    return

  frequentPatterns.add(pattern)

  #Get child patterns
  for cp in getChildPatterns(g, pattern):
    if cp in PATTERN_INSTANCES:
      continue
    childInstanceParentMap = getPatternInstances(g, cp, pattern)
    allInstances = set(childInstanceParentMap.keys())
    PATTERN_INSTANCES[cp] = allInstances

    #Create instance graph of child pattern
    childIg = createInstanceGraphForPattern(cp)

    #Assign child instances to clusters based on parent clusters
    clusterId = 0
    membership = []
    parentClusterIds = {}
    for cluster in clustering: #This is the clustering of the parent instance graph.
      for vertexIndex in cluster:
        parentInstance = ig.vs[vertexIndex]["instance"]
        parentClusterIds[parentInstance] = clusterId
      clusterId += 1

    for childVertex in childIg.vs:
      childInstance = childVertex["instance"] 
      membership.append(parentClusterIds[childInstanceParentMap[childInstance]])

    childIgClustering = VertexClustering(childIg, membership=membership)

    subgraphMining(g, cp, childIg, childIgClustering, frequentPatterns)
  return
Example #14
def copy_partition(partition) -> VertexClustering:
    return VertexClustering(graph=partition._graph,
                            membership=list(partition._membership))
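
copy_partition reaches into the private _graph and _membership attributes. An equivalent sketch using only the public properties (copy_partition_public is a hypothetical name, assuming python-igraph):

from igraph import VertexClustering

def copy_partition_public(partition) -> VertexClustering:
    return VertexClustering(partition.graph, membership=list(partition.membership))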
Example #15
def run_clustering_on_graph(topic_id='',method='fast_greedy',experiment=''):
    if not topic_id:
        raise ValueError("topic_id is required")
    g,query_sentence,sims,query_node,query_index=load_topic_matrix(topic_id)
    #method='betweenness'   #taking too much time, we will skip it
    #method='walktrap'
    #method='leading_eigenvector'
    #g=filter_graph(g)
    print ("EXPERIMENT: "+str(experiment))
    
    if 'do7_sum_nodes' in experiment:
        #After building the graph from the cos sim matrix, rank the sentences according to total score.
        clusters=[]
        cluster_weights=[]
        return g,clusters,cluster_weights,query_sentence,query_index

    elif 'do6' in experiment:
        print ("Calculate random walk scores...")
        g_random_walk_scores=calc_random_walk_with_restart(g, query_index)

        print ("Get edge distributions")
        query1_cosim_values=[]
        query2_cosim_values=[]
        cosim_values=[]
        ws_values=[]
        for i,e in enumerate(g.es): #FOR EACH EDGE
            #edge weight#  weight=e['weight']

            # Get nodes of edge
            vertex1=g.vs[e.tuple[0]] #node1 idx
            vertex2=g.vs[e.tuple[1]] #node2 idx
            
            # Get node walk scores
            walk_score1,vertex1,rank1=g_random_walk_scores[vertex1['s_idx']]
            walk_score2,vertex2,rank2=g_random_walk_scores[vertex2['s_idx']]
            
            #Get random walk score average
            rws_avg=(walk_score1+walk_score2)/2
            
            #Get edge cos sim score (current weight)
            edge_cosim=sims[vertex1['s_idx']][vertex2['s_idx']]
            
            query_cosim1=sims[query_index][vertex1['s_idx']] #For do6_2
            query_cosim2=sims[query_index][vertex2['s_idx']] #For do6_2
            
            #Store values for normalization
            query1_cosim_values+=[query_cosim1] #For do6_2
            query2_cosim_values+=[query_cosim2] #For do6_2
            cosim_values+=[edge_cosim]
            ws_values+=[rws_avg]
           
        #Calculate percent distributions
        if True: #Do rank based
            query1_cosim=calc_rank_percent_distribution(query1_cosim_values) #For do6_2
            query2_cosim=calc_rank_percent_distribution(query2_cosim_values) #For do6_2
            cosim_dist=calc_rank_percent_distribution(cosim_values)
            ws_dist=calc_rank_percent_distribution(ws_values)
        else:
            query1_cosim=normalize_max_min(query1_cosim_values) #For do6_2
            query2_cosim=normalize_max_min(query2_cosim_values) #For do6_2
            cosim_dist=normalize_max_min(cosim_values)
            ws_dist=normalize_max_min(ws_values)
        
        #Use percent distributions to calculate new weight
        for i,e in enumerate(g.es): #FOR EACH EDGE
            if experiment=='do6_two_scores_1':
                weight=cosim_dist[i]  #max of cosim OR ws_dist     (do6_1): edge_weight= ( Max[(cos sim) , [(node1_rws)+(node2_rws)]/2)] ]
            elif experiment=='do6_two_scores_2':
                weight=(cosim_dist[i]+(query1_cosim[i]+query2_cosim[i])/2)/2   #  (do6_2): edge_weight=((cos sim) + [(node1_qcs+node2_qcs)/2])/2
            else: #Standard do6_two_scores
                weight=(cosim_dist[i]+ws_dist[i])/2

#            print ("WEIGHT from: "+str(g.es[i]['weight'])+" to: "+str(weight)+" via: "+str(cosim_dist[i])+" and "+str(ws_dist[i]))
            g.es[i]['weight']=weight

    else:
        pass #Standard running no experiments
    
    
    communities=[]
    clusters=[]
    print ("Running clustering ["+method+"] on graph...")
    Perf.start()
    
    markov_subgraphs=[]
    uG='' #Undirected version of the graph; set by the clustering branches below and returned to the caller
    
    if 'markov' in method:
        print ("---> doing markov clustering")
        matrix=to_sparse(g,weight_attr='weight')
        result = mc.run_mcl(matrix)           # run MCL with default parameters
        clusters = mc.get_clusters(result) 
#D#        print ("GOT CLUSTERS: "+str(clusters))

        #Initialize subgraph attribute on every vertex
        for idx,v in enumerate(g.vs):
            g.vs[idx]['subgraph']=''

        for cluster_id, indexes in enumerate(clusters):
            #mc.get_clusters returns tuples of node indices; the cluster id is the position in the list
            if not indexes:continue
            print ("markov cluster "+str(cluster_id)+" has: "+str(indexes))
            
            #Store subgraph index into back into igraph
            for idx in indexes:
                g.vs[idx]['subgraph']=cluster_id

        #Add subgraph cluster indexes back into graph
        print ("AT 0: "+str(g.vs[0]))
        # VertexClustering.FromAttribute is a class method; build the clustering
        # directly from the 'subgraph' vertex attribute.
        clusters = VertexClustering.FromAttribute(g, 'subgraph')
        #print ("GOT CLUSTER: "+str(clusters))
    
    if 'betweenness' in method:
        cluster_count=15 #number of clusters to cut the betweenness dendrogram into
        print ("**betweenness requires edge trimming.  Doing that now...")
        g=filter_graph(g,the_type='remove_low_weights')
        print ("Calculating edge betweenness...")
        communities=g.community_edge_betweenness(clusters=cluster_count,weights='weight') #directed=
        print ("Fixing/checking dendogram -- must be fully connected.")
        communities=fix_dendrogram(g, communities)
    #########################################################

    if 'fast_greedy' in method:
        #** only works with undirected graphs
        uG = g.as_undirected(combine_edges = 'mean') #Retain edge attributes: max, first.
        communities = uG.community_fastgreedy(weights = 'weight')
        
        #When an algorithm in igraph produces a VertexDendrogram, it may optionally
        #produce a "hint" that tells us where to cut the dendrogram (i.e. after how
        #many merges) to obtain a VertexClustering that is in some sense optimal.
        #For instance, the VertexDendrogram produced by community_fastgreedy()
        #proposes that the dendrogram be cut at the point where modularity is
        #maximized. Running as_clustering() on a VertexDendrogram simply uses that
        #hint to flatten the dendrogram into a clustering, but you may override it
        #by passing the desired number of clusters to as_clustering().
        #As for the "distance" between two communities: most community detection
        #methods don't give you that information. They produce a sequence of merges
        #from individual vertices up to a mega-community encapsulating everyone,
        #and no "distance" is encoded in the dendrogram; its branches have no
        #"length". The best you can do is probably to go back to the graph and
        #check the edge density between communities as an indication of closeness.
        #https://stackoverflow.com/questions/17413836/igraph-in-python-relation-between-a-vertexdendrogram-object-and-vertexclusterin
        

    #########################################################
    if 'walktrap' in method:
        #** only works with undirected graphs
        uG = g.as_undirected(combine_edges = 'mean') #Retain edge attributes: max, first.
        communities = uG.community_walktrap(weights = 'weight')

    #########################################################
    if 'leading_eigenvector' in method:
        #http://igraph.org/python/doc/igraph.Graph-class.html#community_leading_eigenvector
        uG = g.as_undirected(combine_edges = 'mean') #Retain edge attributes: max, first.
        clusters= uG.community_leading_eigenvector(clusters=None,weights = 'weight') #if clusters=None then tries as many as possible


    time_clustering=Perf.end()

    #Choose optimum number of communities
    if not clusters:
        num_communities = communities.optimal_count
        clusters = communities.as_clustering(n= num_communities) #Cut the dendrogram at level n; returns a VertexClustering object
    # Calc weight of each cluster
    #########################################################
    #> weight = average cosine similarity between query and each node in cluster
    #> reuse sim calcs where possible
    #> note: subgraph index not same as g
    
    cluster_weights=[]
    if experiment=='do3_avg_cosims':
        #The weight of the cluster will be the average of two values: the average cosine similarity (the
        # existing one in the code now) AND the average value of cosine similarity between all pairs (without 
        #the query).
        for i,subgraph in enumerate(clusters.subgraphs()):
            edge_sums=0

            #FOR EACH cluster

            #[1] QUERY TO EACH SENTENCE
            for idx, v in enumerate(subgraph.vs):
                #Sim between [QUERY_SENTENCE] and [Sentence at s_idx]
                edge_sim=sims[query_index][v['s_idx']] #Use stored sentence index to look up old cosine sim value
                edge_sums+=edge_sim
                
            #[2] SENTENCE to SENTENCE
            all_sum=0
            sum_count=0
            for idx1, v1 in enumerate(subgraph.vs):
                if v1['s_idx']==query_index:continue     #skip the query sentence (subgraph index differs from g)
                for idx2, v2 in enumerate(subgraph.vs):
                    if v2['s_idx']==query_index:continue #skip the query sentence
                    if idx1==idx2: continue      #skip same sentences
                    all_sum+=sims[v1['s_idx']][v2['s_idx']]
                    sum_count+=1
                    
            ## Calc Cluster weights
            avg_weight=edge_sums/subgraph.vcount()
            if sum_count:
                avg_inter_weights=all_sum/sum_count #each pair is counted twice in both sum and count, so the ratio is already the pair average
            else:
                avg_inter_weights=0
            print ("[experiment info]:  Cluster #"+str(i)+" inter weights average: "+str(avg_inter_weights))
            
            cluster_weights+=[(avg_weight+avg_inter_weights)/2]
    elif experiment=='do4_median_weight':
        #The weight of a cluster will be:
        # Compute the average vector over all sentences in the cluster (each
        #   sentence is a vector; the average acts as a median vector),
        # then compute the cos sim between the query vector and the median vector.
        # This score is the weight of the cluster.

        pass
    else:
        for i,subgraph in enumerate(clusters.subgraphs()):
            edge_sums=0
            for idx, v in enumerate(subgraph.vs):
                if False:
                    print ("GOT: "+str(v.attribute_names()))
                    print ("Node: "+str(v['label']))
                edge_sim=sims[query_index][v['s_idx']] #Use stored sentence index to look up old cosine sim value
                edge_sums+=edge_sim
            avg_weight=edge_sums/subgraph.vcount()
            cluster_weights+=[avg_weight]
    #D        print ("Cluster #"+str(i)+" has node count: "+str(subgraph.vcount())+" avg weight: "+str(avg_weight))
                

    print ("For topic: "+str(topic_id)+" Done clustering took: "+str(time_clustering)+" seconds")
    
    if False:
        #Dview    output_clusters(g,communities,clusters,cluster_weights=cluster_weights)
        #Dview    g.write_pickle(fname="save_clustered_graph.dat")
        print ("Visualize clusters...")
        view_graph_clusters(g,clusters)
        
    return g,clusters,cluster_weights,query_sentence,query_index,uG
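
The comment block in the fast_greedy branch above describes how a VertexDendrogram is cut into a VertexClustering. A minimal standalone sketch of that behavior, assuming python-igraph:

from igraph import Graph

g = Graph.Famous("Zachary")
dendrogram = g.community_fastgreedy()
best = dendrogram.as_clustering()       # uses the optimal-count hint
four = dendrogram.as_clustering(n=4)    # override: cut into exactly 4 clusters
print(dendrogram.optimal_count, best.modularity, four.modularity)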
Example #16
 def _update(self):
     membership = list(self.gutil.membership)
     self.strategy.update_parts(VertexClustering(self.graph, membership=membership))
Example #17
 def _update(self):
     membership = [learner.action for learner in self.learning.learners]
     self.strategy.update_parts(
         VertexClustering(self.graph, membership=membership))