def visualize_col():
    """Write the "col" graph to a JSON file for visualisation.

    Takes no arguments: every input is read from module-level names
    (``col_k``, ``nnc_mps``, ``agreement_matrix``, ``nnc_arbit``, ``mps``,
    ``agr_method``, ``start_session``, ``end_session``, ``min_sessions``).

    Steps:
      1. Build the adjacency matrix with ``col_matrix``.
      2. Label every MP with its equivalence class via
         ``get_equiv_classes`` (``-1`` marks MPs outside ``nnc_mps``).
      3. For each class, in ascending order of class id, pick the most
         frequent ``mp[1][0]`` among its members (the earliest one wins on
         ties) and store it as that class's group.
      4. Hand matrix, node names and groups to ``write_json_graph_general``.

    Returns nothing; the only effect is the file written by
    ``write_json_graph_general``.
    """
    adj_matrix = col_matrix(col_k, nnc_mps, agreement_matrix, nnc_arbit, mps)
    classes = get_equiv_classes(nnc_mps, agreement_matrix, nnc_arbit, mps)

    # -1 is the "not in nnc_mps" marker, not a real class.  It is absent when
    # every MP takes part, so discard() is used instead of remove(), which
    # would raise KeyError in that case.
    class_ids = set(classes)
    class_ids.discard(-1)
    classes_ord = sorted(class_ids)

    # Class label of every MP, looked up once instead of once per class.
    # mps.index() is kept on purpose so that duplicate entries in mps resolve
    # to their first occurrence, exactly as before.
    mp_classes = [classes[mps.index(mp)] for mp in mps]

    # NOTE(review): groups has one slot per entry of `classes`, but only the
    # first len(classes_ord) slots are filled (indexed by class position);
    # presumably col_matrix yields one row per class -- confirm.
    groups = [None] * len(classes)
    for position, class_id in enumerate(classes_ord):
        # mp[1][0] is presumably the MP's party/affiliation label -- TODO confirm.
        rel_p = [mp[1][0] for mp, label in zip(mps, mp_classes) if label == class_id]
        # max() returns the first maximal element, i.e. the earliest label
        # that reaches the highest frequency.
        groups[position] = max(rel_p, key=rel_p.count)

    names = [str(i) for i in range(len(adj_matrix))]
    write_json_graph_general(
        adj_matrix,
        names,
        groups,
        filename="viz/col/" + agr_method + '_' + str(start_session) + '_'
        + str(end_session) + 'minsess' + str(min_sessions) + '_' + 'k='
        + str(col_k) + '_' + str(nnc_arbit) + ".json",
    )
def top_k(k, nnc_mps, internal, agreement_matrix, arbit, mps, common_session_matrix=None):
    """
    Build the adjacency matrix of the nearest-neighbour (NNC) graph.

    Parameters
    ==========
    k - how many of the highest distinct similarity values to keep per node
    nnc_mps - the MPs taking part in the graph
    internal - accepted for interface compatibility; not read by this
               implementation (neighbours are always searched among nnc_mps)
    agreement_matrix - pairwise agreement values, indexed like mps
    arbit - if False, connect a node to every neighbour whose agreement
            equals one of its top k values; if True, connect it only to the
            first neighbour found for each of those values
    mps - all the nodes in the network
    common_session_matrix - accepted for interface compatibility; not read
                            by this implementation

    Returns
    =======
    A len(mps) x len(mps) matrix of 0/1 entries with a 1 at [i][j] when j
    was selected as a neighbour of i.

    Notes
    =====
    Entries equal to the module-level none_value are ranked as 0, and a
    value of 0 never produces an edge.  The graph is likely to be sparse,
    so an adjacency list would be a better representation than a matrix.
    """
    node_ids = [mps.index(member) for member in nnc_mps]
    adjacency = [[0 for _ in mps] for _ in mps]

    for src in node_ids:
        scores = agreement_matrix[src]

        # Distinct agreement values towards the other participating nodes,
        # with missing entries counted as 0.
        distinct = set()
        for dst in node_ids:
            if dst == src:
                continue
            value = scores[dst]
            distinct.add(0 if value == none_value else value)

        ranked = sorted(distinct, key=lambda value: -value)
        best = [value for value in ranked[:k] if value != 0]

        if arbit == False:
            # Every neighbour sharing one of the best values gets an edge.
            for dst in node_ids:
                if dst != src and scores[dst] in best:
                    adjacency[src][dst] = 1
        elif arbit == True:
            # Only the first neighbour met for each best value gets an edge.
            unclaimed = list(best)
            for dst in node_ids:
                if dst != src and scores[dst] in unclaimed:
                    unclaimed.remove(scores[dst])
                    adjacency[src][dst] = 1

    return adjacency
def get_equiv_classes(nnc_mps, agreement_matrix, nnc_arbit, mps):
    """
    Label every MP with the connected component it belongs to in the
    1-nearest-neighbour graph.

    The graph comes from top_k with k = 1 (using the module-level
    nnc_internal flag) and is treated as undirected: two MPs are linked
    when either one points at the other.

    Returns a list with one entry per row of that adjacency matrix.  MPs in
    nnc_mps carry the index of the first member of their component (in
    nnc_mps order); every other entry stays -1.
    """
    nearest = top_k(1, nnc_mps, nnc_internal, agreement_matrix, nnc_arbit, mps)
    members = [mps.index(mp) for mp in nnc_mps]

    total = len(nearest)
    labels = [-1] * total
    visited = [False] * total

    for root in members:
        if visited[root]:
            continue

        # Start a new component named after its first member and flood-fill
        # it through edges in either direction.
        visited[root] = True
        labels[root] = root
        pending = [root]
        while pending:
            current = pending.pop()
            for other in members:
                if visited[other]:
                    continue
                if nearest[current][other] == 1 or nearest[other][current] == 1:
                    visited[other] = True
                    labels[other] = root
                    pending.append(other)

    return labels
def top_k_old(k, nnc_mps, internal, agreement_matrix, arbit, mps, common_session_matrix=None):
    """
    Compute the matrix for the NNC graph (older implementation).

    Parameters
    ==========
    k - number of top neighbors to be included; values <= 0 select nobody
    nnc_mps - the set of MPs to be considered
    internal - if true, the top k neighbors are taken among nnc_mps only;
               if false, they are taken among all MPs
    agreement_matrix - the agreement matrix used, indexed like mps; None
                       marks a missing value
    arbit - if false, every MP of nnc_mps whose agreement equals the
            agreement of one of the top k neighbors gets an edge; if true,
            exactly the first k neighbors in ranking order get an edge
    mps - all the nodes in the network
    common_session_matrix - accepted for interface compatibility; not read
                            by this implementation

    Returns
    =======
    A len(mps) x len(mps) matrix holding 1 at [i][j] when j was selected
    as a neighbor of i and None everywhere else.  A node is never its own
    neighbor.

    Notes
    =====
    Agreement values equal to -1 are treated like missing values.
    The graph is very likely to be sparse, so an adjacency list would be a
    better representation than an adjacency matrix.
    """
    missing = -1  # sentinel for "no usable agreement value"
    size = len(mps)
    nnc_mps_ind = [mps.index(mp) for mp in nnc_mps]
    nnc_lookup = set(nnc_mps_ind)  # O(1) membership tests inside the loops
    adj_matrix = [[None for _ in range(size)] for _ in range(size)]

    for i in nnc_mps_ind:
        scores = agreement_matrix[i]

        # Usable agreement per column; columns that are missing, or outside
        # nnc_mps when `internal` is set, are masked with the sentinel.
        row = []
        for j in range(size):
            value = scores[j]
            if value is None or (internal and j not in nnc_lookup):
                row.append(missing)
            else:
                row.append(value)

        # Candidate neighbours by decreasing agreement.  sorted() is stable,
        # so ties keep ascending index order, and it also works where
        # range() is not a list (Python 3).
        ranked = sorted(
            (j for j in range(size) if j != i and row[j] != missing),
            key=lambda j: -row[j],
        )
        top = ranked[:max(k, 0)]

        if arbit:
            for j in top:
                adj_matrix[i][j] = 1
        else:
            # Connect to every participating MP that ties with a top value.
            # `t != i` keeps the diagonal from creating a self-loop when the
            # self-agreement equals one of those values.
            top_values = [scores[j] for j in top]
            for t in nnc_mps_ind:
                if t != i and scores[t] in top_values:
                    adj_matrix[i][t] = 1

    return adj_matrix