def lesion_graph_degree(graph, num_lesions):
    """ Remove vertices from a graph according to degree.

    Args:
        graph: NetworkX graph to be lesioned.
        num_lesions: Number of top-degree nodes to remove.
    Returns:
        G: NetworkX graph
        A: Adjacency matrix for graph
    """
    # Error checking
    G = deepcopy(graph)
    if num_lesions == 0:
        return G, nx.adjacency_matrix(G)
    assert 0 <= num_lesions < graph.order(), \
        'Attempting to remove too many/few nodes'

    for l in range(num_lesions):
        # Identify the current highest-degree node and cut it
        node_i, node_d = max(dict(G.degree()).items(), key=lambda degree: degree[1])
        G.remove_node(node_i)
        #print (node_i, node_d)

    if G.order() > 0:
        return G, nx.adjacency_matrix(G)
    else:
        print('Graph completely lesioned.')
        return None, None
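# A minimal usage sketch for lesion_graph_degree (hypothetical example; assumes
# only the imports the snippet itself relies on: networkx and deepcopy).
import networkx as nx
from copy import deepcopy

g = nx.erdos_renyi_graph(10, 0.4, seed=0)
g_lesioned, a = lesion_graph_degree(g, 2)  # cut the two highest-degree nodes
print(g_lesioned.order())  # 8 nodes remain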
def similarity(self, G=None, H=None, iters=1000):
    """
    Returns the graph similarity based on an iterative Blondel-style
    node-similarity update, averaging an even and an odd iteration.

    :param G: networkx graph of original graph (default: self.G)
    :param H: networkx graph of inferred graph (default: self.H)
    :param iters: number of iterations (default: 1000)
    :return: float
    """
    if G is None:
        G = self.G
    if H is None:
        H = self.H
    n = len(G)  # assumes G and H have the same number of nodes
    gA = nx.adjacency_matrix(G).toarray()
    hA = nx.adjacency_matrix(H).toarray()
    s = np.identity(n).flatten()  # vectorized initial condition
    # update operator in vectorized form: vec(S) <- (gA (x) hA + gA^T (x) hA^T) vec(S)
    M = np.kron(gA, hA) + np.kron(gA.T, hA.T)
    reg = np.identity(n).flatten() * 0.0000001  # small regularizer, keeps s nonzero
    for i in range(iters):
        temp = M.dot(s) + reg
        s = temp / np.linalg.norm(temp)
    a = np.trace(s.reshape(n, n))
    # one more (odd) iteration, then average the two traces
    temp = M.dot(s)
    s = temp / np.linalg.norm(temp)
    a += np.trace(s.reshape(n, n))
    return a / 2
def are_isomorphic(G, H):
    """Check whether two graphs G and H are isomorphic.

    Note: This function is brute force and very slow.

    args:
        G: a networkx Graph
        H: a networkx Graph
    returns:
        True if G and H are isomorphic.
        False if G and H are not isomorphic.
    """
    n = len(G.nodes())
    m = len(H.nodes())
    if n != m:
        return False
    if sorted(dict(G.degree()).values()) != sorted(dict(H.degree()).values()):
        return False
    else:
        a_g = nx.adjacency_matrix(G).todense()
        vertex_perms = list(permutations(H.nodes(), m))
        for i in vertex_perms:
            a_h = nx.adjacency_matrix(H, i).todense()
            if (a_h == a_g).all():
                #print(list(zip(G.nodes(), i)), "is an isomorphism")
                return True
        return False
def env_init(self):
    """
    Based on the Levin model, the dispersion probability is initialized.
    """
    self.dispersionModel = InvasiveUtility.Levin
    notDirectedG = networkx.Graph(self.simulationParameterObj.graph)
    adjMatrix = adjacency_matrix(notDirectedG)
    edges = self.simulationParameterObj.graph.edges()
    simulationParameterObj = self.simulationParameterObj
    if self.dispersionModel == InvasiveUtility.Levin:
        parameters = InvasiveUtility.calculatePath(notDirectedG, adjMatrix, edges,
            simulationParameterObj.downStreamRate, simulationParameterObj.upStreamRate)
        C = (1 - simulationParameterObj.upStreamRate * simulationParameterObj.downStreamRate) / (
            (1 - 2 * simulationParameterObj.upStreamRate) * (1 - simulationParameterObj.downStreamRate))
        self.dispertionTable = np.dot(1 / C, parameters)
        self.germinationObj = GerminationDispersionParameterClass(1, 1)
    # calculating the worst-case fully invaded rivers cost
    worst_case = repmat(1, 1, self.simulationParameterObj.nbrReaches * self.simulationParameterObj.habitatSize)[0]
    cost_state_unit = InvasiveUtility.get_unit_invaded_reaches(worst_case,
        self.simulationParameterObj.habitatSize) * self.actionParameterObj.costPerReach
    stateCost = cost_state_unit + InvasiveUtility.get_invaded_reaches(
        worst_case) * self.actionParameterObj.costPerTree
    stateCost = stateCost + InvasiveUtility.get_empty_slots(worst_case) * self.actionParameterObj.emptyCost
    costAction = InvasiveUtility.get_budget_cost_actions(
        repmat(3, 1, self.simulationParameterObj.nbrReaches)[0], worst_case, self.actionParameterObj)
    networkx.adjacency_matrix(self.simulationParameterObj.graph)  # result unused
    return "VERSION RL-Glue-3.0 PROBLEMTYPE non-episodic DISCOUNTFACTOR " + str(
        self.discountFactor) + " OBSERVATIONS INTS (" + str(
        self.simulationParameterObj.nbrReaches * self.simulationParameterObj.habitatSize) + " 1 3) ACTIONS INTS (" + str(
        self.simulationParameterObj.nbrReaches) + " 1 4) REWARDS (" + str(self.Bad_Action_Penalty) + " " + str(
        -1 * (costAction + stateCost)) + ") EXTRA " + str(self.simulationParameterObj.graph.edges()) + " BUDGET " + str(
        self.actionParameterObj.budget) + " by Majid Taleghan."
def nsim_bvd04(G1, G2, max_iter=100, eps=1e-4):
    """
    Algorithm to calculate the node-node similarity matrix of two directed
    graphs.

    Returns
    -------
    A 2d similarity matrix of |V1| x |V2|.

    Reference
    ---------
    Blondel, Vincent D. et al. "A Measure of Similarity between Graph
    Vertices: Applications to Synonym Extraction and Web Searching."
    SIAM Review (2004).
    """
    N = len(G1.nodes())
    M = len(G2.nodes())
    A = nx.adjacency_matrix(G1).todense()
    B = nx.adjacency_matrix(G2).todense()
    nsim_prev = np.zeros((M, N))
    nsim = np.ones((M, N))
    for i in range(max_iter):
        if np.allclose(nsim, nsim_prev, atol=eps):
            break
        nsim_prev = np.copy(nsim)
        nsim = np.dot(np.dot(B, nsim_prev), A.T) + \
               np.dot(np.dot(B.T, nsim_prev), A)
        fnorm = np.linalg.norm(nsim, ord='fro')
        nsim = nsim / fnorm
    print("Converged after %d iterations (eps=%f)." % (i, eps))
    return nsim.T
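# A small usage sketch (hypothetical): node similarity between a directed
# 3-cycle and a directed 3-path; assumes networkx and numpy are available.
import networkx as nx
import numpy as np

g1 = nx.cycle_graph(3, create_using=nx.DiGraph)
g2 = nx.path_graph(3, create_using=nx.DiGraph)
S = nsim_bvd04(g1, g2)
print(S.shape)  # (3, 3): one similarity score per node pair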
def lesion_graph_degree_thresh(graph, threshold):
    """
    Remove vertices from a graph with degree greater than or equal to
    threshold.

    Parameters:
    -----------
    graph: NetworkX graph to be lesioned.
    threshold: Degree at or above which to remove nodes.

    Returns:
    --------
    G: NetworkX graph
    A: Adjacency matrix for graph
    """
    # Error checking
    G = deepcopy(graph)
    assert threshold >= 0, "In percolation, `threshold` must be >= 0."

    # Check if lesioning is necessary for threshold
    if threshold > max(dict(G.degree()).values()):
        return G, nx.adjacency_matrix(G)

    # Identify all nodes with degree >= threshold (select by node label, not
    # positional index, so arbitrary node labels are handled correctly)
    nodes_to_cut = [node for node, degree in dict(G.degree()).items()
                    if degree >= threshold]

    # Eliminate these nodes
    G.remove_nodes_from(nodes_to_cut)
    if G.order() > 0:
        return G, nx.adjacency_matrix(G)
    else:
        #print('Graph completely lesioned.')
        return None, None
def dsd_matrix(G, nodeList, npyFile, LMsetSize=50, centralityFunc='degree', **kwargs):
    """
    any kwargs, if specified, will be passed into centrality function call.
    """
    # if npy path not entered, or file does not exist, compute D
    if not npyFile or not os.path.isfile(npyFile):
        # construct hemat
        adjMatrix = np.array(nx.adjacency_matrix(G, nodeList))
        if np.shape(adjMatrix) == ():
            # adjacency_matrix returned a scipy sparse matrix; densify it
            adjMatrix = np.array(nx.adjacency_matrix(G, nodeList).todense())
        HEmatrix = dsd.hematrix(adjMatrix)

        # construct DSD
        LMset = get_LMset(G, nodeList, LMsetSize, centralityFunc, **kwargs)
        D = dsd.DSD(HEmatrix, LMset)

        if npyFile:
            try:
                np.save(npyFile, D)
            except IOError:
                os.makedirs(npyFile[:npyFile.rfind('/')])
                np.save(npyFile, D)
    # otherwise just load and return it
    else:
        D = np.load(npyFile)
    return D
def update_nl(self, dist):
    self.G.remove_edges_from(self.G.edges())
    print("dr:")
    print(self.dr())
    for row in list(enumerate(self.dr())):
        for col in list(enumerate(row[1])):
            if col[1] > dist:
                self.G.add_edge(col[0], row[0],
                                x=self.x[col[0]] - self.x[row[0]],
                                y=self.y[col[0]] - self.y[row[0]],
                                z=0., r=col[1])
    print(nx.adjacency_matrix(self.G))
def createMultiplex(self):
    print(len(self.politicsGraph.nodes()))
    sortedPoliticalComentators = sorted(self.politicsGraph.nodes())
    politicalComentatorsAdjMat = nx.adjacency_matrix(self.politicsGraph, sortedPoliticalComentators)
    sortedChurchComentators = sorted(self.churchGraph.nodes())
    churchComentatorsAdjMat = nx.adjacency_matrix(self.churchGraph, sortedChurchComentators)
    otherComentators = sorted(self.othersGraph.nodes())
    otherAdjMat = nx.adjacency_matrix(self.othersGraph, otherComentators)
    self.addLayerToGraph(politicalComentatorsAdjMat, sortedPoliticalComentators, 'L1', 1)
    self.addLayerToGraph(churchComentatorsAdjMat, sortedChurchComentators, 'L2', 2)
    self.addLayerToGraph(otherAdjMat, otherComentators, 'L3', 3)
def show_graph_features(G, circular=False):
    for node, adj_list in G.adjacency():
        print(node, ': ', adj_list)
    print(nx.adjacency_matrix(G))
    # Nodes:
    print('Number of nodes: ', G.number_of_nodes())
    print('Nodes: \n', '\t', G.nodes())
    # Edges:
    print('Number of edges: ', G.number_of_edges())
    print('Edges: \n', '\t', G.edges(data=True))
def graph_diff(g0, g1):
    m0 = nx.adjacency_matrix(g0)
    m1 = nx.adjacency_matrix(g1)
    delta = 0
    for i in range(0, m0.shape[0]):
        for j in range(0, m0.shape[1]):
            if i > j:
                # count entries below the diagonal where the (binarized)
                # adjacency matrices disagree
                if m0[(i, j)] != int(m1[(i, j)] > 0):
                    delta += 1
    return delta
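# Usage sketch (hypothetical): compare a path and a cycle on four nodes;
# they differ in exactly one edge below the diagonal.
import networkx as nx

print(graph_diff(nx.path_graph(4), nx.cycle_graph(4)))  # 1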
def cal_exact_Nd_simple(H, random_weight=False):
    """return (Nd, N-Rank)"""
    G = H.copy()
    N = H.number_of_nodes()
    try:
        G2 = nx.adjacency_matrix(G, weight='weight').toarray()
    except Exception:
        G2 = nx.adjacency_matrix(G).toarray()
    if random_weight:
        G2 = G2 * np.random.random((N, N))
    rank_G = mranksvd(G2)
    return max(1, N - rank_G), N - rank_G
def GTrieInsert(self, graph, label=None, states=False):
    if not self.root.isLeaf() and self.null_graph:
        self.insertRecursive(networkx.Graph(), [],
                             networkx.adjacency_matrix(networkx.Graph()).todense(),
                             self.root, 0, label, states)
    # materialize the generator so it can be inspected more than once
    components = list(networkx.connected_components(graph.to_undirected())) \
        if networkx.is_directed(graph) else list(networkx.connected_components(graph))
    component_len = [1 for x in components if len(x) > 1]
    if len(components) > 1 and sum(component_len) > 1:
        print("Illegal Graph Insert: Graph has more than one connected component.")
        return
    cannonGraph = self.GTCannon(graph.copy())
    matrix = networkx.adjacency_matrix(cannonGraph).todense()
    conditions = self.utility.symmetryConditions(cannonGraph)
    self.insertRecursive(cannonGraph, conditions, matrix, self.root, 0, label, states)
def get_harmonic_pathlength(filename):
    import networkx as nx
    threshold = 0
    f = open(filename[:-4] + '_harmonic_pathlength.dat', 'w')
    for i in range(0, 101):
        threshold = float(i) / 100
        G = get_threshold_matrix(filename, threshold)
        components = [G.subgraph(c).copy() for c in nx.connected_components(G)]
        values = []
        for component in components:
            adjacency = nx.adjacency_matrix(component).toarray()
            hiwi = 0
            values_indi = []
            for row in adjacency:
                if row.sum() > 0:
                    hiwi += 1. / row.sum()
                    values_indi.append(hiwi)
            if len(values_indi) > 0:
                values.append(sum(values_indi) / len(values_indi))
        # the following holds only for a connected network
        #adjacency = nx.adjacency_matrix(G)
        #hiwi = 0
        #values = []
        #for row in adjacency:
        #    if row.sum() > 0:
        #        hiwi += 1. / row.sum()
        #        values.append(hiwi)
        if len(values) == 0:
            f.write("%f\t0.\n" % (threshold))
            print('harmonic pathlength: 0')
        else:
            print('harmonic pathlength: %f' % (sum(values) / len(values)))
            f.write("%f\t%f\n" % (threshold, (sum(values) / len(values))))
    f.close()
def adjMatrixEponential(graph):
    A = nx.adjacency_matrix(graph, weight="weight")
    # largest-magnitude eigenvalue of the (symmetric) adjacency matrix
    alpha, alpha_v = scipy.sparse.linalg.eigsh(A, 1, which="LM")
    alpha = alpha[0]
    A = A.toarray().astype(float)
    result = A.copy()
    # range(2, maxexponent) limits the count to start at 2 rather than 1, so
    # the powers used are 2, 3, 4 instead of 1, 2, 3, 4 (maxexponent is
    # assumed to be declared at module level)
    for i in range(2, maxexponent):
        # accumulate into a separate matrix so each power is taken of the
        # original adjacency matrix, not of the running sum
        A_i = math.pow(alpha, -i) * numpy.linalg.matrix_power(A, i)
        result += A_i
    return result
def init():
    global op1, op2, grouping, network, adj_mat
    op1 = [(-1 + 2 * random.random()) for agents in range(no_of_agents)]
    op2 = [(-1 + 2 * random.random()) for agents in range(no_of_agents)]
    grouping = [random.randint(1, m) for agents in range(no_of_agents)]
    network = nx.erdos_renyi_graph(no_of_agents, p)
    adj_mat = nx.adjacency_matrix(network)
def adjacency_spectrum(G, weight='weight'):
    """Return eigenvalues of the adjacency matrix of G.

    Parameters
    ----------
    G : graph
       A NetworkX graph

    weight : string or None, optional (default='weight')
       The edge data key used to compute each value in the matrix.
       If None, then each edge has weight 1.

    Returns
    -------
    evals : NumPy array
      Eigenvalues

    Notes
    -----
    For MultiGraph/MultiDiGraph, the edges weights are summed.
    See to_numpy_matrix for other options.

    See Also
    --------
    adjacency_matrix
    """
    from scipy.linalg import eigvals
    return eigvals(nx.adjacency_matrix(G, weight=weight).todense())
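# Usage sketch: the adjacency spectrum of the complete graph K_4 is
# {3, -1, -1, -1} (assumes networkx and scipy are available).
import networkx as nx

evals = adjacency_spectrum(nx.complete_graph(4))
print(sorted(e.real for e in evals))  # [-1.0, -1.0, -1.0, 3.0]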
def connectGraph(G, geneList):
    i = 1
    j = 1
    k = 1
    while (i < len(geneList)):
        while (j < len(geneList)):
            if (i != j):
                #print("This part works")  # proof that the idea works
                #print(geneList[i])
                #print(geneList[j])
                if (bool(set(geneList[i]) & set(geneList[j]))):
                    G.add_edge(geneList[i - 1], geneList[j - 1])
                    #print(geneList[i-1] + ", " + geneList[j-1])  # proof it works
                    #print("success")  # proof that this line is reached
            j = j + 2
        i = i + 2  # THE FINAL GENE HAS NO CONNECTIONS, FIX IT
        k = k + 2  # file dropped size a fair bit.
        j = k
    #nx.write_graphml(G, "anotherTestGraph4.xml")  # writes the graph to a file,
    # which can be imported to Cytoscape to visualise the graph.

    # This will print all the genes and the number of links they have in the graph:
    #i = 0
    #while (i < len(geneList)):
    #    print("The degree for gene", geneList[i], "is:", G.degree(geneList[i]))
    #    i = i + 2

    A = nx.adjacency_matrix(G)  # creates an adjacency matrix of the graph above
    #print(A)  # proof it works
    #print(G.edges())  # proof that the gene connections work
    #print(geneList)  # shows the list was created properly with all genes
    return G
def balanced_stochastic_blockmodel(communities=2, groupsize=3, p_in=1.0, p_out=0.0, seed=None):
    """Gives a dense adjacency matrix representation of a randomly generated
    SBM with balanced community sizes."""
    G = nx.planted_partition_graph(l=communities, k=groupsize, p_in=p_in,
                                   p_out=p_out, seed=seed)
    A = nx.adjacency_matrix(G).todense()
    return A
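# Usage sketch: two fully connected communities of three nodes each and no
# between-community edges yield a block-diagonal 6x6 adjacency matrix.
import networkx as nx

A_demo = balanced_stochastic_blockmodel(communities=2, groupsize=3,
                                        p_in=1.0, p_out=0.0, seed=42)
print(A_demo.shape)  # (6, 6)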
def busmap_by_spectral_clustering(network, n_clusters, **kwds):
    lines = (network.lines.loc[:, ['bus0', 'bus1']]
             .assign(weight=1. / network.lines.x)
             .set_index(['bus0', 'bus1']))
    G = OrderedGraph()
    G.add_nodes_from(network.buses.index)
    G.add_edges_from((u, v, dict(weight=w)) for (u, v), w in lines.itertuples())
    return pd.Series(sk_spectral_clustering(nx.adjacency_matrix(G), n_clusters, **kwds) + 1,
                     index=network.buses.index)
def get_my_harmonic_pathlength(filename):
    threshold = 0
    f = open(filename[:-4] + '_harmonic_pathlength.dat', 'w')
    print(f)
    f.write('threshold\tharmonic_pathlength\n')
    for i in range(0, 101):
        threshold = float(i) / 100
        G = get_my_threshold_matrix(filename, threshold)
        # subgraphs in graph!
        components = [G.subgraph(c).copy() for c in nx.connected_components(G)]
        values = []
        for component in components:
            adjacency = nx.adjacency_matrix(component).toarray()
            hiwi = 0
            values_indi = []
            for row in adjacency:
                if row.sum() > 0:
                    hiwi += 1. / row.sum()
                    values_indi.append(hiwi)
            if len(values_indi) > 0:
                values.append(sum(values_indi) / len(values_indi))
        if len(values) == 0:
            f.write('%f\t0.\n' % (threshold))
        else:
            f.write('%f\t%f\n' % (threshold, (sum(values) / len(values))))
    f.close()
def _get_node_plot_props(G, node_class=None, max_energy=None, active_node_color=None,
                         active_edge_color=None, dead_node_color=None):
    """
    `node_class` - Generic | Internal | Sensory | Motor

    `node_size` - proportional to the sum of the presynaptic connections it
      makes with other nodes.

    `node_colors` - function of excitatory/inhibitory, energy_value,
      firing/inactive
    """
    cm = CMAP_DIFF  # Shade from red (inhibitory) to green (excitatory)
    # select the nodes of the requested class (all nodes when node_class is None)
    nodes = [n for n, d in G.nodes(data=True)
             if node_class is None or d.get('node_class') == node_class]
    adj_matrix = nx.adjacency_matrix(G)
    node_pos = nx.get_node_attributes(G.subgraph(nodes), 'pos')
    edge_width = np.array([d['weight'] for (u, v, d) in G.edges(data=True) if u in nodes])

    # wrap in list() so the alpha channel can be appended below
    firing_nc = list(colors.hex2color(active_node_color)) if active_node_color is not None \
        else list(colors.hex2color(RENDER_NODE_PROPS['Firing']['node_face_color']))
    dead_nc = list(colors.hex2color(dead_node_color)) if dead_node_color is not None \
        else list(colors.hex2color(RENDER_NODE_PROPS['Dead']['node_face_color']))
    firing_nc.append(1.)
    dead_nc.append(1.)
    node_colors = _get_node_colors(G, cm, node_class=node_class, max_energy=max_energy,
                                   firing_node_color=firing_nc, dead_node_color=dead_nc)

    if node_class is not None:
        min_ns, max_ns = RENDER_NODE_PROPS[node_class]['min_node_size'], RENDER_NODE_PROPS[node_class]['max_node_size']
        node_shape = RENDER_NODE_PROPS[node_class]['shape']
        node_size = np.array([np.maximum(adj_matrix[i].sum(), .01)
                              for i, n_id in enumerate(G.nodes())
                              if G.nodes[n_id]['node_class'] == node_class])  # proportional to the number of connections
    else:
        node_shape, node_size = RENDER_NODE_PROPS['Default']['shape'], adj_matrix.sum(axis=1)
        min_ns, max_ns = RENDER_NODE_PROPS['Default']['min_node_size'], RENDER_NODE_PROPS['Default']['max_node_size']

    node_size = min_ns + (max_ns - min_ns) * (node_size - node_size.min()) / (node_size.max() - node_size.min()) \
        if node_size.max() > node_size.min() else max_ns * np.ones_like(node_size)
    return node_pos, node_colors, node_shape, node_size, edge_width
def random_simple_deg_seq(sequence, brain_size=[7., 7., 7.], seed=None, tries=10):
    '''Wrapper function to get a SIMPLE (no parallel or self-loop edges) graph
    that has a given degree sequence.

    This graph is used conventionally as a control because it yields a random
    graph that accounts for degree distribution.

    Parameters:
        sequence: list of int
            Degree of each node to be added to the graph.
        brain_size: list of 3 floats
            Size of the brain to use when distributing node locations. Added
            for convenience, but does not affect connectivity pattern.
        seed: hashable object for random seed
            Seed for the random number generator.
        tries: int
            Number of attempts at creating graph (function will retry if
            self-loops exist).

    Returns:
        NetworkX graph object, adjacency matrix, and random distances'''
    G = nx.random_degree_sequence_graph(sequence=sequence, seed=seed, tries=tries)
    A = nx.adjacency_matrix(G)

    N = len(sequence)
    centroids = np.random.uniform([0, 0, 0], brain_size, (N, 3))
    D = aux_tools.dist_mat(centroids)

    return G, A, D
def hits(G, max_iter=100, tol=1.0e-6):
    M = nx.adjacency_matrix(G, nodelist=G.nodes())
    (n, m) = M.shape  # should be square
    A = M.T @ M  # authority matrix
    x = np.ones((n, 1)) / n  # initial guess
    # power iteration on authority matrix
    i = 0
    while True:
        xlast = x
        x = A @ x
        x = x / x.sum()
        # check convergence, l1 norm
        err = np.absolute(x - xlast).sum()
        if err < tol:
            break
        if i > max_iter:
            raise NetworkXError(
                "HITS: power iteration failed to converge in %d iterations." % (i + 1))
        i += 1
    a = np.asarray(x).flatten()
    h = np.asarray(M @ a).flatten()
    hubs = dict(zip(G.nodes(), h / h.sum()))
    authorities = dict(zip(G.nodes(), a / a.sum()))
    return hubs, authorities
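# Usage sketch: hub/authority scores on a tiny directed star; node 0 points
# to everything, so it is the dominant hub (assumes networkx and numpy).
import networkx as nx
import numpy as np

g = nx.DiGraph([(0, 1), (0, 2), (0, 3)])
hubs, authorities = hits(g)
print(max(hubs, key=hubs.get))  # 0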
def loglikelihood(self, net):
    """Returns the log likelihood of the given network.

    Similar to the loglikelihood method of a Conditional Probability
    Distribution.
    """
    adjmat = N.asarray(nx.adjacency_matrix(net).todense())  # dense 0/1 matrix

    # if any of the mustexist or mustnotexist constraints are violated,
    # return negative infinity
    if (not (adjmat | self.mustexist).all()) or \
       (adjmat & self.mustnotexist).any():
        return NEGINF

    # if any custom constraints are violated, return negative infinity
    if self.constraints and not all(c(adjmat) for c in self.constraints):
        return NEGINF

    loglike = 0.0
    if self.energy_matrix is not None:
        energy = N.sum(adjmat * self.energy_matrix)
        loglike = -self.weight * energy
    return loglike
def hits(G, max_iter=100, eps=1e-4):
    """HITS algorithm: calculate the hub and authority scores for nodes in a
    graph.

    Returns
    -------
    A 2d matrix of [hub, auth] scores.

    Reference
    ---------
    [1] Kleinberg, Jon M. "Authoritative Sources in a Hyperlinked
    Environment." JACM, 1999.
    """
    N = len(G.nodes())
    A = nx.adjacency_matrix(G).todense()
    # block matrix M = [[0, A], [A.T, 0]] couples hub and authority scores
    Mu = np.concatenate((np.zeros((N, N)), A), 1)
    Md = np.concatenate((A.T, np.zeros((N, N))), 1)
    M = np.concatenate((Mu, Md), 0)
    ha_prev = np.zeros((N * 2, 1))
    ha = np.ones((N * 2, 1))
    for i in range(max_iter):
        if np.allclose(ha, ha_prev, atol=eps):
            break
        ha_prev = np.copy(ha)
        ha = normalized(np.dot(M, ha_prev))
    print("Converged after %d iterations (eps=%f)." % (i, eps))
    # column-major reshape puts the first N entries (hubs) in column 0
    return np.reshape(ha, newshape=(N, 2), order='F')
def _step1AddFlowVars(Graph):
    """
    Adding flow variables to the Gurobi model.

    Adds flows from node m->n if m<n and the (m, n)'th position in the
    adjacency matrix of Graph is non-zero. Only adds flows from m->n and not
    n->m because direction in this model is shown by a sign difference, but
    otherwise the Graph is not directed.

    Initializes the 'flow' attributes of the edges of Graph to be zero.

    Parameters
    ----------
    Graph : NetworkX Graph
    """
    # `ntwk` and `inf` are assumed to be defined at module level
    adjacency = nx.adjacency_matrix(Graph).todense()
    for row in range(len(adjacency)):
        for col in range(len(adjacency)):
            if adjacency[row, col] != 0:
                if row < col:
                    ntwk.addVar(name='flow %d->%d' % (row, col), lb=-inf, ub=inf)
    ntwk.update()

    # -- Initializing the 'flow' attribute of the edges to be 0 --
    for (m, n) in Graph.edges():
        Graph[m][n]['Flow'] = np.zeros(len(Graph.nodes[0]['Mismatch']))
def __init__(self, graph, communities=None):
    """Initialize partition of graph, with optional communities.

    Parameters
    ----------
    graph : networkx graph
    communities : list of sets, optional
        A list of sets with nodes in each set.
        If communities is None, will initialize with one community per node.

    Returns
    -------
    part : WeightedPartition object
    """
    # assert graph has edge weights, and no negative weights
    mat = nx.adjacency_matrix(graph).todense()
    if mat.min() < 0:
        raise ValueError("Graph has invalid negative weights")

    self.graph = nx.from_numpy_matrix(mat)
    if communities is None:
        self._communities = self._init_communities_from_nodes()
    else:
        self.set_communities(communities)
    self.total_edge_weight = graph.size(weight="weight")
    self.degrees = graph.degree(weight="weight")
def coarse_grain_W(num_intervals, num_eigenvectors, g, sparse=True):
    """
    Produces W_tilde := R*W*K, where W is the stochastic matrix of the
    original graph, and R, K are intermediary matrices defined in the paper.

    Has an optional argument to use non-sparse matrices, which are
    (marginally) faster for small graphs.
    """
    A = nx.adjacency_matrix(g).toarray().astype(float)
    num_nodes = A.shape[0]
    A = A / np.sum(A, 0)  # column-stochastic matrix; the raw adjacency is no longer needed
    A = np.nan_to_num(A)
    eigenvalues, left_eigenvectors = eig(A, left=True, right=False)
    if sparse:
        A = s.csr_matrix(A)
        groups = make_groups(eigenvalues, left_eigenvectors, num_intervals, num_eigenvectors)
        R = make_sparse_R(groups, num_nodes)
        K = make_sparse_K(groups, num_nodes, g)
        return np.dot(R, np.dot(A, K))
    else:
        groups = make_groups(eigenvalues, left_eigenvectors, num_intervals, num_eigenvectors)
        R = make_R(groups, num_nodes)
        K = make_K(groups, num_nodes, g)
        return np.dot(R, np.dot(A, K))
def test_eigenvector_v_katz_random(self):
    G = nx.gnp_random_graph(10, 0.5, seed=1234)
    l = float(max(eigvals(nx.adjacency_matrix(G).todense())))
    e = nx.eigenvector_centrality_numpy(G)
    k = nx.katz_centrality_numpy(G, 1.0 / l)
    for n in G:
        assert_almost_equal(e[n], k[n])
def load_from_planetoid_files(dataset_name, path):
    """Loads Planetoid data in GCN format, as released with the GCN code.

    This function is adapted from https://github.com/tkipf/gcn.

    This function assumes that the following files can be found at the
    location specified by `path`:

    ind.dataset_str.x          => the feature vectors of the training instances
                                  as scipy.sparse.csr.csr_matrix object.
    ind.dataset_str.tx         => the feature vectors of the test instances as
                                  scipy.sparse.csr.csr_matrix object.
    ind.dataset_str.allx       => the feature vectors of both labeled and
                                  unlabeled training instances (a superset of
                                  ind.dataset_str.x) as
                                  scipy.sparse.csr.csr_matrix object.
    ind.dataset_str.y          => the one-hot labels of the labeled training
                                  instances as numpy.ndarray object.
    ind.dataset_str.ty         => the one-hot labels of the test instances as
                                  numpy.ndarray object.
    ind.dataset_str.ally       => the labels for instances in
                                  ind.dataset_str.allx as numpy.ndarray object.
    ind.dataset_str.graph      => a dict in the format
                                  {index: [index_of_neighbor_nodes]} as
                                  collections.defaultdict object.
    ind.dataset_str.test.index => the indices of test instances in graph, for
                                  the inductive setting as list object.

    Args:
      dataset_name: A string representing the dataset name (e.g., `cora`).
      path: Path to the directory containing the files.

    Returns:
      All data input files loaded (as well as the training/test data).
    """

    def _sample_mask(idx, l):
        """Create mask."""
        mask = np.zeros(l)
        mask[idx] = 1
        return np.array(mask, dtype=np.bool)

    def _parse_index_file(filename):
        """Parse index file."""
        index = []
        for line in open(filename):
            index.append(int(line.strip()))
        return index

    def _load_file(name):
        """Load from data file."""
        filename = 'ind.{}.{}'.format(dataset_name, name)
        filename = os.path.join(path, filename)
        with open(filename, 'rb') as f:
            if sys.version_info > (3, 0):
                return pickle.load(f, encoding='latin1')  # pylint: disable=unexpected-keyword-arg
            else:
                return pickle.load(f)

    x = _load_file('x')
    y = _load_file('y')
    tx = _load_file('tx')
    ty = _load_file('ty')
    allx = _load_file('allx')
    ally = _load_file('ally')
    graph = _load_file('graph')

    filename = 'ind.{}.test.index'.format(dataset_name)
    filename = os.path.join(path, filename)
    test_idx_reorder = _parse_index_file(filename)
    test_idx_range = np.sort(test_idx_reorder)

    if dataset_name == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph).
        # Find isolated nodes, add them as zero-vecs into the right position.
        test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder) + 1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range - min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range - min(test_idx_range), :] = ty
        ty = ty_extended

    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))

    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]

    idx_test = test_idx_range.tolist()
    idx_train = range(len(y))
    idx_val = range(len(y), len(y) + 500)

    train_mask = _sample_mask(idx_train, labels.shape[0])
    val_mask = _sample_mask(idx_val, labels.shape[0])
    test_mask = _sample_mask(idx_test, labels.shape[0])

    y_train = np.zeros(labels.shape)
    y_val = np.zeros(labels.shape)
    y_test = np.zeros(labels.shape)
    y_train[train_mask, :] = labels[train_mask, :]
    y_val[val_mask, :] = labels[val_mask, :]
    y_test[test_mask, :] = labels[test_mask, :]

    return (adj, features, y_train, y_val, y_test, train_mask, val_mask,
            test_mask, labels)
from scipy.linalg import block_diag

list1 = list()
G_asymmetric = nx.DiGraph()
#G_asymmetric.add_edge('A','B')
G_asymmetric.add_edge('A', 'C')
G_asymmetric.add_edge('A', 'D')
G_asymmetric.add_edge('A', 'E')
G_asymmetric.add_edge('B', 'C')
G_asymmetric.add_edge('B', 'A')
G_asymmetric.add_edge('B', 'E')
G_asymmetric.add_edge('E', 'A')

p = nx.degree(G_asymmetric, 'A')
print(p)

nx.spring_layout(G_asymmetric)
nx.draw_networkx(G_asymmetric)
#s=nx.clustering(G_asymmetric,'A')
#print(s)

A = nx.adjacency_matrix(G_asymmetric)
adj = A.toarray()
list1 = adj.tolist()
a = list1[0]
b = list1[1]
c = list1[2]
d = list1[3]
e = list1[4]
print(a, b, c, d, e)

# build a block-diagonal matrix from the adjacency rows
s = block_diag(a, b, c, d, e)
#A.setdiag(A.diagonal()*2)
print(s)
def comm_eigenvectors(comm, num_vectors=20, verbose=False):
    W = nx.adjacency_matrix(comm)
    W_sym = W + W.T
    D_array = np.diag(np.array(W_sym.sum(axis=1)).flatten())
    D = sparse.csc_matrix(D_array)
    L_comb = D - W_sym
    try:
        L_rw = sparse.linalg.inv(D).dot(W_sym)
    except Exception:
        L_rw = sparse.csc_matrix(scipy.linalg.pinv(D_array)).dot(W_sym)

    if verbose:
        W_eig_v, W_vectors = scipy.linalg.eigh(W_sym.toarray())
        W_sort_index = break_tie_argsort(W_eig_v)
        print(W_vectors[:, W_sort_index])  # eigenvectors

    if W.shape[0] > 2 * num_vectors:
        try:
            logging.debug("Using sparse method to compute eigen vectors")
            _, W_vectors_upper = sparse.linalg.eigsh(W_sym, k=num_vectors, sigma=0, which='LM')
            logging.debug("Using sparse method to compute eigen vectors")
            _, W_vectors_lower = sparse.linalg.eigsh(W_sym, k=num_vectors, which='LM')
        except Exception:
            logging.warning("Sparse method doesn't converge.")
            W_values, W_vectors = scipy.linalg.eigh(W_sym.toarray())
            W_sort_index = break_tie_argsort(W_values)
            W_vectors_upper = W_vectors[:, W_sort_index[:num_vectors]]          # small eigenvalues
            W_vectors_lower = W_vectors[:, W_sort_index[-num_vectors:][::-1]]   # big eigenvalues
    else:
        W_values, W_vectors = scipy.linalg.eigh(W_sym.toarray())
        W_sort_index = break_tie_argsort(W_values)
        middle = len(W_values) // 2
        W_vectors_upper = W_vectors[:, W_sort_index[:middle]]                   # small eigenvalues
        W_vectors_lower = W_vectors[:, W_sort_index[middle:][::-1]]             # big eigenvalues

    if W.shape[0] > num_vectors + 2:
        try:
            logging.debug("Using sparse method to compute eigen vectors")
            _, comb_vectors = sparse.linalg.eigsh(L_comb, k=num_vectors + 1, sigma=0, which='LM')
            comb_vectors = comb_vectors[:, 1:]
        except Exception:
            logging.warning("Sparse method doesn't converge.")
            comb_values, comb_vectors = scipy.linalg.eigh(L_comb.toarray())
            comb_sort_index = break_tie_argsort(comb_values)
            comb_vectors = comb_vectors[:, comb_sort_index[1:21]]
        try:
            logging.debug("Using sparse method to compute eigen vectors")
            _, rw_vectors = sparse.linalg.eigsh(L_rw, k=num_vectors + 1, which='LM')
            rw_vectors = rw_vectors[:, 1:]
        except scipy.sparse.linalg.ArpackNoConvergence:
            logging.warning("Sparse method doesn't converge.")
            rw_values, rw_vectors = scipy.linalg.eigh(L_rw.toarray())
            rw_sort_index = break_tie_argsort(rw_values, reverse=True)
            rw_vectors = rw_vectors[:, rw_sort_index[1:21]]
    else:
        comb_values, comb_vectors = scipy.linalg.eigh(L_comb.toarray())
        comb_sort_index = break_tie_argsort(comb_values)
        comb_vectors = comb_vectors[:, comb_sort_index[1:21]]
        rw_values, rw_vectors = scipy.linalg.eigh(L_rw.toarray())
        rw_sort_index = break_tie_argsort(rw_values, reverse=True)
        rw_vectors = rw_vectors[:, rw_sort_index[1:21]]

    return np.real(W_vectors_upper), np.real(W_vectors_lower), np.real(comb_vectors), np.real(rw_vectors)
def load_data(dataset_name, splits_file_path=None, train_percentage=None, val_percentage=None,
              embedding_mode=None, embedding_method=None,
              embedding_method_graph=None, embedding_method_space=None):
    if dataset_name in {'cora', 'citeseer', 'pubmed'}:
        adj, features, labels, _, _, _ = utils.load_data(dataset_name)
        labels = np.argmax(labels, axis=-1)
        features = features.todense()
        G = nx.DiGraph(adj)
    else:
        graph_adjacency_list_file_path = os.path.join('new_data', dataset_name, 'out1_graph_edges.txt')
        graph_node_features_and_labels_file_path = os.path.join(
            'new_data', dataset_name, f'out1_node_feature_label.txt')

        G = nx.DiGraph()
        graph_node_features_dict = {}
        graph_labels_dict = {}

        if dataset_name == 'film':
            with open(graph_node_features_and_labels_file_path) as graph_node_features_and_labels_file:
                graph_node_features_and_labels_file.readline()
                for line in graph_node_features_and_labels_file:
                    line = line.rstrip().split('\t')
                    assert (len(line) == 3)
                    assert (int(line[0]) not in graph_node_features_dict
                            and int(line[0]) not in graph_labels_dict)
                    feature_blank = np.zeros(932, dtype=np.uint8)
                    feature_blank[np.array(line[1].split(','), dtype=np.uint16)] = 1
                    graph_node_features_dict[int(line[0])] = feature_blank
                    graph_labels_dict[int(line[0])] = int(line[2])
        else:
            with open(graph_node_features_and_labels_file_path) as graph_node_features_and_labels_file:
                graph_node_features_and_labels_file.readline()
                for line in graph_node_features_and_labels_file:
                    line = line.rstrip().split('\t')
                    assert (len(line) == 3)
                    assert (int(line[0]) not in graph_node_features_dict
                            and int(line[0]) not in graph_labels_dict)
                    graph_node_features_dict[int(line[0])] = np.array(line[1].split(','), dtype=np.uint8)
                    graph_labels_dict[int(line[0])] = int(line[2])

        with open(graph_adjacency_list_file_path) as graph_adjacency_list_file:
            graph_adjacency_list_file.readline()
            for line in graph_adjacency_list_file:
                line = line.rstrip().split('\t')
                assert (len(line) == 2)
                if int(line[0]) not in G:
                    G.add_node(int(line[0]), features=graph_node_features_dict[int(line[0])],
                               label=graph_labels_dict[int(line[0])])
                if int(line[1]) not in G:
                    G.add_node(int(line[1]), features=graph_node_features_dict[int(line[1])],
                               label=graph_labels_dict[int(line[1])])
                G.add_edge(int(line[0]), int(line[1]))

        adj = nx.adjacency_matrix(G, sorted(G.nodes()))
        features = np.array([features for _, features
                             in sorted(G.nodes(data='features'), key=lambda x: x[0])])
        labels = np.array([label for _, label
                           in sorted(G.nodes(data='label'), key=lambda x: x[0])])

    features = utils.preprocess_features(features)

    if not embedding_mode:
        g = DGLGraph(adj + sp.eye(adj.shape[0]))
    else:
        if embedding_mode == 'ExperimentTwoAll':
            embedding_file_path = os.path.join(
                'embedding_method_combinations_all',
                f'outf_nodes_relation_{dataset_name}all_embedding_methods.txt')
        elif embedding_mode == 'ExperimentTwoPairs':
            embedding_file_path = os.path.join(
                'embedding_method_combinations_in_pairs',
                f'outf_nodes_relation_{dataset_name}_graph_{embedding_method_graph}_space_{embedding_method_space}.txt')
        else:
            embedding_file_path = os.path.join(
                'structural_neighborhood',
                f'outf_nodes_space_relation_{dataset_name}_{embedding_method}.txt')

        space_and_relation_type_to_idx_dict = {}
        with open(embedding_file_path) as embedding_file:
            for line in embedding_file:
                if line.rstrip() == 'node1,node2 space relation_type':
                    continue
                line = re.split(r'[\t,]', line.rstrip())
                assert (len(line) == 4)
                assert (int(line[0]) in G and int(line[1]) in G)
                if (line[2], int(line[3])) not in space_and_relation_type_to_idx_dict:
                    space_and_relation_type_to_idx_dict[(line[2], int(line[3]))] = \
                        len(space_and_relation_type_to_idx_dict)
                if G.has_edge(int(line[0]), int(line[1])):
                    G.remove_edge(int(line[0]), int(line[1]))
                G.add_edge(int(line[0]), int(line[1]),
                           subgraph_idx=space_and_relation_type_to_idx_dict[(line[2], int(line[3]))])

        space_and_relation_type_to_idx_dict['self_loop'] = len(space_and_relation_type_to_idx_dict)
        for node in sorted(G.nodes()):
            if G.has_edge(node, node):
                G.remove_edge(node, node)
            G.add_edge(node, node, subgraph_idx=space_and_relation_type_to_idx_dict['self_loop'])

        adj = nx.adjacency_matrix(G, sorted(G.nodes()))
        g = DGLGraph(adj)

        for u, v, feature in G.edges(data='subgraph_idx'):
            g.edges[g.edge_id(u, v)].data['subgraph_idx'] = th.tensor([feature])

    if splits_file_path:
        with np.load(splits_file_path) as splits_file:
            train_mask = splits_file['train_mask']
            val_mask = splits_file['val_mask']
            test_mask = splits_file['test_mask']
    else:
        assert (train_percentage is not None and val_percentage is not None)
        assert (train_percentage < 1.0 and val_percentage < 1.0
                and train_percentage + val_percentage < 1.0)

        if dataset_name in {'cora', 'citeseer'}:
            disconnected_node_file_path = os.path.join(
                'unconnected_nodes', f'{dataset_name}_unconnected_nodes.txt')
            with open(disconnected_node_file_path) as disconnected_node_file:
                disconnected_node_file.readline()
                disconnected_nodes = []
                for line in disconnected_node_file:
                    line = line.rstrip()
                    disconnected_nodes.append(int(line))

            disconnected_nodes = np.array(disconnected_nodes)
            connected_nodes = np.setdiff1d(np.arange(features.shape[0]), disconnected_nodes)
            connected_labels = labels[connected_nodes]

            train_and_val_index, test_index = next(
                ShuffleSplit(n_splits=1, train_size=train_percentage + val_percentage).split(
                    np.empty_like(connected_labels), connected_labels))
            train_index, val_index = next(
                ShuffleSplit(n_splits=1, train_size=train_percentage).split(
                    np.empty_like(connected_labels[train_and_val_index]),
                    connected_labels[train_and_val_index]))
            train_index = train_and_val_index[train_index]
            val_index = train_and_val_index[val_index]

            train_mask = np.zeros_like(labels)
            train_mask[connected_nodes[train_index]] = 1
            val_mask = np.zeros_like(labels)
            val_mask[connected_nodes[val_index]] = 1
            test_mask = np.zeros_like(labels)
            test_mask[connected_nodes[test_index]] = 1
        else:
            train_and_val_index, test_index = next(
                ShuffleSplit(n_splits=1, train_size=train_percentage + val_percentage).split(
                    np.empty_like(labels), labels))
            train_index, val_index = next(
                ShuffleSplit(n_splits=1, train_size=train_percentage).split(
                    np.empty_like(labels[train_and_val_index]), labels[train_and_val_index]))
            train_index = train_and_val_index[train_index]
            val_index = train_and_val_index[val_index]

            train_mask = np.zeros_like(labels)
            train_mask[train_index] = 1
            val_mask = np.zeros_like(labels)
            val_mask[val_index] = 1
            test_mask = np.zeros_like(labels)
            test_mask[test_index] = 1

    num_features = features.shape[1]
    num_labels = len(np.unique(labels))
    assert (np.array_equal(np.unique(labels), np.arange(len(np.unique(labels)))))

    features = th.FloatTensor(features)
    labels = th.LongTensor(labels)
    train_mask = th.BoolTensor(train_mask)
    val_mask = th.BoolTensor(val_mask)
    test_mask = th.BoolTensor(test_mask)

    # Adapted from https://docs.dgl.ai/tutorials/models/1_gnn/1_gcn.html
    degs = g.in_degrees().float()
    norm = th.pow(degs, -0.5).cuda()
    norm[th.isinf(norm)] = 0
    g.ndata['norm'] = norm.unsqueeze(1)

    return g, features, labels, train_mask, val_mask, test_mask, num_features, num_labels
def reconstruction_mse(reconstruction, graph: nx.Graph):
    X = np.array(nx.adjacency_matrix(graph).todense())
    X_hat = reconstruction
    # note: the elementwise sqrt of the square is the absolute value, so this
    # actually returns the summed absolute error between the reconstruction
    # and the adjacency matrix
    err = np.sum(np.sqrt(np.square(X_hat - X)))
    return err
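# Usage sketch (hypothetical): a perfect reconstruction has zero error.
import networkx as nx
import numpy as np

g = nx.path_graph(4)
X = np.array(nx.adjacency_matrix(g).todense())
print(reconstruction_mse(X, g))  # 0.0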
def prep_for_learning(ep_len, m, n, h, init_states, obstacles, pick_up_state, delivery_state,
                      rewards, rew_val, custom_flag, custom_task):
    # Create the environment and get the TS #
    ts_start_time = timeit.default_timer()
    disc = 1
    TS, obs_mat, state_mat = create_ts(m, n, h)
    path = '../data/ts_' + str(m) + 'x' + str(n) + 'x' + str(h) + '_1Ag_1.txt'
    paths = [path]
    bases = {init_states[0]: 'Base1'}
    obs_mat = update_obs_mat(obs_mat, state_mat, m, obstacles, init_states[0])
    TS = update_adj_mat_3D(m, n, h, TS, obs_mat)
    create_input_file(TS, state_mat, obs_mat, paths[0], bases, disc, m, n, h, 0)
    ts_file = paths
    ts_dict = Ts(directed=True, multi=False)
    ts_dict.read_from_file(ts_file[0])
    ts = expand_duration_ts(ts_dict)
    ts_timecost = timeit.default_timer() - ts_start_time

    # Get the DFA #
    dfa_start_time = timeit.default_timer()
    pick_up = str(pick_up_state[0][0] * n + pick_up_state[0][1])
    delivery = str(delivery_state[0][0] * n + delivery_state[0][1])
    tf = str(ep_len)  # time bound
    if custom_flag == 1:
        phi = custom_task
    else:
        phi = '([H^1 r' + pick_up + ']^[0, 15] * [H^1 r' + delivery + ']^[0, 15])^[0, 31]'
        #phi = '([H^1 r' + pick_up + ']^[0, ' + tf + '] * [H^1 r' + delivery + ']^[0,' + tf + '])^[0, ' + tf + ']'
    # Construct the task according to pickup/delivery,
    # e.g. phi = '([H^1 r47]^[0, 30] * [H^1 r31]^[0, 30])^[0, 30]'
    _, dfa_nor, bdd = twtl.translate(phi, kind=DFAType.Infinity, norm=True)
    # DFAType.Normal for normal, DFAType.Infinity for relaxed
    dfa_timecost = timeit.default_timer() - dfa_start_time

    # Get the PA #
    pa_start_time = timeit.default_timer()
    alpha = 1
    nom_weight_dict = {}
    weight_dict = {}
    pa_or = ts_times_fsa(ts, dfa_nor)  # original PA
    edges_all = nx.get_edge_attributes(ts_dict.g, 'edge_weight')
    max_edge = max(edges_all, key=edges_all.get)
    norm_factor = edges_all[max_edge]
    for pa_edge in pa_or.g.edges():
        edge = (pa_edge[0][0], pa_edge[1][0], 0)
        nom_weight_dict[pa_edge] = edges_all[edge] / norm_factor
    nx.set_edge_attributes(pa_or.g, 'edge_weight', nom_weight_dict)
    nx.set_edge_attributes(pa_or.g, 'weight', 1)
    pa = copy.deepcopy(pa_or)  # copy the PA
    time_weight = nx.get_edge_attributes(pa.g, 'weight')
    edge_weight = nx.get_edge_attributes(pa.g, 'edge_weight')
    for pa_edge in pa.g.edges():
        weight_dict[pa_edge] = alpha * time_weight[pa_edge] + (1 - alpha) * edge_weight[pa_edge]
    nx.set_edge_attributes(pa.g, 'new_weight', weight_dict)
    pa_timecost = timeit.default_timer() - pa_start_time

    # Compute the energy of the states #
    energy_time = timeit.default_timer()
    compute_energy(pa)
    energy_dict = nx.get_node_attributes(pa.g, 'energy')
    energy_pa = []
    for ind in range(len(pa.g.nodes())):
        energy_pa.append(list(pa.g.nodes([0])[ind][1].values())[0])

    blocking_inds = []
    for ind in range(len(pa.g.nodes())):
        if energy_pa[ind] > 100000:
            blocking_inds.append(ind)

    pa_g_nodes = list(pa.g.nodes())
    for ind in sorted(blocking_inds, reverse=True):
        del pa_g_nodes[ind]
        del energy_pa[ind]

    # projection of PA on TS #
    init_state = [init_states[0][0] * n + init_states[0][1]]
    pa2ts = []
    for i in range(len(pa_g_nodes)):
        if pa_g_nodes[i][0] != 'Base1':
            pa2ts.append(int(pa_g_nodes[i][0].replace("r", "")))
        else:
            pa2ts.append(init_state[0])
            i_s = i  # agent's initial location in the PA
    energy_timecost = timeit.default_timer() - pa_start_time

    # TS adjacency matrix and source-target
    TS_adj = TS
    TS_s = []
    TS_t = []
    for i in range(len(TS_adj)):
        for j in range(len(TS_adj)):
            if TS_adj[i, j] != 0:
                TS_s.append(i)
                TS_t.append(j)

    # PA adjacency matrix and source-target
    pa_adj_st = nx.adjacency_matrix(pa.g)
    pa_adj = pa_adj_st.todense()
    pa_s = []  # source node
    pa_t = []  # target node
    c1 = -1
    for i in range(len(pa_adj)):
        if i not in blocking_inds:
            c1 = c1 + 1
            c2 = -1
            for j in range(len(pa_adj)):
                if j not in blocking_inds:
                    c2 = c2 + 1
                    if pa_adj[i, j] == 1:
                        pa_s.append(c1)
                        pa_t.append(c2)

    blocking_pa_inds = []
    for ind in range(len(pa_s)):
        if pa_s[ind] in blocking_inds:
            blocking_pa_inds.append(ind)
        elif pa_t[ind] in blocking_inds:
            blocking_pa_inds.append(ind)
    #for ind in sorted(blocking_pa_inds, reverse=True):
    #    del pa_s[ind]
    #    del pa_t[ind]

    # PA rewards matrix
    rewards_ts = np.zeros(m * n)
    rewards_pa = np.zeros(len(pa2ts))
    rewards_ts_indexes = []
    for i in range(len(rewards)):
        # rewards_ts_indexes[i] = rewards[i][0] * n + rewards[i][1]
        rewards_ts_indexes.append(rewards[i][0] * n + rewards[i][1])
        rewards_ts[rewards_ts_indexes[i]] = rew_val
    for i in range(len(rewards_pa)):
        rewards_pa[i] = rewards_ts[pa2ts[i]]

    # Display some important info
    print('##### PICK-UP and DELIVERY MISSION #####' + "\n")
    print('Initial Location  : ' + str(init_states[0]) + ' <---> Region ' + str(init_state[0]))
    print('Pick-up Location  : ' + str(pick_up_state[0]) + ' <---> Region ' + pick_up)
    print('Delivery Location : ' + str(delivery_state[0]) + ' <---> Region ' + delivery)
    print('Reward Locations  : ' + str(rewards) + ' <---> Regions ' + str(rewards_ts_indexes) + "\n")
    print('State Matrix : ')
    print(state_mat)
    print("\n")
    print('Mission Duration  : ' + tf + ' time steps')
    print('TWTL Task : ' + phi + "\n")
    print('Computational Costs : TS created in ' + str(ts_timecost) + ' seconds')
    print('                      DFA created in ' + str(dfa_timecost) + ' seconds')
    print('                      PA created in ' + str(pa_timecost) + ' seconds')
    print('                      Energy of PA states calculated in ' + str(energy_timecost) + ' seconds')

    return i_s, pa, pa_s, pa_t, pa2ts, energy_pa, rewards_pa, pick_up, delivery, pa_g_nodes
def preprocess_data(data_home, args, **kwargs):
    bucket_size = kwargs.get('bucket', 300)
    encoding = kwargs.get('encoding', 'iso-8859-1')
    celebrity_threshold = kwargs.get('celebrity', 10)
    mindf = kwargs.get('mindf', 10)
    d2v = kwargs.get('d2v', False)
    adj_d2v = args.adj_d2v
    one_hot_label = kwargs.get('onehot', False)
    vocab_file = os.path.join(data_home, 'vocab.pkl')
    if d2v:
        dump_name = ('doc2vec_win_' + str(args.d2vwindow) + '_dm_' + str(args.d2vdm)
                     + 'adj_d2v_' + str(adj_d2v * 1) + '_dump.pkl')
    else:
        dump_name = ('tfidf_win_' + str(args.d2vwindow) + '_dm_' + str(args.d2vdm)
                     + 'adj_d2v_' + str(adj_d2v * 1) + '_dump.pkl')
    dump_file = os.path.join(data_home, dump_name)
    if os.path.exists(dump_file) and not model_args.builddata:
        logging.info('loading data from dumped file ' + dump_name)
        data = load_obj(dump_file)
        logging.info('loading data finished!')
        return data

    dl = DataLoader(data_home=data_home, bucket_size=bucket_size, encoding=encoding,
                    celebrity_threshold=celebrity_threshold, one_hot_labels=one_hot_label,
                    mindf=mindf, token_pattern=r'(?u)(?<![@])#?\b\w\w+\b')
    dl.load_data()
    dl.assignClasses()
    if d2v:
        dl.doc2vec(args=args)
        X_train = dl.X_train_doc2vec
        X_test = dl.X_test_doc2vec
        X_dev = dl.X_dev_doc2vec
    elif args.word2vec:
        dl.word2vec(args)
        X_train = dl.X_train_word2vec
        X_dev = dl.X_dev_word2vec
        X_test = dl.X_test_word2vec
    else:
        dl.tfidf()
        X_train = dl.X_train
        X_dev = dl.X_dev
        X_test = dl.X_test
        vocab = dl.vectorizer.vocabulary_
        logging.info('saving vocab in {}'.format(vocab_file))
        dump_obj(vocab, vocab_file)
        logging.info('vocab dumped successfully!')

    U_test = dl.df_test.index.tolist()
    U_dev = dl.df_dev.index.tolist()
    U_train = dl.df_train.index.tolist()

    if adj_d2v and args.doc2vec:
        adj = dl.adj_doc2vec
        G = nx.from_numpy_matrix(adj, parallel_edges=False, create_using=None)
    else:
        dl.get_graph()
        logging.info('creating adjacency matrix...')
        adj = nx.adjacency_matrix(dl.graph, nodelist=range(len(U_train + U_dev + U_test)), weight='w')
        G = dl.graph

    # converting the edges index to pytorch format
    edges = list(G.edges)
    edges = np.array(edges)
    edges = edges[np.lexsort(np.fliplr(edges).T)]

    wadj = args.weighted_adjacency
    # if we want to weight the adjacency matrix
    if wadj:
        logging.info('multiplying weights...')
        w_adj_s = dl.adj_weight_d2v * adj
    else:
        w_adj_s = 0
    logging.info('adjacency matrix created.')

    Y_test = dl.test_classes
    Y_train = dl.train_classes
    Y_dev = dl.dev_classes
    classLatMedian = {str(c): dl.cluster_median[c][0] for c in dl.cluster_median}
    classLonMedian = {str(c): dl.cluster_median[c][1] for c in dl.cluster_median}

    P_test = [str(a[0]) + ',' + str(a[1]) for a in dl.df_test[['lat', 'lon']].values.tolist()]
    P_train = [str(a[0]) + ',' + str(a[1]) for a in dl.df_train[['lat', 'lon']].values.tolist()]
    P_dev = [str(a[0]) + ',' + str(a[1]) for a in dl.df_dev[['lat', 'lon']].values.tolist()]

    userLocation = {}
    for i, u in enumerate(U_train):
        userLocation[u] = P_train[i]
    for i, u in enumerate(U_test):
        userLocation[u] = P_test[i]
    for i, u in enumerate(U_dev):
        userLocation[u] = P_dev[i]

    total_users = X_train.shape[0] + X_dev.shape[0] + X_test.shape[0]

    data = (adj, X_train, Y_train, X_dev, Y_dev, X_test, Y_test, U_train, U_dev, U_test,
            classLatMedian, classLonMedian, userLocation, w_adj_s, edges, total_users)
    if not model_args.builddata:
        logging.info('dumping data in {} ...'.format(str(dump_file)))
        dump_obj(data, dump_file)
        logging.info('data dump finished!')
    return data
def eigen_pairs_perturbation(datadir, graphNames):
    eps = [0.0001]
    for graphname in graphNames:
        G = nx.read_edgelist(datadir + "dataset/" + graphname + ".txt")
        print("*********************dataset =" + graphname)
        print("read....")
        matrix = nx.adjacency_matrix(G)
        A = matrix.toarray()
        print("eigenval....")
        matrix = np.matrix(A)
        v1, w1 = np.linalg.eig(matrix)
        v2, w2 = sort_eigens(v1, w1)
        v3, w3 = sort_eigens2(v1, w1)
        max1 = v1[0] + 1
        min = v2[len(v2) - 1]
        for ep in eps:
            with open(datadir + "output/" + graphname + "(" + str(ep) + ")" + "result.csv", 'w') as csvfile:
                fieldnames = ["name"]
                eigenvals = []
                eigenvecs = []
                # step 1 (sweeping method)
                start = time.perf_counter()
                k = len(v2)
                v, w = eigen_values_computation(matrix, k, ep, max1, min, v3, w3)
                first = time.perf_counter() - start
                # step 3 (finding duplicates)
                xs3 = eigenvalue_computation_duplicates(matrix, k, ep, min, max1, v, v2)
                second = time.perf_counter() - first
                # step 4 (inverse power)
                xs3, eigenvectors = eigenvector_computation(matrix, xs3, w, v, len(v2), v2, w2)
                third = time.perf_counter() - second
                eigenvals.append(xs3)
                eigenvecs.append(eigenvectors)

                percent = [5, 10, 15, 20, 25, 30, 50, 100]
                dict6 = {"name": "percent"}
                dict7 = {"name": "error"}
                dict8 = {"name": "errorvecs"}
                dict9 = {"name": "totalerror"}
                cc = 0
                for item in percent:
                    newv2 = []
                    newxs3 = []
                    neww2 = []
                    neweigenvectors = []
                    s1 = float(len(v2))
                    s2 = float(item)
                    x = (float(len(v2)) * float(item)) / 100
                    x = int(x)
                    for k in range(x):
                        newv2.append(v2[k])
                        neww2.append(w2[k])
                        if k < len(xs3):
                            newxs3.append(xs3[k])
                            neweigenvectors.append(eigenvectors[:, k])
                    per1, error, error2, vecsim = per_correct_eigens(newv2, newxs3, ep, neww2, neweigenvectors)
                    dict6.update({str(cc): per1})
                    dict7.update({str(cc): error})
                    dict8.update({str(cc): error2})
                    dict9.update({str(cc): (error2 + error) / 2})
                    cc += 1

                maxlen = len(v1)
                if maxlen < len(v):
                    maxlen = len(v)
                if maxlen < len(xs3):
                    maxlen = len(xs3)
                for i in range(maxlen):
                    fieldnames.append(str(i))
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writeheader()
                dict1 = {"name": "realval"}
                dict0 = {"name": "realvec"}
                dict2 = {"name": "step1"}
                dict4 = {"name": "step3"}
                dict5 = {"name": "step4"}
                for item in range(len(v2)):
                    dict1.update({str(item): v2[item]})
                    dict0.update({str(item): w2[item]})
                for item in range(len(v)):
                    dict2.update({str(item): v[item]})
                for item in range(len(xs3)):
                    dict4.update({str(item): xs3[item]})
                    dict5.update({str(item): eigenvectors[item]})
                writer.writerow(dict1)
                writer.writerow(dict0)
                writer.writerow(dict2)
                writer.writerow(dict4)
                writer.writerow(dict5)
                writer.writerow({"name": "time(sec)", "0": first, "1": second, "2": third})
                writer.writerow({"name": "time(min)", "0": first / 60, "1": second / 60, "2": third / 60})
                writer.writerow({"name": "title", "0": 5, "1": 10, "2": 15, "3": 20,
                                 "4": 25, "5": 30, "6": 50, "7": 100})
                writer.writerow(dict6)
                writer.writerow(dict7)
                writer.writerow(dict8)
                writer.writerow(dict9)
            M = np.array([v2, xs3])
            if len(v2) == len(xs3):
                similarities = cosine_similarity(M)
                if abs(similarities[0][1]) > 0.9:
                    break
    return eigenvals, eigenvecs
def dict_to_adj(the_dict, directed=True):
    if directed:
        graph = nx.from_dict_of_lists(the_dict, create_using=nx.DiGraph())
    else:
        graph = nx.from_dict_of_lists(the_dict)
    return nx.adjacency_matrix(graph, nodelist=sorted(graph.nodes()))
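# Usage sketch: adjacency-list dict to a sparse adjacency matrix with a
# deterministic (sorted) node order.
import networkx as nx

adj = dict_to_adj({'a': ['b', 'c'], 'b': ['c'], 'c': []})
print(adj.todense())
# [[0 1 1]
#  [0 0 1]
#  [0 0 0]]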
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import pandas as pd
import edward as ed
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import networkx as nx
from edward.models import Normal, Poisson, InverseGamma
#from observations import celegans
import collections

X = pd.read_csv("/user_network.csv")
# X.columns = ['id', 'unix_time', 'dst_id', "src_id"]
X.iloc[:, 2:4].to_csv("/network_10k.edgelist", sep=" ", index=False)

# read and parse the edgelist into a (networkx) directed graph
G = nx.read_edgelist("/network_excluded.edgelist", create_using=nx.DiGraph())
A = nx.adjacency_matrix(G)           # make adjacency matrix
x_train = np.asarray(A.todense())    # convert adjacency matrix to numpy array