def test_from_adjacency_named(self):
    """Round-trip a labelled adjacency frame whose columns are out of order.

    Example from issue #3105: the frame handed to
    ``nx.from_pandas_adjacency`` has its columns permuted relative to its
    index, and converting the resulting DiGraph back must reproduce the
    unpermuted frame.
    """
    adjacency = {
        "A": {"A": 0, "B": 0, "C": 0},
        "B": {"A": 1, "B": 0, "C": 0},
        "C": {"A": 0, "B": 1, "C": 0},
    }
    expected = pd.DataFrame(adjacency)
    # Same values, columns deliberately reordered.
    permuted = expected[["A", "C", "B"]]
    digraph = nx.from_pandas_adjacency(permuted, create_using=nx.DiGraph())
    round_tripped = nx.to_pandas_adjacency(digraph, dtype=int)
    pd.testing.assert_frame_equal(round_tripped, expected)
def test_roundtrip(self):
    """Check that graphs survive edge-list and adjacency round trips."""
    # edgelist
    edge_graph = nx.Graph([(1, 1), (1, 2)])
    edge_frame = nx.to_pandas_edgelist(edge_graph)
    assert_graphs_equal(edge_graph, nx.from_pandas_edgelist(edge_frame))

    # adjacency
    weighted_adjacency = {
        1: {1: {'weight': 1}, 2: {'weight': 1}},
        2: {1: {'weight': 1}},
    }
    adjacency_graph = nx.Graph(weighted_adjacency)
    adjacency_frame = nx.to_pandas_adjacency(adjacency_graph, dtype=int)
    assert_graphs_equal(adjacency_graph,
                        nx.from_pandas_adjacency(adjacency_frame))
def ReadGraph(adress):
    """Load a header-less CSV adjacency matrix and return it as a graph.

    ``adress`` is passed straight to ``pd.read_csv`` with ``header=None``;
    the resulting frame is converted with ``nx.from_pandas_adjacency``
    using its default graph type.
    """
    adjacency = pd.read_csv(adress, header=None)
    return nx.from_pandas_adjacency(adjacency)
import numpy as np from src.hierarchy import signal_flow from graspy.models import SBMEstimator node_signal_flow = signal_flow(adj) mean_sf = np.zeros(k) for i in np.unique(pred_labels): inds = np.where(pred_labels == i)[0] mean_sf[i] = np.mean(node_signal_flow[inds]) cluster_mean_latent = gmm.model_.means_[:, 0] block_probs = SBMEstimator().fit(bin_adj, y=pred_labels).block_p_ block_prob_df = pd.DataFrame(data=block_probs, index=range(k), columns=range(k)) block_g = nx.from_pandas_adjacency(block_prob_df, create_using=nx.DiGraph) plt.figure(figsize=(10, 10)) # don't ever let em tell you you're too pythonic pos = dict(zip(range(k), zip(cluster_mean_latent, mean_sf))) # nx.draw_networkx_nodes(block_g, pos=pos) labels = nx.get_edge_attributes(block_g, "weight") # nx.draw_networkx_edge_labels(block_g, pos, edge_labels=labels) norm = mpl.colors.LogNorm(vmin=0.01, vmax=0.1) sm = ScalarMappable(cmap="Reds", norm=norm) cmap = sm.to_rgba(np.array(list(labels.values())) + 0.01) nx.draw_networkx( block_g, pos,
def read_h5(filename: str = 'dandelion_data.h5') -> Dandelion:
    """
    Read in and returns a `Dandelion` class from .h5 format.

    Only the `data` table is mandatory. `metadata`, `edges`, the two
    graphs, the two layouts, `germline`, `distance` and `threshold` are
    read on a best-effort basis and left out (or left empty / `None`)
    when they cannot be read.

    Parameters
    ----------
    filename : str
        path to `.h5` file

    Returns
    -------
    `Dandelion` object.

    Raises
    ------
    AttributeError
        If the `data` table cannot be read or sanitized.
    """
    try:
        data = pd.read_hdf(filename, 'data')
        data = sanitize_data(data)
        if check_mix_dtype(data):
            for x in return_mix_dtype(data):
                # Assign the result back: an in-place replace on the
                # selected column does not reach `data` when pandas
                # copy-on-write is active.
                data[x] = data[x].replace('', pd.NA)
            data = sanitize_data(data)
    except Exception as err:
        raise AttributeError(
            '{} does not contain attribute `data`'.format(filename)) from err

    def _read_optional(key):
        """Return the table stored under `key`, or None if it is unreadable."""
        try:
            return pd.read_hdf(filename, key)
        except Exception:
            return None

    metadata = _read_optional('metadata')
    edges = _read_optional('edges')

    graph = None
    try:
        g_0 = pd.read_hdf(filename, 'graph/graph_0')
        g_1 = pd.read_hdf(filename, 'graph/graph_1')
        # Shift stored weights by +1 so that, after missing cells are
        # filled with 0, a stored weight of 0 is not read as "no edge".
        g_0 = (g_0 + 1).fillna(0)
        g_1 = (g_1 + 1).fillna(0)
        graph0 = nx.from_pandas_adjacency(g_0)
        graph1 = nx.from_pandas_adjacency(g_1)
        # Undo the +1 shift on the edges that were created.
        for g in (graph0, graph1):
            for _, _, d in g.edges(data=True):
                d['weight'] = d['weight'] - 1
        graph = (graph0, graph1)
    except Exception:
        # Graphs are optional; without both of them none is attached.
        graph = None

    layout = None
    germline = {}
    distance = Tree()
    with h5py.File(filename, 'r') as hf:
        try:
            layout0 = {}
            for k in hf['layout/layout_0'].attrs.keys():
                layout0[k] = np.array(hf['layout/layout_0'].attrs[k])
            layout1 = {}
            for k in hf['layout/layout_1'].attrs.keys():
                layout1[k] = np.array(hf['layout/layout_1'].attrs[k])
            layout = (layout0, layout1)
        except Exception:
            layout = None

        try:
            for g in hf['germline'].attrs:
                germline[g] = hf['germline'].attrs[g]
        except Exception:
            pass  # keep whatever was collected so far

        try:
            for d in hf['distance'].keys():
                d_ = pd.read_hdf(filename, 'distance/' + d)
                distance[d] = scipy.sparse.csr_matrix(d_.values)
        except Exception:
            pass  # keep whatever was collected so far

        try:
            # The builtin float replaces the `np.float` alias, which no
            # longer exists in current NumPy; with the alias missing the
            # threshold was silently read as None.
            threshold = float(np.array(hf['threshold']))
        except Exception:
            threshold = None

    constructor = {'data': data}
    if metadata is not None:
        constructor['metadata'] = metadata
    constructor['germline'] = germline
    if edges is not None:
        constructor['edges'] = edges
    constructor['distance'] = distance
    if layout is not None:
        constructor['layout'] = layout
    if graph is not None:
        constructor['graph'] = graph

    try:
        res = Dandelion(**constructor)
    except Exception:
        # Fall back to an uninitialized object when construction fails.
        res = Dandelion(**constructor, initialize=False)

    res.threshold = threshold
    return res
def ReadDiGraph(adress):
    """Load a header-less CSV adjacency matrix and return it as a DiGraph.

    ``adress`` is passed straight to ``pd.read_csv`` with ``header=None``;
    the resulting frame is converted with ``nx.from_pandas_adjacency``
    into a fresh ``nx.DiGraph``.
    """
    adjacency = pd.read_csv(adress, header=None)
    directed = nx.from_pandas_adjacency(adjacency, create_using=nx.DiGraph())
    return directed
size=15, line_width=2)) node_trace.text = list(pos.keys()) traces = edges_list + [node_trace] layout = go.Layout( title=title, # titlefont_size=16, # template='plotly_white', font=dict(color="white"), paper_bgcolor="#2c2f38", plot_bgcolor='#2c2f38', showlegend=False, hovermode='closest', margin=dict(b=20, l=5, r=5, t=40), xaxis=dict(showgrid=False, zeroline=False, showticklabels=False), yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)) return dict(data=traces, layout=layout) distance = get_network_data() complete_graph = nx.from_pandas_adjacency(distance) def get_nplot(cluster, title): fig = go.Figure(network_plot(cluster, complete_graph, title)) return fig
if '0229' not in dm and year not in ('2019', '2018', '1861', '1862'): if year not in year_data: year_data[year] = {} year_data[year][dm] = int(new_line[3]) df = pd.DataFrame(year_data) df['mmdd'] = df.index df.reset_index(drop=True, inplace=True) return df if __name__ == "__main__": df_cor = df.drop(columns=['mmdd']).corr(method='pearson', min_periods=1) network_df = nx.from_pandas_adjacency(df_cor, create_using=None) # nx.draw_spring(network_df) # plt.show() thresh_vals = [0.3, 0.4, 0.5, 0.6, 0.65, 0.7, 0.75] df_cor_clean = df_cor.copy() df_cor_clean[df_cor_clean < 0.4] = 0 network_df = nx.from_pandas_adjacency(df_cor_clean, create_using=None) # pos = nx.spring_layout(network_df,scale=2) # nx.draw(G,pos,font_size=8) # plt.show() # G = nx.path_graph(4) pos = nx.spring_layout(network_df)
# plot heatmap ax = sns.heatmap(R_squared, xticklabels=1, yticklabels=1) plt.show() # plot correlation graph, assuming > 0.3 signifies an existing correlation corr_graph_vals = R_squared[R_squared > 0.3].fillna(0) corr_graph_vals = corr_graph_vals.round(3) # remove diagonal entries, and remove variables with no strong correlation cols = corr_graph_vals.columns for c in cols: corr_graph_vals[c][c] = 0 var_mask = (corr_graph_vals.T != 0).any() corr_graph_vals = corr_graph_vals[var_mask] corr_graph_vals = corr_graph_vals.T[var_mask].T corr_graph = nx.from_pandas_adjacency(corr_graph_vals) pos = nx.spring_layout(corr_graph) nx.draw_networkx(corr_graph, pos) weights = nx.get_edge_attributes(corr_graph, 'weight') nx.draw_networkx_edge_labels(corr_graph, pos, edge_labels=weights) plt.show() # print correlated variables to SalePrice pd.set_option('display.max_rows', len(corr_matrix)) print("R^2:", R_squared.sort_values(by=['SalePrice'], ascending='True')) # step: Choosen highest R^2 variables with respect to Y (SalePrice) # Note, these are chosen without first transforming the variables primary_vars = [ 'SalePrice', 'OverallQual', 'GrLivArea', 'GarageCars', 'GarageArea'
def make_plot(self):
    """Build the network plot for the adjacency frame ``df``.

    The graph is directed or undirected according to
    ``get_directed(self.sid)`` and laid out according to ``diagramType``.
    Every laid-out node receives degree / weighted-degree attributes plus
    matching ``*size`` attributes; with more than one node the sizes are
    scaled relative to the other nodes, otherwise neutral values are used.

    Side effects: sets ``self.nodeCount`` and fills ``self.colorMap`` for
    every name in ``self.colorList``.

    Returns
    -------
    tuple
        ``(plot, points)``: the graph plot and its ``nodes`` element.

    Raises
    ------
    ValueError
        If ``diagramType`` is not "FORCE", "HIERARCHICAL" or "RADIAL".
    """
    from graphion.session.handler import get_directed  # dependency cycle fix

    directed = get_directed(self.sid)
    G = from_pandas_adjacency(df, create_using=DiGraph if directed else Graph)
    self.nodeCount = number_of_nodes(G)

    # Create NetworkX graph layout manager
    if diagramType == "FORCE":
        layout = spring_layout(G,
                               k=10.42 / sqrt(self.nodeCount),
                               seed=server.config['SEED'])
    elif diagramType == "HIERARCHICAL":
        if self.nodeCount > 1:
            layout = graphviz_layout(Graph(list(G.edges(data=True))),
                                     prog='dot')
        else:
            # graphviz_layout does not work with one node, just display a
            # "circular_layout"
            layout = circular_layout(G)
    elif diagramType == "RADIAL":
        layout = circular_layout(G)
    else:
        # Previously fell through and failed later on an unbound `layout`.
        raise ValueError(
            "Unsupported diagram type: {!r}".format(diagramType))

    # Only nodes that received a position are annotated.
    nodes = tuple(sorted(layout))

    # calculate centrality
    nodeCentralities = degree_centrality(G).values()

    min_node_size = 0.1  # min_node_size * max_node_size = minimum node size
    max_node_size = 30

    def _degree_tables(weight=None):
        """Return (incoming, outgoing, total) per-node (weighted) degrees."""
        if is_directed(G):
            incoming = dict(G.in_degree(weight=weight))
            outgoing = dict(G.out_degree(weight=weight))
            total = {n: incoming[n] + outgoing[n] for n in nodes}
        else:
            incoming = dict(G.degree(weight=weight))
            outgoing = incoming.copy()
            total = incoming.copy()
        return incoming, outgoing, total

    def _relative_sizes(raw):
        """Scale `raw` to relative node sizes, floored at `min_node_size`."""
        values = [raw[n] for n in nodes]
        # Use the true minimum and maximum. The previous if/elif scan
        # never considered a value for the minimum when it had just set a
        # new maximum, so the minimum could stay at its sentinel.
        lowest, highest = min(values), max(values)
        if lowest == highest:
            # All nodes equal: give every node the full size.
            return dict.fromkeys(nodes, 1)
        # NOTE(review): the divisor is the maximum, not the range; a
        # maximum of 0 together with negative weights raises
        # ZeroDivisionError, as it did before.
        return {
            n: max((raw[n] - lowest) / highest, min_node_size)
            for n in nodes
        }

    if self.nodeCount > 1:
        inDegree, outDegree, totalDegree = _degree_tables()
        inWeight, outWeight, totalWeight = _degree_tables(weight='weight')

        in_degree_size = _relative_sizes(inDegree)
        out_degree_size = _relative_sizes(outDegree)
        total_degree_size = _relative_sizes(totalDegree)
        in_weight_size = _relative_sizes(inWeight)
        out_weight_size = _relative_sizes(outWeight)
        total_weight_size = _relative_sizes(totalWeight)

        # Making a dictionary for all attributes
        attributes = {
            n: {
                'indegreesize': in_degree_size[n] * max_node_size,
                'outdegreesize': out_degree_size[n] * max_node_size,
                'totaldegreesize': total_degree_size[n] * max_node_size,
                'inweightsize': in_weight_size[n] * max_node_size,
                'outweightsize': out_weight_size[n] * max_node_size,
                'totalweightsize': total_weight_size[n] * max_node_size,
                'indegree': inDegree[n],
                'outdegree': outDegree[n],
                'totaldegree': totalDegree[n],
                'inweight': inWeight[n],
                'outweight': outWeight[n],
                'totalweight': totalWeight[n],
                'count': 0
            }
            for n in nodes
        }
    else:
        attributes = {
            n: {
                'indegreesize': 1,
                'outdegreesize': 1,
                'totaldegreesize': 1,
                'inweightsize': 1,
                'outweightsize': 1,
                'totalweightsize': 1,
                'indegree': 0,
                'outdegree': 0,
                'totaldegree': 0,
                'inweight': 0,
                'outweight': 0,
                'totalweight': 0,
                'count': 0
            }
            for n in nodes
        }

    set_node_attributes(G, attributes)
    plot = HVGraph.from_networkx(G, layout).opts(directed=directed,
                                                 arrowhead_length=0.01)

    # disabling displaying all node info on hovering over the node
    tooltips = [('Index', '@index'), ('In-Degree', '@indegree'),
                ('Out-Degree', '@outdegree'),
                ('Total Degree', '@totaldegree'),
                ('In Edge Weight', '@inweight'),
                ('Out Edge-Weight', '@outweight'),
                ('Total Edge-Weight', '@totalweight')]
    hover = HoverTool(tooltips=tooltips)

    # Make custom dictionary with color palettes
    large_palettes = {
        'cividis': Cividis256,
        'viridis': Viridis256,
        'inferno': Inferno256,
    }
    for c in self.colorList:
        if c in large_palettes:
            self.colorMap[c] = large_palettes[c]
        else:
            self.colorMap[c] = palette[c]

    # Bundling is only attempted when at least one node has an edge.
    if max(nodeCentralities) > 0 and datashaded and self.nodeCount > 1:
        plot = bundle_graph(plot)

    points = plot.nodes
    points.opts(cmap=self.colorMap[self.color_palette],
                color=self.node_color,
                size=self.node_size,
                tools=['box_select', 'lasso_select', 'tap', hover],
                active_tools=['wheel_zoom'],
                toolbar='above',
                show_legend=False,
                width=self.size,
                height=self.size)
    plot.opts(node_size=0,
              node_color=None,
              node_line_width=0,
              node_hover_fill_color='green')
    return plot, points
def generate3DDiagram(file, sid, df=False):
    """Render adjacency data as a 3-D force-directed Plotly diagram.

    Parameters
    ----------
    file
        Passed to ``decreaseDiagramSize`` when ``df`` is falsy; otherwise
        used directly as the adjacency DataFrame.
    sid
        Forwarded to ``calculate_plot_size`` to size the figure.
    df : bool
        Flag telling whether ``file`` already is the adjacency DataFrame.

    Returns
    -------
    The figure wrapped in ``Plotly``.
    """
    frame = file if df else decreaseDiagramSize(file)
    names = frame.columns.tolist()
    node_total = len(names)

    graph = convert_node_labels_to_integers(from_pandas_adjacency(frame))

    # 3d spring layout
    pos = spring_layout(graph, dim=3)
    # numpy array of x,y,z positions in sorted node order
    layt = array([pos[v] for v in sorted(graph)])
    # scalar colors
    scalars = array(list(graph.nodes())) + 5

    # edges: positive weights are rescaled by the largest weight
    weighted_edges = list(graph.edges(data=True))
    maximum = max([0] + [attrs['weight'] for _, _, attrs in weighted_edges])
    Edges = array([(int(u), int(v), {
        'weight': attrs['weight'] / maximum
    }) for u, v, attrs in weighted_edges if attrs['weight'] > 0])

    def make_edge(x, y, z, weight):
        # NOTE(review): int(weight) truncates before the multiplication, so
        # every rescaled weight below 1 yields red == 0; int(weight * 180)
        # may have been intended -- confirm before changing.
        red = int(weight) * 180
        stroke = dict(color='rgb(' + str(red) + ', 0, 0)',
                      width=(weight * 3)**2)
        return Scatter3d(x=x,
                         y=y,
                         z=z,
                         line=stroke,
                         hoverinfo='none',
                         mode='lines')

    # x-, y- and z-coordinates of nodes
    Xn, Yn, Zn = ([layt[k][axis] for k in range(node_total)]
                  for axis in range(3))

    edge_traces = []
    for edge in Edges:
        start, end = layt[edge[0]], layt[edge[1]]
        # coordinates of the two edge ends, followed by a None entry
        xs, ys, zs = ([start[axis], end[axis], None] for axis in range(3))
        edge_traces.append(make_edge(xs, ys, zs, edge[2]['weight']))

    node_marker = dict(symbol='circle',
                       size=6,
                       color=scalars,
                       colorscale='Viridis',
                       line=dict(color='rgb(50,50,50)', width=0.5))
    trace2 = Scatter3d(x=Xn,
                       y=Yn,
                       z=Zn,
                       mode='markers',
                       marker=node_marker,
                       text=names,
                       hoverinfo='text')

    axis = dict(showbackground=False,
                showline=False,
                zeroline=False,
                showgrid=False,
                showticklabels=False,
                title='')

    from graphion.session.handler import calculate_plot_size
    psize = calculate_plot_size(sid)

    scene = dict(
        xaxis=dict(axis),
        yaxis=dict(axis),
        zaxis=dict(axis),
    )
    layout = Layout(title="Force-directed layout",
                    width=psize,
                    height=psize,
                    showlegend=False,
                    scene=scene,
                    margin=dict(t=100),
                    hovermode='closest',
                    paper_bgcolor='rgba(0,0,0,0)',
                    plot_bgcolor='rgba(0,0,0,0)')

    fig = Figure(data=[trace2] + edge_traces, layout=layout)

    extension('plotly')
    painful = Plotly(fig)
    return painful
import pandas as pd
import networkx as nx

# Load the eleven phase adjacency matrices and build one graph per phase,
# keyed by phase number.
phases = {}
G = {}
for i in range(1, 12):
    var_name = "phase" + str(i)
    file_name = "https://raw.githubusercontent.com/ragini30/Networks-Homework/main/" + var_name + ".csv"
    # file_name = "../data/CAVIAR/" + var_name + ".csv"
    phases[i] = pd.read_csv(file_name, index_col=["players"])
    # Prefix the labels with "n" and reuse the column labels as the index
    # so that rows and columns carry identical node names.
    phases[i].columns = "n" + phases[i].columns
    phases[i].index = phases[i].columns
    # NOTE(review): a stray no-op `nx.DiGraph()` statement followed this
    # call and was removed. It may have been meant as
    # `create_using=nx.DiGraph()`; the graphs stay undirected as before --
    # confirm which is intended.
    G[i] = nx.from_pandas_adjacency(phases[i])
    G[i].name = var_name

skeys = ['n1', 'n3', 'n12', 'n83']

# The bare list expressions below only display a value when run as
# notebook cells; as a script they are evaluated and discarded.
dc9 = nx.degree_centrality(G[9])
[dc9[k] for k in skeys]

bc3 = nx.betweenness_centrality(G[3], normalized=True)
[bc3[k] for k in skeys]

bc9 = nx.betweenness_centrality(G[9], normalized=True)
[bc9[k] for k in skeys]

ec3 = nx.eigenvector_centrality(G[3])
[ec3[k] for k in skeys]

ec9 = nx.eigenvector_centrality(G[9])
[ec9[k] for k in skeys]
def _adj_to_edge(df):
    """Convert an adjacency-matrix DataFrame into an edge-list DataFrame.

    The frame is turned into a graph with ``nx.from_pandas_adjacency`` and
    then exported with ``nx.to_pandas_edgelist``.
    """
    graph = nx.from_pandas_adjacency(df)
    edge_frame = nx.to_pandas_edgelist(graph)
    return edge_frame
""" import networkx as nx #import numpy as np import pandas as pd #import matplotlib.pyplot as plt blocks = ['Salt Data//salt-block4.csv'] for x in blocks: print(x) block = pd.read_csv(x) block = block.drop(['Unnamed: 0'], axis =1) for i in block.columns: block.loc[block[i] < .3, i] = 0 block.loc[block[i] >= .3, i] = 1 print("done with for loop") block = block.reset_index(drop = True) block.columns = range(block.shape[1]) G = nx.from_pandas_adjacency(block) print(nx.info(G)) nx.write_gpickle(G, str(x)[:-4] + '.p') #G.remove_nodes_from(list(nx.isolates(G))) #g = nx.draw(G, node_size=1, width = .01)
chunks = [distance[x:x+38] for x in range(0, len(distance), 38)] #turn into a distance matrix cities = [] for i in range(1, len(data)+1): cities.append('city ' + str(i)) distances = pd.DataFrame(chunks, columns=cities, index=cities) #Question a #Create X,Y coordinates of each node xypos = {} for i in range(len(data)): xypos['city ' + str(data['city'][i])] = ( data['longitude'][i] , data['latitude'][i]) G = nx.from_pandas_adjacency(distances) nx.draw_networkx_nodes(G, pos = xypos) nx.draw_networkx_labels(G, pos = xypos) #Question c #Definining several functions to use later in a while loop def saveresult(draw=True): """ To be used after optimizing a Gurobi model. Saves the result of a Gurobi Optimizer solution as a Graph, returning NetworkX Graph Object. Use draw=True argument to draw the graph. """ vals = m.getAttr('x', vars) res=[]
def to_networkx_graph(data, create_using=None, multigraph_input=False):
    """Make a NetworkX graph from a known data structure.

    The preferred way to call this is automatically
    from the class constructor

    >>> d = {0: {1: {'weight':1}}} # dict-of-dicts single edge (0,1)
    >>> G = nx.Graph(d)

    instead of the equivalent

    >>> G = nx.from_dict_of_dicts(d)

    Parameters
    ----------
    data : object to be converted

        Current known types are:
         any NetworkX graph
         dict-of-dicts
         dict-of-lists
         list of edges
         Pandas DataFrame (row per edge; a square frame is read as an
         adjacency matrix)
         numpy matrix
         numpy ndarray
         scipy sparse matrix
         pygraphviz agraph

    create_using : NetworkX graph constructor, optional (default=nx.Graph)
        Graph type to create. If graph instance, then cleared before populated.

    multigraph_input : bool (default False)
        If True and  data is a dict_of_dicts,
        try to create a multigraph assuming dict_of_dict_of_lists.
        If data and create_using are both multigraphs then create
        a multigraph from a multigraph.

    Raises
    ------
    NetworkXError
        If `data` looks like one of the known types but cannot be
        converted, or if it matches none of them.
    TypeError
        If `data` is a dict that is neither a dict-of-dicts nor a
        dict-of-lists.
    """
    # NX graph
    if hasattr(data, "adj"):
        try:
            result = from_dict_of_dicts(data.adj,
                                        create_using=create_using,
                                        multigraph_input=data.is_multigraph())
            if hasattr(data, 'graph'):  # data.graph should be dict-like
                result.graph.update(data.graph)
            if hasattr(data, 'nodes'):  # data.nodes should be dict-like
                result._node.update((n, dd.copy())
                                    for n, dd in data.nodes.items())
            return result
        except Exception as err:
            raise nx.NetworkXError(
                "Input is not a correct NetworkX graph.") from err

    # pygraphviz  agraph
    if hasattr(data, "is_strict"):
        try:
            return nx.nx_agraph.from_agraph(data, create_using=create_using)
        except Exception as err:
            raise nx.NetworkXError(
                "Input is not a correct pygraphviz graph.") from err

    # dict of dicts/lists
    if isinstance(data, dict):
        try:
            return from_dict_of_dicts(data,
                                      create_using=create_using,
                                      multigraph_input=multigraph_input)
        except Exception:
            # Not a dict-of-dicts; fall back to dict-of-lists.
            try:
                return from_dict_of_lists(data, create_using=create_using)
            except Exception as err:
                raise TypeError("Input is not known type.") from err

    # list or generator of edges
    if (isinstance(data, (list, tuple)) or
            any(hasattr(data, attr)
                for attr in ['_adjdict', 'next', '__next__'])):
        try:
            return from_edgelist(data, create_using=create_using)
        except Exception as err:
            raise nx.NetworkXError(
                "Input is not a valid edge list") from err

    # Pandas DataFrame
    try:
        import pandas as pd
        if isinstance(data, pd.DataFrame):
            if data.shape[0] == data.shape[1]:
                # Square frames are treated as adjacency matrices.
                try:
                    return nx.from_pandas_adjacency(
                        data, create_using=create_using)
                except Exception as err:
                    msg = "Input is not a correct Pandas DataFrame adjacency matrix."
                    raise nx.NetworkXError(msg) from err
            else:
                try:
                    return nx.from_pandas_edgelist(
                        data, edge_attr=True, create_using=create_using)
                except Exception as err:
                    msg = "Input is not a correct Pandas DataFrame edge-list."
                    raise nx.NetworkXError(msg) from err
    except ImportError:
        msg = 'pandas not found, skipping conversion test.'
        warnings.warn(msg, ImportWarning)

    # numpy matrix or ndarray
    try:
        import numpy
        if isinstance(data, (numpy.matrix, numpy.ndarray)):
            try:
                return nx.from_numpy_matrix(data, create_using=create_using)
            except Exception as err:
                raise nx.NetworkXError(
                    "Input is not a correct numpy matrix or array.") from err
    except ImportError:
        warnings.warn('numpy not found, skipping conversion test.',
                      ImportWarning)

    # scipy sparse matrix - any format
    try:
        import scipy
        if hasattr(data, "format"):
            try:
                return nx.from_scipy_sparse_matrix(
                    data, create_using=create_using)
            except Exception as err:
                raise nx.NetworkXError(
                    "Input is not a correct scipy sparse matrix type."
                ) from err
    except ImportError:
        warnings.warn('scipy not found, skipping conversion test.',
                      ImportWarning)

    raise nx.NetworkXError(
        "Input is not a known data type for conversion.")
def initialize_with_mobility(filename):
    """Load a mobility matrix from CSV and return it as a directed graph.

    The CSV's ``ADM`` column becomes the frame index before the frame is
    handed to ``nx.from_pandas_adjacency`` with ``nx.DiGraph``.
    """
    logger.info(f"Reading in mobility from {filename}")
    mobility = pd.read_csv(filename).set_index("ADM")
    return nx.from_pandas_adjacency(mobility, nx.DiGraph)
def to_networkx_graph(data, create_using=None, multigraph_input=False):
    """Make a NetworkX graph from a known data structure.

    The preferred way to call this is automatically
    from the class constructor

    >>> d = {0: {1: {'weight':1}}} # dict-of-dicts single edge (0,1)
    >>> G = nx.Graph(d)

    instead of the equivalent

    >>> G = nx.from_dict_of_dicts(d)

    Parameters
    ----------
    data : object to be converted

        Current known types are:
         any NetworkX graph
         dict-of-dicts
         dict-of-lists
         list of edges
         Pandas DataFrame (row per edge; a square frame is read as an
         adjacency matrix)
         numpy matrix
         numpy ndarray
         scipy sparse matrix
         pygraphviz agraph

    create_using : NetworkX graph
        Use specified graph for result.  Otherwise a new graph is created.

    multigraph_input : bool (default False)
        If True and  data is a dict_of_dicts,
        try to create a multigraph assuming dict_of_dict_of_lists.
        If data and create_using are both multigraphs then create
        a multigraph from a multigraph.

    Raises
    ------
    NetworkXError
        If `data` looks like one of the known types but cannot be
        converted, or if it matches none of them.
    TypeError
        If `data` is a dict that is neither a dict-of-dicts nor a
        dict-of-lists.
    """
    # NX graph
    if hasattr(data, "adj"):
        try:
            result = from_dict_of_dicts(data.adj,
                                        create_using=create_using,
                                        multigraph_input=data.is_multigraph())
            if hasattr(data, 'graph'):  # data.graph should be dict-like
                result.graph.update(data.graph)
            if hasattr(data, 'nodes'):  # data.nodes should be dict-like
                result._node.update((n, dd.copy())
                                    for n, dd in data.nodes.items())
            return result
        except Exception as err:
            raise nx.NetworkXError(
                "Input is not a correct NetworkX graph.") from err

    # pygraphviz  agraph
    if hasattr(data, "is_strict"):
        try:
            return nx.nx_agraph.from_agraph(data, create_using=create_using)
        except Exception as err:
            raise nx.NetworkXError(
                "Input is not a correct pygraphviz graph.") from err

    # dict of dicts/lists
    if isinstance(data, dict):
        try:
            return from_dict_of_dicts(data,
                                      create_using=create_using,
                                      multigraph_input=multigraph_input)
        except Exception:
            # Not a dict-of-dicts; fall back to dict-of-lists.
            try:
                return from_dict_of_lists(data, create_using=create_using)
            except Exception as err:
                raise TypeError("Input is not known type.") from err

    # list or generator of edges
    edge_container = isinstance(data, (list, tuple))
    edge_iterator = any(hasattr(data, attr)
                        for attr in ['_adjdict', 'next', '__next__'])
    if edge_container or edge_iterator:
        try:
            return from_edgelist(data, create_using=create_using)
        except Exception as err:
            raise nx.NetworkXError(
                "Input is not a valid edge list") from err

    # Pandas DataFrame
    try:
        import pandas as pd
        if isinstance(data, pd.DataFrame):
            if data.shape[0] == data.shape[1]:
                # Square frames are treated as adjacency matrices.
                try:
                    return nx.from_pandas_adjacency(
                        data, create_using=create_using)
                except Exception as err:
                    msg = "Input is not a correct Pandas DataFrame adjacency matrix."
                    raise nx.NetworkXError(msg) from err
            else:
                try:
                    return nx.from_pandas_edgelist(
                        data, edge_attr=True, create_using=create_using)
                except Exception as err:
                    msg = "Input is not a correct Pandas DataFrame edge-list."
                    raise nx.NetworkXError(msg) from err
    except ImportError:
        msg = 'pandas not found, skipping conversion test.'
        warnings.warn(msg, ImportWarning)

    # numpy matrix or ndarray
    try:
        import numpy
        if isinstance(data, (numpy.matrix, numpy.ndarray)):
            try:
                return nx.from_numpy_matrix(data, create_using=create_using)
            except Exception as err:
                raise nx.NetworkXError(
                    "Input is not a correct numpy matrix or array.") from err
    except ImportError:
        warnings.warn('numpy not found, skipping conversion test.',
                      ImportWarning)

    # scipy sparse matrix - any format
    try:
        import scipy
        if hasattr(data, "format"):
            try:
                return nx.from_scipy_sparse_matrix(
                    data, create_using=create_using)
            except Exception as err:
                raise nx.NetworkXError(
                    "Input is not a correct scipy sparse matrix type."
                ) from err
    except ImportError:
        warnings.warn('scipy not found, skipping conversion test.',
                      ImportWarning)

    raise nx.NetworkXError(
        "Input is not a known data type for conversion.")
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

# Each row of city_pairs.csv names two cities. Stack the pairs with their
# mirrored copy so every connection is counted in both directions. The
# mirror is made by swapping the column names, which replaces a second
# read of the same file.
pairs1 = pd.read_csv('city_pairs.csv', names=['city1', 'city2'])
pairs2 = pairs1.rename(columns={'city1': 'city2', 'city2': 'city1'})
pairs = pd.concat([pairs1, pairs2])

# create adjacency matrix
adjacency = pd.crosstab(pairs['city1'], pairs['city2'])

# create graph object with NetworkX library
g = nx.from_pandas_adjacency(adjacency)

# run PageRank algorithm and add result to matrix
rank = nx.pagerank(g, alpha=0.85)
adjacency['rank'] = pd.Series(rank)
adjacency = adjacency.sort_values(by='rank', ascending=False)

# relabel columns with a three-letter abbreviation (spaces removed first)
short = [x.replace(' ', '')[:3] for x in adjacency.columns]
# set_axis takes the labels first and returns a new frame; the previous
# call passed (axis, labels) and discarded the result.
adjacency = adjacency.set_axis(short, axis=1)
print(adjacency)

# color by city state
cities = pd.read_csv('cities.csv', names=['state'], index_col=0)
def to_networkx_graph(data, create_using=None, multigraph_input=False):
    """Make a NetworkX graph from a known data structure.

    The preferred way to call this is automatically
    from the class constructor

    >>> d = {0: {1: {"weight": 1}}}  # dict-of-dicts single edge (0,1)
    >>> G = nx.Graph(d)

    instead of the equivalent

    >>> G = nx.from_dict_of_dicts(d)

    Parameters
    ----------
    data : object to be converted

        Current known types are:
         any NetworkX graph
         dict-of-dicts
         dict-of-lists
         container (e.g. set, list, tuple) of edges
         iterator (e.g. itertools.chain) that produces edges
         generator of edges
         Pandas DataFrame (row per edge; a square frame is read as an
         adjacency matrix)
         numpy matrix
         numpy ndarray
         scipy sparse matrix
         pygraphviz agraph

    create_using : NetworkX graph constructor, optional (default=nx.Graph)
        Graph type to create. If graph instance, then cleared before populated.

    multigraph_input : bool (default False)
        If True and  data is a dict_of_dicts,
        try to create a multigraph assuming dict_of_dict_of_lists.
        If data and create_using are both multigraphs then create
        a multigraph from a multigraph.

    Raises
    ------
    NetworkXError
        If `data` looks like one of the known types but cannot be
        converted, or if it matches none of them.
    TypeError
        If `data` is a dict that is neither a dict-of-dicts nor a
        dict-of-lists.
    """
    # NX graph
    if hasattr(data, "adj"):
        try:
            result = from_dict_of_dicts(
                data.adj,
                create_using=create_using,
                multigraph_input=data.is_multigraph(),
            )
            if hasattr(data, "graph"):  # data.graph should be dict-like
                result.graph.update(data.graph)
            if hasattr(data, "nodes"):  # data.nodes should be dict-like
                # result.add_node_from(data.nodes.items()) possible but
                # for custom node_attr_dict_factory which may be hashable
                # will be unexpected behavior
                for n, dd in data.nodes.items():
                    result._node[n].update(dd)
            return result
        except Exception as e:
            raise nx.NetworkXError(
                "Input is not a correct NetworkX graph.") from e

    # pygraphviz  agraph
    if hasattr(data, "is_strict"):
        try:
            return nx.nx_agraph.from_agraph(data, create_using=create_using)
        except Exception as e:
            raise nx.NetworkXError(
                "Input is not a correct pygraphviz graph.") from e

    # dict of dicts/lists
    if isinstance(data, dict):
        try:
            return from_dict_of_dicts(data,
                                      create_using=create_using,
                                      multigraph_input=multigraph_input)
        except Exception:
            # Not a dict-of-dicts; fall back to dict-of-lists. Catching
            # Exception instead of everything lets KeyboardInterrupt and
            # SystemExit propagate, like the other branches.
            try:
                return from_dict_of_lists(data, create_using=create_using)
            except Exception as e:
                raise TypeError("Input is not known type.") from e

    # Pandas DataFrame
    try:
        import pandas as pd

        if isinstance(data, pd.DataFrame):
            if data.shape[0] == data.shape[1]:
                # Square frames are treated as adjacency matrices.
                try:
                    return nx.from_pandas_adjacency(
                        data, create_using=create_using)
                except Exception as e:
                    msg = "Input is not a correct Pandas DataFrame adjacency matrix."
                    raise nx.NetworkXError(msg) from e
            else:
                try:
                    return nx.from_pandas_edgelist(
                        data, edge_attr=True, create_using=create_using)
                except Exception as e:
                    msg = "Input is not a correct Pandas DataFrame edge-list."
                    raise nx.NetworkXError(msg) from e
    except ImportError:
        msg = "pandas not found, skipping conversion test."
        warnings.warn(msg, ImportWarning)

    # numpy matrix or ndarray
    try:
        import numpy

        if isinstance(data, (numpy.matrix, numpy.ndarray)):
            try:
                return nx.from_numpy_matrix(data, create_using=create_using)
            except Exception as e:
                raise nx.NetworkXError(
                    "Input is not a correct numpy matrix or array.") from e
    except ImportError:
        warnings.warn("numpy not found, skipping conversion test.",
                      ImportWarning)

    # scipy sparse matrix - any format
    try:
        import scipy

        if hasattr(data, "format"):
            try:
                return nx.from_scipy_sparse_matrix(
                    data, create_using=create_using)
            except Exception as e:
                raise nx.NetworkXError(
                    "Input is not a correct scipy sparse matrix type."
                ) from e
    except ImportError:
        warnings.warn("scipy not found, skipping conversion test.",
                      ImportWarning)

    # Note: most general check - should remain last in order of execution
    # Includes containers (e.g. list, set, dict, etc.), generators, and
    # iterators (e.g. itertools.chain) of edges
    if isinstance(data, (Collection, Generator, Iterator)):
        try:
            return from_edgelist(data, create_using=create_using)
        except Exception as e:
            raise nx.NetworkXError("Input is not a valid edge list") from e

    raise nx.NetworkXError("Input is not a known data type for conversion.")
def test_clf(seed_labels, y_true, A, true_prob=None, avg='micro',
             clf=local_and_global_consistency, kl_opt=False):
    """Test a semi-supervised node classifier against a known node -> label mapping.

    The adjacency matrix ``A`` is thresholded at every value returned by
    ``get_threslist(A)``; for each threshold a graph is built, seeded with
    ``seed_labels`` and classified with ``clf``.  Precision, recall and
    F-score are computed per threshold with the requested averaging.

    Parameters
    ----------
    seed_labels: dict
        contains initial "seed" nodes and their seed labels
    y_true: pandas.Series
        must have a defined (ordered) pandas.CategoricalDtype
    A: pandas.DataFrame
        pandas matrix to build/classify graph (rows/columns are node names).
    true_prob: pandas.DataFrame, optional
        reference label probabilities; when given, a softmax temperature is
        fitted per threshold by minimising the KL divergence to it.
    avg: str
        ``average`` argument forwarded to ``precision_recall_fscore_support``.
    clf: function
        classifier called as ``clf(G)``; must return ``(pred_labels, pred)``.
        Defaults to `local_and_global_consistency`.
    kl_opt: bool
        if True, pick the best threshold by minimal summed KL divergence
        instead of maximal F-score (requires ``true_prob``).

    Returns
    -------
    dict
        keys ``x``, ``p``, ``r``, ``t``, ``f``, ``opt_pos``, ``thres``,
        ``kl_vals``, ``aps`` and, when ``true_prob`` is given, ``probs``.

    Raises
    ------
    ValueError
        if ``kl_opt`` is True but ``true_prob`` is None (no KL values exist
        to optimise over).
    """
    if kl_opt and true_prob is None:
        raise ValueError("kl_opt=True requires true_prob to be provided")

    t = get_threslist(A)
    cat = pd.CategoricalDtype(y_true.cat.categories, ordered=True)

    p, r, f, kl_vals, probs = [], [], [], [], []
    for ti in t:
        # Keep only weights strictly above the current threshold.
        A_i = A.where(A > ti, other=0)
        G = nx.from_pandas_adjacency(A_i, create_using=nx.Graph)

        # ``G.node`` was removed in networkx 2.4; ``G.nodes`` is the
        # supported spelling on every release that has from_pandas_adjacency.
        for k, v in seed_labels.items():
            if k in G:
                G.nodes[k]['label'] = v

        pred_labels, pred = clf(G)

        if true_prob is not None:
            def kl_div(λ):
                prob = softmax(λ * pred[true_prob.columns])
                kl = entropy(true_prob.T, prob.T)
                return np.nansum(kl)

            # Fit the softmax temperature that best matches true_prob.
            opt_kl_temp = minimize_scalar(kl_div)
            K = opt_kl_temp['x']
            # K = 500.
            pred_prob = softmax(K * pred[true_prob.columns])
            kl_vals.append(entropy(true_prob.T, pred_prob.T))
            probs.append(pred_prob)

        y_pred = pd.Series(pred_labels).astype(cat)
        # NOTE(review): labels=[0, 1, 2] hard-codes three categories --
        # confirm this matches len(y_true.cat.categories) for all callers.
        pi, ri, fi, _ = precision_recall_fscore_support(
            y_true.cat.codes, y_pred.cat.codes,
            average=avg, labels=[0, 1, 2])
        p.append(pi)
        r.append(ri)
        f.append(fi)

    if kl_opt:
        opt_pos = np.argmin([np.sum(i) for i in kl_vals])
    else:
        opt_pos = np.argmax(f)

    d = dict(
        x=A,
        p=np.array(p),
        r=np.array(r),
        t=t,
        f=np.array(f),
        opt_pos=opt_pos,
        thres=A.where(A > t[opt_pos], other=0),
        kl_vals=np.array(kl_vals),
    )
    # Sum of recall increments weighted by precision across thresholds.
    d['aps'] = (np.diff(d['r']) * d['p'][1:]).sum()
    if true_prob is not None:
        d['probs'] = probs[opt_pos]
    return d
https://colab.research.google.com/drive/1KERj8-aGjiBXC4fW_8dfG-F9ld_CAcBp
"""

import pandas as pd
import networkx as nx

# Per-phase adjacency DataFrames and the directed graphs built from them,
# both keyed by phase number (1..11).
phases = {}
G = {}

for i in range(1,12):
    var_name = "phase" + str(i)
    file_name = "https://raw.githubusercontent.com/ragini30/Networks-Homework/main/" + var_name + ".csv"
    # file_name = "../data/CAVIAR/" + var_name + ".csv"
    phases[i] = pd.read_csv(file_name, index_col = ["players"])
    # Prefix every column label with "n" and reuse the labels as the index
    # so rows and columns carry identical node names.
    phases[i].columns = "n" + phases[i].columns
    phases[i].index = phases[i].columns
    G[i] = nx.from_pandas_adjacency(phases[i],create_using=nx.DiGraph())
    G[i].name = var_name

# HITS hub/authority scores of nodes n1 and n3 in every phase.
for i in range(1,12):
    h,a=nx.algorithms.link_analysis.hits(G[i],max_iter=1000000)
    print("phase {0} n1 hubs {1} auth {2}.".format(i,h['n1'],a['n1']))
    print("n3 hubs {1} auth {2}.".format(i,h['n3'],a['n3']))

# IPython/Colab shell commands: these lines are not valid in a plain
# Python interpreter.
!apt-get install graphviz graphviz-dev
!pip install pygraphviz

import matplotlib.pyplot as plt

# One figure per phase, titled with the phase number, laid out by graphviz.
for i in range(1,12):
    fig=plt.figure(i)
    fig.suptitle(i)
    nx.draw(G[i], pos=nx.drawing.nx_agraph.graphviz_layout(G[i]), with_labels=True)
labels = np.array(itemgetter(*labels)(name_map)) # %% [markdown] # # blockmodel_df = get_blockmodel_df(A, labels, use_weights=True, return_counts=False) sns.heatmap(blockmodel_df, annot=True, cmap="Reds") # %% [markdown] # # make the networkx graph from graspy.embed import AdjacencySpectralEmbed g = nx.from_pandas_adjacency(blockmodel_df, create_using=nx.DiGraph()) uni_labels, counts = np.unique(labels, return_counts=True) size_scaler = 5 size_map = dict(zip(uni_labels, size_scaler * counts)) nx.set_node_attributes(g, size_map, name="Size") adj = nx.to_numpy_array(g, nodelist=uni_labels) node_signal_flow = signal_flow(adj) sf_map = dict(zip(uni_labels, node_signal_flow)) nx.set_node_attributes(g, sf_map, name="Signal Flow") sym_adj = symmetrize(adj) node_lap = AdjacencySpectralEmbed(n_components=1).fit_transform(sym_adj) node_lap = np.squeeze(node_lap) lap_map = dict(zip(uni_labels, node_lap)) nx.set_node_attributes(g, lap_map, name="Laplacian-2") color_map = dict(zip(uni_labels, cc.glasbey_light)) nx.set_node_attributes(g, color_map, name="Color")
def to_minigraph(
    adj,
    labels,
    drop_neg=True,
    remove_diag=True,
    size_scaler=1,
    use_counts=False,
    use_weights=True,
    color_map=None,
):
    """Convert an adjacency and a partition to a block-level "minigraph".

    Parameters
    ----------
    adj
        Adjacency passed through to ``get_blockmodel_df``.
    labels
        Partition labels passed through to ``get_blockmodel_df``; their
        unique values are used as the node names for the attributes.
    drop_neg : bool
        If True, drop the ``"-1"`` row and column from the block model
        when present.
    remove_diag : bool
        If True, zero the diagonal of the block model (no self-loops).
    size_scaler : number
        Multiplier applied to the per-label counts for the "Size" attribute.
    use_counts, use_weights : bool
        Forwarded to ``get_blockmodel_df`` as ``return_counts`` and
        ``use_weights``.
    color_map : dict, optional
        Node -> colour mapping; defaults to ``cc.glasbey_light`` zipped
        with the unique labels.

    Returns
    -------
    networkx.DiGraph
        Graph with node attributes "Size", "Signal Flow", "AdjEvec-0" ..
        "AdjEvec-9", "Spring-x", "Spring-y" and "Color".
    """
    prob_df = get_blockmodel_df(
        adj, labels, return_counts=use_counts, use_weights=use_weights)

    if drop_neg and ("-1" in prob_df.index):
        prob_df.drop("-1", axis=0, inplace=True)
        prob_df.drop("-1", axis=1, inplace=True)

    if remove_diag:
        # Build a new frame instead of mutating ``prob_df.values`` in place:
        # the in-place form only works while ``.values`` is a writable view,
        # which pandas no longer guarantees (Copy-on-Write returns a
        # read-only array).
        prob_df = prob_df - np.diag(np.diag(prob_df.values))

    g = nx.from_pandas_adjacency(prob_df, create_using=nx.DiGraph())

    # NOTE(review): uni_labels still contains "-1" after it was dropped from
    # prob_df; newer networkx rejects nodelist entries missing from the
    # graph -- confirm callers never pass "-1" labels with drop_neg=True.
    uni_labels, counts = np.unique(labels, return_counts=True)

    # add size attribute based on number of vertices
    size_map = dict(zip(uni_labels, size_scaler * counts))
    nx.set_node_attributes(g, size_map, name="Size")

    # add signal flow attribute (for the minigraph itself)
    mini_adj = nx.to_numpy_array(g, nodelist=uni_labels)
    node_signal_flow = signal_flow(mini_adj)
    sf_map = dict(zip(uni_labels, node_signal_flow))
    nx.set_node_attributes(g, sf_map, name="Signal Flow")

    # add spectral properties
    sym_adj = symmetrize(mini_adj)
    n_components = 10
    latent = AdjacencySpectralEmbed(
        n_components=n_components).fit_transform(sym_adj)
    for i in range(n_components):
        lap_map = dict(zip(uni_labels, latent[:, i]))
        nx.set_node_attributes(g, lap_map, name=f"AdjEvec-{i}")

    # add spring layout properties
    pos = nx.spring_layout(g)
    spring_x = {key: val[0] for key, val in pos.items()}
    spring_y = {key: val[1] for key, val in pos.items()}
    nx.set_node_attributes(g, spring_x, name="Spring-x")
    nx.set_node_attributes(g, spring_y, name="Spring-y")

    # add colors
    if color_map is None:
        color_map = dict(zip(uni_labels, cc.glasbey_light))
    nx.set_node_attributes(g, color_map, name="Color")
    return g
# df = df.iloc[:5, :5].copy() n_nodes = df.shape[0] # Rename Nodes (node names must start from 0) index_bkp = df.index columns_bkp = df.columns df.index = range(0, n_nodes) df.columns = range(0, n_nodes) df.columns = df.index nodelist = df.index.values # Build Networkx object print('--- Building Network ---') # C for Correlation network G = nx.from_pandas_adjacency(df, create_using=nx.Graph) # P for Proximity network (which in this case is a Correlation) P = [w for i, j, w in G.edges.data('weight')] # Converts (P)roximity to (D)istance using a map. D_dict = dict(zip(G.edges(), map(prox2dist, P))) # Set the distance value for each edge nx.set_edge_attributes(G, name='distance', values=D_dict) # Compute closure (Using the Dijkstra Class directly) print('--- Computing Dijkstra APSP ---') dij = Dijkstra.from_edgelist(D_dict, directed=False, verbose=10) # Serial Computation poolresults = list(range(len(dij.N))) for node in dij.N: print('> Dijkstra node %s of %s' % (node + 1, len(dij.N)))
(Projection from a bipartite network.) ''' f = open("PREVERE1.csv") ncols = len(f.readline().split(',')) df = pd.read_csv( "PREVERE1.csv", skiprows=1, usecols=range(1, ncols), header=None ) # skips the top row in the file and only uses columns 1 though ncols (using 0-indicing). Tells pandas not to expect a header print(df) # notice the column names are 1, ...., ncols-1 col_names = { i: i - 1 for i in range(1, ncols) } # Column names must match column indices (It's what networkx wants, I don't know why.) df = df.rename(columns=col_names ) # Renaming columns so that column names match column indices. G = nx.from_pandas_adjacency( df) # Makes a network from the pandas adjacency matrix # Visualize the network pos = nx.spring_layout(G) plt.figure( figsize=(50, 50) ) # Make figure bigger so we can actually see all the edges! (See what happens otherwise by calling nx.draw(G) before this line.) nx.draw(G, pos, node_size=25) plt.show() # How should we visualize the edge weights? # Access edge data and assign colors according to edge weight. (This is just one option for visualizing edge weights.) color = ['k', 'y', 'r'] # k means black, y means yellow, r means red. for u, v in G.edges(): # Iterate through all edges (u, v). G[u][v][ 'weight'] # weight of edge (u,v). This edge attribute is already stored in G, because it was in the df adjacency matrix that we created the network from.
def sparce_invcov(self, df, cols=None, style="GraphLassoCV", param=0.2,
                  layout="circular", center=None, figsize=(7, 7)):
    """Estimate a sparse precision matrix and plot it.

    The matrix is drawn twice: as a network (an edge for every retained
    off-diagonal entry) and as a heatmap.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data; it is normalized with ``Utility().normalize`` and rows
        with NA are dropped before fitting.
    cols : sequence, optional
        Columns to calculate. If None, takes all numerical columns.
    style : str
        ``"GraphLassoCV"`` fits ``GraphicalLassoCV``; any other value fits
        ``LedoitWolf``.
    param : float
        Parameter for the fitting algorithm. For GraphLassoCV it is the
        alpha; for LedoitWolf it is the absolute-value threshold applied to
        the precision matrix.
    layout : str
        One of "circular", "spring", "shell".
    center : optional
        Column name to place at the centre (origin) of the graph.
    figsize : tuple
        Size of the network figure.

    Returns
    -------
    pandas.DataFrame
        Matrix with 1 on the diagonal, -1 for retained off-diagonal entries
        and 0 elsewhere, indexed by the selected columns.

    Raises
    ------
    KeyError
        If ``layout`` is not one of the supported names.
    """
    new_df = Utility().normalize(df).dropna()  # Remove NA, normalize
    # ``is None`` rather than ``== None``: comparing an Index/ndarray with
    # ``==`` is element-wise and its truth value is ambiguous.
    if cols is None:
        cols = df._get_numeric_data().columns
    data = new_df[cols]

    if style == "GraphLassoCV":
        model = GraphicalLassoCV(alphas=[param, param], cv=10, max_iter=5000)
        model.fit(data)
        precision = model.precision_
        retained = precision != 0
    else:  # Style == LedoitWolf
        model = LedoitWolf()
        model.fit(data)
        precision = model.get_precision()
        retained = np.abs(precision) > param

    sparce_mat = np.zeros(np.shape(precision))
    sparce_mat[retained] = -1
    np.fill_diagonal(sparce_mat, 1)
    sparce_mat = pd.DataFrame(sparce_mat,
                              index=data.columns,
                              columns=data.columns)

    # NetworkX Graph
    fig, ax = plt.subplots(figsize=figsize)
    G = nx.from_pandas_adjacency(sparce_mat)
    layout_funcs = {
        "circular": nx.drawing.circular_layout,
        "shell": nx.drawing.shell_layout,
        "spring": nx.drawing.spring_layout,
    }
    pos = layout_funcs[layout](G, scale=2)
    # Only re-position a node that exists; previously a stray ``None`` (or
    # unknown) key was inserted into ``pos``.
    if center in pos:
        pos[center] = np.array([0, 0])

    # Every node uses the same colour (the old conditional had two
    # identical branches); the centre node is only drawn larger.
    node_color = ['mintcream' for _ in G.nodes]
    node_size = [
        len(node) * 1500 if node == center else len(node) * 500
        for node in G.nodes()
    ]
    nodes = nx.draw_networkx_nodes(G,
                                   pos,
                                   node_shape='o',
                                   node_color=node_color,
                                   node_size=node_size)
    nodes.set_edgecolor('k')
    nx.draw_networkx_edges(G, pos, edge_color='r', width=2.0, alpha=0.8)
    nx.draw_networkx_labels(G, pos, font_weight='bold', font_size=10)
    plt.axis('off')
    plt.tight_layout()

    # Display precision matrix as heatmap
    fig, ax = plt.subplots(figsize=(5, 5))
    sns.heatmap(sparce_mat,
                vmax=1,
                vmin=-1,
                linewidth=0.1,
                cmap=plt.cm.RdBu_r,
                cbar=False)
    ax.set_ylim(sparce_mat.T.shape[0] - 1e-9, -1e-9)
    plt.title('Sparse Inverse Covariance')
    plt.show()
    return sparce_mat
import pandas as pd
import networkx as nx

# adjacency matrix from BibExcel
df = pd.read_csv('filename.csv', sep=';', index_col=0)
G = nx.from_pandas_adjacency(df)
G.name = 'Name'
# nx.info() was removed in networkx 3.0; printing the graph is the
# supported way to get its summary.
print(G)
import pandas as pd
import networkx as nx

# Weighted adjacency matrix: entry (row, column) is the weight of the
# edge row -> column.
df = pd.DataFrame(
    {
        'a': [0.1, 0.2, 0.8],
        'b': [0.4, 0.3, 0.9],
        'c': [0.6, 0.7, 0.5]
    },
    index=['a', 'b', 'c'],
    columns=['a', 'b', 'c'])
print(df)

g = nx.from_pandas_adjacency(df, nx.DiGraph)

# Row-normalised (stochastic) version of the same graph, for inspection.
sdf = nx.to_pandas_adjacency(nx.stochastic_graph(g))
print(sdf)

# nx.pagerank_numpy() was removed in networkx 3.0; nx.pagerank() is the
# supported entry point.  A tight tolerance keeps the iterative result
# indistinguishable from the eigenvector solution at the printed precision.
pr = nx.pagerank(g, alpha=1, tol=1e-10)
print(f'\na:{pr["a"]:.3f}\nb:{pr["b"]:.3f}\nc:{pr["c"]:.3f}')
def create_graph_from_invcov(invcov,
                             p,
                             q,
                             title="",
                             subtitle="",
                             size=500,
                             remove_nodes=True,
                             labels=None,
                             show_weights=True):
    """Create a networkx graph from an inverse covariance matrix and return a bokeh plot of it.

    Arguments:
        invcov {np.ndarray} -- Square inverse covariance matrix of a graph
        p {int} -- Nodes 0..p-1 form the inner shell of the layout
        q {int} -- Nodes p..p+q-1 form the outer shell of the layout

    Keyword Arguments:
        title {str} -- Optional title of the plot (default: {""})
        subtitle {str} -- Optional italic subtitle (default: {""})
        size {int} -- Plot width and height (default: {500})
        remove_nodes {bool} -- Drop isolated nodes (default: {True})
        labels {sequence} -- Optional node names indexed by node number;
            shown in the hover tooltip (default: {None})
        show_weights {bool} -- Colour edges by sign and scale their width
            by weight; otherwise draw thin grey edges (default: {True})

    Returns:
        bokeh Plot containing the graph renderer and hover/tap/box tools.
    """
    assert (invcov.shape[0] == invcov.shape[1])
    invcov = pd.DataFrame(invcov)
    G = nx.from_pandas_adjacency(invcov)
    if (remove_nodes):
        G.remove_nodes_from(list(nx.isolates(G)))

    graph_renderer = from_networkx(
        G, nx.shell_layout, nlist=[list(range(p)),
                                   list(range(p, p + q))])
    node_source = graph_renderer.node_renderer.data_source
    node_source.add([(k < p) * max(k, 1) for k, v in G.degree()], 'is_p')
    node_source.add([min(10 + v, 20) for k, v in G.degree()], 'degree')

    # mapper = LinearColorMapper(palette=linear_palette(Spectral, p+1), low=0, high=p)
    mapper = LinearColorMapper(palette=linear_palette(Spectral4, 2),
                               low=0,
                               high=1)
    graph_renderer.node_renderer.glyph = Circle(size='degree',
                                                fill_color={
                                                    'field': 'is_p',
                                                    'transform': mapper
                                                })
    graph_renderer.node_renderer.selection_glyph = Circle(
        size='degree', fill_color=Spectral4[3])
    graph_renderer.node_renderer.hover_glyph = Circle(size='degree',
                                                      fill_color=Spectral4[3])

    edge_mapper = LinearColorMapper(palette=['pink', 'palegreen'],
                                    low=-1,
                                    high=1)
    edge_vals = [G.get_edge_data(u, v)["weight"] for u, v in G.edges]
    # Line width: |weight| clamped below at 0.5, scaled by 4, capped at 6.
    edge_weights = [min(max(abs(w), 0.5) * 4, 6) for w in edge_vals]
    edge_source = graph_renderer.edge_renderer.data_source
    edge_source.add(edge_weights, 'weights')
    edge_source.add(np.sign(edge_vals), 'signs')

    if (show_weights):
        graph_renderer.edge_renderer.glyph = MultiLine(line_color={
            'field': 'signs',
            'transform': edge_mapper
        },
                                                       line_alpha=1.0,
                                                       line_width='weights')
    else:
        graph_renderer.edge_renderer.glyph = MultiLine(line_color="#CCCCCC",
                                                       line_alpha=1.0,
                                                       line_width=1)
    graph_renderer.edge_renderer.selection_glyph = MultiLine(
        line_color='black', line_width='weights')
    graph_renderer.edge_renderer.hover_glyph = MultiLine(line_color="#CCCCCC",
                                                         line_width='weights')
    graph_renderer.selection_policy = NodesAndLinkedEdges()
    graph_renderer.inspection_policy = NodesAndLinkedEdges()

    # Neighbour names per node, as plain lists of str.  Plain lists (not
    # fixed-width NumPy string arrays) are required so the "<br/>" prefix
    # added below is neither truncated nor rejected.
    adj = np.array(np.abs(invcov) > 0)
    neighbours = []
    for i in G.nodes:
        linked = np.where(np.logical_or(adj[i, :], adj[:, i]))[0]
        if labels is not None:
            neighbours.append([str(labels[j]) for j in linked])
        else:
            neighbours.append([str(j) for j in linked])

    # add line breaks (before every third entry of lists longer than 3)
    for n in neighbours:
        if len(n) > 3:
            for i in range(len(n)):
                if (i % 3 == 0):
                    n[i] = "<br/>" + n[i]

    # Hand the finished lists to the data source only after formatting so
    # the plot never depends on later in-place mutation.
    if (labels is not None):
        node_source.data['name'] = [labels[i] for i in G.nodes]
        node_source.data['neighbours'] = neighbours
        tooltips = [("idx:", "@index"), ("Name:", "@name"),
                    ("Neighbours:", "@neighbours{safe}")]
    else:
        node_source.data['neighbours'] = neighbours
        tooltips = [("idx:", "@index"), ("Neighbours:", "@neighbours{safe}")]

    bokeh_pl = Plot(plot_width=size,
                    plot_height=size,
                    x_range=Range1d(-1.1, 1.1),
                    y_range=Range1d(-1.1, 1.1))
    # bokeh_pl.title.text = title
    bokeh_pl.add_layout(Title(text=subtitle, text_font_style="italic"),
                        'above')
    bokeh_pl.add_layout(Title(text=title, text_font_size="16pt"), 'above')
    # bokeh_pl.add_tools(HoverTool(tooltips=None), TapTool(), BoxSelectTool())
    hover = HoverTool(tooltips=tooltips)
    bokeh_pl.add_tools(hover, TapTool(), BoxSelectTool())
    bokeh_pl.renderers.append(graph_renderer)
    return (bokeh_pl)
val = val - hourRem + 1 except: val = 0 dfs[myTime].iloc[loc_df1, loc_df2] = val def most_central_edge(G): centrality = betweenness(G, weight='weight') return max(centrality, key=centrality.get) for value in range(4): value = value print(value) G = nx.from_pandas_adjacency(dfs[value]) #PRINT DEGREE DISTRIBUTION============================================ if (value != -1): degree_sequence = sorted([d for n, d in G.degree()], reverse=True) # degree sequence degreeCount = collections.Counter(degree_sequence) deg, cnt = zip(*degreeCount.items()) fig, ax = plt.subplots(figsize=(15, 10)) plt.bar(deg, cnt, width=0.80, color='b') plt.title("Degree Histogram") plt.ylabel("Count") plt.xlabel("Degree")
# In[ ]:

# This needs more work. The Adjacency Matrix or CrossTab Matrix does not conform to the n x m shape or even logically !
#print dfx_ct1.shape # n samples with m dimensions
#plt.scatter(dfx_ct1.loc['165.130.144.83'],dfx_ct1.loc['10.66.34.33'])
#plt.show()

# In[12]:

import networkx as nx

# Undirected graph built from the cross-tab frame.  (A throw-away
# ``nx.Graph()`` used to be created first and immediately overwritten.)
G = nx.from_pandas_adjacency(dfx_ct1)
G.name = 'Graph from dfx_ct1 adjacency matrix dataframe'

# In[13]:

# nx.info() was removed in networkx 3.0; printing the graph is the
# supported way to get its summary.
print(G)

# In[15]:

# Directed copy of G.
H = nx.DiGraph(G)
import plotly.figure_factory as ff
import plotly.io as pio

df_adjacency_matrix=pd.read_csv(r".\3.Word Matrices\WordsAdjacencyMatrix.csv",index_col=0)
df_words=pd.read_csv(r".\3.Word Matrices\WordsFrequencyShortened.csv",index_col=0)
# df_adjacency_matrix= df_adjacency_matrix.iloc[200:205,200:205]#Todo: To remove at the end
# df_adjacency_matrix= df_adjacency_matrix.iloc[200:250,200:250]#Todo: To remove at the end
# nodes_list=df_words.index.tolist()[200:205]
# nodes_list=df_words.index.tolist()[200:250]
nodes_list=df_words.index.tolist()
# Identity mapping word -> word.
nodes_labels=dict(zip(nodes_list,nodes_list))

G = nx.from_pandas_adjacency(df_adjacency_matrix)
G.name = 'Graph from pandas adjacency matrix'
# nx.info() was removed in networkx 3.0; printing the graph is the
# supported way to get its summary.
print(G)
nx.draw(G, node_size=20, with_labels=True)
plt.show()

# compute the best partition
partition = community_louvain.best_partition(G,resolution=0.2)#Play with resolution (max=1.0) in order to get less (higher number) or more communities (clusters)

# draw the graph
plt.axis('off')
pos = nx.spring_layout(G) #Initial
# pos = nx.circular_layout(G)
def gen_gexf_and_pass_partition_data(
        data,
        stopwords=None,
        word_len_min=2,
        node_num=30,
        edge_remove_threshold=0,
        remove_isolated_node=True,
        layout='fr',
        iterations=50,
        fr_k=None,
        fa2_square=2,
        fa2_log_base=100,
):
    """Generate gexf file for SNA Interactive and pass partition data

    :param str,list data: String Data (One post per line) | List Data (One post per element)
    :param str stopwords: Stopwords separated ','
    :param int word_len_min: forwarded to ``get_matrix``
    :param int node_num: Number of nodes
    :param int edge_remove_threshold: forwarded to ``get_sub_data``
    :param bool remove_isolated_node: forwarded to ``get_sub_data``
    :param str layout: ``"fr"`` (Fruchterman Reingold) or ``"fa2"``
        (ForceAtlas2); any other value writes no node positions
    :param int iterations: layout iterations
    :param int fr_k: ``k`` of the spring layout (``"fr"`` only)
    :param int fa2_square: exponent applied to ForceAtlas2 coordinates
        before the log scaling (``"fa2"`` only)
    :param int fa2_log_base: log base of that scaling (``"fa2"`` only)
    :return: dict with ``partition_len`` (number of communities) and
        ``top_node_per_klass`` (heaviest node of each community)
    """
    corpus = get_corpus(data=data)
    matrix = get_matrix(corpus=corpus, stopwords=stopwords, word_len_min=word_len_min)
    cooccur_matrix = matrix.get('cooccur_matrix')

    # Get Graph
    graph = nx.from_pandas_adjacency(cooccur_matrix)

    # Get Sub Data
    sub_data = get_sub_data(
        graph=graph,
        node_num=node_num,
        edge_remove_threshold=edge_remove_threshold,
        remove_isolated_node=remove_isolated_node,
        matrix=matrix,
    )
    sub_graph = sub_data.get('sub_graph')
    tf_sum_dict_sorted = sub_data.get('tf_sum_dict_sorted')
    isolated_nodes = sub_data.get('isolated_nodes')

    # ------ Set Attributes for gexf file ------ #
    # Add Node Weight: frequency relative to the first (top) entry, scaled
    # so that the top node gets size 70.
    scaled_weight_dict = {
        node: (freq * (70 ** 2) / tf_sum_dict_sorted[0][1]) ** 0.5
        for node, freq in tf_sum_dict_sorted[:node_num]
        if node not in isolated_nodes
    }
    for node, scaled_weight in scaled_weight_dict.items():
        sub_graph.nodes[node]['viz'] = {'size': scaled_weight}

    # Add edge weight: thickness relative to the heaviest edge.  An empty
    # edge set or an all-zero maximum no longer raises.
    edge_weight_max = max(
        (sub_graph[u][v]['weight'] for u, v in sub_graph.edges), default=0)
    for u, v in sub_graph.edges:
        if edge_weight_max:
            thickness = sub_graph[u][v]['weight'] * 35 / edge_weight_max
        else:
            thickness = 0
        sub_graph[u][v]['viz'] = {'thickness': thickness}

    # ------ Set Layout ------ #
    # Fruchterman Reingold
    if layout == "fr":
        pos = nx.spring_layout(sub_graph, k=fr_k, iterations=iterations)
        for node in pos:
            # setdefault: a node without a size entry has no 'viz' dict yet.
            sub_graph.nodes[node].setdefault('viz', {})['position'] = {
                'x': pos[node][0], 'y': pos[node][1]}

    # ForceAtlas2
    elif layout == "fa2":
        forceatlas2 = ForceAtlas2()
        pos = forceatlas2.forceatlas2_networkx_layout(sub_graph, iterations=iterations)
        for node in pos:
            raw_x, raw_y = pos[node]

            # -- Scaling Pos -- #
            # Log-compress the magnitude, then restore the original sign.
            adj_x, adj_y = [math.log(abs(coord) ** fa2_square, fa2_log_base)
                            for coord in pos[node]]
            if raw_x < 0:
                adj_x *= -1
            if raw_y < 0:
                adj_y *= -1
            # ----------------- #

            sub_graph.nodes[node].setdefault('viz', {})['position'] = {
                'x': adj_x, 'y': adj_y}
    # -------------------------- #

    # Generate gexf file
    write_gexf(graph=sub_graph)

    # ------ Pass partition data to template ------ #
    partition = community.best_partition(sub_graph)
    # default=-1 yields zero communities for an empty partition.
    partition_len = max(partition.values(), default=-1) + 1

    node_freq_per_klass = {n: list() for n in range(partition_len)}
    for node, klass in partition.items():
        # Nodes without a scaled weight rank last instead of raising.
        node_freq_per_klass[klass].append(
            (node, scaled_weight_dict.get(node, 0)))

    top_node_per_klass = [None] * partition_len
    for klass, node_freq in node_freq_per_klass.items():
        top_node_per_klass[klass] = max(node_freq, key=lambda x: x[1])[0]

    partition_pass_to_template = {
        'partition_len': partition_len,
        'top_node_per_klass': top_node_per_klass,
    }
    return partition_pass_to_template
# Running counter for rows marked 'Existing' (names "e0", "e1", ...).
eIdx = 0
# Give every row a short name: "n<k>" for 'New' rows, "e<k>" for
# 'Existing' rows.  Rows with any other value get no name.
for nOrE in df['New/Existing'].tolist():
    if nOrE == 'New':
        newNaming.append("n{}".format(nIdx))
        nIdx += 1
    elif nOrE == 'Existing':
        newNaming.append("e{}".format(eIdx))
        eIdx += 1
df['newNaming'] = newNaming

# Build the pairwise distance matrix, labelled by the keys of nameDict.
nameDict = returnNameToStrainDict(df, loci)
distMat = returnDistMatrix(nameDict, loci)
distMat_df = pd.DataFrame(distMat,
                          index=nameDict.keys(),
                          columns=nameDict.keys())

# Weighted graph from the distance matrix and its minimum spanning tree.
graph = nx.from_pandas_adjacency(distMat_df)
mst = nx.minimum_spanning_tree(graph)

#node_pos = nx.get_node_attributes(mst,'pos')
# Spring layout with a fixed seed so the drawing is reproducible.
node_pos = nx.spring_layout(mst,
                            k=4 * 1 / np.sqrt(len(mst.nodes())),
                            iterations=80,
                            random_state=1992)
labels = nx.get_edge_attributes(mst, 'weight')

# Draw the tree with its edge weights as labels and save it to disk.
plt.figure()
nx.draw_networkx_edge_labels(mst, node_pos, edge_labels=labels, font_size=4)
nx.draw_networkx(mst, pos=node_pos, node_size=10, font_size=1, alpha=0.65)
#nx.draw_networkx(mst,pos=node_pos,node_size=90,font_size=5, alpha=0.65)
plt.savefig("MST.png", dpi=350)