import igraph as ig


def to_igraph(network):
    """
    Convert a cytoscape.js style graph to an igraph object.

    :param network: the cytoscape.js style network.
    :return: the igraph object.
    """
    nodes = network['elements']['nodes']
    edges = network['elements']['edges']
    network_attr = network['data']

    node_count = len(nodes)
    edge_count = len(edges)

    g = ig.Graph()

    # Graph attributes
    for key in network_attr.keys():
        g[key] = network_attr[key]

    # nodes is a list of dicts, so add the vertices by count
    # (passing the dicts themselves to add_vertices would fail)
    g.add_vertices(node_count)

    # Add node attributes
    node_attributes = {}
    node_id_dict = {}
    for i, node in enumerate(nodes):
        data = node['data']
        for key in data.keys():
            if key not in node_attributes:
                node_attributes[key] = [None] * node_count
            # Save index to map
            if key == 'id':
                node_id_dict[data[key]] = i
            node_attributes[key][i] = data[key]

    for key in node_attributes.keys():
        g.vs[key] = node_attributes[key]

    # Create edges
    edge_tuples = []
    edge_attributes = {}
    for i, edge in enumerate(edges):
        data = edge['data']
        source = data['source']
        target = data['target']
        edge_tuple = (node_id_dict[source], node_id_dict[target])
        edge_tuples.append(edge_tuple)
        for key in data.keys():
            if key not in edge_attributes:
                edge_attributes[key] = [None] * edge_count
            # Save index to map
            edge_attributes[key][i] = data[key]

    g.add_edges(edge_tuples)

    # Assign edge attributes (source/target are already encoded in the edges)
    for key in edge_attributes.keys():
        if key in ('source', 'target'):
            continue
        g.es[key] = edge_attributes[key]

    return g
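# Usage sketch for to_igraph() (illustrative only; the minimal cytoscape.js-style
# dict below is an assumption, not data from the original code):
cyjs = {
    'data': {'name': 'demo'},
    'elements': {
        'nodes': [{'data': {'id': 'a'}}, {'data': {'id': 'b'}}],
        'edges': [{'data': {'source': 'a', 'target': 'b'}}],
    },
}
demo_graph = to_igraph(cyjs)
print(demo_graph['name'], demo_graph.vcount(), demo_graph.ecount())  # demo 2 1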
def make_graph_from_post_processor(postprocessor):
    """
    Create a directed graph from a postprocessor of an OpenFOAM simulation.

    The edges hold attributes 'edge_index' and 'original_edge_index' that
    correspond to the edge indices on RBC paths. These indices need not be the
    same as the igraph indices.

    Args:
        postprocessor (HemoglobinOnSegmentsPostProcessor): postprocessor object

    Returns:
        igraph.Graph instance
    """
    path_analyzer = postprocessor.rbcDataPostProcessor.rbc_path_analyzer
    graph = igraph.Graph(directed=True)
    for ei in postprocessor.edge_ids():
        try:
            positive_flow = path_analyzer.positive_flow(ei)
        except FlowReversalError:
            warnings.warn("Skipping edge {:d} due to flow reversal".format(ei))
            continue
        # add vertices
        graph.add_vertex()
        graph.add_vertex()
        v0 = graph.vs[graph.vcount() - 2]
        v1 = graph.vs[graph.vcount() - 1]
        # add oriented edges along flow direction
        oriented_tuple = (v0, v1) if positive_flow else (v1, v0)
        original_ei = postprocessor.segment_index_adapter.segment_to_edge_index(ei)
        graph.add_edge(*oriented_tuple,
                       edge_index=ei,
                       original_edge_index=original_ei)

    # merge the vertices which are connected through RBC paths by a connecting node
    mapping = list(range(graph.vcount()))  # list, so it can be mutated below
    vids_to_delete = set()
    for e in graph.es:
        # build edges that flow in and out of the upstream vertex
        edges_in, edges_out = incident_edges_to_upstream_vertex(e, path_analyzer)
        # update mapping from the list of vertices that build a connecting node
        node_vids = [e_out.tuple[0] for e_out in edges_out] + \
                    [e_in.tuple[1] for e_in in edges_in]
        new_node_vi = min(node_vids)  # unambiguous choice of a new vertex id
        for vi in node_vids:
            mapping[vi] = new_node_vi
        vids_to_delete.update(node_vids)
        vids_to_delete.remove(new_node_vi)
    graph.contract_vertices(mapping)
    # if contract_vertices deleted vertices, remove these indices from vids_to_delete
    vids_to_delete.difference_update(range(graph.vcount(), len(mapping)))
    graph.delete_vertices(list(vids_to_delete))

    # add edge attributes
    edge_attribute_names = [
        'length', 'radius_rbc', 'radius_plasma', 'radius_wall', 'ld',
        'rbc_velocity', 'rbc_flow'
    ]
    for name in edge_attribute_names:
        graph.es[name] = None
    for e in graph.es:
        ei = e['edge_index']
        original_ei = e['original_edge_index']
        e['length'] = postprocessor.scoord_interval_length(ei)
        e['radius_plasma'] = postprocessor.graph_data.edge_radius(original_ei)
        e['radius_rbc'] = max(postprocessor.rbc_radius_factor * e['radius_plasma'],
                              postprocessor.rbc_radius_min)
        e['radius_wall'] = postprocessor.wall_radius_factor * e['radius_plasma']
        e['ld'] = postprocessor.mean_linear_density(ei)
        e['rbc_velocity'] = postprocessor.mean_velocity(ei)
        e['rbc_flow'] = postprocessor.mean_rbc_flow(ei)
    if not graph.is_dag():
        warnings.warn('The produced graph is not a directed acyclic graph',
                      UserWarning)
    return graph
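# Minimal sketch (not from the original module) of the contract_vertices
# semantics relied on above: vertices mapped to the same new id are merged.
import igraph

toy = igraph.Graph(edges=[(0, 1), (2, 3)], directed=True)
toy.contract_vertices([0, 1, 1, 2])  # merge old vertices 1 and 2 into new vertex 1
print(toy.vcount(), [e.tuple for e in toy.es])  # 3 [(0, 1), (1, 2)]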
ID2Index = {id: index for index, id in enumerate(index2ID)}

# Hack to account for the capitalized "FROM" column used by some exports
fromKey = "From"
if fromKey not in edgesData:
    fromKey = "FROM"

# Convert edges from IDs to the new indices, inverting each edge so that it
# points from the cited node to the citing one
edgesZip = zip(edgesData[fromKey].tolist(), edgesData["To"].tolist())
edgesList = [(ID2Index[toID], ID2Index[fromID]) for fromID, toID in edgesZip
             if fromID in ID2Index and toID in ID2Index]

vertexAttributes = {key: nodesData[key].tolist() for key in nodesData}

graph = ig.Graph(n=len(index2ID),
                 edges=edgesList,
                 directed=True,
                 vertex_attrs=vertexAttributes)

# verticesToDelete = np.where(np.logical_or(np.array(graph.indegree())==0,np.array(graph.degree())==0))[0]
# graph.delete_vertices(verticesToDelete)

graph.vs["KCore"] = graph.shell_index(mode="IN")
graph.vs["year"] = [int(s[0:4]) for s in graph.vs["date"]]
# graph.vs["Community"] = [str(c) for c in graph.community_infomap().membership]

os.makedirs("../networks", exist_ok=True)
xn.igraph2xnet(graph, "../networks/" + queryID + ".xnet")
def build_igraph_from_pp(net, respect_switches=False):
    """
    This function uses the igraph library to create an igraph graph for a given
    pandapower network. Lines, transformers and switches are respected.
    Performance vs. networkx: https://graph-tool.skewed.de/performance

    Input:
        **net** - pandapower network

    Example:
        graph = build_igraph_from_pp(net)
    """
    try:
        import igraph as ig
    except (DeprecationWarning, ImportError):
        raise ImportError("Please install python-igraph")
    g = ig.Graph(directed=True)
    g.add_vertices(net.bus.shape[0])
    # [s.encode('unicode-escape') for s in net.bus.name.tolist()]
    g.vs["label"] = net.bus.index.tolist()
    pp_bus_mapping = dict(
        list(zip(net.bus.index, list(range(net.bus.index.shape[0])))))

    # add lines
    nogolines = set(net.switch.element[(net.switch.et == "l") &
                                       (net.switch.closed == 0)]) \
        if respect_switches else set()
    for lix in (ix for ix in net.line.index if ix not in nogolines):
        line = net.line.loc[lix]
        # pass the weight per edge so the weights stay aligned with the edges
        # even when some lines are skipped because of open switches
        g.add_edge(pp_bus_mapping[line.from_bus],
                   pp_bus_mapping[line.to_bus],
                   weight=line.length_km)

    # add trafos
    for _, trafo in net.trafo.iterrows():
        g.add_edge(pp_bus_mapping[trafo.hv_bus],
                   pp_bus_mapping[trafo.lv_bus], weight=0.01)
    for _, trafo3w in net.trafo3w.iterrows():
        g.add_edge(pp_bus_mapping[trafo3w.hv_bus],
                   pp_bus_mapping[trafo3w.lv_bus], weight=0.01)
        g.add_edge(pp_bus_mapping[trafo3w.hv_bus],
                   pp_bus_mapping[trafo3w.mv_bus], weight=0.01)

    # add switches
    bs = net.switch[(net.switch.et == "b") & (net.switch.closed == 1)] if respect_switches else \
        net.switch[(net.switch.et == "b")]
    for fb, tb in zip(bs.bus, bs.element):
        g.add_edge(pp_bus_mapping[fb], pp_bus_mapping[tb], weight=0.001)

    meshed = False
    for i in range(1, net.bus.shape[0]):
        if len(g.get_all_shortest_paths(0, i, mode="ALL")) > 1:
            meshed = True
            break

    roots = [pp_bus_mapping[s] for s in net.ext_grid.bus.values]
    return g, meshed, roots  # g, (not g.is_dag())
def displayGraph(data):
    """Plot a 3D keyword graph with plotly.

    Expected input format:
        nodes: {keyname, name, size} - node names, sized by occurrences
        links: {source, target, frequency} - edges, weighted by frequency
    """
    N = len(data['nodes'])  # number of nodes
    L = len(data['links'])  # number of edges
    Edges = [(data['links'][k]['source'], data['links'][k]['target'])
             for k in range(L)]  # list of edges
    print(Edges)

    G = ig.Graph(Edges, directed=False)

    keynames = []
    occurrences = []
    lcolors = []
    lwidth = []
    lname = []
    for node in data['nodes']:
        keynames.append(node['name'])
        occurrences.append(node['size'] * 2)
        lcolors.append(node['keyname'] * 2)
        lname.append(node['name'])
    for edge in data['links']:
        lwidth.append(edge['width'])

    layt = G.layout('kk', dim=3)
    Xn = [layt[k][0] for k in range(N)]  # x-coordinates of nodes
    Yn = [layt[k][1] for k in range(N)]  # y-coordinates
    Zn = [layt[k][2] for k in range(N)]  # z-coordinates
    Xe = []
    Ye = []
    Ze = []
    for e in Edges:
        Xe += [layt[e[0]][0], layt[e[1]][0], None]  # x-coordinates of edge ends
        Ye += [layt[e[0]][1], layt[e[1]][1], None]
        Ze += [layt[e[0]][2], layt[e[1]][2], None]

    trace1 = Scatter3d(x=Xe,
                       y=Ye,
                       z=Ze,
                       mode='lines',
                       name='sentence',
                       line=Line(color='rgb(125,125,125)', width=1),
                       text=lwidth,
                       hoverinfo='text')
    trace2 = Scatter3d(
        x=Xn,
        y=Yn,
        z=Zn,
        mode='markers',
        name='keyword',
        marker=Marker(
            symbol='dot',
            size=occurrences,
            color=lcolors,
            #color=group,
            colorscale='Viridis',
            line=Line(color='rgb(50,50,50)', width=0.5)),
        text=lname,
        hoverinfo='text')

    axis = dict(showbackground=False,
                showline=False,
                zeroline=False,
                showgrid=False,
                showticklabels=False,
                title='')
    layout = Layout(
        title="visualization of your text",
        width=1000,
        height=1000,
        showlegend=False,
        scene=Scene(
            xaxis=XAxis(axis),
            yaxis=YAxis(axis),
            zaxis=ZAxis(axis),
        ),
        margin=Margin(t=100),
        hovermode='closest',
        annotations=Annotations([
            Annotation(
                showarrow=False,
                text="Data source: <a href='http://bost.ocks.org/mike/miserables/miserables.json'>[1] miserables.json</a>",
                xref='paper',
                yref='paper',
                x=0,
                y=0.1,
                xanchor='left',
                yanchor='bottom',
                font=Font(size=14))
        ]),
    )

    # bundle traces and layout into a figure; passing the layout as a second
    # positional argument to plotly.offline.plot() would silently drop it
    fig = Figure(data=Data([trace1, trace2]), layout=layout)
    plotly.offline.plot(fig)
while i < total_snapshots:
    originalfile_prefix = ('./powlaw_degree_small_snapshot_graph_for_streaming_sampling/'
                           + 'new_' + dataset[0] + '_u_20/')
    # rewriteEdgelistFromZero(originalfile_prefix + 'output-prefix.t0000' + str(i) + '.graph', ' ')
    # rewriteClusteringFromZero(originalfile_prefix + 'output-prefix.t0000' + str(i) + '.comms', ' ')
    originalfile = originalfile_prefix + 'output-prefix.t0000' + str(i) + '.graph'
    print("original_snapshot" + originalfile)
    fp_originalfile = open(originalfile, 'r')
    original_snapshot_graph = nx.read_edgelist(fp_originalfile, nodetype=int)
    fp_originalfile.close()
    igr = igraph.Graph(n=original_snapshot_graph.number_of_nodes(),
                       edges=nx.convert_node_labels_to_integers(
                           original_snapshot_graph).edges())
    # igr = igraph.Graph(n=original_snapshot_graph.number_of_nodes(), edges=nx.convert_node_labels_to_integers(original_snapshot_graph, first_label=0).edges())
    org_complete_tcommsfile = originalfile
    if org_complete_tcommsfile not in org_complete_tcomms:
        org_complete_tcomms[org_complete_tcommsfile] = []
    ground_truthfile = originalfile_prefix + 'output-prefix.t0000' + str(i) + '.comms'
    fp_complete_commsfile = open(ground_truthfile, 'r')
    for line in fp_complete_commsfile.readlines():
        org_complete_tcomms[org_complete_tcommsfile].append(str(i) + ' ' + line)
    fp_complete_commsfile.close()
    i += 1  # advance to the next snapshot (without this the loop never terminates)
def run():
    cg_dpath = dpaths['baseline', '2009', 'countGraph']
    cg_prefix = prefixs['baseline', '2009', 'countGraph']
    gp_dpath = dpaths['baseline', '2009', 'groupPartition']
    gp_prefix = prefixs['baseline', '2009', 'groupPartition']
    #
    check_dir_create(gp_dpath)
    #
    gp_summary_fpath = '%s/%ssummary.csv' % (gp_dpath, gp_prefix)
    gp_original_fpath = '%s/%soriginal.pkl' % (gp_dpath, gp_prefix)
    gp_drivers_fpath = '%s/%sdrivers.pkl' % (gp_dpath, gp_prefix)
    #
    with open(gp_summary_fpath, 'wt') as w_csvfile:
        writer = csv.writer(w_csvfile, lineterminator='\n')
        writer.writerow([
            'groupName', 'numDrivers', 'numRelations', 'graphComplexity',
            'tieStrength', 'contribution', 'benCon'
        ])
    #
    logger.info('Start handling SP_group_dpath')
    if not check_path_exist(gp_original_fpath):
        original_graph = {}
        for fn in get_all_files(cg_dpath, '%s*' % cg_prefix):
            count_graph = load_pickle_file('%s/%s' % (cg_dpath, fn))
            logger.info('Start handling; %s' % fn)
            numEdges = len(count_graph)
            moduloNumber = max(1, numEdges // 10)  # guard against numEdges < 10
            for i, ((did0, did1), w) in enumerate(count_graph.items()):
                if i % moduloNumber == 0:
                    logger.info('Handling; %.2f' % (i / float(numEdges)))
                original_graph[did0, did1] = w
        save_pickle_file(gp_original_fpath, original_graph)
    else:
        original_graph = load_pickle_file(gp_original_fpath)
    #
    logger.info('igraph converting')
    igid, did_igid = 0, {}
    igG = ig.Graph(directed=True)
    numEdges = len(original_graph)
    moduloNumber = max(1, numEdges // 10)
    for i, ((did0, did1), w) in enumerate(original_graph.items()):
        if i % moduloNumber == 0:
            # parenthesize the format expression; the original divided the
            # formatted string itself by float(numEdges)
            logger.info('Handling; %.2f' % (i / float(numEdges)))
        if did0 not in did_igid:
            igG.add_vertex(did0)
            did_igid[did0] = igid
            igid += 1
        if did1 not in did_igid:
            igG.add_vertex(did1)
            did_igid[did1] = igid
            igid += 1
        igG.add_edge(did_igid[did0], did_igid[did1], weight=abs(w))
    #
    logger.info('Partitioning')
    part = louvain.find_partition(igG, method='Modularity', weight='weight')
    logger.info('Each group pickling and summary')
    gn_drivers = {}
    for i, sg in enumerate(part.subgraphs()):
        gn = 'G(%d)' % i
        group_fpath = '%s/%s%s.pkl' % (gp_dpath, gp_prefix, gn)
        sg.write_pickle(group_fpath)
        #
        drivers = [v['name'] for v in sg.vs]
        weights = [e['weight'] for e in sg.es]
        graphComplexity = len(weights) / float(len(drivers))
        tie_strength = sum(weights) / float(len(drivers))
        contribution = sum(weights) / float(len(weights))
        benCon = tie_strength / float(len(drivers))
        with open(gp_summary_fpath, 'a') as w_csvfile:
            writer = csv.writer(w_csvfile, lineterminator='\n')
            writer.writerow([
                gn, len(drivers), len(weights), graphComplexity, tie_strength,
                contribution, benCon
            ])
        gl_img_fpath = '%s/%simg-%s.pdf' % (gp_dpath, gp_prefix, gn)
        layout = sg.layout("kk")
        if len(drivers) < 100:
            ig.plot(sg, gl_img_fpath, layout=layout, vertex_label=drivers)
        else:
            ig.plot(sg, gl_img_fpath, layout=layout)
        gn_drivers[gn] = drivers
        gc_fpath = '%s/%scoef-%s.csv' % (gp_dpath, gp_prefix, gn)
        with open(gc_fpath, 'wt') as w_csvfile:
            writer = csv.writer(w_csvfile, lineterminator='\n')
            writer.writerow(['groupName', 'did0', 'did1', 'coef'])
            for e in sg.es:
                did0, did1 = [sg.vs[nIndex]['name'] for nIndex in e.tuple]
                coef = e['weight']
                writer.writerow([gn, did0, did1, coef])
    save_pickle_file(gp_drivers_fpath, gn_drivers)
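# Aside (a sketch, not part of the original pipeline): python-igraph resolves
# vertex names in add_edge(), which can replace hand-maintained id maps such
# as did_igid above.
import igraph as ig

toy = ig.Graph(directed=True)
toy.add_vertex('driver_a')
toy.add_vertex('driver_b')
toy.add_edge('driver_a', 'driver_b', weight=2.5)  # names looked up via the 'name' attribute
print(toy.es[0].tuple, toy.es[0]['weight'])  # (0, 1) 2.5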
def graphgen(N,
             directed=True,
             noigraph_gen=False,
             return_layout_as_object=True,
             graph_shape='rectangle'):
    """
    This function creates a directed lattice in d=2 where edges go up or right.
    The ig.Graph.Lattice function does not appear to create directed graphs
    well. Use plot_graph to test with a small N.

    Returns: a tuple (g, l) where g is an igraph object and l is an igraph layout

    directed=True produces a directed lattice with only up/right paths.
    Currently the other functions cannot handle the undirected lattice.

    noigraph_gen=True does not generate the igraph object. This is mostly used
    for debugging; it simply returns the edge and vertex lists.

    return_layout_as_object=True returns the second return value as an igraph
    Layout object.

    graph_shape='rectangle' or 'triangle'. A triangle helps cut down on
    computation time for limit shape computations, since you do not want the
    limit shape to be truncated.

    igraph does not check for uniqueness when adding vertices by name.

    Oct 25 2017: The for loop in this function is very slow. An iterator that
    yields is definitely better, since the for loop is run by the igraph
    creation routine.

    Oct 24 2017: This is a fairly inefficient function. Probably easier to add
    vertices by generating a list of names first.
    """
    if dbg >= 1:
        print('Start generating graph: ' + time.asctime())

    verts = vertgen(N, graph_shape=graph_shape)
    edges = edgegen(N, graph_shape=graph_shape)

    if dbg >= 1:
        print('Done generating vertex and edge lists: ' + time.asctime())

    # make a graph layout for plotting
    if not noigraph_gen:
        if dbg >= 3:
            print('generating new graph')
        try:
            g = ig.Graph(directed=directed)
            g.add_vertices(verts)
            g.add_edges(edges)
        except Exception:
            print('Error in generating igraph')
            raise  # re-raise so the failure is not silently swallowed

        # make layout for plotting
        if graph_shape == 'rectangle':
            layoutlist = [(x, y) for x in range(N) for y in range(N)]
        elif graph_shape == 'triangle':
            layoutlist = [(x, y) for x in range(N) for y in range(N - x)]

        if dbg >= 1:
            print('Done generating igraph object and layout: ' + time.asctime())

        if return_layout_as_object:
            return g, ig.Layout(layoutlist)
        else:
            return g, layoutlist
    else:  # if noigraph_gen == True
        return list(verts), list(edges)
def orthoFromSampleRecs(nfrec, outortdir, nsample=[], methods=['mixed'], \
        foutdiffog=None, outputOGperSampledRecGT=True, colourTreePerSampledRecGT=False, \
        graphCombine=None, majRuleCombine=None, **kw):
    """Derive orthologous groups from a sample of reconciled gene trees."""
    verbose = kw.get('verbose')
    fam = os.path.basename(nfrec).split('-', 1)[0]
    if verbose:
        print("\n# # # %s" % fam)
    # collect the desired sample from the reconciliation file
    dparserec = parseALERecFile(nfrec,
                                skipLines=True,
                                skipEventFreq=True,
                                nsample=nsample,
                                returnDict=True)
    lrecgt = dparserec['lrecgt']
    if kw.get('userefspetree'):
        refspetree = dparserec['spetree']
    else:
        refspetree = None
    colourCombinedTree = kw.get('colourCombinedTree')
    ddogs = {}
    dnexustrans = {}
    drevnexustrans = {}
    ltaxnexus = []
    llabs = []
    for i, recgenetree in enumerate(lrecgt):
        if nsample:
            g = nsample[i]
        else:
            g = i
        if verbose:
            print(recgenetree)
            print("\n# # reconciliation sample %d" % g)
        N = recgenetree.nb_leaves()
        dlabs = {}
        if set(['strict', 'mixed']) & set(methods):
            if verbose:
                print("\n# strict_ogs:\n")
            strict_ogs, unclassified, dlabs = getOrthologues(recgenetree,
                                                             method='strict',
                                                             refspetree=refspetree,
                                                             dlabs=dlabs,
                                                             **kw)
            n1 = summaryOGs(strict_ogs, dlabs, N, verbose)
        else:
            strict_ogs = unclassified = None
            n1 = 'NA'
        if 'unicopy' in methods:
            if verbose:
                print("\n# unicopy_ogs:\n")
            unicopy_ogs, notrelevant, dlabs = getOrthologues(recgenetree,
                                                             method='unicopy',
                                                             refspetree=refspetree,
                                                             dlabs=dlabs,
                                                             **kw)
            n2 = summaryOGs(unicopy_ogs, dlabs, N, verbose)
        else:
            unicopy_ogs = None
            n2 = 'NA'
        if 'mixed' in methods:
            if verbose:
                print("\n# mixed_ogs:\n")
            mixed_ogs, unclassified, dlabs = getOrthologues(recgenetree,
                                                            method='mixed',
                                                            strict_ogs=strict_ogs,
                                                            unclassified=unclassified,
                                                            refspetree=refspetree,
                                                            dlabs=dlabs,
                                                            **kw)
            n3 = summaryOGs(mixed_ogs, dlabs, N, verbose)
        else:
            mixed_ogs = None
            n3 = 'NA'
        if foutdiffog or verbose:
            o12 = str(sum([int(o in strict_ogs) for o in unicopy_ogs
                           ])) if (strict_ogs and unicopy_ogs) else 'NA'
            o13 = str(sum([int(o in strict_ogs) for o in mixed_ogs
                           ])) if (strict_ogs and mixed_ogs) else 'NA'
            o23 = str(sum([int(o in unicopy_ogs) for o in mixed_ogs
                           ])) if (mixed_ogs and unicopy_ogs) else 'NA'
            if verbose:
                print("\n# summary:\n")
                print("overlap strict_ogs with unicopy_ogs:", o12)
                print("overlap strict_ogs with mixed_ogs:", o13)
                print("overlap unicopy_ogs with mixed_ogs:", o23)
            if foutdiffog:
                foutdiffog.write(
                    '\t'.join([fam, str(g), n1, n2, n3, o12, o13, o23]) + '\n')
        if colourTreePerSampledRecGT or colourCombinedTree:
            if i == 0:
                recgenetree, dnexustrans, drevnexustrans, ltaxnexus = indexCleanTreeLabels(
                    recgenetree, dlabs)
            else:
                recgenetree, dnexustrans, drevnexustrans, ltaxnexus = indexCleanTreeLabels(recgenetree, dlabs, \
                    dnexustrans=dnexustrans, drevnexustrans=drevnexustrans, ltaxnexus=ltaxnexus, update=False)
        ddogs[g] = {
            'strict': strict_ogs,
            'unicopy': unicopy_ogs,
            'mixed': mixed_ogs
        }
        if verbose:
            print("\n# # # # # # # #")
        if i == 0:
            # collect the leaf labels; just do once
            llabs = sorted(dlabs.values())

    R = len(lrecgt)
    gs = nsample if nsample else range(R)
    for method in methods:
        ltrees = []
        nfoutrad = os.path.join(outortdir, method, "%s_%s" % (fam, method))
        if colourTreePerSampledRecGT:
            logs = [ddogs[g][method] for g in gs]
            writeRecGeneTreesColouredByOrthologs(lrecgt, logs, nfoutrad+"_orthologous_groups.nex", drevnexustrans, \
                treenames=["tree_%d" % g for g in gs], ltax=ltaxnexus, dtranslate=dnexustrans, figtree=True)
        if outputOGperSampledRecGT:
            with open(nfoutrad + ".orthologs.per_sampled_tree", 'w') as foutort:
                for g in gs:
                    ogs = ddogs[g][method]
                    foutort.write('\n'.join([' '.join(x) for x in ogs]) + '\n#\n')
        if graphCombine or majRuleCombine:
            ## for later output
            recgt0 = lrecgt[0] if colourCombinedTree else None
            # could also use the ALE consensus tree, which has branch supports but has no lengths
            ## first make a dict of edge frequencies
            dedgefreq = {}
            for g in gs:
                ogs = ddogs[g][method]
                for og in ogs:
                    if len(og) == 1:
                        orfan = og[0]
                        combo = (orfan, orfan)
                        dedgefreq[combo] = dedgefreq.get(combo, 0) + 1
                    else:
                        # get all pairs of genes in the OG
                        combogs = combinations(sorted(og), 2)
                        # add the counts
                        for combo in combogs:
                            dedgefreq[combo] = dedgefreq.get(combo, 0) + 1
            ## build a graph of connectivity of the genes in OGs, integrating over the sample
            gOG = igraph.Graph()
            gOG.add_vertices(len(llabs))
            gOG.vs['name'] = llabs
            # first make a full weighted graph: add the edges to the graph
            edges, freqs = zip(*dedgefreq.items())
            gOG.add_edges(edges)
            gOG.es['weight'] = freqs
            if majRuleCombine:
                ## make a majority rule unweighted graph
                mjgOG = gOG.copy()
                # select edges with frequency below the threshold
                mjdropedges = []
                minfreq = majRuleCombine * R
                for e in mjgOG.es:
                    # use strict majority (assuming the parameter majRuleCombine=0.5,
                    # the default) to avoid obtaining family-wide single components
                    if e['weight'] <= minfreq:
                        mjdropedges.append(e.index)
                # remove the low-freq edges from the graph
                mjgOG.delete_edges(mjdropedges)
                if verbose:
                    print("Majority Rule Consensus network: dropped %d edges with weight <= %d from the full network (%d edges)" % (
                        len(mjdropedges), minfreq, len(gOG.es)))
                # find connected components (i.e. perform clustering)
                compsOGs = mjgOG.components()
                # resolve conflicts in orthology classification
                mjgOG, compsOGs = enforceUnicity(mjgOG, compsOGs,
                                                 getVertexClustering,
                                                 communitymethod='components',
                                                 **kw)
                # write results
                writeGraphCombinedOrthologs(nfoutrad, "majrule_combined_%f"%majRuleCombine, mjgOG, compsOGs, llabs, \
                    colourCombinedTree=colourCombinedTree, recgt=recgt0, drevnexustrans=drevnexustrans, \
                    ltax=ltaxnexus, dtranslate=dnexustrans, ltreenames=["tree_0"], figtree=True)
            if graphCombine:
                # find communities (i.e. perform clustering) in the full weighted graph
                commsOGs = getVertexClustering(gOG, graphCombine)
                # resolve conflicts in orthology classification
                gOG, commsOGs = enforceUnicity(gOG, commsOGs,
                                               getVertexClustering,
                                               maxdrop=20,
                                               communitymethod=graphCombine,
                                               **kw)
                # write results
                writeGraphCombinedOrthologs(nfoutrad, 'graph_combined_%s'%graphCombine, gOG, commsOGs, llabs, \
                    colourCombinedTree=colourCombinedTree, recgt=recgt0, drevnexustrans=drevnexustrans, \
                    ltax=ltaxnexus, dtranslate=dnexustrans, ltreenames=["tree_0"], figtree=True)
def parse_obo_graph(path):
    stored_pickle_file_prefix = 'obo.graphs'
    stored_pickles_found = False

    g = {
        'biological_process': igraph.Graph(directed=True),
        'cellular_component': igraph.Graph(directed=True),
        'molecular_function': igraph.Graph(directed=True)
    }

    for ns in g:
        pickle_file_path = "{0}.{1}".format(stored_pickle_file_prefix, ns)
        if os.path.exists(pickle_file_path):
            print("Using stored ontology graph: {0}".format(pickle_file_path))
            g[ns] = igraph.Graph.Read_Pickle(fname=pickle_file_path)
            stored_pickles_found = True

    # key: GO:ID, value = {'ns': 'biological_process', 'idx': 25}
    terms = dict()

    if stored_pickles_found is True:
        terms_file_path = "{0}.terms".format(stored_pickle_file_prefix)
        print("Using stored terms data structure: {0}".format(terms_file_path))
        with open(terms_file_path, 'rb') as f:
            terms = pickle.load(f)

    # key: namespace, value=int
    next_idx = {
        'biological_process': 0,
        'cellular_component': 0,
        'molecular_function': 0
    }

    id = None
    namespace = None
    name = None

    # Pass through the file once just to get all the GO terms and their namespaces.
    # This makes the full pass far easier, since terms can be referenced which
    # haven't been seen yet.
    if stored_pickles_found is False:
        for line in open(path):
            line = line.rstrip()
            if line.startswith('[Term]'):
                if id is not None:
                    # error checking
                    if namespace is None:
                        raise Exception(
                            "Didn't find a namespace for term {0}".format(id))
                    g[namespace].add_vertices(1)
                    idx = next_idx[namespace]
                    g[namespace].vs[idx]['id'] = id
                    g[namespace].vs[idx]['name'] = name
                    next_idx[namespace] += 1
                    terms[id] = {'ns': namespace, 'idx': idx}
                # reset for next term
                id = None
                namespace = None
                name = None
            elif line.startswith('id:'):
                id = line.split(' ')[1]
            elif line.startswith('namespace:'):
                namespace = line.split(' ')[1]
            elif line.startswith('name:'):
                m = re.match('name: (.+)', line)
                if m:
                    name = m.group(1).rstrip()
                else:
                    raise Exception(
                        "Failed to regex this line: {0}".format(line))

    id = None
    alt_ids = list()
    namespace = None
    name = None
    is_obsolete = False
    is_a = list()

    # Now actually parse the rest of the properties
    if stored_pickles_found is False:
        for line in open(path):
            line = line.rstrip()
            if line.startswith('[Term]'):
                if id is not None:
                    # make any edges in the graph
                    for is_a_id in is_a:
                        # these two terms should be in the same namespace
                        if terms[id]['ns'] != terms[is_a_id]['ns']:
                            raise Exception(
                                "is_a relationship found with terms in different namespaces")
                        #g[namespace].add_edges([(terms[id]['idx'], terms[is_a_id]['idx']), ])
                        # The add_edges() call above should work too according
                        # to the documentation, but it fails, so add_edge is
                        # used instead:
                        g[namespace].add_edge(terms[id]['idx'],
                                              terms[is_a_id]['idx'])
                # reset for this term
                id = None
                alt_ids = list()
                namespace = None
                is_obsolete = False
                is_a = list()
            elif line.startswith('id:'):
                id = line.split(' ')[1]
            elif line.startswith('namespace:'):
                namespace = line.split(' ')[1]
            elif line.startswith('is_a:'):
                is_a.append(line.split(' ')[1])

    if stored_pickles_found is False:
        for ns in g:
            pickle_file_path = "{0}.{1}".format(stored_pickle_file_prefix, ns)
            g[ns].write_pickle(fname=pickle_file_path)

        ## save the terms too so we don't have to redo that parse
        with open("{0}.terms".format(stored_pickle_file_prefix), 'wb') as f:
            pickle.dump(terms, f, pickle.HIGHEST_PROTOCOL)

    return terms, g
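# Sanity sketch of the pickle round-trip used above; Graph.write_pickle and
# Graph.Read_Pickle are the standard igraph calls (the file name is arbitrary).
import igraph

demo = igraph.Graph.Ring(5, directed=True)
demo.write_pickle(fname='demo.graph.pickle')
restored = igraph.Graph.Read_Pickle(fname='demo.graph.pickle')
assert restored.vcount() == 5 and restored.ecount() == 5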
import os
import shutil
from time import sleep

import networkx as nx
import igraph as ig
import plotly
import plotly.graph_objs as go


def plot_3D(adj_list, df_node_label, title='Graph'):
    # Copy to networkx (only used to count nodes and edges):
    graph1 = nx.Graph()
    graph1.add_edges_from(adj_list.values)
    N = graph1.number_of_nodes()
    L = graph1.number_of_edges()

    Edges = [tuple(i) for i in adj_list.values]
    G = ig.Graph(Edges, directed=True)

    # Node labels:
    group = df_node_label['label'].tolist()

    # Setting plotly
    layt = G.layout('kk', dim=3)
    Xn = [layt[k][0] for k in range(N)]  # x-coordinates of nodes
    Yn = [layt[k][1] for k in range(N)]  # y-coordinates
    Zn = [layt[k][2] for k in range(N)]  # z-coordinates
    Xe = []
    Ye = []
    Ze = []
    for e in Edges:
        Xe += [layt[e[0]][0], layt[e[1]][0], None]  # x-coordinates of edge ends
        Ye += [layt[e[0]][1], layt[e[1]][1], None]
        Ze += [layt[e[0]][2], layt[e[1]][2], None]

    # Parameters:
    trace1 = go.Scatter3d(x=Xe, y=Ye, z=Ze,
                          mode='lines',
                          line=dict(color='rgb(125,125,125)', width=1),
                          hoverinfo='none')
    trace2 = go.Scatter3d(x=Xn, y=Yn, z=Zn,
                          mode='markers',
                          name='actors',
                          marker=dict(symbol='circle',
                                      size=6,
                                      color=group,
                                      colorscale='Viridis',
                                      line=dict(color='rgb(50,50,50)',
                                                width=0.5)),
                          text=group,
                          hoverinfo='text')
    axis = dict(showbackground=False,
                showline=False,
                zeroline=False,
                showgrid=False,
                showticklabels=False,
                title='')
    layout = go.Layout(
        title=title,
        width=1000,
        height=1000,
        showlegend=False,
        scene=dict(
            xaxis=dict(axis),
            yaxis=dict(axis),
            zaxis=dict(axis),
        ),
        margin=dict(t=100),
        hovermode='closest',
        annotations=[
            dict(showarrow=False,
                 text="Data source: {}".format(title),
                 xref='paper',
                 yref='paper',
                 x=0,
                 y=0.1,
                 xanchor='left',
                 yanchor='bottom',
                 font=dict(size=14))
        ],
    )
    data = [trace1, trace2]

    dload = os.path.expanduser('~/Downloads')
    title_png = title + '.png'
    f_load = os.path.join(dload, title_png)
    f_save = os.path.join(
        '/Users/marcelogutierrez/Projects/Gamma/capsuleSans/diagrams',
        title_png)
    html_file = '{}.html'.format(title)

    plotly.offline.plot({
        "data": data,
        "layout": layout
    },
                        image='png',
                        filename=html_file,
                        image_filename=title,
                        auto_open=True)
    # wait for the browser to finish downloading the PNG, then move it out of
    # ~/Downloads to the project directory
    sleep(3)
    shutil.move(f_load, f_save)
def create_item_graph(mode='train'):
    """
    Creates a graph whose vertices correspond to items. For each purchase, an
    edge is added from each searched item to the one that was bought. Edges
    may be repeated.
    """

    """ Fetch data """
    TRAIN_LINES = 413163
    TEST_LINES = 177070

    df = read_item_data()
    df['item_id'] = df.index
    dct_title = df['title'].to_dict()
    dct_domain = df['domain_id'].to_dict()
    dct_price = df['price'].to_dict()

    """ Ratio stuff """
    from input.create_ratio import get_ratio
    dct_ratio_dom = get_ratio(which='domain_id')

    ratio_df = get_ratio(which='item_id', full=True)
    ratio_df['popularity'] = 100.0 * ratio_df['bought'] + ratio_df['searched']
    dct_ratio_item_b = ratio_df['popularity'].to_dict()

    """ JSON """
    # reuse the TRAIN_LINES constant instead of repeating the literal
    if mode == 'train':
        check = lambda x: x <= np.round(TRAIN_LINES * 0.8).astype(np.int32)
    elif mode == 'val':
        check = lambda x: x > np.round(TRAIN_LINES * 0.8).astype(np.int32)
    else:
        check = lambda x: True

    DATA_PATH = path.join(
        DATA_DIR, 'test_dataset.jl' if mode == 'test' else 'train_dataset.jl')
    line_i = 0

    """ Create graph vertices """
    g = ig.Graph()
    counter, f_map_func, r_map_func = get_mappings()

    for k in dct_title.keys():
        g.add_vertex(value=k,
                     deg=dct_ratio_item_b[k],
                     domain_id=dct_domain[k],
                     price=dct_price[k],
                     cat='item_id')

    """ ['item_id','domain_id','category_id','product_id'] """
    for k in pd.unique(df['domain_id']):
        g.add_vertex(value=k, cat='domain_id')
    for k in pd.unique(df['category_id']):
        g.add_vertex(value=k, cat='category_id')
    for k in pd.unique(df['product_id']):
        g.add_vertex(value=k, cat='product_id')

    """ Create edges """
    E1 = []
    E2 = []
    with jsonlines.open(DATA_PATH) as reader:
        for line_i, obj in enumerate(reader):
            if check(line_i):
                print(line_i)
                L = []
                for h in obj['user_history']:
                    if h['event_type'] == 'view':
                        #print("Viewed {}".format(dct[h['event_info']]))
                        L.append(h['event_info'])
                    elif h['event_type'] == 'search':
                        #print("Searched {}".format(h['event_info']))
                        pass
                L = pd.unique(L)
                #L_domain = [dct_domain[k] for k in L]
                for i in range(len(L)):
                    E1.append(L[i])
                    E2.append(obj['item_bought'])

    E1 = f_map_func['item_id'](E1)
    E2 = f_map_func['item_id'](E2)
    E = list(zip(E1, E2))
    g.add_edges(E)
    #g = g.as_undirected()

    g.write_pickle(fname=path.join(DATA_DIR, 'graph_domain_id.pkl'))
    _sid, _did = int(line['sourceid']), int(line['dstid'])
    edge = (_sid, _did) if _sid < _did else (_did, _sid)
    if edge in weighted_edges:
        # consider multiple edges between the same two nodes
        total_weight, num_edges = weighted_edges[edge]
        weighted_edges[edge] = (float(line['mean_travel_time']) + total_weight,
                                num_edges + 1)
    else:
        weighted_edges[edge] = (float(line['mean_travel_time']), 1)

# merge duplicated edges by averaging their weights
for edge in weighted_edges.keys():
    total_weight, num_edges = weighted_edges[edge]
    weighted_edges[edge] = total_weight / num_edges

g = ig.Graph(
    [e for e in weighted_edges.keys()],
    edge_attrs=dict(weight=[w for w in weighted_edges.values()]))

# set vertex indices attribute to keep track of indices in later graph manipulations
for index, vertex in enumerate(g.vs):
    vertex['index'] = index

print("The graph has {0} vertices and {1} edges.".format(
    g.vcount(), g.ecount()))

g_gcc = g.clusters().giant()
pickle.dump(g_gcc, file_object)
print("Graph and its GCC generated...")
print("The Giant Connected Component has {0} vertices and {1} edges.".format(
    g_gcc.vcount(), g_gcc.ecount()))

# map original vertex indices to their positions in the GCC; the tail of this
# statement was truncated in the source, and the comprehension below is its
# natural completion
g_gcc_indices_lookup = {
    vertex['index']: i for i, vertex in enumerate(g_gcc.vs)
}
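# Toy check (not from the original script) of the giant-component extraction
# used above: clusters().giant() keeps only the largest connected component.
import igraph as ig

toy = ig.Graph([(0, 1), (1, 2), (3, 4)])
print(toy.clusters().giant().vcount())  # 3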
def plotly_graph(
        kmgraph,
        graph_layout="kk",
        colorscale=default_colorscale,
        showscale=True,
        factor_size=3,
        edge_linecolor="rgb(180,180,180)",
        edge_linewidth=1.5,
        node_linecolor="rgb(255,255,255)",
        node_linewidth=1.0,
):
    """Generate Plotly data structures that represent the mapper graph

    Parameters
    ----------
    kmgraph: dict representing the mapper graph,
             returned by the function get_mapper_graph()
    graph_layout: igraph layout; recommended 'kk' (kamada-kawai)
                  or 'fr' (fruchterman-reingold)
    colorscale: a Plotly colorscale (colormap) to color graph nodes
    showscale: boolean to display or not the colorbar
    factor_size: a factor for the node size

    Returns
    -------
    The plotly traces (dicts) representing the graph edges and nodes
    """
    # define an igraph.Graph instance of n_nodes
    n_nodes = len(kmgraph["nodes"])
    if n_nodes == 0:
        raise ValueError("Your graph has 0 nodes")
    G = ig.Graph(n=n_nodes)
    links = [(e["source"], e["target"]) for e in kmgraph["links"]]
    G.add_edges(links)
    layt = G.layout(graph_layout)

    hover_text = [node["name"] for node in kmgraph["nodes"]]
    color_vals = [node["color"] for node in kmgraph["nodes"]]
    # np.int is deprecated; plain int is the supported dtype spec
    node_size = np.array(
        [factor_size * node["size"] for node in kmgraph["nodes"]], dtype=int)
    Xn, Yn, Xe, Ye = _get_plotly_data(links, layt)

    edge_trace = dict(
        type="scatter",
        x=Xe,
        y=Ye,
        mode="lines",
        line=dict(color=edge_linecolor, width=edge_linewidth),
        hoverinfo="none",
    )

    node_trace = dict(
        type="scatter",
        x=Xn,
        y=Yn,
        mode="markers",
        marker=dict(
            size=node_size.tolist(),
            color=color_vals,
            opacity=1.0,
            colorscale=colorscale,
            showscale=showscale,
            line=dict(color=node_linecolor, width=node_linewidth),
            colorbar=dict(thickness=20, ticklen=4, x=1.01,
                          tickfont=dict(size=10)),
        ),
        text=hover_text,
        hoverinfo="text",
    )

    return [edge_trace, node_trace]
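# Hedged usage sketch for plotly_graph(); the hand-built kmgraph dict below
# only shows the keys the function reads ('nodes' with name/color/size and
# 'links' with source/target) and is not actual output of get_mapper_graph().
kmgraph = {
    'nodes': [
        {'name': 'n0', 'color': 0.1, 'size': 2},
        {'name': 'n1', 'color': 0.9, 'size': 3},
    ],
    'links': [{'source': 0, 'target': 1}],
}
edge_trace, node_trace = plotly_graph(kmgraph)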
args = parser.parse_args()

if not args.prefix:
    args.prefix = os.path.splitext(os.path.basename(args.dataset))[0]

# Load csv file (use the parsed argument rather than sys.argv directly)
dataset = np.loadtxt(args.dataset)

# Compute nearest neighbors
print('Building kd-tree index...')
flann = FLANN()
flann.build_index(dataset)

# Create the state-space graph
graph = ig.Graph(directed=args.directed)
graph.add_vertices(len(dataset))  # np.alen is deprecated; len works the same


def heat_kernel(i, j, dists, knn=None):
    return np.exp(-1 * dists[i, j] / args.sigma)


def locally_scaled_heat_kernel(i, j, dists, knn):
    return np.exp(-1 * dists[i, j] /
                  (np.sqrt(dists[i, args.local_scaling - 1]) *
                   np.sqrt(dists[knn[i, j], args.local_scaling - 1])))


if args.local_scaling:
    print('Scaling locally')
    similarity = locally_scaled_heat_kernel
else:
    similarity = heat_kernel

if args.radius:
def build_igraph_from_pp(net, respect_switches=False, buses=None):
    """
    This function uses the igraph library to create an igraph graph for a given
    pandapower network. Lines, transformers and switches are respected.
    Performance vs. networkx: https://graph-tool.skewed.de/performance

    :param net: pandapower network
    :type net: pandapowerNet
    :param respect_switches: if True, exclude edges for open switches (also lines that are \
        connected via line switches)
    :type respect_switches: bool, default False
    :param buses: subset of buses to consider (defaults to all buses in the network)
    :type buses: iterable, default None

    :Example:
        graph, meshed, roots = build_igraph_from_pp(net)
    """
    try:
        import igraph as ig
    except (DeprecationWarning, ImportError):
        raise ImportError("Please install python-igraph with "
                          "`pip install python-igraph` or "
                          "`conda install python-igraph` "
                          "or from https://www.lfd.uci.edu/~gohlke/pythonlibs")
    g = ig.Graph(directed=True)
    bus_index = net.bus.index if buses is None else np.array(buses)
    nr_buses = len(bus_index)
    g.add_vertices(nr_buses)
    # g.vs["label"] = [s.encode('unicode-escape') for s in net.bus.name.tolist()]
    g.vs["label"] = list(bus_index)
    pp_bus_mapping = dict(list(zip(bus_index, list(range(nr_buses)))))
    if respect_switches:
        open_switches = ~net.switch.closed.values.astype(bool)

    # add lines
    mask = _get_element_mask_from_nodes(net, "line", ["from_bus", "to_bus"],
                                        buses)
    if respect_switches:
        mask &= _get_switch_mask(net, "line", "l", open_switches)
    for line in net.line[mask].itertuples():
        g.add_edge(pp_bus_mapping[line.from_bus],
                   pp_bus_mapping[line.to_bus],
                   weight=line.length_km)

    # add trafos
    mask = _get_element_mask_from_nodes(net, "trafo", ["hv_bus", "lv_bus"],
                                        buses)
    if respect_switches:
        mask &= _get_switch_mask(net, "trafo", "t", open_switches)
    for trafo in net.trafo[mask].itertuples():
        g.add_edge(pp_bus_mapping[trafo.hv_bus],
                   pp_bus_mapping[trafo.lv_bus], weight=0.01)

    # add trafo3w
    mask = _get_element_mask_from_nodes(net, "trafo3w",
                                        ["hv_bus", "mv_bus", "lv_bus"], buses)
    if respect_switches:
        mask &= _get_switch_mask(net, "trafo3w", "t3", open_switches)
    for trafo3w in net.trafo3w[mask].itertuples():
        g.add_edge(pp_bus_mapping[trafo3w.hv_bus],
                   pp_bus_mapping[trafo3w.lv_bus], weight=0.01)
        g.add_edge(pp_bus_mapping[trafo3w.hv_bus],
                   pp_bus_mapping[trafo3w.mv_bus], weight=0.01)

    # add switches
    mask = net.switch.et.values == "b"
    if respect_switches:
        mask &= ~open_switches
    for switch in net.switch[mask].itertuples():
        g.add_edge(pp_bus_mapping[switch.element],
                   pp_bus_mapping[switch.bus], weight=0.001)

    meshed = _igraph_meshed(g)

    roots = [
        pp_bus_mapping[b] for b in net.ext_grid.bus.values if b in bus_index
    ]
    return g, meshed, roots  # g, (not g.is_dag())
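# Usage sketch, assuming pandapower is installed; example_simple() is one of
# pandapower's bundled example networks.
import pandapower.networks as pn

net = pn.example_simple()
graph, meshed, roots = build_igraph_from_pp(net, respect_switches=True)
print(graph.vcount(), meshed, roots)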
def compute_communities(self):
    '''Compute communities from a matrix with fixed nodes

    Returns:
        None, but SemiAnnotate.membership is set as an array with size
        N - n_fixed with the atlas cell types of all cells from the new
        dataset.
    '''
    import inspect
    import igraph as ig
    import leidenalg

    # Check whether this version of Leiden has fixed nodes support
    opt = leidenalg.Optimiser()
    sig = inspect.getfullargspec(opt.optimise_partition)
    if 'fixed_nodes' not in sig.args:
        raise ImportError(
            'This version of the leidenalg module does not support fixed '
            'nodes. Please update to a later (development) version')

    matrix = self.matrix
    sizes = self.sizes
    n_fixed = self.n_fixed
    clustering_metric = self.clustering_metric
    resolution_parameter = self.resolution_parameter
    neighbors = self.neighbors

    L, N = matrix.shape
    n_fixede = int(np.sum(sizes[:n_fixed]))
    Ne = int(np.sum(sizes))

    # Construct graph from the lists of neighbors
    edges_d = set()
    for i, neis in enumerate(neighbors):
        for n in neis:
            edges_d.add(frozenset((i, n)))
    edges = [tuple(e) for e in edges_d]
    g = ig.Graph(n=N, edges=edges, directed=False)

    # NOTE: initial membership is singletons except for atlas nodes, which
    # get the membership they have.
    initial_membership = []
    for isi in range(N):
        if isi < n_fixed:
            for ii in range(int(self.sizes[isi])):
                initial_membership.append(isi)
        else:
            initial_membership.append(isi)

    if len(initial_membership) != Ne:
        raise ValueError('initial_membership list has wrong length!')

    # Compute communities with semi-supervised Leiden
    if clustering_metric == 'cpm':
        partition = leidenalg.CPMVertexPartition(
            g,
            resolution_parameter=resolution_parameter,
            initial_membership=initial_membership,
        )
    elif clustering_metric == 'modularity':
        # NOTE: ModularityVertexPartition does not accept a resolution
        # parameter, so it is not forwarded here
        partition = leidenalg.ModularityVertexPartition(
            g,
            initial_membership=initial_membership,
        )
    else:
        raise ValueError(
            'clustering_metric not understood: {:}'.format(clustering_metric))

    fixed_nodes = [int(i < n_fixede) for i in range(Ne)]
    opt.optimise_partition(partition, fixed_nodes=fixed_nodes)
    membership = partition.membership[n_fixede:]

    # Convert the known cell types
    lstring = len(max(self.cell_types, key=len))
    self.membership = np.array(
        [str(x) for x in membership], dtype='U{:}'.format(lstring))
    for i, ct in enumerate(self.cell_types):
        self.membership[self.membership == str(i)] = ct
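# For reference, a sketch of the unconstrained equivalent in released
# leidenalg versions (no fixed nodes, plain modularity):
import igraph as ig
import leidenalg

toy = ig.Graph.Famous('Zachary')
part = leidenalg.find_partition(toy, leidenalg.ModularityVertexPartition)
print(len(part))  # number of communities found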
def networkx_to_igraph(G):
    """Convert a networkx graph to igraph, returning the reverse node mapping."""
    mapping = dict(zip(G.nodes(), range(G.number_of_nodes())))
    reverse_mapping = dict(zip(range(G.number_of_nodes()), G.nodes()))
    G = nx.relabel_nodes(G, mapping)
    # keep only the (source, target) pairs of the edge list
    G_ig = ig.Graph(len(G), [(u, v) for u, v, _ in nx.to_edgelist(G)])
    return G_ig, reverse_mapping
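# Usage sketch for networkx_to_igraph(): original node labels are recovered
# through reverse_mapping.
import networkx as nx

G_nx = nx.Graph([('a', 'b'), ('b', 'c')])
G_ig, reverse_mapping = networkx_to_igraph(G_nx)
print([reverse_mapping[v.index] for v in G_ig.vs])  # ['a', 'b', 'c']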
#------------------------------------------------------------------
# we will use the following basic features:

# number of overlapping words in title
overlap_title = []

# temporal distance between the papers
temp_diff = []

# number of common authors
comm_auth = []

# author citation history
cit_hist = []
auth_graph = igraph.Graph(directed=True)

#------------------------------------------------------------------
# document similarity for abstracts
similarity = []
doc_similarity = features_TFIDF.dot(features_TFIDF.T)

#------------------------------------------------------------------
# inverse shortest-path distance (to avoid distance = infinity)
inverse_shortest_distances = []

#------------------------------------------------------------------
# keyword overlap
overlap_keyword = []


def inverse_shortest_dist(g, source, target):
    # NOTE: the body of this helper was truncated in the source; the +1 in
    # the denominator and the no-path fallback are a reconstruction
    try:
        return 1. / (len(nx.shortest_path(g, source=source, target=target)) + 1)
    except nx.NetworkXNoPath:
        return 0.
# node_to_colo, colo_node_iter and colonial_edges are assumed to be
# initialized before this loop, e.g.:
# node_to_colo, colo_node_iter, colonial_edges = {}, 0, []
for edge_idx in range(edge_attr.shape[0]):
    if edge_attr[edge_idx, 5] == 1:
        new_edge = tuple(edge_index[:, edge_idx].tolist())
        if new_edge[0] not in node_to_colo:
            node_to_colo[new_edge[0]] = colo_node_iter
            colo_node_iter += 1
        if new_edge[1] not in node_to_colo:
            node_to_colo[new_edge[1]] = colo_node_iter
            colo_node_iter += 1
        colonial_edges.append((node_to_colo[new_edge[0]],
                               node_to_colo[new_edge[1]]))

N = colo_node_iter
G = ig.Graph(colonial_edges, directed=True)
layt = G.layout(layout='kk')
Xn = [layt[k][0] for k in range(N)]  # x-coordinates of nodes
Yn = [layt[k][1] for k in range(N)]  # y-coordinates
Xe = []
Ye = []
for e in colonial_edges:
    Xe += [layt[e[0]][0], layt[e[1]][0], None]  # x-coordinates of edge ends
    Ye += [layt[e[0]][1], layt[e[1]][1], None]

trace1 = go.Scatter(x=Xe, y=Ye, mode='lines',
            st.subheader("val")
            if len(val) > 0:
                st.write(i.score(val.drop(y_label, axis=1), val[y_label]))
            else:
                st.write("No Data")
            st.subheader("test")
            st.write(i.score(test.drop(y_label, axis=1), test[y_label]))
            trained_models.append(i)
    elif option == "Network Graph":
        st.markdown("### Example ")
        # https://plotly.com/python/v3/igraph-networkx-comparison/
        S = igraph.Graph(directed=True)
        # add exactly 8 vertices so the id/label attribute lists below match
        # the vertex count (the original added a stray ninth vertex)
        S.add_vertices(8)
        S.vs["id"] = [1, 2, 3, 4, 5, 6, 7, 8]
        S.vs["label"] = [1, 2, 3, 4, 5, 6, 7, 8]
        S.add_edges([(1, 2), (2, 3), (4, 5), (1, 6)])
        # plotting with a filename target already writes the file, so no
        # separate save() call is needed
        igraph.drawing.plot(S, "temp.png", layout=S.layout_lgl())
        st.image("temp.png")

    if option2 == "Chain":
def main(df):
    global model, g
    texts = df.text.to_list()

    # 1. Split the whole collection of reviews into sentences and lemmatize all words.

    # stopword removal helper
    mystopwords = stopwords.words('russian') + [
        'это', 'наш', 'тыс', 'млн', 'млрд', 'также', 'т', 'д', 'который',
        'прошлый', 'сей', 'свой', 'мочь', 'в', 'я', '-', 'мой', 'ваш', 'и', '5'
    ]

    def remove_stopwords(text, mystopwords=mystopwords):
        try:
            return " ".join([
                token for token in text.lower().split()
                if not token in mystopwords
            ])
        except:
            return ""

    # lemmatization helper
    def lemmatize(text, morph=MorphAnalyzer()):
        try:
            lemmas = [
                morph.parse(word)[0].normal_form for word in text.split()
            ]
            return ' '.join(lemmas)
        except:
            return ""

    pattern = re.compile('[а-яА-Я]+')

    def only_words(text, p=pattern):
        return ' '.join(p.findall(text)).strip()

    # split each text into sentences and collect them all into one list
    sentences = []
    pattern = re.compile('[а-яА-Я]+')
    for text in texts:
        text = lemmatize(remove_stopwords(text))
        text_sentences = sent_tokenize(text)
        for sentence in text_sentences:
            sentences.append(pattern.findall(sentence))

    model = Word2Vec(min_count=1)
    model.build_vocab(sentences)
    model.train(sentences,
                total_examples=model.corpus_count,
                epochs=model.iter)

    # nearest words
    result1 = model.wv.most_similar(positive="банк", topn=10)
    print('result1 = ', result1)

    # analogies
    result2 = model.wv.most_similar(positive=['кредит', 'вклад'],
                                    negative=['долг'])
    print('result2 = ', result2)

    # odd-one-out detection
    result3 = model.wv.doesnt_match("банк перевод счет отделение".split())
    print('result3 = ', result3)

    # Results obtained (AMOUNT=10000)
    #result1 = [('клик', 0.655503511428833), ('банком', 0.6381771564483643), ('банка', 0.5996867418289185), ('мобайл', 0.5682080984115601), ('банку', 0.5554714202880859), ('клике', 0.5553926229476929), ('клика', 0.5493252873420715), ('беларусь', 0.545136570930481), ('банке', 0.5433052778244019), ('терроризирует', 0.5427347421646118)]
    #result2 = [('депозит', 0.7282594442367554), ('посочувствовали', 0.6706955432891846), ('вклада', 0.6341916918754578), ('автокопилку', 0.6184648871421814), ('депозита', 0.6164340972900391), ('вклады', 0.6158702373504639), ('преддефолтный', 0.6100568771362305), ('баррикадной', 0.6076372861862183), ('ргают', 0.6062372922897339), ('потребкредить', 0.5898075103759766)]
    #result3 = отделение

    df['text_without_stopwords'] = df.text.apply(remove_stopwords)
    df['lemmas'] = df['text_without_stopwords'].apply(lemmatize)
    df['lemmas'] = df['lemmas'].apply(remove_stopwords)
    vectors = TfidfVectorizer(max_features=500).fit_transform(
        df['lemmas'][:AMOUNT])
    X_reduced = TruncatedSVD(n_components=5,
                             random_state=40).fit_transform(vectors)
    X_embedded = TSNE(n_components=2, perplexity=5,
                      verbose=0).fit_transform(X_reduced)
    vis_df = pd.DataFrame({
        'X': X_embedded[:200, 0],
        'Y': X_embedded[:200, 1],
        'topic': df.title[:200]
    })

    # t-SNE visualization
    g = sns.FacetGrid(vis_df, hue="topic",
                      size=10).map(plt.scatter, "X", "Y").add_legend()
    g.savefig("tsne.png")
    # red: problems with online service, green: bank refusals

    # place the banks on a 2-D plane
    v1 = model['хороший'] - model['плохой']
    v2 = model['быстрый'] - model['медленный']
    banks = [
        'сбербанк', 'втб', 'тинькофф', 'россельхозбанк', 'росбанк', 'авангард',
        'ситибанк', 'альфабанк'
    ]
    banks_x = []
    banks_y = []
    for bank in banks:
        banks_x.append(np.dot(v1, model[bank]))
        banks_y.append(np.dot(v2, model[bank]))
    fig, ax = plt.subplots()
    ax.scatter(banks_x, banks_y)
    for i, txt in enumerate(banks):
        ax.annotate(txt, (banks_x[i], banks_y[i]))
    # axis labels: 'плохо-хорошо' = bad-good, 'медленно-быстро' = slow-fast
    ax.set(xlabel='плохо-хорошо', ylabel='медленно-быстро')
    fig.savefig('plane.png')
    #plt.show()

    # example of building a word graph
    keys = list(model.wv.vocab.keys())[:AMOUNT]
    g = ig.Graph(directed=True)
    labels = []
    fixes = []
    weights = []
    positive_words = [
        'любезно', 'готовый', 'хороший', 'уважаемый', 'положительный', 'выбор'
    ]
    negative_words = [
        'беспокоить', 'достает', 'неважно', 'неграмотность', 'никак',
        'просрочить'
    ]
    for word in keys:
        label = -1  # unlabeled words
        fix = False
        if word in positive_words:
            label = 1  # positive label
            fix = True
        if word in negative_words:
            label = 0  # negative label
            fix = True
        labels.append(label)
        fixes.append(fix)  # was fixes.append(True), which pinned every word
        g.add_vertex(word)

    for word in keys:
        node = g.vs.select(name=word).indices[0]
        similar_words = model.most_similar(word, topn=10)
        for sim in similar_words:
            try:
                word1 = sim[0]
                val = sim[1]
                new_node = g.vs.select(name=word1).indices[0]
                g.add_edge(node, new_node, weight=val)
                weights.append(val)
            except Exception as err:
                print('Error', err)

    m = g.community_label_propagation(initial=labels,
                                      weights=weights,
                                      fixed=fixes)
    print('membership = ', m.membership)  # array of word labels
    print('labels = ', labels)
    print('weights = ', weights)
    print('len weights = ', len(weights))
    return m
def write_stats(stats, file_obj):
    """Writes a dictionary of statistics as an igraph graph."""
    stats_graph = igraph.Graph()
    for k, v in stats.items():
        stats_graph[k] = v
    write_graph(stats_graph, file_obj)
# -*- coding: utf-8 -*-
import igraph as gr

g = gr.Graph()

# dic.txt holds a Python assignment of the form `dic = {...}`, executed below
file = open("dic.txt", "r")
dic = str(file.read())
code = compile(dic, '<string>', 'exec')
exec(code)
#print(dic["مسعود کیمیایی"][9])

actors = []
f = open("r.txt", "a")
for i in dic.keys():
    g.add_vertex(i)
    for j in dic[i]:
        if j not in actors:
            g.add_vertex(j)
            actors.append(j)
        eid = g.get_eid(i, j, error=False)
        if eid != -1:
            g.es[eid]["weight"] += 1
            # f.write(str(i.encode("utf-8")) + " " + str(j.encode("utf-8")))
            # f.write("\n")
            # print(i, " ", j)
        else:
            # the tail of this snippet was truncated in the source; creating
            # the first co-occurrence edge with weight 1 is the natural
            # completion
            g.add_edge(i, j, weight=1)
def load_citation_network_igraph(data_dir, court_name, directed=True):
    jurisdictions = pd.read_csv(data_dir + 'clean/jurisdictions.csv',
                                index_col='abbrev')
    all_courts = set(jurisdictions.index)
    if not ((court_name in all_courts) or (court_name == 'all')):
        raise ValueError('invalid court_name')

    if court_name == 'all':
        case_metadata = pd.read_csv(data_dir + 'clean/case_metadata_master.csv')
        edgelist = pd.read_csv(data_dir + 'clean/edgelist_master.csv')
    else:
        net_dir = data_dir + 'clean/' + court_name + '/'
        if not os.path.exists(net_dir):
            os.makedirs(net_dir)
            make_court_subnetwork(court_name, data_dir)
        case_metadata = pd.read_csv(net_dir + 'case_metadata.csv')
        edgelist = pd.read_csv(net_dir + 'edgelist.csv')
        edgelist.drop('Unnamed: 0', inplace=True, axis=1)

    # create a dictionary that maps court listener ids to igraph ids
    cl_to_ig_id = {}
    cl_ids = case_metadata['id'].tolist()
    for i in range(case_metadata['id'].size):
        cl_to_ig_id[cl_ids[i]] = i

    # add nodes
    V = case_metadata.shape[0]
    g = ig.Graph(n=V, directed=directed)
    # g.vs['date'] = case_metadata['date'].tolist()
    g.vs['name'] = case_metadata['id'].tolist()

    # create igraph edgelist
    cases_w_metadata = set(cl_to_ig_id.keys())
    ig_edgelist = []
    missing_cases = 0
    for row in edgelist.itertuples():
        cl_ing = row[1]
        cl_ed = row[2]
        if (cl_ing in cases_w_metadata) and (cl_ed in cases_w_metadata):
            # only append edges whose endpoints both have metadata; appending
            # outside this check, as the original did, reuses stale indices
            ig_edgelist.append((cl_to_ig_id[cl_ing], cl_to_ig_id[cl_ed]))
        else:
            missing_cases += 1

    # add edges to graph
    g.add_edges(ig_edgelist)

    # add vertex attributes
    g.vs['court'] = case_metadata['court'].tolist()
    g.vs['year'] = [int(d.split('-')[0])
                    for d in case_metadata['date'].tolist()]

    g.simplify(multiple=True)

    return g
f.close()

# dataset size
num_nodes = len(data['nodes'])
num_edges = len(data['links'])
print('Number of nodes:', num_nodes, 'Number of edges:', num_edges)

# read node names, links and weight information
edges = [(data['links'][k]['source'], data['links'][k]['target'])
         for k in range(num_edges)]
weights = [(data['links'][k]['value']) for k in range(num_edges)]
edges_weights = [(data['links'][k]['source'], data['links'][k]['target'],
                  data['links'][k]['value']) for k in range(num_edges)]

# build a Graph with all nodes and link information; constructing it in one
# call keeps isolated nodes (the original built an empty graph with
# add_vertices and then discarded it by reassigning G_ig)
G_ig = ig.Graph(n=num_nodes, edges=edges, directed=False)

# community detection
result, num_comm = comm_detection(g=G_ig,
                                  algorithm=k_algorithm,
                                  weights_input=weights)
print(len(result[0]), len(result[1]), len(result[2]))

# iterative re-clustering
result = multiple_clustering(comm=result,
                             edge_weight=edges_weights,
                             max_n=max_nodes)
print(len(result[0]), len(result[1]), len(result[2]))

# sort link information into intra-community, inter-community and all links
edges_inside, edges_outside, edges_comm = edges_sort(comm=result, links=edges)

# build a Graph with the nodes and the intra-community links
import igraph
#print(igraph.__version__)

g = igraph.Graph()

# 34 members of the club
g.add_vertices(34)

# Connections from the first matrix
g.add_edges([(0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), (0, 8)])
g.add_edges([(0, 10), (0, 11), (0, 12), (0, 13), (0, 17), (0, 19), (0, 21),
             (0, 31)])
g.add_edges([(1, 2), (1, 3), (1, 7), (1, 13), (1, 17), (1, 19), (1, 21),
             (1, 30)])
g.add_edges([(2, 3), (2, 7), (2, 8), (2, 9), (2, 13), (2, 27), (2, 28),
             (2, 32)])
g.add_edges([(3, 7), (3, 12), (3, 13)])
g.add_edges([(4, 6), (4, 10)])
g.add_edges([(5, 6), (5, 10), (5, 16)])
g.add_edges([(6, 16)])
g.add_edges([(8, 30), (8, 32), (8, 33)])
g.add_edges([(9, 33)])
g.add_edges([(13, 33)])
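# Aside: igraph ships Zachary's karate club as a built-in famous graph, which
# can be used to cross-check the manually entered edge list above.
zachary = igraph.Graph.Famous("Zachary")
print(zachary.vcount(), zachary.ecount())  # 34 78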
""" Created on Wed Apr 18 15:26:11 2018 @author: Jinglin """ import csv import igraph import numpy import matplotlib.pyplot as plt import louvain import pylab as pl import time graph9bus=igraph.Graph() with open('9busnode.csv','rb') as csvfileNode: csvreaderNode=csv.reader(csvfileNode) mycsvNode=list(csvreaderNode) for row in mycsvNode: graph9bus.add_vertex(name=row[0]) nodeNumber=graph9bus.vcount() SVQ=numpy.zeros((nodeNumber,nodeNumber)) with open('9busbranch.csv','rb') as csvfileBranch: csvreaderBranch=csv.reader(csvfileBranch) mycsvBranch=list(csvreaderBranch) for row in mycsvBranch: B=(1/complex(float(row[2]),float(row[3]))).imag
# Solution to exercise 1.4 a
def component(network, node):
    vizsgalt = []  # "vizsgalt" = visited

    def magic(node_list):
        for i in node_list:
            if i not in vizsgalt:
                vizsgalt.append(i)
                magic(network.neighbors(i))

    magic([node])
    return vizsgalt


# Solution to exercise 1.4 b
def delta(network, node_list):
    cc = network.subgraph(node_list)
    return cc.ecount() - cc.vcount()


if __name__ == "__main__":
    import igraph
    print("Exercise A: list of all vertices of the component containing the given vertex:")
    net = igraph.Graph(8, directed=False)
    net.add_edges([(0, 1), (1, 2), (2, 3), (3, 4), (3, 5), (6, 7)])
    node = 2
    comp = component(net, node)
    print(comp)
    print("Exercise B: subtract the number of vertices from the number of edges of the component induced by the given vertices:")
    #net2 = igraph.Graph.Erdos_Renyi(20, 0.08)
    #nodes = [3, 5, 15, 18, 11, 6]
    print(delta(net, comp))
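# Cross-check (not part of the exercise): igraph's built-in subcomponent()
# returns the same vertex set as the recursive component() above.
import igraph

check_net = igraph.Graph(8, directed=False)
check_net.add_edges([(0, 1), (1, 2), (2, 3), (3, 4), (3, 5), (6, 7)])
print(sorted(check_net.subcomponent(2)))  # [0, 1, 2, 3, 4, 5]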
import igraph
import networkx


def from_graph6(graph6_str):
    "Convert a graph6 ASCII-encoded string to an igraph Graph."
    nx_graph = networkx.readwrite.from_graph6_bytes(
        b">>graph6<<" + graph6_str.strip().encode("ascii"))
    edges = list(nx_graph.edges)
    # pass the vertex count explicitly so isolated vertices are preserved
    return igraph.Graph(n=nx_graph.number_of_nodes(), edges=edges)
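# Usage sketch: "C~" is the graph6 encoding of the complete graph K4.
k4 = from_graph6("C~")
print(k4.vcount(), k4.ecount())  # 4 6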