def dump(nodes, weighted_edges, white, black, flatten_colours, flatten_weights):
    """Write nodes and weighted edges to 'uykfe.gexf' as a GEXF graph.

    Edge colours are interpolated between `white` and `black` based on
    (optionally flattened) log-scaled weights.

    :param nodes: iterable of objects with `.id` and `.name`
    :param weighted_edges: iterable of (weight, edge) pairs; edges have
        `.from_` / `.to_` objects with `.id`
    :param white, black: endpoint colours passed to `rgb`
    :param flatten_colours: use flattened weights for colour interpolation
    :param flatten_weights: use flattened weights for the stored edge weight
    """
    graph = DiGraph()
    for node in nodes:
        graph.add_node(node.id, label=node.name)
    (min_weight, max_weight) = log_range('Weight',
                                         map(lambda we: we[0], weighted_edges))
    if flatten_colours or flatten_weights:
        flattened_weights = flatten(
            dict((weight, weight) for (weight, edge) in weighted_edges))
    for (weight, edge) in weighted_edges:
        colour = flattened_weights[weight] if flatten_colours else (
            (weight - min_weight) / (max_weight - min_weight))
        weight = flattened_weights[weight] if flatten_weights else weight
        graph.add_edge(edge.from_.id, edge.to_.id,
                       weight=weight, viz=rgb(colour, white, black))
    write_gexf(graph, 'uykfe.gexf')
    # fix namespace bug: strip the duplicate viz namespace declaration.
    # BUG FIX: renamed handles that shadowed the builtins `input`/`output`,
    # and dropped the pointless `xml = output.write(xml)` re-assignment.
    with open('uykfe.gexf') as infile:
        xml = infile.read()
    xml = xml.replace('xmlns:viz="http://www.gexf.net/1.1draft/viz" ', '', 1)
    with open('uykfe.gexf', 'w') as outfile:
        outfile.write(xml)
def __init__(self, graph, outfile):
    """Convert an rdflib context-aware graph into a NetworkX multi-digraph
    and serialize it to `outfile` in GEXF format.

    Literal objects (and rdf:type values) become node properties on the
    subject; all other triples become labeled, weighted edges.

    :param graph: rdflib graph supporting `.contexts()`
    :param outfile: path or file-like object passed to `gexf.write_gexf`
    """
    self.outfile = outfile
    self.graph = graph
    self.network = nx.MultiDiGraph()
    edge_labels = set()
    # iterate through rdf triples and add to the network graph
    # NOTE: could also iterate through the graph by contexts...
    for cx in self.graph.contexts():
        for triple in cx.triples((None, None, None)):
            subj, pred, obj = triple
            # NOTE: skipping rdf sequences here because treating
            # as normal triples makes for weird results
            if pred == rdflib.RDF.first or pred == rdflib.RDF.rest:
                continue
            # make sure subject and object are added to the graph as nodes,
            # if appropriate
            self._add_nodes(triple)
            # get the short-hand name for property or edge label
            name = self._edge_label(pred)
            # if the object is a literal, add it to the node as a property
            # of the subject.
            # NOTE(review): `and` binds tighter than `or` here — the branch
            # is taken for (known subject AND literal) OR any rdf:type triple.
            if subj in self.network and isinstance(obj, rdflib.Literal) \
                    or pred == rdflib.RDF.type:
                if pred == rdflib.RDF.type:
                    ns, val = rdflib.namespace.split_uri(obj)
                    # special case (for now): a Manuscript with a blank-node
                    # title is treated as a Belfast Group sheet
                    if val == 'Manuscript' and isinstance(
                            cx.value(subj, rdfns.DC.title), rdflib.BNode):
                        val = 'BelfastGroupSheet'
                else:
                    val = unicode(obj)
                self.network.node[self._uri_to_node_id(subj)][name] = \
                    normalize_whitespace(val)
            # otherwise, add an edge between the two resource nodes
            else:
                # NOTE: gephi doesn't support multiple edges, and
                # the d3/json output probably elides them also.
                # Consider instead: if an edge already exists,
                # add to the strength of the existing edge
                edge_labels.add(name)
                self.network.add_edge(self._uri_to_node_id(subj),
                                      self._uri_to_node_id(obj),
                                      label=name,
                                      weight=connection_weights.get(name, 1))
    print '%d nodes, %d edges in full network' % \
        (self.network.number_of_nodes(), self.network.number_of_edges())
    # TODO: useful for verbose output? (also report on relations with no weight?)
    #print 'edge labels: %s' % ', '.join(edge_labels)
    gexf.write_gexf(self.network, self.outfile)
def full_gexf(request):
    """Serve the full Belfast Group network (the same data exposed by
    :meth:`full_js`) as a downloadable GEXF document for tools like Gephi."""
    out = StringIO()
    gexf.write_gexf(_network_graph(), out)
    resp = HttpResponse(out.getvalue(), content_type="application/gexf+xml")
    resp["Content-Disposition"] = "attachment; filename=belfastgroup.gexf"
    return resp
def __init__(self, files, outfile):
    """Parse RDF input files into a single graph, convert the triples into a
    NetworkX multi-digraph, and serialize it to `outfile` as GEXF.

    :param files: iterable of file paths parseable by `rdflib.Graph.parse`
    :param outfile: path or file-like object passed to `gexf.write_gexf`
    """
    self.outfile = outfile
    self.graph = rdflib.Graph()
    for infile in files:
        self.graph.parse(infile)
    print '%d triples in %d files' % (len(self.graph), len(files))
    self.network = nx.MultiDiGraph()
    edge_labels = set()
    # iterate through rdf triples and add to the graph
    for triple in self.graph:
        subj, pred, obj = triple
        # skip rdf sequence bookkeeping triples entirely
        if pred == rdflib.RDF.first or pred == rdflib.RDF.rest:
            continue
        # FIXME: iterating through all triples results in
        # rdf sequences (first/rest) being handled weirdly...
        # make sure subject and object are added to the graph as nodes,
        # if appropriate
        self._add_nodes(triple)
        # get the short-hand name for property or edge label
        name = self._edge_label(pred)
        # if the object is a literal, add it to the node as a property of
        # the subject.
        # NOTE(review): `and` binds tighter than `or` — branch is taken for
        # (known subject AND literal) OR any rdf:type triple.
        if subj in self.network and isinstance(obj, rdflib.Literal) \
                or pred == rdflib.RDF.type:
            if pred == rdflib.RDF.type:
                ns, val = rdflib.namespace.split_uri(obj)
                # special case (for now): Manuscript with a blank-node title
                # is treated as a Belfast Group sheet
                if val == 'Manuscript':
                    if isinstance(self.graph.value(subj, DC.title),
                                  rdflib.BNode):
                        val = 'BelfastGroupSheet'
            else:
                val = unicode(obj)
            self.network.node[subj][name] = val
        # otherwise, add an edge between the two resource nodes
        else:
            edge_labels.add(name)
            self.network.add_edge(subj, obj, label=name,
                                  weight=connection_weights.get(name, 1))
    print '%d nodes, %d edges' % (self.network.number_of_nodes(),
                                  self.network.number_of_edges())
    # TODO: useful for verbose output? (also report on relations with no weight?)
    #print 'edge labels: %s' % ', '.join(edge_labels)
    gexf.write_gexf(self.network, self.outfile)
def write_data(maindir, nxgraph):
    """
    Writes data from networkx graph to .json and .gexf file formats for
    rendering by other apps.

    :param maindir: Scrape directory, with subdirectories for each domain
    :param nxgraph: The networkx graph to be populated
    :return: None
    """
    data = json_graph.node_link_data(nxgraph)  # json formatted data
    # BUG FIX: the original `split('/')[-2]` assumed a trailing slash and
    # silently picked the parent directory without one; rstrip makes the
    # last path component come out either way.
    filename = maindir.rstrip('/').split('/')[-1]
    # NOTE(review): `start` is a module-level global — presumably the
    # /Grapher directory set at startup; verify before reuse elsewhere.
    os.chdir(start)  # Back to /Grapher
    with open('%s_graph.json' % filename, 'w') as w:
        json.dump(data, w)
    gexf.write_gexf(nxgraph, '%s_graph.gexf' % filename)
def gexf_content(request, mode):
    """Make network data available as GEXF files for download and use in
    tools like Gephi.

    :param mode: one of "all", "group-people", or "groupsheets"; any other
        value raises :class:`ValueError`.
    """
    if mode == "all":
        graph = network_data()
    elif mode == "group-people":
        # filtered graph of people/places/organizations used for
        # first BG network graph displayed on the site
        # - same data used in :meth:`full_js`
        graph = _network_graph()
    elif mode == "groupsheets":
        graph = gexf.read_gexf(settings.GEXF_DATA["bg1"])
    else:
        # BUG FIX: an unrecognized mode previously fell through and raised
        # an opaque UnboundLocalError on `graph`; fail explicitly instead.
        raise ValueError('Unsupported network mode: %r' % (mode,))
    buf = StringIO()
    gexf.write_gexf(graph, buf)
    response = HttpResponse(buf.getvalue(), content_type="application/gexf+xml")
    response["Content-Disposition"] = "attachment; filename=belfastgroup-%s.gexf" % mode
    return response
def export_graph(graph, save_path=_get_module_root_dir(), no_verbose=False):
    """Export `graph` to ``<save_path>/graph.gexf``.

    Prints a diagnostic (unless `no_verbose`) and returns None when the
    arguments are falsy, when the graph cannot be coerced to a supported
    type, or when writing fails.

    :param graph: dict or networkx DiGraph (coerced via `_ensure_right_type`)
    :param save_path: destination directory; defaults to the module root
        (NOTE: evaluated once at import time, not per call)
    :param no_verbose: suppress diagnostic output when True
    """
    if not save_path or not graph:
        if not no_verbose:
            print('Failed to save graph:\nsave_path =', save_path, '\ngraph =', graph)
        return
    if not save_path.endswith('/'):
        save_path += '/'
    graph = _ensure_right_type(graph)
    if graph is None:
        if not no_verbose:
            print(
                'Failed to export graph. graph should be a dict or networkx.classes.digraph.DiGraph',
                graph)
        return
    try:
        gexf.write_gexf(graph, save_path + 'graph.gexf')
    except IOError as e:
        if not no_verbose:
            # BUG FIX: IOError has no `.reason` attribute — the original
            # handler itself raised AttributeError; print the exception.
            print('Failed to export graph: ', e)
def draw_graph(previous_samples, results, session):
    """Build an undirected graph linking related samples and write it to
    'output.gexf'.

    :param previous_samples: mapping of sha256 -> related-sample analyses
    :param results: iterable of (sha256, analysis_id, file_name) tuples
    :param session: authenticated HTTP session used to fetch gene counts
    """
    graph = nx.Graph()
    graph.add_nodes_from(previous_samples)
    # NOTE(review): `labels` is built (with one HTTP call per result) but
    # never read in this function — verify whether it is still needed.
    labels = {}
    for sha256, analysis_id, file_name in results:
        url = BASE_URL + '/analyses/{}/sub-analyses/root/code-reuse'.format(analysis_id)
        response = session.get(url)
        response.raise_for_status()
        gene_count = response.json()['gene_count']
        labels[sha256] = '{} ({})'.format(file_name, gene_count)
    # connect each sample to every related sample we also have a node for
    for sha256, related_samples in previous_samples.items():
        for analysis in related_samples:
            other = analysis['analysis']['sha256']
            if other in previous_samples:
                graph.add_edge(sha256, other,
                               gene_count=analysis['reused_genes']['gene_count'])
    gexf.write_gexf(graph, 'output.gexf')
    print('graph was saved as output.gexf')
def dump(nodes, weighted_edges, white, black, flatten_colours, flatten_weights):
    """Write nodes and weighted edges to 'uykfe.gexf' as a GEXF graph.

    Edge colours are interpolated between `white` and `black` based on
    (optionally flattened) log-scaled weights.

    :param nodes: iterable of objects with `.id` and `.name`
    :param weighted_edges: iterable of (weight, edge) pairs; edges have
        `.from_` / `.to_` objects with `.id`
    :param white, black: endpoint colours passed to `rgb`
    :param flatten_colours: use flattened weights for colour interpolation
    :param flatten_weights: use flattened weights for the stored edge weight
    """
    graph = DiGraph()
    for node in nodes:
        graph.add_node(node.id, label=node.name)
    (min_weight, max_weight) = log_range('Weight',
                                         map(lambda we: we[0], weighted_edges))
    if flatten_colours or flatten_weights:
        flattened_weights = flatten(
            dict((weight, weight) for (weight, edge) in weighted_edges))
    for (weight, edge) in weighted_edges:
        colour = flattened_weights[weight] if flatten_colours else (
            (weight - min_weight) / (max_weight - min_weight))
        weight = flattened_weights[weight] if flatten_weights else weight
        graph.add_edge(edge.from_.id, edge.to_.id,
                       weight=weight, viz=rgb(colour, white, black))
    write_gexf(graph, 'uykfe.gexf')
    # fix namespace bug: strip the duplicate viz namespace declaration.
    # BUG FIX: renamed handles that shadowed the builtins `input`/`output`,
    # and dropped the pointless `xml = output.write(xml)` re-assignment.
    with open('uykfe.gexf') as infile:
        xml = infile.read()
    xml = xml.replace('xmlns:viz="http://www.gexf.net/1.1draft/viz" ', '', 1)
    with open('uykfe.gexf', 'w') as outfile:
        outfile.write(xml)
def export_gexf(G, path):
    """Serialize graph ``G`` to ``path`` in GEXF format."""
    gexf.write_gexf(G, path)
def write(fh, graph, format=None):
    """Write `graph` to the open file handle `fh` as GEXF.

    The `format` argument is accepted for interface compatibility but is
    currently ignored — the output is always GEXF.
    """
    from networkx.readwrite import gexf
    gexf.write_gexf(graph, fh)
def __init__(self, graph, outfile): self.outfile = outfile self.graph = graph self.network = nx.Graph() for bg in self.bg_nodes: self.network.add_node(bg, label=bg, type='Organization') # assert the two phases are connected to each other: # self.edge_weights[(self.bg_nodes[0], self.bg_nodes[1])] += 1 ms = set(list(graph.subjects(predicate=rdflib.RDF.type, object=rdfns.BG.GroupSheet))) for m in ms: coverage = graph.value(subject=m, predicate=rdfns.DC.coverage) bg_period = '%s, %s' % (self.bg_label, coverage) if bg_period not in self.bg_nodes: print 'Error: coverage %s doesn\'t map to a recognized Belfast Group period' % coverage continue authors = list(graph.objects(subject=m, predicate=rdfns.DC.creator)) for i, a in enumerate(authors): author_id = str(a) # stringify author uri # if not in the network, add it if author_id not in self.network: name = None # use preferred label instead if possible names = graph.preferredLabel(a) # returns list of labelprop (preflabel or label), value # if we got any matches, grab the first value if names: name = names[0][1] if not name: name = graph.value(a, rdfns.SCHEMA_ORG.name) self.network.add_node(author_id, # label=graph.value(a, rdfns.SCHEMA_ORG.name), label=name, type='Person') # increase connection weight by one for each groupsheet self.edge_weights[(author_id, bg_period)] += 0.4 # make connection between co-authors if len(authors) > (i + 1): for co_author in authors[i+1:]: self.edge_weights[(author_id, str(co_author))] += 0.2 # groupsheet owners are also associated with the group of the same period # and the groupsheet authors owners = list(graph.subjects(predicate=rdfns.SCHEMA_ORG.owns, object=m)) for i, o in enumerate(owners): # same basic logic as for owners owner_id = str(o) if owner_id not in self.network: # use preferred label if available; otherwise, use name names = graph.preferredLabel(o) # returns list of labelprop (preflabel or label), value # if we got any matches, grab the first value if names: name = 
names[0][1] if not name: name = graph.value(o, rdfns.SCHEMA_ORG.name) self.network.add_node(owner_id, label=name, type='Person') # increase connection weight by one for each groupsheet self.edge_weights[(owner_id, bg_period)] += 0.2 # connected to groupsheet authors for auth in authors: self.edge_weights[(owner_id, str(auth))] += 0.2 # connected to other groupsheet owners if len(owners) > (i + 1): for co_owner in owners[i+1:]: self.edge_weights[(owner_id, str(co_owner))] += 0.2 # convert dict into list of tuple that can be easily added to the network graph edge_bunch = [(s, t, w) for (s, t), w in self.edge_weights.iteritems()] self.network.add_weighted_edges_from(edge_bunch) print '%d nodes, %d edges in Belfast Group network based on groupsheets' \ % (self.network.number_of_nodes(), self.network.number_of_edges()) gexf.write_gexf(self.network, self.outfile)
def save_graph(G, filepath):
    """Persist graph ``G`` to ``filepath`` as a GEXF document."""
    gexf.write_gexf(G, filepath)
def export_to_gexf(self, filename=None):
    """Write the dependency graph out in GEXF format.

    :param filename: target path; when omitted (or falsy) falls back to
        ``self.filename + '.gexf'``.
    """
    from networkx.readwrite.gexf import write_gexf
    target = filename or (self.filename + '.gexf')
    write_gexf(self.dep_graph, target)
def export_to_gexf(self, fname):
    """Serialize ``self.G`` to *fname* as a GEXF file."""
    write_gexf(self.G, fname)
def export_to_gexf(self, fname):
    """Dump the wrapped graph ``self.G`` to *fname* in GEXF format."""
    write_gexf(self.G, fname)
# Merge duplicate author names authors = [(author, HumanName(author)) for author in solar_coauthorship.nodes] lnfi = defaultdict(list) # Last name, first initial for author, parsed in authors: if len(parsed.first) > 0: lnfi[parsed.last, parsed.first[0]].append((author, parsed)) for (last_name, first_initial), group in lnfi.items(): # Filter non-initials only filtered_group = [(author, parsed) for author, parsed in group if len(parsed.first.rstrip(".")) > 1] if len(filtered_group) >= 1: # Sort by descending degree sorted_group = sorted( filtered_group, key=lambda g: nx.degree(solar_coauthorship, g[0]), reverse=True, ) first_name = sorted_group[0][1].first for alt_author, alt_parsed in sorted_group[1:]: if SequenceMatcher(None, first_name, alt_parsed.first).ratio() >= 0.8: alt_parsed.first = first_name else: print( f"Warning: Multiple first name candidates for last name " f"{last_name} and first initial {first_initial}: {first_name} " f"and {alt_parsed.first}") write_gexf(solar_coauthorship, "solar/solar_coauthorship.gexf.gz")
# Iteratively decompose the graph: repeatedly remove the node with the
# highest betweenness, logging a row and writing a GEXF snapshot each step.
G_start = G_betweenness_gt_0.copy()
df_decompose_g = pd.DataFrame({
    # columns produced by make_row():
    # 'iteration': [],
    # 'tag': [],
    # 'betweeness': [],
    # 'nodes': [],
    # 'edges': [],
    # 'filename': []
})
# NOTE(review): DataFrame.append is deprecated in modern pandas; consider
# pd.concat when this notebook is next updated.
df_decompose_g = df_decompose_g.append(make_row(-1, 'start', 0, G_start),
                                       ignore_index=True)
# snapshot of the initial graph before any removals
gexf.write_gexf(G_start, df_decompose_g.iloc[0, :]['filename'])
# %%
k = 500
for i in tqdm.tqdm(range(k)):
    # highest-betweenness node for this iteration (first row of the
    # dataframe returned by df_from_betweeness — assumed sorted descending)
    temp_betweenness_df = df_from_betweeness(G_start)
    tag_str, betweenness_value = temp_betweenness_df.iloc[0, :].tag, temp_betweenness_df.iloc[0, :].betweenness
    df_decompose_g = df_decompose_g.append(make_row(i, tag_str, betweenness_value, G_start),
                                           ignore_index=True)
    print(f"{i:02d}-{tag_str}: {betweenness_value} >>>> {df_decompose_g.iloc[i+1, :]['filename']}")
    G_start.remove_node(tag_str)
    # write the post-removal graph under the filename logged for this step
    # print(df_decompose_g.iloc[i+1,:]['filename'])
    gexf.write_gexf(G_start, df_decompose_g.iloc[i+1,:]['filename'])
df_decompose_g.to_csv('3mj/decomposed_3mj.csv')
# %%
df_decompose_g.edges.plot.line()
def save_gexf(self, pth):
    """Write the wrapped graph to *pth* as pretty-printed GEXF."""
    write_gexf(self.g, pth, prettyprint=True)
import pandas as pd
import networkx as nx
from networkx.readwrite.gexf import write_gexf

# Build a bipartite company/officer graph from the pickled S&P 500 table
# and save it as GEXF.
df = pd.read_pickle('sp500.pkl')

G = nx.Graph()
for record in df.itertuples():
    # one node per ticker symbol, one per officer, linked by employment
    G.add_node(record.symbol, type='company')
    G.add_node(record.Name, type='officer')
    G.add_edge(record.symbol, record.Name)

write_gexf(G, 'graph.gexf')
def export_to_gexf(self, filename):
    """Serialize the underlying graph to *filename* in GEXF format."""
    write_gexf(self.graph, filename)