def __init__(self, graph, outfile):
    """Convert the RDF triples in ``graph`` to a network and write it out.

    Walks every triple of every context in ``graph`` and builds a
    ``nx.MultiDiGraph`` in ``self.network``.  ``rdf:type`` values, and
    literal objects whose subject is already in the network, are stored
    as attributes on the subject's node; every other triple becomes a
    labelled, weighted edge.  A node/edge count is printed and the
    network is then handed to ``gexf.write_gexf`` along with ``outfile``.

    :param graph: RDF graph to convert; must provide ``contexts()``
        (presumably an rdflib ConjunctiveGraph -- TODO confirm)
    :param outfile: output destination, passed unchanged to
        ``gexf.write_gexf``
    """
    self.outfile = outfile
    self.graph = graph
    self.network = nx.MultiDiGraph()
    # currently only consumed by the disabled verbose report below
    edge_labels = set()

    rdf_type = rdflib.RDF.type
    # NOTE: rdf sequences are skipped because treating them
    # as normal triples makes for weird results
    sequence_preds = (rdflib.RDF.first, rdflib.RDF.rest)
    match_all = (None, None, None)

    # go through the rdf triples context by context and add each
    # one to the network graph
    for context in self.graph.contexts():
        for triple in context.triples(match_all):
            subject, predicate, target = triple
            if predicate in sequence_preds:
                continue

            # make sure subject and object are present in the network
            # as nodes, if appropriate
            self._add_nodes(triple)

            # short-hand name, used as property name or edge label
            label = self._edge_label(predicate)

            is_type = predicate == rdf_type
            is_node_literal = (subject in self.network
                               and isinstance(target, rdflib.Literal))

            if not (is_node_literal or is_type):
                # a relation between two resources: add an edge
                # NOTE: gephi doesn't support multiple edges, and
                # the d3/json output probably elides them also.
                # Consider instead: if an edge already exists,
                # add to the strength of the existing edge
                edge_labels.add(label)
                source_id = self._uri_to_node_id(subject)
                target_id = self._uri_to_node_id(target)
                self.network.add_edge(
                    source_id, target_id, label=label,
                    weight=connection_weights.get(label, 1))
                continue

            # literal or rdf:type: store on the subject node as a property
            if is_type:
                _, value = rdflib.namespace.split_uri(target)
                # special case (for now): a Manuscript whose dc:title
                # is a blank node is labelled as a group sheet
                if value == 'Manuscript' and isinstance(
                        context.value(subject, rdfns.DC.title),
                        rdflib.BNode):
                    value = 'BelfastGroupSheet'
            else:
                value = unicode(target)

            cleaned = normalize_whitespace(value)
            self.network.node[self._uri_to_node_id(subject)][label] = cleaned

    # single-argument parenthesised form prints the same text under
    # the Python 2 print statement
    print('%d nodes, %d edges in full network' %
          (self.network.number_of_nodes(), self.network.number_of_edges()))

    # TODO: useful for verbose output? (also report on relations with no weight?)
    #print 'edge labels: %s' % ', '.join(edge_labels)

    gexf.write_gexf(self.network, self.outfile)
def _node_label(self, res):
    """Pick a display label for the resource ``res``.

    Sources are tried in order and the first one that yields a value
    wins:

    1. the first value returned by ``self.graph.preferredLabel``
    2. the schema.org ``name`` of the resource
    3. the ``dc:title``; when the title is a blank node it is read as
       an rdf collection and rendered as ``'Group sheet: a; b; ...'``,
       cut to 50 characters plus ``' ...'`` if longer
    4. the local part of the resource's ``rdf:type`` URI

    Results from the first three sources are passed through
    ``normalize_whitespace``.

    :param res: resource to look up in ``self.graph``
    :returns: the label, or None when none of the sources has a value
    """
    # NOTE: consider adding/calculating a preferredlabel
    # for important nodes in our data

    # preferredLabel returns a list of (labelprop, value) pairs, where
    # labelprop is preflabel or label; take the first value, and fall
    # back to the schema.org name when there is nothing usable
    preferred = self.graph.preferredLabel(res)
    name = (preferred[0][1] if preferred else None) \
        or self.graph.value(res, rdfns.SCHEMA_ORG.name)
    if name:
        return normalize_whitespace(name)

    title = self.graph.value(res, rdfns.DC.title)
    if title:
        if isinstance(title, rdflib.BNode):
            # blank-node title: convert from list/collection
            members = RdfCollection(self.graph, title)
            title = 'Group sheet: ' + '; '.join(members)
            # truncate the joined list if too long
            if len(title) > 50:
                title = title[:50] + ' ...'
        # any other title should be a literal (no conversion needed)
        return normalize_whitespace(title)

    # as a fall-back, use the type for a label
    rdf_type = self.graph.value(res, rdflib.RDF.type)
    if not rdf_type:
        return None
    _, short_type = rdflib.namespace.split_uri(rdf_type)
    return short_type