def visualize_related_nodes(path, tag, outp_title):
    """Construct the knowledge graph limited to the ancestors and descendants
    of a given node. Useful for understanding the bottleneck scores.

    path -- the location of the content, e.g. knowledge-maps/content
    tag -- the tag to find ancestors and descendants of
    outp_title -- basename for the outputs; writes <outp_title>.dot,
        <outp_title>.svg, and <outp_title>.json
    """
    import subprocess  # local import: list-form call avoids shell quoting bugs

    dot_file = '%s.dot' % outp_title
    svg_file = '%s.svg' % outp_title
    json_file = '%s.json' % outp_title

    nodes = formats.read_nodes(path)  # TODO should we always read all nodes?
    nodes = graphs.remove_missing_links(nodes)
    graph = graphs.Graph.from_node_dependencies(nodes)

    ancestors = graphs.ancestors_set(nodes, graph, tag)
    descendants = graphs.descendants_set(nodes, graph, tag)
    relevant = set([tag]).union(ancestors).union(descendants)

    # Restrict to the relevant subgraph, then rebuild so the graph only
    # contains edges between the surviving nodes.  (Use `t`, not `tag`, to
    # avoid shadowing the parameter.)
    nodes = {t: node for t, node in nodes.items() if t in relevant}
    nodes = graphs.remove_missing_links(nodes)
    graph = graphs.Graph.from_node_dependencies(nodes)

    # Context managers guarantee the output handles are flushed and closed
    # before Graphviz reads the .dot file.
    with open(dot_file, 'w') as f:
        formats.write_graph_dot(nodes, graph, f)
    # Argument list (no shell) so outp_title with spaces/metacharacters works.
    subprocess.call(['dot', '-Tsvg', dot_file, '-o', svg_file])
    with open(json_file, 'w') as f:
        formats.write_graph_json(nodes, graph, f)
def generate_full_graph(path, outp_title):
    """Construct the full knowledge graph.

    path -- the location of the content, e.g. knowledge-maps/content
    outp_title -- basename for the outputs; writes <outp_title>.dot,
        <outp_title>.svg, and <outp_title>.json
    """
    import subprocess  # local import: list-form call avoids shell quoting bugs

    dot_file = '%s.dot' % outp_title
    svg_file = '%s.svg' % outp_title
    json_file = '%s.json' % outp_title

    nodes = formats.read_nodes(path)
    nodes = graphs.remove_missing_links(nodes)
    graph = graphs.Graph.from_node_dependencies(nodes)

    # Context managers guarantee the output handles are flushed and closed
    # before Graphviz reads the .dot file.
    with open(dot_file, 'w') as f:
        formats.write_graph_dot(nodes, graph, f)
    # Argument list (no shell) so outp_title with spaces/metacharacters works.
    subprocess.call(['dot', '-Tsvg', dot_file, '-o', svg_file])
    with open(json_file, 'w') as f:
        formats.write_graph_json(nodes, graph, f)
    # NOTE(review): this span begins mid-function — judging by the call in the
    # __main__ block below, it is the tail of write_wiki_summary(node_title,
    # node_dir); the `def` line and `wiki_ep` are outside this view — confirm.
    # Ask the MediaWiki API for a one-sentence plain-text intro extract,
    # returned as XML.  (Python 2 APIs: urllib.quote_plus / urllib2.)
    urlparams = '?action=query&redirects&prop=extracts&exintro&explaintext&exsectionformat=plain&exsentences=1&format=xml&titles=%s'\
        % urllib.quote_plus(node_title)
    rquest = urllib2.Request(wiki_ep + urlparams)
    xmlresp = parseXML(urllib2.urlopen(rquest))
    extxt = xmlresp.getElementsByTagName('extract')
    if len(extxt):
        # flatten the extract to a single line; fall back to empty string
        # when the API returned no <extract> element
        summary = extxt[0].firstChild.wholeText.replace('\n',' ')
    else:
        summary = ''

    # cache the wiki summary
    # node_dir is <content_path>/nodes/<tag>; split twice to recover both parts
    temp, tag = os.path.split(node_dir)
    content_path, _ = os.path.split(temp)
    wiki_summary_file = formats.wiki_summary_file(content_path, tag)
    with open(wiki_summary_file, 'w') as wikif:
        wikif.write(summary.encode('utf-8'))

if __name__=="__main__":
    # For every node in the content tree, fetch and cache a Wikipedia summary
    # unless a hand-written summary or a cached wiki summary already exists.
    nodes = formats.read_nodes(config.CONTENT_PATH)
    for node_tag in nodes:
        node = nodes[node_tag]
        node_dir = os.path.join(config.CONTENT_PATH, 'nodes', node.tag)
        summary_file = formats.summary_file(config.CONTENT_PATH, node.tag)
        wiki_summary_file = formats.wiki_summary_file(config.CONTENT_PATH, node.tag)
        if not os.path.exists(summary_file) and not os.path.exists(wiki_summary_file):
            # prefer the human-readable title as the wiki page name; fall back
            # to the tag when no title is set
            if node.title:
                ttl = node.title
            else:
                ttl = node.tag
            write_wiki_summary(ttl, node_dir)