# NOTE(review): Python 2 source (print statements, %-formatting); original
# whitespace was mangled — conventional formatting restored, tokens unchanged.
import sys
import urllib
import json

from wekeypedia.wikipedia_page import WikipediaPage as Page, url2title, url2lang
from wekeypedia.wikipedia_network import WikipediaNetwork
from wekeypedia.exporter.nx_json import NetworkxJson

# Seed page; its outgoing links become the keyword set of the network below.
source = "List_of_geometry_topics"

# Build a Wikipedia link network seeded from the pages linked by `source`.
hl = WikipediaNetwork("")
hl.keywords = hl.get_page_links(source)
hl.build()

# Serialize the resulting graph in adjacency JSON layout.
exporter = NetworkxJson(hl.graph)
exporter.nx_export("adjacency", "_data/network.json")

# print hl.keywords
print ""

def fetch_page(source):
  """Fetch a page's info and revision content from the Wikipedia API.

  source -- page title; stripped of surrounding whitespace before the call.
  Dumps the raw API response to path_points/<source>.json.
  """
  print "📄 fetching: %s" % source.encode('utf-8-sig')
  p = Page()
  # Request page metadata and parsed revision content, following redirects.
  r = p.fetch_from_api_title(source.strip(), { "redirects":"true", "rvparse" : "true", "prop": "info|revisions", "inprop": "url", "rvprop": "content" })
  # Store the raw API response verbatim for later processing.
  with open("path_points/%s.json" % (source), "w") as f:
    json.dump(r, f)
  # NOTE(review): `codecs` is not imported in this chunk — confirm it is
  # imported elsewhere in the file, otherwise this raises NameError.
  # NOTE(review): definition is truncated here — the body of this `with`
  # statement is not visible in this chunk. (`file` also shadows a builtin.)
  with codecs.open("_path_points/%s.md" % (source), "w", "utf-8-sig") as file:
# NOTE(review): the first three statements below are the TAIL of a function
# whose `def` line is not visible in this chunk (a source-list reader using
# `parse_source_line` / `da_source`); indentation restored to keep them inside
# that body. Original whitespace was mangled — formatting reconstructed,
# tokens unchanged.
  da_list = []
  # NOTE(review): Python 2 `map` returns a list here; the file handle opened
  # inline is never explicitly closed — relies on GC/interpreter exit.
  da_list = map(parse_source_line, open(da_source, "r"))
  return da_list

def store_mapping(mapping):
  """Persist a title mapping as "<argv[1]>.mapping.csv".

  mapping -- mapping data accepted by the Mapping helper (defined elsewhere
  in the project; not visible in this chunk).
  """
  m = Mapping(mapping)
  m.csv("%s.mapping.csv" % (sys.argv[1]))

def get_wikipedia_network_structure(concepts):
  """Build a WikipediaNetwork over `concepts` and return its graph.

  Side effect: stores the network's title mapping to CSV via store_mapping
  (which reads sys.argv[1] for the output name).
  """
  wkn = WikipediaNetwork(concepts)
  wkn.build()
  store_mapping(wkn.mapping)
  return wkn.graph

# Script entry: argv[1] names a source-list file; build the network and
# export it in both node-link and adjacency JSON layouts.
# NOTE(review): `open_source_list` is not defined in this chunk — presumably
# the headless function whose tail appears at the top of this block; verify.
if len(sys.argv) > 1:
  source = sys.argv[1]
  concepts = open_source_list(source)
  graph = get_wikipedia_network_structure(concepts)
  for export_type in ["node_link", "adjacency"]:
    target_name = "%s.%s.json" % (source, export_type)
    exporter = NetworkxJson(graph)
    exporter.nx_export(export_type, target_name)