def link_politician(graph, key):
    """Link every election winner to a ``/government/politician`` on Freebase.

    Only winners are considered, because candidates who never won are
    unlikely to be listed under the ``/government/politician`` category.
    For each winner, the divisions and years they won are gathered and
    printed alongside the name to give better matching information.

    Candidates that already carry an ``owl:sameAs`` link are skipped, the
    same way ``link_genelection`` and ``link_division`` skip linked nodes.

    :param graph: graph that is queried with SPARQL and receives the new
        ``owl:sameAs`` triples.
    :param key: passed through unchanged to ``query_freebase``.
    """
    sparql = (
        "select distinct ?candidate ?candidateName ?divisionName ?year where { "
        "?run rdf:type type:ProvincialCandidateRun . "
        "?run prop:election ?elec . "
        "?elec prop:generalElection ?genelec . "
        "?genelec prop:year ?year . "
        "?elec prop:division ?division . "
        "?division dc:description ?divisionName . "
        "?run prop:runningCandidate ?candidate . "
        "?candidate foaf:name ?candidateName . "
        "?run prop:won true . "
        "} "
    )
    namespaces = {'rdf': RDF, 'type': ns_type, 'prop': ns_property, 'dc': DC, 'foaf': FOAF}
    # groupby() only merges adjacent rows, so the rows are sorted first; the
    # candidate is the first column and therefore the primary sort key.
    rows = sorted(graph.query(sparql, initNs=namespaces))
    for candidate, runs in groupby(rows, lambda row: row[0]):
        runs = list(runs)
        # All runs of a group share the same candidate name; use the first.
        candidate_name = runs[0][1]

        existing_links = list(graph.triples((candidate, OWL.sameAs, None)))
        if existing_links:
            print("Candidate %s is already linked to %s. Skipping."
                  % (candidate_name, existing_links[0][2]))
            continue

        runs_as_str = ', '.join(
            "%s (%s)" % (division_name, year) for _, _, division_name, year in runs
        )
        print("Looking for %s, who won election(s) in: %s" % (candidate_name, runs_as_str))
        mql_query = [{
            'mid': None,
            'id': None,
            'name': candidate_name,
            'type': '/government/politician',
        }]
        fbres = query_freebase(mql_query, key)
        # confirm_result is defined elsewhere; it yields the node to link to,
        # or None when nothing should be linked.
        to_link = confirm_result(fbres)
        if to_link is not None:
            graph.add((candidate, OWL.sameAs, to_link))
def link_genelection(graph, key):
    """Link each provincial general election to its Freebase counterpart.

    Every subject of type ``ProvincialGeneralElection`` that has no
    ``owl:sameAs`` link yet is looked up on Freebase as a
    ``/government/general_election`` whose name matches "election",
    "quebec" and the election year. A confirmed result is recorded as an
    ``owl:sameAs`` triple.

    :param graph: graph that is queried with SPARQL and receives the new
        ``owl:sameAs`` triples.
    :param key: passed through unchanged to ``query_freebase``.
    """
    sparql = (
        "select distinct ?node ?year where { "
        "?node rdf:type type:ProvincialGeneralElection . "
        "?node prop:year ?year . "
        "} "
    )
    prefixes = {'rdf': RDF, 'type': ns_type, 'prop': ns_property}
    for node, year in graph.query(sparql, initNs=prefixes):
        linked_targets = [triple[2] for triple in graph.triples((node, OWL.sameAs, None))]
        if linked_targets:
            print("Year %s is already linked to %s. Skipping." % (year, linked_targets[0]))
            continue

        print("Attempting to link elections for year %s" % year)
        # The a:/b:/c: prefixes allow three separate name~= constraints in
        # one MQL query object.
        mql_query = [{
            'mid': None,
            'id': None,
            'name': None,
            'type': '/government/general_election',
            'a:name~=': "election",
            'b:name~=': "quebec",
            'c:name~=': str(year),
        }]
        chosen = confirm_result(query_freebase(mql_query, key))
        if chosen is None:
            continue
        graph.add((node, OWL.sameAs, chosen))
def link_division(graph, key):
    """Link each provincial division to a Freebase political district.

    Every subject of type ``ProvincialDivision`` that has no ``owl:sameAs``
    link yet is looked up on Freebase as a
    ``/government/political_district``, matched on the division's
    ``dc:description`` through ``name~=``. A confirmed result is recorded as
    an ``owl:sameAs`` triple.

    :param graph: graph that is queried with SPARQL and receives the new
        ``owl:sameAs`` triples.
    :param key: passed through unchanged to ``query_freebase``.
    """
    sparql = (
        "select distinct ?node ?name where { "
        "?node rdf:type type:ProvincialDivision . "
        "?node dc:description ?name . "
        "} "
    )
    prefixes = {'rdf': RDF, 'type': ns_type, 'dc': DC}
    for node, name in graph.query(sparql, initNs=prefixes):
        linked_targets = [triple[2] for triple in graph.triples((node, OWL.sameAs, None))]
        if linked_targets:
            print("Division %s is already linked to %s. Skipping." % (name, linked_targets[0]))
            continue

        print("Attempting to link division %s" % name)
        mql_query = [{
            'mid': None,
            'id': None,
            'name': None,
            'type': '/government/political_district',
            'name~=': name,
        }]
        chosen = confirm_result(query_freebase(mql_query, key))
        if chosen is None:
            continue
        graph.add((node, OWL.sameAs, chosen))
def pull_genelection(graph, key):
    """Pull the election date for every general election linked to Freebase.

    For each ``ProvincialGeneralElection`` whose ``owl:sameAs`` target starts
    with ``http://rdf.freebase.com``, the Freebase ``/time/event/start_date``
    is fetched and stored on the node as a ``startDate`` date literal.

    Elections for which no start date comes back, or whose start date is not
    a full ``YYYY-MM-DD`` value, are reported and skipped, so one incomplete
    entry no longer aborts the remaining elections.

    :param graph: graph that is queried with SPARQL and receives the new
        ``startDate`` triples.
    :param key: passed through unchanged to ``query_freebase``.
    """
    sparql = (
        "select distinct ?node ?year ?freebaseNode where { "
        "?node rdf:type type:ProvincialGeneralElection . "
        "?node owl:sameAs ?freebaseNode . "
        "?node prop:year ?year . "
        'filter regex(str(?freebaseNode), "^http://rdf.freebase.com") . '
        "} "
    )
    namespaces = {'rdf': RDF, 'type': ns_type, 'prop': ns_property, 'owl': OWL}
    for node, year, freebaseNode in graph.query(sparql, initNs=namespaces):
        print("Fetching date for %s election" % year)
        mql_query = {
            'mid': node2mid(freebaseNode),
            'name': None,
            '/time/event/start_date': None,
        }
        fbres = query_freebase(mql_query, key)
        # NOTE(review): assumes query_freebase returns a mapping (or a falsy
        # value when nothing matched) and that an unset property comes back
        # as None -- confirm against query_freebase.
        raw_date = fbres['/time/event/start_date'] if fbres else None
        if not raw_date:
            print("No start date found for %s election. Skipping." % year)
            continue
        try:
            start_date = datetime.strptime(raw_date, '%Y-%m-%d').date()
        except ValueError:
            # The value is not a full YYYY-MM-DD date (e.g. a year only).
            print("Cannot parse start date %r for %s election. Skipping." % (raw_date, year))
            continue
        election_name = fbres['name']
        print("Got date %s for election %s" % (start_date, election_name))
        graph.add((node, ns_property.startDate, Literal(start_date)))