コード例 #1
0
ファイル: scrape_candidates.py プロジェクト: hsoft/dgeq-rdf
def incorporate_row(row, graph):
    """Merge one candidate-registration row into ``graph``.

    The row is matched against the single run already present in ``graph``
    for the same candidate, division and general election. That run and its
    party are then annotated with the extra details carried by the row.

    Columns read from ``row`` (by index): 1 run id, 3 party name, 5 agent
    name, 6 division id, 7 gender flag, 9 last name, 10 first name,
    11 registration date.

    Raises:
        StopIteration: if no subject in ``graph`` carries the row's
            ``dgeqDivisionId``.
        AssertionError: if the row does not match exactly one run.
    """
    general_election_id = ns_election['72'] # change after the next elections
    bsq = int(row[6])
    # No default is given to next(), so an unknown division id surfaces as
    # StopIteration.
    division_id = next(graph.subjects(ns_property.dgeqDivisionId, Literal(bsq)))
    lastname = row[9]
    firstname = row[10]
    # Any flag other than 'F' is recorded as male.
    gender = 'female' if row[7] == 'F' else 'male'
    candidate_id = ns_candidate[str2id(lastname + '_' + firstname)]
    graph.add((candidate_id, FOAF.gender, Literal(gender)))
    query = """select distinct ?run ?party where {
        ?elecnode rdf:type type:ProvincialElection .
        ?elecnode prop:generalElection <%s> .
        ?elecnode prop:division <%s> .
        ?run prop:election ?elecnode .
        ?run prop:runningCandidate <%s> .
        ?run prop:runningFor ?party .
    }
    """ % (general_election_id, division_id, candidate_id)
    qres = graph.query(query, initNs={'rdf': RDF, 'prop': ns_property, 'type': ns_type})
    runs = list(qres)
    # Raised explicitly rather than with an ``assert`` statement so that the
    # check is still enforced when Python runs with -O.
    if len(runs) != 1:
        raise AssertionError(
            "expected exactly one run for candidate %s in division %s, found %d"
            % (candidate_id, division_id, len(runs))
        )
    run_id, party_id = runs[0]
    graph.add((run_id, ns_property.dgeqRunId, Literal(int(row[1]))))
    graph.add((run_id, ns_property.dateRegistered, Literal(row[11])))
    graph.add((party_id, RDF.type, FOAF.Organization))
    graph.add((party_id, FOAF.name, Literal(row[3])))
    agent_name = row[5]
    try:
        # Dedicated names keep the candidate's lastname/firstname intact.
        agent_lastname, agent_firstname = agent_name.split(', ')
        agent_id = ns_agent[str2id(agent_lastname + '_' + agent_firstname)]
        save_person(graph, agent_id, agent_firstname, agent_lastname)
        graph.add((run_id, ns_property.registrationAgent, agent_id))
    except ValueError:
        # Best effort: a ValueError here (typically an agent name that does
        # not split into exactly two ', '-separated parts) means the agent
        # is deliberately left out of the graph.
        pass
コード例 #2
0
ファイル: scrape_elections.py プロジェクト: hsoft/dgeq-rdf
def get_graph_from_soup(soup, dgeqid):
    """Build an RDF graph from a parsed general-election results page.

    The first ``table`` with class ``tableau`` is walked row by row. Three
    kinds of rows are recognised:

    * a division row (first cell has class ``circonscription-precedante``)
      opens a new division and a new election node for it;
    * a summary row (first cell has ``colspan="4"``) supplies the four vote
      totals of the current election node;
    * any other row holding a cell is a candidate row, formatted as
      ``"Last, First (Party)"`` followed by a vote-count cell.

    The first candidate row after each division row is flagged as ``won``.
    NOTE(review): this presumes the page lists the winner first - confirm
    against the source pages.

    Args:
        soup: parsed page, called as ``soup(tag, ...)`` to find elements.
        dgeqid: key used to look up the general election in ``ns_election``.

    Returns:
        A new ``Graph`` holding the triples extracted from the page.

    Raises:
        ValueError: if a summary or candidate row appears before any
            division row, or if a candidate cell does not have the
            expected ``"Last, First (Party)"`` shape.
    """
    graph = Graph()
    # extract election year
    header = soup('h2', id='e')[0].get_text()
    election_year = str2int(re.findall(r"\d{4}", header)[0])
    print(election_year)
    table = soup('table', class_='tableau')[0]
    rows = table('tr')
    # Loop-invariant patterns, compiled once.
    summary_re = re.compile(r"(?<=:)[\s\d]+")
    candidate_re = re.compile(r"(.+?), (.+) \((.+)\)")
    general_election_id = ns_election[dgeqid]
    graph.add((general_election_id, RDF.type, ns_type.ProvincialGeneralElection))
    graph.add((general_election_id, ns_property.year, Literal(election_year)))
    # Both are (re)set by every division row; the initial values make a
    # missing division row detectable instead of leaving the names unbound.
    election_id = None
    first = False
    for row in rows:
        if not row.td: # header row, skip
            continue
        if 'circonscription-precedante' in row.td.attrs.get('class', ()): # division name row
            name = row.td.get_text().strip()
            current_division = ns_division[str2id(name)]
            graph.add((current_division, RDF.type, ns_type.ProvincialDivision))
            graph.add((current_division, DC.description, Literal(name)))
            first = True
            election_id = BNode()
            graph.add((election_id, RDF.type, ns_type.ProvincialElection))
            graph.add((election_id, ns_property.generalElection, general_election_id))
            graph.add((election_id, ns_property.division, current_division))
            continue
        if election_id is None:
            raise ValueError("results row found before any division row")
        if row.td.attrs.get('colspan') == '4': # summary row
            summary = row.td.get_text().strip()
            # Each number follows a ':'; the first four are used, in this order.
            matches = summary_re.findall(summary)
            graph.add((election_id, ns_property.validVoteCount, Literal(str2int(matches[0]))))
            graph.add((election_id, ns_property.invalidVoteCount, Literal(str2int(matches[1]))))
            graph.add((election_id, ns_property.totalVoteCount, Literal(str2int(matches[2]))))
            graph.add((election_id, ns_property.admissibleVoterCount, Literal(str2int(matches[3]))))
        else: # normal row
            cells = row.find_all('td')
            name = cells[0].get_text().strip()
            m = candidate_re.match(name)
            if m is None:
                raise ValueError("unrecognised candidate cell: %r" % name)
            lastname = m.group(1)
            firstname = m.group(2)
            party_id = str2party(m.group(3))
            candidate_id = ns_candidate[str2id(lastname + '_' + firstname)]
            votes = str2int(cells[1].get_text())
            graph.add((candidate_id, RDF.type, ns_type.ProvincialCandidate))
            save_person(graph, candidate_id, firstname, lastname)
            # One anonymous node per candidacy, linking the candidate, the
            # election node, the party and the vote count.
            run_id = BNode()
            graph.add((run_id, RDF.type, ns_type.ProvincialCandidateRun))
            graph.add((run_id, ns_property.runningCandidate, candidate_id))
            graph.add((run_id, ns_property.election, election_id))
            graph.add((run_id, ns_property.runningFor, party_id))
            graph.add((run_id, ns_property.voteCount, Literal(votes)))
            if first:
                graph.add((run_id, ns_property.won, Literal(True)))
                first = False
    return graph