Esempio n. 1
0
def parse_boulder_csv(file, options):
    """Parse a Boulder audit CSV file and push its data as audit units.

    The file has one line per contest per batch.
    A sample is in testdata/test-boulder-csv.txt

    The first line is a header line like this, including a column for
    each choice:

    "MBB Name","Contest Name","Contest Ballots","YES","NO","Cast Votes","Over Votes","Under Votes",,,,,,,,

    Every column from index 3 on (the fourth column, "YES" in the sample
    above) is treated as a choice, except that columns named "Cast Votes"
    or "Blank" are skipped.  "Under Votes" and "Over Votes" are recorded
    under the standard names "Under" and "Over".
    The last valid choice for a contest is always 'Under Votes'.

    Args:
        file: path of the CSV file to read.
        options: object providing ``election``, ``batchid`` (appended to
            each batch name), ``contest`` (when not None, only rows for
            that contest are used) and ``min_ballots`` (passed on to
            util.pushAuditUnit).
    """

    election = options.election

    # Placeholder unit; it is pushed and replaced as soon as a row's
    # batch or contest differs from it.
    au = util.AuditUnit()

    # Use a context manager so the input file is closed even if a row
    # fails to parse.
    with open(file) as csv_file:
        reader = csv.DictReader(csv_file, delimiter=",")

        for r in reader:
            batch = [r['MBB Name'] + options.batchid]
            contest = r['Contest Name']
            ballots = r['Contest Ballots']

            # TODO: If this is a primary, see how to get party information

            if options.contest is not None and options.contest != contest:
                continue

            # NOTE(review): the sample header ends with several empty
            # column names; they are not filtered out here - confirm that
            # au.update() copes with them.
            for choice in reader.fieldnames[3:]:
                # Skip the "Cast Votes" column, in among the subtotals
                if choice in ("Cast Votes", "Blank"):
                    continue

                # Use standard names for votes that are Under or Over
                if choice == "Under Votes":
                    db_choice = "Under"
                elif choice == "Over Votes":
                    db_choice = "Over"
                else:
                    db_choice = choice

                # If the batch or contest has changed, push out the
                # previous unit and start a new one
                if batch != au.batches or contest != au.contest:
                    logging.debug("now batch '%s' contest '%s' at line %d",
                                  batch, contest, reader.reader.line_num)
                    util.pushAuditUnit(au, min_ballots=options.min_ballots)
                    # FIXME: try to extract type from batch name
                    au = util.AuditUnit(election, contest, 'U', batch,
                                        ballots)

                au.update(db_choice, r[choice])

    # Push the unit for the last batch/contest seen
    util.pushAuditUnit(au, min_ballots=options.min_ballots)
Esempio n. 2
0
def parse_sequoia(file, options):
    """Parse Sequoia precinct results in "text with headers" format.

    The input is a tab-separated .txt file with one line per candidate
    per contest per precinct.
    The model of this format is the Denver 2008 data sample
    in "testdata/test-sequoia-precinct.txt".
    If the data is to be aggregated for privacy (the default), the data
    should be sorted by batch (precinct).

    The first line has the column headers (tab-separated in the file):
    PRECINCT_NAME CANDIDATE_FULL_NAME contest_party_id candidate_party_id
    CONTEST_TYPE contest_id CONTEST_ORDER CANDIDATE_ORDER CONTEST_FULL_NAME
    TOTAL PRECINCT_ID precinct_order contest_vote_for PROCESSED_DONE
    PROCESSED_STARTED CONTEST_TOTAL IS_WRITEIN undervote overvote

    Question - can separate Absentee, Early and In-precinct count be generated?

    Args:
        file: path of the tab-separated file to read.
        options: object providing ``election``, ``batchid`` (appended to
            each precinct name), ``contest`` (when not None, only rows for
            that contest are used) and ``min_ballots`` (passed on to
            util.pushAuditUnit).
    """

    election = options.election

    # Placeholder unit; it is pushed and replaced as soon as a row's
    # batch or contest differs from it.
    au_AB = util.AuditUnit()

    # Use a context manager so the input file is closed even if a row
    # fails to parse.
    with open(file) as tsv_file:
        reader = csv.DictReader(tsv_file, delimiter="\t")

        for r in reader:
            batch = [r['PRECINCT_NAME'] + options.batchid]
            contest = r['CONTEST_FULL_NAME']
            # TODO: If this is a primary, see how to get party information
            # if r['Party_Code']:
            #     contest += ":" + r['Party_Code']

            if options.contest is not None and options.contest != contest:
                continue

            choice = r['CANDIDATE_FULL_NAME'].strip()

            # If the batch or contest has changed, push out the previous
            # unit and start a new one
            if batch != au_AB.batches or contest != au_AB.contest:
                logging.debug("now batch '%s' contest '%s' at line %d",
                              batch, contest, reader.reader.line_num)
                util.pushAuditUnit(au_AB, min_ballots=options.min_ballots)
                au_AB = util.AuditUnit(election, contest, 'AB', batch)

            au_AB.update(choice, r['TOTAL'])
            # The under/over counts are duplicated on each candidate's
            # row, so record them only once, from the first candidate.
            if r['CANDIDATE_ORDER'] == '1':
                au_AB.update('Under', r['undervote'])
                au_AB.update('Over', r['overvote'])

    # Push the unit for the last batch/contest seen; the loop only pushes
    # a unit when the following row starts a new one.
    util.pushAuditUnit(au_AB, min_ballots=options.min_ballots)
Esempio n. 3
0
def parse_swdb_csv(file, options):
    """Parse a csv dump of the California Statewide Database (SWDB).

    The input is a comma-separated .csv file.  Each row is read as one
    batch (column SVPREC_KEY) of the congressional contest named
    "CongressNN", where NN is the row's CDDIST value.
    If the data is to be aggregated for privacy (the default), the data
    should be sorted by batch (precinct).

    Question - can separate Absentee, Early and In-precinct count be generated?

    Args:
        file: path of the CSV file to read.
        options: object providing ``election``, ``batchid`` (appended to
            each batch name), ``contest`` (when not None, only rows for
            that contest are used) and ``min_ballots`` (passed on to
            util.pushAuditUnit).
    """

    election = options.election

    # Columns holding the vote counts for each choice in the contest
    choices = ('CNGDEM', 'CNGGRN', 'CNGREP', 'CNGLIB', 'CNGPAF', 'CNGAIP')

    # Placeholder unit; it is pushed and replaced as soon as a row's
    # batch or contest differs from it.
    au = util.AuditUnit()

    # Use a context manager so the input file is closed even if a row
    # fails to parse.
    with open(file) as csv_file:
        reader = csv.DictReader(csv_file, delimiter=",")

        for r in reader:
            batch = [r['SVPREC_KEY'] + options.batchid]
            contest = "Congress%.2d" % int(r['CDDIST'])
            ballots = r['TOTVOTE']

            # TODO: If this is a primary, see how to get party information
            # if r['Party_Code']:
            #     contest += ":" + r['Party_Code']

            if options.contest is not None and options.contest != contest:
                continue

            for choice in choices:
                # If the batch or contest has changed, push out the
                # previous unit and start a new one
                if batch != au.batches or contest != au.contest:
                    logging.debug("now batch '%s' contest '%s' at line %d",
                                  batch, contest, reader.reader.line_num)
                    util.pushAuditUnit(au, min_ballots=options.min_ballots)
                    au = util.AuditUnit(election, contest, 'U', batch,
                                        ballots)

                au.update(choice, r[choice])

            # Undervotes and Overvotes are not included in the input, but
            # their sum is implicit in the number of ballots minus the
            # votes for candidates.  So make contest_ballots() work by
            # assuming that all the unaccounted for ballots are undervoted.
            au.update("Under", str(int(ballots) - au.contest_ballots()))

    # Push the unit for the last batch/contest seen
    util.pushAuditUnit(au, min_ballots=options.min_ballots)
Esempio n. 4
0
def parse_hart_csv(file, options):
    """Parse a Hart precinct csv file of election data.

    The model of this format is a Hart precinct spreadsheet from
    Orange County:
     testdata/test-orange-hart.csv
     or http://www.sos.ca.gov/elections/sov/2009-special/precinct-data/data/orange-20090519.csv

    Each row carries one candidate's votes for one contest in one
    precinct, split into absentee mail (AB), absentee walk-in (EV) and
    election (ED) counts; one audit unit is built per type for each
    batch/contest.

    Args:
        file: path of the CSV file to read.
        options: object providing ``election``, ``batchid`` (appended to
            each precinct name), ``contest`` (when not None, only rows for
            that contest are used) and ``min_ballots`` (passed on to
            util.pushAuditUnit).
    """

    election = options.election

    # Placeholder units; they are pushed and replaced as soon as a row's
    # batch or contest differs from them.
    au_AB = util.AuditUnit()
    au_EV = util.AuditUnit()
    au_ED = util.AuditUnit()

    # Use a context manager so the input file is closed even if a row
    # fails to parse.
    with open(file) as csv_file:
        reader = csv.DictReader(csv_file)

        for r in reader:
            batch = [r['Precinct Name'] + options.batchid]
            contest = r['Contest Title']
            if r['Contest Party']:
                contest += ":" + r['Contest Party']

            if options.contest is not None and options.contest != contest:
                continue

            choice = r['Candidate Name'].strip()
            # Do a bit of normalization - replace multiple spaces with
            # just one
            while "  " in choice:
                choice = choice.replace("  ", " ")

            # If the batch or contest has changed, push out the previous
            # units and start new ones
            if batch != au_AB.batches or contest != au_AB.contest:
                AB_ballots = r['Absentee Mail Ballots']
                EV_ballots = r['Absentee Walk-in Ballots']
                ED_ballots = r['Election Ballots']

                logging.debug("now batch '%s' contest '%s' at line %d",
                              batch, contest, reader.reader.line_num)
                util.pushAuditUnit(au_AB, min_ballots=options.min_ballots)
                au_AB = util.AuditUnit(election, contest, 'AB', batch,
                                       AB_ballots)
                util.pushAuditUnit(au_EV, min_ballots=options.min_ballots)
                au_EV = util.AuditUnit(election, contest, 'EV', batch,
                                       EV_ballots)
                util.pushAuditUnit(au_ED, min_ballots=options.min_ballots)
                au_ED = util.AuditUnit(election, contest, 'ED', batch,
                                       ED_ballots)

            au_AB.update(choice, r['Absentee Mail Votes'])
            au_EV.update(choice, r['Absentee Walk-in Votes'])
            au_ED.update(choice, r['Election Votes'])
            # The blank/over counts are duplicated on each candidate's
            # row, so record them only once, from the first candidate.
            if r['Candidate Seq Nbr'] == '1':
                au_AB.update('Under', r['Absentee Mail Blank Votes'])
                au_AB.update('Over', r['Absentee Mail Over Votes'])
                au_EV.update('Under', r['Absentee Walk-in Blank Votes'])
                au_EV.update('Over', r['Absentee Walk-in Over Votes'])
                au_ED.update('Under', r['Election Blank Votes'])
                au_ED.update('Over', r['Election Over Votes'])

    # Push the units for the last batch/contest seen; the loop only
    # pushes units when the following row starts new ones.
    util.pushAuditUnit(au_AB, min_ballots=options.min_ballots)
    util.pushAuditUnit(au_EV, min_ballots=options.min_ballots)
    util.pushAuditUnit(au_ED, min_ballots=options.min_ballots)
Esempio n. 5
0
def parse_csv(file, options):
    """Parse a csv file of election data.

    It has one line per candidate per contest per precinct.
    The model of this format is the San Mateo precinct
    spreadsheet in "testdata/test-san-mateo-dp-92-p.csv".
    If the data is to be aggregated for privacy (the default), the data
    should be sorted by batch (precinct).

    Each row's votes are split into absentee (AB), early (EV) and
    election (ED) counts; one audit unit is built per type for each
    batch/contest.

    Args:
        file: path of the CSV file to read.
        options: object providing ``election``, ``batchid`` (appended to
            each precinct name), ``contest`` (when not None, only rows for
            that contest are used) and ``min_ballots`` (passed on to
            util.pushAuditUnit).
    """

    election = options.election

    # Placeholder units; they are pushed and replaced as soon as a row's
    # batch or contest differs from them.
    au_AB = util.AuditUnit()
    au_EV = util.AuditUnit()
    au_ED = util.AuditUnit()

    # Use a context manager so the input file is closed even if a row
    # fails to parse.
    with open(file) as csv_file:
        reader = csv.DictReader(csv_file)

        for r in reader:
            batch = [r['Precinct_name'] + options.batchid]
            contest = r['Contest_title']
            if r['Party_Code']:
                contest += ":" + r['Party_Code']

            if options.contest is not None and options.contest != contest:
                continue

            choice = r['candidate_name'].strip()
            # Do a bit of normalization - replace multiple spaces with
            # just one
            while "  " in choice:
                choice = choice.replace("  ", " ")

            # If the batch or contest has changed, push out the previous
            # units and start new ones
            if batch != au_AB.batches or contest != au_AB.contest:
                logging.debug("now batch '%s' contest '%s' at line %d",
                              batch, contest, reader.reader.line_num)
                util.pushAuditUnit(au_AB, min_ballots=options.min_ballots)
                au_AB = util.AuditUnit(election, contest, 'AB', batch)
                util.pushAuditUnit(au_EV, min_ballots=options.min_ballots)
                au_EV = util.AuditUnit(election, contest, 'EV', batch)
                util.pushAuditUnit(au_ED, min_ballots=options.min_ballots)
                au_ED = util.AuditUnit(election, contest, 'ED', batch)

            au_AB.update(choice, r['absentee_votes'])
            au_EV.update(choice, r['early_votes'])
            au_ED.update(choice, r['election_votes'])
            # The under/over counts are duplicated on each candidate's
            # row, so record them only once, from the first candidate.
            if r['cand_seq_nbr'] == '1':
                au_AB.update('Under', r['absentee_under_votes'])
                au_AB.update('Over', r['absentee_over_votes'])
                au_EV.update('Under', r['early_under_votes'])
                au_EV.update('Over', r['early_over_votes'])
                au_ED.update('Under', r['election_under_votes'])
                au_ED.update('Over', r['election_over_votes'])

    # Push the units for the last batch/contest seen; the loop only
    # pushes units when the following row starts new ones.
    util.pushAuditUnit(au_AB, min_ballots=options.min_ballots)
    util.pushAuditUnit(au_EV, min_ballots=options.min_ballots)
    util.pushAuditUnit(au_ED, min_ballots=options.min_ballots)
Esempio n. 6
0
def parse_xml_crystal(file, options):
    """Extract relevant data from each contest in a crystalreports xml file.

    For every contest group in the report, three audit units are built
    (absentee "AB", early "EV" and election day "ED"), filled with the
    under/over votes from the group's footer and the per-candidate votes
    from its detail rows, and stored in a local dict keyed by
    "<contest>:<party>".

    The batch name is the file name without its ".xml" suffix, except for
    a file called "cumulative.xml", where the name of the parent directory
    is used instead.

    Args:
        file: path of the XML file to read.
        options: object providing ``election`` and ``contest`` (when not
            None, only that contest is processed).
    """

    import io

    import lxml.etree as ET

    election = options.election

    if os.path.basename(file) == "cumulative.xml":
        # if it's the boring default, use alternate naming scheme:
        #  parent directory of canonical path
        batch = os.path.basename(os.path.dirname(os.path.realpath(file)))
    else:
        batch = os.path.basename(file)[0:-4]  # trim directory and ".xml"

    # filter out this confounding unprefixed namespace attribute
    # ...or figure out how to parse it...
    # The file is read as bytes and closed right away; the filtered copy
    # is handed to the XML parser from memory.
    filterout = b"xmlns = 'urn:crystal-reports:schemas'"
    with open(file, "rb") as xml_file:
        newfile = io.BytesIO(xml_file.read().replace(filterout, b""))

    root = ET.parse(newfile).getroot()
    logging.debug("root = %s" % root)

    # The Hart system forces the use of some odd contest names.
    # This is a table of fixes for what Boulder needed in the 2008 general
    replacements = [
        (", Vote For 1", ""),
        ("THE EARNINGS FROM THE INVESTMENT",
         "ST VRAIN VALLEY SCHOOL DISTRICT NO. RE-1J BALLOT ISSUE NO. 3B"),
        ("BALLOT ITEM REMOVED ", ""),
    ]

    # NOTE(review): the audit units are collected here, but this dict is
    # neither returned nor are the units pushed anywhere in this function
    # - confirm how the results are meant to be consumed.
    values = {}

    for contesttree in root.xpath('//FormattedAreaPair[@Type="Group"]'):
        tree = contesttree.xpath(
            'FormattedArea[@Type="Header"]//FormattedReportObject[@FieldName="{@district_info}"]/FormattedValue'
        )
        if len(tree) != 1:
            logging.error(
                "Error: number of Headers should be 1, not %d.  Line %d" %
                (len(tree), contesttree.sourceline))
            logging.debug(ET.tostring(contesttree, pretty_print=True))
            if not tree:
                # Without any header there is no contest name to use, so
                # skip this group instead of failing on tree[0] below.
                continue

        contest_name = tree[0].text

        # Collapse runs of spaces into a single space
        while "  " in contest_name:
            contest_name = contest_name.replace("  ", " ")

        for old, new in replacements:
            contest_name = contest_name.replace(old, new)

        # NOTE: this may not work in primary, if there are multiple
        # contests with the same name per election, one for each party
        contest = contest_name.strip()

        if options.contest is not None and options.contest != contest:
            logging.debug("skipping %s" % (contest))
            continue

        au_AB = util.AuditUnit(election, contest, "AB", [batch])
        au_EV = util.AuditUnit(election, contest, "EV", [batch])
        au_ED = util.AuditUnit(election, contest, "ED", [batch])

        logging.debug("Contest: %s (%s)" % (contest, tree[0].text))

        # We don't need the Header area beyond this: the contest name is
        # taken from the node itself (see above).

        #logging.debug("tree:\n" + ET.tostring(contesttree, pretty_print=True))

        # Get undervotes and overvotes from Footer
        footer = contesttree.xpath('FormattedArea[@Type="Footer"]')

        absenteer = extract_values(
            footer,
            {
                '{@_Combine_Under}': 'Under',  # Report combined AB/Early here
                '{@AB_Under_votes}': 'Under',
                '{@_Combine_Over}': 'Over',  # Report combined AB/Early here
                '{@AB_Over_Votes}': 'Over'
            })

        if absenteer == {}:  # E.g. "No Candidate for Race"
            continue
        try:
            au_AB.update('Under', absenteer['Under'])
            au_AB.update('Over', absenteer['Over'])
        except KeyError as err:
            # Report the problem and carry on with the rest of the contest
            print(
                "Parsing error in file %s\n line: %s\n KeyError Exception for key: '%s'\n contest: %s\n absenteer: %s\n tree:\n%s"
                % (file, contesttree.sourceline, err, contest, absenteer,
                   ET.tostring(contesttree, pretty_print=True)))

        earlyr = extract_values(
            footer, {
                '{@EA_Under_Votes}': 'Under',
                '{@EA_Over_Votes}': 'Over'
            })

        au_EV.update('Under', earlyr['Under'])
        au_EV.update('Over', earlyr['Over'])

        electionr = extract_values(
            footer, {
                '{sp_cumulative_rpt.c_under_votes_election}': 'Under',
                '{sp_cumulative_rpt.c_over_votes_election}': 'Over'
            })

        au_ED.update('Under', electionr['Under'])
        au_ED.update('Over', electionr['Over'])

        #logging.debug(contesttree.getchildren())

        parties = set()
        # For each candidate or option
        for c in contesttree.xpath('.//FormattedAreaPair[@Type="Details"]'):
            cv = extract_values(
                c,
                {
                    '{@_Display_Candidate_Name}': 'Name',
                    '{sp_cumulative_rpt.party}': 'Party',
                    #'{@Tl_total_cand}': 'Election day',  #??
                    '{sp_cumulative_rpt.c_votes_election}':
                    'Election day',  #??
                    '{@_Combine_AB_EA}': 'Absentee',  # report combined here
                    '{@AB_Votes}': 'Absentee',
                    '{@EA_Votes}': 'Early'
                })

            # Normalize the name: trim it and collapse runs of spaces
            choice = cv['Name'].strip()
            while "  " in choice:
                choice = choice.replace("  ", " ")
            cv['Name'] = choice

            logging.debug("candidate: %s" % cv['Name'])

            au_AB.update(cv['Name'], cv['Absentee'])
            au_EV.update(cv['Name'], cv['Early'])
            au_ED.update(cv['Name'], cv['Election day'])

            parties.add(cv['Party'])

        assert len(parties) > 0  # or == 1 for primary?
        party = parties.pop() or ""

        key = "%s:%s" % (contest, party)

        values[key] = [au_AB, au_EV, au_ED]
Esempio n. 7
0
def parse_swdb(file, options):
    """Parse a swdb file and push one audit unit per vote field per record.

    "file" is handed directly to Dbf().
    NOTE(review): an earlier description said "file" can be a file, url,
    or string suitable for openAnything(); nothing here calls
    openAnything(), so confirm what Dbf() accepts.

    Records whose SRPREC_KEY starts with 'SOV' or ends with 'TOT' are
    skipped.  A key ending in 'A' or '_A' marks an absentee ("AB") batch
    and the marker is removed from the batch name; all other batches get
    type "BA".

    Args:
        file: the dbf source to read.
        options: object providing ``election``, ``contest`` (when not
            None, only that contest is processed) and ``min_ballots``
            (passed on to util.pushAuditUnit).
    """

    import traceback

    # TODO: this also needs a source of the "codes" to annotate the choice
    # names, e.g. a tab-separated file such as "003.codes" with one
    # (code, choice, total) triple per line for codes starting with
    # 'PRS', 'SEN', 'PR_', 'CNG' or 'ASS'.

    reader = Dbf(file)

    rec = 0

    while True:
        # Records are fetched by index so that a record which fails to
        # load can be reported and skipped without ending the whole run.
        try:
            r = reader[rec]
        except (IndexError, StopIteration):
            break
        except Exception:
            traceback.print_exc(1)
            # Only the record number is reported: the record itself was
            # never loaded, so there is nothing else to show.
            logging.error("Dbf error reading record %d", rec)
            rec += 1
            continue

        rec += 1

        #batch = r["SRPREC"]
        batch = r["SRPREC_KEY"]
        #batch = r["SVPREC"]
        #batch = r["SVPREC_KEY"]
        if batch.startswith('SOV') or batch.endswith('TOT'):
            continue

        # state-wide data marks absentee with trailing "A",
        # county data marks them with "_A"
        if batch.endswith('_A'):
            unit_type = "AB"
            batch = batch[:-2]
        elif batch.endswith('A'):
            unit_type = "AB"
            batch = batch[:-1]
        else:
            unit_type = "BA"

        addist = r['ADDIST']
        cddist = r['CDDIST']
        #sddist = r['SDDIST']

        # NOTE(review): 'SRPREC_KEY', the field used for the batch name
        # above, is not matched by this skip list - confirm whether it
        # should be.
        for code in reader.fieldNames:
            if code.endswith(('PREC', 'VOTE', 'REG', 'DIST', 'SVPREC_KEY')):
                continue

            # District contests get the two-digit district number
            # inserted between the contest prefix and the last three
            # characters of the code.
            code_full = code
            if code.startswith('ASS'):
                code_full = code[:3] + ("%02d" % addist) + code[-3:]
                contest = code_full[:5]
            elif code.startswith('CNG'):
                code_full = code[:3] + ("%02d" % cddist) + code[-3:]
                contest = code_full[:5]
            elif code.startswith('PR_'):
                contest = code[:-1]
            else:
                contest = code[:3]

            if options.contest is not None and options.contest != contest:
                continue

            # until we fully figure out how to get the district numbers...
            # contest = contests[code]

            try:
                au = util.AuditUnit(options.election, contest, unit_type,
                                    [batch])
                # The choice name is whatever follows the contest prefix
                au.update(code_full[len(contest):], str(r[code]))
                util.pushAuditUnit(au, min_ballots=options.min_ballots)
            except Exception:
                # Best effort: report the failing code and move on
                print("Error looking up code %s (%s) for %s-%s" %
                      (code, code_full, batch, unit_type))