def _parse_header(self, header_lines=None):
    """Turn the fixed-width header block into candidate and party labels.

    When ``header_lines`` is ``None`` the lines stored under
    ``self._context['header_lines']`` are used.  The column breaks computed
    for the header are kept on ``self._breaks``.

    Cells are visited row by row, left to right, and non-empty cells are
    joined per column with single spaces.  Until a "Totals" cell has been
    seen, a cell counts as party text unless it is "Scattering" or "Totals";
    every cell visited after "Totals" counts as candidate text.

    Returns:
        A ``(candidates, parties)`` tuple of two lists, each with one entry
        per column of the first header row.
    """
    lines = self._context['header_lines'] if header_lines is None else header_lines
    self._breaks = get_column_breaks(lines)
    rows = split_into_columns(lines, self._breaks)

    width = len(rows[0])
    parties = [''] * width
    candidates = [''] * width
    totals_seen = False

    for cells in rows:
        for idx, text in enumerate(cells):
            if not text:
                continue

            # Pick the list this cell contributes to.
            if totals_seen or text in ("Scattering", "Totals"):
                target = candidates
            else:
                target = parties

            # Join with a space only when the column already holds text.
            target[idx] = target[idx] + " " + text if target[idx] else text

            # Everything visited after the "Totals" cell is candidate text.
            if text == "Totals":
                totals_seen = True

    return candidates, parties
def parse_header_lines(self, lines):
    """Collapse a multi-line fixed-width header into one label per column.

    Each column's cells are joined top to bottom with single spaces.  A
    fourth-from-last label of "Write-In" is rewritten as "Write-in", and the
    last four labels are then asserted to be "Write-in", "Over Votes",
    "Under Votes" and "Total", in that order.

    Returns:
        A ``(candidates, parties)`` tuple with the first column ("County")
        dropped from both lists.  Every party entry is an empty string.

    Raises:
        AssertionError: if the trailing four labels are not the expected
            ones.
    """
    rows = split_into_columns(lines, get_column_breaks(lines))
    width = len(rows[0])

    # This set of results carries no party information, so the party list
    # only mirrors the shape of the candidate list.
    candidates = [""] * width
    parties = [""] * width

    for cells in rows:
        for idx, text in enumerate(cells):
            if candidates[idx] and text:
                candidates[idx] += " "
            candidates[idx] += text

    # Normalize the capitalization of the write-in column.
    if candidates[-4] == "Write-In":
        candidates[-4] = "Write-in"

    # Sanity-check the trailing summary columns, last column first.
    expected_tail = (
        (-1, "Total"),
        (-2, "Under Votes"),
        (-3, "Over Votes"),
        (-4, "Write-in"),
    )
    for offset, label in expected_tail:
        assert candidates[offset] == label

    # Skip the leading "County" column.
    return candidates[1:], parties[1:]
def test_get_column_breaks(self):
    """Check get_column_breaks against several sample header blocks."""
    # NOTE(review): the expected offsets below are larger than the length
    # of the fixture strings as written here, which suggests that runs of
    # spaces inside the literals were collapsed at some point.  Verify the
    # fixtures against the original report text.
    cases = [
        (
            [
                " Democratic Republican Scattering Totals ",
                " Dwayne Arlan ",
            ],
            [16, 32, 55, 73],
        ),
        (
            [
                " Democratic Republican Scattering Totals",
                " Wesley Greg",
                " Whitead Hoversten",
            ],
            [16, 33, 55, 73],
        ),
        (
            [
                " Doug Gross Steve Sukup Bob Vander ",
                " Plaats Write-In ",
                " Republican Republican Votes Totals",
                " Republican",
            ],
            [16, 31, 45, 58, 73],
        ),
        (
            [
                " County Michael A. Mauro Matt Schultz Jake Porter Write-in Over Votes Under Total"
            ],
            [7, 21, 40, 63, 118, 153, 168, 182],
        ),
        (
            [
                " Total",
                " Jim Hahn Shawn Write-In Under Votes Over Votes",
                " Hamerlinck",
            ],
            [12, 23, 39, 54, 68, 81],
        ),
    ]

    for header, expected in cases:
        self.assertEqual(get_column_breaks(header), expected)
def parse_header_lines(self, lines):
    """Merge a multi-line fixed-width header into one label per column.

    Cells belonging to the same column are joined top to bottom with single
    spaces.  Rows may differ in length: the result grows to match the
    longest row seen so far.

    Returns:
        A list of column labels.
    """
    rows = split_into_columns(lines, get_column_breaks(lines))

    candidates = []
    for cells in rows:
        # Pad the result so that every cell of this row has a slot.
        shortfall = len(cells) - len(candidates)
        if shortfall > 0:
            candidates.extend([""] * shortfall)

        for idx, text in enumerate(cells):
            if candidates[idx] and text:
                candidates[idx] += " "
            candidates[idx] += text

    return candidates
def exit(self):
    """Derive party and candidate labels from the buffered header lines.

    Does nothing when ``self._context`` already has a 'candidates' entry.
    Otherwise ``self._header_lines`` is split into columns, two pairs of
    adjacent columns are merged, a few known text glitches are repaired, and
    the resulting 'parties' and 'candidates' lists are stored in
    ``self._context``.

    For the first five columns the party is built from header rows 0 and 1
    and the candidate from the remaining rows.  For all later columns the
    party is an empty string and the candidate is the row-0 cell.
    """
    if 'candidates' in self._context:
        return

    breaks = get_column_breaks(self._header_lines)
    rows = split_into_columns(self._header_lines, breaks)

    # Fold cell 6 into cell 5 and cell 8 into cell 7, in place.
    for cells in rows:
        cells[5] = "{} {}".format(cells[5], cells[6]).strip()
        cells[7] = "{} {}".format(cells[7], cells[8]).strip()
        # Remove the absorbed cells, higher index first so the lower index
        # is still valid.
        del cells[8]
        del cells[6]

    # Repair known glitches: a trailing "I" becomes " /", and two specific
    # misspelled cells are replaced outright.
    spelling_fixes = {
        "libertarian": "Libertarian",
        "PATTYJUDGE": "PATTY JUDGE",
    }
    for cells in rows:
        for pos, text in enumerate(cells):
            text = re.sub(r'I$', ' /', text)
            cells[pos] = spelling_fixes.get(text, text)

    named_cols = 5

    # Parties: rows 0 and 1 joined for the first five columns, blank after.
    self._context['parties'] = []
    for top, below in zip(rows[0][:named_cols], rows[1][:named_cols]):
        self._context['parties'].append("{} {}".format(top, below).strip())
    self._context['parties'].extend([''] * (len(rows[0]) - named_cols))

    # Candidates: rows 2 and below joined for the first five columns, then
    # the remaining row-0 cells verbatim.
    self._context['candidates'] = []
    for pos in range(named_cols):
        pieces = [cells[pos] for cells in rows[2:]]
        self._context['candidates'].append(" ".join(pieces).strip())
    self._context['candidates'] += rows[0][named_cols:]
def parse_header_lines(self, lines):
    """Split a multi-line header into per-column candidate and party labels.

    The header is cut into columns, passed through ``self.merge_columns``,
    and then read one column at a time from the top row down.  The first-row
    cell becomes the candidate label when it is one of "OVER VOTES",
    "UNDER VOTES", "SCATTERING" or "TOTAL"; otherwise it starts the party
    label.  Cells in later rows extend the party label until an empty cell,
    "Party" or "Liberation" is met; non-empty cells after that extend the
    candidate label.

    Returns:
        A ``(candidates, parties)`` tuple of two lists with one entry per
        header column.

    Raises:
        AssertionError: if the last four candidate labels are not
            "OVER VOTES", "UNDER VOTES", "SCATTERING", "TOTAL" in that
            order.
    """
    parties = []
    candidates = []
    breaks = get_column_breaks(lines)
    parsed_lines = split_into_columns(lines, breaks)
    parsed_lines = self.merge_columns(parsed_lines)
    # Go through the header columns column by column
    for col_idx in range(len(parsed_lines[0])):
        # Becomes True once the party part of this column has ended.
        party_done = False
        for row_idx in range(len(parsed_lines)):
            val = parsed_lines[row_idx][col_idx]
            if row_idx == 0:
                # Handle the first row specially to ensure that all the
                # candidate and party lists have the same number of values
                if val in ("OVER VOTES", "UNDER VOTES", "SCATTERING", "TOTAL"):
                    parties.append("")
                    candidates.append(val)
                else:
                    parties.append(val)
                    candidates.append("")
            else:
                if party_done and val:
                    # Party text is complete: this cell is candidate text.
                    space = " " if candidates[col_idx] else ""
                    candidates[col_idx] += space + val
                else:
                    if val:
                        space = " " if parties[col_idx] else ""
                        parties[col_idx] += space + val
                    # An empty cell, "Party" or "Liberation" ends the party
                    # text; a terminating word is still appended above.
                    # NOTE(review): the nesting level of this check was
                    # reconstructed; confirm it is not meant to run for
                    # the first row as well.
                    if val in ("", "Party", "Liberation"):
                        party_done = True
    # Sanity-check the trailing summary columns.
    assert candidates[-1] == "TOTAL"
    assert candidates[-2] == "SCATTERING"
    assert candidates[-3] == "UNDER VOTES"
    assert candidates[-4] == "OVER VOTES"
    return candidates, parties
def parse_header_lines(self, lines):
    """Split a multi-line header into per-column candidate and party labels.

    Every cell is tested against ``party_re``: matching cells are added to
    the party label of their column, all other cells to the candidate label.
    Pieces within a label are joined with single spaces.

    Returns:
        A ``(candidates, parties)`` tuple of two lists, each with one entry
        per column of the first header row.
    """
    rows = split_into_columns(lines, get_column_breaks(lines))

    width = len(rows[0])
    candidates = [""] * width
    parties = [""] * width

    for cells in rows:
        for idx, text in enumerate(cells):
            target = parties if party_re.match(text) else candidates
            if target[idx] and text:
                target[idx] += " "
            target[idx] += text

    return candidates, parties
def _parse_header(self, header_lines=None):
    """Turn the fixed-width header block into candidate and party labels.

    When ``header_lines`` is ``None`` the lines stored under
    ``self._context['header_lines']`` are used.  The column breaks computed
    for the header are kept on ``self._breaks``.

    A non-empty cell that exactly equals one of the known party words is
    added to its column's party label; any other non-empty cell is added to
    the candidate label.  Pieces are joined with single spaces.

    Returns:
        A ``(candidates, parties)`` tuple.  Normally these are two lists
        with one entry per column of the first header row.  For the
        non-primary "Governor" contest the result of
        ``self._general_gov_candidates_parties()`` is returned instead.
    """
    party_words = (
        "Democratic",
        "Iowa Green",
        "Party",
        "Republican",
        "Libertarian",
        "Nominated by",
        "Petition",
        "Constitution",
        "Socialist",
        "Workers Party",
    )

    lines = self._context['header_lines'] if header_lines is None else header_lines
    self._breaks = get_column_breaks(lines)
    rows = split_into_columns(lines, self._breaks)

    width = len(rows[0])
    parties = [''] * width
    candidates = [''] * width

    for cells in rows:
        for idx, text in enumerate(cells):
            if not text:
                continue
            target = parties if text in party_words else candidates
            # Join with a space only when the column already holds text.
            target[idx] = target[idx] + " " + text if target[idx] else text

    # Some result headers do not name a party; fall back to the party
    # recorded in the context, when there is one.
    if parties[0] == '' and 'party' in self._context:
        parties[0] = self._context['party']

    # HACK: the first page of the 2002 general Governor results has
    # misaligned columns, so that contest gets its labels from a dedicated
    # helper instead of from the parsed header.
    is_general_governor = (
        self._context['office'] == "Governor"
        and not self._context['primary']
    )
    if is_general_governor:
        return self._general_gov_candidates_parties()

    return candidates, parties
def parse_header_lines(self, lines):
    """Build the list of candidate labels from the header lines.

    A single-line header is handed to ``self.parse_header_line_simple``.
    A multi-line header is cut into columns whose cells are joined top to
    bottom, and the result is passed through ``self.merge_candidate_cols``.
    In both cases every label then goes through ``self.clean_candidate``.

    Returns:
        A ``(candidates, parties)`` tuple; ``parties`` is always an empty
        list.

    Raises:
        AssertionError: if the last four cleaned labels are not
            "OVER VOTES", "UNDER VOTES", "SCATTERING", "TOTAL" in that
            order.
    """
    # No party information is extracted here.
    parties = []
    if len(lines) == 1:
        candidates = self.parse_header_line_simple(lines[0])
    else:
        breaks = get_column_breaks(lines)
        parsed_lines = split_into_columns(lines, breaks)
        candidates = ["" for col in parsed_lines[0]]
        for cols in parsed_lines:
            for i in range(len(cols)):
                # The separator depends only on the text gathered so far,
                # so an empty cell below a non-empty one adds a trailing
                # space (presumably removed by clean_candidate -- verify).
                space = " " if len(candidates[i]) else ""
                candidates[i] = candidates[i] + space + cols[i]
        # NOTE(review): the nesting level of this call was reconstructed;
        # confirm that merging applies to the multi-line path only.
        candidates = self.merge_candidate_cols(candidates)
    candidates = [self.clean_candidate(c) for c in candidates]
    # Sanity-check the trailing summary columns.
    assert candidates[-1] == "TOTAL"
    assert candidates[-2] == "SCATTERING"
    assert candidates[-3] == "UNDER VOTES"
    assert candidates[-4] == "OVER VOTES"
    return candidates, parties