def parse_sos_dot_iowa_dot_gov_csv(path='temp/ia_temp.csv'):
    '''Parses the csv of IA candidates into a list of candidates using the
    standardized candi model.

    Args:
        path: Location of the csv file to read. Defaults to
            'temp/ia_temp.csv', the path this parser has always used.

    Returns:
        A list of Candidate objects, one per candidate row, in file order.

    Raises:
        OSError: If the csv file cannot be opened.
        IndexError: If a candidate row has fewer than seven columns left
            after cleaning.
        ValueError: If an office name contains the word 'District' but its
            last word is not an integer.
    '''
    # newline='' is what the csv module asks for, so that newlines embedded
    # in quoted fields reach the reader untouched.
    with open(path, 'r', newline='') as f:
        rows = list(reader(f))

    candidates = []
    for raw_row in rows:
        # csv.reader yields an empty list for a blank line; there is nothing
        # to parse and raw_row[0] would raise IndexError.
        if not raw_row:
            continue

        # Clean row (presumably fixing tabula-py conversion errors): keep the
        # first column even when blank, because a blank office means "same
        # office as the row above", and drop every other empty column.
        row = [raw_row[0]] + [col for col in raw_row[1:] if col != '']

        # Skip header rows
        if 'For the Office Of...' in row:
            continue
        # Skip no candidate rows
        if 'No Candidate' in row:
            continue
        # Skip weird formatting errors
        if len(row) == 1:
            continue

        # Personal information
        full_name = row[2]
        name_parts = full_name.split(' ')
        first_name = name_parts[0]
        last_name = name_parts[-1]

        # Office
        state = 'IA'
        office = row[0]
        office_words = office.split(' ')
        if office == '':
            if candidates:
                # If office is blank, then fill in from above row
                office = candidates[-1].office
                district = candidates[-1].district
            else:
                # No earlier candidate to copy from: leave the office blank
                # instead of crashing on candidates[-1].
                district = 'N/A'
        elif 'District' in office_words:
            # Record districts for applicable offices: the last word is the
            # district number, everything before 'District' is the office.
            district = int(office_words[-1])
            office = ' '.join(office_words[:office_words.index('District')])
        else:
            district = 'N/A'
        date_filed = row[6]

        # Party affiliation
        party = row[1]

        # Contact information
        mailing_address = row[3]
        work_phone = row[4]
        email = row[5]
        # Clean email for candidates who didn't provide
        if email == 'Did not provide':
            email = 'N/A'

        # Construct Candidate
        candidates.append(Candidate(full_name=full_name,
                                    first_name=first_name,
                                    last_name=last_name,
                                    state=state,
                                    office=office,
                                    district=district,
                                    date_filed=date_filed,
                                    party=party,
                                    mailing_address=mailing_address,
                                    work_phone=work_phone,
                                    email=email))
    return candidates
class TestCandidate(TestCase):
    '''Tests for Candidate.dictionary() and src.candidate.read_from_file().'''

    def setUp(self):
        '''Store a Candidate built from "foo bar baz" on the test instance.'''
        self.candidate = Candidate("foo bar baz")

    def test_dictionary_when_not_empty(self):
        '''"foo" must be a member of the fixture candidate's dictionary.'''
        # assertIn reports both operands on failure, unlike assertTrue.
        self.assertIn("foo", self.candidate.dictionary())

    def test_dictionary_when_empty(self):
        '''The empty string must not be a member of Candidate("").dictionary().'''
        self.assertNotIn("", Candidate("").dictionary())

    def test_dictionary_when_none(self):
        '''Candidate(None).dictionary() must raise the 'split' AttributeError.'''
        # The msg= keyword of assertRaises is only the text shown when nothing
        # is raised; assertRaisesRegex is what matches the exception message.
        with self.assertRaisesRegex(
                AttributeError,
                "'NoneType' object has no attribute 'split'"):
            "" in Candidate(None).dictionary()

    def test_read_from_file(self):
        '''read_from_file must return 5 candidates, the last containing "punners".'''
        candidates = src.candidate.read_from_file(
            "test1_candidate_5_plaintexts.txt")
        self.assertEqual(len(candidates), 5)
        self.assertIn("punners", candidates[-1].dictionary())
def test_candidate_creation():
    '''Check that a Candidate built from name keywords exposes those names.'''
    first, last = 'Barack', 'Obama'
    created = Candidate(first_name=first, last_name=last)

    # Each keyword passed to the constructor should read back unchanged.
    assert created.first_name == first
    assert created.last_name == last
def setUp(self):
    '''Store a Candidate built from "foo bar baz" on the test instance.'''
    fixture_text = "foo bar baz"
    self.candidate = Candidate(fixture_text)
def test_dictionary_when_none(self):
    '''Candidate(None).dictionary() must raise the 'split' AttributeError.'''
    # The msg= keyword of assertRaises is only the text shown when nothing is
    # raised; assertRaisesRegex is what matches the exception message.
    with self.assertRaisesRegex(
            AttributeError,
            "'NoneType' object has no attribute 'split'"):
        "" in Candidate(None).dictionary()
def test_dictionary_when_empty(self):
    '''The empty string must not be a member of Candidate("").dictionary().'''
    # assertNotIn reports both operands on failure, unlike assertFalse.
    self.assertNotIn("", Candidate("").dictionary())
def get_ks_candidates_general_2018():
    '''Parses the candidate listing page on the KS Secretary of State's
    website to construct a list of candidates for the 2018 General election
    using the standardized candi model.

    The page is fetched with get_kssos_dot_org_response(elecid='26') and every
    run of 25 <td> cells is treated as one candidate row.

    Returns:
        A list of Candidate objects, one per complete 25-cell row, in page
        order.
    '''
    cells_per_row = 25
    # Placeholder the page uses where no phone number was given.
    no_phone = '(000) 000-0000'

    def get(row, col, error='', default='N/A'):
        '''Returns the data at column col of a cleaned row, unless the data
        is equal to error, in which case default is returned.'''
        data = row[col]
        return default if data == error else data

    # Get table from the KS Secretary of State's website and parse it into a
    # list of rows with stripped strings.
    # NOTE(review): the previous docstring said "Primary" while the function
    # name says general; confirm that elecid='26' is the general election.
    content = get_kssos_dot_org_response(elecid='26').content
    soup = BeautifulSoup(content, 'html.parser')
    tds = soup.find_all('td')
    rows = [tds[i:i + cells_per_row]
            for i in range(0, len(tds), cells_per_row)]
    cleaned_rows = [[col.getText().strip() for col in row] for row in rows]

    # Convert each row into a Candidate object
    candidates = []
    for row in cleaned_rows:
        # When the cell count is not a multiple of 25 the final chunk is
        # short; it cannot be a full candidate and would raise IndexError on
        # the column lookups below.
        if len(row) < cells_per_row:
            continue

        # Personal information
        full_name = get(row, col=0)
        first_name = get(row, col=7)
        last_name = get(row, col=9)

        # Office
        state = 'KS'
        office = get(row, col=1)
        if office in standardized_offices:
            office = standardized_offices[office]
        # '0' in these columns is mapped to 'N/A'.
        district = get(row, col=2, error='0')
        position = get(row, col=3, error='0')
        division = get(row, col=4, error='0')
        date_filed = get(row, col=22)

        # Party affiliation
        party = get(row, col=5)

        # Contact information
        home_address = get(row, col=11)
        if home_address != 'N/A':
            # Append the next two columns around the fixed state 'KS'
            home_address += (', ' + get(row, col=12)
                             + ', KS, ' + get(row, col=13))
        mailing_address = get(row, col=14)
        if mailing_address != 'N/A':
            # Append the next two columns around the fixed state 'KS'
            mailing_address += (', ' + get(row, col=15)
                                + ', KS, ' + get(row, col=16))
        home_phone = get(row, col=17, error=no_phone)
        work_phone = get(row, col=18, error=no_phone)
        cell_phone = get(row, col=19, error=no_phone)
        email = get(row, col=20)
        website = get(row, col=21)

        # Construct Candidate
        candidate = Candidate(full_name=full_name,
                              first_name=first_name,
                              last_name=last_name,
                              state=state,
                              office=office,
                              district=district,
                              position=position,
                              division=division,
                              date_filed=date_filed,
                              party=party,
                              home_address=home_address,
                              mailing_address=mailing_address,
                              home_phone=home_phone,
                              work_phone=work_phone,
                              cell_phone=cell_phone,
                              email=email,
                              website=website)
        candidates.append(candidate)
    return candidates