def scrape_old_vote(self, url):
    """Scrape a roll-call vote from an old-format vote page.

    The page is fetched with ``self.urlopen`` and parsed with
    ``self.soup_parser``.  The first ``<h3>`` holds a comma-separated
    header whose second field names the chamber and whose remaining
    fields form the motion text.  Totals are read from the second
    ``<table>`` on the page and individual votes from the first.

    :param url: address of the vote page; also recorded as the source.
    :returns: a ``Vote`` populated with counts and per-member votes.
    """
    vote_page = self.soup_parser(self.urlopen(url))

    header_parts = vote_page.h3.contents[0].split(', ')
    chamber_name = header_parts[1]
    chamber = 'lower' if chamber_name.startswith('House') else 'upper'

    # Everything after the first word of the chamber name is kept as the
    # location; a bare "House of Representatives" has no location.
    location = ' '.join(chamber_name.split(' ')[1:])
    if location.startswith('of Representatives'):
        location = ''

    # The motion itself may contain ", ", so re-join the remaining fields.
    motion = ', '.join(header_parts[2:])

    def get_count(cell):
        # An empty cell is treated as a count of zero.
        if not cell.contents:
            return 0
        return int(cell.contents[0])

    # Totals sit in the odd-numbered cells of the second table; look the
    # cells up once rather than once per count.
    count_cells = vote_page.findAll('table')[1].findAll('td')
    yes_count = get_count(count_cells[1])
    no_count = get_count(count_cells[3])
    excused_count = get_count(count_cells[5])
    absent_count = get_count(count_cells[7])
    other_count = excused_count + absent_count

    passed = yes_count > no_count

    # The second positional argument is passed as None here (presumably
    # the vote date, which this page does not provide -- confirm).
    vote = Vote(chamber, None, motion, passed,
                yes_count, no_count, other_count,
                excused_count=excused_count,
                absent_count=absent_count,
                location=location)
    vote.add_source(url)

    for td in vote_page.table.findAll('td'):
        # Skip empty cells instead of failing on contents[0].
        if not td.contents:
            continue
        cast = td.contents[0]
        # The element immediately preceding a vote cell holds the name.
        if cast == 'Yea':
            vote.yes(td.findPrevious().contents[0])
        elif cast == 'Nay':
            vote.no(td.findPrevious().contents[0])
        elif cast in ('Excused', 'Absent'):
            vote.other(td.findPrevious().contents[0])

    return vote
def scrape_new_vote(self, url):
    """Scrape a roll-call vote from a new-format vote page.

    The page is fetched with ``self.urlopen`` and parsed with
    ``self.soup_parser``.  Elements are located by their
    ``ctl00_contentMain_*`` ids: the header (comma-separated; second
    field is the chamber, remaining fields the motion), the four total
    cells, and the table of individual votes.

    :param url: address of the vote page; also recorded as the source.
    :returns: a ``Vote`` populated with counts and per-member votes.
    """
    vote_page = self.soup_parser(self.urlopen(url))

    header = vote_page.find(id="ctl00_contentMain_hdVote").contents[0]
    header_parts = header.split(', ')
    chamber_name = header_parts[1]
    chamber = 'lower' if chamber_name.startswith('House') else 'upper'

    # Everything after the first word of the chamber name is kept as the
    # location; a bare "House of Representatives" has no location.
    location = ' '.join(chamber_name.split(' ')[1:])
    if location.startswith('of Representatives'):
        location = ''

    # The motion itself may contain ", ", so re-join the remaining fields.
    motion = ', '.join(header_parts[2:])

    def get_count(element_id):
        # Read an integer total from the element with the given id; an
        # empty element is treated as zero rather than raising IndexError.
        cell = vote_page.find(id=element_id)
        if not cell.contents:
            return 0
        return int(cell.contents[0])

    yes_count = get_count("ctl00_contentMain_tdAyes")
    no_count = get_count("ctl00_contentMain_tdNays")
    excused_count = get_count("ctl00_contentMain_tdExcused")
    absent_count = get_count("ctl00_contentMain_tdAbsent")
    other_count = excused_count + absent_count

    passed = yes_count > no_count

    # The second positional argument is passed as None here (presumably
    # the vote date, which this page does not provide -- confirm).
    vote = Vote(chamber, None, motion, passed,
                yes_count, no_count, other_count,
                excused_count=excused_count,
                absent_count=absent_count,
                location=location)
    vote.add_source(url)

    vote_tbl = vote_page.find(id="ctl00_contentMain_tblVotes")
    for td in vote_tbl.findAll('td'):
        # Skip empty cells instead of failing on contents[0].
        if not td.contents:
            continue
        cast = td.contents[0]
        # The element immediately preceding a vote cell holds the name.
        if cast == 'Yea':
            vote.yes(td.findPrevious().contents[0])
        elif cast == 'Nay':
            vote.no(td.findPrevious().contents[0])
        elif cast in ('Excused', 'Absent'):
            vote.other(td.findPrevious().contents[0])

    return vote
def parse_vote(self, bill, actor, date, text, line):
    """Fetch a vote page, tally the listed voters and attach the vote to *bill*.

    Each ``<p>`` on the page that has the form ``LABEL--names--...`` is
    read as one list of voters.  ``YEAS`` and ``NAYS`` lists are recorded
    as yes/no votes; any other label is recorded as an "other" vote.
    Counts and the ``passed`` flag are derived from the recorded voters.

    :param bill: object the finished vote is added to via ``add_vote``.
    :param actor: unused by this method; kept for the caller's signature.
    :param date: passed through to ``Vote``.
    :param text: passed through to ``Vote`` (the motion text).
    :param line: mapping whose ``'href'`` is the path of the vote page.
    :raises IndexError: if no "<date> House|Senate" marker is on the page.
    """
    url = "http://leg1.state.va.us%s" % line['href']
    chambers = {'Senate': 'upper', 'House': 'lower'}
    with self.soup_context(url) as vote_data:
        # The chamber is the word following the first m/d/y date on the
        # page.  An alternation group is required here; a character class
        # would capture only a single letter.
        page_text = self.unescape(unicode(vote_data))
        house = chambers[
            re.findall(r'\d+/\d+/\d+\s+(House|Senate)', page_text)[0]]
        vote = Vote(house, date, text, None, 0, 0, 0)

        for cast in vote_data.findAll('p'):
            if cast.string is None:
                continue
            cleaned = cast.string.replace('\r\n', ' ')
            split_start = cleaned.find('--')
            voted = cleaned[0:split_start].strip()
            split_end = cleaned.find('--', split_start + 2)
            if split_end == -1:
                # No second "--": this paragraph carries no list of names.
                continue

            # Names are separated by ", ", but a surname may be followed
            # by a separate initials token ("Marshall, D.W.") that has to
            # be glued back onto the surname before it.
            names = []
            pending = ''
            for piece in cleaned[split_start + 2:split_end].split(", "):
                piece = piece.strip()
                if re.match(r'\w\.\w\.', piece):
                    names.append(pending + ', ' + piece)
                    pending = ''
                else:
                    if pending != '':
                        names.append(pending)
                    pending = piece
            # Flush the final surname; without this the last voter in
            # every list is silently dropped.
            if pending != '':
                names.append(pending)

            for voter in names:
                if voted == 'YEAS':
                    vote.yes(voter)
                elif voted == 'NAYS':
                    vote.no(voter)
                else:
                    vote.other(voter)

        vote['other_count'] = len(vote['other_votes'])
        vote['yes_count'] = len(vote['yes_votes'])
        vote['no_count'] = len(vote['no_votes'])
        vote['passed'] = (vote['yes_count'] > vote['no_count'])
        bill.add_vote(vote)