コード例 #1
0
    def scrape_old_vote(self, url):
        """Build a ``Vote`` from a roll-call page read via ``<h3>``/``<table>``.

        The page is fetched with ``self.urlopen`` and parsed with
        ``self.soup_parser``.  The first ``<h3>`` holds a comma-separated
        header: its second field names the chamber (a name starting with
        ``House`` maps to ``'lower'``, anything else to ``'upper'``) and
        all fields after the second are re-joined into the motion text.
        Tallies are read from the second ``<table>`` on the page and the
        individual votes from the first one.

        :param url: address of the vote page; also recorded as the vote's
            source.
        :returns: the populated ``Vote``.  Its second constructor argument
            is passed as ``None``.
        :raises IndexError: if the header has fewer than two
            comma-separated fields, or the page has fewer tables/cells
            than are indexed below.
        """
        vote_page = self.soup_parser(self.urlopen(url))

        header_fields = vote_page.h3.contents[0].split(', ')

        chamber_name = header_fields[1]
        chamber = 'lower' if chamber_name.startswith('House') else 'upper'

        # Everything after the first word of the chamber field is kept as
        # the location, unless it is just the tail of
        # "House of Representatives".
        location = ' '.join(chamber_name.split(' ')[1:])
        if location.startswith('of Representatives'):
            location = ''

        motion = ', '.join(header_fields[2:])

        def get_count(cell):
            # An empty tally cell is counted as zero.
            if not cell.contents:
                return 0
            return int(cell.contents[0])

        # The tallies are read from cells 1, 3, 5 and 7 of the second
        # table; fetch the cell list once instead of once per tally.
        result_cells = vote_page.findAll('table')[1].findAll('td')
        yes_count = get_count(result_cells[1])
        no_count = get_count(result_cells[3])
        excused_count = get_count(result_cells[5])
        absent_count = get_count(result_cells[7])
        other_count = excused_count + absent_count

        passed = yes_count > no_count

        vote = Vote(chamber, None, motion, passed,
                    yes_count, no_count,
                    other_count, excused_count=excused_count,
                    absent_count=absent_count,
                    location=location)
        vote.add_source(url)

        vote_tbl = vote_page.table
        for td in vote_tbl.findAll('td'):
            # get_count above already allows for empty cells; tolerate
            # them here too rather than failing with IndexError.
            if not td.contents:
                continue

            cast = td.contents[0]
            if cast == 'Yea':
                record = vote.yes
            elif cast == 'Nay':
                record = vote.no
            elif cast in ('Excused', 'Absent'):
                record = vote.other
            else:
                continue

            # The voter's name is the first child of the element that
            # precedes the cell holding the vote value.
            record(td.findPrevious().contents[0])

        return vote
コード例 #2
0
    def scrape_new_vote(self, url):
        """Build a ``Vote`` from a page laid out with ``ctl00_contentMain_*`` ids.

        The page is fetched with ``self.urlopen`` and parsed with
        ``self.soup_parser``.  The ``ctl00_contentMain_hdVote`` element
        holds a comma-separated header: its second field names the chamber
        (a name starting with ``House`` maps to ``'lower'``, anything else
        to ``'upper'``) and all fields after the second are re-joined into
        the motion text.  Tallies and individual votes are read from the
        elements with the ids used below.

        :param url: address of the vote page; also recorded as the vote's
            source.
        :returns: the populated ``Vote``.  Its second constructor argument
            is passed as ``None``.
        """
        page = self.soup_parser(self.urlopen(url))

        heading = page.find(id="ctl00_contentMain_hdVote").contents[0]
        fields = heading.split(', ')

        chamber_name = fields[1]
        chamber = 'lower' if chamber_name.startswith('House') else 'upper'

        # Everything after the first word of the chamber field is kept as
        # the location, unless it is just the tail of
        # "House of Representatives".
        trailing_words = chamber_name.split(' ')[1:]
        location = ' '.join(trailing_words)
        if location.startswith('of Representatives'):
            location = ''

        motion = ', '.join(fields[2:])

        def tally(element_id):
            # The count is the first child of the element with this id.
            return int(page.find(id=element_id).contents[0])

        ayes = tally("ctl00_contentMain_tdAyes")
        nays = tally("ctl00_contentMain_tdNays")
        excused = tally("ctl00_contentMain_tdExcused")
        absent = tally("ctl00_contentMain_tdAbsent")

        vote = Vote(chamber, None, motion, ayes > nays,
                    ayes, nays,
                    excused + absent, excused_count=excused,
                    absent_count=absent,
                    location=location)
        vote.add_source(url)

        roll_call = page.find(id="ctl00_contentMain_tblVotes")
        for cell in roll_call.findAll('td'):
            cast = cell.contents[0]
            if cast == 'Yea':
                record = vote.yes
            elif cast == 'Nay':
                record = vote.no
            elif cast in ['Excused', 'Absent']:
                record = vote.other
            else:
                # Not a vote-value cell (e.g. the cell holding a name).
                continue

            # The voter's name is the first child of the element that
            # precedes the cell holding the vote value.
            record(cell.findPrevious().contents[0])

        return vote
コード例 #3
0
 def parse_vote(self, bill, actor, date, text, line):
     """Parse the vote page linked from *line* and add the vote to *bill*.

     The page is opened through ``self.soup_context``.  The chamber is
     taken from the first ``<digits>/<digits>/<digits>`` token that is
     followed by the word ``House`` or ``Senate``.  Every ``<p>`` whose
     text looks like ``LABEL--name, name, ...--rest`` contributes voters:
     ``YEAS`` are recorded as yes votes, ``NAYS`` as no votes and any
     other label as other votes.  The three counts are then set from the
     lengths of the recorded voter lists, and the vote is marked as
     passed when the yes count exceeds the no count.

     :param bill: object receiving the result through ``bill.add_vote``.
     :param actor: not used by this method.
     :param date: passed through to the ``Vote`` constructor.
     :param text: passed through to the ``Vote`` constructor.
     :param line: object whose ``'href'`` item is appended to the site
         root to form the vote page URL.
     :returns: ``None``.
     :raises IndexError: if no date followed by ``House``/``Senate`` is
         found in the page text.
     """
     url = "http://leg1.state.va.us%s" % line['href']
     abbr = {'S': 'upper', 'H': 'lower'}
     with self.soup_context(url) as vote_data:
         page_text = self.unescape(unicode(vote_data))
         # Use an alternation for the chamber name.  The former character
         # class matched any single letter of "House"/"Senate", so a date
         # followed by some other word could yield a key missing from
         # ``abbr``.
         chamber_names = re.findall(
             r'\d+/\d+/\d+\s+(House|Senate)\b', page_text)
         house = abbr[chamber_names[0][0]]
         vote = Vote(house, date, text, None, 0, 0, 0)
         for cast in vote_data.findAll('p'):
             if cast.string is None:
                 continue
             cleaned = cast.string.replace('\r\n', ' ')

             # Only paragraphs with two '--' separators carry a name list.
             split_start = cleaned.find('--')
             if split_start == -1:
                 continue
             split_end = cleaned.find('--', split_start + 2)
             if split_end == -1:
                 continue
             voted = cleaned[:split_start].strip()

             # Names are separated by ", ", but a piece that looks like
             # initials ("X.Y.") is joined back onto the piece before it,
             # so a piece is held as pending until the next one is seen.
             names = []
             pending = ''
             for piece in cleaned[split_start + 2:split_end].split(", "):
                 # Strip first so stray whitespace neither hides initials
                 # from the match nor ends up in the recorded name.
                 piece = piece.strip()
                 if re.match(r'\w\.\w\.', piece):
                     names.append(pending + ', ' + piece)
                     pending = ''
                 else:
                     if pending != '':
                         names.append(pending)
                     pending = piece
             # Flush the final pending name; without this the last voter
             # of every list is lost.
             if pending != '':
                 names.append(pending)

             if voted == 'YEAS':
                 record = vote.yes
             elif voted == 'NAYS':
                 record = vote.no
             else:
                 record = vote.other
             for voter in names:
                 record(voter)

         # The Vote was created with zero counts; derive the real ones
         # from the voters recorded above.
         vote['other_count'] = len(vote['other_votes'])
         vote['yes_count'] = len(vote['yes_votes'])
         vote['no_count'] = len(vote['no_votes'])
         vote['passed'] = (vote['yes_count'] > vote['no_count'])
         bill.add_vote(vote)