Example #1
0
    def scrape_votes(self, link, chamber, bill):
        """Scrape the vote rows on ``link`` and attach each one to ``bill``.

        The ``td`` cells of the first table on the page are read as a flat
        list; after the three leading cells are dropped, every five
        consecutive cells are treated as one row (actor, date, name-and-text,
        name, text).  Rows whose text mentions "cow"/"COW" are skipped, as are
        rows in which no ``yes-no[-other]`` tally can be found.

        link -- URL of the votes page; also recorded as each vote's source.
        chamber -- value handed to ``Vote`` unchanged.
        bill -- object that receives each vote through ``bill.add_vote``.
        """
        prefix = "FINAL VOTE - "
        tally_re = re.compile("([0-9]+)-([0-9]+)-?([0-9]+)?")

        with self.lxml_context(link) as votes_page:
            votes_table = votes_page.cssselect("table")[0]
            # Eliminate table headings and unnecessary element
            votes_elements = votes_table.cssselect("td")[3:]
            for actor, date, name_and_text, name, text in self.grouper(5, votes_elements):
                description = text.text_content()
                if "cow" in description or "COW" in description:
                    continue
                vote_date = dt.datetime.strptime(date.text_content(), "%m/%d/%Y")

                # str.lstrip() takes a *set of characters*, so the previous
                # lstrip("FINAL VOTE - ") also ate leading letters of the
                # motion itself (e.g. "Third" became "hird").  Remove the
                # literal prefix instead.
                description = description.lstrip()
                if description.startswith(prefix):
                    description = description[len(prefix):]

                motion, sep, votes = description.partition(".")
                passed = "passed" in votes

                votes_match = tally_re.search(votes)
                if votes_match is None:
                    # No tally in this row, so there is nothing to record.
                    continue

                # Store the counts as integers rather than regex substrings.
                yes_count = int(votes_match.group(1))
                no_count = int(votes_match.group(2))
                # The third number is optional in the tally.
                other_count = int(votes_match.group(3) or 0)

                vote = Vote(chamber, vote_date, motion, passed, yes_count, no_count, other_count)
                vote.add_source(link)
                bill.add_vote(vote)
Example #2
0
    def scrape_vote(self, bill, name, url):
        """Download a roll-call PDF and add the resulting vote to ``bill``.

        ``name`` must look like "<Senate|House> Vote on <...>,<motion>";
        anything else is ignored and the method returns without adding a
        vote.  The PDF at ``url`` is written to a temporary file, converted
        with ``pdf_to_lxml`` and read line by line: a YEAS / NAYS / ABSENT
        heading switches the current section, a "Total--N" line sets that
        section's count, and any other line is recorded as a voter name.

        bill -- object that receives the vote through ``bill.add_vote``.
        name -- link text describing the chamber and motion.
        url -- address of the PDF; also recorded as the vote's source.
        """
        match = re.match('^(Senate|House) Vote on [^,]*,(.*)$', name)

        if not match:
            return

        chamber = {'Senate': 'upper', 'House': 'lower'}[match.group(1)]
        motion = match.group(2).strip()

        if motion.startswith('FINAL PASSAGE'):
            motion_type = 'passage'
        elif motion.startswith('AMENDMENT'):
            motion_type = 'amendment'
        elif 'ON 3RD READIN' in motion:
            # The previous test looked for the misspelling 'READINT'; the
            # shorter needle matches that and the correct 'READING'.
            motion_type = 'reading:3'
        else:
            motion_type = 'other'

        vote = Vote(chamber, None, motion, None,
                    None, None, None)
        vote['type'] = motion_type
        vote.add_source(url)

        sections = {'YEAS': 'yes', 'NAYS': 'no', 'ABSENT': 'other'}
        recorders = {'yes': vote.yes, 'no': vote.no, 'other': vote.other}
        total_re = re.compile(r'^Total--(\d+)$')

        with self.urlopen(url) as text:
            (fd, temp_path) = tempfile.mkstemp()
            try:
                with os.fdopen(fd, 'wb') as w:
                    w.write(text)
                html = pdf_to_lxml(temp_path)
            finally:
                # Remove the temporary file even when conversion fails.
                os.remove(temp_path)

            section = None
            body = html.xpath('string(/html/body)')
            for line in body.replace(u'\xa0', '\n').split('\n'):
                line = line.replace(' ', '').strip()
                if not line:
                    continue

                if line in sections:
                    section = sections[line]
                elif section:
                    total = total_re.match(line)
                    if total:
                        vote['%s_count' % section] = int(total.group(1))
                    else:
                        recorders[section](line)

        # The PDFs oddly don't say whether a vote passed or failed.
        # Hopefully passage just requires yes_votes > not_yes_votes
        vote['passed'] = (
            vote['yes_count'] > (vote['no_count'] + vote['other_count']))

        bill.add_vote(vote)
Example #3
0
    def parse_vote(self, bill, action, act_chamber, act_date, url):
        """Build a ``Vote`` from an action line and its roll-call page.

        The totals come from the "Y<n> N<n> ..." tally embedded in
        ``action``; up to three further single-letter categories are summed
        into the "other" count.  The motion and the member lists are read
        from the page at ``url`` (relative to the legislature's base URL).

        bill -- not used by this method.
        action -- action text containing the tally.
        act_chamber, act_date -- passed to ``Vote`` unchanged.
        url -- page path appended to the base URL; the full URL is recorded
            as the vote's source.

        Returns the populated ``Vote``.
        """
        url = "http://www.legis.state.ak.us/basis/%s" % url
        info_page = self.soup_parser(self.urlopen(url))

        tally = re.findall(r'Y(\d+) N(\d+)\s*(?:\w(\d+))*\s*(?:\w(\d+))*'
                           r'\s*(?:\w(\d+))*', action)[0]
        # Unmatched optional groups come back as '' and count as zero.
        yes, no, o1, o2, o3 = [int(n) if n else 0 for n in tally]
        other = o1 + o2 + o3

        votes = info_page.findAll('pre', text=re.compile('Yeas'),
                                  limit=1)[0].split('\n\n')

        motion = info_page.findAll(text=re.compile('The question being'))[0]
        motion = re.findall(r'The question being:\s*"(.*)\?"',
                            motion, re.DOTALL)[0].replace('\n', ' ')

        vote = Vote(act_chamber, act_date, motion, yes > no, yes, no, other)

        recorders = (
            ('Yeas: ', vote.yes),
            ('Nays: ', vote.no),
            ('Excused: ', vote.other),
            ('Absent: ', vote.other),
        )
        for vote_list in votes:
            for prefix, record in recorders:
                if vote_list.startswith(prefix):
                    # Slice by the prefix's real length: 'Absent: ' is eight
                    # characters, but was previously cut at nine, dropping
                    # the first letter of the first absent member.
                    for name in vote_list[len(prefix):].split(','):
                        record(name.strip())
                    break

        vote.add_source(url)
        return vote
Example #4
0
    def parse_vote_new(self, bill, chamber, url):
        """Read a roll-call page and add the resulting vote to ``bill``.

        The second row of the page's first table supplies date, motion, the
        three counts and the result; rows from the fourth onward that have
        exactly two cells supply one member and how that member voted.
        """
        vote_page = BeautifulSoup(self.urlopen(url))
        table = vote_page.table
        summary = table.findAll('tr')[1]

        def summary_cell(index):
            # First child of the index-th cell in the summary row.
            return summary.findAll('td')[index].contents[0]

        when = dt.datetime.strptime(summary.td.contents[0], '%m/%d/%Y')
        motion = summary_cell(1)
        ayes = int(summary_cell(2))
        nays = int(summary_cell(3))
        absent = int(summary_cell(4))
        passed = summary_cell(5) == 'Pass'

        vote = Vote(chamber, when, motion, passed, ayes, nays, absent)
        vote.add_source(url)

        for row in table.findAll('tr')[3:]:
            cells = row.findAll('td')
            if len(cells) == 2:
                # Keep only the part of the first cell before " of".
                member = row.td.contents[0].split(' of')[0]
                cast = cells[1].contents[0]
                if cast.startswith('Yea'):
                    record = vote.yes
                elif cast.startswith('Nay'):
                    record = vote.no
                else:
                    record = vote.other
                record(member)

        bill.add_vote(vote)
Example #5
0
    def add_vote(self, bill, chamber, date, line, text):
        """Create a vote from an action's text and attach it to ``bill``.

        The yes/no totals are taken from the first "Ayes N, Noes N" pair in
        ``text`` and the vote type from the first ``motion_classifiers``
        pattern that matches it ('other' when none does).  If a link whose
        href contains "/votes/" is found, it is recorded as the source and
        downloaded, and the file is handed to ``add_house_votes`` ('av' in
        the link) or ``add_senate_votes`` ('sv' in the link).

        Raises IndexError when ``text`` contains no "Ayes N, Noes N" pair.
        """
        votes = re.findall(r'Ayes (\d+)\, Noes (\d+)', text)
        yes, no = int(votes[0][0]), int(votes[0][1])

        vtype = 'other'
        # items() exists on both Python 2 and Python 3 dicts (iteritems()
        # does not); the loop variable no longer shadows the builtin `type`.
        for regex, motion_type in motion_classifiers.items():
            if re.match(regex, text):
                vtype = motion_type
                break

        v = Vote(chamber, date, text, yes > no, yes, no, 0, type=vtype)

        # fetch the vote itself
        # NOTE(review): the XPath starts with '//', which searches from the
        # document root rather than from `line` -- confirm that is intended.
        link = line.xpath('//a[contains(@href, "/votes/")]')
        if link:
            link = link[0].get('href')
            v.add_source(link)

            filename, resp = self.urlretrieve(link)

            if 'av' in link:
                self.add_house_votes(v, filename)
            elif 'sv' in link:
                self.add_senate_votes(v, filename)

        bill.add_vote(v)
Example #6
0
    def scrape_vote(self, bill, date, url):
        """Scrape one roll-call page and add the vote to ``bill``.

        The page header (an ``h4`` whose id contains 'hdVote') is split on
        ', ': the second piece names the chamber and, optionally, a
        committee; the remaining pieces form the motion.  Counts are read
        from the cells whose ids contain tdAyes / tdNays / tdExcused /
        tdAbsent, and individual votes from the table whose id contains
        'tblVotes'.

        bill -- object that receives the vote through ``bill.add_vote``.
        date -- passed to ``Vote`` unchanged.
        url -- page address; also recorded as the vote's source.

        Raises ScrapeError when the header names neither House nor Senate.
        """
        with self.urlopen(url) as page:
            page = lxml.html.fromstring(page)

            header = page.xpath("string(//h4[contains(@id, 'hdVote')])")

            location = header.split(', ')[1]

            if location.startswith('House'):
                chamber = 'lower'
            elif location.startswith('Senate'):
                chamber = 'upper'
            else:
                # `chamber` is unbound on this branch; formatting it raised
                # UnboundLocalError instead of the intended ScrapeError.
                raise ScrapeError("Bad chamber: %s" % location)

            committee = ' '.join(location.split(' ')[1:]).strip()
            if not committee or committee.startswith('of Representatives'):
                committee = None

            motion = ', '.join(header.split(', ')[2:]).strip()

            def count(cell_id):
                # Integer content of the cell whose id contains cell_id.
                return int(page.xpath(
                    "string(//td[contains(@id, '%s')])" % cell_id))

            yes_count = count('tdAyes')
            no_count = count('tdNays')
            excused_count = count('tdExcused')
            absent_count = count('tdAbsent')
            other_count = excused_count + absent_count

            passed = yes_count > no_count

            if motion.startswith('Do Pass'):
                vote_type = 'passage'
            elif motion == 'Concurred in amendments':
                vote_type = 'amendment'
            elif motion == 'Veto override':
                vote_type = 'veto_override'
            else:
                vote_type = 'other'

            vote = Vote(chamber, date, motion, passed, yes_count, no_count,
                        other_count)
            vote['type'] = vote_type

            if committee:
                vote['committee'] = committee

            vote.add_source(url)

            recorders = {'Yea': vote.yes, 'Nay': vote.no,
                         'Excused': vote.other, 'Absent': vote.other}
            for td in page.xpath("//table[contains(@id, 'tblVotes')]/tr/td"):
                record = recorders.get(td.text)
                if record:
                    # The member's name is in the cell just before this one.
                    record(td.getprevious().text.strip())

            bill.add_vote(vote)
Example #7
0
    def scrape_votes(self, link, chamber, bill):
        """Fetch the votes page at ``link`` and add every vote to ``bill``.

        The ``td`` cells of the page's first table are flattened, the first
        three are dropped, and the rest are consumed five at a time as
        (actor, date, name-and-text, name, text).  Rows mentioning
        "cow"/"COW" and rows without a ``yes-no[-other]`` tally are skipped.

        link -- URL to fetch; also recorded as each vote's source.
        chamber -- value handed to ``Vote`` unchanged.
        bill -- object that receives each vote through ``bill.add_vote``.
        """
        final_vote_prefix = 'FINAL VOTE - '
        tally_re = re.compile('([0-9]+)-([0-9]+)-?([0-9]+)?')

        with self.urlopen(link) as votes_page_html:
            votes_page = lxml.html.fromstring(votes_page_html)
            votes_table = votes_page.cssselect('table')[0]
            # Eliminate table headings and unnecessary element
            votes_elements = votes_table.cssselect('td')[3:]
            for actor, date, name_and_text, name, text in grouper(5, votes_elements):
                summary = text.text_content()
                if 'cow' in summary or 'COW' in summary:
                    continue
                vote_date = dt.datetime.strptime(date.text_content(), '%m/%d/%Y')

                # lstrip('FINAL VOTE - ') strips any of those *characters*,
                # not the phrase, so it also removed leading letters of the
                # motion.  Strip the literal prefix only.
                summary = summary.lstrip()
                if summary.startswith(final_vote_prefix):
                    summary = summary[len(final_vote_prefix):]

                motion, sep, votes = summary.partition('.')
                passed = 'passed' in votes

                votes_match = tally_re.search(votes)
                if votes_match is None:
                    # Nothing to record without a tally.
                    continue

                # Counts are stored as integers, not as matched substrings;
                # the third number is optional.
                yes_count = int(votes_match.group(1))
                no_count = int(votes_match.group(2))
                other_count = int(votes_match.group(3) or 0)

                vote = Vote(chamber, vote_date, motion, passed,
                            yes_count, no_count, other_count)
                vote.add_source(link)
                bill.add_vote(vote)
Example #8
0
    def scrape_vote(self, bill, name, url):
        """Parse the roll-call PDF at ``url`` and add the vote to ``bill``.

        Nothing is added unless ``name`` has the form
        "<Senate|House> Vote on <...>,<motion>".  The PDF is saved to a
        temporary file, converted with ``pdf_to_lxml`` and scanned line by
        line: YEAS / NAYS / ABSENT headings select the current section,
        "Total--N" lines set that section's count, and every other line is
        recorded as a voter in the current section.

        bill -- object that receives the vote through ``bill.add_vote``.
        name -- link text describing the chamber and motion.
        url -- address of the PDF; also recorded as the vote's source.
        """
        match = re.match("^(Senate|House) Vote on [^,]*,(.*)$", name)

        if not match:
            return

        chamber = {"Senate": "upper", "House": "lower"}[match.group(1)]
        motion = match.group(2).strip()

        if motion.startswith("FINAL PASSAGE"):
            motion_type = "passage"
        elif motion.startswith("AMENDMENT"):
            motion_type = "amendment"
        elif "ON 3RD READIN" in motion:
            # Shortened from the misspelled "READINT" so that the correctly
            # spelled "READING" is recognised too.
            motion_type = "reading:3"
        else:
            motion_type = "other"

        vote = Vote(chamber, None, motion, None, None, None, None)
        vote["type"] = motion_type
        vote.add_source(url)

        headings = {"YEAS": "yes", "NAYS": "no", "ABSENT": "other"}
        total_re = re.compile(r"^Total--(\d+)$")

        with self.urlopen(url) as text:
            (fd, temp_path) = tempfile.mkstemp()
            try:
                with os.fdopen(fd, "wb") as w:
                    w.write(text)
                html = pdf_to_lxml(temp_path)
            finally:
                # Always clean up the temporary file, even if the
                # conversion raises.
                os.remove(temp_path)

            current = None
            body = html.xpath("string(/html/body)")
            for line in body.replace(u"\xa0", "\n").split("\n"):
                line = line.replace(" ", "").strip()
                if not line:
                    continue

                if line in headings:
                    current = headings[line]
                    continue
                if not current:
                    # Text before the first heading is ignored.
                    continue

                total = total_re.match(line)
                if total:
                    vote["%s_count" % current] = int(total.group(1))
                elif current == "yes":
                    vote.yes(line)
                elif current == "no":
                    vote.no(line)
                else:
                    vote.other(line)

        # The PDFs oddly don't say whether a vote passed or failed.
        # Hopefully passage just requires yes_votes > not_yes_votes
        vote["passed"] = (
            vote["yes_count"] > (vote["no_count"] + vote["other_count"]))

        bill.add_vote(vote)
Example #9
0
    def scrape_old_vote(self, url):
        """Return a ``Vote`` built from the roll-call page at ``url``.

        The page's ``h3`` header is split on ', ': the second piece gives
        the chamber (plus an optional location), the rest the motion.
        Totals come from the second table on the page and individual votes
        from the first.
        """
        vote_page = self.soup_parser(self.urlopen(url))

        header = vote_page.h3.contents[0]
        header_parts = header.split(', ')

        chamber_name = header_parts[1]
        chamber = 'lower' if chamber_name.startswith('House') else 'upper'

        # Everything after the first word of the chamber piece, unless that
        # is just the rest of "House of Representatives".
        location = ' '.join(chamber_name.split(' ')[1:])
        if location.startswith('of Representatives'):
            location = ''

        motion = ', '.join(header_parts[2:])

        def cell_value(td):
            # An empty cell counts as zero.
            return int(td.contents[0]) if td.contents else 0

        summary_cells = vote_page.findAll('table')[1].findAll('td')
        yes_count, no_count, excused_count, absent_count = [
            cell_value(summary_cells[position]) for position in (1, 3, 5, 7)]
        other_count = excused_count + absent_count

        vote = Vote(chamber, None, motion, yes_count > no_count,
                    yes_count, no_count, other_count,
                    excused_count=excused_count,
                    absent_count=absent_count,
                    location=location)
        vote.add_source(url)

        for td in vote_page.table.findAll('td'):
            label = td.contents[0]
            if label == 'Yea':
                record = vote.yes
            elif label == 'Nay':
                record = vote.no
            elif label in ['Excused', 'Absent']:
                record = vote.other
            else:
                continue
            # The element before the marker cell holds the member.
            record(td.findPrevious().contents[0])

        return vote
Example #10
0
    def scrape_votes(self, bill, sponsor, link):
        """Parse every vote in the page's ``lblVoteData`` span onto ``bill``.

        The span text is split on "<bill_id> by <sponsor> - "; each piece
        after the first is one vote, itself split on a run of ten
        non-breaking spaces.  The first segment is the motion (and may
        contain the date); later segments may carry "Ayes...N" / "Noes...N"
        totals and the lists of members voting aye / no.

        bill -- dict-like bill; ``bill_id`` and ``chamber`` are read and
            votes are attached through ``bill.add_vote``.
        sponsor -- sponsor name as it appears in the vote text.
        link -- URL of the vote page; also recorded as each vote's source.

        Returns ``bill``.
        """
        # Compiled once instead of once per vote.
        count_re = re.compile(r'\d+$')
        aye_re = re.compile('^.+voting aye were: (.+) -')
        no_re = re.compile('^.+voting no were: (.+) -')
        date_re = re.compile(r'(\d+/\d+/\d+)')

        with self.urlopen(link) as page:
            page = lxml.html.fromstring(page)
            raw_vote_data = page.xpath("//span[@id='lblVoteData']")[0].text_content()
            raw_vote_data = raw_vote_data.strip().split('%s by %s - ' % (bill['bill_id'], sponsor))[1:]
            for raw_vote in raw_vote_data:
                raw_vote = raw_vote.split(u'\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0')
                motion = raw_vote[0]

                vote_date = date_re.search(motion)
                if vote_date:
                    vote_date = datetime.datetime.strptime(vote_date.group(), '%m/%d/%Y')

                # A vote consisting of the motion alone has no second
                # segment to inspect.
                passed = ('Passed' in motion) or (
                    len(raw_vote) > 1 and 'Adopted' in raw_vote[1])

                yes_count = None
                no_count = None
                other_count = 0
                ayes = []
                nos = []

                for v in raw_vote[1:]:
                    total = count_re.search(v)
                    if v.startswith('Ayes...') and total:
                        yes_count = int(total.group())
                    elif v.startswith('Noes...') and total:
                        no_count = int(total.group())
                    else:
                        names = aye_re.search(v)
                        if names:
                            ayes = names.group(1).split(', ')
                            continue
                        names = no_re.search(v)
                        if names:
                            nos = names.group(1).split(', ')

                # Compare against None: a genuine count of 0 (e.g. a 30-0
                # vote) is falsy and used to make both totals reset to 0.
                if yes_count is not None and no_count is not None:
                    passed = yes_count > no_count
                else:
                    yes_count = no_count = 0

                vote = Vote(bill['chamber'], vote_date, motion, passed, yes_count, no_count, other_count)
                vote.add_source(link)
                for a in ayes:
                    vote.yes(a)
                for n in nos:
                    vote.no(n)
                bill.add_vote(vote)

        return bill
Example #11
0
    def scrape_lower_vote(self, url):
        """Return a 'lower' chamber ``Vote`` scraped from the page at ``url``.

        The motion is assembled from the elements mentioning the amendment
        number, reading number and floor actions; the date and the three
        totals are read from their labelled elements; individual votes come
        from the nested roll-call table, where the first character of each
        row decides how the member is recorded.
        """
        with self.urlopen(url) as page:
            page = lxml.html.fromstring(page)

            table = page.xpath("/html/body/table/tr[3]/td/table/tr/"
                               "td[3]/table/tr/td/table[3]")[0]

            labels = ("Amendment Number", "Reading Number", "Floor Actions")
            motion = " ".join(
                page.xpath("string(//*[contains(text(), '%s')])" % label).strip()
                for label in labels).strip()

            date = datetime.datetime.strptime(
                page.xpath('string(//*[contains(text(), "Date:")]'
                           '/following-sibling::*)'),
                "%m/%d/%Y")

            def total(label):
                # The number follows ' - ' in the labelled element's text.
                found = page.xpath('string(//*[contains(text(), "%s")])' % label)
                return int(found.split(' - ')[1])

            yeas = total("Yeas")
            nays = total("Nays")
            nv = total("Not Voting")

            vote = Vote('lower', date, motion, yeas > (nays + nv),
                        yeas, nays, nv)
            vote.add_source(url)

            for row in table.xpath("tr/td/table/tr"):
                text = re.sub(r"\s+", r" ", row.xpath("string()"))
                # Everything after the first whitespace-separated token.
                name = " ".join(text.split()[1:])

                marker = text[0]
                if marker == "Y":
                    vote.yes(name)
                elif marker == "N":
                    vote.no(name)
                elif marker in ("-", "C"):
                    vote.other(name)

            return vote
Example #12
0
    def scrape_new_vote(self, url):
        """Return a ``Vote`` scraped from the roll-call page at ``url``.

        The header, the four totals and the roll-call table are all located
        by their ``ctl00_contentMain_*`` element ids.  The header is split
        on ', ' into a chamber piece (with optional location) and the
        motion.
        """
        vote_page = self.soup_parser(self.urlopen(url))

        def first_child(element_id):
            # First child node of the element carrying this id.
            return vote_page.find(id=element_id).contents[0]

        header = first_child("ctl00_contentMain_hdVote")
        pieces = header.split(', ')

        chamber_name = pieces[1]
        chamber = 'lower' if chamber_name.startswith('House') else 'upper'

        location = ' '.join(chamber_name.split(' ')[1:])
        if location.startswith('of Representatives'):
            location = ''

        motion = ', '.join(pieces[2:])

        yes_count = int(first_child("ctl00_contentMain_tdAyes"))
        no_count = int(first_child("ctl00_contentMain_tdNays"))
        excused_count = int(first_child("ctl00_contentMain_tdExcused"))
        absent_count = int(first_child("ctl00_contentMain_tdAbsent"))
        other_count = excused_count + absent_count

        vote = Vote(chamber, None, motion, yes_count > no_count,
                    yes_count, no_count, other_count,
                    excused_count=excused_count,
                    absent_count=absent_count,
                    location=location)
        vote.add_source(url)

        roll_call = vote_page.find(id="ctl00_contentMain_tblVotes")
        for cell in roll_call.findAll('td'):
            marker = cell.contents[0]
            if marker == 'Yea':
                vote.yes(cell.findPrevious().contents[0])
            elif marker == 'Nay':
                vote.no(cell.findPrevious().contents[0])
            elif marker in ['Excused', 'Absent']:
                vote.other(cell.findPrevious().contents[0])

        return vote
Example #13
0
    def parse_vote(self, bill, actor, date, motion, url):
        """Parse the plain-text roll call at ``url`` and add it to ``bill``.

        A single regular expression captures the YEAS, NAYS and ABSENT
        counts together with the text that follows each heading; that text
        is split on runs of two or more whitespace characters to obtain the
        voter names.  ``actor`` is used as the chamber when it is 'upper'
        or 'lower', otherwise as the vote's location.
        """
        with self.urlopen(url) as page:
            vote_re = re.compile('YEAS -?\s?(\d+)(.*)NAYS -?\s?(\d+)'
                                 '(.*)ABSENT( OR NOT VOTING)? -?\s?'
                                 '(\d+)(.*)',
                                 re.MULTILINE | re.DOTALL)
            match = vote_re.search(page)
            yes_count = int(match.group(1))
            no_count = int(match.group(3))
            other_count = int(match.group(6))

            passed = yes_count > no_count

            if actor in ('upper', 'lower'):
                vote_chamber, vote_location = actor, ''
            else:
                vote_chamber, vote_location = '', actor

            vote = Vote(vote_chamber, date,
                        motion, passed, yes_count, no_count,
                        other_count,
                        location=vote_location)
            vote.add_source(url)

            # Regex group holding each name block, and where its names go.
            name_groups = ((2, vote.yes), (4, vote.no), (7, vote.other))
            for group, record in name_groups:
                for voter in re.split('\s{2,}', match.group(group).strip()):
                    if voter:
                        record(voter)

            bill.add_vote(vote)
Example #14
0
    def scrape_vote(self, bill, chamber, url):
        """Scrape the roll-call page at ``url`` and add the vote to ``bill``.

        The second row of the first table supplies date, motion, the three
        counts and the result; rows from the fourth onward that have exactly
        two cells supply one member and how that member voted.  The motion
        text is also classified as passage / reading:3 / amendment / other
        and stored on the vote as its type.

        bill -- object that receives the vote through ``bill.add_vote``.
        chamber -- passed to ``Vote`` unchanged.
        url -- page address; also recorded as the vote's source.
        """
        with self.urlopen(url) as page:
            # NOTE(review): as written both arguments look like a plain
            # space, which would make this call a no-op; the first may
            # originally have been a non-breaking space or '&nbsp;' --
            # confirm against the original bytes.
            page = page.replace(' ', ' ')
            page = lxml.html.fromstring(page)

            info_row = page.xpath("//table[1]/tr[2]")[0]

            date = info_row.xpath("string(td[1])")
            date = datetime.datetime.strptime(date, "%m/%d/%Y")

            motion = info_row.xpath("string(td[2])")
            yes_count = int(info_row.xpath("string(td[3])"))
            no_count = int(info_row.xpath("string(td[4])"))
            other_count = int(info_row.xpath("string(td[5])"))
            passed = info_row.xpath("string(td[6])") == 'Pass'

            if motion == 'Shall the bill pass?':
                vote_type = 'passage'
            elif motion == 'Shall the bill be read the third time?':
                vote_type = 'reading:3'
            elif 'be amended as' in motion:
                vote_type = 'amendment'
            else:
                vote_type = 'other'

            vote = Vote(chamber, date, motion, passed,
                        yes_count, no_count, other_count)
            # The classification above used to be computed and then
            # overwritten by the roll-call loop without ever being stored.
            # NOTE(review): assumes Vote supports item assignment -- confirm.
            vote['type'] = vote_type
            vote.add_source(url)

            for tr in page.xpath("//table[1]/tr")[3:]:
                if len(tr.xpath("td")) != 2:
                    continue

                name = tr.xpath("string(td[1])").split(' of')[0]

                cast = tr.xpath("string(td[2])").strip()
                if cast == 'Yea':
                    vote.yes(name)
                elif cast == 'Nay':
                    vote.no(name)
                else:
                    vote.other(name)

            bill.add_vote(vote)
Example #15
0
 def scrape_votes(self, vote_text, vote_url, house, date, bill):
     """Parse a textual vote summary and attach the vote to ``bill``.

     ``vote_text`` is split on ';' into up to four parts, taken in order
     as Ayes, Ayes with reservations, Noes and Excused.  The first number
     in each part is its count, and the comma-separated text after "(s)"
     is its list of voters.  Ayes with reservations and Excused are both
     reported as "other".  The motion is the text before
     " The votes were as follows:".  The vote is always recorded as passed.

     vote_text -- the full vote description.
     vote_url -- recorded as the vote's source.
     house -- 'H' selects the lower chamber, anything else the upper.
     date -- passed to ``Vote`` unchanged.
     bill -- object that receives the vote through ``bill.add_vote``.
     """
     categories = 4  # Ayes, Ayes with reservations, Noes, Excused
     votes_parts = vote_text.split(";")

     motion_text, sep, after = vote_text.partition(" The votes were as follows:")

     voters = []
     for vp in votes_parts:
         before, sep, after = vp.partition("(s)")
         voters_list = after.split(", ")
         voters_list[0] = voters_list[0].lstrip(" ")
         voters_list[-1] = voters_list[-1].rstrip(". ")
         voters.append(voters_list)
     # Pad so that a summary with fewer than four parts does not raise
     # IndexError below.
     voters.extend([] for _ in range(categories - len(voters)))

     vote_counts = [0] * categories
     # Parts beyond the fourth have no category and are ignored.
     for i, t in enumerate(votes_parts[:categories]):
         match = re.search("[0-9]+", t)
         if match is not None:
             vote_counts[i] = int(match.group(0))

     if house == 'H':
         vote_house = "lower"
     else:
         vote_house = "upper"

     vote = Vote(vote_house, date, motion_text, True,
                 vote_counts[0], vote_counts[2],
                 vote_counts[1] + vote_counts[3])
     vote.add_source(vote_url)

     for yes_voter in voters[0]:
         vote.yes(yes_voter)
     for no_voter in voters[2]:
         vote.no(no_voter)
     for other_voter in voters[1]:
         vote.other(other_voter)
     # Excused members are at index 3; this loop used to re-read index 2,
     # so every "no" voter was also recorded as "other" and the excused
     # members were dropped.
     for other_voter in voters[3]:
         vote.other(other_voter)

     bill.add_vote(vote)
Example #16
0
    def scrape_vote(self, chamber, session, bill_id, vote_url):
        """Scrape one House vote page and save it with ``self.save_vote``.

        Returns without saving when the request was redirected to the
        "no vote found" page.  The motion and the yea/nay totals come from
        the first paragraph after the ``h1``, the date from the second, and
        the member names from the first (yeas) and second (nays) tables.

        chamber -- stored on the vote as ``bill_chamber``.
        session, bill_id -- stored on the vote; ``bill_id`` is also part of
            the generated filename.
        vote_url -- page address; also recorded as the vote's source.
        """
        NO_VOTE_URL = 'http://www.house.leg.state.mn.us/votes/novotefound.asp'
        with self.urlopen(vote_url) as html:

            # sometimes the link is broken, will redirect to NO_VOTE_URL
            if html.response.url == NO_VOTE_URL:
                return

            doc = lxml.html.fromstring(html)
            paragraphs = doc.xpath('//h1/following-sibling::p')

            # first paragraph has motion and vote total
            top_par = paragraphs[0].text_content()
            lines = top_par.splitlines()
            # 3rd line is the motion except in cases where first line is gone
            motion = lines[2] or lines[1]
            # last line is "__ YEA and __ Nay"
            yeas, nays = self.yeanay_re.match(lines[-1]).groups()
            yeas = int(yeas)
            nays = int(nays)

            # second paragraph has date
            date = self.date_re.match(paragraphs[1].text_content()).groups()[0]
            date = datetime.datetime.strptime(date, '%m/%d/%Y')

            # The builtin next() works on Python 2.6+ and Python 3, whereas
            # the .next() method exists only on Python 2 iterators.
            filename = 'vote%s-%s' % (next(self.sequence), bill_id)
            vote = Vote('lower', date, motion, yeas > nays, yeas, nays, 0,
                        session=session, bill_id=bill_id, bill_chamber=chamber,
                        filename=filename)
            vote.add_source(vote_url)

            # first table has YEAs
            for name in doc.xpath('//table[1]/tr/td/font/text()'):
                vote.yes(name.strip())

            # second table is nays
            for name in doc.xpath('//table[2]/tr/td/font/text()'):
                vote.no(name.strip())

            self.save_vote(vote)
Example #17
0
    def scrape_upper_vote(self, url):
        """Return an 'upper' chamber ``Vote`` parsed from the page at ``url``.

        Everything is extracted with regular expressions from the text of
        the page's ``pre`` element: the motion (amendment number, reading
        number and floor actions joined by spaces), the three totals, the
        date, and one word per member following a "_ Y ", "_ N " or "_ EX"
        marker.
        """
        with self.urlopen(url) as page:
            page = lxml.html.fromstring(page)

            text = page.xpath('string(//pre)')

            def find(regex):
                # First occurrence of regex in the roll-call text.
                return re.search(regex, text, re.MULTILINE)

            motion = " ".join([
                find(r'Amendment Number:\s([^\s]+)?').group(0).strip(),
                find(r'Reading Number .:\s([^\s]+)?').group(0).strip(),
                find(r'Floor Actions ..:\s([^\s]+)?').group(0).strip(),
            ])

            yeas = int(find(r'Yeas\s-\s(\d+)').group(1))
            nays = int(find(r'Nays\s-\s(\d+)').group(1))
            nv = int(find(r'Not\sVoting\s-\s(\d+)').group(1))

            date = datetime.datetime.strptime(
                find(r'Date:\s(\d+/\d+/\d+)').group(1), '%m/%d/%Y')

            vote = Vote('upper', date, motion, yeas > (nays + nv),
                        yeas, nays, nv)
            vote.add_source(url)

            # Marker text that precedes a member's name, and how to record it.
            markers = (('Y ', vote.yes), ('N ', vote.no), ('EX', vote.other))
            for marker, record in markers:
                for hit in re.finditer(r'_\s%s\s(\w+)' % marker, text,
                                       re.MULTILINE):
                    record(hit.group(1))

            return vote
Example #18
0
    def scrape_vote(self, bill, vote_type_id, vote_type):
        """Scrape one council vote for ``bill`` and attach it.

        The voting page is addressed by ``vote_type_id`` and the bill's
        ``bill_id``.  Date, voice-vote flag, result and the yes/no totals
        are read from spans with fixed ids; each member appears in a ``u``
        tag, with the vote text in an ``i`` element two levels up.
        ``vote_type`` is used as the motion.
        """
        url = ('http://www.dccouncil.washington.dc.us/lims/voting.aspx?VoteTypeID=%s&LegID=%s'
               % (vote_type_id, bill['bill_id']))

        with self.urlopen(url) as html:
            doc = lxml.html.fromstring(html)

            def bold_text(span_id):
                # Text of the <b> inside the span with the given id.
                return doc.xpath('//span[@id="%s"]/b/text()' % span_id)[0]

            vote_date = convert_date(doc.get_element_by_id('VoteDate').text)

            # check if voice vote / approved boxes have an 'x'
            voice = bold_text('VoteTypeVoice') == 'x'
            passed = bold_text('VoteResultApproved') == 'x'

            yes_count = extract_int(bold_text('VoteCount1'))
            no_count = extract_int(bold_text('VoteCount2'))
            # a bit lazy: whatever is left of a fixed total of 13
            # NOTE(review): 13 is presumably the number of members -- confirm.
            other_count = 13 - (yes_count + no_count)

            vote = Vote('upper', vote_date, vote_type, passed, yes_count,
                        no_count, other_count, voice_vote=voice)

            vote.add_source(url)

            # members are only text on page in a <u> tag
            for member_u in doc.xpath('//u'):
                member = member_u.text
                cast = member_u.xpath('../../i/text()')[0]
                if 'YES' in cast:
                    record = vote.yes
                elif 'NO' in cast:
                    record = vote.no
                else:
                    record = vote.other
                record(member)
        bill.add_vote(vote)
Example #19
0
 def scrape_votes(self, vote_page, bill, url):
     """Build a vote from an already-parsed vote page and add it to ``bill``.

     The date, the "Yeas/Nays/Absent/Excused: N" totals and the chamber
     ("Senate vote" / "House vote") are found in the page text; a total
     that does not appear is left at 0.  The motion is the third
     centre-aligned cell.  The ``RollCall`` spans are read in order as
     yeas, nays and two groups of "other" voters.

     vote_page -- parsed page element (``text_content`` and ``cssselect``
         are called on it).
     bill -- object that receives the vote through ``bill.add_vote``.
     url -- recorded as the vote's source.
     """
     page_text = vote_page.text_content()

     date_match = re.search("[0-9]{1,2}/[0-9]{1,2}/[0-9]{4}", page_text)
     vote_date = dt.datetime.strptime(date_match.group(0), '%m/%d/%Y')

     votes = {"Yeas": 0, "Nays": 0, "Absent": 0, "Excused": 0}

     # The parsed number used to be assigned to the loop variable only, so
     # every total stayed 0 and `passed` was always False.  Store it in the
     # dict, as an integer.
     for label in list(votes):
         match = re.search(label + ": ([0-9]+)", page_text)
         if match:
             votes[label] = int(match.group(1))

     passed = votes["Yeas"] > votes["Nays"]

     chamber_match = re.search("(Senate|House) vote", page_text)

     if chamber_match.group(1) == "Senate":
         chamber = "upper"
         title = "Senator"
     else:
         chamber = "lower"
         title = "Representative"

     motion_match = vote_page.cssselect('td[align="center"]')
     motion = motion_match[2].text_content()

     vote = Vote(chamber, vote_date, motion, passed, votes["Yeas"],
                 votes["Nays"], votes["Absent"] + votes["Excused"])
     vote.add_source(url)

     vote_elements = vote_page.cssselect('span[class="RollCall"]')

     vote_types = []

     for ve in vote_elements:
         voters = ve.text_content().split(", ")

         if len(voters) == 1:
             voters = voters[0].split(" and ")

         # Drop the leading title ("Senator"/"Representative"), its plural
         # "s" and the following spaces.  str.lstrip() with an argument
         # strips a set of characters, so remove literal prefixes instead
         # to avoid eating the start of a name.
         before, itself, after = voters[0].partition(title)
         if after.startswith("s"):
             after = after[1:]
         voters[0] = after.lstrip(" ")

         last = voters[-1].lstrip(" ")
         if last.startswith("and "):
             last = last[len("and "):].lstrip(" ")
         voters[-1] = last

         vote_types.append(voters)

     # Spans are taken in order; zip() stops early when a page has fewer
     # than four of them instead of raising IndexError.
     recorders = (vote.yes, vote.no, vote.other, vote.other)
     for record, names in zip(recorders, vote_types):
         for v in names:
             record(v)

     bill.add_vote(vote)
Example #20
0
    def scrape_votes(self, bill, file_type, number, session):
        """
        Scrape the votes listed for one bill and add them to ``bill``.

        bill -- object whose add_vote() receives each vote found
        file_type, number, session -- joined into the page's ID query
            parameter as ``<session>_<file_type>_<number>``

        Table rows are read in order and state is carried between them:
        a row whose text ends in "details" sets the motion and chamber,
        a row with text in its first link sets the date, and a row
        starting with "Yeas" supplies the counts and voter names of the
        vote that gets added.
        """
        vote_url = ('http://www.legislature.state.oh.us/votes.cfm?ID=' +
                    session + '_' + file_type + '_' + str(number))
        # The path is absolute, so one query already returns the rows of
        # every matching table; wrapping it in a per-table loop would
        # process each row once per table.
        row_path = ('/html/body/table/tr[3]/td/table/tr[1]'
                    '/td[2][@class="bigPanel"]/blockquote/font/table'
                    '/tr[position() > 1]')
        with self.urlopen(vote_url) as page:
            root = lxml.etree.fromstring(page, lxml.etree.HTMLParser())

            save_date = None
            motion = None
            chamber = None
            for mr in root.xpath(row_path):
                date = mr.xpath('string(td/font/a)').strip()
                info = mr.xpath('string(td[2]/font)')
                tokens = info.split()

                # makes sure that date is saved
                if date:
                    save_date = datetime.strptime(date, "%m/%d/%Y")

                if not tokens:
                    continue

                # motion and chamber
                if tokens[-1] == 'details':
                    # the last ten characters of the text are cut off
                    motion = info[0:len(info) - 10].strip()
                    motion_words = motion.split()
                    if motion_words and motion_words[0] == "Senate":
                        chamber = "upper"
                    else:
                        chamber = "lower"

                if tokens[0] != 'Yeas' or len(tokens) < 3:
                    continue

                # figures out the number of votes for each way and where
                # the yes and no voters start for later iteration
                yes_count = int(tokens[2])
                no_token = None
                yes_placement = 0
                no_placement = len(tokens)
                for idx in range(3, len(tokens)):
                    if tokens[idx] == '-' and idx + 1 < len(tokens):
                        no_token = tokens[idx + 1]
                        no_placement = idx + 2
                        yes_placement = idx - 2
                no_count = int(no_token) if no_token is not None else 0

                # gets rid of blank votes
                if yes_count <= 0 and no_count <= 0:
                    continue
                # without a preceding motion row the vote cannot be described
                if motion is None:
                    continue

                # pass or not (only by which has more. need to look up
                # how they are actually passed); compared as integers so
                # that e.g. 9 is not treated as greater than 10
                passed = yes_count > no_count

                vote = Vote(chamber, save_date, motion, passed,
                            yes_count, no_count, other_count=0)

                # adding in yea voters
                # NOTE(review): the range stops before tokens[yes_placement],
                # so that token is only recorded when it is an initial of
                # the preceding name -- confirm against the page layout.
                for idx in range(3, yes_placement):
                    token = tokens[idx]
                    if len(tokens[idx + 1]) < 2:
                        # the next token is an initial belonging to this name
                        vote.yes(token + " " + tokens[idx + 1])
                    elif len(token) >= 2:
                        # a lone initial was already attached to its name
                        vote.yes(token)

                # adding in no voters
                for idx in range(no_placement, len(tokens)):
                    token = tokens[idx]
                    has_next = idx + 1 < len(tokens)
                    if has_next and len(tokens[idx + 1]) < 2:
                        vote.no(token + " " + tokens[idx + 1])
                    elif len(token) >= 2:
                        vote.no(token)

                vote.add_source(vote_url)
                bill.add_vote(vote)
Example #21
0
    def scrape_bill(self, chamber, session, bill_id):
        """
        Scrape one bill's documents, sponsors, actions and votes, then save it.

        chamber -- chamber the bill is filed under; 'upper' is treated as
            the Senate and anything else as the House
        session -- session identifier, resolved through get_session_id()
            and utils.legislature_to_number()
        bill_id -- bill identifier as used in the site's URLs

        Two pages are read: the documents page (versions, fact sheets,
        agendas, calendars, amendments, videos) and the bill overview
        page (sponsors, committee referrals and votes, readings, floor
        votes and transmittal to the governor).
        """
        session_id = self.get_session_id(session)
        url = base_url + 'DocumentsForBill.asp?Bill_Number=%s&Session_ID=%s' % (
                                                            bill_id, session_id)
        with self.urlopen(url) as docs_for_bill:
            root = html.fromstring(docs_for_bill)
            bill_title = root.xpath(
                            '//div[@class="ContentPageTitle"]')[1].text.strip()
            # Depending on the progress the bill has made through the house
            # some tables might not exist. The links that have
            # javascript:Show**** have a table with related
            # documents/calendars/agendas/versions.
            # The sponsors link is skipped because that information is on the
            # bill overview page where all of the actions are found.
            doc_section_links = root.xpath(
                                    '//a[contains(@href, "javascript:Show")]')
            bill = Bill(session, chamber, bill_id, bill_title)
            bill.type = self.get_bill_type(bill_id[:-4])
            bill.add_source(url)
            for link in doc_section_links:
                link_id = utils.parse_link_id(link)
                link_text = link.text_content().strip()
                div_path = '//div[@id="%s"]/table//tr' % link_id
                if link_text == 'Show Versions':
                    # the first row has only a comment
                    for tr in root.xpath(div_path)[1:]:
                        tds = tr.cssselect('td')
                        if len(tds) >= 4:
                            bill_version = tds[1].text_content().strip()
                            bill_html = tds[2].xpath('string(font/a/@href)')
                            bill_pdf = tds[3].xpath('string(font/a/@href)')
                            bill.add_version(bill_version,
                                             bill_html, pdf_url=bill_pdf)
                elif link_text == 'Show Summaries/Fact Sheets':
                    # the first row has only a comment
                    for tr in root.xpath(div_path)[1:]:
                        tds = tr.cssselect('td')
                        if len(tds) > 1:
                            fact_sheet = tds[1].text_content().strip()
                            fact_sheet_url = tds[1].xpath(
                                                        'string(font/a/@href)')
                            bill.add_document(fact_sheet,
                                              fact_sheet_url, type="fact sheet")
                elif link_text in ('Show Senate Agendas', 'Show House Agendas'):
                    # link_text starts with "Show", so look for the chamber
                    # name anywhere in it instead of anchoring at the start
                    agenda_type = ('House Agenda' if 'House' in link_text
                                   else 'Senate Agenda')
                    # the first row has only a comment
                    # the second row is the table header
                    for tr in root.xpath(div_path)[2:]:
                        # columns: committee, revised, cancelled, date,
                        # time, room, pdf link, html link
                        tds = tr.cssselect('td')
                        if len(tds) >= 8:
                            agenda_committee = tds[0].text_content().strip()
                            agenda_html = tds[7].xpath('string(a/@href)').strip()
                            bill.add_document(agenda_committee,
                                              agenda_html, type=agenda_type)
                elif link_text in ('Show Senate Calendars',
                                   'Show House Calendar'):
                    # the first row has only a comment
                    # the second row is the table header
                    for tr in root.xpath(div_path)[2:]:
                        # columns used: name (0) and html link (5)
                        tds = tr.cssselect('td')
                        if len(tds) >= 6:
                            calendar_name = tds[0].text_content().strip()
                            calendar_html = tds[5].xpath('string(a/@href)')
                            bill.add_document(calendar_name,
                                              calendar_html, type="calendar")
                elif link_text == 'Show Adopted Amendments':
                    for tr in root.xpath(div_path)[1:]:
                        tds = tr.cssselect('td')
                        amendment_title = tds[1].text_content().strip()
                        amendment_link = tds[2].xpath('string(font/a/@href)')
                        bill.add_document(amendment_title,
                                          amendment_link, type='amendment')
                elif link_text == 'Show Proposed Amendments':
                    for tr in root.xpath(div_path)[1:]:
                        tds = tr.cssselect('td')
                        if len(tds) >= 3:
                            amendment_title = tds[1].text_content().strip()
                            amendment_link = tds[2].xpath('string(font/a/@href)')
                            bill.add_document(amendment_title,
                                              amendment_link, type='amendment')
                elif link_text == 'Show Bill Videos':
                    for tr in root.xpath(div_path)[2:]:
                        tds = tr.cssselect('td')
                        if len(tds) >= 3:
                            video_title = tds[1].text_content().strip()
                            video_link = tds[2].xpath('string(a/@href)')
                            video_date = tds[0].text_content().strip()
                            bill.add_document(video_title, video_link,
                                              date=video_date, type='video')

        # action_url = 'http://www.azleg.gov/FormatDocument.asp?inDoc=/legtext/49leg/2r/bills/hb2001o.asp'
        # again the actions page may or may not have a given table and the order
        # of the actions depends on the chamber the bill originated in.
        ses_num = utils.legislature_to_number(session)
        action_url = base_url + 'FormatDocument.asp?inDoc=/legtext/%s/bills/%so.asp' % (ses_num, bill_id.lower())
        with self.urlopen(action_url) as action_page:
            bill.add_source(action_url)
            root = html.fromstring(action_page)
            action_tables = root.xpath('/html/body/div/table/tr[3]/td[4]/table/tr/td/table/tr/td/table')
            # Tracks which chamber the following tables belong to. It starts
            # as the originating chamber and is switched by each TRANSMIT TO
            # table, so it has to live outside the loop to carry over from
            # one table to the next.
            house = chamber != 'upper'
            for table in action_tables:
                rows = table.cssselect('tr')
                # the table's first cell holds its title; drop the final character
                action = table.cssselect('td')[0].text_content().strip()[:-1]
                if action == 'SPONSORS':
                    if len(rows[0]) == 4:
                        for row in rows:
                            tds = row.cssselect('td')
                            if len(tds) < 3:
                                continue
                            # cell 1 holds the sponsor (and link), cell 2 the
                            # value passed as add_sponsor's first argument
                            name_td, type_td = tds[1], tds[2]
                            bill.add_sponsor(type_td.text_content().strip(),
                                             name_td.text_content().strip(),
                                             sponsor_link=name_td.xpath('string(a/@href)'))
                elif action == 'COMMITTEES':
                    # the html for this table has meta tags that give the chamber
                    # and the committee abbreviation
                    # <meta name="HCOMMITTEE" content="RULES">
                    # question for actions: in the case of committees would House
                    # Rules be better for an actor?
                    for row in rows[1:]:
                        tds = row.cssselect('td')
                        meta_tag = row.cssselect('meta')[0]
                        actor = "%s:%s" % (meta_tag.get('name'), meta_tag.get('content'))
                        committee = meta_tag.get('content')
                        act = 'committee:reffered'
                        date = datetime.datetime.strptime(tds[1].text_content().strip(), '%m/%d/%y')
                        bill.add_action(actor, act, date, type='committee:referred')
                        if len(tds) == 5:
                            # use the report date when present, else the referral date
                            if re.match(r'\d{2}/\d{2}/\d{2}', tds[3].text_content().strip()):
                                date = datetime.datetime.strptime(tds[3].text_content().strip(), '%m/%d/%y')
                            else:
                                date = datetime.datetime.strptime(tds[1].text_content().strip(), '%m/%d/%y')
                            act = tds[4].text_content().strip()
                            status = 'other'
                            bill.add_action(actor, act, date, type=status, status=status)
                        elif len(tds) == 6:
                            where, committee = actor.split(':')
                            where = 'lower' if where == 'HCOMMITTEE' else 'upper'
                            date = datetime.datetime.strptime(tds[3].text_content().strip(), '%m/%d/%y')
                            # the tally is wrapped in one character on each
                            # side and its counts are separated by dashes
                            counts = tds[4].text_content().strip()[1:-1].split('-')
                            if len(counts) == 4:
                                yes, no, nv, exc = counts
                            else:
                                yes, no, excused, absent, nv = counts
                            # compare numerically; as strings '9' > '10'
                            yes, no, nv = int(yes), int(no), int(nv)
                            motion = tds[5].text_content().strip()
                            passed = yes > no
                            vote = Vote(where, date, motion, passed, yes, no, nv, committee=committee)
                            vote.add_source(tds[0].xpath('string(a/@href)').strip())

                            bill.add_vote(vote)
                elif action in ('HOUSE FIRST READ', 'HOUSE SECOND READ'):
                    aType = 'other'
                    if re.search('HOUSE FIRST', action):
                        aType = 'committee:referred'
                    bill.add_action('lower', action, utils.get_date(rows[0][1]),
                                    type=aType)
                elif action in ('SENATE FIRST READ', 'SENATE SECOND READ'):
                    aType = 'other'
                    if re.search('SECOND', action):
                        aType = 'committee:referred'
                    bill.add_action('upper', action, utils.get_date(rows[0][1]),
                                    type=aType)
                elif action in ('TRANSMIT TO HOUSE', 'TRANSMIT TO SENATE'):
                    # "HOUSE" is at the end of the title, so an anchored
                    # match at the start could never find it
                    actor = 'lower' if 'HOUSE' in action else 'upper'
                    house = actor == 'lower'
                    date = utils.get_date(rows[0][1])
                    bill.add_action(actor, action, date)
                elif re.match(r'COW ACTION \d', action):
                    actor = 'lower' if house else 'upper'
                    for row in rows[1:]:
                        date = utils.get_date(row[1])
                        bill.add_action(actor, action, date, motion=row[2].text_content().strip())
                elif action in ('HOUSE FINAL READ', 'SENATE FINAL READ', 'THIRD READ'):
                    actor = 'lower' if house else 'upper'
                    for row in rows[1:]:
                        if row[0].text_content().strip() != 'Vote Detail':
                            continue
                        # rows come with 10 cells, or 11 when an extra
                        # "amended" cell is present; others are ignored
                        if len(row) not in (10, 11):
                            continue
                        cells = [x.text_content().strip() for x in row]
                        extra = {}
                        if len(cells) == 10:
                            (detail, date, ayes, nays, nv, exc, emer,
                             rfe, two_thirds, result) = cells
                        else:
                            (detail, date, ayes, nays, nv, exc, emer,
                             amend, rfe, two_thirds, result) = cells
                            extra['amended'] = amend
                        passed = result == 'PASSED'
                        motion = action
                        date = datetime.datetime.strptime(date, '%m/%d/%y') if date else ''
                        vote = Vote(actor, date, motion, passed, int(ayes), int(nays), int(nv),
                                    excused=int(exc), emergency=emer, rfe=rfe,
                                    two_thirds_vote=two_thirds, type="passage",
                                    **extra)
                        vote.add_source(row[0].xpath('string(a/@href)').strip())
                        bill.add_vote(vote)

                elif action == 'TRANSMITTED TO':
                    actor = 'lower' if house else 'upper'
                    act = action + ": " + rows[0][1].text_content().strip()
                    date = rows[0][2].text_content().strip()
                    date = datetime.datetime.strptime(date, '%m/%d/%y')
                    bill.add_action(actor, act, date, type='governor:received')
                    # need action and chaptered, chaptered version if they exist
                    act, date, chapter, version = '', '', '', ''
                    for row in rows[1:]:
                        label = row[0].text_content().strip()
                        if label == 'ACTION:':
                            act = row[1].text_content().strip()
                            date = datetime.datetime.strptime(row[2].text_content().strip(), '%m/%d/%y')
                        elif label == 'CHAPTER':
                            chapter = row[1].text_content().strip()
                        elif label == 'CHAPTERED VERSION':
                            version = row[1].text_content().strip()
                    if act:
                        action_type = 'governor:signed' if act == 'SIGNED' else 'governor:vetoed'
                        if chapter:
                            bill.add_action('governor', act, date,
                                            type=action_type, chapter=chapter,
                                            chaptered_version=version)
                        else:
                            bill.add_action('governor', act, date,
                                            type=action_type)
        self.save_bill(bill)
        self.log("saved: " + bill['bill_id'])
Example #22
0
    def parse_status(self, bill, url):
        """
        Read a bill's status page, adding its actions and votes to ``bill``.

        bill -- bill object; its 'chamber' entry is the default actor
            for actions that do not name one
        url -- address of the status page; also recorded as a source and
            used as the base for relative vote links
        """
        chamber = bill['chamber']
        status = self.soup_parser(self.urlopen(url))
        bill.add_source(url)
        act_table = status.table

        # Compiled once for all rows. Groups: 1/3/6 are the yes/no/absent
        # counts, 2/4/7 the text holding the matching voter names.
        vote_re = re.compile(r'YEAS -?\s?(\d+)(.*)NAYS -?\s?(\d+)'
                             r'(.*)ABSENT( OR NOT VOTING)? -?\s?'
                             r'(\d+)(.*)',
                             re.MULTILINE | re.DOTALL)

        def split_names(block):
            # Names are separated by runs of two or more whitespace
            # characters. An empty block splits to [''], so blanks are
            # dropped rather than recorded as voters.
            return [name for name in re.split(r'\s{2,}', block.strip())
                    if name]

        # Get actions
        for row in act_table.findAll('tr')[1:]:
            act_date = row.td.find(text=True)
            act_date = dt.datetime.strptime(act_date, "%m/%d/%Y")
            action = row.findAll('td')[1].find(text=True)

            # If not specified, assume action occurred
            # in originating house
            actor = chamber

            split_action = action.split('/')
            if len(split_action) > 1:
                actor = split_action[0]

                if actor == 'House':
                    actor = 'lower'
                elif actor == 'Senate':
                    actor = 'upper'
                elif actor == 'LFA':
                    actor = 'Office of the Legislative Fiscal Analyst'

                action = '/'.join(split_action[1:]).strip()

            if action == 'Governor Signed':
                actor = 'Governor'

            bill.add_action(actor, action, act_date)

            # Check if this action is a vote
            links = row.findAll('a')
            if len(links) <= 1:
                continue
            vote_url = links[-1]['href']

            # Committee votes are of a different format that
            # we don't handle yet
            if not vote_url.endswith('txt'):
                continue

            vote_url = '/'.join(url.split('/')[:-1]) + '/' + vote_url
            vote_page = self.urlopen(vote_url)

            match = vote_re.search(vote_page)
            if match is None:
                # Not in the expected YEAS/NAYS/ABSENT layout; skip it like
                # the other vote formats that are not handled.
                continue

            yes_count = int(match.group(1))
            no_count = int(match.group(3))
            other_count = int(match.group(6))
            passed = yes_count > no_count

            if actor == 'upper' or actor == 'lower':
                vote_chamber = actor
                vote_location = ''
            else:
                vote_chamber = ''
                vote_location = actor

            vote = Vote(vote_chamber, act_date,
                        action, passed, yes_count, no_count,
                        other_count,
                        location=vote_location)
            vote.add_source(vote_url)

            # Explicit loops: map() used only for its side effects does
            # nothing on interpreters where it is lazy.
            for name in split_names(match.group(2)):
                vote.yes(name)
            for name in split_names(match.group(4)):
                vote.no(name)
            for name in split_names(match.group(7)):
                vote.other(name)

            bill.add_vote(vote)
Example #23
0
 def scrape_votes(self, chamber, url, bill, date, **kwargs):
     """
     Scrapes the votes from a vote detail page with the legislators' names
     and adds the resulting Vote to the bill.

     chamber -- 'upper' or 'lower'
     url -- address of the vote detail page, recorded as the vote's source
     bill -- object whose add_vote() receives the vote
     date -- passed through to Vote unchanged

     Keyword arguments:
     type -- required; passed to Vote as its type
     motion -- required; the motion text (KeyError when missing)
     passed -- optional 'PASSED' or 'FAILED'; when omitted the outcome is
         worked out from the counts
     AMEND, EMER, 2/3 VOTE -- optional elements whose text is stored with
         the vote
     committee -- optional; resolved with utils.get_committee_name()
     """
     o_args = {}
     v_type = kwargs.pop('type')
     # Fail before fetching the page instead of hitting an unbound name
     # when the Vote is built.
     motion = kwargs.pop('motion')
     passed = None  # None means: work it out from the counts later
     if 'passed' in kwargs:
         passed = {'PASSED': True, 'FAILED': False}[kwargs.pop('passed')]
     if 'AMEND' in kwargs:
         o_args['amended'] = kwargs.pop('AMEND').text_content().strip()
     if 'EMER' in kwargs and kwargs['EMER'].text_content().strip():
         o_args['EMER'] = kwargs.pop('EMER').text_content().strip()
     if '2/3 VOTE' in kwargs and kwargs['2/3 VOTE'].text_content().strip():
         o_args['2/3 VOTE'] = kwargs.pop('2/3 VOTE').text_content().strip()
     if 'committee' in kwargs:
         o_args['committee'] = utils.get_committee_name(kwargs.pop('committee'),
                                                         chamber)

     with self.urlopen(url) as vote_page:
         root = html.fromstring(vote_page)
         vote_table = root.xpath('/html/body/div/table/tr[3]/td[4]/table/tr/td/table/tr/td/table')[0]
         vote_count = vote_table.xpath('following-sibling::p/following-sibling::text()')
         vote_string = vote_count[0].replace(u'\xa0', '').strip()
         # "[A-Za-z]" rather than "[A-z]": the latter also matches the
         # punctuation that sits between "Z" and "a".
         v_count = re.compile(r'\b[A-Z]*\s*[A-Za-z]*:\s\d*')
         v_list = v_count.findall(vote_string)
         yes_count = 0
         no_count = 0
         o_count = 0
         for x in v_list:
             k, v = x.split(':')
             # make NOT VOTING not_voting
             k = k.strip().replace(' ', '_').lower()
             v = int(v.strip())
             if k == 'ayes':
                 yes_count = v
             elif k == 'nays':
                 no_count = v
             else:
                 o_args.update({str(k): v})
                 o_count += v
         if passed is None:
             passed = yes_count > no_count
             if 'committee' not in o_args:
                 # Floor votes need a fixed number of ayes, presumably a
                 # majority (16 / 31) or two thirds (20 / 40) of the
                 # chamber's membership.  Reaching that number is enough,
                 # hence ">=".
                 if chamber == 'upper' and passed:
                     if 'EMER' in o_args or '2/3 VOTE' in o_args:
                         passed = yes_count >= 20
                     else:
                         passed = yes_count >= 16
                 elif chamber == 'lower' and passed:
                     if 'EMER' in o_args or '2/3 VOTE' in o_args:
                         passed = yes_count >= 40
                     else:
                         passed = yes_count >= 31

         vote = Vote(chamber, date, motion, passed, yes_count, no_count,
                     o_count, type=v_type, **o_args)
         vote.add_source(url)
         # grab all the tables descendant tds
         tds = vote_table.xpath('descendant::td')
         # pair 'em up
         matched = [tds[y:y + 2] for y in range(0, len(tds), 2)]
         for pair in matched:
             # an odd trailing cell has no vote to go with it
             if len(pair) < 2:
                 continue
             name, v = pair
             v = v.text_content().strip()
             name = name.text_content().strip()
             if name == 'Member Name':
                 continue
             if v == 'Y':
                 vote.yes(name)
             elif v == 'N':
                 vote.no(name)
             else:
                 vote.other(name)
         bill.add_vote(vote)
Example #24
0
    def get_vote(self, bill, url):
        """
        Scrape one roll-call vote page and add the vote to ``bill``.

        bill -- bill object the vote is added to; its 'session' entry is
            used when the Lieutenant Governor is first recorded
        url -- site-relative address of the vote page; it must carry an
            ``sChamber=H`` or ``sChamber=S`` parameter
        """
        url = 'http://www.ncga.state.nc.us' + url + '&bPrintable=true'
        chamber = {'H': 'lower', 'S': 'upper'}[
            re.findall(r'sChamber=(\w)', url)[0]]

        data = self.urlopen(url)
        soup = self.soup_parser(data)

        motion = soup.findAll('a', href=re.compile(r'BillLookUp\.pl'))[0] \
                     .findParents('tr', limit=1)[0].findAll('td')[1] \
                     .font.contents[-1]

        vote_time = soup.findAll('b', text='Time:')[0].next.strip()
        vote_time = dt.datetime.strptime(vote_time, '%b %d %Y  %I:%M%p')

        vote_mess = soup.findAll('td', text=re.compile('Total Votes:'))[0]
        (yeas, noes, nots, absent, excused) = [
            int(x) for x in re.findall(
                r'Ayes: (\d+)\s+Noes: (\d+)\s+Not: (\d+)\s+Exc. '
                r'Absent: (\d+)\s+Exc. Vote: (\d+)', vote_mess, re.U)[0]]

        # chamber, date, motion, passed, yes_count, no_count, other_count
        v = Vote(chamber, vote_time, motion, (yeas > noes),
                 yeas, noes, nots + absent + excused)

        # eh, it's easier to just get table[2] for this..
        vote_table = soup.findAll('table')[2]

        for row in vote_table.findAll('tr'):
            if 'Democrat' in self.flatten(row):
                continue

            cells = row.findAll('td')
            if len(cells) == 1:
                # Presumably the tie-breaking vote row.
                # I can't find any examples of ties in the House,
                # nor information on who would break them.
                #
                # The name is read from the page every time: self.lt_gov
                # may already be set from an earlier vote, and the name
                # is still needed below to record this vote.
                full_name = soup.findAll(
                    'td', text=re.compile('Lieutenant Governor'))[0] \
                    .parent.findAll('span')[0].contents[0]

                if not self.lt_gov and chamber == 'upper':
                    (first_name, last_name, middle_name, suffix) = split_name(
                        full_name)

                    self.lt_gov = Person(full_name, first_name=first_name,
                                         last_name=last_name,
                                         middle_name=middle_name,
                                         suffix=suffix)

                    self.lt_gov.add_role('Lieutenant Governor',
                                         bill['session'])

                    self.save_person(self.lt_gov)

                if 'VOTES YES' in self.flatten(cells[0]):
                    v['passed'] = True
                    v.yes(full_name)
                else:
                    v['passed'] = False
                    v.no(full_name)
                continue
            elif len(cells) == 2:
                vote_type, a = cells
                bunch = [self.flatten(a)]
            elif len(cells) == 3:
                vote_type, d, r = cells
                bunch = [self.flatten(d), self.flatten(r)]
            else:
                continue

            # why doesn't .string work? ... bleh.
            vote_type = vote_type.font.b.contents[0]

            if 'Ayes' in vote_type:
                adder = v.yes
            elif 'Noes' in vote_type:
                adder = v.no
            else:
                adder = v.other

            for party in bunch:
                # Names follow the first colon and are separated by ';'.
                # Built as a list because the first entry is indexed below.
                names = [x.replace(' (SPEAKER)', '')
                         for x in party[(party.index(':') + 1):].split(';')]

                if names[0] == 'None':
                    names = []

                for x in names:
                    adder(x)

        v.add_source(url)
        bill.add_vote(v)
Example #25
0
    def parse_vote_details(self, url):
        """
        Fetch a single vote page and build a Vote from it, including how
        each legislator voted.

        Returns the Vote.  Raises ScrapeError when the number of names
        recorded for yes, no or other differs from the count shown on
        the page.
        """

        def spans_marked(code):
            # spans of class font8text whose text is the given vote code
            return page.findAll('span', {'class': 'font8text'},
                                text=code)

        with self.urlopen(url) as raw_page:
            page = BeautifulSoup(raw_page)

            heading = page.find('div', {'class': 'subHdrGraphic'})
            chamber = 'upper' if 'Senate' in heading.string else 'lower'

            # The link back to the bill anchors the search for the
            # date and the motion.
            bill_row = page.find(
                'a', href=re.compile('billinfo')).parent.parent
            date = dt.datetime.strptime(bill_row.find('div').string,
                                        "%A, %B %d, %Y")

            motion_div = bill_row.findNextSibling('div')
            if motion_div.a:
                motion = "%s %s" % (motion_div.a.string,
                                    motion_div.contents[-1].string.strip())
            elif motion_div.span:
                motion = "%s %s" % (motion_div.span.string.strip(),
                                    motion_div.contents[-1].string.strip())
            else:
                motion = motion_div.string.strip().replace('&nbsp;', '')

            yes_count = int(page.find('div', text='YEAS').next.string)
            no_count = int(page.find('div', text='NAYS').next.string)
            lve_count = int(page.find('div', text='LVE').next.string)
            nv_count = int(page.find('div', text='N/V').next.string)
            other_count = lve_count + nv_count

            vote = Vote(chamber, date, motion, yes_count > no_count,
                        yes_count, no_count, other_count)
            vote.add_source(url)

            # Voters are located by the letter inside each span rather
            # than by background color, which is unreliable.
            groups = (
                (vote.yes, spans_marked('Y')),
                (vote.no, spans_marked('N')),
                (vote.other, spans_marked('E') + spans_marked('X')),
            )
            for record, marks in groups:
                for mark in marks:
                    record(mark.parent.findNextSibling('span').string)

            # The recorded names must add up to the printed totals.
            checks = (
                ('wrong yes count %d/%d', 'yes_votes', yes_count),
                ('wrong no count %d/%d', 'no_votes', no_count),
                ('wrong other count %d/%d', 'other_votes', other_count),
            )
            for template, key, expected in checks:
                recorded = len(vote[key])
                if recorded != expected:
                    raise ScrapeError(template % (recorded, expected))
        return vote
Example #26
0
    def get_vote(self, bill, url):
        """
        Scrape one roll-call page and attach the resulting Vote to `bill`.

        `url` is a site-relative path; the host and a printable-view flag are
        added before fetching. The chamber comes from the `sChamber` query
        parameter (`H` -> lower, `S` -> upper); any other letter raises
        KeyError. The motion, time and totals are read from the page, then
        each legislator named in the third table is recorded as yes, no or
        other.

        A row with a single cell is treated as a tie-breaking vote and is
        attributed to the name shown next to "Lieutenant Governor" on the
        page. The first time this happens for the Senate, a Person record is
        created, cached on `self.lt_gov` and saved.
        """
        url = "http://www.ncga.state.nc.us" + url + "&bPrintable=true"
        chamber = {"H": "lower", "S": "upper"}[re.findall(r"sChamber=(\w)", url)[0]]

        data = self.urlopen(url)
        soup = self.soup_parser(data)

        motion = (
            soup.findAll("a", href=re.compile(r"BillLookUp\.pl"))[0]
            .findParents("tr", limit=1)[0]
            .findAll("td")[1]
            .font.contents[-1]
        )

        vote_time = soup.findAll("b", text="Time:")[0].next.strip()
        vote_time = dt.datetime.strptime(vote_time, "%b %d %Y  %I:%M%p")

        vote_mess = soup.findAll("td", text=re.compile("Total Votes:"))[0]
        totals = re.findall(
            r"Ayes: (\d+)\s+Noes: (\d+)\s+Not: (\d+)\s+Exc. "
            r"Absent: (\d+)\s+Exc. Vote: (\d+)",
            vote_mess,
            re.U,
        )[0]
        yeas, noes, nots, absent, excused = [int(total) for total in totals]

        # chamber, date, motion, passed, yes_count, no_count, other_count
        v = Vote(chamber, vote_time, motion, (yeas > noes), yeas, noes, nots + absent + excused)

        # eh, it's easier to just get table[2] for this..
        vote_table = soup.findAll("table")[2]

        for row in vote_table.findAll("tr"):
            # skip the party heading row
            if "Democrat" in self.flatten(row):
                continue

            cells = row.findAll("td")
            if len(cells) == 1:
                # I can't find any examples of ties in the House,
                # nor information on who would break them.

                # Always read the name from this page. self.lt_gov is cached
                # across calls, so binding full_name only while building that
                # record left it unbound on every later tie-break.
                full_name = (
                    soup.findAll("td", text=re.compile("Lieutenant Governor"))[0]
                    .parent.findAll("span")[0]
                    .contents[0]
                )

                if not self.lt_gov and chamber == "upper":
                    (first_name, last_name, middle_name, suffix) = split_name(full_name)

                    self.lt_gov = Person(
                        full_name, first_name=first_name, last_name=last_name, middle_name=middle_name, suffix=suffix
                    )

                    self.lt_gov.add_role("Lieutenant Governor", bill["session"])

                    self.save_person(self.lt_gov)

                # the tie-breaker decides the outcome
                if "VOTES YES" in self.flatten(cells[0]):
                    v["passed"] = True
                    v.yes(full_name)
                else:
                    v["passed"] = False
                    v.no(full_name)
                continue
            elif len(cells) == 2:
                vote_type, a = cells
                bunch = [self.flatten(a)]
            elif len(cells) == 3:
                vote_type, d, r = cells
                bunch = [self.flatten(d), self.flatten(r)]
            else:
                continue

            # why doesn't .string work? ... bleh.
            vote_type = vote_type.font.b.contents[0]

            if "Ayes" in vote_type:
                adder = v.yes
            elif "Noes" in vote_type:
                adder = v.no
            else:
                adder = v.other

            for party in bunch:
                # names follow the first colon, separated by semicolons;
                # a real list is needed because it is indexed just below
                names = [
                    name.replace(" (SPEAKER)", "")
                    for name in party[(party.index(":") + 1) :].split(";")
                ]

                if names[0] == "None":
                    names = []

                for name in names:
                    adder(name)

        v.add_source(url)
        bill.add_vote(v)