Example #1
0
 def apply_votes(self, bill):
     """Parse every vote found at the bill's status URL and attach it to the bill.

     ``bill`` must have a ``status_url`` entry.  Each tuple returned by
     ``votes.all_votes_for_url`` is turned into a ``Vote`` and handed to
     ``bill.add_vote``.  Descriptions without a ``-`` separator (so no
     trailing date component can be extracted) are reported through
     ``self.warning`` and skipped.
     """
     bill_votes = votes.all_votes_for_url(self, bill['status_url'])
     for (chamber, vote_desc, pdf_url, these_votes) in bill_votes:
         # str.split() always returns at least one element, so indexing [-1]
         # can never raise IndexError; a missing date has to be detected by
         # looking for the separator itself.
         if "-" not in vote_desc:
             self.warning("[%s] Couldn't get date out of [%s]" % (bill['bill_id'],vote_desc))
             continue
         date = vote_desc.split("-")[-1]

         yes_votes = []
         no_votes = []
         other_votes = []
         for voter, cast in these_votes.items():
             if cast == 'Y':
                 yes_votes.append(voter)
             elif cast == 'N':
                 no_votes.append(voter)
             else:
                 other_votes.append(voter)

         # not necessarily correct, but not sure where else to get it.
         # maybe from pdf
         passed = len(yes_votes) > len(no_votes)
         vote = Vote(standardize_chamber(chamber), date, vote_desc, passed,
                     len(yes_votes), len(no_votes), len(other_votes),
                     pdf_url=pdf_url)
         for voter in yes_votes:
             vote.yes(voter)
         for voter in no_votes:
             vote.no(voter)
         for voter in other_votes:
             vote.other(voter)
         bill.add_vote(vote)
Example #2
0
    def scrape_vote(self, bill, date, url):
        """Scrape one roll-call page and attach the resulting vote to ``bill``.

        The ``h4`` header whose id contains ``hdVote`` is split on ``', '``:
        the second field names the chamber (optionally followed by a
        committee name) and the remaining fields form the motion text.

        Raises:
            ScrapeError: if the header location starts with neither
                ``House`` nor ``Senate``.
        """
        with self.urlopen(url) as page:
            page = lxml.html.fromstring(page)

            header = page.xpath("string(//h4[contains(@id, 'hdVote')])")

            location = header.split(', ')[1]

            if location.startswith('House'):
                chamber = 'lower'
            elif location.startswith('Senate'):
                chamber = 'upper'
            else:
                # ``chamber`` is not bound on this path; report the text that
                # failed to match instead.
                raise ScrapeError("Bad chamber: %s" % location)

            # Whatever follows the chamber word is treated as a committee
            # name, except for the tail of "House of Representatives".
            committee = ' '.join(location.split(' ')[1:]).strip()
            if not committee or committee.startswith('of Representatives'):
                committee = None

            motion = ', '.join(header.split(', ')[2:]).strip()

            yes_count = int(
                page.xpath("string(//td[contains(@id, 'tdAyes')])"))
            no_count = int(
                page.xpath("string(//td[contains(@id, 'tdNays')])"))
            excused_count = int(
                page.xpath("string(//td[contains(@id, 'tdExcused')])"))
            absent_count = int(
                page.xpath("string(//td[contains(@id, 'tdAbsent')])"))
            other_count = excused_count + absent_count

            passed = yes_count > no_count

            if motion.startswith('Do Pass'):
                vote_type = 'passage'
            elif motion == 'Concurred in amendments':
                vote_type = 'amendment'
            elif motion == 'Veto override':
                vote_type = 'veto_override'
            else:
                vote_type = 'other'

            vote = Vote(chamber, date, motion, passed, yes_count, no_count,
                        other_count)
            vote['type'] = vote_type

            if committee:
                vote['committee'] = committee

            vote.add_source(url)

            # Each result cell is preceded by the cell holding the
            # legislator's name.
            for td in page.xpath("//table[contains(@id, 'tblVotes')]/tr/td"):
                if td.text == 'Yea':
                    vote.yes(td.getprevious().text.strip())
                elif td.text == 'Nay':
                    vote.no(td.getprevious().text.strip())
                elif td.text in ('Excused', 'Absent'):
                    vote.other(td.getprevious().text.strip())

            bill.add_vote(vote)
Example #3
0
    def parse_vote_new(self, bill, chamber, url):
        """Read the vote table at ``url`` and add the parsed vote to ``bill``.

        The second row of the page's first table supplies, in column order,
        the date, motion, yes/no/absent counts and the pass/fail text.  Rows
        from the fourth onwards that have exactly two cells are individual
        legislators' votes.
        """
        soup = BeautifulSoup(self.urlopen(url))
        table = soup.table
        rows = table.findAll('tr')

        summary = rows[1].findAll('td')
        date = dt.datetime.strptime(summary[0].contents[0], '%m/%d/%Y')
        motion = summary[1].contents[0]
        yes_count, no_count, abs_count = [
            int(cell.contents[0]) for cell in summary[2:5]]
        passed = summary[5].contents[0] == 'Pass'

        vote = Vote(chamber, date, motion, passed,
                    yes_count, no_count, abs_count)
        vote.add_source(url)

        for row in rows[3:]:
            cells = row.findAll('td')
            if len(cells) != 2:
                continue

            # Keep only the text before " of" in the name cell.
            name = cells[0].contents[0].split(' of')[0]
            cast = cells[1].contents[0]
            if cast.startswith('Yea'):
                vote.yes(name)
            elif cast.startswith('Nay'):
                vote.no(name)
            else:
                vote.other(name)

        bill.add_vote(vote)
Example #4
0
    def scrape(self, chamber, session):
        """Build and save a single hard-coded sample bill for ``chamber``.

        The bill gets fixed sources, versions, documents, sponsors, two
        votes and three actions before being passed to ``self.save_bill``.
        """
        self.validate_session(session)

        is_upper = chamber == 'upper'
        other_chamber = 'lower' if is_upper else 'upper'
        bill_id = 'SB 1' if is_upper else 'HB 1'

        sample = Bill(session, chamber, bill_id, 'A super bill')
        sample.add_source('http://example.com/')
        sample.add_version('As Introduced', 'http://example.com/SB1.html')
        sample.add_document('Google', 'http://google.com')
        sample.add_sponsor('primary', 'Bob Smith')
        sample.add_sponsor('secondary', 'Johnson, Sally')

        date_format = '%m/%d/%Y'

        # Upper-chamber vote: two yes votes, passed.
        first_day = datetime.datetime.strptime('1/29/2010', date_format)
        upper_vote = Vote('upper', first_day, 'Final passage', True, 2, 0, 0)
        for voter in ('Smith', 'Johnson'):
            upper_vote.yes(voter)

        # Lower-chamber vote: one no, one other, failed.
        second_day = datetime.datetime.strptime('1/30/2010', date_format)
        lower_vote = Vote('lower', second_day, 'Final passage', False, 0, 1, 1)
        lower_vote.no('Bob Smith')
        lower_vote.other('S. Johnson')

        for sample_vote in (upper_vote, lower_vote):
            sample.add_vote(sample_vote)

        actions = (
            (chamber, 'introduced', first_day),
            (chamber, 'read first time', second_day),
            (other_chamber, 'introduced', second_day),
        )
        for actor, action, when in actions:
            sample.add_action(actor, action, when)

        self.save_bill(sample)
Example #5
0
    def scrape_vote(self, bill, name, url):
        """Parse a PDF roll call at ``url`` and add the vote to ``bill``.

        ``name`` must look like ``"<Senate|House> Vote on <x>,<motion>"``;
        anything else is ignored and the method returns without adding a
        vote.  The PDF is written to a temporary file, converted with
        ``pdf_to_lxml`` and scanned line by line: ``YEAS`` / ``NAYS`` /
        ``ABSENT`` headings switch the current section, ``Total--N`` lines
        set that section's count, and every other line is recorded as a
        voter in the current section.
        """
        match = re.match('^(Senate|House) Vote on [^,]*,(.*)$', name)

        if not match:
            return

        chamber = {'Senate': 'upper', 'House': 'lower'}[match.group(1)]
        motion = match.group(2).strip()

        if motion.startswith('FINAL PASSAGE'):
            vote_kind = 'passage'
        elif motion.startswith('AMENDMENT'):
            vote_kind = 'amendment'
        # NOTE(review): 'READINT' looks like a typo for 'READING'; left
        # untouched because the source data may contain it -- confirm.
        elif 'ON 3RD READINT' in motion:
            vote_kind = 'reading:3'
        else:
            vote_kind = 'other'

        vote = Vote(chamber, None, motion, None,
                    None, None, None)
        vote['type'] = vote_kind
        vote.add_source(url)

        sections = {'YEAS': 'yes', 'NAYS': 'no', 'ABSENT': 'other'}
        total_re = re.compile(r'^Total--(\d+)$')

        with self.urlopen(url) as text:
            (fd, temp_path) = tempfile.mkstemp()
            try:
                with os.fdopen(fd, 'wb') as w:
                    w.write(text)
                html = pdf_to_lxml(temp_path)
            finally:
                # Remove the temp file even when the conversion fails.
                os.remove(temp_path)

            section = None
            body = html.xpath('string(/html/body)')
            for line in body.replace(u'\xa0', '\n').split('\n'):
                line = line.replace(' ', '').strip()
                if not line:
                    continue

                if line in sections:
                    section = sections[line]
                elif section:
                    total = total_re.match(line)
                    if total:
                        vote['%s_count' % section] = int(total.group(1))
                    elif section == 'yes':
                        vote.yes(line)
                    elif section == 'no':
                        vote.no(line)
                    elif section == 'other':
                        vote.other(line)

        # The PDFs oddly don't say whether a vote passed or failed.
        # Hopefully passage just requires yes_votes > not_yes_votes.
        # A count is still None when its section had no "Total--N" line;
        # treat that as zero so the comparison cannot raise TypeError.
        yes_total = vote['yes_count'] or 0
        no_total = vote['no_count'] or 0
        other_total = vote['other_count'] or 0
        vote['passed'] = yes_total > (no_total + other_total)

        bill.add_vote(vote)
Example #6
0
    def scrape_vote(self, bill, name, url):
        """Parse a PDF roll call at ``url`` and add the vote to ``bill``.

        ``name`` must look like ``"<Senate|House> Vote on <x>,<motion>"``;
        anything else is ignored and the method returns without adding a
        vote.  The PDF is written to a temporary file, converted with
        ``pdf_to_lxml`` and scanned line by line: ``YEAS`` / ``NAYS`` /
        ``ABSENT`` headings switch the current section, ``Total--N`` lines
        set that section's count, and every other line is recorded as a
        voter in the current section.
        """
        match = re.match("^(Senate|House) Vote on [^,]*,(.*)$", name)

        if not match:
            return

        chamber = {"Senate": "upper", "House": "lower"}[match.group(1)]
        motion = match.group(2).strip()

        if motion.startswith("FINAL PASSAGE"):
            vote_kind = "passage"
        elif motion.startswith("AMENDMENT"):
            vote_kind = "amendment"
        # NOTE(review): "READINT" looks like a typo for "READING"; left
        # untouched because the source data may contain it -- confirm.
        elif "ON 3RD READINT" in motion:
            vote_kind = "reading:3"
        else:
            vote_kind = "other"

        vote = Vote(chamber, None, motion, None, None, None, None)
        vote["type"] = vote_kind
        vote.add_source(url)

        sections = {"YEAS": "yes", "NAYS": "no", "ABSENT": "other"}
        total_re = re.compile(r"^Total--(\d+)$")

        with self.urlopen(url) as text:
            (fd, temp_path) = tempfile.mkstemp()
            try:
                with os.fdopen(fd, "wb") as w:
                    w.write(text)
                html = pdf_to_lxml(temp_path)
            finally:
                # Remove the temp file even when the conversion fails.
                os.remove(temp_path)

            section = None
            body = html.xpath("string(/html/body)")
            for line in body.replace(u"\xa0", "\n").split("\n"):
                line = line.replace(" ", "").strip()
                if not line:
                    continue

                if line in sections:
                    section = sections[line]
                elif section:
                    total = total_re.match(line)
                    if total:
                        vote["%s_count" % section] = int(total.group(1))
                    elif section == "yes":
                        vote.yes(line)
                    elif section == "no":
                        vote.no(line)
                    elif section == "other":
                        vote.other(line)

        # The PDFs oddly don't say whether a vote passed or failed.
        # Hopefully passage just requires yes_votes > not_yes_votes.
        # A count is still None when its section had no "Total--N" line;
        # treat that as zero so the comparison cannot raise TypeError.
        yes_total = vote["yes_count"] or 0
        no_total = vote["no_count"] or 0
        other_total = vote["other_count"] or 0
        vote["passed"] = yes_total > (no_total + other_total)

        bill.add_vote(vote)
Example #7
0
def record_votes(root):
    """Yield a ``Vote`` for every record vote found under ``root``.

    ``root`` must support ``xpath``.  Each ``<p>`` whose text starts with
    ``"Yeas \u2014"`` is treated as the start of a roll call; the element
    immediately before it must carry the summary sentence (bill id, outcome,
    record number and counts).  Paragraphs whose summary does not match, or
    whose bill id starts with neither a House nor a Senate prefix, are
    skipped.  Following sibling paragraphs starting with ``Nays``,
    ``Present`` or ``Absent`` supply the remaining voters.
    """
    summary_re = re.compile(
        r'(?P<bill_id>\w+\W+\d+)(,?\W+as\W+amended,?)?\W+was\W+'
        r'(?P<type>adopted|passed'
        r'(\W+to\W+(?P<to>engrossment|third\W+reading))?)\W+'
        r'by\W+\(Record\W+(?P<record>\d+)\):\W+'
        r'(?P<yeas>\d+)\W+Yeas,\W+(?P<nays>\d+)\W+Nays,\W+'
        r'(?P<present>\d+)\W+Present')

    for yeas_el in root.xpath(u'//p[starts-with(., "Yeas \u2014")]'):
        summary_el = yeas_el.getprevious()
        if summary_el is None:
            # No preceding element, so there is no summary to parse.
            continue

        text = ''.join(summary_el.itertext())
        # str.replace returns a new string; the result must be kept so that
        # captured groups (e.g. the bill id) do not contain line breaks.
        text = text.replace('\n', ' ')

        m = summary_re.search(text)
        if not m:
            continue

        yes_count = int(m.group('yeas'))
        no_count = int(m.group('nays'))
        other_count = int(m.group('present'))

        bill_id = m.group('bill_id')
        if bill_id.startswith(('H', 'CSHB')):
            bill_chamber = 'lower'
        elif bill_id.startswith(('S', 'CSSB')):
            bill_chamber = 'upper'
        else:
            continue

        motion = get_motion(m)
        vote_type = get_type(motion)

        vote = Vote(None, None, motion, True,
                    yes_count, no_count, other_count)
        vote['bill_id'] = bill_id
        vote['bill_chamber'] = bill_chamber
        vote['session'] = '81'
        vote['method'] = 'record'
        vote['record'] = m.group('record')
        vote['filename'] = m.group('record')
        vote['type'] = vote_type

        for name in names(yeas_el):
            vote.yes(name)

        # Walk the following siblings; getnext() returns None at the end of
        # the parent, so every step is guarded.
        el = yeas_el.getnext()
        if el is not None and el.text and el.text.startswith('Nays'):
            for name in names(el):
                vote.no(name)
            el = el.getnext()

        while el is not None and el.text and re.match(r'Present|Absent',
                                                      el.text):
            for name in names(el):
                vote.other(name)
            el = el.getnext()

        # The "Present" figure in the summary is replaced by the number of
        # voters actually collected in the other category.
        vote['other_count'] = len(vote['other_votes'])
        yield vote
Example #8
0
    def scrape_votes(self, bill, bill_type, number, session):
        """Fetch the votes page for one bill and add each vote to ``bill``.

        Every link whose href contains ``JournalText`` marks one vote: the
        link text is the date, the second cell of its row holds
        ``"<chamber> - <motion>"`` and the following row holds the yea and
        nay name tables.

        Raises:
            ScrapeError: if the chamber text is neither ``House`` nor
                ``Senate``.
        """
        def collect_names(div):
            # Non-empty cell texts of the name table inside ``div``.
            texts = [td.xpath("string()") for td in div.xpath("table/tr/td")]
            return [text for text in texts if text]

        chamber_codes = {'House': 'lower', 'Senate': 'upper'}

        vote_url = ''.join(['http://www.legislature.state.oh.us/votes.cfm?ID=',
                            session, '_', bill_type, '_', str(number)])

        with self.urlopen(vote_url) as page:
            page = lxml.etree.fromstring(page, lxml.etree.HTMLParser())

            for jlink in page.xpath("//a[contains(@href, 'JournalText')]"):
                date = datetime.datetime.strptime(jlink.text,
                                                  "%m/%d/%Y").date()

                details = jlink.xpath("string(../../../td[2])")
                fields = details.split(" - ")

                chamber_name = fields[0]
                if chamber_name not in chamber_codes:
                    raise ScrapeError("Bad chamber: %s" % chamber_name)
                chamber = chamber_codes[chamber_name]

                # Only the first line of the second field is the motion.
                motion = fields[1].split("\n")[0].strip()

                vote_row = jlink.xpath("../../..")[0].getnext()

                yeas = collect_names(vote_row.xpath(
                    "td/font/div[contains(@id, 'Yea')]")[0])
                nays = collect_names(vote_row.xpath(
                    "td/font/div[contains(@id, 'Nay')]")[0])

                yes_count = len(yeas)
                no_count = len(nays)

                vote = Vote(chamber, date, motion, yes_count > no_count,
                            yes_count, no_count, 0)

                for voter in yeas:
                    vote.yes(voter)
                for voter in nays:
                    vote.no(voter)

                bill.add_vote(vote)
Example #9
0
    def get_text_vote_results(self, bill, vote_date, motion_name, vote_data):
        """Build a ``Vote`` from the plain-text roll call in ``vote_data``.

        The text is processed line by line.  A ``Motion:`` line marks the
        vote as passed when it mentions DO CONCUR, DO PASS or DO ADOPT.
        ``Yeas:``/``Ayes:`` and ``Nays:``/``Noes`` lines start a yes or no
        section, a blank line ends it.  Inside a section, ``Total N`` sets
        that section's count and everything else is a comma-separated list
        of names; outside any section, ``Total N`` is added to the other
        count.  Returns the populated vote.
        """
        vote = Vote(bill['chamber'], vote_date, motion_name, None, 0, 0, 0)

        def tally(text, count_key, cast):
            # Handle one line that belongs to the current yes/no section.
            if text.startswith("Total "):
                vote[count_key] = int(text.split()[1].strip())
                text = ""
            if ":" in text:
                # Drop the "Yeas:"-style label in front of the names.
                text = text.split(":", 1)[1]
            for voter in text.split(","):
                voter = voter.strip()
                if voter != '':
                    if voter.endswith('.'):
                        voter = voter[:-1]
                    cast(voter)

        # Current section: 'yes', 'no' or None when outside both.
        section = None

        for line in vote_data.splitlines():
            if line.startswith("Motion:"):
                # The upper-cased line is deliberately kept in ``line``: it
                # is still handed to the section handling below.
                line = line.strip().upper()
                if any(phrase in line
                       for phrase in ('DO CONCUR', 'DO PASS', 'DO ADOPT')):
                    vote['passed'] = True
            elif line.startswith("Yeas:") or line.startswith("Ayes:"):
                section = 'yes'
            elif line.startswith("Nays:") or line.startswith("Noes"):
                section = 'no'
            elif line.startswith("Total "):
                if section is None:
                    vote['other_count'] += int(line.split()[1].strip())
            elif line == '':
                section = None

            if section == 'yes':
                tally(line, 'yes_count', vote.yes)
            elif section == 'no':
                tally(line, 'no_count', vote.no)

        return vote
Example #10
0
    def scrape_old_vote(self, url):
        """Parse an old-format vote page and return the resulting ``Vote``.

        The page's ``h3`` header is split on ``', '``: the second field
        gives the chamber (plus an optional location) and the remaining
        fields form the motion.  Counts come from cells 1, 3, 5 and 7 of the
        second table; individual votes come from the first table, where each
        result cell is matched with the element before it.  The vote is
        created without a date.
        """
        vote_page = self.soup_parser(self.urlopen(url))

        header = vote_page.h3.contents[0]

        chamber_name = header.split(', ')[1]
        if chamber_name.startswith('House'):
            chamber = 'lower'
        else:
            chamber = 'upper'

        # Text after the chamber word, except the tail of
        # "House of Representatives".
        location = ' '.join(chamber_name.split(' ')[1:])
        if location.startswith('of Representatives'):
            location = ''

        motion = ', '.join(header.split(', ')[2:])

        def get_count(cell):
            # An empty cell counts as zero.
            if len(cell.contents) == 0:
                return 0
            else:
                return int(cell.contents[0])

        result_cells = vote_page.findAll('table')[1].findAll('td')
        yes_count = get_count(result_cells[1])
        no_count = get_count(result_cells[3])
        excused_count = get_count(result_cells[5])
        absent_count = get_count(result_cells[7])
        other_count = excused_count + absent_count

        passed = yes_count > no_count

        vote = Vote(chamber, None, motion, passed,
                    yes_count, no_count,
                    other_count, excused_count=excused_count,
                    absent_count=absent_count,
                    location=location)
        vote.add_source(url)

        vote_tbl = vote_page.table
        for td in vote_tbl.findAll('td'):
            # Empty cells occur on these pages (see get_count); indexing
            # their contents would raise IndexError.
            if not td.contents:
                continue

            result = td.contents[0]
            if result == 'Yea':
                vote.yes(td.findPrevious().contents[0])
            elif result == 'Nay':
                vote.no(td.findPrevious().contents[0])
            elif result in ['Excused', 'Absent']:
                vote.other(td.findPrevious().contents[0])

        return vote
Example #11
0
    def scrape_votes(self, bill, sponsor, link):
        """Parse the vote page at ``link`` and add every vote to ``bill``.

        The text of the ``lblVoteData`` span is split on
        ``"<bill_id> by <sponsor> - "``; each piece after the first is one
        vote, whose fields are separated by runs of ten non-breaking
        spaces.  The first field is the motion (and carries the date).

        When both the aye and no totals are found, pass/fail is derived from
        them; otherwise both counts are set to 0 and pass/fail falls back to
        the ``Passed`` / ``Adopted`` wording.  Returns ``bill``.
        """
        # Compiled once; they do not depend on the vote being processed.
        date_regex = re.compile(r'(\d+/\d+/\d+)')
        vote_regex = re.compile(r'\d+$')
        aye_regex = re.compile(r'^.+voting aye were: (.+) -')
        no_regex = re.compile(r'^.+voting no were: (.+) -')

        with self.urlopen(link) as page:
            page = lxml.html.fromstring(page)
            raw_vote_data = page.xpath("//span[@id='lblVoteData']")[0].text_content()
            raw_vote_data = raw_vote_data.strip().split('%s by %s - ' % (bill['bill_id'], sponsor))[1:]
            for raw_vote in raw_vote_data:
                raw_vote = raw_vote.split(u'\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0')
                motion = raw_vote[0]

                vote_date = date_regex.search(motion)
                if vote_date:
                    vote_date = datetime.datetime.strptime(vote_date.group(), '%m/%d/%Y')

                # A vote may consist of the motion field alone, so the
                # second field is only inspected when it exists.
                passed = ('Passed' in motion) or (
                    len(raw_vote) > 1 and 'Adopted' in raw_vote[1])

                yes_count = None
                no_count = None
                other_count = 0
                ayes = []
                nos = []

                for v in raw_vote[1:]:
                    if v.startswith('Ayes...') and vote_regex.search(v):
                        yes_count = int(vote_regex.search(v).group())
                    elif v.startswith('Noes...') and vote_regex.search(v):
                        no_count = int(vote_regex.search(v).group())
                    elif aye_regex.search(v):
                        ayes = aye_regex.search(v).groups()[0].split(', ')
                    elif no_regex.search(v):
                        nos = no_regex.search(v).groups()[0].split(', ')

                # Compare against None, not truthiness: a parsed total of 0
                # (e.g. a unanimous vote) is a real value and must not wipe
                # out the other count.
                if yes_count is not None and no_count is not None:
                    passed = yes_count > no_count
                else:
                    yes_count = no_count = 0

                vote = Vote(bill['chamber'], vote_date, motion, passed, yes_count, no_count, other_count)
                vote.add_source(link)
                for a in ayes:
                    vote.yes(a)
                for n in nos:
                    vote.no(n)
                bill.add_vote(vote)

        return bill
Example #12
0
    def scrape_lower_vote(self, url):
        """Parse a lower-chamber vote page and return the resulting ``Vote``.

        The motion is assembled from the elements mentioning "Amendment
        Number", "Reading Number" and "Floor Actions".  The date and the
        yea / nay / not-voting totals are read from their labelled elements.
        Each row of the nested vote table starts with a one-letter flag
        (``Y``, ``N``, ``-`` or ``C``) followed by the legislator's name;
        rows with any other leading character are ignored.
        """
        with self.urlopen(url) as page:
            page = lxml.html.fromstring(page)

            table = page.xpath("/html/body/table/tr[3]/td/table/tr/"
                               "td[3]/table/tr/td/table[3]")[0]

            motion_parts = []
            for part in ("Amendment Number", "Reading Number",
                         "Floor Actions"):
                motion_parts.append(
                    page.xpath("string(//*[contains(text(), '%s')])" %
                               part).strip())
            motion = " ".join(motion_parts).strip()

            date = page.xpath(
                'string(//*[contains(text(), "Date:")]/following-sibling::*)')
            date = datetime.datetime.strptime(date, "%m/%d/%Y")

            yeas = page.xpath('string(//*[contains(text(), "Yeas")])')
            yeas = int(yeas.split(' - ')[1])

            nays = page.xpath('string(//*[contains(text(), "Nays")])')
            nays = int(nays.split(' - ')[1])

            nv = page.xpath('string(//*[contains(text(), "Not Voting")])')
            nv = int(nv.split(' - ')[1])

            passed = yeas > (nays + nv)

            vote = Vote('lower', date, motion, passed, yeas, nays, nv)
            vote.add_source(url)

            for tr in table.xpath("tr/td/table/tr"):
                text = tr.xpath("string()")
                text = re.sub(r"\s+", r" ", text)

                name = " ".join(text.split()[1:])

                # startswith() instead of text[0]: a row without any text
                # is skipped rather than raising IndexError.
                if text.startswith("Y"):
                    vote.yes(name)
                elif text.startswith("N"):
                    vote.no(name)
                elif text.startswith(("-", "C")):
                    vote.other(name)

            return vote
Example #13
0
    def scrape_new_vote(self, url):
        """Parse a new-format vote page and return the resulting ``Vote``.

        The ``ctl00_contentMain_hdVote`` header is split on ``', '``: the
        second field gives the chamber (plus an optional location) and the
        remaining fields form the motion.  Counts are read from the
        ``tdAyes`` / ``tdNays`` / ``tdExcused`` / ``tdAbsent`` cells, and
        individual votes from the ``tblVotes`` table, where each result cell
        is matched with the element before it.  The vote is created without
        a date.
        """
        vote_page = self.soup_parser(self.urlopen(url))

        header = vote_page.find(id="ctl00_contentMain_hdVote").contents[0]

        chamber_name = header.split(', ')[1]
        if chamber_name.startswith('House'):
            chamber = 'lower'
        else:
            chamber = 'upper'

        # Text after the chamber word, except the tail of
        # "House of Representatives".
        location = ' '.join(chamber_name.split(' ')[1:])
        if location.startswith('of Representatives'):
            location = ''

        motion = ', '.join(header.split(', ')[2:])

        yes_count = int(vote_page.find(
            id="ctl00_contentMain_tdAyes").contents[0])
        no_count = int(vote_page.find(
            id="ctl00_contentMain_tdNays").contents[0])
        excused_count = int(vote_page.find(
            id="ctl00_contentMain_tdExcused").contents[0])
        absent_count = int(vote_page.find(
            id="ctl00_contentMain_tdAbsent").contents[0])
        other_count = excused_count + absent_count

        passed = yes_count > no_count

        vote = Vote(chamber, None, motion, passed,
                    yes_count, no_count,
                    other_count, excused_count=excused_count,
                    absent_count=absent_count,
                    location=location)
        vote.add_source(url)

        vote_tbl = vote_page.find(id="ctl00_contentMain_tblVotes")
        for td in vote_tbl.findAll('td'):
            # An empty cell has no contents to index; skip it instead of
            # raising IndexError.
            if not td.contents:
                continue

            result = td.contents[0]
            if result == 'Yea':
                vote.yes(td.findPrevious().contents[0])
            elif result == 'Nay':
                vote.no(td.findPrevious().contents[0])
            elif result in ['Excused', 'Absent']:
                vote.other(td.findPrevious().contents[0])

        return vote
Example #14
0
    def parse_vote(self, bill, actor, date, motion, url):
        """Parse the plain-text roll call at ``url`` and add it to ``bill``.

        The page must contain ``YEAS``, ``NAYS`` and ``ABSENT`` (optionally
        ``ABSENT OR NOT VOTING``) sections in that order, each followed by a
        count and then by names separated by two or more whitespace
        characters.  ``actor`` is used as the chamber when it is ``upper``
        or ``lower``; any other value is stored as the vote's location.
        """
        with self.urlopen(url) as page:
            pattern = re.compile(
                r'YEAS -?\s?(\d+)(.*)NAYS -?\s?(\d+)'
                r'(.*)ABSENT( OR NOT VOTING)? -?\s?'
                r'(\d+)(.*)',
                re.MULTILINE | re.DOTALL)
            found = pattern.search(page)

            # Groups 1, 3 and 6 hold the three totals.
            yes_count, no_count, other_count = [
                int(found.group(index)) for index in (1, 3, 6)]

            passed = yes_count > no_count

            if actor in ('upper', 'lower'):
                vote_chamber, vote_location = actor, ''
            else:
                vote_chamber, vote_location = '', actor

            vote = Vote(vote_chamber, date,
                        motion, passed, yes_count, no_count,
                        other_count,
                        location=vote_location)
            vote.add_source(url)

            # Groups 2, 4 and 7 hold the name lists of each section.
            name_lists = [re.split(r'\s{2,}', found.group(index).strip())
                          for index in (2, 4, 7)]
            recorders = (vote.yes, vote.no, vote.other)

            for record, voters in zip(recorders, name_lists):
                for voter in voters:
                    if voter:
                        record(voter)

            bill.add_vote(vote)
Example #15
0
    def scrape_vote(self, bill, chamber, url):
        """Parse the vote table at ``url`` and add the vote to ``bill``.

        The second row of the first table supplies, in column order, the
        date, motion, yes / no / other counts and the pass/fail text.  The
        motion wording determines the vote's ``type``.  Rows from the fourth
        onwards that have exactly two cells are individual legislators'
        votes.
        """
        with self.urlopen(url) as page:
            page = page.replace('&nbsp;', ' ')
            page = lxml.html.fromstring(page)

            info_row = page.xpath("//table[1]/tr[2]")[0]

            date = info_row.xpath("string(td[1])")
            date = datetime.datetime.strptime(date, "%m/%d/%Y")

            motion = info_row.xpath("string(td[2])")
            yes_count = int(info_row.xpath("string(td[3])"))
            no_count = int(info_row.xpath("string(td[4])"))
            other_count = int(info_row.xpath("string(td[5])"))
            passed = info_row.xpath("string(td[6])") == 'Pass'

            if motion == 'Shall the bill pass?':
                vote_type = 'passage'
            elif motion == 'Shall the bill be read the third time?':
                vote_type = 'reading:3'
            elif 'be amended as' in motion:
                vote_type = 'amendment'
            else:
                vote_type = 'other'

            vote = Vote(chamber, date, motion, passed,
                        yes_count, no_count, other_count)
            # Store the classification worked out above; it was previously
            # computed and then overwritten without ever being used.
            vote['type'] = vote_type
            vote.add_source(url)

            for tr in page.xpath("//table[1]/tr")[3:]:
                if len(tr.xpath("td")) != 2:
                    continue

                # Keep only the text before " of" in the name cell.
                name = tr.xpath("string(td[1])").split(' of')[0]

                cast = tr.xpath("string(td[2])").strip()
                if cast == 'Yea':
                    vote.yes(name)
                elif cast == 'Nay':
                    vote.no(name)
                else:
                    vote.other(name)

            bill.add_vote(vote)
Example #16
0
    def scrape_votes(self, bill_page, bill, insert, year):
        """Follow every "Passage" link on a bill page and add its vote.

        For each link in ``bill_page`` whose text contains ``Passage`` the
        vote report under
        ``http://www.leg.state.nv.us/Session/<insert>/Reports/`` is fetched.
        The link text is the motion; it also decides the chamber
        (``Assembly`` means lower, anything else upper).  The report
        supplies a month-day date (completed with ``year``), five totals and
        one row per legislator.  The report URL is added to ``bill`` as a
        source.
        """
        root = lxml.html.fromstring(bill_page)
        for link in root.xpath('//a[contains(text(), "Passage")]'):
            motion = link.text
            if 'Assembly' in motion:
                chamber = 'lower'
            else:
                chamber = 'upper'
            vote_url = 'http://www.leg.state.nv.us/Session/%s/Reports/%s' % (
                insert, link.get('href'))
            bill.add_source(vote_url)
            with self.urlopen(vote_url) as page:
                page = page.decode("utf8").replace(u"\xa0", " ")
                vote_root = lxml.html.fromstring(page)

                date = vote_root.xpath(
                    'string(/html/body/center/font)').split()[-1]
                date = date + "-" + str(year)
                date = datetime.strptime(date, "%m-%d-%Y")

                def total(column):
                    # First token of the given cell in the totals table.
                    cell = vote_root.xpath(
                        'string(/html/body/center/table/tr/td[%d])' % column)
                    return int(cell.split()[0])

                yes_count = total(1)
                no_count = total(2)
                excused = total(3)
                not_voting = total(4)
                absent = total(5)
                other_count = excused + not_voting + absent
                passed = yes_count > no_count

                vote = Vote(chamber, date, motion, passed, yes_count, no_count,
                            other_count, not_voting=not_voting, absent=absent)

                for el in vote_root.xpath('/html/body/table[2]/tr'):
                    # NOTE(review): str() would raise on a non-ASCII name
                    # under Python 2 -- confirm this cannot occur.
                    name = str(el.xpath('string(td[1])').strip())

                    result_tokens = el.xpath('string(td[2])').split()
                    if not result_tokens:
                        # Row without a result cell; nothing to record.
                        continue
                    vote_result = result_tokens[0]

                    if vote_result == 'Yea':
                        vote.yes(name)
                    elif vote_result == 'Nay':
                        vote.no(name)
                    else:
                        vote.other(name)
                bill.add_vote(vote)
Example #17
0
    def scrape_votes(self, bill_page, bill, chamber, insert, motion, year):
        """Follow the vote-report links on a bill page and add each vote.

        Every link in the fifth table of the page's ``content`` div is
        resolved against
        ``http://www.leg.state.nv.us/Session/<insert>/Reports/`` and
        fetched.  The report supplies a month-day date (completed with
        ``year``), five totals and one row per legislator.  ``chamber`` and
        ``motion`` are passed through to the ``Vote`` unchanged.  The report
        URL is added to ``bill`` as a source.
        """
        root = lxml.etree.fromstring(bill_page, lxml.etree.HTMLParser())
        url_path = ('/html/body/div[@id="content"]/table[5]/tr/td/a')
        for mr in root.xpath(url_path):
            url_end = mr.xpath('string(@href)')
            vote_url = 'http://www.leg.state.nv.us/Session/%s/Reports/%s' % (insert, url_end)
            bill.add_source(vote_url)
            with self.urlopen(vote_url) as page:
                page = page.decode("utf8").replace(u"\xa0", " ")
                vote_root = lxml.etree.fromstring(page, lxml.etree.HTMLParser())

                date = vote_root.xpath(
                    'string(/html/body/center/font)').split()[-1]
                date = date + "-" + str(year)
                date = datetime.strptime(date, "%m-%d-%Y")

                def total(column):
                    # First token of the given cell in the totals table.
                    cell = vote_root.xpath(
                        'string(/html/body/center/table/tr/td[%d])' % column)
                    return int(cell.split()[0])

                # Convert before comparing: as strings, "9" > "10" is True.
                yes_count = total(1)
                no_count = total(2)
                excused = total(3)
                not_voting = total(4)
                absent = total(5)
                # Every legislator who is neither Yea nor Nay is recorded as
                # "other" below, so the count has to cover all three groups.
                other_count = excused + not_voting + absent
                passed = yes_count > no_count

                vote = Vote(chamber, date, motion, passed, yes_count, no_count,
                            other_count, not_voting=not_voting, absent=absent)

                for el in vote_root.xpath('/html/body/table[2]/tr'):
                    # NOTE(review): str() would raise on a non-ASCII name
                    # under Python 2 -- confirm this cannot occur.
                    name = str(el.xpath('string(td[1])').strip())

                    result_tokens = el.xpath('string(td[2])').split()
                    if not result_tokens:
                        # Row without a result cell; nothing to record.
                        continue
                    vote_result = result_tokens[0]

                    if vote_result == 'Yea':
                        vote.yes(name)
                    elif vote_result == 'Nay':
                        vote.no(name)
                    else:
                        vote.other(name)
                bill.add_vote(vote)
Example #18
0
 def scrape_votes(self, vote_text, vote_url, house, date, bill):
     """Parse a one-sentence vote summary and add the vote to ``bill``.

     ``vote_text`` is split on ``;`` into parts that are read, in order, as
     Ayes, Ayes with reservations, Noes and Excused.  In each part the
     first run of digits is the count and the comma-separated text after
     ``(s)`` is the list of voters.  Ayes with reservations and Excused are
     both recorded as "other".  The text before
     ``" The votes were as follows:"`` becomes the motion.  ``house`` of
     ``'H'`` selects the lower chamber, anything else the upper.  The vote
     is always recorded as passed.
     """
     votes_parts = vote_text.split(";")
     voters = []

     motion_text, sep, after = vote_text.partition(" The votes were as follows:")

     for vp in votes_parts:
         before, sep, after = vp.partition("(s)")
         voters_list = after.split(", ")
         voters_list[0] = voters_list[0].lstrip(" ")
         voters_list[-1] = voters_list[-1].rstrip(". ")
         voters.append(voters_list)

     # Ayes, Ayes with reservations, Noes, Excused
     vote_counts = [0, 0, 0, 0]

     # NOTE(review): the first part still contains the motion text, so any
     # digits in it are picked up before the Ayes count -- confirm the
     # input format.
     for i, t in enumerate(votes_parts):
         match = re.search("[0-9]+", t)
         if match is not None:
             vote_counts[i] = int(match.group(0))

     if house == 'H':
         vote_house = "lower"
     else:
         vote_house = "upper"

     vote = Vote(vote_house, date, motion_text, True,
                 vote_counts[0], vote_counts[2],
                 vote_counts[1] + vote_counts[3])
     vote.add_source(vote_url)

     for yes_voter in voters[0]:
         vote.yes(yes_voter)
     for no_voter in voters[2]:
         vote.no(no_voter)
     for other_voter in voters[1]:
         vote.other(other_voter)
     # Excused voters are the fourth part.  Index 2 holds the Noes, which
     # are already recorded above and must not be added as "other" too.
     excused_voters = voters[3] if len(voters) > 3 else []
     for other_voter in excused_voters:
         vote.other(other_voter)

     bill.add_vote(vote)
Example #19
0
    def scrape_votes(self, bill_url, bill, chamber, insert, motion):
        """Scrape every roll call linked from a bill page and add it to ``bill``.

        Args:
            bill_url: page listing the bill's vote reports.
            bill: object whose ``add_vote`` receives each vote.
            chamber: chamber handed to ``Vote`` unchanged.
            insert: session path segment used to build each report URL.
            motion: motion text handed to ``Vote`` unchanged.
        """
        def summary_count(vote_root, column):
            # First whitespace-separated token of a summary cell, as an int.
            cell = vote_root.xpath(
                'string(/html/body/center/table/tr/td[%d])' % column)
            return int(cell.split()[0])

        with self.urlopen(bill_url) as page:
            root = lxml.etree.fromstring(page, lxml.etree.HTMLParser())
            url_path = ('/html/body/div[@id="content"]/table[5]/tr/td/a')
            for mr in root.xpath(url_path):
                url_end = mr.xpath('string(@href)')
                vote_url = 'http://www.leg.state.nv.us/Session/%s/Reports/%s' % (insert, url_end)

                with self.urlopen(vote_url) as vote_page:
                    vote_root = lxml.etree.fromstring(vote_page, lxml.etree.HTMLParser())

                    date = vote_root.xpath('string(/html/body/center/font)').split()[-1]
                    # Summary columns, left to right.
                    yes_count = summary_count(vote_root, 1)
                    no_count = summary_count(vote_root, 2)
                    excused = summary_count(vote_root, 3)
                    not_voting = summary_count(vote_root, 4)
                    absent = summary_count(vote_root, 5)

                    # Compare as integers: as strings "9" sorts above "10".
                    passed = yes_count > no_count
                    # Every member who is neither Yea nor Nay is recorded
                    # through vote.other() below, so the other total is the
                    # sum of the three remaining columns.
                    other_count = excused + not_voting + absent

                    vote = Vote(chamber, date, motion, passed, yes_count, no_count,
                                other_count, not_voting=not_voting, absent=absent)

                    for el in vote_root.xpath('/html/body/table[2]/tr'):
                        name = str(el.xpath('string(td[1])').strip())
                        result_tokens = el.xpath('string(td[2])').split()
                        if not result_tokens:
                            # Row without a vote cell; nothing to record.
                            continue
                        vote_result = result_tokens[0]

                        if vote_result == 'Yea':
                            vote.yes(name)
                        elif vote_result == 'Nay':
                            vote.no(name)
                        else:
                            vote.other(name)
                    bill.add_vote(vote)
Example #20
0
    def scrape_vote(self, chamber, session, bill_id, vote_url):
        """Scrape a single roll-call page and save it as a lower-chamber Vote.

        Returns without saving anything when the request ends up on the
        "no vote found" page.
        """
        no_vote_page = 'http://www.house.leg.state.mn.us/votes/novotefound.asp'
        with self.urlopen(vote_url) as html:

            # A broken link redirects to the "no vote found" page.
            if html.response.url == no_vote_page:
                return

            doc = lxml.html.fromstring(html)
            paragraphs = doc.xpath('//h1/following-sibling::p')

            # Paragraph one: motion on the 3rd line (or the 2nd when the 3rd
            # is empty) and the "__ YEA and __ Nay" totals on the last line.
            summary_lines = paragraphs[0].text_content().splitlines()
            motion = summary_lines[2] or summary_lines[1]
            yea_text, nay_text = self.yeanay_re.match(summary_lines[-1]).groups()
            yeas = int(yea_text)
            nays = int(nay_text)

            # Paragraph two: the date.
            date_text = self.date_re.match(paragraphs[1].text_content()).group(1)
            date = datetime.datetime.strptime(date_text, '%m/%d/%Y')

            filename = 'vote%s-%s' % (self.sequence.next(), bill_id)
            vote = Vote('lower', date, motion, yeas > nays, yeas, nays, 0,
                        session=session, bill_id=bill_id, bill_chamber=chamber,
                        filename=filename)
            vote.add_source(vote_url)

            # The first table lists the yeas, the second the nays.
            roll_tables = (
                ('//table[1]/tr/td/font/text()', vote.yes),
                ('//table[2]/tr/td/font/text()', vote.no),
            )
            for names_xpath, record in roll_tables:
                for name in doc.xpath(names_xpath):
                    record(name.strip())

            self.save_vote(vote)
Example #21
0
    def scrape_upper_vote(self, url):
        """Parse the <pre> text of an upper-chamber vote page into a Vote.

        The motion is assembled from the "Amendment Number", "Reading Number"
        and "Floor Actions" fields; the vote passes only when the yeas
        outnumber the nays and the non-voters combined.
        """
        with self.urlopen(url) as page:
            doc = lxml.html.fromstring(page)
            text = doc.xpath('string(//pre)')

            def find(pattern, group):
                # Requested group of the first match of pattern in the text.
                return re.search(pattern, text, re.MULTILINE).group(group)

            motion = " ".join([
                find(r'Amendment Number:\s([^\s]+)?', 0).strip(),
                find(r'Reading Number .:\s([^\s]+)?', 0).strip(),
                find(r'Floor Actions ..:\s([^\s]+)?', 0).strip(),
            ])

            yeas = int(find(r'Yeas\s-\s(\d+)', 1))
            nays = int(find(r'Nays\s-\s(\d+)', 1))
            nv = int(find(r'Not\sVoting\s-\s(\d+)', 1))

            date = datetime.datetime.strptime(
                find(r'Date:\s(\d+/\d+/\d+)', 1), '%m/%d/%Y')

            vote = Vote('upper', date, motion, yeas > (nays + nv),
                        yeas, nays, nv)
            vote.add_source(url)

            # Each roll entry is "_ <marker> <name>".
            markers = (('Y ', vote.yes), ('N ', vote.no), ('EX', vote.other))
            for marker, record in markers:
                for match in re.finditer(r'_\s%s\s(\w+)' % marker, text,
                                         re.MULTILINE):
                    record(match.group(1))

            return vote
Example #22
0
    def parse_roll_call(self, url, chamber, date):
        """Fetch a roll-call page and return it as a Vote.

        The motion is the text of the fourth 'font8text' div; totals come from
        the elements following the YEAS / NAYS / LVE / N/V label divs, and each
        member's name from the element following a Y, N, X or E span.
        """
        with self.urlopen(url) as page:
            doc = lxml.html.fromstring(page)

            motion = doc.xpath("//div[@class='font8text']")[3].text.strip()
            if motion == 'FP':
                motion = 'FINAL PASSAGE'

            if motion == 'FINAL PASSAGE':
                vote_type = 'passage'
            elif re.match(r'CONCUR(RENCE)? IN \w+ AMENDMENTS', motion):
                vote_type = 'amendment'
            else:
                vote_type = 'other'

            def tally(label):
                # Integer held by the element right after the label div.
                label_div = doc.xpath("//div[text() = '%s']" % label)[0]
                return int(label_div.getnext().text)

            yeas = tally('YEAS')
            nays = tally('NAYS')
            lve = tally('LVE')
            nv = tally('N/V')
            other = lve + nv

            # Passing requires more yeas than everything else combined.
            vote = Vote(chamber, date, motion, yeas > (nays + other),
                        yeas, nays, other, type=vote_type)

            recorders = {'Y': vote.yes, 'N': vote.no}
            for span in doc.xpath("//span[text() = 'Y' or text() = 'N'"
                                  "or text() = 'X' or text() = 'E']"):
                name = span.getnext().text.strip()
                # Anything that is not Y or N counts as "other".
                recorders.get(span.text, vote.other)(name)

            return vote
Example #23
0
    def get_html_vote_results(self, bill, motion_name, vote_data):
        """Build a Vote from the HTML text of a vote-results page.

        Tables are told apart by their first header cell: 'YEAS' marks the
        totals table; an empty header marks either the four-column roll table
        or the table whose second row starts with "DATE:".

        Raises:
            NoVoteDataException: when the page reports no vote records.
        """
        vote = Vote(bill['chamber'], None, motion_name, False, 0, 0, 0)

        if "No Vote Records Found for this Action." in vote_data:
            raise NoVoteDataException()

        # A line consisting of exactly one of these phrases means it passed.
        if any(line in ('Do Pass', 'Do Concur')
               for line in vote_data.splitlines()):
            vote['passed'] = True

        tree = ElementTree(lxml.html.fromstring(vote_data))
        for table in tree.findall("//table"):
            rows = table.findall("tr")
            headers = rows[0].findall("th")
            left_header = headers[0].text.strip()

            if left_header == 'YEAS':
                # Totals live in the last row; columns 3 and 4 are both "other".
                totals = rows[-1].findall("td")
                vote['yes_count'] = int(totals[0].text)
                vote['no_count'] = int(totals[1].text)
                third_column = int(totals[2].text)
                vote['other_count'] = int(totals[3].text) + third_column
            elif left_header != '':
                continue
            elif len(headers) == 4:
                # Roll table: every cell reads "<vote letter> <member name>".
                for cell in ElementTree(table).findall("//td"):
                    cell_text = cell.text.replace(u"\xa0", " ")
                    vote_value, name = cell_text.split(" ", 1)
                    vote_value = vote_value.strip()
                    name = name.strip()
                    if name == "":
                        continue

                    if vote_value == 'Y':
                        vote.yes(name)
                    elif vote_value == 'N':
                        vote.no(name)
                    else:
                        vote.other(name)
            elif rows[1].findall("td")[0].text.startswith("DATE:"):
                date_text = rows[1].findall("td")[0].text
                date_text = date_text.replace("DATE:", "").strip()
                vote['date'] = datetime.strptime(date_text, "%B %d, %Y")
        return vote
Example #24
0
    def scrape_vote(self, bill, vote_type_id, vote_type):
        """Scrape one vote page for ``bill`` and attach the resulting Vote.

        The page is addressed by ``vote_type_id`` and the bill's 'bill_id';
        ``vote_type`` is used as the motion text.
        """
        url = ('http://www.dccouncil.washington.dc.us/lims/voting.aspx?VoteTypeID=%s&LegID=%s'
               % (vote_type_id, bill['bill_id']))

        with self.urlopen(url) as html:
            doc = lxml.html.fromstring(html)

            def bold_text(span_id):
                # First bold text node inside the span with the given id.
                return doc.xpath('//span[@id="%s"]/b/text()' % span_id)[0]

            vote_date = convert_date(doc.get_element_by_id('VoteDate').text)

            # The voice-vote and approved boxes hold an 'x' when ticked.
            voice = bold_text('VoteTypeVoice') == 'x'
            passed = bold_text('VoteResultApproved') == 'x'

            yes_count = extract_int(bold_text('VoteCount1'))
            no_count = extract_int(bold_text('VoteCount2'))
            # Whatever is left of a fixed total of 13 counts as "other".
            other_count = 13 - (yes_count + no_count)

            vote = Vote('upper', vote_date, vote_type, passed, yes_count,
                        no_count, other_count, voice_vote=voice)
            vote.add_source(url)

            # Member names are the only text on the page inside <u> tags.
            for member_u in doc.xpath('//u'):
                how_voted = member_u.xpath('../../i/text()')[0]
                if 'YES' in how_voted:
                    record = vote.yes
                elif 'NO' in how_voted:
                    record = vote.no
                else:
                    record = vote.other
                record(member_u.text)
        bill.add_vote(vote)
Example #25
0
    def scrape(self, chamber, year):
        """Build and save a Bill for every CA measure of one chamber and session.

        The session name is ``year`` followed by ``year + 1`` and must appear
        among the sessions listed in ``metadata['terms']``.  Each saved bill
        carries its sponsors from the same house, its actions and its votes.

        Raises:
            NoDataForPeriod: if the derived session is not in the metadata.
        """
        session = "%s%d" % (year, int(year) + 1)
        known_sessions = [s_ for t in metadata['terms']
                          for s_ in t['sessions']]
        if session not in known_sessions:
            raise NoDataForPeriod(year)

        # 'SB' measures for the upper chamber, 'AB' measures otherwise.
        if chamber == 'upper':
            measure_abbr, chamber_name = 'SB', 'SENATE'
        else:
            measure_abbr, chamber_name = 'AB', 'ASSEMBLY'

        # Substrings of the action text and the action type each one implies.
        action_markers = (
            ('To Com', 'committee:referred'),
            ('Read third time.  Passed.', 'bill:passed'),
            ('Approved by Governor', 'bill:signed'),
            ('Item veto', 'veto:line-item'),
        )
        chamber_codes = {'Assembly': 'lower', 'Senate': 'upper'}

        bills = self.session.query(CABill).filter_by(
            session_year=session).filter_by(
            measure_type=measure_abbr)

        for bill in bills:
            bill_session = session
            if bill.session_num != '0':
                bill_session += ' Special Session %s' % bill.session_num

            bill_id = bill.short_bill_id
            version = self.session.query(CABillVersion).filter_by(
                bill=bill).filter(CABillVersion.bill_xml != None).first()
            if not version:
                # Without a version carrying XML there is nothing to import.
                continue

            fsbill = Bill(bill_session, chamber, bill_id,
                          version.title,
                          short_title=version.short_title)

            # Only authors from this bill's own house become sponsors.
            for author in version.authors:
                if author.house == chamber_name:
                    fsbill.add_sponsor(author.contribution, author.name)

            for action in bill.actions:
                act_str = action.action
                if not act_str:
                    # NULL action text looks like an error in the source data.
                    continue

                actor = (action.actor or chamber).strip()
                match = re.match(r'(Assembly|Senate)($| \(Floor)', actor)
                if match:
                    actor = chamber_codes[match.group(1)]
                elif actor.startswith('Governor'):
                    actor = 'executive'
                else:
                    actor = re.sub('^Assembly', 'lower', actor)
                    actor = re.sub('^Senate', 'upper', actor)

                action_types = []
                if act_str.startswith('Introduced'):
                    action_types.append('bill:introduced')
                for marker, label in action_markers:
                    if marker in act_str:
                        action_types.append(label)

                fsbill.add_action(actor, act_str, action.action_date,
                                  type=action_types or ['other'])

            for vote in bill.votes:
                result = vote.vote_result == '(PASS)'

                # The first word of the location names the chamber; the
                # remainder is the location within it.
                full_loc = vote.location.description
                loc_words = full_loc.split(' ')
                first_part = loc_words[0].lower()
                if first_part in ['asm', 'assembly']:
                    vote_chamber = 'lower'
                elif first_part.startswith('sen'):
                    vote_chamber = 'upper'
                else:
                    vote_chamber = ''

                if vote_chamber:
                    vote_location = ' '.join(loc_words[1:])
                else:
                    vote_location = full_loc

                fsvote = Vote(vote_chamber,
                              vote.vote_date_time,
                              vote.motion.motion_text or '',
                              result,
                              vote.ayes, vote.noes, vote.abstain,
                              threshold=vote.threshold,
                              location=vote_location)

                for record in vote.votes:
                    if record.vote_code == 'AYE':
                        cast = fsvote.yes
                    elif record.vote_code.startswith('NO'):
                        cast = fsvote.no
                    else:
                        cast = fsvote.other
                    cast(record.legislator_name)

                fsbill.add_vote(fsvote)

            self.save_bill(fsbill)
Example #26
0
 def scrape_votes(self, vote_page, bill, url):
     """Parse a roll-call page and attach the resulting vote to ``bill``.

     Args:
         vote_page: parsed page exposing ``text_content()`` and
             ``cssselect()``.
         bill: object whose ``add_vote`` receives the finished vote.
         url: source URL recorded on the vote.
     """
     page_text = vote_page.text_content()

     date_text = re.search("[0-9]{1,2}/[0-9]{1,2}/[0-9]{4}", page_text).group(0)
     vote_date = dt.datetime.strptime(date_text, '%m/%d/%Y')

     # Totals appear in the page text as "<label>: <number>".  The parsed
     # number must be stored back into the dict (it used to be discarded,
     # which left every total at 0 and every vote marked as failed).
     votes = {"Yeas": 0, "Nays": 0, "Absent": 0, "Excused": 0}
     for label in list(votes):
         match = re.search(label + ": ([0-9]+)", page_text)
         if match is not None:
             votes[label] = int(match.group(1))

     passed = votes["Yeas"] > votes["Nays"]

     chamber_name = re.search("(Senate|House) vote", page_text).group(1)
     if chamber_name == "Senate":
         chamber = "upper"
         title = "Senator"
     else:
         chamber = "lower"
         title = "Representative"

     # The motion is the third centre-aligned cell on the page.
     motion = vote_page.cssselect('td[align="center"]')[2].text_content()

     vote = Vote(chamber, vote_date, motion, passed, votes["Yeas"],
                 votes["Nays"], votes["Absent"] + votes["Excused"])
     vote.add_source(url)

     # Roll-call spans are positional: yeas, nays, then two "other" groups.
     # Slicing tolerates pages that carry fewer than four spans.
     recorders = [vote.yes, vote.no, vote.other, vote.other]
     roll_spans = vote_page.cssselect('span[class="RollCall"]')[:4]
     for record, span in zip(recorders, roll_spans):
         voters = span.text_content().split(", ")
         if len(voters) == 1:
             voters = voters[0].split(" and ")

         # The first entry starts with the (possibly plural) title; keep
         # only what follows it.  Prefixes are removed explicitly because
         # str.lstrip() strips a *set of characters*, which can eat the
         # leading letters of a name.
         first = voters[0].partition(title)[2]
         if first.startswith("s"):
             first = first[1:]
         voters[0] = first.lstrip(" ")

         last = voters[-1].lstrip(" ")
         if last.startswith("and "):
             last = last[len("and "):]
         voters[-1] = last.lstrip(" ")

         for name in voters:
             # Skip blanks left behind by spans that list nobody.
             if name:
                 record(name)

     bill.add_vote(vote)
Example #27
0
    def scrape_votes(self, url, chamb):
        """Scrape a roll-call vote page and return it as a ``Vote``.

        Two layouts are handled.  When the page holds more than four <font>
        and <span> tags combined, the summary is read from those tags and the
        individual votes from the cells of the last <table>.  Otherwise both
        the summary and the votes are read from the last child of the <pre>
        element.

        Args:
            url: address of the vote page.
            chamb: chamber handed to ``Vote`` unchanged.

        Returns:
            The populated ``Vote``.
        """
        with self.urlopen(url) as doc:
            soup = BeautifulSoup(doc)
            date = None
            motion = None
            yeas = None
            neas = None
            others = None
            necessary = None
            chamber = chamb
            vote = None
            # Cells/words made only of digits and spaces carry no vote data.
            digits = re.compile(r"^[\d ]+$")

            fonts = soup.findAll("font")
            span = soup.findAll("span")
            if (len(fonts) + len(span)) > 4:  # data is vaguely structured
                if len(fonts) < 4:
                    fonts = span
                for line in fonts:
                    line = str(line.contents[0]).strip()
                    if "Taken on" in line:
                        # Text has the form "Taken on <date> <reason>".
                        split = line.split(None, 3)
                        date = split[2]
                        if len(split) > 3:
                            motion = split[3]
                    elif "Those voting Yea" in line:
                        yeas = self.get_num_from_line(line)
                    elif "Those voting Nay" in line:
                        neas = self.get_num_from_line(line)
                    elif "Those absent and not voting" in line:
                        others = self.get_num_from_line(line)
                    elif ("Necessary for Adoption" in line
                          or "Necessary for Passage" in line):
                        necessary = self.get_num_from_line(line)
                passed = yeas >= necessary
                vote = Vote(chamber, date, motion, passed, yeas, neas, others)

                # Individual votes live in the last table: a cell with the
                # vote letter is followed by a cell with the member's name.
                tds = soup.findAll("table")[-1].findAll("td")

                vote_value = None
                for cell in tds:
                    # The text sits in either a <font> or a <span>.
                    node = cell.find("font")
                    if node is None:
                        node = cell.find("span")
                    if node is not None:
                        text = node.contents[0].strip()
                    else:
                        text = ""
                    if not text or digits.search(text) is not None:
                        continue
                    if vote_value is None:
                        if text in ("Y", "N"):
                            vote_value = text
                        elif text in ("X", "A"):
                            vote_value = "X"
                    else:
                        if vote_value == "Y":
                            vote.yes(text)
                        elif vote_value == "N":
                            vote.no(text)
                        else:
                            vote.other(text)
                        vote_value = None

            else:
                # Data is mostly unstructured: sift through the <pre> text.
                data = soup.find("pre")
                lines = re.split(r"\n+|\r+|\f+", data.contents[-1].strip())
                # Index of the first line after the roll-call marker; stays
                # past the end when the marker never shows up.
                roll_start = len(lines)
                for i, raw_line in enumerate(lines):
                    line = raw_line.strip()
                    if "Taken on" in line:
                        # Text has the form "Taken on <date> <reason>".
                        split = line.split(None, 3)
                        date = split[2]
                        if len(split) > 3:
                            motion = split[3]
                    elif "Those voting Yea" in line:
                        yeas = self.get_num_from_line(line)
                    elif "Those voting Nay" in line:
                        neas = self.get_num_from_line(line)
                    elif "Those absent and not voting" in line:
                        others = self.get_num_from_line(line)
                    elif ("Necessary for Adoption" in line
                          or "Necessary for Passage" in line):
                        if "Adoption" in line:
                            motion = "Adoption"
                        else:
                            motion = "Passage"
                        necessary = self.get_num_from_line(line)
                    elif "The following is the roll call vote:" in line:
                        roll_start = i + 1
                        break  # the next lines contain the actual votes

                passed = yeas >= necessary
                vote = Vote(chamber, date, motion, passed, yeas, neas, others)

                # Vote letters and names are separated by two spaces.  Use
                # str.join here: this function has no usable `string` module
                # because the name was shadowed by a local variable.
                words = "  ".join(lines[roll_start:]).split("  ")
                absent_vote_value = re.compile("^(X|A)$")
                yea_vote_value = re.compile("^Y$")
                nea_vote_value = re.compile("^N$")
                # Only one space between vote and name, so the pair arrives
                # as a single word: group 1 is the vote, group 2 the name.
                annoying_vote = re.compile(r"^(Y|X|A|N) ([\S ]+)$")
                vote_value = None
                for word in words:
                    word = word.strip()
                    if not word or digits.search(word) is not None:
                        continue
                    word = strip_digits(word)
                    if vote_value is not None:
                        if vote_value == "Y":
                            vote.yes(word)
                        elif vote_value == "N":
                            vote.no(word)
                        else:
                            vote.other(word)
                        vote_value = None
                    elif absent_vote_value.match(word) is not None:
                        vote_value = "X"
                    elif yea_vote_value.match(word) is not None:
                        vote_value = "Y"
                    elif nea_vote_value.match(word) is not None:
                        vote_value = "N"
                    else:
                        combined = annoying_vote.match(word)
                        if combined is not None:
                            letter = combined.group(1)
                            name = combined.group(2)
                            if letter == "Y":
                                vote.yes(name)
                            elif letter == "N":
                                vote.no(name)
                            else:
                                vote.other(name)
            return vote
Example #28
0
 def scrape_votes(self, chamber, url, bill, date, **kwargs):
     """Scrape a vote detail page (with member names) and add it to ``bill``.

     Args:
         chamber: 'upper' or 'lower'; also selects the passage thresholds
             applied when no explicit result is supplied.
         url: address of the vote detail page.
         bill: object whose ``add_vote`` receives the finished vote.
         date: vote date, handed to ``Vote`` unchanged.
         **kwargs: 'type' and 'motion' are required.  Optional: 'passed'
             ('PASSED' or 'FAILED'), 'committee', and the page elements
             'AMEND', 'EMER' and '2/3 VOTE'.

     Raises:
         KeyError: if 'type' or 'motion' is missing, or 'passed' holds
             anything other than 'PASSED' / 'FAILED'.
     """
     o_args = {}
     v_type = kwargs.pop('type')
     # The motion is mandatory; fail here rather than with a NameError
     # after the page has already been fetched.
     motion = kwargs.pop('motion')
     # None means "not supplied": derive the result from the counts below.
     passed = None
     if 'passed' in kwargs:
         passed = {'PASSED': True, 'FAILED': False}[kwargs.pop('passed')]
     if 'AMEND' in kwargs:
         o_args['amended'] = kwargs.pop('AMEND').text_content().strip()
     if 'EMER' in kwargs and kwargs['EMER'].text_content().strip():
         o_args['EMER'] = kwargs.pop('EMER').text_content().strip()
     if '2/3 VOTE' in kwargs and kwargs['2/3 VOTE'].text_content().strip():
         o_args['2/3 VOTE'] = kwargs.pop('2/3 VOTE').text_content().strip()
     if 'committee' in kwargs:
         o_args['committee'] = utils.get_committee_name(kwargs.pop('committee'),
                                                         chamber)

     with self.urlopen(url) as vote_page:
         root = html.fromstring(vote_page)
         vote_table = root.xpath('/html/body/div/table/tr[3]/td[4]/table/tr/td/table/tr/td/table')[0]
         vote_count = vote_table.xpath('following-sibling::p/following-sibling::text()')
         vote_string = vote_count[0].replace(u'\xa0', '').strip()
         # "LABEL: <number>" pairs.  [A-Za-z] replaces [A-z], which also
         # matched punctuation, and \d+ replaces \d*, which could match an
         # empty count and crash int().
         v_count = re.compile(r'\b[A-Z]*\s*[A-Za-z]*:\s\d+')
         yes_count = 0
         no_count = 0
         o_count = 0
         for pair in v_count.findall(vote_string):
             k, v = pair.split(':')
             # e.g. "NOT VOTING" becomes "not_voting"
             k = k.strip().replace(' ', '_').lower()
             v = int(v.strip())
             if k == 'ayes':
                 yes_count = v
             elif k == 'nays':
                 no_count = v
             else:
                 # Every other category is kept under its own key and
                 # also added to the combined "other" total.
                 o_args[str(k)] = v
                 o_count += v

         if passed is None:
             passed = yes_count > no_count
             # Floor votes must additionally clear a fixed threshold,
             # which is higher when EMER or 2/3 VOTE is set.
             if 'committee' not in o_args:
                 needs_supermajority = 'EMER' in o_args or '2/3 VOTE' in o_args
                 if chamber == 'upper' and passed:
                     if needs_supermajority:
                         passed = yes_count > 20
                     else:
                         passed = yes_count > 16
                 elif chamber == 'lower' and passed:
                     if needs_supermajority:
                         passed = yes_count > 40
                     else:
                         passed = yes_count > 31

         vote = Vote(chamber, date, motion, passed, yes_count, no_count,
                     o_count, type=v_type, **o_args)
         vote.add_source(url)
         # Cells alternate name, vote; pairing through zip() ignores a
         # dangling unpaired cell instead of failing to unpack it.
         tds = vote_table.xpath('descendant::td')
         for name_cell, vote_cell in zip(tds[0::2], tds[1::2]):
             v = vote_cell.text_content().strip()
             name = name_cell.text_content().strip()
             if name == 'Member Name':
                 # header row
                 continue
             if v == 'Y':
                 vote.yes(name)
             elif v == 'N':
                 vote.no(name)
             else:
                 vote.other(name)
         bill.add_vote(vote)
Example #29
0
    def scrape_bill_type(self, chamber, session, bill_type, type_abbr):
        if chamber == 'upper':
            chamber_name = 'SENATE'
        else:
            chamber_name = 'ASSEMBLY'


        bills = self.session.query(CABill).filter_by(
            session_year=session).filter_by(
            measure_type=type_abbr)


        for bill in bills:
            bill_session = session
            if bill.session_num != '0':
                bill_session += ' Special Session %s' % bill.session_num

            bill_id = bill.short_bill_id

            fsbill = Bill(bill_session, chamber, bill_id, '')

            title = ''
            short_title = ''
            type = ['bill']
            subject = ''
            for version in self.session.query(CABillVersion).filter_by(
                bill=bill).filter(CABillVersion.bill_xml != None):

                title = version.title
                short_title = version.short_title
                type = [bill_type]

                if version.appropriation == 'Yes':
                    type.append('appropriation')
                if version.fiscal_committee == 'Yes':
                    type.append('fiscal committee')
                if version.local_program == 'Yes':
                    type.append('local program')
                if version.urgency == 'Yes':
                    type.append('urgency')
                if version.taxlevy == 'Yes':
                    type.append('tax levy')

                subject = version.subject

                fsbill.add_version(version.bill_version_id, '',
                                   date=version.bill_version_action_date,
                                   title=version.title,
                                   short_title=version.short_title,
                                   subject=[subject],
                                   type=type)

            if not title:
                self.warning("Couldn't find title for %s, skipping" % bill_id)
                continue

            fsbill['title'] = title
            fsbill['short_title'] = short_title
            fsbill['type'] = type
            fsbill['subjects'] = [subject]

            for author in version.authors:
                if author.house == chamber_name:
                    fsbill.add_sponsor(author.contribution, author.name)

            for action in bill.actions:
                if not action.action:
                    # NULL action text seems to be an error on CA's part,
                    # unless it has some meaning I'm missing
                    continue
                actor = action.actor or chamber
                actor = actor.strip()
                match = re.match(r'(Assembly|Senate)($| \(Floor)', actor)
                if match:
                    actor = {'Assembly': 'lower',
                             'Senate': 'upper'}[match.group(1)]
                elif actor.startswith('Governor'):
                    actor = 'executive'
                else:
                    actor = re.sub('^Assembly', 'lower', actor)
                    actor = re.sub('^Senate', 'upper', actor)

                type = []

                act_str = action.action
                if act_str.startswith('Introduced'):
                    type.append('bill:introduced')

                if 'To Com' in act_str:
                    type.append('committee:referred')

                if 'Read third time.  Passed.' in act_str:
                    type.append('bill:passed')

                if 'Approved by Governor' in act_str:
                    type.append('governor:signed')

                if 'Item veto' in act_str:
                    type.append('governor:vetoed:line-item')

                if not type:
                    type = ['other']

                fsbill.add_action(actor, act_str, action.action_date,
                                  type=type)

            for vote in bill.votes:
                if vote.vote_result == '(PASS)':
                    result = True
                else:
                    result = False

                full_loc = vote.location.description
                first_part = full_loc.split(' ')[0].lower()
                if first_part in ['asm', 'assembly']:
                    vote_chamber = 'lower'
                    vote_location = ' '.join(full_loc.split(' ')[1:])
                elif first_part.startswith('sen'):
                    vote_chamber = 'upper'
                    vote_location = ' '.join(full_loc.split(' ')[1:])
                else:
                    raise ScrapeError("Bad location: %s" % full_loc)

                motion = vote.motion.motion_text or ''

                if "Third Reading" in motion or "3rd Reading" in motion:
                    vtype = 'passage'
                elif "Do Pass" in motion:
                    vtype = 'passage'
                else:
                    vtype = 'other'

                motion = motion.strip()

                # Why did it take until 2.7 to get a flags argument on re.sub?
                motion = re.compile(r'(\w+)( Extraordinary)? Session$',
                                    re.IGNORECASE).sub('', motion)
                motion = re.compile(r'^(Senate|Assembly) ',
                                    re.IGNORECASE).sub('', motion)
                motion = re.sub(r'^(SCR|SJR|SB|AB|AJR|ACR)\s?\d+ \w+\.?  ',
                                '', motion)
                motion = re.sub(r' \(\w+\)$', '', motion)
                motion = re.sub(r'(SCR|SB|AB|AJR|ACR)\s?\d+ \w+\.?$',
                                '', motion)
                motion = re.sub(r'(SCR|SJR|SB|AB|AJR|ACR)\s?\d+ \w+\.? '
                                r'Urgency Clause$',
                                '(Urgency Clause)', motion)
                motion = re.sub(r'\s+', ' ', motion)

                if not motion:
                    self.warning("Got blank motion on vote for %s" % bill_id)
                    continue

                fsvote = Vote(vote_chamber,
                              vote.vote_date_time,
                              motion,
                              result,
                              int(vote.ayes),
                              int(vote.noes),
                              int(vote.abstain),
                              threshold=vote.threshold,
                              type=vtype)

                if vote_location != 'Floor':
                    fsvote['committee'] = vote_location

                for record in vote.votes:
                    if record.vote_code == 'AYE':
                        fsvote.yes(record.legislator_name)
                    elif record.vote_code.startswith('NO'):
                        fsvote.no(record.legislator_name)
                    else:
                        fsvote.other(record.legislator_name)

                fsbill.add_vote(fsvote)

            self.save_bill(fsbill)
Example #30
0
    def scrape_bill_type(self, chamber, session, bill_type, type_abbr):
        """Scrape and save every bill of one measure type for a session.

        Queries ``CABill`` rows matching ``session`` and ``type_abbr`` and, for
        each row, builds a ``Bill`` carrying its versions, sponsors, actions
        and votes before passing it to ``self.save_bill``.  A bill is skipped
        (with a warning) when none of its versions yields a title.

        :param chamber: chamber the bills are filed under.  ``"upper"``
            selects authors whose house is ``"SENATE"``; any other value
            selects ``"ASSEMBLY"``.  Also used as the actor of an action that
            has no actor of its own.
        :param session: session year string such as ``"20092010"``.
        :param bill_type: label placed first in each version's (and the
            bill's) type list.
        :param type_abbr: measure type used to filter the ``CABill`` query.
        :raises ScrapeError: when a vote's location description does not start
            with an Assembly or Senate prefix.
        """
        chamber_name = "SENATE" if chamber == "upper" else "ASSEMBLY"

        # Session in the form the source URL uses: '20092010' -> '0910'.
        source_session = session[2:4] + session[6:8]

        # Everything below is loop-invariant, so it is built once up front
        # rather than once per action/vote.
        chamber_codes = {"Assembly": "lower", "Senate": "upper"}
        chamber_actor_re = re.compile(r"(Assembly|Senate)($| \(Floor)")

        # (attribute on the version row, label added when it equals "Yes")
        version_flags = (
            ("appropriation", "appropriation"),
            ("fiscal_committee", "fiscal committee"),
            ("local_program", "local program"),
            ("urgency", "urgency"),
            ("taxlevy", "tax levy"),
        )

        # (substring of the action text, action type it implies)
        action_markers = (
            ("To Com", "committee:referred"),
            ("Read third time.  Passed.", "bill:passed"),
            ("Approved by Governor", "governor:signed"),
            ("Item veto", "governor:vetoed:line-item"),
        )

        # A bill reference embedded in motion text, e.g. "SB 12 Smith.".
        # One shared fragment keeps the three patterns built from it in
        # agreement; previously the trailing-reference pattern omitted SJR.
        measure_ref = r"(SCR|SJR|SB|AB|AJR|ACR)\s?\d+ \w+\.?"
        session_suffix_re = re.compile(r"(\w+)( Extraordinary)? Session$", re.IGNORECASE)
        chamber_prefix_re = re.compile(r"^(Senate|Assembly) ", re.IGNORECASE)
        leading_ref_re = re.compile(r"^" + measure_ref + r"  ")
        paren_suffix_re = re.compile(r" \(\w+\)$")
        trailing_ref_re = re.compile(measure_ref + r"$")
        urgency_ref_re = re.compile(measure_ref + r" Urgency Clause$")
        whitespace_re = re.compile(r"\s+")

        bills = self.session.query(CABill).filter_by(session_year=session).filter_by(measure_type=type_abbr)

        for bill in bills:
            bill_session = session
            if bill.session_num != "0":
                bill_session = session + " Special Session %s" % bill.session_num

            bill_id = bill.short_bill_id

            fsbill = Bill(bill_session, chamber, bill_id, "")

            # Construct a fake source url; 'AB 10' becomes 'ab_10'.
            source_num = "%s_%s" % (bill.measure_type.lower(), bill.measure_num)
            source_url = "http://www.leginfo.ca.gov/cgi-bin/postquery?bill_number=%s&sess=%s" % (
                source_num,
                source_session,
            )
            fsbill.add_source(source_url)

            # ---- versions -------------------------------------------------
            title = ""
            short_title = ""
            bill_types = ["bill"]
            subject = ""
            latest_version = None

            # `!= None` is deliberate: the comparison is on a mapped column and
            # is presumably turned into SQL "IS NOT NULL" by the ORM, whereas
            # `is not None` would be evaluated by Python itself.
            versions = (
                self.session.query(CABillVersion)
                .filter_by(bill=bill)
                .filter(CABillVersion.bill_xml != None)  # noqa: E711
            )
            for version in versions:
                latest_version = version
                title = version.title
                short_title = version.short_title
                subject = version.subject

                bill_types = [bill_type]
                for attr, label in version_flags:
                    if getattr(version, attr) == "Yes":
                        bill_types.append(label)

                fsbill.add_version(
                    version.bill_version_id,
                    "",
                    date=version.bill_version_action_date.date(),
                    title=version.title,
                    short_title=version.short_title,
                    subject=[subject],
                    type=bill_types,
                )

            if not title:
                self.warning("Couldn't find title for %s, skipping" % bill_id)
                continue

            # The bill-level fields mirror the last version iterated above.
            fsbill["title"] = title
            fsbill["short_title"] = short_title
            fsbill["type"] = bill_types
            fsbill["subjects"] = [subject]

            # ---- sponsors -------------------------------------------------
            # A non-empty title guarantees the version loop ran at least once,
            # so latest_version is set here.
            for author in latest_version.authors:
                if author.house == chamber_name:
                    fsbill.add_sponsor(author.contribution, author.name)

            # ---- actions --------------------------------------------------
            for action in bill.actions:
                act_str = action.action
                if not act_str:
                    # NULL action text seems to be an error on CA's part,
                    # unless it has some meaning I'm missing
                    continue

                actor = (action.actor or chamber).strip()
                floor_match = chamber_actor_re.match(actor)
                if floor_match:
                    # Bare chamber name or a floor action: just the chamber.
                    actor = chamber_codes[floor_match.group(1)]
                elif actor.startswith("Governor"):
                    actor = "executive"
                else:
                    # Anything else (e.g. a committee): keep the remainder and
                    # only swap the leading chamber name for its code.
                    actor = re.sub("^Assembly", "lower", actor)
                    actor = re.sub("^Senate", "upper", actor)

                action_types = []
                if act_str.startswith("Introduced"):
                    action_types.append("bill:introduced")
                for marker, label in action_markers:
                    if marker in act_str:
                        action_types.append(label)
                if not action_types:
                    action_types = ["other"]

                fsbill.add_action(actor, act_str, action.action_date.date(), type=action_types)

            # ---- votes ----------------------------------------------------
            for vote in bill.votes:
                result = vote.vote_result == "(PASS)"

                # First word names the chamber; the rest is the location
                # within it ("Floor" or a committee name).
                full_loc = vote.location.description
                loc_prefix, _, vote_location = full_loc.partition(" ")
                loc_prefix = loc_prefix.lower()
                if loc_prefix in ("asm", "assembly"):
                    vote_chamber = "lower"
                elif loc_prefix.startswith("sen"):
                    vote_chamber = "upper"
                else:
                    raise ScrapeError("Bad location: %s" % full_loc)

                motion = vote.motion.motion_text or ""

                # Classified on the raw text, before the clean-up below.
                if "Third Reading" in motion or "3rd Reading" in motion or "Do Pass" in motion:
                    vtype = "passage"
                else:
                    vtype = "other"

                # Strip session names, chamber prefixes and embedded bill
                # references.  The order of these substitutions matters.
                motion = motion.strip()
                motion = session_suffix_re.sub("", motion)
                motion = chamber_prefix_re.sub("", motion)
                motion = leading_ref_re.sub("", motion)
                motion = paren_suffix_re.sub("", motion)
                motion = trailing_ref_re.sub("", motion)
                motion = urgency_ref_re.sub("(Urgency Clause)", motion)
                motion = whitespace_re.sub(" ", motion)

                if not motion:
                    self.warning("Got blank motion on vote for %s" % bill_id)
                    continue

                fsvote = Vote(
                    vote_chamber,
                    self._tz.localize(vote.vote_date_time),
                    motion,
                    result,
                    int(vote.ayes),
                    int(vote.noes),
                    int(vote.abstain),
                    threshold=vote.threshold,
                    type=vtype,
                )

                if vote_location != "Floor":
                    fsvote["committee"] = vote_location

                for record in vote.votes:
                    if record.vote_code == "AYE":
                        fsvote.yes(record.legislator_name)
                    elif record.vote_code.startswith("NO"):
                        fsvote.no(record.legislator_name)
                    else:
                        fsvote.other(record.legislator_name)

                fsbill.add_vote(fsvote)

            self.save_bill(fsbill)