Example #1
0
    def parse_vote(self, bill, action, act_chamber, act_date, url):
        """Build a Vote from a journal action line and its roll-call page.

        The tallies are read from ``action`` (a ``Y<n> N<n>`` pair followed
        by up to three optional ``<letter><n>`` groups, which are summed into
        the "other" count).  The motion text and the per-member votes are
        read from the page at ``url``, which is taken relative to the BASIS
        site root.

        ``bill`` is accepted but not used here; the vote is returned rather
        than attached.

        Raises IndexError when the tally, the "Yeas" ``<pre>`` block or the
        motion sentence cannot be found.
        """
        url = "http://www.legis.state.ak.us/basis/%s" % url
        info_page = self.soup_parser(self.urlopen(url))

        tally = re.findall(
            r"Y(\d+) N(\d+)\s*(?:\w(\d+))*\s*(?:\w(\d+))*\s*(?:\w(\d+))*",
            action)[0]
        # Optional groups that did not take part in the match come back as "".
        yes, no, o1, o2, o3 = [int(count) if count else 0 for count in tally]
        other = o1 + o2 + o3

        # Each section of the <pre> block is separated by a blank line.
        votes = info_page.findAll(
            "pre", text=re.compile("Yeas"), limit=1)[0].split("\n\n")

        motion = info_page.findAll(text=re.compile("The question being"))[0]
        motion = re.findall(r'The question being:\s*"(.*)\?"', motion,
                            re.DOTALL)[0].replace("\n", " ")

        vote = Vote(act_chamber, act_date, motion, yes > no, yes, no, other)

        # Section prefix -> recorder.  The prefix length is derived from the
        # prefix itself so the slice can never be off by one (the previous
        # hard-coded offset for "Absent: " cut the first letter of the first
        # absent member's name).
        sections = (
            ("Yeas: ", vote.yes),
            ("Nays: ", vote.no),
            ("Excused: ", vote.other),
            ("Absent: ", vote.other),
        )
        for vote_list in votes:
            for prefix, record in sections:
                if vote_list.startswith(prefix):
                    for name in vote_list[len(prefix):].split(","):
                        record(name.strip())
                    break

        vote.add_source(url)
        return vote
Example #2
0
    def parse_vote_new(self, bill, chamber, url):
        """Scrape a roll-call page and attach the resulting Vote to ``bill``.

        The summary (date, motion, yes/no/abs counts, pass flag) is read from
        the second row of the page's first table.  Member rows start at the
        fourth row; any row that does not have exactly two cells is skipped.
        A member whose vote text starts with neither 'Yea' nor 'Nay' is
        recorded as "other".
        """
        page = BeautifulSoup(self.urlopen(url))
        rows = page.table.findAll('tr')
        info_row = rows[1]

        when = dt.datetime.strptime(info_row.td.contents[0], '%m/%d/%Y')
        summary = info_row.findAll('td')
        motion = summary[1].contents[0]
        yes_count = int(summary[2].contents[0])
        no_count = int(summary[3].contents[0])
        abs_count = int(summary[4].contents[0])
        passed = summary[5].contents[0] == 'Pass'

        vote = Vote(chamber, when, motion, passed,
                    yes_count, no_count, abs_count)
        vote.add_source(url)

        for row in rows[3:]:
            cells = row.findAll('td')
            if len(cells) != 2:
                continue

            # Keep only the part of the first cell before " of".
            name = cells[0].contents[0].split(' of')[0]
            cast = cells[1].contents[0]
            if cast.startswith('Yea'):
                record = vote.yes
            elif cast.startswith('Nay'):
                record = vote.no
            else:
                record = vote.other
            record(name)

        bill.add_vote(vote)
Example #3
0
 def apply_votes(self, bill):
     """Attach every vote found via ``bill['status_url']`` to ``bill``.

     Each entry returned by ``votes.all_votes_for_url`` is a
     ``(chamber, vote_desc, pdf_url, these_votes)`` tuple, where
     ``these_votes`` maps a voter to the value cast ('Y', 'N', or anything
     else, which is counted as "other").
     """
     for (chamber, vote_desc, pdf_url, these_votes) in votes.all_votes_for_url(self, bill['status_url']):
         # The date is whatever follows the last "-" in the description.
         # NOTE(review): str.split always returns at least one element, so
         # this handler looks unreachable for string descriptions.
         try:
             date = vote_desc.split("-")[-1]
         except IndexError:
             self.warning("[%s] Couldn't get date out of [%s]" % (bill['bill_id'],vote_desc))
             continue

         ayes, nays, others = [], [], []
         for voter, cast in these_votes.iteritems():
             if cast == 'Y':
                 bucket = ayes
             elif cast == 'N':
                 bucket = nays
             else:
                 bucket = others
             bucket.append(voter)

         # Simple majority of recorded votes; not necessarily the official
         # outcome (the PDF might carry the real result).
         passed = len(ayes) > len(nays)
         record = Vote(standardize_chamber(chamber), date, vote_desc, passed,
                       len(ayes), len(nays), len(others), pdf_url=pdf_url)
         for recorder, voters in ((record.yes, ayes),
                                  (record.no, nays),
                                  (record.other, others)):
             for voter in voters:
                 recorder(voter)
         bill.add_vote(record)
Example #4
0
def votes(root):
    """Yield a Vote for every "was adopted by (Record N)" roll call in ``root``.

    A roll call is located by a ``<p>`` whose text starts with "Yeas —".
    The element just before it must contain the summary sentence with the
    bill id, the record number and the Yeas/Nays/Present totals; paragraphs
    without such a summary are ignored.  The following siblings are read as
    an optional "Nays" paragraph and then any number of "Present"/"Absent"
    paragraphs.

    Each yielded vote carries ``bill_id``, ``session``, ``record`` and
    ``filename`` keys, and ``other_count`` is reset to the number of
    "other" voters actually listed.
    """
    for el in root.xpath(u'//p[starts-with(., "Yeas \u2014")]'):
        summary = el.getprevious()
        if summary is None:
            # No preceding element, so there is no summary sentence to parse.
            continue

        text = ''.join(summary.itertext())
        m = re.search(r'(\w+ \d+) was adopted by \(Record (\d+)\): '
                      r'(\d+) Yeas, (\d+) Nays, (\d+) Present', text)
        if not m:
            continue

        yes_count = int(m.group(3))
        no_count = int(m.group(4))
        other_count = int(m.group(5))

        vote = Vote(None, None, 'final passage', True,
                    yes_count, no_count, other_count)
        vote['bill_id'] = m.group(1)
        vote['session'] = '81'
        vote['record'] = m.group(2)
        vote['filename'] = m.group(2)

        for name in names(el):
            vote.yes(name)

        # getnext() returns None at the end of the parent, so every sibling
        # access has to be guarded.
        sibling = el.getnext()
        if (sibling is not None and sibling.text
                and sibling.text.startswith('Nays')):
            for name in names(sibling):
                vote.no(name)
            sibling = sibling.getnext()

        while (sibling is not None and sibling.text
               and re.match(r'Present|Absent', sibling.text)):
            for name in names(sibling):
                vote.other(name)
            sibling = sibling.getnext()

        vote['other_count'] = len(vote['other_votes'])
        yield vote
Example #5
0
    def scrape_old_vote(self, url):
        """Scrape a roll call from the older page layout and return a Vote.

        The page's first ``<h3>`` is split on ", ": the second piece names
        the chamber (anything not starting with 'House' is treated as
        'upper') and the remaining pieces form the motion.  Totals come from
        the second table on the page; individual votes come from the first.
        The vote is returned with no date (``None``).
        """
        page = self.soup_parser(self.urlopen(url))

        parts = page.h3.contents[0].split(', ')
        chamber_name = parts[1]
        chamber = 'lower' if chamber_name.startswith('House') else 'upper'

        # Everything after the first word of the chamber name is a location,
        # except for the tail of "House of Representatives".
        location = ' '.join(chamber_name.split(' ')[1:])
        if location.startswith('of Representatives'):
            location = ''

        motion = ', '.join(parts[2:])

        def cell_count(cell):
            # An empty cell means zero.
            return int(cell.contents[0]) if len(cell.contents) else 0

        tally_cells = page.findAll('table')[1].findAll('td')
        yes_count = cell_count(tally_cells[1])
        no_count = cell_count(tally_cells[3])
        excused_count = cell_count(tally_cells[5])
        absent_count = cell_count(tally_cells[7])
        other_count = excused_count + absent_count

        vote = Vote(chamber, None, motion, yes_count > no_count,
                    yes_count, no_count,
                    other_count, excused_count=excused_count,
                    absent_count=absent_count,
                    location=location)
        vote.add_source(url)

        for td in page.table.findAll('td'):
            label = td.contents[0]
            if label == 'Yea':
                record = vote.yes
            elif label == 'Nay':
                record = vote.no
            elif label in ['Excused', 'Absent']:
                record = vote.other
            else:
                continue
            # The voter's name is the first child of the preceding element.
            record(td.findPrevious().contents[0])

        return vote
Example #6
0
    def scrape_new_vote(self, url):
        """Scrape a roll call from the newer (id-tagged) layout and return a Vote.

        The header element is split on ", ": the second piece names the
        chamber (anything not starting with 'House' is treated as 'upper')
        and the remaining pieces form the motion.  Totals and the vote table
        are looked up by their element ids.  The vote is returned with no
        date (``None``).
        """
        page = self.soup_parser(self.urlopen(url))

        parts = page.find(id="ctl00_contentMain_hdVote").contents[0].split(', ')
        chamber_name = parts[1]
        chamber = 'lower' if chamber_name.startswith('House') else 'upper'

        # Everything after the first word of the chamber name is a location,
        # except for the tail of "House of Representatives".
        location = ' '.join(chamber_name.split(' ')[1:])
        if location.startswith('of Representatives'):
            location = ''

        motion = ', '.join(parts[2:])

        def count_for(element_id):
            return int(page.find(id=element_id).contents[0])

        yes_count = count_for("ctl00_contentMain_tdAyes")
        no_count = count_for("ctl00_contentMain_tdNays")
        excused_count = count_for("ctl00_contentMain_tdExcused")
        absent_count = count_for("ctl00_contentMain_tdAbsent")
        other_count = excused_count + absent_count

        vote = Vote(chamber, None, motion, yes_count > no_count,
                    yes_count, no_count,
                    other_count, excused_count=excused_count,
                    absent_count=absent_count,
                    location=location)
        vote.add_source(url)

        for td in page.find(id="ctl00_contentMain_tblVotes").findAll('td'):
            label = td.contents[0]
            if label == 'Yea':
                record = vote.yes
            elif label == 'Nay':
                record = vote.no
            elif label in ['Excused', 'Absent']:
                record = vote.other
            else:
                continue
            # The voter's name is the first child of the preceding element.
            record(td.findPrevious().contents[0])

        return vote
Example #7
0
    def scrape_bills(self, chamber, year):
        """Save one hard-coded sample bill for ``chamber``.

        Only the year "2009" is supported; any other value raises
        NoDataForYear.  The bill is "SB 1" for the upper chamber and "HB 1"
        otherwise, and it carries fixed sources, sponsors, two votes and
        three actions.
        """
        if year != "2009":
            raise NoDataForYear

        is_upper = chamber == "upper"
        other_chamber = "lower" if is_upper else "upper"
        bill_id = "SB 1" if is_upper else "HB 1"

        first_day = datetime.datetime.strptime("1/29/2010", "%m/%d/%Y")
        second_day = datetime.datetime.strptime("1/30/2010", "%m/%d/%Y")

        sample = Bill("2009-2010", chamber, bill_id, "A super bill")
        sample.add_source("http://example.com")
        sample.add_version("As Introduced", "http://example.com/SB1.html")
        sample.add_document("Google", "http://google.com")
        for role, sponsor in (("primary", "Bob Smith"),
                              ("secondary", "Johnson, Sally")):
            sample.add_sponsor(role, sponsor)

        senate_vote = Vote("upper", first_day, "Final passage", True, 2, 0, 0)
        for voter in ("Bob Smith", "Sally Johnson"):
            senate_vote.yes(voter)

        house_vote = Vote("lower", second_day, "Final passage", False, 0, 1, 1)
        house_vote.no("B. Smith")
        house_vote.other("Sally Johnson")

        for ballot in (senate_vote, house_vote):
            sample.add_vote(ballot)

        for actor, description, when in (
                (chamber, "introduced", first_day),
                (chamber, "read first time", first_day),
                (other_chamber, "introduced", second_day)):
            sample.add_action(actor, description, when)

        self.save_bill(sample)
Example #8
0
def record_votes(root):
    """Yield a Vote for every recorded roll call found under ``root``.

    A roll call is located by a ``<p>`` whose text starts with "Yeas —".
    The element just before it must contain the summary sentence
    ("<bill> [as amended] was adopted|passed [to engrossment|third reading]
    by (Record N): x Yeas, y Nays, z Present"); paragraphs without such a
    summary are ignored, as are bills whose id starts with neither an
    H/CSHB nor an S/CSSB prefix.

    Each yielded vote carries ``bill_id``, ``bill_chamber``, ``session``,
    ``method``, ``record`` and ``filename`` keys, and ``other_count`` is
    reset to the number of "other" voters actually listed.
    """
    for el in root.xpath(u'//p[starts-with(., "Yeas \u2014")]'):
        summary = el.getprevious()
        if summary is None:
            # No preceding element, so there is no summary sentence to parse.
            continue

        # str.replace returns a new string; the result must be kept so a
        # bill id that wraps across lines is captured with a space in it.
        text = ''.join(summary.itertext()).replace('\n', ' ')
        m = re.search(r'(?P<bill_id>\w+\W+\d+)(,?\W+as\W+amended,?)?\W+was\W+'
                      r'(?P<type>adopted|passed'
                      r'(\W+to\W+(?P<to>engrossment|third\W+reading))?)\W+'
                      r'by\W+\(Record\W+(?P<record>\d+)\):\W+'
                      r'(?P<yeas>\d+)\W+Yeas,\W+(?P<nays>\d+)\W+Nays,\W+'
                      r'(?P<present>\d+)\W+Present', text)
        if not m:
            continue

        yes_count = int(m.group('yeas'))
        no_count = int(m.group('nays'))
        other_count = int(m.group('present'))

        bill_id = m.group('bill_id')
        if bill_id.startswith(('H', 'CSHB')):
            bill_chamber = 'lower'
        elif bill_id.startswith(('S', 'CSSB')):
            bill_chamber = 'upper'
        else:
            continue

        motion = get_type(m)
        vote = Vote(None, None, motion, True,
                    yes_count, no_count, other_count)
        vote['bill_id'] = bill_id
        vote['bill_chamber'] = bill_chamber
        vote['session'] = '81'
        vote['method'] = 'record'
        vote['record'] = m.group('record')
        vote['filename'] = m.group('record')

        for name in names(el):
            vote.yes(name)

        # getnext() returns None at the end of the parent, so every sibling
        # access has to be guarded.
        sibling = el.getnext()
        if (sibling is not None and sibling.text
                and sibling.text.startswith('Nays')):
            for name in names(sibling):
                vote.no(name)
            sibling = sibling.getnext()

        while (sibling is not None and sibling.text
               and re.match(r'Present|Absent', sibling.text)):
            for name in names(sibling):
                vote.other(name)
            sibling = sibling.getnext()

        vote['other_count'] = len(vote['other_votes'])
        yield vote
Example #9
0
 def parse_votes(self, url, page, chamberName, bill):
     """Follow every link under ``#votes`` on ``page`` and add a Vote to ``bill``.

     Link targets are resolved against ``url``.  On each vote page the date
     and totals are read from the ``#date``, ``#yea``, ``#nay`` and
     ``#not-voting`` elements, and each ``ul.roll-call li`` item supplies one
     member: the text of its first child is the value cast ("Y", "N", or
     anything else for "other").  The motion is left empty.
     """
     for link in page.cssselect("#votes a"):
         vote_page = parse(urlparse.urljoin(url, link.get("href"))).getroot()

         date = vote_page.cssselect("#date")[0].text
         yeses = int(vote_page.cssselect("#yea")[0].text)
         noes = int(vote_page.cssselect("#nay")[0].text)
         other = sum(int(node.text)
                     for node in vote_page.cssselect("#not-voting"))

         tally = Vote(chamberName, date, "", yeses > noes, yeses, noes, other)
         recorders = {"Y": tally.yes, "N": tally.no}
         for item in vote_page.cssselect("ul.roll-call li"):
             rep = item.text_content().strip()
             val = item[0].text
             recorders.get(val, tally.other)(rep)

         bill.add_vote(tally)
Example #10
0
 def parse_vote(self, bill, actor, date, text, line):
     """Scrape the roll call linked from ``line`` and add it to ``bill``.

     The vote page is fetched from ``line['href']`` on leg1.state.va.us.
     The chamber is taken from the first "H" or "S" that follows a
     ``d/d/d`` date on the page.  Every ``<p>`` with plain string content is
     treated as ``<LABEL>--<names>--...``: the text between the first two
     ``--`` markers is a ", "-separated name list, in which an entry that
     looks like initials ("J.H.") belongs to the surname before it.  Labels
     "YEAS" and "NAYS" map to yes/no; any other label is recorded as
     "other".  Counts and the pass flag are derived from the names found.

     ``actor`` is accepted but not used.

     Raises IndexError/KeyError when no chamber marker is found on the page.
     """
     url = "http://leg1.state.va.us%s" % line['href']
     abbr = {'S': 'upper', 'H': 'lower'}
     with self.soup_context(url) as vote_data:
         # Capture only the leading H/S.  The previous pattern was a
         # character class that also matched letters such as "o", "e" or
         # "t", which then failed the abbr lookup.
         house = abbr[re.findall(r'\d+/\d+/\d+\s+([HS])',
                                 self.unescape(unicode(vote_data)))[0]]
         vote = Vote(house, date, text, None, 0, 0, 0)
         for cast in vote_data.findAll('p'):
             if cast.string is None:
                 continue
             cleaned = cast.string.replace('\r\n', ' ')
             split_start = cleaned.find('--')
             if split_start == -1:
                 continue
             split_end = cleaned.find('--', split_start + 2)
             if split_end == -1:
                 continue
             voted = cleaned[0:split_start].strip()

             names = []
             pending = ''
             for piece in cleaned[split_start + 2:split_end].split(", "):
                 if re.match(r'\w\.\w\.', piece):
                     # Initials: glue them back onto the surname before them.
                     names.append(pending + ', ' + piece)
                     pending = ''
                 else:
                     if pending != '':
                         names.append(pending)
                     pending = piece
             # Flush the final surname; without this the last voter of every
             # list not ending in initials was silently dropped.
             if pending != '':
                 names.append(pending)

             for voter in names:
                 voter = voter.strip()
                 if not voter:
                     continue
                 if voted == 'YEAS':
                     vote.yes(voter)
                 elif voted == 'NAYS':
                     vote.no(voter)
                 else:
                     vote.other(voter)
         vote['other_count'] = len(vote['other_votes'])
         vote['yes_count'] = len(vote['yes_votes'])
         vote['no_count'] = len(vote['no_votes'])
         vote['passed'] = (vote['yes_count'] > vote['no_count'])
         bill.add_vote(vote)
Example #11
0
    def parse_vote(self, vote_url, chamberName):
        """Parse the roll-call page at ``vote_url`` and return a Vote.

        The summary table is the first table whose text starts with "Yeas".
        Within its enclosing table (``ancestor_table``), the nested table
        before it supplies the date and motion and the nested table after it
        supplies the individual votes.  Totals are read from the 3rd, 6th,
        9th and 12th whitespace-separated tokens of the summary text; the
        last two are added together as "other".

        The vote table text is either one entry per line with the vote
        letter fused to the name ("YSmith"), or alternating lines of vote
        letter and name.  "Y" and "N" map to yes/no; anything else is
        recorded as "other".

        Raises IndexError when the expected tables or tokens are missing.
        """
        page = parse(vote_url).getroot()
        summary_table = [tab for tab in page.cssselect("table")
                         if tab.text_content().startswith("Yeas")][0]
        vote_table = ancestor_table(summary_table)
        ind = table_index(vote_table, summary_table)
        nested_tables = vote_table.cssselect("table")
        vote_tally_table = nested_tables[ind + 1]
        date_table = nested_tables[ind - 1]

        # int() already accepts zero-padded text; stripping "0" characters
        # also removed trailing zeros and turned e.g. "10" into 1.
        counts_line = summary_table.text_content().strip().split()
        yeses = int(counts_line[2])
        noes = int(counts_line[5])
        other = int(counts_line[8]) + int(counts_line[11])

        # Call .strip() on each line itself so both byte and unicode text
        # work (str.strip as a function rejects unicode under Python 2).
        raw_lines = vote_tally_table.text_content().strip().split("\n")
        entries = [line.strip() for line in raw_lines]
        entries = [entry for entry in entries if entry != ""]

        if not entries:
            tally_counts = []
        elif len(entries[0]) > 1:
            # Vote letter fused to the name.
            tally_counts = [(entry[:1], entry[1:]) for entry in entries]
        else:
            # Vote letter and name on alternating lines.
            tally_counts = [(entries[i], entries[i + 1])
                            for i in range(0, len(entries), 2)]

        date_line = date_table.text_content().strip().split("\n")
        date = " ".join(date_line[0:2])
        motion = date_line[-1]
        vote = Vote(chamberName, date, motion, yeses > noes, yeses, noes, other)
        for val, rep in tally_counts:
            if val == "Y":
                vote.yes(rep)
            elif val == "N":
                vote.no(rep)
            else:
                vote.other(rep)
        return vote
Example #12
0
    def scrape_bill(self, chamber, current_bill, session):
        """Scrape one bill from the ALISON (ACAS) site and pass it to ``add_bill``.

        Fetches the bill's status page, then its sponsor page and its history
        page.  From the history table it records amendment documents,
        roll-call votes (when the row links to one), count-only votes (when
        the row has a Yea count) and one action per row.

        ``session`` is indexed: ``session[0]`` is put in the status query
        string and ``session[1]`` is passed to ``Bill``.

        Raises Exception when the ACAS session has expired, or when the
        status page contains no ``BTN<digits>`` bill identifier.
        """
        status_url = "http://alisondb.legislature.state.al.us/acas/SESSBillsStatusResultsMac.asp?BillNumber=%s&GetStatus=Get+Status&session=%s" % (current_bill, session[0])
        with self.soup_context(status_url) as status_page:
            page_text = str(status_page)
            if "Your ACAS Session has expired." in page_text:
                raise Exception("Expired cookie - you'll have to run with -n to skip caching")
            try:
                bill_id = int(re.findall(r'BTN([0-9]+)', page_text)[0])
            except IndexError:
                # Only "no match" means "no bill"; anything else (including
                # KeyboardInterrupt) must propagate untouched.
                raise Exception("No bill found. Hopefully that means it's the end of the session")
            title = status_page.find("td", {'colspan': '7'}).string
            self.log("Starting parse of %s" % current_bill)
            bill = Bill(session[1], chamber, current_bill, title.strip())

            # Sponsors and co-sponsors: the two tables that share an
            # ancestor with the "Co-Sponsors" label.
            sponsors_url = "http://alisondb.legislature.state.al.us/acas/ACTIONSponsorsResultsMac.asp?OID=%d" % bill_id
            with self.soup_context(sponsors_url) as sponsors:
                (primary, secondary) = sponsors.findAll("table", text="Co-Sponsors")[0].parent.parent.parent.findAll('table')
                for p in primary.findAll('td'):
                    bill.add_sponsor('primary', p.string)
                for s in secondary.findAll('td'):
                    bill.add_sponsor('cosponsor', s.string)

            history_url = "http://alisondb.legislature.state.al.us/acas/ACTIONHistoryResultsMac.asp?OID=%d" % bill_id
            with self.soup_context(history_url) as history:
                actions = history.findAll('table', text="Committee")[0].parent.parent.parent.findAll('tr')
                # Column order: Date, Amend/Subst, Matter, Committee, Nay,
                # Yea, Abs, Vote
                for event in actions:
                    e = event.findAll('td')
                    if not e:
                        continue
                    date = e[0].string
                    amend = e[1].find('input')
                    matter = e[2].string
                    y_votes = e[5].string
                    n_votes = e[4].string
                    a_votes = e[6].string

                    if not matter:
                        continue

                    roll = e[7].find('input')

                    # A row without a date reuses the date of the previous
                    # row.  NOTE(review): a first row without a date would
                    # leave act_date unbound and raise NameError below.
                    if date is not None:
                        act_date = dt.datetime.strptime(date, '%m/%d/%Y')

                    if amend is not None:
                        splitter = re.findall(r'documentSelected\(\'(\w*)\',\'([\w\d-]*)\',\'([\w\.\-]*)\',\'([\w\d/]*)\',\'([\w\d]*)\',\'([\w\s]*)\'', str(amend))[0]
                        amend = "http://alisondb.legislature.state.al.us/acas/%s/%s" % (splitter[3], splitter[2])
                        bill.add_document(matter, amend)

                    if roll is not None:
                        splitter = re.findall(r'voteSelected\(\'(\d*)\',\'(\d*)\',\'(\d*)\',\'(.*)\',\'(\d*)\'', str(roll))[0]
                        roll = "http://alisondb.legislature.state.al.us/acas/GetRollCallVoteResults.asp?MOID=%s&VOTE=%s&BODY=%s&SESS=%s" % (splitter[0], splitter[1], splitter[2], splitter[4])
                        with self.soup_context(roll) as votes:
                            vote_rows = votes.findAll('table', text='Member')[0].parent.parent.parent.findAll('tr')

                            yea_votes = int(votes.findAll('tr', text='Total Yea:')[0].parent.parent.findAll('td')[2].string)
                            nay_votes = int(votes.findAll('tr', text='Total Nay:')[0].parent.parent.findAll('td')[2].string)
                            abs_votes = int(votes.findAll('tr', text='Total Abs:')[0].parent.parent.findAll('td')[2].string)
                            p_votes = len(votes.findAll('tr', text='P'))

                            # Vote(chamber, date, motion, passed, yes_count,
                            #      no_count, other_count)
                            vote = Vote(chamber, act_date, matter, (yea_votes > nay_votes), yea_votes, nay_votes, abs_votes + p_votes)

                            vote.add_source(roll)
                            for row in vote_rows:
                                row_text = str(row)
                                if "Total Yea" in row_text or "Total Nay" in row_text or "Total Abs" in row_text:
                                    continue
                                cells = row.findAll('td')
                                if not cells:
                                    continue

                                # Each row holds up to two member/vote pairs:
                                # cells 0 and 2, then cells 4 and 6.
                                (name, t) = cells[0].string, cells[2].string
                                self.dumb_vote(vote, name, t)

                                if len(cells) > 3:
                                    (name, t) = cells[4].string, cells[6].string
                                    self.dumb_vote(vote, name, t)
                            bill.add_vote(vote)

                    if y_votes is not None:
                        # Count-only vote taken from the history row itself.
                        yea_votes = self.dumber_vote(y_votes)
                        nay_votes = self.dumber_vote(n_votes)
                        abs_votes = self.dumber_vote(a_votes)
                        vote = Vote(chamber, act_date, matter, (yea_votes > nay_votes), yea_votes, nay_votes, abs_votes)
                        bill.add_vote(vote)

                    bill.add_action(chamber, matter, act_date)
            self.add_bill(bill)
Example #13
0
    def parse_status(self, bill, url):
        """Read the status page at ``url`` and add its actions and votes to ``bill``.

        Every row after the header of the page's first table becomes an
        action.  An action text of the form "<actor>/<action>" overrides the
        default actor (the bill's own chamber); "House", "Senate" and "LFA"
        are mapped to 'lower', 'upper' and the fiscal analyst's office, and
        "Governor Signed" is attributed to the Governor.

        When a row has more than one link and the last one ends in "txt",
        that file is fetched (relative to ``url``) and parsed as a roll call
        with YEAS / NAYS / ABSENT sections.  Other link targets (committee
        votes) are skipped.

        Raises AttributeError when a linked roll-call file does not match the
        expected YEAS/NAYS/ABSENT layout.
        """
        chamber = bill['chamber']
        status = self.soup_parser(self.urlopen(url))
        bill.add_source(url)
        act_table = status.table

        actor_names = {
            'House': 'lower',
            'Senate': 'upper',
            'LFA': 'Office of the Legislative Fiscal Analyst',
        }

        # Compiled once; groups 1/3/6 are the counts, 2/4/7 the name blocks.
        vote_re = re.compile(r'YEAS -?\s?(\d+)(.*)NAYS -?\s?(\d+)'
                             r'(.*)ABSENT( OR NOT VOTING)? -?\s?'
                             r'(\d+)(.*)',
                             re.MULTILINE | re.DOTALL)

        def split_names(block):
            # Names are separated by runs of two or more whitespace
            # characters.  An empty block splits to [''], which must not be
            # recorded as a voter.
            return [name for name in re.split(r'\s{2,}', block.strip())
                    if name]

        for row in act_table.findAll('tr')[1:]:
            act_date = row.td.find(text=True)
            act_date = dt.datetime.strptime(act_date, "%m/%d/%Y")
            action = row.findAll('td')[1].find(text=True)

            # If not specified, assume the action occurred in the
            # originating house.
            actor = chamber

            split_action = action.split('/')
            if len(split_action) > 1:
                actor = actor_names.get(split_action[0], split_action[0])
                action = '/'.join(split_action[1:]).strip()

            if action == 'Governor Signed':
                actor = 'Governor'

            bill.add_action(actor, action, act_date)

            # Check if this action is a vote
            links = row.findAll('a')
            if len(links) <= 1:
                continue
            vote_url = links[-1]['href']

            # Committee votes are of a different format that we don't
            # handle yet
            if not vote_url.endswith('txt'):
                continue

            vote_url = '/'.join(url.split('/')[:-1]) + '/' + vote_url
            vote_page = self.urlopen(vote_url)

            match = vote_re.search(vote_page)
            # Counts are stored as integers, not as the matched text.
            yes_count = int(match.group(1))
            no_count = int(match.group(3))
            other_count = int(match.group(6))
            passed = yes_count > no_count

            if actor in ('upper', 'lower'):
                vote_chamber = actor
                vote_location = ''
            else:
                vote_chamber = ''
                vote_location = actor

            vote = Vote(vote_chamber, act_date,
                        action, passed, yes_count, no_count,
                        other_count,
                        location=vote_location)
            vote.add_source(vote_url)

            for name in split_names(match.group(2)):
                vote.yes(name)
            for name in split_names(match.group(4)):
                vote.no(name)
            for name in split_names(match.group(7)):
                vote.other(name)

            bill.add_vote(vote)
Example #14
0
    def get_vote(self, bill, url):
        """Scrape the roll call at ``url`` (site-relative) and add it to ``bill``.

        The chamber comes from the ``sChamber`` query parameter, the motion
        from the row holding the BillLookUp link, the time from the "Time:"
        label and the totals from the "Total Votes:" cell.  Member names are
        read from the third table on the page: rows with two cells hold one
        name list, rows with three cells hold two, and a single-cell row is a
        tie-breaking vote, which also overrides ``passed``.

        Side effect: the first time a tie-breaker row is seen for the upper
        chamber, a Person for the Lieutenant Governor is created, cached on
        ``self.lt_gov`` and passed to ``self.add_legislator``.
        """
        url = "http://www.ncga.state.nc.us" + url + "&bPrintable=true"
        chamber = {"H": "lower", "S": "upper"}[re.findall(r"sChamber=(\w)", url)[0]]

        data = self.urlopen(url)
        soup = self.soup_parser(data)

        motion = (
            soup.findAll("a", href=re.compile(r"BillLookUp\.pl"))[0]
            .findParents("tr", limit=1)[0]
            .findAll("td")[1]
            .font.contents[-1]
        )

        vote_time = soup.findAll("b", text="Time:")[0].next.strip()
        vote_time = dt.datetime.strptime(vote_time, "%b %d %Y  %I:%M%p")

        vote_mess = soup.findAll("td", text=re.compile("Total Votes:"))[0]
        (yeas, noes, nots, absent, excused) = [
            int(count)
            for count in re.findall(
                r"Ayes: (\d+)\s+Noes: (\d+)\s+Not: (\d+)\s+Exc. " r"Absent: (\d+)\s+Exc. Vote: (\d+)", vote_mess, re.U
            )[0]
        ]

        # chamber, date, motion, passed, yes_count, no_count, other_count
        v = Vote(chamber, vote_time, motion, (yeas > noes), yeas, noes, nots + absent + excused)

        # eh, it's easier to just get table[2] for this..
        vote_table = soup.findAll("table")[2]

        for row in vote_table.findAll("tr"):
            if "Democrat" in self.flatten(row):
                continue

            cells = row.findAll("td")
            if len(cells) == 1:
                # Tie-breaker row.  The name is read from the page on every
                # such row; it used to be bound only while creating the
                # cached Person, so a second tie-break in the same run
                # failed with an unbound local.
                full_name = (
                    soup.findAll("td", text=re.compile("Lieutenant Governor"))[0]
                    .parent.findAll("span")[0]
                    .contents[0]
                )

                # I can't find any examples of ties in the House,
                # nor information on who would break them.
                if not self.lt_gov and chamber == "upper":
                    (first_name, last_name, middle_name, suffix) = split_name(full_name)

                    self.lt_gov = Person(
                        full_name, first_name=first_name, last_name=last_name, middle_name=middle_name, suffix=suffix
                    )

                    self.lt_gov.add_role("Lieutenant Governor", bill["session"])

                    self.add_legislator(self.lt_gov)

                if "VOTES YES" in self.flatten(cells[0]):
                    v["passed"] = True
                    v.yes(full_name)
                else:
                    v["passed"] = False
                    v.no(full_name)
                continue
            elif len(cells) == 2:
                vote_type, a = cells
                bunch = [self.flatten(a)]
            elif len(cells) == 3:
                vote_type, d, r = cells
                bunch = [self.flatten(d), self.flatten(r)]
            else:
                continue

            # why doesn't .string work? ... bleh.
            vote_type = vote_type.font.b.contents[0]

            if "Ayes" in vote_type:
                adder = v.yes
            elif "Noes" in vote_type:
                adder = v.no
            else:
                adder = v.other

            for party in bunch:
                # Names follow the first ":" and are separated by ";".  A
                # real list is built so it can be indexed.
                members = [
                    member.replace(" (SPEAKER)", "")
                    for member in party[(party.index(":") + 1):].split(";")
                ]

                if members[0] == "None":
                    members = []

                for member in members:
                    adder(member)

        v.add_source(url)
        bill.add_vote(v)
Example #15
0
    def get_vote(self, bill, url):
        """Scrape the roll call at ``url`` (site-relative) and add it to ``bill``.

        The chamber comes from the ``sChamber`` query parameter, the motion
        from the row holding the BillLookUp link, the time from the 'Time:'
        label and the totals from the 'Total Votes:' cell.  Member names are
        read from the third table on the page: rows with two cells hold one
        name list, rows with three cells hold two, and a single-cell row is a
        tie-breaking vote, which also overrides ``passed``.

        Side effect: the first time a tie-breaker row is seen for the upper
        chamber, a Person for the Lieutenant Governor is created, cached on
        ``self.lt_gov`` and passed to ``self.save_person``.
        """
        url = 'http://www.ncga.state.nc.us' + url + '&bPrintable=true'
        chamber = {'H': 'lower', 'S': 'upper'}[
            re.findall(r'sChamber=(\w)', url)[0]]

        data = self.urlopen(url)
        soup = self.soup_parser(data)

        motion = soup.findAll('a', href=re.compile(r'BillLookUp\.pl'))[0] \
                     .findParents('tr', limit=1)[0].findAll('td')[1] \
                     .font.contents[-1]

        vote_time = soup.findAll('b', text='Time:')[0].next.strip()
        vote_time = dt.datetime.strptime(vote_time, '%b %d %Y  %I:%M%p')

        vote_mess = soup.findAll('td', text=re.compile('Total Votes:'))[0]
        totals = re.findall(
            r'Ayes: (\d+)\s+Noes: (\d+)\s+Not: (\d+)\s+Exc. '
            r'Absent: (\d+)\s+Exc. Vote: (\d+)', vote_mess, re.U)[0]
        (yeas, noes, nots, absent, excused) = [int(count) for count in totals]

        # chamber, date, motion, passed, yes_count, no_count, other_count
        v = Vote(chamber, vote_time, motion, (yeas > noes),
                 yeas, noes, nots + absent + excused)

        # eh, it's easier to just get table[2] for this..
        vote_table = soup.findAll('table')[2]

        for row in vote_table.findAll('tr'):
            if 'Democrat' in self.flatten(row):
                continue

            cells = row.findAll('td')
            if len(cells) == 1:
                # Tie-breaker row.  The name is read from the page on every
                # such row; it used to be bound only while creating the
                # cached Person, so a second tie-break in the same run
                # failed with an unbound local.
                full_name = soup.findAll(
                    'td', text=re.compile('Lieutenant Governor'))[0] \
                    .parent.findAll('span')[0].contents[0]

                # I can't find any examples of ties in the House,
                # nor information on who would break them.
                if not self.lt_gov and chamber == 'upper':
                    (first_name, last_name, middle_name, suffix) = split_name(
                        full_name)

                    self.lt_gov = Person(full_name, first_name=first_name,
                                         last_name=last_name,
                                         middle_name=middle_name,
                                         suffix=suffix)

                    self.lt_gov.add_role('Lieutenant Governor',
                                         bill['session'])

                    self.save_person(self.lt_gov)

                if 'VOTES YES' in self.flatten(cells[0]):
                    v['passed'] = True
                    v.yes(full_name)
                else:
                    v['passed'] = False
                    v.no(full_name)
                continue
            elif len(cells) == 2:
                vote_type, a = cells
                bunch = [self.flatten(a)]
            elif len(cells) == 3:
                vote_type, d, r = cells
                bunch = [self.flatten(d), self.flatten(r)]
            else:
                continue

            # why doesn't .string work? ... bleh.
            vote_type = vote_type.font.b.contents[0]

            if 'Ayes' in vote_type:
                adder = v.yes
            elif 'Noes' in vote_type:
                adder = v.no
            else:
                adder = v.other

            for party in bunch:
                # Names follow the first ':' and are separated by ';'.  A
                # real list is built so it can be indexed.
                members = [member.replace(' (SPEAKER)', '')
                           for member in
                           party[(party.index(':') + 1):].split(';')]

                if members[0] == 'None':
                    members = []

                for member in members:
                    adder(member)

        v.add_source(url)
        bill.add_vote(v)
Example #16
0
    def scrape_bills(self, chamber, year):
        """Turn the stored bills of one chamber and session into Bill objects.

        The session key is ``year`` followed by the next year. Each
        ``CABill`` row of that session with the chamber's measure type is
        converted into a ``Bill`` with its sponsors, actions and votes and
        passed to ``self.add_bill``. Bills without a version that has XML
        are skipped.

        Args:
            chamber: 'upper' selects 'SB' measures; any other value selects
                'AB' measures.
            year: First year of the session; must be accepted by ``int()``.

        Raises:
            NoDataForYear: If the derived session key is not in
                ``self.metadata['sessions']``.
        """
        session = "%s%d" % (year, int(year) + 1)
        if session not in self.metadata['sessions']:
            raise NoDataForYear(year)

        if chamber == 'upper':
            measure_abbr, chamber_name = 'SB', 'SENATE'
        else:
            measure_abbr, chamber_name = 'AB', 'ASSEMBLY'

        matching_bills = self.session.query(CABill).filter_by(
            session_year=session).filter_by(measure_type=measure_abbr)

        for bill in matching_bills:
            if bill.session_num == '0':
                bill_session = session
            else:
                bill_session = session + (
                    ' Special Session %s' % bill.session_num)

            bill_id = bill.short_bill_id

            # `!= None` is intentional: it is part of the query filter
            # expression, not a plain Python comparison.
            version = self.session.query(CABillVersion).filter_by(
                bill=bill).filter(CABillVersion.bill_xml != None).first()
            if not version:
                # not enough data to import
                continue

            fsbill = Bill(bill_session, chamber, bill_id,
                          version.title,
                          short_title=version.short_title)

            # Only authors from the bill's own house are kept as sponsors.
            for author in version.authors:
                if author.house != chamber_name:
                    continue
                fsbill.add_sponsor(author.contribution, author.name)

            for action in bill.actions:
                if not action.action:
                    # NULL action text seems to be an error on CA's part,
                    # unless it has some meaning I'm missing
                    continue
                fsbill.add_action(action.actor or chamber,
                                  action.action, action.action_date)

            for vote in bill.votes:
                result = vote.vote_result == '(PASS)'

                # The first word of the location names the chamber; the
                # remaining words are the location proper.
                full_loc = vote.location.description
                loc_words = full_loc.split(' ')
                first_word = loc_words[0].lower()
                remainder = ' '.join(loc_words[1:])
                if first_word in ['asm', 'assembly']:
                    vote_chamber, vote_location = 'lower', remainder
                elif first_word.startswith('sen'):
                    vote_chamber, vote_location = 'upper', remainder
                else:
                    vote_chamber, vote_location = '', full_loc

                fsvote = Vote(vote_chamber,
                              vote.vote_date_time,
                              vote.motion.motion_text or '',
                              result,
                              vote.ayes, vote.noes, vote.abstain,
                              threshold=vote.threshold,
                              location=vote_location)

                for record in vote.votes:
                    if record.vote_code == 'AYE':
                        cast = fsvote.yes
                    elif record.vote_code.startswith('NO'):
                        cast = fsvote.no
                    else:
                        cast = fsvote.other
                    cast(record.legislator_name)

                fsbill.add_vote(fsvote)

            self.add_bill(fsbill)
Example #17
0
    def parse_vote_details(self, url):
        """
        Load a single roll-call page and build a Vote from it.

        Reads the chamber, date, motion and the YEAS / NAYS / LVE / N/V
        totals, then records each legislator under yes, no or other
        (LVE and N/V are summed into the 'other' total).

        Raises ScrapeError when the number of names collected for a
        category differs from the total printed on the page.
        """
        with self.soup_context(url) as vote_page:

            def marked_spans(letter):
                # Spans whose inner text is the given vote letter.
                return vote_page.findAll('span', {'class': 'font8text'},
                                         text=letter)

            def tally(label):
                # The number is read from the node following the label div.
                return int(vote_page.find('div', text=label).next.string)

            header = vote_page.find('div', {'class': 'subHdrGraphic'})
            chamber = 'upper' if 'Senate' in header.string else 'lower'

            # The link back to the bill anchors the block that holds the
            # date, and the motion is its next sibling div.
            linkback = vote_page.find(
                'a', href=re.compile('billinfo')).parent.parent
            date = dt.datetime.strptime(linkback.find('div').string,
                                        "%A, %B %d, %Y")

            motion_div = linkback.findNextSibling('div')
            if motion_div.a:
                motion = "%s %s" % (motion_div.a.string,
                                    motion_div.contents[-1].string.strip())
            elif motion_div.span:
                motion = "%s %s" % (motion_div.span.string.strip(),
                                    motion_div.contents[-1].string.strip())
            else:
                motion = motion_div.string.strip().replace('&nbsp;', '')

            yes_count = tally('YEAS')
            no_count = tally('NAYS')
            lve_count = tally('LVE')
            nv_count = tally('N/V')
            other_count = lve_count + nv_count

            vote = Vote(chamber, date, motion, yes_count > no_count,
                        yes_count, no_count, other_count)
            vote.add_source(url)

            # find the votes by the inner text. because background colors lie.
            recorders = (
                (vote.yes, marked_spans('Y')),
                (vote.no, marked_spans('N')),
                (vote.other, marked_spans('E') + marked_spans('X')),
            )
            for record, marks in recorders:
                for mark in marks:
                    record(mark.parent.findNextSibling('span').string)

            # Cross-check the collected names against the printed totals.
            checks = (
                ('yes_votes', yes_count, 'wrong yes count %d/%d'),
                ('no_votes', no_count, 'wrong no count %d/%d'),
                ('other_votes', other_count, 'wrong other count %d/%d'),
            )
            for key, expected, template in checks:
                if len(vote[key]) != expected:
                    raise ScrapeError(template % (len(vote[key]), expected))
        return vote
Example #18
0
    def scrape_votes(self, url, chamb):
        """Parse a roll-call page into a Vote.

        Two page layouts are handled:

        * "Structured": the page has more than four ``<font>`` and
          ``<span>`` elements in total. Header values are read from those
          elements and individual votes from the cells of the last table.
        * "Unstructured": otherwise, the last child of the first ``<pre>``
          element is split into lines; header values are read until the
          "The following is the roll call vote:" line, and the remaining
          text is split on double spaces into vote letters and names.

        The vote counts as passed when the Yea count is at least the
        "Necessary for Adoption/Passage" number.

        Args:
            url: Address of the roll-call page to fetch.
            chamb: Chamber value handed to the Vote unchanged.
        """
        soup = BeautifulSoup(urllib2.urlopen(urllib2.Request(url)).read())

        header = dict.fromkeys(
            ('date', 'motion', 'yeas', 'neas', 'others', 'necessary'))
        digits = re.compile(r'^[\d ]+$')

        def read_header_line(line, threshold_names_motion):
            """Store a header value found in ``line``; return True if any."""
            if "Taken on" in line:
                # The text has the form: "Taken on <date> <reason>"
                parts = line.split(None, 3)
                header['date'] = parts[2]
                if len(parts) > 3:
                    header['motion'] = parts[3]
            elif "Those voting Yea" in line:
                header['yeas'] = self.get_num_from_line(line)
            elif "Those voting Nay" in line:
                header['neas'] = self.get_num_from_line(line)
            elif "Those absent and not voting" in line:
                header['others'] = self.get_num_from_line(line)
            elif ("Necessary for Adoption" in line
                  or "Necessary for Passage" in line):
                if threshold_names_motion:
                    if "Adoption" in line:
                        header['motion'] = "Adoption"
                    else:
                        header['motion'] = "Passage"
                header['necessary'] = self.get_num_from_line(line)
            else:
                return False
            return True

        def build_vote():
            """Create the Vote from the header values collected so far."""
            # NOTE(review): if no "Necessary for ..." line was seen,
            # 'necessary' is still None and this comparison depends on the
            # interpreter's handling of None ordering.
            passed = header['yeas'] >= header['necessary']
            return Vote(chamb, header['date'], header['motion'], passed,
                        header['yeas'], header['neas'], header['others'])

        def record(vote, value, name):
            """File ``name`` under yes, no or other according to ``value``."""
            if value == 'Y':
                vote.yes(name)
            elif value == 'N':
                vote.no(name)
            else:
                vote.other(name)

        fonts = soup.findAll('font')
        spans = soup.findAll('span')
        if len(fonts) + len(spans) > 4:  # data is vaguely structured
            if len(fonts) < 4:
                fonts = spans
            for element in fonts:
                # this could be sped up.
                read_header_line(str(element.contents[0]).strip(), False)
            vote = build_vote()

            # Figure out who voted for what: cells of the last table
            # alternate between a vote letter and a legislator name.
            cells = soup.findAll('table')[-1].findAll('td')
            pending = None
            for cell in cells:
                # either we are looking at fonts or spans
                holder = cell.find('font')
                if holder is None:
                    holder = cell.find('span')
                if holder is None:
                    text = ''
                else:
                    text = holder.contents[0].strip()
                if not text or digits.search(text) is not None:
                    continue
                if pending is None:
                    if text in ('Y', 'N'):
                        pending = text
                    elif text in ('X', 'A'):
                        pending = 'X'
                else:
                    record(vote, pending, text)
                    pending = None

        else:
            # data is mostly unstructured. Have to sift through a string
            data = soup.find('pre')
            lines = re.split(r'\n+|\r+|\f+', data.contents[-1].strip())

            # Index of the first line after the roll-call marker; if the
            # marker never appears there are no vote lines at all.
            roll_start = len(lines)
            for i, raw_line in enumerate(lines):
                line = raw_line.strip()
                if read_header_line(line, True):
                    continue
                if "The following is the roll call vote:" in line:
                    roll_start = i + 1  # the next lines contain actual votes
                    break

            # process the vote values
            vote = build_vote()

            # Fix: the original called string.join() here, but `string` was
            # a local name in this function (assigned in the other branch),
            # so this path always raised UnboundLocalError.
            words = '  '.join(lines[roll_start:]).split('  ')

            lone_vote = re.compile(r'^(Y|N|X|A)$')
            # there aren't two spaces between vote and name so it doesn't
            # get split apart: group 1 is the vote letter, group 2 the name
            fused_vote = re.compile(r'^(Y|X|A|N) ([\S ]+)$')

            pending = None
            for word in words:
                word = word.strip()
                if not word or digits.search(word) is not None:
                    continue
                word = strip_digits(word)
                if pending is not None:
                    record(vote, pending, word)
                    pending = None
                    continue
                lone = lone_vote.match(word)
                if lone is not None:
                    letter = lone.group(1)
                    pending = letter if letter in ('Y', 'N') else 'X'
                    continue
                fused = fused_vote.match(word)
                if fused is not None:
                    # Fix: the original read the groups the wrong way
                    # round, recording the vote letter as the name.
                    record(vote, fused.group(1), fused.group(2))

        # NOTE(review): the Vote built above is not returned or stored in
        # the visible code; confirm how callers are meant to obtain it.
Example #19
0
    def _scrape_bill_votes(self, soup, bill, chamber):
        # scrape votes
        # http://flooractivityext.leg.wa.gov/rollcall.aspx?id=9695&bienId=4
        for roll_call_link in soup.findAll('a', href=re.compile('ShowRollCall')):
            print('roll_call ', roll_call_link)
            print('roll_call href ', roll_call_link['href'])
            href = roll_call_link['href']
            #if href.count('(') and href.count(')') and href.count(','):
            toks = href.split('(')
            toks = toks[1].split(')')
            toks = toks[0].split(',')
            id = toks[0]
            bienId = toks[1]
            roll_call_url = 'http://flooractivityext.leg.wa.gov/rollcall.aspx?id=%s&bienId=%s' % (id, bienId)
            print('roll_call_url ', roll_call_url)

            with self.soup_context(roll_call_url) as roll_call_info:
                rows = roll_call_info.findAll('tr')
                date = rows[3].find('td').string
                motion = rows[2].find('td').string

                #strip cruft
                motion = string.replace(motion, '&amp;', '')
                motion = string.replace(motion, '&nbsp;', '')
                motion = string.replace(motion, '  ', ' ')

                print('orig motion ', motion)
                # eg. "House vote on Final Passage"
                # lop off first three words to get motion: "Final Passage"
                # first word is chamber
                if toks[0] == 'House':
                    chamber = 'lower'
                elif toks[0] == 'Senate':
                    chamber = 'upper'
                print('chamber ', chamber)
                toks = motion.split(' ')
                motion = ' '.join(toks[3:])
                print('motion ', motion)
                print('date ', date)

                counts = roll_call_info.find(text=re.compile('Yeas:'))
                print('yeas ', counts)
                toks = counts.string.splitlines()
                for tok in toks:
                    print( 'tok ', tok)
                    toks2 = tok.strip().split('&')[0].split(' ')
                    if toks2[0] == 'Yeas:':
                        yes_count = int(toks2[1])
                        print('yes_count ', yes_count)
                    elif toks2[0] == 'Nays:':
                        no_count = int(toks2[1])
                        print('no_count ', no_count)
                    elif toks2[0] == 'Absent:':
                        absent_count = int(toks2[1])
                        print('abs_coount ', absent_count)
                    elif toks2[0] == 'Excused:':
                        excused_count = int(toks2[1])
                        print('excused_count ', excused_count)
                        
                vote = Vote(chamber, date, motion, True, yes_count, no_count, excused_count)
                vote.add_source(roll_call_url)
                #Vote('upper', '12/7/08', 'Final passage', True, 30, 8, 3)

#                        voterLists = roll_call_info.findAll('span', {'class': 'RollCall'})
#                        for voterList in voterLists:
#                            print('voterList ', voterList)
#                            toks = voterList.string.split(',')
#                            for tok in toks:
#                                print('tok ', tok)

                #eg. &nbsp;&nbsp;Representatives Alexander, Angel, Simpson, G., and Mr. Speaker
                #eg. &nbsp;&nbsp;Representative Alexander

                start_tok = 'Representative'
                if chamber == 'upper':
                    start_tok = 'Senator'

                nameLists = roll_call_info.findAll(text=re.compile(start_tok))
                print 'len nameLists', len(nameLists)
                print 'nameLists', nameLists
                if not nameLists:
                    continue
                nameListIdx = 0
                for i, count in enumerate([yes_count, no_count, absent_count, excused_count]):
                    print 'i,count', i, count
                    if count is 0:
                        continue
                    nameList = nameLists[nameListIdx]
                    nameListIdx = nameListIdx + 1

                    start_tok = 'Representative'
                    if chamber == 'upper':
                        start_tok = 'Senator'
                    if count > 1:
                        start_tok = '%ss' % start_tok
                    print 'start_tok', start_tok

                    if not nameList:
                        continue

                    if count > 2:
                        toks = nameList.split(',')
                    else:
                        toks = nameList.split('and')


                    #eg. &nbsp&nbsp;Senators Benton
                    #eg. &nbsp&nbsp;Senator Benton
                    first_tok = toks.pop(0)
                    print 'first_tok', first_tok
                    name = first_tok.split(start_tok)[-1].strip()
                    print('first_name', name)
                    if i == 0:
                        vote.yes(name)
                    elif i == 2:
                        vote.no(name)

                    if count == 1:
                        continue
                    if count == 2:
                        last_tok = toks[0]
                    else:
                        #eg. and Zarelli
                        last_tok = toks.pop(-1)
                    print 'last_tok', last_tok
                    name = last_tok.replace('and ', '', 1)
                    if type == 'yes':
                        vote.yes(name)
                    elif type == 'no':
                        vote.no(name)
                    print('last_name', name)

                    sz = len(toks)
                    for j, tok in enumerate(toks):
                        name = tok.strip()
                        if name[1] == '.':
                            continue
                        if j+1 < sz:
                            next_tok = toks[j+1].strip()
                            if next_tok[1] == '.':
                                name = ('%s, %s' % (name, next_tok))
                        print('name', name)
                        if type == 'yes':
                            vote.yes(name)
                        elif type == 'no':
                            vote.no(name)
                    print 'nameList ', nameList