Esempio n. 1
0
    def parse_bill(self, chamber, session, bill_id, bill_info_url):
        """Scrape a single bill's info page and add the resulting Bill.

        The page at ``bill_info_url`` is parsed for the bill title, the link
        to the bill text, sponsor links, the list of actions and an optional
        vote-history PDF link.  Nothing is added when the page has no link
        to ``<bill_id>/bill.doc``.

        Args:
            chamber: chamber the bill belongs to; also used as the actor for
                actions that do not name a chamber.
            session: session identifier.  Characters 2-3 are read as the
                two-digit year applied to every action date.
            bill_id: bill identifier, used to build the version link.
            bill_info_url: URL of the bill's info page.
        """
        with self.urlopen_context(bill_info_url) as bill_info_data:
            bill_info = self.soup_parser(bill_info_data)
            version_url = '%s/bill.doc' % bill_id
            version_link = bill_info.find(href=version_url)

            if not version_link:
                # This bill was withdrawn
                return

            bill_title = version_link.findNext('p').contents[0].strip()

            bill = Bill(session, chamber, bill_id, bill_title)
            bill.add_version("Most Recent Version",
                             session_url(session) + version_url)
            bill.add_source(bill_info_url)

            sponsor_links = bill_info.findAll(href=re.compile(
                    r'legislator/[SH]\d+\.htm'))

            for sponsor_link in sponsor_links:
                bill.add_sponsor('primary', sponsor_link.contents[0].strip())

            # Action lines only carry month and day; the year comes from the
            # session identifier.
            action_year = int('20' + session[2:4])

            # The actions live in the last <p> following the version link.
            action_p = version_link.findAllNext('p')[-1]
            for action in action_p.findAll(text=True):
                action = action.strip()
                if (not action or action == 'last action' or
                        'Prefiled' in action):
                    continue

                # Text before the first '-' is the date, the rest the action.
                date_text, _, action = action.partition('-')

                # Parse with the year included: a bare '%b %d' defaults to
                # 1900, which rejects "Feb 29" even in a leap-year session.
                action_date = dt.datetime.strptime(
                    '%s %d' % (date_text.strip(), action_year), '%b %d %Y')

                if action.endswith(('House', '(H)')):
                    actor = 'lower'
                elif action.endswith(('Senate', '(S)')):
                    actor = 'upper'
                else:
                    actor = chamber

                bill.add_action(actor, action, action_date)

            vote_link = bill_info.find(
                href=re.compile(r'.*/vote_history\.pdf'))
            if vote_link:
                bill.add_document(
                    'vote_history.pdf',
                    bill_info_url.replace('.htm', '') + "/vote_history.pdf")

            self.add_bill(bill)
Esempio n. 2
0
    def scrape_bills(self, chamber, year):
        """Scrape all bills of one chamber for the session starting in `year`.

        Bills are fetched by consecutive number (from SB 1 for the upper
        chamber, from HB 4001 otherwise) until ``self.scrape_bill`` returns
        no page, which ends the scrape.

        Args:
            chamber: 'upper' selects Senate bills; anything else selects
                House bills.
            year: session start year (int or numeric string).

        Raises:
            NoDataForYear: if `year` is even.
        """
        session_year = int(year)
        if session_year % 2 == 0:
            raise NoDataForYear(year)
        # `year` is replaced on every iteration by the value scrape_bill
        # returns; `session_year` keeps the original for the session name.
        year = session_year

        if chamber == 'upper':
            bill_no = 1
            abbr = 'SB'
        else:
            bill_no = 4001
            abbr = 'HB'

        # Optional tables whose rows are attached as documents, in this order.
        document_table_ids = ('frg_billstatus_HlaTable',
                              'frg_billstatus_SfaSection')

        while True:
            bill_page, year = self.scrape_bill(year, abbr, bill_no)
            # if we can't find a page, we must be done. This is a healthy thing.
            if bill_page is None:
                return

            title = ''.join(self.flatten(
                bill_page.findAll(id='frg_billstatus_ObjectSubject')[0]))
            title = title.replace('\n', '').replace('\r', '')
            bill_id = "%s %d" % (abbr, bill_no)

            the_bill = Bill("Regular Session %d" % session_year, chamber,
                            bill_id, title)

            # sponsors: the first link is the primary, the rest cosponsors
            sponsor_links = bill_page.findAll(
                id='frg_billstatus_SponsorList')[0].findAll('a')
            for position, link in enumerate(sponsor_links):
                sponsor_type = 'primary' if position == 0 else 'cosponsor'
                the_bill.add_sponsor(sponsor_type, link.string)

            # versions
            version_rows = bill_page.findAll(
                id='frg_billstatus_DocumentGridTable')[0].findAll('tr')
            for doc in version_rows:
                r = self.parse_doc(the_bill, doc)
                if r:
                    the_bill.add_version(*r)

            # documents: look the tables up directly instead of searching
            # the serialised page, and skip any that are absent
            for table_id in document_table_ids:
                tables = bill_page.findAll(id=table_id)
                if not tables:
                    continue
                for doc in tables[0].findAll('tr'):
                    r = self.parse_doc(the_bill, doc)
                    if r:
                        the_bill.add_document(*r)

            the_bill.add_source('http://legislature.mi.gov/doc.aspx?%d-%s-%04d' % (year, abbr, bill_no))
            self.parse_actions(the_bill, bill_page.findAll(id='frg_billstatus_HistoriesGridView')[0])
            self.add_bill(the_bill)
            bill_no = bill_no + 1
Esempio n. 3
0
    def parse_bill(self, chamber, session, bill_id, bill_info_url):
        """Scrape a single bill's info page and save the resulting Bill.

        The page at ``bill_info_url`` is parsed for the bill title, the link
        to the bill text, sponsor links, the list of actions and an optional
        vote-history PDF link.  Nothing is saved when the page has no link
        to ``<bill_id>/bill.doc``.

        Args:
            chamber: chamber the bill belongs to; also used as the actor for
                actions that do not name a chamber.
            session: session identifier.  Characters 2-3 are read as the
                two-digit year applied to every action date.
            bill_id: bill identifier, used to build the version link.
            bill_info_url: URL of the bill's info page.
        """
        with self.urlopen_context(bill_info_url) as bill_info_data:
            bill_info = self.soup_parser(bill_info_data)
            version_url = "%s/bill.doc" % bill_id
            version_link = bill_info.find(href=version_url)

            if not version_link:
                # This bill was withdrawn
                return

            bill_title = version_link.findNext("p").contents[0].strip()

            bill = Bill(session, chamber, bill_id, bill_title)
            bill.add_version("Most Recent Version", session_url(session) + version_url)
            bill.add_source(bill_info_url)

            sponsor_links = bill_info.findAll(href=re.compile(r"legislator/[SH]\d+\.htm"))

            for sponsor_link in sponsor_links:
                bill.add_sponsor("primary", sponsor_link.contents[0].strip())

            # Action lines only carry month and day; the year comes from the
            # session identifier.
            action_year = int("20" + session[2:4])

            # The actions live in the last <p> following the version link.
            action_p = version_link.findAllNext("p")[-1]
            for action in action_p.findAll(text=True):
                action = action.strip()
                if not action or action == "last action" or "Prefiled" in action:
                    continue

                # Text before the first "-" is the date, the rest the action.
                date_text, _, action = action.partition("-")

                # Parse with the year included: a bare "%b %d" defaults to
                # 1900, which rejects "Feb 29" even in a leap-year session.
                action_date = dt.datetime.strptime(
                    "%s %d" % (date_text.strip(), action_year), "%b %d %Y"
                )

                if action.endswith(("House", "(H)")):
                    actor = "lower"
                elif action.endswith(("Senate", "(S)")):
                    actor = "upper"
                else:
                    actor = chamber

                bill.add_action(actor, action, action_date)

            vote_link = bill_info.find(href=re.compile(r".*/vote_history\.pdf"))
            if vote_link:
                bill.add_document("vote_history.pdf", bill_info_url.replace(".htm", "") + "/vote_history.pdf")

            self.save_bill(bill)
Esempio n. 4
0
    def scrape_bills(self, chamber, year):
        """Save one hard-coded sample bill, with two votes and three actions.

        Args:
            chamber: "upper" yields bill "SB 1"; anything else yields "HB 1".
            year: must be the string "2009".

        Raises:
            NoDataForYear: for any year other than "2009".
        """
        if year != "2009":
            # Include the rejected year so the error says what was asked for.
            # NOTE(review): assumes NoDataForYear accepts the year as its
            # argument - confirm against its definition.
            raise NoDataForYear(year)

        if chamber == "upper":
            other_chamber = "lower"
            bill_id = "SB 1"
        else:
            other_chamber = "upper"
            bill_id = "HB 1"

        b1 = Bill("2009-2010", chamber, bill_id, "A super bill")
        b1.add_source("http://example.com")
        b1.add_version("As Introduced", "http://example.com/SB1.html")
        b1.add_document("Google", "http://google.com")
        b1.add_sponsor("primary", "Bob Smith")
        b1.add_sponsor("secondary", "Johnson, Sally")

        # A passing vote in the upper chamber...
        d1 = datetime.datetime.strptime("1/29/2010", "%m/%d/%Y")
        v1 = Vote("upper", d1, "Final passage", True, 2, 0, 0)
        v1.yes("Bob Smith")
        v1.yes("Sally Johnson")

        # ...and a failing one in the lower chamber the following day.
        d2 = datetime.datetime.strptime("1/30/2010", "%m/%d/%Y")
        v2 = Vote("lower", d2, "Final passage", False, 0, 1, 1)
        v2.no("B. Smith")
        v2.other("Sally Johnson")

        b1.add_vote(v1)
        b1.add_vote(v2)

        b1.add_action(chamber, "introduced", d1)
        b1.add_action(chamber, "read first time", d1)
        b1.add_action(other_chamber, "introduced", d2)

        self.save_bill(b1)
Esempio n. 5
0
    def scrape_bill(self, chamber, current_bill, session):
        """Scrape one bill's status, sponsors, history and votes, then add it.

        Args:
            chamber: chamber passed to the Bill, its votes and its actions.
            current_bill: bill number, used in the status query and as the
                bill id.
            session: indexable pair; ``session[0]`` goes into the status
                query and ``session[1]`` is passed to ``Bill``.

        Raises:
            Exception: if the status page reports an expired ACAS session,
                or if no ``BTN<digits>`` bill OID is found on it.
        """
        status_url = "http://alisondb.legislature.state.al.us/acas/SESSBillsStatusResultsMac.asp?BillNumber=%s&GetStatus=Get+Status&session=%s" % (current_bill, session[0])
        with self.soup_context(status_url) as status_page:
            page_text = str(status_page)
            if "Your ACAS Session has expired." in page_text:
                raise Exception("Expired cookie - you'll have to run with -n to skip caching")
            try:
                bill_id = int(re.findall(r'BTN([0-9]+)', page_text)[0])
            except IndexError:
                # No BTN<digits> marker on the page: there is no such bill.
                raise Exception("No bill found. Hopefully that means it's the end of the session")
            title = status_page.find("td", {'colspan': '7'}).string
            self.log("Starting parse of %s" % current_bill)
            # create our bill!
            bill = Bill(session[1], chamber, current_bill, title.strip())

            # add sponsors and co-sponsors
            with self.soup_context("http://alisondb.legislature.state.al.us/acas/ACTIONSponsorsResultsMac.asp?OID=%d" % bill_id) as sponsors:
                # This pains me.
                (primary, secondary) = sponsors.findAll("table", text="Co-Sponsors")[0].parent.parent.parent.findAll('table')
                for p in primary.findAll('td'):
                    bill.add_sponsor('primary', p.string)
                for s in secondary.findAll('td'):
                    bill.add_sponsor('cosponsor', s.string)

            with self.soup_context("http://alisondb.legislature.state.al.us/acas/ACTIONHistoryResultsMac.asp?OID=%d" % bill_id) as history:
                actions = history.findAll('table', text="Committee")[0].parent.parent.parent.findAll('tr')
                # Date Amend/Subst Matter Committee Nay Yea Abs Vote
                for event in actions:
                    e = event.findAll('td')
                    if not e:
                        continue
                    date = e[0].string
                    amend = e[1].find('input')
                    matter = e[2].string
                    y_votes = e[5].string
                    n_votes = e[4].string
                    a_votes = e[6].string

                    if not matter:
                        continue

                    roll = e[7].find('input')

                    # Rows without a date keep the date of the last dated row.
                    if date is not None:
                        act_date = dt.datetime.strptime(date, '%m/%d/%Y')

                    if amend is not None:
                        # Build the document URL from the arguments of the
                        # input's documentSelected(...) call.
                        splitter = re.findall(r'documentSelected\(\'(\w*)\',\'([\w\d-]*)\',\'([\w\.\-]*)\',\'([\w\d/]*)\',\'([\w\d]*)\',\'([\w\s]*)\'', str(amend))[0]
                        amend = "http://alisondb.legislature.state.al.us/acas/%s/%s" % (splitter[3], splitter[2])
                        bill.add_document(matter, amend)

                    if roll is not None:
                        # Build the roll-call URL from the arguments of the
                        # input's voteSelected(...) call.
                        splitter = re.findall(r'voteSelected\(\'(\d*)\',\'(\d*)\',\'(\d*)\',\'(.*)\',\'(\d*)\'', str(roll))[0]
                        roll = "http://alisondb.legislature.state.al.us/acas/GetRollCallVoteResults.asp?MOID=%s&VOTE=%s&BODY=%s&SESS=%s" % (splitter[0], splitter[1], splitter[2], splitter[4])
                        with self.soup_context(roll) as votes:
                            vote_rows = votes.findAll('table', text='Member')[0].parent.parent.parent.findAll('tr')

                            yea_votes = int(votes.findAll('tr', text='Total Yea:')[0].parent.parent.findAll('td')[2].string)
                            nay_votes = int(votes.findAll('tr', text='Total Nay:')[0].parent.parent.findAll('td')[2].string)
                            abs_votes = int(votes.findAll('tr', text='Total Abs:')[0].parent.parent.findAll('td')[2].string)
                            p_votes = len(votes.findAll('tr', text='P'))

                            # chamber, date, motion, passed, yes_count, no_count, other_count
                            vote = Vote(chamber, act_date, matter, (yea_votes > nay_votes), yea_votes, nay_votes, abs_votes + p_votes)

                            vote.add_source(roll)
                            for row in vote_rows:
                                row_text = str(row)
                                if "Total Yea" in row_text or "Total Nay" in row_text or "Total Abs" in row_text:
                                    continue
                                cells = row.findAll('td')
                                if not cells:
                                    continue

                                # Cells 0 and 2 hold one member's name and
                                # vote; cells 4 and 6 hold a second member's
                                # when the row has more than three cells.
                                self.dumb_vote(vote, cells[0].string, cells[2].string)

                                if len(cells) > 3:
                                    self.dumb_vote(vote, cells[4].string, cells[6].string)
                            bill.add_vote(vote)

                    if y_votes is not None:
                        # Vote totals listed on the history row itself.
                        yea_votes = self.dumber_vote(y_votes)
                        nay_votes = self.dumber_vote(n_votes)
                        abs_votes = self.dumber_vote(a_votes)
                        vote = Vote(chamber, act_date, matter, (yea_votes > nay_votes), yea_votes, nay_votes, abs_votes)
                        bill.add_vote(vote)

                    bill.add_action(chamber, matter, act_date)
            self.add_bill(bill)