def scrape_session(self, chamber, session, special=0):
    """Scrape every bill/resolution linked from one session's bill list.

    Opens the page at ``bill_list_url(chamber, session, special)``, finds
    each link whose ``href`` matches the bill-link pattern for this
    chamber, and passes it to ``parse_bill``.

    Args:
        chamber: Chamber identifier; forwarded to ``bill_list_url``,
            ``bill_abbr`` and ``parse_bill``.
        session: Session identifier; forwarded unchanged.
        special: Forwarded unchanged (presumably the special-session
            number -- confirm against ``bill_list_url``). Defaults to 0.
    """
    session_url = bill_list_url(chamber, session, special)

    with self.soup_context(session_url) as bill_list_page:
        # Raw string so that ``\d`` reaches the regex engine as a digit
        # class rather than being parsed as an invalid string escape.
        bill_link_re = re.compile(
            r"body=%s&type=(B|R)&bn=\d+" % bill_abbr(chamber))

        for link in bill_list_page.findAll(href=bill_link_re):
            self.parse_bill(chamber, session, special, link)
def parse_bill(self, chamber, session, special, link):
    """Build a ``Bill`` from one bill-list link and register it.

    Derives the bill number and type from ``link``, loads the bill's info
    page for its title, then collects versions, history and votes before
    handing the finished bill to ``self.add_bill``.

    Args:
        chamber: Chamber identifier; used for the bill id prefix and for
            building the info/history/vote URLs.
        session: Session identifier; forwarded to ``Bill`` and the URL
            helpers.
        special: Forwarded unchanged to the URL helpers.
        link: Link element whose first child (``contents[0]``) is used as
            the bill number and whose ``href`` carries a ``type=`` value.

    Raises:
        AttributeError: If the ``href`` contains no ``type=`` at all
            (``re.search`` returns ``None``). Presumably also if the info
            page has no 'Short Title:' label -- confirm against the soup
            library's ``find`` behavior.
    """
    bill_number = link.contents[0]

    # Named ``bill_type`` so the builtin ``type`` is not shadowed.
    # The pattern's trailing empty alternative means a ``type=`` value
    # other than B or R yields '' instead of failing the search.
    bill_type = re.search('type=(B|R|)', link['href']).group(1)
    bill_id = "%s%s %s" % (bill_abbr(chamber), bill_type, bill_number)

    bill_info_url = info_url(chamber, session, special, bill_type,
                             bill_number)

    with self.soup_context(bill_info_url) as info_page:
        # The title is the first child of the element that follows the
        # 'Short Title:' text node.
        title_label = info_page.find(text='Short Title:')
        title = title_label.findNext().contents[0]

        bill = Bill(session, chamber, bill_id, title)
        bill.add_source(bill_info_url)

        self.parse_bill_versions(bill, info_page)

        self.parse_history(
            bill,
            history_url(chamber, session, special, bill_type, bill_number))

        self.parse_votes(
            bill,
            vote_url(chamber, session, special, bill_type, bill_number))

        self.add_bill(bill)