def parse_senate_actions(self, bill, url):
    """Record every action listed on a senate actions page onto ``bill``.

    ``url`` is registered as a source of ``bill``, fetched through
    ``self.urlopen`` and parsed with lxml. Each row matched by the
    actions-table XPath produces one ``bill.add_action`` call.

    Args:
        bill: object exposing ``add_source`` and ``add_action``.
        url: address of the actions page.

    Raises:
        ValueError: from ``strptime`` when the text of a row's first cell
            does not match ``%m/%d/%Y``.
    """
    bill.add_source(url)
    with self.urlopen(url) as raw_page:
        doc = lxml.html.fromstring(raw_page)
        # Absolute path to the rows of the nested actions table.
        rows = doc.xpath('/html/body/font/form/table/tr[3]/td/div/table/tr')
        for row in rows:
            # First child holds the date text, second the action text.
            when = dt.datetime.strptime(row[0].text_content(), '%m/%d/%Y')
            description = row[1].text_content()
            chamber = senate_get_actor_from_action(description)
            # TODO add the type of action (see MA for an example)
            bill.add_action(chamber, description, when)
def parse_senate_actions(self, bill, url):
    """Record every action listed on a senate actions page onto ``bill``.

    ``url`` is registered as a source of ``bill``, fetched through
    ``self.urlopen`` and parsed with lxml. Each row matched by the
    actions-table XPath produces one ``bill.add_action`` call, classified
    by ``self.get_action``.

    Args:
        bill: object exposing ``add_source`` and ``add_action``.
        url: address of the actions page.

    Raises:
        ValueError: from ``strptime`` when the text of a row's first cell
            does not match ``%m/%d/%Y``.
    """
    bill.add_source(url)
    with self.urlopen(url) as raw_page:
        doc = lxml.html.fromstring(raw_page)
        # Absolute path to the rows of the nested actions table.
        rows = doc.xpath("/html/body/font/form/table/tr[3]/td/div/table/tr")
        for row in rows:
            # First child holds the date text, second the action text.
            when = dt.datetime.strptime(row[0].text_content(), "%m/%d/%Y")
            description = row[1].text_content()
            chamber = senate_get_actor_from_action(description)
            bill.add_action(
                chamber,
                description,
                when,
                type=self.get_action(chamber, description),
            )
def _parse_senate_actions(self, bill, url):
    """Record every action listed on a senate actions page onto ``bill``.

    ``url`` is registered as a source of ``bill``; the ``.text`` of
    ``self.get(url)`` is parsed with lxml. Each row matched by the
    actions-table XPath produces one ``bill.add_action`` call, classified
    by ``self._get_action``.

    Args:
        bill: object exposing ``add_source`` and ``add_action``.
        url: address of the actions page.

    Raises:
        ValueError: from ``strptime`` when the text of a row's first cell
            does not match ``%m/%d/%Y``.
    """
    bill.add_source(url)
    response = self.get(url)
    doc = lxml.html.fromstring(response.text)

    # Absolute path to the rows of the nested actions table.
    rows = doc.xpath('/html/body/font/form/table/tr[3]/td/div/table/tr')
    for row in rows:
        # First child holds the date text, second the action text.
        when = dt.datetime.strptime(row[0].text_content(), '%m/%d/%Y')
        description = row[1].text_content()
        chamber = senate_get_actor_from_action(description)
        bill.add_action(
            chamber,
            description,
            when,
            type=self._get_action(chamber, description),
        )
def parse_senate_actions(self, bill, url):
    """Record every action listed on a senate actions page onto ``bill``.

    ``url`` is registered as a source of ``bill`` and opened through
    ``self.soup_context``. The actions table is located by walking from
    the element with id ``Table5``; each ``tr`` in it produces one
    ``bill.add_action`` call.

    Args:
        bill: object exposing ``add_source`` and ``add_action``.
        url: address of the actions page.

    Raises:
        ValueError: from ``strptime`` when a row's date text does not
            match ``%m/%d/%Y``.
    """
    bill.add_source(url)
    with self.soup_context(url) as soup:
        anchor = soup.find(id='Table5')
        # Fixed sequence of hops from the anchor to the row wrapping the
        # actions table; the doubled sibling steps presumably skip
        # whitespace text nodes -- verify against the live markup.
        holder = anchor.next.next.nextSibling.next.nextSibling.nextSibling
        for row in holder.td.div.table.findAll('tr'):
            date_cell = row.td
            when = dt.datetime.strptime(date_cell.contents[0], '%m/%d/%Y')
            # The action text sits two siblings past the date cell.
            description = date_cell.nextSibling.nextSibling.contents[0]
            chamber = senate_get_actor_from_action(description)
            bill.add_action(chamber, description, when)
def parse_senate_actions(self, bill, url):
    """Record every action listed on a senate actions page onto ``bill``.

    ``url`` is registered as a source of ``bill``, fetched through
    ``self.urlopen`` and parsed with ``BeautifulSoup``. The actions table
    is located by walking from the element with id ``Table5``; each ``tr``
    in it produces one ``bill.add_action`` call.

    Args:
        bill: object exposing ``add_source`` and ``add_action``.
        url: address of the actions page.

    Raises:
        ValueError: from ``strptime`` when a row's date text does not
            match ``%m/%d/%Y``.
    """
    bill.add_source(url)
    with self.urlopen(url) as raw_page:
        soup = BeautifulSoup(raw_page)
        anchor = soup.find(id='Table5')
        # Fixed sequence of hops from the anchor to the row wrapping the
        # actions table; the doubled sibling steps presumably skip
        # whitespace text nodes -- verify against the live markup.
        holder = anchor.next.next.nextSibling.next.nextSibling.nextSibling
        for row in holder.td.div.table.findAll('tr'):
            date_cell = row.td
            when = dt.datetime.strptime(date_cell.contents[0], '%m/%d/%Y')
            # The action text sits two siblings past the date cell.
            description = date_cell.nextSibling.nextSibling.contents[0]
            chamber = senate_get_actor_from_action(description)
            bill.add_action(chamber, description, when)