Ejemplo n.º 1
0
    def parse_house_actions(self, bill, url):
        """Scrape the House actions table and record each action on ``bill``.

        The URL is rewritten from ``BillActions`` to ``BillActionsPrn``
        (presumably the printer-friendly page -- confirm) and registered as a
        source of the bill.  The first table row is skipped (presumably a
        header row).  A row whose first cell contains a date starts a new
        action; a row whose first cell is blank continues the description of
        the previous action.  Each action is recorded exactly once, with its
        complete description.

        :param bill: bill object receiving ``add_source``/``add_action`` calls
        :param url: address of the bill's actions page
        :raises ValueError: if a date cell is not in ``%m/%d/%Y`` format
        """
        url = re.sub("BillActions", "BillActionsPrn", url)
        bill.add_source(url)
        with self.urlopen(url) as actions_page:
            actions_page = lxml.html.fromstring(actions_page)
            rows = actions_page.xpath('//table/tr')

            # Merge continuation rows into their action *before* recording
            # anything.  Recording inside the row loop (plus a trailing
            # "last action" call) used to store partial descriptions and a
            # duplicate of the final action, and failed with a NameError on
            # an empty table.
            merged = []  # [date, description] pairs, in page order
            for row in rows[1:]:
                if len(row) == 0 or row[0].tag != 'td':
                    continue
                date_text = row[0].text_content().strip()
                if date_text:
                    merged.append([
                        dt.datetime.strptime(date_text, '%m/%d/%Y'),
                        row[2].text_content().strip(),
                    ])
                elif merged:
                    merged[-1][1] = (
                        merged[-1][1] + '\n' + row[2].text_content()).rstrip()
                # A blank-date row before any dated row has no action to
                # extend and no date to be recorded under, so it is ignored.

            for date, action in merged:
                actor = house_get_actor_from_action(action)
                #TODO probably need to add the type here as well
                bill.add_action(actor, action, date)
Ejemplo n.º 2
0
    def parse_house_actions(self, bill, url):
        """Scrape the House actions table at ``url`` and add each action to ``bill``.

        The first ``<tr>`` found is always treated as a dated action.  For
        the remaining rows, a first cell holding text starts a new action,
        while a missing or whitespace-only first cell marks the row as a
        continuation of the previous action's description.  Each finished
        action is passed to ``bill.add_action`` together with the actor
        returned by ``house_get_actor_from_action``.

        :param bill: bill object receiving ``add_source``/``add_action`` calls
        :param url: address of the bill's actions page
        :raises ValueError: if a date cell is not in ``%m/%d/%Y`` format
        """
        bill.add_source(url)
        with self.urlopen(url) as actions_page:
            actions_page = BeautifulSoup(actions_page)
            rows = actions_page.findAll('tr')

            # A page without any rows has no actions to record; indexing
            # rows[0] below would otherwise raise IndexError.
            if not rows:
                return

            # start with index 0 because the table doesn't have an opening <tr>
            first_row = rows[0]
            date = first_row.td.contents[0].strip()
            date = dt.datetime.strptime(date, '%m/%d/%Y')
            action = first_row.td.nextSibling.nextSibling.contents[0].strip()

            for row in rows[1:]:
                first_cell = row.td
                if first_cell is None:
                    continue

                # new actions are represented by having dates in the first td
                # otherwise, it's a continuation of the description from the
                # previous action.  Any whitespace-only cell counts as blank,
                # not just a single space, so it is never fed to strptime.
                contents = first_cell.contents
                if len(contents) > 0 and contents[0].strip():
                    # the previous action is complete -- record it
                    actor = house_get_actor_from_action(action)
                    bill.add_action(actor, action, date)
                    date = contents[0].strip()
                    date = dt.datetime.strptime(date, '%m/%d/%Y')
                    action = first_cell.nextSibling.nextSibling
                    action = action.contents[0].strip()
                else:
                    action += ('\n' +
                               first_cell.nextSibling.nextSibling.contents[0])
                    action = action.rstrip()

        # add that last action
        actor = house_get_actor_from_action(action)
        bill.add_action(actor, action, date)
Ejemplo n.º 3
0
    def parse_house_actions(self, bill, url):
        """Scrape the House actions table at ``url`` and add each action to ``bill``.

        The first ``<tr>`` found is always treated as a dated action.  For
        the remaining rows, a first cell holding text starts a new action,
        while a missing or whitespace-only first cell marks the row as a
        continuation of the previous action's description.  Each finished
        action is passed to ``bill.add_action`` together with the actor
        returned by ``house_get_actor_from_action``.

        :param bill: bill object receiving ``add_source``/``add_action`` calls
        :param url: address of the bill's actions page
        :raises ValueError: if a date cell is not in ``%m/%d/%Y`` format
        """
        bill.add_source(url)
        with self.urlopen(url) as actions_page:
            actions_page = BeautifulSoup(actions_page)
            rows = actions_page.findAll('tr')

            # A page without any rows has no actions to record; indexing
            # rows[0] below would otherwise raise IndexError.
            if not rows:
                return

            # start with index 0 because the table doesn't have an opening <tr>
            first_row = rows[0]
            date = first_row.td.contents[0].strip()
            date = dt.datetime.strptime(date, '%m/%d/%Y')
            action = first_row.td.nextSibling.nextSibling.contents[0].strip()

            for row in rows[1:]:
                first_cell = row.td
                if first_cell is None:
                    continue

                # new actions are represented by having dates in the first td
                # otherwise, it's a continuation of the description from the
                # previous action.  Any whitespace-only cell counts as blank,
                # not just a single space, so it is never fed to strptime.
                contents = first_cell.contents
                if len(contents) > 0 and contents[0].strip():
                    # the previous action is complete -- record it
                    actor = house_get_actor_from_action(action)
                    bill.add_action(actor, action, date)
                    date = contents[0].strip()
                    date = dt.datetime.strptime(date, '%m/%d/%Y')
                    action = first_cell.nextSibling.nextSibling
                    action = action.contents[0].strip()
                else:
                    action += ('\n' +
                               first_cell.nextSibling.nextSibling.contents[0])
                    action = action.rstrip()

        # add that last action
        actor = house_get_actor_from_action(action)
        bill.add_action(actor, action, date)
Ejemplo n.º 4
0
    def _parse_house_actions(self, bill, url):
        """Scrape the House actions table and record actions and votes on ``bill``.

        The URL is rewritten from ``BillActions`` to ``BillActionsPrn``
        (presumably the printer-friendly page -- confirm) and registered as a
        source of the bill.  The first table row is skipped (presumably a
        header row).  A row whose first cell contains a date starts a new
        action; a row whose first cell is blank continues the description of
        the previous action.  For every complete action the votes returned by
        ``self._get_votes`` are added to the bill, followed by the action
        itself with the type returned by ``self._get_action``.

        :param bill: bill object receiving ``add_source``, ``add_vote`` and
            ``add_action`` calls
        :param url: address of the bill's actions page
        :raises ValueError: if a date cell is not in ``%m/%d/%Y`` format
        """
        url = re.sub("BillActions", "BillActionsPrn", url)
        bill.add_source(url)
        actions_page = self.get(url).text
        actions_page = lxml.html.fromstring(actions_page)
        rows = actions_page.xpath('//table/tr')

        # Merge continuation rows into their action *before* recording
        # anything.  Recording inside the row loop used to store a multi-row
        # action twice (once truncated, once complete) and look up its votes
        # twice as well.
        merged = []  # [date, description] pairs, in page order
        for row in rows[1:]:
            if len(row) == 0 or row[0].tag != 'td':
                continue
            date_text = row[0].text_content().strip()
            if date_text:
                merged.append([
                    dt.datetime.strptime(date_text, '%m/%d/%Y'),
                    row[2].text_content().strip(),
                ])
            elif merged:
                merged[-1][1] = (
                    merged[-1][1] + '\n' + row[2].text_content()).rstrip()
            # A blank-date row before any dated row has no action to extend
            # and no date to be recorded under, so it is ignored instead of
            # failing on an unbound ``action``.

        for date, action in merged:
            actor = house_get_actor_from_action(action)
            type_class = self._get_action(actor, action)

            votes = self._get_votes(date, actor, action)
            for vote in votes:
                bill.add_vote(vote)

            bill.add_action(actor, action, date, type=type_class)
Ejemplo n.º 5
0
    def parse_house_actions(self, bill, url):
        """Scrape the House actions table and record each typed action on ``bill``.

        The URL is rewritten from ``BillActions`` to ``BillActionsPrn``
        (presumably the printer-friendly page -- confirm) and registered as a
        source of the bill.  The first table row is skipped (presumably a
        header row).  A row whose first cell contains a date starts a new
        action; a row whose first cell is blank continues the description of
        the previous action.  Each complete action is recorded once, with the
        type returned by ``self.get_action``.

        :param bill: bill object receiving ``add_source``/``add_action`` calls
        :param url: address of the bill's actions page
        :raises ValueError: if a date cell is not in ``%m/%d/%Y`` format
        """
        url = re.sub("BillActions", "BillActionsPrn", url)
        bill.add_source(url)
        with self.urlopen(url) as actions_page:
            actions_page = lxml.html.fromstring(actions_page)
            rows = actions_page.xpath("//table/tr")

            # Merge continuation rows into their action *before* recording
            # anything.  Recording inside the row loop used to store a
            # multi-row action twice: once truncated, once complete.
            merged = []  # [date, description] pairs, in page order
            for row in rows[1:]:
                if len(row) == 0 or row[0].tag != "td":
                    continue
                date_text = row[0].text_content().strip()
                if date_text:
                    merged.append([
                        dt.datetime.strptime(date_text, "%m/%d/%Y"),
                        row[2].text_content().strip(),
                    ])
                elif merged:
                    merged[-1][1] = (
                        merged[-1][1] + "\n" + row[2].text_content()).rstrip()
                # A blank-date row before any dated row has no action to
                # extend and no date to be recorded under, so it is ignored
                # instead of failing on an unbound ``action``.

            for date, action in merged:
                actor = house_get_actor_from_action(action)
                type_class = self.get_action(actor, action)
                bill.add_action(actor, action, date, type=type_class)