Beispiel #1
0
def test_vote_event_bill_clearing():
    """Re-importing an edited vote event must replace it, not add a new one.

    Two vote events are imported for the same bill; the motion text of the
    first is then corrected and both are imported again.  The number of
    stored ``VoteEvent`` rows has to stay at two after each import.
    """
    jurisdiction = create_jurisdiction()
    session = jurisdiction.legislative_sessions.create(name='1900', identifier='1900')
    house = Organization.objects.create(
        id='org-id', name='House', classification='lower', jurisdiction=jurisdiction)
    bill = Bill.objects.create(
        id='bill-1', identifier='HB 1', legislative_session=session, from_organization=house)
    # A second bill exists only so the first one is not the sole bill in the session.
    Bill.objects.create(
        id='bill-2', identifier='HB 2', legislative_session=session, from_organization=house)

    org_importer = OrganizationImporter('jid')
    mock_importer = DumbMockImporter()
    bill_importer = BillImporter('jid', mock_importer, org_importer)

    def scrape_vote(motion_text):
        # Both scraped votes differ only in their motion text.
        return ScrapeVoteEvent(
            legislative_session='1900',
            start_date='2013',
            classification='anything',
            result='passed',
            motion_text=motion_text,
            bill=bill.identifier,
            bill_chamber='lower',
            chamber='lower')

    def run_import():
        # import_data (rather than a lower-level call) is used so that
        # postimport is called as well.
        VoteEventImporter('jid', mock_importer, org_importer, bill_importer).import_data(
            [scraped.as_dict() for scraped in (first_vote, second_vote)])

    first_vote = scrape_vote('a vote on somthing')  # typo intentional
    second_vote = scrape_vote('a vote on something else')

    run_import()
    assert VoteEvent.objects.count() == 2

    # Once the typo is fixed we still expect two vote events, not three.
    first_vote.motion_text = 'a vote on something'
    run_import()
    assert VoteEvent.objects.count() == 2
def test_vote_event_bill_clearing():
    """Check that a corrected vote event does not linger as a duplicate.

    Imports two vote events on one bill, fixes a typo in the first one's
    motion text, imports again, and asserts after each import that exactly
    two ``VoteEvent`` rows exist.
    """
    juris = create_jurisdiction()
    leg_session = juris.legislative_sessions.create(name='1900', identifier='1900')
    lower_house = Organization.objects.create(
        id='org-id',
        name='House',
        classification='lower',
        jurisdiction=juris,
    )
    target_bill = Bill.objects.create(
        id='bill-1',
        identifier='HB 1',
        legislative_session=leg_session,
        from_organization=lower_house,
    )
    Bill.objects.create(
        id='bill-2',
        identifier='HB 2',
        legislative_session=leg_session,
        from_organization=lower_house,
    )
    org_imp = OrganizationImporter('jid')
    dumb_imp = DumbMockImporter()
    bill_imp = BillImporter('jid', dumb_imp, org_imp)

    misspelled_vote = ScrapeVoteEvent(
        legislative_session='1900',
        start_date='2013',
        classification='anything',
        result='passed',
        motion_text='a vote on somthing',  # typo intentional
        bill=target_bill.identifier,
        bill_chamber='lower',
        chamber='lower',
    )
    other_vote = ScrapeVoteEvent(
        legislative_session='1900',
        start_date='2013',
        classification='anything',
        result='passed',
        motion_text='a vote on something else',
        bill=target_bill.identifier,
        bill_chamber='lower',
        chamber='lower',
    )
    scraped_votes = (misspelled_vote, other_vote)

    # First pass: import_data is required here so that postimport is called.
    payload = [vote.as_dict() for vote in scraped_votes]
    VoteEventImporter('jid', dumb_imp, org_imp, bill_imp).import_data(payload)
    assert VoteEvent.objects.count() == 2

    # Second pass: the typo is fixed; a third vote event must not appear.
    misspelled_vote.motion_text = 'a vote on something'
    payload = [vote.as_dict() for vote in scraped_votes]
    VoteEventImporter('jid', dumb_imp, org_imp, bill_imp).import_data(payload)
    assert VoteEvent.objects.count() == 2
Beispiel #3
0
    def handle_page(self):
        """Populate ``self.obj`` from the bill detail page and yield its votes.

        Four sections of ``self.doc`` are read in order:

        * SUMMARY   - the first non-empty paragraph text is added as an
          abstract with note ``"summary"``.
        * FULL TEXT - each link is added as a version (links containing
          ``"+men+"`` are skipped with a warning).
        * AMENDMENTS - links whose text mentions "adopted" or "engrossed"
          (but not "not adopted" / "not engrossed") are added as versions.
        * HISTORY   - each entry is added as an action unless it is
          classified as ``SKIP``; entries matching ``self.vote_strip_re``
          additionally produce a ``VoteEvent``.

        Yields:
            Whatever ``add_pupa_id`` yields for each vote built from the
            HISTORY section.

        Raises:
            AssertionError: if a history entry has no ``": "`` separator and
                does not start with one of the ``self.actor_map`` keys
                followed by a colon.
        """
        logger = logging.getLogger("va")

        # summary
        summary = self.doc.xpath(
            "/".join(
                [
                    '//h4[starts-with(text(), "SUMMARY")]',
                    "/following-sibling::p",
                    "text()",
                ]
            )
        )
        abstract = summary[0].strip() if summary else ""
        if abstract:
            self.obj.add_abstract(abstract=abstract, note="summary")

        # versions
        for va in self.doc.xpath(
            '//h4[text()="FULL TEXT"]/following-sibling::ul[1]/li/a[1]'
        ):
            # 11/16/09 \xa0House: Prefiled and ordered printed; offered 01/13/10 10100110D
            date, desc = va.text.split(" \xa0")
            # NOTE(review): the original code had a bare
            # `desc.rsplit(" ", 1)[0]  # chop off last part` here whose result
            # was discarded, so the full description has always been used as
            # the version name. The no-op was removed; assigning the result
            # would change stored version names - confirm before doing so.
            link = va.get("href")
            if "http" not in link:
                link = "{}{}".format(BASE_URL, link)
            date = datetime.datetime.strptime(date, "%m/%d/%y").date()

            # budget bills in VA are searchable but no full text available
            if "+men+" in link:
                logger.warning("not adding budget version, bill text not available")
            else:
                # VA duplicates reprinted bills, lets keep the original name
                self.obj.add_version_link(
                    desc, link, date=date, media_type="text/html", on_duplicate="ignore"
                )

        # amendments
        for va in self.doc.xpath(
            '//h4[text()="AMENDMENTS"]/following-sibling::ul[1]/li/a[1]'
        ):
            version_name = va.xpath("string(.)")
            lowered = version_name.lower()
            is_accepted = "adopted" in lowered or "engrossed" in lowered
            is_rejected = "not adopted" in lowered or "not engrossed" in lowered
            if is_accepted and not is_rejected:
                version_url = va.xpath("@href")[0]
                self.obj.add_version_link(
                    version_name,
                    version_url,
                    media_type="text/html",
                    on_duplicate="ignore",
                )

        # actions
        history_items = self.doc.xpath(
            '//h4[text()="HISTORY"]/following-sibling::ul[1]/li'
        )
        # Set when the current entry was merged with the one after it, so the
        # following iteration (which starts on that same entry) is skipped.
        seen_next = False
        for ali, next_ali in pairwise(history_items):
            # If we've used this action text before, we don't need to parse it again
            if seen_next:
                seen_next = False
                continue
            date, action = ali.text_content().split(" \xa0")
            try:
                actor, action = action.split(": ", 1)
            except ValueError:
                # No ": " separator: only tolerated for "<actor>:" lines that
                # carry no action text.
                assert any(
                    action.startswith("{}:".format(x)) for x in self.actor_map
                ), "Unparseable action text found: '{}'".format(action)
                logger.warning(
                    "Skipping apparently-null action: '{}'".format(action)
                )
                continue

            # Bill history entries purely in parentheses tend to be
            # notes and not actions, so we'll skip them.
            if action.startswith("(") and action.endswith(")"):
                continue

            actor = self.actor_map[actor]
            date = datetime.datetime.strptime(date.strip(), "%m/%d/%y").date()

            # if action ends in (##-Y ##-N) remove that part
            vrematch = self.vote_strip_re.match(action)
            # The following conditional logic is messy to handle
            # Virginia's crazy and inconsistently formatted bill
            # histories. Someone less harried and tired than me
            # could probably make this much cleaner. - alo
            if vrematch:
                vote_action, y, n, o = vrematch.groups()
                y = int(y)
                n = int(n)
                # Set default count for "other" votes to 0. We have to
                # do this explicitly as it's excluded from the action
                # text when there were no abstentions (the only type of
                # "other" vote encountered thus far).
                o = int(o) if o else 0

                vote_url = ali.xpath("a/@href")

                # Finds relevant information from the current action if
                # vote count encountered, then searches for the presence
                # of identical counts in the next entry (we assume that
                # it's probably there). If matching votes are found, it
                # merges data in both to create a unified vote record.
                #
                # This is because Virginia usually publishes two lines
                # of history data for a single vote, without guaranteed
                # order, so we unsafely attempt to match on identical
                # vote counts in the next line.
                vote = VoteEvent(
                    start_date=date,
                    chamber=actor,
                    motion_text=vote_action.strip(),
                    result="pass" if y > n else "fail",
                    classification="passage",
                    bill=self.obj,
                )
                vote.set_count("yes", y)
                vote.set_count("no", n)
                vote.set_count("other", o)

                # Extract the action text of the following entry. A missing
                # separator makes the `[1]` lookups raise IndexError (not
                # ValueError, which is all the original caught), so both are
                # handled and treated as "no comparable next action".
                # NOTE(review): `pairwise` is defined outside this method; the
                # None check only matters if it pads the final pair - confirm.
                next_action = ""
                if next_ali is not None:
                    try:
                        next_action = (
                            next_ali.text_content().split(" \xa0")[1].split(": ", 1)[1]
                        )
                    except (IndexError, ValueError):
                        next_action = ""

                vrematch_next = self.vote_strip_re.match(next_action)
                if vrematch_next:
                    vote_action_next, y_next, n_next, o_next = vrematch_next.groups()
                    y_next = int(y_next)
                    n_next = int(n_next)
                    o_next = int(o_next) if o_next else 0
                    vote_url_next = next_ali.xpath("a/@href")
                    # Check that the vote counts match and that only one action
                    # has a URL (otherwise, they're probably different votes).
                    same_counts = [y_next, n_next, o_next] == [y, n, o]
                    if same_counts and len(vote_url) != len(vote_url_next):
                        seen_next = True
                        if not vote_url:
                            # The following entry carries the link; keep this
                            # entry's motion text and action.
                            vote_url = vote_url_next
                        else:
                            # This entry carries the link; take the motion
                            # text and action from the following entry.
                            vote.motion_text = vote_action_next.strip()
                            action = next_action

                if vote_url:
                    # Drain the generator so the vote page is applied to `vote`.
                    list(
                        self.scrape_page_items(
                            VotePage, url=vote_url[0], obj=vote
                        )
                    )
                    vote.add_source(vote_url[0])
                else:
                    vote.add_source(self.url)

                yield from add_pupa_id(vote)

            # categorize actions: first matching pattern wins, None if no match
            for pattern, atype in ACTION_CLASSIFIERS:
                if re.match(pattern, action):
                    break
            else:
                atype = None

            # if the matched classification is SKIP, don't add the action
            if atype != SKIP:
                self.obj.add_action(action, date, chamber=actor, classification=atype)