Ejemplo n.º 1
0
    def scrape_senate_vote(self, bill, url, date):
        """Download a Senate roll-call PDF and yield the VoteEvent parsed from it.

        Args:
            bill: Bill the vote is attached to.
            url: Location of the vote PDF; also recorded as the vote's source
                and used as its ``pupa_id``.
            date: Object exposing ``strftime``; gives the day of the vote.

        Yields:
            The populated VoteEvent. When the text carries a
            "Yea: n  Nay: n  Absent: n" summary line, parsing is delegated to
            ``scrape_senate_vote_3col`` and its results are yielded instead.
            Nothing is yielded when the PDF cannot be downloaded.
        """
        try:
            filename, _ = self.urlretrieve(url)
        except scrapelib.HTTPError:
            self.warning("missing vote file %s" % url)
            return

        vote = VoteEvent(
            chamber="upper",
            start_date=date.strftime("%Y-%m-%d"),
            motion_text="Passage",
            # placeholder: the real result is computed once names are tallied
            result="fail",
            classification="passage",
            bill=bill,
        )
        vote.add_source(url)
        vote.pupa_id = url

        # Remove the temporary download even if the PDF conversion fails.
        try:
            text = convert_pdf(filename, "text").decode("utf-8")
        finally:
            os.remove(filename)

        if re.search(r"Yea:\s+\d+\s+Nay:\s+\d+\s+Absent:\s+\d+", text):
            yield from self.scrape_senate_vote_3col(bill, vote, text, url,
                                                    date)
            return

        # The split keeps the captured labels, giving [..., label, names, ...];
        # reversing lets pop() hand them back in document order.
        data = re.split(r"(Yea|Nay|Absent)s?:", text)[::-1]
        # filter() returns a lazy iterator on Python 3: it is always truthy and
        # has no pop(), so it must be materialized before the loop below.
        data = list(filter(None, data))
        keymap = dict(yea="yes", nay="no")
        actual_vote = collections.defaultdict(int)
        vote_count = {"yes": 0, "no": 0, "other": 0}
        while data:
            vote_val = data.pop()
            key = keymap.get(vote_val.lower(), "other")
            values = data.pop()
            for name in re.split(r"(?:[\s,]+and\s|[\s,]{2,})", values):
                if name.lower().strip() == "none.":
                    continue
                name = name.replace("..", "")
                name = re.sub(r"\.$", "", name)
                name = name.strip("-1234567890 \n")
                if not name:
                    continue
                vote.vote(key, name)
                actual_vote[vote_val] += 1
                vote_count[key] += 1
            # sanity check: per-label tally must match the per-option tally
            assert actual_vote[vote_val] == vote_count[key]

        for key, value in vote_count.items():
            vote.set_count(key, value)
        # replace the placeholder result: passes only with more yes votes
        # than no and other votes combined
        vote.result = ("pass" if vote_count["yes"] >
                       (vote_count["no"] + vote_count["other"]) else "fail")

        yield vote
Ejemplo n.º 2
0
def toy_vote_event():
    """Return a minimal passing VoteEvent carrying one annotated source."""
    fields = {
        "legislative_session": "2009",
        "motion_text": "passage of the bill",
        "start_date": "2009-01-07",
        "result": "pass",
        "classification": "passage",
    }
    event = VoteEvent(**fields)
    event.add_source("http://uri.example.com/", note="foo")
    return event
Ejemplo n.º 3
0
    def add_vote(self, bill, chamber, date, text, url):
        """Build a VoteEvent from an action text containing "Ayes n; Noes n" totals.

        Args:
            bill: Bill the vote belongs to.
            chamber: Chamber value passed straight to the VoteEvent.
            date: Naive datetime, localized with the module's ``TIMEZONE``.
            text: Action text; supplies the totals, the motion text and the
                classification (via ``motion_classifiers``).
            url: Link to the roll call; may be empty or ``None``.

        Returns:
            The VoteEvent, with individual votes attached when ``url`` points
            at a house ("av") or senate ("sv") roll call.

        Raises:
            IndexError: if ``text`` contains no recognizable vote totals.
        """
        votes = re.findall(r"Ayes,?[\s]?(\d+)[,;]\s+N(?:oes|ays),?[\s]?(\d+)", text)
        yes, no = int(votes[0][0]), int(votes[0][1])

        # First classifier whose pattern matches the start of the text wins;
        # an empty list means "unclassified".
        vtype = []
        for regex, classification in motion_classifiers.items():
            if re.match(regex, text):
                vtype = classification
                break

        v = VoteEvent(
            chamber=chamber,
            start_date=TIMEZONE.localize(date),
            motion_text=text,
            result="pass" if yes > no else "fail",
            classification=vtype,
            bill=bill,
        )
        # Only derive a dedupe key when there is a url: the unguarded
        # url.split() crashed on None and produced "" for an empty string.
        if url:
            v.dedupe_key = url.split("/")[-1]
        v.set_count("yes", yes)
        v.set_count("no", no)

        # fetch the vote itself
        if url:
            v.add_source(url)

            if "av" in url:
                self.add_house_votes(v, url)
            elif "sv" in url:
                self.add_senate_votes(v, url)

        return v
Ejemplo n.º 4
0
    def parse_vote_page(self, vote_url, bill):
        """Fetch an HTML roll-call page and turn it into a VoteEvent.

        The chamber is taken from the URL, the date and motion from the page
        header cells, the totals from the cells sharing the "Yeas" cell's CSS
        class, and the individual votes from the text cells following each
        "Voting Yea" / "Voting Nay" / "Not Voting" / "Excused" heading.
        """
        nbsp = u"\xa0"
        doc = lxml.html.fromstring(self.get(vote_url).text)

        chamber = "upper" if "senate" in vote_url else "lower"

        # header cell looks like "Legislative Day: Mar 23, 2009"-style text;
        # the date itself starts at offset 18
        raw_date = doc.xpath('//td[starts-with(text(), "Legislative")]')[0].text
        raw_date = raw_date.replace(nbsp, " ")
        date = datetime.datetime.strptime(raw_date[18:], "%b %d, %Y")

        motion_cells = doc.xpath('//td[@colspan="23"]')
        motion = "".join(x.text_content() for x in motion_cells)
        if motion == "":
            motion = "No motion given"  # XXX: Double check this. See SJ 3.
        motion = motion.replace(nbsp, " ")

        # every totals cell shares the class of the "Yeas" cell; the first
        # text node is skipped
        tot_class = doc.xpath('//td[contains(text(), "Yeas")]')[0].get("class")
        totals = doc.xpath('//td[@class="%s"]/text()' % tot_class)[1:]
        yes_count = int(totals[0].split()[-1])
        no_count = int(totals[1].split()[-1])
        other_count = sum(int(totals[i].split()[-1]) for i in (2, 3, 4))

        vote = VoteEvent(
            bill=bill,
            chamber=chamber,
            start_date=date.strftime("%Y-%m-%d"),
            motion_text=motion,
            classification="passage",
            result="pass" if yes_count > no_count else "fail",
        )
        vote.pupa_id = vote_url  # contains sequence number
        for option, total in (("yes", yes_count), ("no", no_count),
                              ("other", other_count)):
            vote.set_count(option, total)

        # a heading cell selects the recorder; every later cell is a voter
        # name until the next heading
        headings = (
            ("Voting Yea", vote.yes),
            ("Voting Nay", vote.no),
            ("Not Voting", vote.other),
            ("Excused", vote.other),
        )
        record = None
        for cell in doc.xpath("//td/text()"):
            cell = cell.replace(nbsp, " ")
            for prefix, recorder in headings:
                if cell.startswith(prefix):
                    record = recorder
                    break
            else:
                if record:
                    record(cell.rstrip("*"))

        return vote
Ejemplo n.º 5
0
def test_vote_event_pupa_identifier_dedupe():
    """Vote events sharing a pupa_id are deduplicated; a new pupa_id inserts."""
    jurisdiction = create_jurisdiction()
    jurisdiction.legislative_sessions.create(name="1900", identifier="1900")
    Organization.objects.create(
        id="org-id",
        name="Legislature",
        classification="legislature",
        jurisdiction=jurisdiction,
    )

    vote_event = ScrapeVoteEvent(
        legislative_session="1900",
        start_date="2013",
        classification="anything",
        result="passed",
        motion_text="a vote on something",
        identifier="Roll Call No. 1",
    )
    vote_event.pupa_id = "foo"

    dmi = DumbMockImporter()
    oi = OrganizationImporter("jid")
    bi = BillImporter("jid", dmi, oi)

    def import_once():
        # a fresh importer per call, exactly as a new scrape run would use
        importer = VoteEventImporter("jid", dmi, oi, bi)
        _, outcome = importer.import_item(vote_event.as_dict())
        return outcome

    assert import_once() == "insert"
    assert VoteEvent.objects.count() == 1

    # same exact vote event, no changes
    assert import_once() == "noop"
    assert VoteEvent.objects.count() == 1

    # new info, update
    vote_event.result = "failed"
    assert import_once() == "update"
    assert VoteEvent.objects.count() == 1

    # new bill identifier, update
    vote_event.identifier = "First Roll Call"
    assert import_once() == "update"
    assert VoteEvent.objects.count() == 1

    # new identifier, insert
    vote_event.pupa_id = "bar"
    assert import_once() == "insert"
    assert VoteEvent.objects.count() == 2
Ejemplo n.º 6
0
    def scrape_votes_old(self, bill, billname, session):
        """Yield one VoteEvent per journal link on the archived bill page.

        Each journal link supplies the date; the row it sits in supplies the
        chamber and motion; the following row holds the Yea and Nay name
        tables. Counts are derived from the number of names found.
        """
        vote_url = ("http://archives.legislature.state.oh.us/bills.cfm?ID=" +
                    session + "_" + billname)
        page = lxml.html.fromstring(self.get(vote_url).text)
        chamber_codes = {"House": "lower", "Senate": "upper"}

        for jlink in page.xpath("//a[contains(@href, 'JournalText')]"):
            parsed = datetime.datetime.strptime(jlink.text, "%m/%d/%Y")
            date = "{:%Y-%m-%d}".format(self._tz.localize(parsed).date())

            # details reads "<Chamber> - <motion>\n..."
            details = jlink.xpath("string(../../../td[2])")
            chamber_name = details.split(" - ")[0]
            chamber = chamber_codes.get(chamber_name)
            if chamber is None:
                raise ScrapeError("Bad chamber: %s" % chamber_name)

            motion = details.split(" - ")[1].split("\n")[0].strip()

            vote_row = jlink.xpath("../../..")[0].getnext()

            yea_div = vote_row.xpath("td/font/div[contains(@id, 'Yea')]")[0]
            yea_names = (td.xpath("string()")
                         for td in yea_div.xpath("table/tr/td"))
            yeas = [name for name in yea_names if name]

            no_div = vote_row.xpath("td/font/div[contains(@id, 'Nay')]")[0]
            nay_names = (td.xpath("string()")
                         for td in no_div.xpath("table/tr/td"))
            nays = [name for name in nay_names if name]

            vote = VoteEvent(
                chamber=chamber,
                start_date=date,
                motion_text=motion,
                result="pass" if len(yeas) > len(nays) else "fail",
                bill=bill,
                classification="passage",
            )

            for voter in yeas:
                vote.yes(voter)
            for voter in nays:
                vote.no(voter)

            vote.add_source(vote_url)

            yield vote
Ejemplo n.º 7
0
 def _get_votes(self, date, actor, action, bill, url):
     """Yield a VoteEvent when the action text embeds "AYES: n NOES: n" totals.

     The text before the totals becomes the motion text. Nothing is yielded
     when the action does not mention yeas/ayes or the pattern does not match.
     """
     vre = r"(?P<leader>.*)(AYES|YEAS):\s+(?P<yeas>\d+)\s+(NOES|NAYS):\s+(?P<nays>\d+).*"
     # cheap pre-check before running the regular expression
     if not ("YEAS" in action.upper() or "AYES" in action.upper()):
         return
     match = re.match(vre, action)
     if not match:
         return

     groups = match.groupdict()
     yes = int(groups["yeas"])
     no = int(groups["nays"])
     outcome = "pass" if yes > no else "fail"
     vote = VoteEvent(
         chamber=actor,
         motion_text=groups["leader"],
         result=outcome,
         classification="passage",
         start_date=TIMEZONE.localize(date),
         bill=bill,
     )
     vote.add_source(url)
     yield vote
Ejemplo n.º 8
0
def test_full_vote_event():
    """Import a vote event with one count and two named votes; check each relation."""
    jurisdiction = create_jurisdiction()
    jurisdiction.legislative_sessions.create(name="1900", identifier="1900")
    john = ScrapePerson("John Smith", primary_org="lower")
    adam = ScrapePerson("Adam Smith", primary_org="lower")
    house = ScrapeOrganization(name="House", classification="lower")
    bill = ScrapeBill(
        "HB 1", "1900", "Axe & Tack Tax Act", from_organization=house._id
    )
    vote_event = ScrapeVoteEvent(
        legislative_session="1900",
        motion_text="passage",
        start_date="1900-04-01",
        classification="passage:bill",
        result="pass",
        bill_chamber="lower",
        bill="HB 1",
        organization=house._id,
    )
    vote_event.set_count("yes", 20)
    vote_event.yes("John Smith")
    vote_event.no("Adam Smith")

    # import dependencies first: organization, people, memberships, bill
    oi = OrganizationImporter("jid")
    oi.import_data([house.as_dict()])

    pi = PersonImporter("jid")
    pi.import_data([john.as_dict(), adam.as_dict()])

    mi = MembershipImporter("jid", pi, oi, DumbMockImporter())
    mi.import_data([person._related[0].as_dict() for person in (john, adam)])

    bi = BillImporter("jid", oi, pi)
    bi.import_data([bill.as_dict()])

    VoteEventImporter("jid", pi, oi, bi).import_data([vote_event.as_dict()])

    assert VoteEvent.objects.count() == 1
    stored = VoteEvent.objects.get()
    assert stored.legislative_session == LegislativeSession.objects.get()
    assert stored.motion_classification == ["passage:bill"]
    assert stored.bill == Bill.objects.get()
    count = stored.counts.get()
    assert count.option == "yes"
    assert count.value == 20
    votes = list(stored.votes.all())
    assert len(votes) == 2
    for cast in stored.votes.all():
        if cast.voter_name == "John Smith":
            expected_option, expected_name = "yes", "John Smith"
        else:
            expected_option, expected_name = "no", "Adam Smith"
        assert cast.option == expected_option
        assert cast.voter == Person.objects.get(name=expected_name)
Ejemplo n.º 9
0
def test_fix_bill_id():
    """A vote event referencing "HB1" resolves to bill "HB 1" via the import transformer."""
    j = create_jurisdiction()
    j.legislative_sessions.create(name="1900", identifier="1900")

    org1 = ScrapeOrganization(name="House", classification="lower")
    bill = ScrapeBill("HB 1",
                      "1900",
                      "Test Bill ID",
                      classification="bill",
                      chamber="lower")

    oi = OrganizationImporter("jid")

    oi.import_data([org1.as_dict()])

    from openstates.settings import IMPORT_TRANSFORMERS

    # Normalizes identifiers such as "HB1" / "HB 001" to "HB 1".
    IMPORT_TRANSFORMERS["bill"] = {
        "identifier":
        lambda x: re.sub(r"([A-Z]*)\s*0*([-\d]+)", r"\1 \2", x, count=1)
    }

    # Restore the global transformer table even when an import raises, so a
    # failure here cannot leak the transformer into other tests.
    try:
        bi = BillImporter("jid", oi, DumbMockImporter())
        bi.import_data([bill.as_dict()])

        ve = ScrapeVoteEvent(
            legislative_session="1900",
            motion_text="passage",
            start_date="1900-04-02",
            classification="passage:bill",
            result="fail",
            bill_chamber="lower",
            bill="HB1",
            identifier="4",
            bill_action="passage",
            organization=org1._id,
        )

        VoteEventImporter("jid", DumbMockImporter(), oi,
                          bi).import_data([ve.as_dict()])
    finally:
        IMPORT_TRANSFORMERS["bill"] = {}

    ve = VoteEvent.objects.get()
    # The original line was a bare comparison, so the test verified nothing.
    assert ve.bill.identifier == "HB 1"
Ejemplo n.º 10
0
def test_vote_event_org_chamber():
    """Passing ``chamber`` yields an organization pseudo-id keyed on classification."""
    fields = {
        "legislative_session": "2009",
        "motion_text": "passage of the bill",
        "start_date": "2009-01-07",
        "result": "pass",
        "classification": "passage",
        "chamber": "upper",
    }
    event = VoteEvent(**fields)
    expected = {"classification": "upper"}
    assert get_pseudo_id(event.organization) == expected
Ejemplo n.º 11
0
def test_vote_event_org_obj():
    """Passing an Organization object stores that organization's ``_id``."""
    committee = Organization("something", classification="committee")
    fields = {
        "legislative_session": "2009",
        "motion_text": "passage of the bill",
        "start_date": "2009-01-07",
        "result": "pass",
        "classification": "passage",
        "organization": committee,
    }
    event = VoteEvent(**fields)
    assert event.organization == committee._id
Ejemplo n.º 12
0
def test_vote_event_org_dict():
    """Passing a dict as the organization round-trips through the pseudo-id."""
    odict = {"name": "Random Committee", "classification": "committee"}
    fields = {
        "legislative_session": "2009",
        "motion_text": "passage of the bill",
        "start_date": "2009-01-07",
        "result": "pass",
        "classification": "passage",
        "organization": odict,
    }
    event = VoteEvent(**fields)
    assert get_pseudo_id(event.organization) == odict
Ejemplo n.º 13
0
def test_org_and_chamber_conflict():
    """Supplying both ``organization`` and ``chamber`` raises ValueError."""
    conflicting = {
        "legislative_session": "2009",
        "motion_text": "passage of the bill",
        "start_date": "2009-01-07",
        "result": "pass",
        "classification": "passage",
        "organization": "test",
        "chamber": "lower",
    }
    with pytest.raises(ValueError):
        VoteEvent(**conflicting)
Ejemplo n.º 14
0
def test_vote_event_bill_id_dedupe():
    """Without a pupa_id, vote events are deduplicated per bill."""
    create_jurisdiction()

    def make_bill(pk, identifier):
        return Bill.objects.create(
            id=pk,
            identifier=identifier,
            legislative_session=LegislativeSession.objects.get(),
            from_organization=Organization.objects.get(classification="lower"),
        )

    def make_vote_event(bill_identifier):
        return ScrapeVoteEvent(
            legislative_session="1900",
            start_date="2013",
            classification="anything",
            result="passed",
            motion_text="a vote on something",
            bill=bill_identifier,
            bill_chamber="lower",
            chamber="lower",
        )

    bill = make_bill("bill-1", "HB 1")
    bill2 = make_bill("bill-2", "HB 2")

    vote_event = make_vote_event(bill.identifier)
    bi = BillImporter("jid")

    def import_once(event):
        # a fresh importer per call, as in separate scrape runs
        _, outcome = VoteEventImporter("jid", bi).import_item(event.as_dict())
        return outcome

    assert import_once(vote_event) == "insert"
    assert VoteEvent.objects.count() == 1

    # same exact vote event, no changes
    assert import_once(vote_event) == "noop"
    assert VoteEvent.objects.count() == 1

    # new info, update
    vote_event.result = "failed"
    assert import_once(vote_event) == "update"
    assert VoteEvent.objects.count() == 1

    # new vote event, insert
    vote_event = make_vote_event(bill2.identifier)
    assert import_once(vote_event) == "insert"
    assert VoteEvent.objects.count() == 2
Ejemplo n.º 15
0
def test_vote_event_bill_clearing():
    """Re-importing after a motion-text fix must not leave a stale vote event behind.

    Guards against vote events piling up on a bill when a changed field makes
    an existing event look like a new one.
    """
    jurisdiction = create_jurisdiction()
    session = jurisdiction.legislative_sessions.create(name="1900",
                                                       identifier="1900")
    org = Organization.objects.create(
        id="org-id",
        name="House",
        classification="lower",
        jurisdiction=jurisdiction,
    )
    bill = Bill.objects.create(
        id="bill-1",
        identifier="HB 1",
        legislative_session=session,
        from_organization=org,
    )
    Bill.objects.create(
        id="bill-2",
        identifier="HB 2",
        legislative_session=session,
        from_organization=org,
    )
    oi = OrganizationImporter("jid")
    dmi = DumbMockImporter()
    bi = BillImporter("jid", dmi, oi)

    def make_vote_event(motion_text):
        return ScrapeVoteEvent(
            legislative_session="1900",
            start_date="2013",
            classification="anything",
            result="passed",
            motion_text=motion_text,
            bill=bill.identifier,
            bill_chamber="lower",
            chamber="lower",
        )

    vote_event1 = make_vote_event("a vote on somthing")  # typo intentional
    vote_event2 = make_vote_event("a vote on something else")

    def import_both():
        # have to use import_data so postimport is called
        payload = [vote_event1.as_dict(), vote_event2.as_dict()]
        VoteEventImporter("jid", dmi, oi, bi).import_data(payload)

    import_both()
    assert VoteEvent.objects.count() == 2

    # a typo is fixed, we don't want 3 vote events now
    vote_event1.motion_text = "a vote on something"
    import_both()
    assert VoteEvent.objects.count() == 2
Ejemplo n.º 16
0
    def parse_vote(
        self, bill, journal_entry_number, action, act_chamber, act_date, url
    ):
        """Yield a VoteEvent built from tally tokens embedded in an action string.

        Tokens made of a non-digit followed by a digit are read as counts:
        "Y.." sets yes, "N.." sets no, and "E.."/"A.." add to other. The
        result follows "PASSED"/"FAILED" in the action when present and
        otherwise falls back to comparing yes against no.
        """
        yes = no = other = 0
        for token in action.split():
            if not re.match(r"[\D][\d]", token):
                continue
            if "Y" in token:
                yes = int(token[1:])
            elif "N" in token:
                no = int(token[1:])
            elif "E" in token or "A" in token:
                other += int(token[1:])

        # explicit wording in the action wins over the numeric comparison
        if "PASSED" in action:
            result = "pass"
        elif "FAILED" in action or yes <= no:
            result = "fail"
        else:
            result = "pass"

        motion = action + " #" + journal_entry_number
        vote = VoteEvent(
            bill=bill,
            start_date=act_date.strftime("%Y-%m-%d"),
            chamber=act_chamber,
            motion_text=motion,
            result=result,
            classification="passage",
        )

        for option, total in (("yes", yes), ("no", no), ("other", other)):
            vote.set_count(option, total)
        vote.add_source(url)

        yield vote
Ejemplo n.º 17
0
def viva_voce_votes(root, session, chamber):
    """Yield a zero-count VoteEvent for each valid viva-voce notice under ``root``.

    A notice is a ``div`` whose text starts with "All Members are deemed";
    each is wrapped in ``MaybeViva`` and skipped unless it reports itself valid.
    """
    notices = root.xpath(u'//div[starts-with(., "All Members are deemed")]')
    for el in notices:
        mv = MaybeViva(el)
        if mv.is_valid:
            fields = {
                "chamber": chamber,
                "start_date": None,
                "motion_text": "passage" if mv.passed else "other",
                "result": "pass" if mv.passed else "fail",
                "classification": "passage" if mv.passed else None,
                "legislative_session": session[0:2],
                "bill": mv.bill_id,
                "bill_chamber": mv.chamber,
            }
            v = VoteEvent(**fields)

            # no individual tallies are recorded for these votes
            for option in ("yes", "no", "absent", "not voting"):
                v.set_count(option, 0)

            yield v
Ejemplo n.º 18
0
def test_vote_event_bill_actions_two_stage():
    """Two votes naming the same bill action, imported in separate runs, must not conflict.

    Companion to the single-scrape case: the action has to stay linked to the
    first vote event and be left unset on the second even when the second
    vote only arrives in a later import.
    """
    create_jurisdiction()
    bill = ScrapeBill("HB 1", "1900", "Axe & Tack Tax Act", chamber="lower")

    bill.add_action(description="passage", date="1900-04-02", chamber="lower")

    shared_fields = {
        "legislative_session": "1900",
        "motion_text": "passage",
        "start_date": "1900-04-02",
        "classification": "passage:bill",
        "result": "pass",
        "bill_chamber": "lower",
        "bill": "HB 1",
        "bill_action": "passage",
        "chamber": "lower",
    }
    ve1 = ScrapeVoteEvent(**shared_fields)
    ve2 = ScrapeVoteEvent(**shared_fields)
    # disambiguate them
    ve1.pupa_id = "one"
    ve2.pupa_id = "two"

    bi = BillImporter("jid")
    bi.import_data([bill.as_dict()])

    # first imports just fine
    VoteEventImporter("jid", bi).import_data([ve1.as_dict()])
    stored = list(VoteEvent.objects.all())
    assert len(stored) == 1
    assert stored[0].bill_action is not None

    # when second is imported, ensure that action stays pinned to first just
    # as it would have if they were both in same import
    VoteEventImporter("jid", bi).import_data([ve1.as_dict(), ve2.as_dict()])
    stored = list(VoteEvent.objects.all())
    assert len(stored) == 2
    first, second = stored[0], stored[1]
    assert first.bill_action is not None
    assert second.bill_action is None
def test_fix_bill_id():
    """A vote event referencing "HB1" resolves to bill "HB 1" via ``fix_bill_id``."""
    create_jurisdiction()
    bill = ScrapeBill("HB 1",
                      "1900",
                      "Test Bill ID",
                      classification="bill",
                      chamber="lower")

    from openstates.settings import IMPORT_TRANSFORMERS

    IMPORT_TRANSFORMERS["bill"] = {
        "identifier": fix_bill_id,
    }

    # Restore the global transformer table even when an import raises, so a
    # failure here cannot leak the transformer into other tests.
    try:
        bi = BillImporter("jid")
        bi.import_data([bill.as_dict()])

        ve = ScrapeVoteEvent(
            legislative_session="1900",
            motion_text="passage",
            start_date="1900-04-02",
            classification="passage:bill",
            result="fail",
            bill_chamber="lower",
            bill="HB1",
            identifier="4",
            bill_action="passage",
            chamber="lower",
        )

        VoteEventImporter("jid", bi).import_data([ve.as_dict()])
    finally:
        IMPORT_TRANSFORMERS["bill"] = {}

    ve = VoteEvent.objects.get()
    # The original line was a bare comparison, so the test verified nothing.
    assert ve.bill.identifier == "HB 1"
Ejemplo n.º 20
0
def test_full_vote_event():
    """Import a vote event with one count and two named votes; check each relation."""
    create_jurisdiction()
    bill = ScrapeBill("HB 1", "1900", "Axe & Tack Tax Act", chamber="lower")
    event_fields = {
        "legislative_session": "1900",
        "motion_text": "passage",
        "start_date": "1900-04-01",
        "classification": "passage:bill",
        "result": "pass",
        "bill_chamber": "lower",
        "bill": "HB 1",
        "chamber": "lower",
    }
    vote_event = ScrapeVoteEvent(**event_fields)
    vote_event.set_count("yes", 20)
    vote_event.yes("John Smith")
    vote_event.no("Adam Smith")

    # both voters must exist as members of the lower chamber to be resolved
    for voter_name in ("John Smith", "Adam Smith"):
        Person.objects.create(name=voter_name)
    for person in Person.objects.all():
        lower = Organization.objects.get(classification="lower")
        person.memberships.create(organization=lower)

    bi = BillImporter("jid")
    bi.import_data([bill.as_dict()])

    VoteEventImporter("jid", bi).import_data([vote_event.as_dict()])

    assert VoteEvent.objects.count() == 1
    stored = VoteEvent.objects.get()
    assert stored.legislative_session == LegislativeSession.objects.get()
    assert stored.motion_classification == ["passage:bill"]
    assert stored.bill == Bill.objects.get()
    count = stored.counts.get()
    assert count.option == "yes"
    assert count.value == 20
    votes = list(stored.votes.all())
    assert len(votes) == 2
    for cast in stored.votes.all():
        if cast.voter_name == "John Smith":
            expected_option, expected_name = "yes", "John Smith"
        else:
            expected_option, expected_name = "no", "Adam Smith"
        assert cast.option == expected_option
        assert cast.voter == Person.objects.get(name=expected_name)
Ejemplo n.º 21
0
    def add_archived_votes(self, bill, bill_id):
        """Yield a VoteEvent for every archived roll call stored under this bill.

        ``bill_id`` is rewritten into the archive key form: the first
        character of its first word followed by the remaining words, with a
        two- or three-character final word left-padded with zeros to four.
        Nothing is yielded when that key is absent from ``self.archived_votes``.
        """
        parts = bill_id.split()
        parts[0] = parts[0][0]
        number_length = len(parts[-1])
        if number_length == 2:
            parts[-1] = "00" + parts[-1]
        elif number_length == 3:
            parts[-1] = "0" + parts[-1]
        archive_key = "".join(parts)

        if archive_key not in self.archived_votes:
            return

        roll_calls = self.archived_votes[archive_key]
        for vote_key, legislator_votes in roll_calls.items():
            (
                vote_date,
                r_number,
                action_number,
                action_vote_result,
                archive_url,
                cod,
                _,
            ) = vote_key

            # the archive url ends in "S" for upper-chamber votes
            chamber = "upper" if archive_url[-1] == "S" else "lower"

            start_date = eastern.localize(vote_date).isoformat()

            motion_text = (action_number + r_number + cod +
                           action_vote_result).replace(" ", "_")

            ve = VoteEvent(
                chamber=chamber,  # TODO: check this
                start_date=start_date,
                motion_text=motion_text,
                bill=bill,
                # No indication on classification for archived votes
                classification="other",
                result=action_vote_result,
            )
            ve.add_source(archive_url)

            for cast in legislator_votes:
                ve.vote(cast["how_voted"], cast["leg"])

            yield ve
Ejemplo n.º 22
0
    def process_committee_vote(self, committee_action, bill):
        """Build a committee VoteEvent from a committee action record.

        Args:
            committee_action: Mapping with "ActionDate" and "Vote" keys, where
                "Vote" is an iterable of mappings carrying "VoteType" and
                "VoteCount" (an empty string counts as zero).
            bill: Bill the vote belongs to.

        Returns:
            The VoteEvent, or ``None`` (after logging a warning) when the
            action lacks the date or the vote data.
        """
        try:
            date = committee_action["ActionDate"]
            vote_info = committee_action["Vote"]

        except KeyError:
            self.logger.warning("Committee vote has no data. Skipping.")
            return
        date = self.date_format(date)

        # Start every tally at zero: a record with no "Yes" or no "No" entry
        # previously left the name unbound and raised UnboundLocalError below.
        yes_count = 0
        no_count = 0
        other_count = 0
        for tally in vote_info:
            vote_count = 0 if tally["VoteCount"] == "" else int(tally["VoteCount"])

            if tally["VoteType"] == "Yes":
                yes_count = vote_count
            elif tally["VoteType"] == "No":
                no_count = vote_count
            else:
                other_count += vote_count

        result = "pass" if yes_count > no_count else "fail"

        v = VoteEvent(
            chamber="legislature",
            start_date=date,
            motion_text="Committee Vote",
            result=result,
            classification="committee",
            bill=bill,
        )
        v.set_count("yes", yes_count)
        v.set_count("no", no_count)
        v.set_count("other", other_count)

        return v
Ejemplo n.º 23
0
    def scrape_votes(self, vote_url, bill, chamber):
        """Download a roll-call PDF and yield a VoteEvent per "NOT VOTING : " section.

        The PDF text is walked line by line. Date/time and motion lines, the
        "YEAS:"/"NAYS:"/... total lines and the "YEAS :"/"NAYS :"/... name
        sections update local state; each "NOT VOTING : " heading closes a
        vote, which is built from that state and yielded.

        Args:
            vote_url: Location of the PDF; also recorded as the vote's source.
            bill: Bill the vote belongs to.
            chamber: Chamber value passed straight to the VoteEvent.

        Yields:
            VoteEvent objects; nothing when the PDF cannot be downloaded.
        """

        try:
            filename, response = self.urlretrieve(vote_url)
        except scrapelib.HTTPError:
            self.logger.warning("PDF not posted or available")
            return
        # Grabs text from pdf
        pdflines = [
            line.decode("utf-8")
            for line in convert_pdf(filename, "text").splitlines()
        ]
        os.remove(filename)

        # NOTE(review): initial_date, motion, yeas, nays, abstained and
        # not_voting are only bound once their line has been seen; the code
        # relies on the PDF presenting them before the "NOT VOTING : " heading.
        vote_date = 0
        voters = defaultdict(list)
        for x in range(len(pdflines)):
            line = pdflines[x]
            # a line containing something like 1/2/2020 is kept whole as the date
            if re.search(r"(\d+/\d+/\d+)", line):
                initial_date = line.strip()
            # time line: tokens before the first one containing ":" form the
            # motion, the rest are joined into the time string
            if ("AM" in line) or ("PM" in line):
                split_l = line.split()
                for y in split_l:
                    if ":" in y:
                        time_location = split_l.index(y)
                        motion = " ".join(split_l[0:time_location])
                        time = split_l[time_location:]
                        if len(time) > 0:
                            time = "".join(time)
                        dt = initial_date + " " + time
                        dt = datetime.strptime(dt, "%m/%d/%Y %I:%M:%S%p")
                        vote_date = central.localize(dt)
                        vote_date = vote_date.isoformat()
                        # In rare case that no motion is provided
                        if len(motion) < 1:
                            motion = "No Motion Provided"
            # total lines (no space before the colon): last token is the count
            if "YEAS:" in line:
                yeas = int(line.split()[-1])
            if "NAYS:" in line:
                nays = int(line.split()[-1])
            if "ABSTAINED:" in line:
                abstained = int(line.split()[-1])
            # "PASSES:" is stored in the same variable as "ABSTAINED:"
            if "PASSES:" in line:
                abstained = int(line.split()[-1])
            if "NOT VOTING:" in line:
                not_voting = int(line.split()[-1])

            # name sections (space before the colon): names are separated by
            # two spaces and run until the next section heading
            if "YEAS :" in line:
                y = 0
                next_line = pdflines[x + y]
                while "NAYS : " not in next_line:
                    next_line = next_line.split("  ")
                    if next_line and ("YEAS" not in next_line):
                        for v in next_line:
                            if v and "YEAS" not in v:
                                voters["yes"].append(v.strip())
                    # y is incremented after the read, so the first pass
                    # re-reads the line at offset 0
                    next_line = pdflines[x + y]
                    y += 1
            if line and "NAYS :" in line:
                y = 0
                next_line = 0
                next_line = pdflines[x + y]
                while ("ABSTAINED : " not in next_line) and ("PASSES :"
                                                             not in next_line):
                    next_line = next_line.split("  ")
                    if next_line and "NAYS" not in next_line:
                        for v in next_line:
                            if v and "NAYS" not in v:
                                voters["no"].append(v.strip())
                    next_line = pdflines[x + y]
                    y += 1

            if line and ("ABSTAINED :" in line or "PASSES :" in line):
                # starts two lines below the heading
                y = 2
                next_line = 0
                next_line = pdflines[x + y]
                while "NOT VOTING :" not in next_line:
                    next_line = next_line.split("  ")
                    # NOTE(review): next_line is a list here, and this `or`
                    # is false only if both exact strings are elements of it;
                    # `and` may have been intended - confirm.
                    if next_line and ("ABSTAINED" not in next_line
                                      or "PASSES" not in next_line):
                        for v in next_line:
                            if v:
                                voters["abstain"].append(v.strip())
                    next_line = pdflines[x + y]
                    y += 1

            # the "NOT VOTING : " heading closes the current vote
            if line and "NOT VOTING : " in line:
                # number of following lines to read, estimated from the
                # not-voting total and the token count of the heading line
                lines_to_go_through = math.ceil(not_voting / len(line.split()))
                next_line = pdflines[x]
                for y in range(lines_to_go_through):
                    if len(pdflines) > (x + y + 2):
                        next_line = pdflines[x + y + 2].split("  ")
                        for v in next_line:
                            if v:
                                voters["not voting"].append(v.strip())
                # passes only when yeas exceed all other totals combined
                if yeas > (nays + abstained + not_voting):
                    passed = True
                else:
                    passed = False

                ve = VoteEvent(
                    chamber=chamber,
                    start_date=vote_date,
                    motion_text=motion,
                    result="pass" if passed else "fail",
                    bill=bill,
                    classification="passage",
                )
                ve.add_source(vote_url)
                for how_voted, how_voted_voters in voters.items():
                    for voter in how_voted_voters:
                        if len(voter) > 0:
                            ve.vote(how_voted, voter)
                # Resets voters dictionary before going onto next page in pdf
                voters = defaultdict(list)
                yield ve
Ejemplo n.º 24
0
    def parse_vote_pdf(self, vote_url, bill):
        """Build a VoteEvent for ``bill`` from the roll-call PDF at ``vote_url``.

        The PDF is downloaded and converted to text. The first line supplies
        the calendar date, the first line mentioning both "Yeas" and "Nays"
        supplies the five tallies, a few lines above it are searched for the
        motion, and the lines below it list the voters per category.

        Returns:
            The populated VoteEvent.

        Raises:
            ValueError: if no tally line is found, or if the number of names
                read for each category differs from the tallies.
        """
        pdf_path, _ = self.urlretrieve(vote_url)
        lines = convert_pdf(pdf_path, type="text").decode().splitlines()

        chamber = "upper" if "Senate" in vote_url else "lower"

        raw_date = lines[0].split("Calendar Date:")[1].strip()
        date = datetime.datetime.strptime(raw_date, "%b %d, %Y %I:%M (%p)")

        # Position of the first line carrying the tallies (None when absent).
        page_index = next(
            (n for n, row in enumerate(lines) if "Yeas" in row and "Nays" in row),
            None,
        )
        # Index 0 is rejected together with None; line 0 is the date line.
        if not page_index:
            raise ValueError("Vote Counts Not found at %s" % vote_url)

        vote_types = ["yes", "no", "not voting", "excused", "absent"]
        vote_counts = [0] * 5
        tally_fields = re.split(r"\s{2,}", lines[page_index].strip())
        for slot, field in enumerate(tally_fields):
            # Each field reads "<number> <label>".
            tally, _label = field.split(" ", 1)
            vote_counts[slot] = int(tally)

        passed = vote_counts[0] > vote_counts[1]

        motion_keywords = [
            "favorable",
            "reading",
            "amendment",
            "motion",
            "introduced",
            "bill pass",
            "committee",
        ]

        def first_column(row):
            # Text before the first run of two or more whitespace characters.
            return re.split(r"\s{2,}", row.strip())[0]

        def names_a_motion(candidate):
            lowered = candidate.lower()
            return any(keyword in lowered for keyword in motion_keywords)

        # Consent calendar votes address multiple bills in one VoteEvent
        # eg, http://mgaleg.maryland.gov/2018RS/votes/Senate/0478.pdf
        motion = ""
        consent_calendar_bills = None
        if any("Consent Calendar" in row for row in lines[:page_index]):
            motion = first_column(lines[page_index - 4])
            consent_calendar_bills = re.split(r"\s{2,}", lines[page_index - 1].strip())
            assert (
                consent_calendar_bills
            ), "Could not find bills for consent calendar vote"

        # Offsets above the tally line to try, in order, until the text found
        # contains one of the motion keywords.
        for offset in (3, 2, 4, 5):
            if names_a_motion(motion):
                break
            motion = first_column(lines[page_index - offset])
        else:
            if not names_a_motion(motion):
                # Fall back to the whole line; covers the bad formatting in SB 1260
                motion = lines[page_index - 3]
            if not names_a_motion(motion):
                # Check this one for SB 747
                motion = "No motion given"
                self.warning("No motion given")

        vote = VoteEvent(
            bill=bill,
            chamber=chamber,
            start_date=date.strftime("%Y-%m-%d"),
            motion_text=motion,
            classification="passage",
            result="pass" if passed else "fail",
        )

        # Include bill ID to avoid duplication for consent calendars
        vote.pupa_id = "{}#{}".format(vote_url, bill.identifier)

        for vote_type, count in zip(vote_types, vote_counts):
            vote.set_count(vote_type, count)

        # Lines containing any of these mark the end of one category's names.
        show_stoppers = [
            "Voting Nay",
            "Not Voting",
            "COPY",
            "Excused",
            "indicates vote change",
            "Indicates Vote Change",
        ]
        vote_index = 0

        # Names seen per category, compared with the tallies afterwards.
        vote_name_counts = [0] * 5

        # The name lists start two lines below the tally line.
        for raw_row in lines[page_index + 2:]:
            row = raw_row.strip()

            if not row or "Voting Yea" in row:
                continue

            if any(marker in row for marker in show_stoppers):
                vote_index += 1
                continue

            names = re.split(r"\s{2,}", row)
            vote_name_counts[vote_index] += len(names)
            for name in names:
                vote.vote(vote_types[vote_index], name)

        if vote_counts != vote_name_counts:
            raise ValueError("Votes Count and Number of Names don't match")

        return vote
Ejemplo n.º 25
0
    def scrape_vote(self, bill, date, url):
        """Yield a VoteEvent for ``bill`` built from the JSON roll call at ``url``.

        A warning is logged and nothing is yielded when the action log's
        location names no recognised chamber. Nothing is yielded either when
        the action log has no status text to use as the motion.
        """
        page = self.get(url).json()

        location = page["actionLog"]["FullName"]
        chamber = None
        if location:
            # First keyword found in the location decides the chamber.
            chamber_keywords = (
                ("House", "lower"),
                ("Senate", "upper"),
                ("Joint", "legislature"),
            )
            for keyword, code in chamber_keywords:
                if keyword in location:
                    chamber = code
                    break
        if chamber is None:
            self.warning("Bad Vote chamber: '%s', skipping" % location)
            return

        motion = page["actionLog"]["StatusText"]
        if not motion:
            # If we can't detect a motion, skip this vote
            return

        yes_count = page["Yeas"]
        no_count = page["Nays"]
        excused_count = page["Excused"]
        absent_count = page["Absent"]

        passed = yes_count > no_count

        # "Veto override" is left unclassified until that type is added back
        # to OS-core.
        if motion == "Concurred in amendments":
            vtype = "amendment"
        elif motion.startswith("Do Pass"):
            vtype = "passage"
        else:
            vtype = []

        vote = VoteEvent(
            chamber=chamber,
            start_date=date,
            motion_text=motion,
            result="pass" if passed else "fail",
            classification=vtype,
            bill=bill,
        )
        # differentiate nearly identical votes
        vote.dedupe_key = url
        vote.add_source(url)

        tallies = (
            ("yes", yes_count),
            ("no", no_count),
            ("excused", excused_count),
            ("absent", absent_count),
        )
        for label, tally in tallies:
            vote.set_count(label, tally)

        for person in page["RollCalls"]:
            choice = person["Vote1"]
            # Any other value is ignored.
            if choice in ("Aye", "Yea"):
                vote.yes(person["UniqueName"])
            elif choice == "Nay":
                vote.no(person["UniqueName"])
            elif choice == "Excused":
                vote.vote("excused", person["UniqueName"])
            elif choice == "Absent":
                vote.vote("absent", person["UniqueName"])

        yield vote
Ejemplo n.º 26
0
    def scrape_votes(self, bill, page):
        """Yield one VoteEvent per floor action recorded for ``bill``.

        For every entry in ``page["FloorHeaders"]`` the floor-action API is
        queried with votes included. Each returned action other than
        "No Action" becomes a VoteEvent carrying the tallies and the
        individual legislators' votes.

        Args:
            bill: passed through to each VoteEvent.
            page: mapping with at least ``"BillId"`` and ``"FloorHeaders"``.

        Raises:
            KeyError: if a header's ``"LegislativeBody"`` is neither "S" nor "H".
        """
        base_url = "https://apps.azleg.gov/api/BillStatusFloorAction"
        for header in page["FloorHeaders"]:
            params = {
                "billStatusId": page["BillId"],
                "billStatusActionId": header["BillStatusActionId"],
                "includeVotes": "true",
            }
            resp = self.get(base_url, params=params)
            actions = json.loads(resp.content.decode("utf-8"))

            for action in actions:
                if action["Action"] == "No Action":
                    continue
                action_date = datetime.datetime.strptime(
                    action["ReportDate"], "%Y-%m-%dT%H:%M:%S")

                # Tallies may be missing; treat them as zero for the stored
                # counts AND for the pass/fail comparison (comparing None
                # with ">" would raise TypeError).
                ayes = action["Ayes"] or 0
                nays = action["Nays"] or 0
                passed = action["UnanimouslyAdopted"] or ayes > nays

                vote = VoteEvent(
                    chamber={
                        "S": "upper",
                        "H": "lower"
                    }[header["LegislativeBody"]],
                    motion_text=action["Action"],
                    classification="passage",
                    result="pass" if passed else "fail",
                    start_date=action_date.strftime("%Y-%m-%d"),
                    bill=bill,
                )
                vote.add_source(resp.url)
                vote.set_count("yes", ayes)
                vote.set_count("no", nays)
                vote.set_count("other", (action["Present"] or 0))
                vote.set_count("absent", (action["Absent"] or 0))
                vote.set_count("excused", (action["Excused"] or 0))
                vote.set_count("not voting", (action["NotVoting"] or 0))

                for v in action["Votes"]:
                    # Anything other than Y/N is recorded as "other".
                    vote_type = {"Y": "yes", "N": "no"}.get(v["Vote"], "other")
                    vote.vote(vote_type, v["Legislator"]["FullName"])
                # The response URL alone is not unique per action, so the
                # referral number is appended.
                vote.pupa_id = resp.url + str(action["ReferralNumber"])
                yield vote
Ejemplo n.º 27
0
    def scrape_vote(self, url, session):
        """Parse the roll-call PDF at ``url`` into a VoteEvent.

        The converted text is read in two phases. The preamble is scanned for
        a bill id, the motion and the tally line; once the tally line has been
        seen, the remaining lines are read as name lists grouped under their
        "Voting Yea" / "Voting Nay" / ... headings.

        Args:
            url: location of the vote PDF; also used as source and dedupe key.
            session: passed to the VoteEvent as ``legislative_session``.

        Returns:
            The VoteEvent, or None when the text does not contain "Maryland",
            when a preamble line mentions "vetoed", or when the bill id or the
            motion could not be found.

        Raises:
            ValueError: if no tally line is found in the PDF.
            IndexError: if no "Legislative Date: ..." is found in the text.

        ``check_counts`` is called with ``raise_error=True`` on the result.
        """
        fname, _ = self.urlretrieve(url)
        text = convert_pdf(fname, type="text").decode()
        lines = text.splitlines()

        chamber = "upper" if "senate" in url else "lower"
        if "Maryland" not in text:
            self.warning(f"empty vote from {url}")
            return
        date = re.findall(r"Legislative Date: (\w+ \d+, \d{4})", text)[0]

        section = "preamble"
        motion = None
        bill_id = None
        how = None
        # (yes, no, not voting, excused, absent); stays None until the tally
        # line has been seen.
        tallies = None
        voters = defaultdict(list)

        for line in lines:
            if section == "preamble":
                if "vetoed" in line.lower():
                    self.warning(
                        f"skipping vote that appears to be on prior session: {line}, {bill_id}"
                    )
                    return
                possible_bill_id = re.findall(r"([HS][BJR] \d+)", line)
                if possible_bill_id:
                    bill_id = possible_bill_id[0]

                # preamble has metadata, then motion, then counts.  our process then is to
                # store the last line as the motion, but if the last line looks like a
                # continuation, append it to the prior line

                line = line.strip()
                counts = re.findall(
                    r"(\d+) Yeas\s+(\d+) Nays\s+(\d+) Not Voting\s+(\d+) Excused\s+(\d+) Absent",
                    line,
                )
                if counts:
                    tallies = tuple(int(count) for count in counts[0])
                    section = "votes"
                elif line and line != "(Const)":
                    # questions seem to be split across two lines; only join
                    # when there is a previous line to join onto
                    if line.endswith("?") and motion:
                        motion = motion + " " + line
                    else:
                        motion = line
            elif section == "votes":
                if line.startswith("Voting Yea"):
                    how = "yes"
                elif line.startswith("Voting Nay"):
                    how = "no"
                elif line.startswith("Not Voting"):
                    how = "not voting"
                elif line.startswith("Excused from Voting"):
                    how = "excused"
                elif line.startswith("Excused (Absent)"):
                    how = "absent"
                elif how:
                    names = re.split(r"\s{2,}", line)
                    voters[how].extend(names)

        if not bill_id and not motion:
            return
        elif bill_id and not motion:
            self.warning(
                f"got {bill_id} but no motion, not registering as a vote")
            # Without this return a VoteEvent with motion_text=None was built.
            return
        elif motion and not bill_id:
            self.warning(
                f"got {motion} but no bill_id, not registering as a vote")
            return

        if tallies is None:
            # The tally line was never seen, so there are no counts to use.
            raise ValueError(f"vote counts not found at {url}")
        yes_count, no_count, nv_count, excused_count, absent_count = tallies

        # bleh - result not indicated anywhere
        result = "pass" if yes_count > no_count else "fail"
        bill_chamber = "upper" if bill_id.startswith("S") else "lower"
        date = datetime.datetime.strptime(date,
                                          "%b %d, %Y").strftime("%Y-%m-%d")
        vote = VoteEvent(
            chamber=chamber,
            start_date=date,
            result=result,
            classification="passage",
            motion_text=motion,
            legislative_session=session,
            bill=bill_id,
            bill_chamber=bill_chamber,
        )
        # URL includes sequence ID, will be unique
        vote.dedupe_key = url
        vote.add_source(url)
        vote.set_count("yes", yes_count)
        vote.set_count("no", no_count)
        vote.set_count("not voting", nv_count)
        vote.set_count("excused", excused_count)
        vote.set_count("absent", absent_count)
        for how, names in voters.items():
            for name in names:
                name = name.strip().replace("*", "")
                # Footer fragments end up among the names; leave them out.
                if name and "COPY" not in name and "Indicates Vote Change" not in name:
                    vote.vote(how, name)
        check_counts(vote, raise_error=True)
        return vote
Ejemplo n.º 28
0
    def parse_bill_actions_table(self, bill, action_table, bill_id, session,
                                 url, bill_chamber):
        """Record each row of ``action_table`` on ``bill`` and yield its votes.

        Every child of ``action_table`` after the first is added to ``bill``
        as an action, with the committees that can be resolved through
        ``self.short_ids`` attached as related organizations. When
        ``self.parse_vote`` finds a vote in the action text, a VoteEvent is
        yielded for it.
        """
        # Actors for which a "reconsider" action has been seen since their
        # last vote.
        pending_reconsideration = set()

        for row in action_table.xpath("*")[1:]:
            when = dt.datetime.strptime(row[0].text_content(), "%m/%d/%Y")
            date = when.strftime("%Y-%m-%d")
            actor_code = row[1].text_content().upper()
            text = row[2].text_content()
            actor = self._vote_type_map[actor_code]
            act_type, committees = categorize_action(text)

            # Translate committee short codes to full names; codes that
            # cannot be resolved are left out.
            real_committees = []
            for short_code in committees or ():
                try:
                    real_committees.append(self.short_ids[short_code]["name"])
                except KeyError:
                    continue

            act = bill.add_action(text,
                                  date,
                                  chamber=actor,
                                  classification=act_type)
            for committee_name in real_committees:
                act.add_related_entity(name=committee_name,
                                       entity_type="organization")

            parsed = self.parse_vote(text)
            if parsed:
                v, motion = parsed
                if actor in pending_reconsideration:
                    motion_text = "Reconsider: " + motion
                else:
                    motion_text = motion
                vote = VoteEvent(
                    start_date=date,
                    chamber=actor,
                    bill=bill_id,
                    bill_chamber=bill_chamber,
                    legislative_session=session,
                    motion_text=motion_text,
                    result="pass" if "passed" in text.lower() else "fail",
                    classification="passage",
                )
                pending_reconsideration.discard(actor)
                vote.add_source(url)
                vote.set_count("yes", int(v["n_yes"] or 0))
                vote.set_count("no", int(v["n_no"] or 0))
                vote.set_count("not voting", int(v["n_excused"] or 0))

                # Which parsed name list is recorded through which call.
                recorders = (
                    ("yes", vote.yes),
                    ("yes_resv", vote.yes),
                    ("no", vote.no),
                    ("excused", lambda name: vote.vote("not voting", name)),
                )
                for key, record in recorders:
                    for voter in split_specific_votes(v[key]):
                        record(self.clean_voter_name(voter))

                yield vote

            elif re.search("reconsider", text, re.IGNORECASE):
                pending_reconsideration.add(actor)
Ejemplo n.º 29
0
    def handle_page(self):
        """Yield a committee VoteEvent parsed from ``self.doc``.

        Nothing is yielded when the page has no vote-total span.

        Raises:
            ValueError: if a member's vote marker is not "Y", "N", "-" or a
                parenthesised "(Y)" / "(N)".
        """
        # Checks to see if any vote totals are provided
        totals = self.doc.xpath(
            '//span[contains(@id, "ctl00_MainContent_lblTotal")]/text()')
        if len(totals) == 0:
            return

        # Single-element unpacking: exactly one match is expected.
        [raw_date] = self.doc.xpath('//span[contains(@id, "lblDate")]/text()')
        parsed_date = datetime.datetime.strptime(raw_date,
                                                 "%m/%d/%Y %I:%M:%S %p")
        date = format_datetime(parsed_date, "US/Eastern")

        yeas = self.doc.xpath('//span[contains(@id, "lblYeas")]/text()')
        yes_count = int(yeas[0])
        nays = self.doc.xpath('//span[contains(@id, "lblNays")]/text()')
        no_count = int(nays[0])
        missed = self.doc.xpath('//span[contains(@id, "lblMissed")]/text()')
        other_count = int(missed[0])
        if yes_count > no_count:
            result = "pass"
        else:
            result = "fail"

        [committee] = self.doc.xpath(
            '//span[contains(@id, "lblCommittee")]/text()')
        [action] = self.doc.xpath('//span[contains(@id, "lblAction")]/text()')
        motion = "{} ({})".format(action, committee)

        vote = VoteEvent(
            start_date=date,
            bill=self.kwargs["bill"],
            chamber="lower",
            motion_text=motion,
            result=result,
            classification="committee",
        )
        vote.add_source(self.url)
        vote.set_count("yes", yes_count)
        vote.set_count("no", no_count)
        vote.set_count("not voting", other_count)

        entries = self.doc.xpath('//ul[contains(@class, "vote-list")]/li')
        for entry in entries:
            if not entry.text_content().strip():
                continue

            [member] = entry.xpath("span[2]//text()")
            [marker] = entry.xpath("span[1]//text()")
            member = member.strip()

            if marker == "Y":
                vote.yes(member)
            elif marker == "N":
                vote.no(member)
            elif marker == "-":
                vote.vote("not voting", member)
            elif re.search(r"\([YN]\)", marker):
                # Parenthetical votes appear to not be counted in the
                # totals for Yea, Nay, _or_ Missed
                continue
            else:
                raise ValueError(
                    "Unknown vote type found: {}".format(marker))

        yield vote
Ejemplo n.º 30
0
    def handle_page(self):
        """Yield a committee VoteEvent parsed from a text vote sheet.

        The motion is read from ``self.lines[5]`` after "FINAL ACTION:".
        Member rows follow the "Yea  Nay" header row and end at the first
        blank line. The totals are read from ``self.text``. A warning is
        logged and nothing is yielded when the motion is empty.

        Raises:
            ValueError: if a member's vote mark sits between the Yea and Nay
                columns.
        """
        _, motion = self.lines[5].split("FINAL ACTION:")
        motion = motion.strip()
        if not motion:
            self.scraper.warning("Vote appears to be empty")
            return

        # First line that looks like the repeated "Yea  Nay" column header.
        header_rows = [
            position for position, row in enumerate(self.lines)
            if re.search(r"^\s+Yea\s+Nay.*?(?:\s+Yea\s+Nay)+$", row)
        ]
        vote_top_row = header_rows[0]
        header = self.lines[vote_top_row]
        yea_columns_end = header.index("Yea") + len("Yea")
        nay_columns_begin = header.index("Nay")

        member_pattern = re.compile(
                    r"""(?x)
                        ^\s+(?:[A-Z\-]+)?\s+    # Possible vote indicator
                        ([A-Z][a-z]+            # Name must have lower-case characters
                        [\w\-\s]+)              # Continue looking for the rest of the name
                        (?:,[A-Z\s]+?)?         # Leadership has an all-caps title
                        (?:\s{2,}.*)?           # Name ends when many spaces are seen
                        """)

        votes = {"yes": [], "no": [], "other": []}
        for line in self.lines[(vote_top_row + 1):]:
            # End loop as soon as no more members are found
            if not line.strip():
                break

            member = member_pattern.search(line).group(1)
            # sometimes members have trailing X's from other motions in the
            # vote sheet we aren't collecting
            member = re.sub(r"(\s+X)+", "", member)

            # Usually non-voting members won't even have a code listed
            # Only a couple of codes indicate an actual vote:
            # "VA" (vote after roll call) and "VC" (vote change)
            if re.search(r"^\s+(X|VA|VC)\s+[A-Z][a-z]", line) is None:
                votes["other"].append(member)
                continue

            # The amount of leading whitespace tells which column the mark
            # is in.
            vote_column = len(line) - len(line.lstrip())
            if vote_column <= yea_columns_end:
                votes["yes"].append(member)
            elif vote_column >= nay_columns_begin:
                votes["no"].append(member)
            else:
                raise ValueError(
                    "Unparseable vote found for {0} in {1}:\n{2}".
                    format(member, self.url, line))

        totals = re.search(r"(?msu)\s+(\d{1,3})\s+(\d{1,3})\s+.*?TOTALS",
                           self.text).groups()
        yes_count, no_count = int(totals[0]), int(totals[1])
        if yes_count > no_count:
            result = "pass"
        else:
            result = "fail"

        vote = VoteEvent(
            start_date=self.kwargs["date"],
            bill=self.kwargs["bill"],
            chamber="upper",
            motion_text=motion,
            classification="committee",
            result=result,
        )
        vote.add_source(self.url)
        vote.set_count("yes", yes_count)
        vote.set_count("no", no_count)
        vote.set_count("other", len(votes["other"]))

        # set voters
        for vtype, members in votes.items():
            for raw_name in members:
                voter = raw_name.strip()
                # Names followed by a long run of spaces and "VA"
                # (Ex: Cruz                                   VA) are trimmed.
                # NOTE(review): [:-2] drops the last TWO tokens, so a one-word
                # name plus "VA" becomes empty and is skipped below - confirm
                # this is intended.
                if "  VA" in voter:
                    voter = " ".join(voter.split()[:-2])
                if voter:
                    vote.vote(vtype, voter)

        yield vote