Ejemplo n.º 1
0
def toy_vote():
    """Return a sample passing ``bill-passage`` Vote with one source attached."""
    sample = Vote(
        legislative_session="2009",
        motion_text="passage of the bill",
        start_date="2009-01-07",
        result='pass',
        classification='bill-passage',
    )
    sample.add_source("http://uri.example.com/", note="foo")
    return sample
Ejemplo n.º 2
0
    def scrape_senate_vote(self, bill, url, date):
        """Download a Senate roll-call PDF and yield the Vote parsed from it.

        Args:
            bill: Bill the vote belongs to (passed through to ``Vote``).
            url: Location of the vote PDF; also recorded as the vote's source.
            date: Object with ``strftime``; formatted as ``%Y-%m-%d`` for the
                vote's start date.

        Yields:
            Whatever ``scrape_senate_vote_3col`` yields when the text uses the
            "Yea: N  Nay: N  Absent: N" summary layout; otherwise a single
            ``Vote`` built from the "Yea(s):" / "Nay(s):" / "Absent(s):"
            sections.  Nothing is yielded if the PDF cannot be downloaded.
        """
        try:
            filename, _ = self.urlretrieve(url)
        except scrapelib.HTTPError:
            self.warning("missing vote file %s" % url)
            return

        vote = Vote(
            chamber='upper',
            start_date=date.strftime("%Y-%m-%d"),
            motion_text='Passage',
            # Placeholder; the real result is computed from the tallies below.
            result='fail',
            classification='passage',
            bill=bill
        )
        vote.add_source(url)

        # Always delete the downloaded file, even if the conversion fails.
        try:
            text = convert_pdf(filename, 'text').decode('utf-8')
        finally:
            os.remove(filename)

        if re.search(r'Yea:\s+\d+\s+Nay:\s+\d+\s+Absent:\s+\d+', text):
            yield from self.scrape_senate_vote_3col(bill, vote, text, url, date)
            return

        # Splitting on a capturing group keeps the labels in the result, so
        # the chunks alternate label / names.  The list is reversed so that
        # pop() walks it in document order.  It has to be a real list: in
        # Python 3 ``filter()`` returns an iterator, which is always truthy
        # and has no ``pop()``.
        data = [
            chunk
            for chunk in re.split(r'(Yea|Nay|Absent)s?:', text)[::-1]
            if chunk
        ]
        keymap = dict(yea='yes', nay='no')
        actual_vote = collections.defaultdict(int)
        vote_count = {
            'yes': 0,
            'no': 0,
            'other': 0
        }
        while data:
            vote_val = data.pop()
            # Anything that is not a yea or nay label is counted as "other".
            key = keymap.get(vote_val.lower(), 'other')
            values = data.pop()
            for name in re.split(r'(?:[\s,]+and\s|[\s,]{2,})', values):
                if name.lower().strip() == 'none.':
                    continue
                # Clean the name: drop double dots, one trailing period, and
                # any digits, hyphens, spaces or newlines at either end.
                name = name.replace('..', '')
                name = re.sub(r'\.$', '', name)
                name = name.strip('-1234567890 \n')
                if not name:
                    continue
                vote.vote(key, name)
                actual_vote[vote_val] += 1
                vote_count[key] += 1
            # Sanity check: per-label tally must match the per-option tally.
            assert actual_vote[vote_val] == vote_count[key]

        for key, value in vote_count.items():
            vote.set_count(key, value)
        # Replace the placeholder result: the motion passes only when the yes
        # votes outnumber the no and other votes combined.
        vote.result = 'pass' if vote_count['yes'] > (vote_count['no'] +
                                                     vote_count['other']) else 'fail'

        yield vote
Ejemplo n.º 3
0
    def scrape_committee_vote(self, bill, actor, date, motion, page, url, uniqid):
        """Yield a Vote built from the first table of a committee vote page.

        Voter names are read from the text that follows each ``<br>`` inside
        the yes / no / other cells; the counts are the number of names found.
        When the row has fewer than three cells, only the first cell is used
        (as the yes cell) and the no / other counts are 0.
        """
        table = page.xpath("//table")[0]
        row = table.xpath(".//tr")[0]
        if row[0].text_content() == 'Votes:':
            # New website layout: use the third row instead.
            row = table.xpath(".//tr")[2]
        cells = row.xpath(".//td")
        if len(cells) >= 3:
            yes, no, other = cells[:3]
        else:
            yes, no, other = cells[0], None, None

        def tally(cell, option):
            # Summarise one cell as {"type", "count", "votes"}; a missing cell
            # gives a None type/count and no votes.
            if cell is None:
                return {"type": None, "count": None, "votes": []}
            names = []
            for br in cell.xpath(".//br"):
                text = (br.tail or '').strip()
                if text:
                    names.append(text)
            return {"type": option, "count": len(names), "votes": names}

        vote_dict = {
            option: tally(cell, option)
            for option, cell in (('yes', yes), ('no', no), ('other', other))
        }

        yes_count = vote_dict['yes']['count']
        no_count = vote_dict['no']['count'] or 0
        other_count = vote_dict['other']['count'] or 0

        outcome = 'pass' if (yes_count > no_count) else 'fail'
        vote = Vote(
            chamber=actor,
            start_date=date,
            motion_text=motion,
            identifier=str(uniqid),
            result=outcome,
            classification='passage',
            bill=bill,
        )
        vote.extras = {'_vote_id': uniqid}
        vote.add_source(url)
        for option, total in (('yes', yes_count), ('no', no_count), ('other', other_count)):
            vote.set_count(option, total)
        for option, block in vote_dict.items():
            for voter in block['votes']:
                vote.vote(option, voter)

        yield vote
Ejemplo n.º 4
0
def test_vote_org_chamber():
    """A Vote created with ``chamber`` gets a pseudo-id organization of that classification."""
    vote = Vote(
        legislative_session="2009",
        motion_text="passage of the bill",
        start_date="2009-01-07",
        result='pass',
        classification='bill-passage',
        chamber='upper',
    )
    expected = {'classification': 'upper'}
    assert get_pseudo_id(vote.organization) == expected
Ejemplo n.º 5
0
    def scrape_vote(self, bill, vote_id, session):
        """Fetch one roll call by id and yield it as a Vote.

        Posts ``vote_id`` to the roll-call JSON endpoint; nothing is yielded
        when the response is empty.  Yes / no counts are taken from the
        response as-is, while the not-voting, vacant, absent and conflict
        counts are summed into a single "other" count.

        Args:
            bill: Bill whose ``identifier`` is attached to the vote.
            vote_id: Value sent as ``rollCallId``.
            session: Passed to ``Vote`` as ``legislative_session``.
        """
        vote_url = 'https://legis.delaware.gov/json/RollCall/GetRollCallVoteByRollCallId'
        page = self.post(
            url=vote_url,
            data={
                'rollCallId': vote_id,
                'sort': '',
                'group': '',
                'filter': '',
            },
            allow_redirects=True,
        ).json()
        if not page:
            return

        roll = page['Model']
        vote_chamber = self.chamber_map[roll['ChamberName']]
        # Timestamps look like "7/1/16 01:00 AM"; only the date part is kept.
        taken_at = dt.datetime.strptime(roll['TakenAtDateTime'], '%m/%d/%y %I:%M %p')
        vote_date = taken_at.strftime('%Y-%m-%d')

        # TODO: What does this code mean?
        vote_motion = roll['RollCallVoteType']

        vote_passed = 'fail'
        if roll['RollCallStatus'] == 'Passed':
            vote_passed = 'pass'

        other_count = sum(
            int(roll[field])
            for field in ('NotVotingCount', 'VacantVoteCount',
                          'AbsentVoteCount', 'ConflictVoteCount')
        )

        vote = Vote(
            chamber=vote_chamber,
            start_date=vote_date,
            motion_text=vote_motion,
            result=vote_passed,
            classification='other',
            bill=bill.identifier,
            legislative_session=session,
        )
        vote.add_source(vote_url)
        vote.set_count('yes', roll['YesVoteCount'])
        vote.set_count('no', roll['NoVoteCount'])
        vote.set_count('other', other_count)

        for row in roll['AssemblyMemberVotes']:
            # AssemblyMemberId looks like it should work here, but for some
            # sessions it's bugged to only return session, so the short name
            # is used for the lookup instead.
            short_name = row['ShortName']
            try:
                name = self.legislators_by_short[str(short_name)]['DisplayName']
            except KeyError:
                self.warning('could not find legislator short name %s',
                             short_name)
                name = short_name

            code = row['SelectVoteTypeCode']
            if code == 'Y':
                vote.yes(name)
            elif code == 'N':
                vote.no(name)
            else:
                vote.vote('other', name)

        yield vote
Ejemplo n.º 6
0
def test_vote_bill_clearing():
    """Re-importing a bill's votes after a motion-text change keeps the vote count stable.

    Guards against old votes lingering on a bill when an edit makes an
    existing vote look like a brand-new one.
    """
    jurisdiction = Jurisdiction.objects.create(id='jid', division_id='did')
    session = jurisdiction.legislative_sessions.create(name='1900', identifier='1900')
    house = Organization.objects.create(id='org-id', name='House', classification='lower')
    bill = Bill.objects.create(
        id='bill-1',
        identifier='HB 1',
        legislative_session=session,
        from_organization=house,
    )
    Bill.objects.create(
        id='bill-2',
        identifier='HB 2',
        legislative_session=session,
        from_organization=house,
    )
    dmi = DumbMockImporter()
    bi = BillImporter('jid', dmi, dmi)

    common = dict(
        legislative_session='1900',
        start_date='2013',
        classification='anything',
        result='passed',
        bill=bill.identifier,
        bill_chamber='lower',
    )
    # The misspelling in the first motion text is deliberate.
    vote1 = ScrapeVote(motion_text='a vote on somthing', **common)
    vote2 = ScrapeVote(motion_text='a vote on something else', **common)

    def import_both():
        # import_data (not import_item) is required so postimport is called.
        VoteImporter('jid', dmi, dmi, bi).import_data([vote1.as_dict(), vote2.as_dict()])

    import_both()
    assert VoteEvent.objects.count() == 2

    # Once the typo is corrected there must still be two votes, not three.
    vote1.motion_text = 'a vote on something'
    import_both()
    assert VoteEvent.objects.count() == 2
Ejemplo n.º 7
0
def test_vote_identifier_dedupe():
    """Votes are matched on identifier: insert, noop, update, then insert for a new identifier."""
    jurisdiction = Jurisdiction.objects.create(id='jid', division_id='did')
    jurisdiction.legislative_sessions.create(name='1900', identifier='1900')

    vote = ScrapeVote(
        legislative_session='1900',
        start_date='2013',
        classification='anything',
        result='passed',
        motion_text='a vote on something',
        identifier='Roll Call No. 1',
    )
    dmi = DumbMockImporter()
    bi = BillImporter('jid', dmi, dmi)

    def import_vote():
        # Import the vote's current state with a fresh importer and report
        # which action the importer took.
        _, action = VoteImporter('jid', dmi, dmi, bi).import_item(vote.as_dict())
        return action

    # First import creates the vote event.
    assert import_vote() == 'insert'
    assert VoteEvent.objects.count() == 1

    # Importing the identical vote again changes nothing.
    assert import_vote() == 'noop'
    assert VoteEvent.objects.count() == 1

    # A changed result updates the existing vote event.
    vote.result = 'failed'
    assert import_vote() == 'update'
    assert VoteEvent.objects.count() == 1

    # A different identifier is treated as a separate vote event.
    vote.identifier = 'Roll Call 2'
    assert import_vote() == 'insert'
    assert VoteEvent.objects.count() == 2
Ejemplo n.º 8
0
def test_vote_identifier_dedupe():
    """Importing by identifier yields insert, noop, update, and insert again for a new identifier."""
    jurisdiction = Jurisdiction.objects.create(id='jid', division_id='did')
    jurisdiction.legislative_sessions.create(name='1900', identifier='1900')

    vote = ScrapeVote(legislative_session='1900', start_date='2013',
                      classification='anything', result='passed',
                      motion_text='a vote on something',
                      identifier='Roll Call No. 1')
    dmi = DumbMockImporter()
    bi = BillImporter('jid', dmi, dmi)

    def check(expected_action, expected_total):
        # Re-import the vote and verify both the action taken and the number
        # of stored vote events.
        _, what = VoteImporter('jid', dmi, dmi, bi).import_item(vote.as_dict())
        assert what == expected_action
        assert VoteEvent.objects.count() == expected_total

    check('insert', 1)

    # Identical vote: nothing to do.
    check('noop', 1)

    # Changed result: the stored vote is updated in place.
    vote.result = 'failed'
    check('update', 1)

    # Changed identifier: a second vote event is created.
    vote.identifier = 'Roll Call 2'
    check('insert', 2)
Ejemplo n.º 9
0
def test_vote_org_obj():
    """A Vote given an Organization object stores that organization's ``_id``."""
    committee = Organization('something', classification='committee')
    vote = Vote(
        legislative_session="2009",
        motion_text="passage of the bill",
        start_date="2009-01-07",
        result='pass',
        classification='bill-passage',
        organization=committee,
    )
    assert vote.organization == committee._id
Ejemplo n.º 10
0
def test_org_and_chamber_conflict():
    """Supplying both ``organization`` and ``chamber`` to Vote raises ValueError."""
    conflicting = dict(
        legislative_session="2009",
        motion_text="passage of the bill",
        start_date="2009-01-07",
        result='pass',
        classification='passage',
        organization='test',
        chamber='lower',
    )
    with pytest.raises(ValueError):
        Vote(**conflicting)
Ejemplo n.º 11
0
def test_vote_org_dict():
    """A Vote given an organization dict exposes it back through its pseudo id."""
    odict = {'name': 'Random Committee', 'classification': 'committee'}
    vote = Vote(
        legislative_session="2009",
        motion_text="passage of the bill",
        start_date="2009-01-07",
        result='pass',
        classification='bill-passage',
        organization=odict,
    )
    resolved = get_pseudo_id(vote.organization)
    assert resolved == odict
Ejemplo n.º 12
0
    def scrape_committee_vote(self, bill, actor, date, motion, page, url,
                              uniqid):
        """Yield a Vote built from the first table of a committee vote page.

        Voter names are the non-blank direct text nodes of the yes / no /
        other cells, and each count is the number of names found.  With three
        or more cells the 1st, 3rd and 5th of the first five are used; with
        fewer, only the first cell is read (as the yes cell).
        """
        table = page.xpath("//table")[0]
        row = table.xpath(".//tr")[0]
        if row[0].text_content() == "Votes:":
            # New website layout: use the third row instead.
            row = table.xpath(".//tr")[2]

        cells = row.xpath(".//td")
        if len(cells) < 3:
            yes, no, other = cells[0], None, None
        else:
            yes, _, no, _, other = cells[:5]

        def summarise(cell, option):
            # Describe one cell as {"type", "count", "votes"}; a missing cell
            # gives a None type/count and no votes.
            if cell is None:
                return {"type": None, "count": None, "votes": []}
            names = [
                text.strip()
                for text in cell.xpath("./text()")
                if text.strip()
            ]
            return {"type": option, "count": len(names), "votes": names}

        vote_dict = {}
        for option, cell in (("yes", yes), ("no", no), ("other", other)):
            vote_dict[option] = summarise(cell, option)

        yes_count = vote_dict["yes"]["count"]
        no_count = vote_dict["no"]["count"] or 0
        other_count = vote_dict["other"]["count"] or 0

        if yes_count > no_count:
            outcome = "pass"
        else:
            outcome = "fail"

        vote = Vote(
            chamber=actor,
            start_date=date,
            motion_text=motion,
            identifier=str(uniqid),
            result=outcome,
            classification="passage",
            bill=bill,
        )
        vote.extras = {"_vote_id": uniqid}
        vote.add_source(url)
        vote.set_count("yes", yes_count)
        vote.set_count("no", no_count)
        vote.set_count("other", other_count)
        for option, summary in vote_dict.items():
            for voter in summary["votes"]:
                vote.vote(option, voter)

        yield vote
Ejemplo n.º 13
0
    def parse_vote(self, bill, actor, date, motion, url, uniqid):
        """Fetch a plain-text roll call and yield it as a Vote.

        The page at ``url`` is also added to ``bill`` as a source.  Counts and
        voter names come from the "YEAS", "NAYS" and "ABSENT [OR NOT VOTING]"
        sections; names within a section are separated by two or more
        whitespace characters.  The vote passes when yeas outnumber nays.
        """
        page = self.get(url).text
        bill.add_source(url)

        pattern = (
            r"YEAS -?\s?(\d+)(.*)NAYS -?\s?(\d+)"
            r"(.*)ABSENT( OR NOT VOTING)? -?\s?"
            r"(\d+)(.*)"
        )
        match = re.search(pattern, page, re.MULTILINE | re.DOTALL)
        yes_count = int(match.group(1))
        no_count = int(match.group(3))
        other_count = int(match.group(6))

        passed = yes_count > no_count

        # Only the two legislative chambers are passed on; any other actor
        # results in an empty chamber string.
        vote_chamber = actor if actor in ("upper", "lower") else ""

        vote = Vote(
            chamber=vote_chamber,
            start_date=date,
            motion_text=motion,
            result="pass" if passed else "fail",
            identifier=str(uniqid),
            classification="passage",
            bill=bill,
        )
        vote.add_source(url)
        for option, total in (("yes", yes_count), ("no", no_count), ("other", other_count)):
            vote.set_count(option, total)

        def names_in(group_index):
            # Non-empty names from one captured section of the roll call.
            section = match.group(group_index).strip()
            return [name for name in re.split(r"\s{2,}", section) if name]

        yes_votes = names_in(2)
        no_votes = names_in(4)
        other_votes = names_in(7)

        for name in yes_votes:
            vote.yes(name)
        for name in no_votes:
            vote.no(name)
        for name in other_votes:
            vote.vote("other", name)

        yield vote
Ejemplo n.º 14
0
    def scrape_votes(self, bill):
        """Yield one Vote per roll call returned by the GetRollCalls service.

        The bill number is the second whitespace-separated token of
        ``bill.identifier``.  Absent and excused counts are combined into
        "other", and a vote passes only when yeas exceed nays plus other.
        The motion text has the roll call's sequence number appended.
        """
        bill_num = bill.identifier.split()[1]

        url = (
            "http://wslwebservices.leg.wa.gov/legislationservice.asmx/"
            "GetRollCalls?billNumber=%s&biennium=%s" % (bill_num, self.biennium)
        )
        response = self.get(url)
        root = lxml.etree.fromstring(response.content)

        for roll_call in xpath(root, "//wa:RollCall"):
            motion = xpath(roll_call, "string(wa:Motion)")
            seq_no = xpath(roll_call, "string(wa:SequenceNumber)")

            # Keep only the date portion that precedes the "T".
            date_text = xpath(roll_call, "string(wa:VoteDate)").split("T")[0]
            date = datetime.datetime.strptime(date_text, "%Y-%m-%d").date()

            yes_count = int(xpath(roll_call, "string(wa:YeaVotes/wa:Count)"))
            no_count = int(xpath(roll_call, "string(wa:NayVotes/wa:Count)"))
            abs_count = int(xpath(roll_call, "string(wa:AbsentVotes/wa:Count)"))
            ex_count = int(xpath(roll_call, "string(wa:ExcusedVotes/wa:Count)"))
            other_count = abs_count + ex_count

            agency = xpath(roll_call, "string(wa:Agency)")
            chamber = {"House": "lower", "Senate": "upper"}[agency]

            if yes_count > (no_count + other_count):
                outcome = "pass"
            else:
                outcome = "fail"

            vote = Vote(
                chamber=chamber,
                start_date=date,
                motion_text="{} (#{})".format(motion, seq_no),
                result=outcome,
                classification="other",
                bill=bill,
            )
            vote.set_count("yes", yes_count)
            vote.set_count("no", no_count)
            vote.set_count("other", other_count)
            vote.add_source(url)

            for member_vote in xpath(roll_call, "wa:Votes/wa:Vote"):
                name = xpath(member_vote, "string(wa:Name)")
                vtype = xpath(member_vote, "string(wa:VOte)")

                if vtype == "Yea":
                    vote.yes(name)
                    continue
                if vtype == "Nay":
                    vote.no(name)
                    continue
                vote.vote("other", name)

            yield vote
Ejemplo n.º 15
0
    def parse_vote(self, bill, actor, date, motion, url, uniqid):
        """Fetch a plain-text roll call and yield it as a Vote.

        ``url`` is recorded as a source on both ``bill`` and the vote.  The
        text is matched for "YEAS", "NAYS" and "ABSENT [OR NOT VOTING]"
        sections, each giving a count followed by names separated by runs of
        two or more whitespace characters.  The result is "pass" when the yea
        count is greater than the nay count.
        """
        page = self.get(url).text
        bill.add_source(url)

        vote_re = re.compile(
            r'YEAS -?\s?(\d+)(.*)NAYS -?\s?(\d+)'
            r'(.*)ABSENT( OR NOT VOTING)? -?\s?'
            r'(\d+)(.*)', re.MULTILINE | re.DOTALL)
        match = vote_re.search(page)
        # Groups: yea count, yea names, nay count, nay names,
        # optional " OR NOT VOTING", other count, other names.
        yes_count = int(match.group(1))
        no_count = int(match.group(3))
        other_count = int(match.group(6))

        result = 'pass' if yes_count > no_count else 'fail'

        # Any actor other than the two chambers gives an empty chamber.
        vote_chamber = ''
        if actor == 'upper' or actor == 'lower':
            vote_chamber = actor

        vote = Vote(
            chamber=vote_chamber,
            start_date=date,
            motion_text=motion,
            result=result,
            identifier=str(uniqid),
            classification='passage',
            bill=bill,
        )
        vote.add_source(url)
        vote.set_count('yes', yes_count)
        vote.set_count('no', no_count)
        vote.set_count('other', other_count)

        separator = r'\s{2,}'
        yes_votes = re.split(separator, match.group(2).strip())
        no_votes = re.split(separator, match.group(4).strip())
        other_votes = re.split(separator, match.group(7).strip())

        # Empty strings left over from the split are skipped.
        for voter in filter(None, yes_votes):
            vote.yes(voter)
        for voter in filter(None, no_votes):
            vote.no(voter)
        for voter in filter(None, other_votes):
            vote.vote('other', voter)

        yield vote
Ejemplo n.º 16
0
    def addBillHistory(self, bill, history_table):
        """Add the history table's actions to ``bill`` and collect their votes.

        Each row with a usable date and a non-empty action description is
        added to the bill as an action.  When the row links to an action
        detail page, votes are extracted from it; a Vote is built only if
        both a result and individual votes were found.

        Returns:
            list: the Vote objects created, in table order.
        """
        all_votes = []

        for action, _, _ in self.parseDataTable(history_table):
            action_description = action['Action']
            try:
                action_date = action['Date'].date().isoformat()
            except AttributeError:
                # The 'Date' value could not be turned into an ISO date, e.g.
                # https://chicago.legistar.com/LegislationDetail.aspx?ID=1424866&GUID=CEC53337-B991-4268-AE8A-D4D174F8D492
                continue

            if not action_description:
                continue

            bill.add_action(action_description,
                            action_date,
                            organization=action['Action\xa0By'],
                            classification=ACTION_CLASSIFICATION[action_description])

            details = action['Action\xa0Details']
            if 'url' not in details:
                continue

            action_detail_url = details['url']
            result, votes = self.extractVotes(action_detail_url)

            # Both pieces are required; see
            # https://github.com/datamade/municipal-scrapers-us/issues/15
            if not (votes and result):
                continue

            action_vote = Vote(
                legislative_session=bill.legislative_session,
                motion_text=action_description,
                classification=None,
                start_date=action_date,
                result=result,
                bill=bill.identifier,
            )
            action_vote.add_source(action_detail_url)
            for option, voter in votes:
                action_vote.vote(option, voter)

            all_votes.append(action_vote)

        return all_votes
Ejemplo n.º 17
0
def test_full_vote():
    """Import one vote with a count and two voters, then verify the stored VoteEvent."""
    jurisdiction = Jurisdiction.objects.create(id='jid', division_id='did')
    session = jurisdiction.legislative_sessions.create(name='1900', identifier='1900')
    Person.objects.create(id='person-id', name='Adam Smith')
    house = Organization.objects.create(id='org-id', name='House', classification='lower')
    bill = Bill.objects.create(
        id='bill-id',
        identifier='HB 1',
        legislative_session=session,
        from_organization=house,
    )
    Organization.objects.create(id='com-id', name='Arbitrary Committee', parent=house)

    vote = ScrapeVote(
        legislative_session='1900',
        motion_text='passage',
        start_date='1900-04-01',
        classification='passage:bill',
        result='pass',
        bill_chamber='lower',
        bill=bill.identifier,
    )
    vote.set_count('yes', 20)
    vote.yes('John Smith')
    vote.no('Adam Smith')

    dmi = DumbMockImporter()
    bi = BillImporter('jid', dmi, dmi)
    importer = VoteImporter('jid', dmi, dmi, bi)
    importer.import_data([vote.as_dict()])

    assert VoteEvent.objects.count() == 1
    event = VoteEvent.objects.get()
    assert event.legislative_session_id == session.id
    assert event.motion_classification == ['passage:bill']
    assert event.bill_id == bill.id

    # Exactly one count was set: 20 yes votes.
    count = event.counts.get()
    assert count.option == 'yes'
    assert count.value == 20

    assert len(list(event.votes.all())) == 2
    for cast in event.votes.all():
        expected_option = 'yes' if cast.voter_name == 'John Smith' else 'no'
        assert cast.option == expected_option
Ejemplo n.º 18
0
def test_vote_bill_id_dedupe():
    """Votes without an identifier are matched per bill: insert, noop, update, then insert for another bill."""
    jurisdiction = Jurisdiction.objects.create(id='jid', division_id='did')
    session = jurisdiction.legislative_sessions.create(name='1900', identifier='1900')
    house = Organization.objects.create(id='org-id', name='House', classification='lower')
    bill = Bill.objects.create(id='bill-1', identifier='HB 1',
                               legislative_session=session, from_organization=house)
    bill2 = Bill.objects.create(id='bill-2', identifier='HB 2',
                                legislative_session=session, from_organization=house)

    def make_vote(bill_identifier):
        # Votes in this test differ only in the bill they point at.
        return ScrapeVote(
            legislative_session='1900',
            start_date='2013',
            classification='anything',
            result='passed',
            motion_text='a vote on something',
            bill=bill_identifier,
            bill_chamber='lower',
        )

    vote = make_vote(bill.identifier)
    dmi = DumbMockImporter()
    bi = BillImporter('jid', dmi, dmi)

    def import_action(scraped):
        _, action = VoteImporter('jid', dmi, dmi, bi).import_item(scraped.as_dict())
        return action

    assert import_action(vote) == 'insert'
    assert VoteEvent.objects.count() == 1

    # Importing the identical vote again is a no-op.
    assert import_action(vote) == 'noop'
    assert VoteEvent.objects.count() == 1

    # A changed result updates the stored vote.
    vote.result = 'failed'
    assert import_action(vote) == 'update'
    assert VoteEvent.objects.count() == 1

    # The same vote data on a different bill is a new vote.
    vote = make_vote(bill2.identifier)
    assert import_action(vote) == 'insert'
    assert VoteEvent.objects.count() == 2
Ejemplo n.º 19
0
    def scrape_votes(self, bill):
        """Yield one Vote per roll call returned by the GetRollCalls service.

        The bill number is the second whitespace-separated token of
        ``bill.identifier``.  Absent and excused counts are added together as
        "other"; a vote passes only when yeas exceed nays plus other.
        """
        bill_num = bill.identifier.split()[1]

        url = ("http://wslwebservices.leg.wa.gov/legislationservice.asmx/"
               "GetRollCalls?billNumber=%s&biennium=%s" %
               (bill_num, self.biennium))
        page = lxml.etree.fromstring(self.get(url).content)

        for rc in xpath(page, "//wa:RollCall"):
            motion = xpath(rc, "string(wa:Motion)")

            # Only the part of the timestamp before the "T" is parsed.
            raw_date = xpath(rc, "string(wa:VoteDate)")
            date = datetime.datetime.strptime(raw_date.split("T")[0], "%Y-%m-%d").date()

            yes_count = int(xpath(rc, "string(wa:YeaVotes/wa:Count)"))
            no_count = int(xpath(rc, "string(wa:NayVotes/wa:Count)"))
            abs_count = int(xpath(rc, "string(wa:AbsentVotes/wa:Count)"))
            ex_count = int(xpath(rc, "string(wa:ExcusedVotes/wa:Count)"))
            other_count = abs_count + ex_count

            agency = xpath(rc, "string(wa:Agency)")
            chamber = {'House': 'lower', 'Senate': 'upper'}[agency]

            passed = yes_count > (no_count + other_count)
            vote = Vote(
                chamber=chamber,
                start_date=date,
                motion_text=motion,
                result='pass' if passed else 'fail',
                classification='other',
                bill=bill,
            )
            for option, total in (('yes', yes_count), ('no', no_count), ('other', other_count)):
                vote.set_count(option, total)
            vote.add_source(url)

            for sv in xpath(rc, "wa:Votes/wa:Vote"):
                name = xpath(sv, "string(wa:Name)")
                vtype = xpath(sv, "string(wa:VOte)")

                if vtype == 'Yea':
                    vote.yes(name)
                elif vtype == 'Nay':
                    vote.no(name)
                else:
                    vote.vote('other', name)

            yield vote
Ejemplo n.º 20
0
def test_vote_bill_clearing():
    """Fixing a vote's motion text and re-importing must not add a third vote.

    Guards against old votes lingering on a bill when an edit makes an
    existing vote look like a brand-new one.
    """
    jurisdiction = Jurisdiction.objects.create(id='jid', division_id='did')
    session = jurisdiction.legislative_sessions.create(name='1900', identifier='1900')
    house = Organization.objects.create(id='org-id', name='House', classification='lower')
    bill = Bill.objects.create(id='bill-1', identifier='HB 1',
                               legislative_session=session, from_organization=house)
    Bill.objects.create(id='bill-2', identifier='HB 2',
                        legislative_session=session, from_organization=house)
    dmi = DumbMockImporter()
    bi = BillImporter('jid', dmi, dmi)

    # The misspelling in the first motion text is deliberate.
    vote1, vote2 = [
        ScrapeVote(
            legislative_session='1900',
            start_date='2013',
            classification='anything',
            result='passed',
            motion_text=motion_text,
            bill=bill.identifier,
            bill_chamber='lower',
        )
        for motion_text in ('a vote on somthing', 'a vote on something else')
    ]

    # import_data is used (rather than import_item) so postimport is called.
    payload = [vote1.as_dict(), vote2.as_dict()]
    VoteImporter('jid', dmi, dmi, bi).import_data(payload)
    assert VoteEvent.objects.count() == 2

    # With the typo corrected there must still be two votes, not three.
    vote1.motion_text = 'a vote on something'
    payload = [vote1.as_dict(), vote2.as_dict()]
    VoteImporter('jid', dmi, dmi, bi).import_data(payload)
    assert VoteEvent.objects.count() == 2
Ejemplo n.º 21
0
    def parse_vote(self, chamber, bill, row, action_text, action_date, url):
        """Yield a Vote built from the tallies shown in one action row.

        The four tallies ("A Favor", "En Contra", "Abstenido", "Ausente") are
        read from ``row``; the vote passes when the yes tally exceeds the no
        tally.  The row is also handed to ``parse_version`` with
        ``is_document=True``.
        """
        def first_int(path):
            # First node matched by ``path`` inside the row, converted to int.
            return int(row.xpath(path)[0])

        yes = first_int(
            './/div[label[contains(text(), "A Favor")]]/span[contains(@class,"smalltxt")]/text()'
        )
        no = first_int(
            './/div[label[contains(text(), "En Contra")]]/span[contains(@class,"smalltxt")]/text()'
        )
        abstain = first_int(
            './/div[label[contains(text(), "Abstenido")]]/span[contains(@class,"smalltxt")]/text()'
        )
        absent = first_int(
            './/div[label[contains(text(), "Ausente")]]/span[contains(@class,"smalltxt")]/text()'
        )

        vote_chamber = self.parse_vote_chamber(chamber, action_text)

        # Actions mentioning "Votación Final" are classified as passage votes.
        if u"Votación Final" in action_text:
            classification = "passage"
        else:
            classification = "other"

        outcome = "pass" if (yes > no) else "fail"
        vote = Vote(
            chamber=vote_chamber,
            start_date=action_date,
            motion_text=action_text,
            result=outcome,
            bill=bill,
            classification=classification,
        )
        vote.add_source(url)
        for option, total in (("yes", yes), ("no", no),
                              ("absent", absent), ("abstain", abstain)):
            vote.set_count(option, total)

        # The attached vote PDF should not become a bill version, so it is
        # added as a document instead.
        # TODO: maybe this should be set as the source?
        self.parse_version(bill, row, is_document=True)

        yield vote
Ejemplo n.º 22
0
def test_vote_bill_id_dedupe():
    """Re-importing a vote for the same bill is a noop or an update, while
    the same vote attached to a different bill is inserted as a new event."""
    jurisdiction = Jurisdiction.objects.create(id='jid', division_id='did')
    session = jurisdiction.legislative_sessions.create(name='1900', identifier='1900')
    house = Organization.objects.create(id='org-id', name='House', classification='lower')
    first_bill = Bill.objects.create(id='bill-1', identifier='HB 1',
                                     legislative_session=session,
                                     from_organization=house)
    second_bill = Bill.objects.create(id='bill-2', identifier='HB 2',
                                      legislative_session=session,
                                      from_organization=house)

    def make_vote(bill_identifier):
        # Identical vote data every time; only the target bill varies.
        return ScrapeVote(legislative_session='1900', start_date='2013',
                          classification='anything', result='passed',
                          motion_text='a vote on something',
                          bill=bill_identifier, bill_chamber='lower')

    vote = make_vote(first_bill.identifier)
    dmi = DumbMockImporter()
    bi = BillImporter('jid', dmi, dmi)

    def import_and_check(scraped, expected_action, expected_total):
        # A fresh importer is used for every import.
        importer = VoteImporter('jid', dmi, dmi, bi)
        _, what = importer.import_item(scraped.as_dict())
        assert what == expected_action
        assert VoteEvent.objects.count() == expected_total

    import_and_check(vote, 'insert', 1)

    # same exact vote, no changes
    import_and_check(vote, 'noop', 1)

    # new info, update
    vote.result = 'failed'
    import_and_check(vote, 'update', 1)

    # new vote, insert
    vote = make_vote(second_bill.identifier)
    import_and_check(vote, 'insert', 2)
Ejemplo n.º 23
0
def test_full_vote():
    """Import one vote carrying a count and two voters, then check what was
    stored on the resulting VoteEvent."""
    jurisdiction = Jurisdiction.objects.create(id='jid', division_id='did')
    session = jurisdiction.legislative_sessions.create(name='1900', identifier='1900')
    Person.objects.create(id='person-id', name='Adam Smith')
    house = Organization.objects.create(id='org-id', name='House', classification='lower')
    bill = Bill.objects.create(id='bill-id', identifier='HB 1',
                               legislative_session=session,
                               from_organization=house)
    Organization.objects.create(id='com-id', name='Arbitrary Committee', parent=house)

    scraped = ScrapeVote(legislative_session='1900', motion_text='passage',
                         start_date='1900-04-01', classification='passage:bill',
                         result='pass', bill_chamber='lower',
                         bill=bill.identifier)
    scraped.set_count('yes', 20)
    scraped.yes('John Smith')
    scraped.no('Adam Smith')

    mock_importer = DumbMockImporter()
    bill_importer = BillImporter('jid', mock_importer, mock_importer)
    vote_importer = VoteImporter('jid', mock_importer, mock_importer, bill_importer)
    vote_importer.import_data([scraped.as_dict()])

    assert VoteEvent.objects.count() == 1
    event = VoteEvent.objects.get()
    assert event.legislative_session_id == session.id
    assert event.motion_classification == ['passage:bill']
    assert event.bill_id == bill.id

    # Only the 'yes' count was set, so exactly one count row is expected.
    tally = event.counts.get()
    assert tally.option == 'yes'
    assert tally.value == 20

    assert len(list(event.votes.all())) == 2
    for cast in event.votes.all():
        expected_option = 'yes' if cast.voter_name == 'John Smith' else 'no'
        assert cast.option == expected_option
Ejemplo n.º 24
0
    def scrape_vote(self, bill, name, url):
        """Scrape one roll-call PDF and yield the Vote it describes.

        ``name`` is expected to look like "<Senate|House> Vote on <x>,
        <motion>"; anything else is ignored. The PDF at ``url`` is downloaded
        to a temporary file, converted with ``self.pdf_to_lxml`` and its text
        split into lines: a YEAS / NAYS / ABSENT heading selects the bucket
        that the following lines (voter names) are added to.

        Nothing is yielded when ``name`` does not match or when no
        "Date: m/d/yyyy" is found in the document.
        """
        match = re.match(r'^(Senate|House) Vote on [^,]*,(.*)$', name)

        if not match:
            return

        chamber = {'Senate': 'upper', 'House': 'lower'}[match.group(1)]
        motion = match.group(2).strip()

        if motion.startswith('FINAL PASSAGE'):
            classification = 'passage'
        elif motion.startswith('AMENDMENT'):
            classification = 'amendment'
        elif 'ON 3RD READING' in motion:
            classification = 'reading:3'
        else:
            classification = 'other'

        # Only the path is used, so the descriptor returned by mkstemp is
        # closed straight away; the file itself is removed even when the
        # download or the conversion raises.
        fd, temp_path = tempfile.mkstemp()
        os.close(fd)
        try:
            self.urlretrieve(url, temp_path)
            html = self.pdf_to_lxml(temp_path)
        finally:
            os.remove(temp_path)

        body = html.xpath('string(/html/body)')

        date_match = re.search(r'Date: (\d{1,2}/\d{1,2}/\d{4})', body)
        if date_match is None:
            self.warning("BAD VOTE: date error")
            return
        start_date = dt.datetime.strptime(date_match.group(1), '%m/%d/%Y')

        section_options = {'YEAS': 'yes', 'NAYS': 'no', 'ABSENT': 'other'}
        voters = defaultdict(list)
        vote_type = None
        for line in body.replace(u'\xa0', '\n').split('\n'):
            line = line.replace('&nbsp;', '').strip()
            # Skip blank lines and "Total --"
            if not line or 'Total --' in line:
                continue

            if line in section_options:
                vote_type = section_options[line]
            elif line in ('Total', '--'):
                # End of the current section; ignore lines until the next
                # heading.
                vote_type = None
            elif vote_type:
                voters[vote_type].append(line)

        yes_count = len(voters['yes'])
        no_count = len(voters['no'])
        other_count = len(voters['other'])
        # The PDFs oddly don't say whether a vote passed or failed.
        # Hopefully passage just requires yes_votes > not_yes_votes
        passed = yes_count > (no_count + other_count)

        vote = Vote(chamber=chamber,
                    start_date=start_date.strftime('%Y-%m-%d'),
                    motion_text=motion,
                    result='pass' if passed else 'fail',
                    classification=classification,
                    bill=bill)
        vote.set_count('yes', yes_count)
        vote.set_count('no', no_count)
        vote.set_count('other', other_count)
        for option, names in voters.items():
            for voter in names:
                vote.vote(option, voter)
        vote.add_source(url)
        yield vote
Ejemplo n.º 25
0
    def scrape_house_vote(self, bill, url):
        """Scrape a House roll-call PDF and yield one Vote for it.

        The PDF is converted to text and read line by line:

        * a line ending in m/d/yyyy sets the vote date;
        * the "YEAS: n  NAYS: n  NOT VOTING: n" line carries the totals (an
          "EXCUSED: n" on the same line is added to the other count), the
          motion text is taken from two lines above it, and the vote passed
          when that line ends in ADOPTED or PASSED;
        * a "<SECTION>: n" heading selects the list that the names on the
          following lines are added to. Paired members are filed under yes
          or no according to the (YEA)/(NAY) marker after each name.

        Nothing is yielded when the file cannot be downloaded, or when no
        date or no totals line is found.
        """
        try:
            filename, _ = self.urlretrieve(url)
        except scrapelib.HTTPError:
            self.warning("missing vote file %s" % url)
            return
        try:
            text = convert_pdf(filename, 'text')
        finally:
            # Remove the download even when the conversion fails.
            os.remove(filename)

        # Lines are split while still undecoded and decoded one at a time,
        # as before, so the line boundaries stay exactly the same.
        lines = text.splitlines()

        section_options = {
            'YEAS': 'yes',
            'NAYS': 'no',
            'NOT VOTING': 'other',
            'EXCUSED': 'other',
            'PAIRED': 'paired'
        }

        vote_type = None
        votes = collections.defaultdict(list)
        date = None

        # Filled in by the totals line; totals_found records that it was seen
        # so a Vote is never built from unset values.
        totals_found = False
        motion = None
        passed = False
        yes_count = no_count = other_count = 0

        for idx, line in enumerate(lines):
            line = line.rstrip().decode('utf-8')
            match = re.search(r'(\d+)/(\d+)/(\d{4,4})$', line)
            if match:
                date = datetime.datetime.strptime(match.group(0), "%m/%d/%Y")
                continue

            match = re.match(
                r'\s+YEAS: (\d+)\s+NAYS: (\d+)\s+NOT VOTING: (\d+)', line)
            if match:
                motion = (lines[idx - 2].strip()).decode('utf-8')
                if not motion:
                    self.warning("No motion text found for vote")
                    motion = "PASSAGE"
                yes_count, no_count, other_count = [
                    int(g) for g in match.groups()
                ]

                exc_match = re.search(r'EXCUSED: (\d+)', line)
                if exc_match:
                    other_count += int(exc_match.group(1))

                passed = line.endswith(('ADOPTED', 'PASSED'))
                totals_found = True
                continue

            match = re.match(
                r'(YEAS|NAYS|NOT VOTING|PAIRED|EXCUSED):\s+(\d+)\s*$', line)
            if match:
                vote_type = section_options[match.group(1)]
                continue

            if vote_type == 'paired':
                for part in line.split('   '):
                    part = part.strip()
                    if not part:
                        continue
                    # Match each entry on its own; matching the whole line
                    # would attribute every pair on it to the first member.
                    pair_match = re.match(r'([^\(]+)\((YEA|NAY)\)', part)
                    if not pair_match:
                        self.warning("Unrecognized paired vote entry %r" % part)
                        continue
                    name, pair_type = pair_match.groups()
                    name = name.strip()
                    if pair_type == 'YEA':
                        votes['yes'].append(name)
                    elif pair_type == 'NAY':
                        votes['no'].append(name)
            elif vote_type:
                for name in line.split('   '):
                    name = name.strip()
                    if not name:
                        continue
                    votes[vote_type].append(name)

        if not date:
            self.warning("Syntax Error/Warning using 'convert_pdf'")
            return
        if not totals_found:
            self.warning("No vote totals found in %s" % url)
            return

        vote = Vote(chamber='lower',
                    start_date=date.strftime("%Y-%m-%d"),
                    motion_text=motion,
                    result='pass' if passed else 'fail',
                    classification='passage',
                    bill=bill)

        vote.set_count('yes', yes_count)
        vote.set_count('no', no_count)
        vote.set_count('other', other_count)
        vote.add_source(url)
        vote.pupa_id = url

        for key, values in votes.items():
            for value in values:
                vote.vote(key, value)

        yield vote
Ejemplo n.º 26
0
    def scrape_votes(self, bill, url):
        """Scrape every roll call on the vote page at ``url`` and yield Votes.

        Each roll call starts at a ``<p>`` whose text matches
        "OKLAHOMA HOUSE" or "OKLAHOMA STATE SENATE". Relative to that header
        the method reads the motion (a fixed sibling offset that differs per
        chamber), the "RCS#" number (used to skip repeated roll calls and to
        build ``pupa_id``), the date from the paragraph after the RCS line,
        and then the tally headings and voter names up to a line containing
        "*****".

        A roll call whose tally heading carries trailing text is treated as
        malformed and skipped. If a motion is empty the whole page is
        abandoned.
        """
        page = lxml.html.fromstring(self.get(url).text.replace(u'\xa0', ' '))

        seen_rcs = set()

        re_ns = "http://exslt.org/regular-expressions"
        path = r"//p[re:test(text(), 'OKLAHOMA\s+(HOUSE|STATE\s+SENATE)')]"
        # Compiled once; the same pattern is tried against every paragraph.
        tally_re = re.compile(
            r'(YEAS|NAYS|EXCUSED|VACANT|CONSTITUTIONAL '
            r'PRIVILEGE|NOT VOTING|N/V)\s*:\s*(\d+)(.*)')
        for header in page.xpath(path, namespaces={'re': re_ns}):
            bad_vote = False
            # Each chamber has the motion name on a different line of the file
            if 'HOUSE' in header.xpath("string()"):
                chamber = 'lower'
                motion_index = 8
            else:
                chamber = 'upper'
                motion_index = 13

            motion = header.xpath(
                "string(following-sibling::p[%d])" % motion_index).strip()
            motion = re.sub(r'\s+', ' ', motion)
            if not motion.strip():
                self.warning("Motion text not found")
                return
            match = re.match(r'^(.*) (PASSED|FAILED)$', motion)
            if match:
                motion = match.group(1)
                passed = match.group(2) == 'PASSED'
            else:
                # Decided from the counts once they have been read.
                passed = None

            rcs_p = header.xpath(
                "following-sibling::p[contains(., 'RCS#')]")[0]
            rcs_line = rcs_p.xpath("string()").replace(u'\xa0', ' ')
            rcs = re.search(r'RCS#\s+(\d+)', rcs_line).group(1)

            if rcs in seen_rcs:
                continue
            seen_rcs.add(rcs)

            date_line = rcs_p.getnext().xpath("string()")
            date = re.search(r'\d+/\d+/\d+', date_line).group(0)
            date = datetime.datetime.strptime(date, "%m/%d/%Y").date()

            vtype = None
            counts = collections.defaultdict(int)
            votes = collections.defaultdict(list)

            # Tally headings before the first real YEAS line are not treated
            # as voter sections.
            seen_yes = False

            for sib in header.xpath("following-sibling::p")[13:]:
                line = sib.xpath("string()").replace('\r\n', ' ').strip()
                if "*****" in line:
                    break
                match = tally_re.match(line)
                if match:
                    if match.group(1) == 'YEAS' and 'RCS#' not in line:
                        vtype = 'yes'
                        seen_yes = True
                    elif match.group(1) == 'NAYS' and seen_yes:
                        vtype = 'no'
                    elif match.group(1) == 'VACANT':
                        continue  # skip these
                    elif seen_yes:
                        vtype = 'other'
                    if seen_yes and match.group(3).strip():
                        self.warning("Bad vote format, skipping.")
                        bad_vote = True
                    counts[vtype] += int(match.group(2))
                elif seen_yes:
                    for fragment in line.split('   '):
                        if 'HOUSE' in fragment or 'SENATE ' in fragment:
                            continue
                        # Strip before the emptiness test so whitespace-only
                        # fragments are not recorded as a voter named ''.
                        voter = fragment.strip()
                        if voter:
                            votes[vtype].append(voter)

            if bad_vote:
                continue

            if passed is None:
                passed = counts['yes'] > (counts['no'] + counts['other'])

            vote = Vote(chamber=chamber,
                        start_date=date.strftime('%Y-%m-%d'),
                        motion_text=motion,
                        result='pass' if passed else 'fail',
                        bill=bill,
                        classification='passage')
            vote.set_count('yes', counts['yes'])
            vote.set_count('no', counts['no'])
            vote.set_count('other', counts['other'])
            vote.pupa_id = url + '#' + rcs

            vote.add_source(url)

            for name in votes['yes']:
                vote.yes(name)
            for name in votes['no']:
                # A colon means a heading was mistaken for a voter name.
                if ':' in name:
                    raise Exception(name)
                vote.no(name)
            for name in votes['other']:
                vote.vote('other', name)

            yield vote
Ejemplo n.º 27
0
    def scrape_bill(self, bill_id):
        """Convert one bill from the legacy API into a Bill and its Votes.

        The bill dict is fetched with ``self.api('bills/<bill_id>?')`` and
        consumed key by key with ``pop``: every key must either be dropped
        explicitly or copied onto the new objects, and the asserts at the end
        of each vote and of the bill fail if anything is left over.

        Yields each converted Vote, then the Bill. Nothing is yielded when
        the first classification is 'miscellaneous', 'jres' or 'cres'.

        Raises:
            NotImplementedError: the bill has companions and ``self.state``
                is not one of the states companions are handled for.
        """
        old = self.api('bills/' + bill_id + '?')

        # not needed
        old.pop('id')
        old.pop('state')
        old.pop('level', None)
        old.pop('country', None)
        old.pop('created_at')
        old.pop('updated_at')
        old.pop('action_dates')
        old.pop('+subject', None)
        old.pop('+scraped_subjects', None)
        old.pop('subjects', [])

        classification = old.pop('type')

        # ca weirdness
        for unwanted in ('fiscal committee', 'urgency', 'local program', 'tax levy'):
            if unwanted in classification:
                classification.remove(unwanted)

        if classification[0] in ['miscellaneous', 'jres', 'cres']:
            return

        if classification == ['memorial resolution'] and self.state == 'ar':
            classification = ['memorial']
        if classification == ['concurrent memorial resolution'] and self.state == 'ar':
            classification = ['concurrent memorial']
        if classification == ['joint session resolution'] and self.state == 'il':
            classification = ['joint resolution']
        if classification == ['legislative resolution'] and self.state == 'ny':
            classification = ['resolution']

        if not old['title'] and self.state == 'me':
            old['title'] = '(unknown)'

        chamber = old.pop('chamber')
        if chamber == 'upper' and self.state in ('ne', 'dc'):
            chamber = 'legislature'
        elif chamber in ('joint', 'conference'):
            chamber = 'legislature'

        new = Bill(old.pop('bill_id'), old.pop('session'), old.pop('title'),
                   chamber=chamber, classification=classification)

        abstract = old.pop('summary', None)
        if abstract:
            new.add_abstract(abstract, note='')

        for title in old.pop('alternate_titles'):
            new.add_title(title)

        for doc in old.pop('documents'):
            new.add_document_link(doc['name'], doc['url'], on_duplicate='ignore')

        for doc in old.pop('versions'):
            new.add_version_link(doc['name'], doc['url'], media_type=doc.pop('mimetype', ''))

        for subj in old.pop('scraped_subjects', []):
            if subj:
                new.add_subject(subj)

        for spon in old.pop('sponsors'):
            if spon.get('committee_id') is not None:
                entity_type = 'organization'
            elif spon.get('leg_id') is not None:
                entity_type = 'person'
            else:
                entity_type = ''
            new.add_sponsorship(spon['name'], spon['type'], entity_type,
                                spon['type'] == 'primary')

        for act in old.pop('actions'):
            actor = act['actor']
            actor_lower = actor.lower()
            is_ca = self.state == 'ca'
            if actor_lower in ('governor', 'mayor', 'secretary of state'):
                actor = 'executive'
            elif actor_lower == 'house' or (actor_lower.startswith('lower (') and is_ca):
                actor = 'lower'
            # 'upper`' was a typo for 'upper'; the misspelling is still
            # accepted so no input that matched before stops matching.
            elif actor_lower in ('senate', 'upper', 'upper`') or (
                    actor_lower.startswith('upper (') and is_ca):
                actor = 'upper'
            elif actor in ('joint', 'other', 'Data Systems', 'Speaker', 'clerk',
                           'Office of the Legislative Fiscal Analyst', 'Became Law w',
                           'conference') or (actor_lower.startswith('legislature (') and is_ca):
                actor = 'legislature'

            if actor in ('committee', 'sponsor') and self.state == 'pr':
                actor = 'legislature'

            # nebraska & DC
            if actor == 'upper' and self.state in ('ne', 'dc'):
                actor = 'legislature'

            if act['action']:
                newact = new.add_action(act['action'], act['date'][:10], chamber=actor,
                                        classification=[action_types[c] for c in act['type'] if c != 'other'])
                # Named `entity` so the loop variable no longer shadows the
                # `re` module.
                for entity in act.get('related_entities', []):
                    if entity['type'] == 'committee':
                        entity['type'] = 'organization'
                    elif entity['type'] == 'legislator':
                        entity['type'] = 'person'
                    newact.add_related_entity(entity['name'], entity['type'])

        for comp in old.pop('companions', []):
            # Companions are only mapped for these states. Other states used
            # to die with an UnboundLocalError; fail with a clear message
            # instead.
            if self.state not in ('nj', 'ny', 'mn'):
                raise NotImplementedError(
                    "companion bills are not handled for state %r" % self.state)
            new.add_related_bill(comp['bill_id'], comp['session'], 'companion')

        for abid in old.pop('alternate_bill_ids', []) + old.pop('+alternate_bill_ids', []):
            new.add_identifier(abid)

        # generic OpenStates stuff
        for openstates_id in old.pop('all_ids'):
            new.add_identifier(openstates_id, scheme='openstates')

        for source in old.pop('sources'):
            source.pop('retrieved', None)
            new.add_source(**source)

        ext_title = old.pop('+extended_title', None)
        if ext_title:
            new.add_title(ext_title, note='Extended Title')
        official_title = old.pop('+official_title', None)
        if official_title:
            new.add_title(official_title, note='Official Title')

        to_extras = ['+status', '+final_disposition', '+volume_chapter', '+ld_number', '+referral',
                     '+companion', '+description', '+fiscal_note_probable:',
                     '+preintroduction_required:', '+drafter', '+category:', '+chapter',
                     '+requester', '+transmittal_date:', '+by_request_of', '+bill_draft_number:',
                     '+bill_lr', '+bill_url', '+rcs_num', '+fiscal_note', '+impact_clause', '+fiscal_notes',
                     '+short_title', '+type_', '+conference_committee', 'conference_committee',
                     '+companion_bill_ids']
        for k in to_extras:
            v = old.pop(k, None)
            if v:
                new.extras[k.replace('+', '')] = v

        # votes
        # Keys every vote must carry but that are not copied over.
        required_vote_drops = ('id', 'state', 'bill_id', 'vote_id')
        # Keys that may be present and are simply discarded.
        optional_vote_drops = (
            'bill_chamber', '+state', '+country', '+level', '+vacant',
            '+not_voting', '+amended', '+excused', '+NV', '+AB', '+P', '+V',
            '+E', '+EXC', '+EMER', '+present', '+absent', '+seconded',
            '+moved', '+vote_type', '+actual_vote', '+skip_votes',
            '+bill_chamber', '+session', '+bill_id', '+bill_session',
            'committee', 'committee_id',
        )
        vote_extras = ['+record', '+method', 'method', '+filename', 'record', '+action',
                       '+location', '+rcs_num', '+type_', '+threshold', '+other_vote_detail',
                       '+voice_vote']

        vote_no = 1
        for vote in old.pop('votes'):
            for key in required_vote_drops:
                vote.pop(key)
            for key in optional_vote_drops:
                vote.pop(key, None)
            vtype = vote.pop('type', 'passage')

            if vtype == 'veto_override':
                vtype = ['veto-override']
            elif vtype == 'amendment':
                vtype = ['amendment-passage']
            elif vtype == 'other':
                vtype = ''
            else:
                vtype = ['bill-passage']

            # most states need identifiers for uniqueness, just do it everywhere
            identifier = vote['date'] + '-' + str(vote_no)
            vote_no += 1

            vote_chamber = vote.pop('chamber')
            if vote_chamber == 'upper' and self.state in ('ne', 'dc'):
                vote_chamber = 'legislature'
            elif vote_chamber == 'joint':
                vote_chamber = 'legislature'

            newvote = Vote(legislative_session=vote.pop('session'),
                           motion_text=vote.pop('motion'),
                           result='pass' if vote.pop('passed') else 'fail',
                           chamber=vote_chamber,
                           start_date=vote.pop('date'),
                           classification=vtype,
                           bill=new,
                           identifier=identifier)
            for vt in ('yes', 'no', 'other'):
                newvote.set_count(vt, vote.pop(vt + '_count'))
                for name in vote.pop(vt + '_votes'):
                    newvote.vote(vt, name['name'])

            for source in vote.pop('sources'):
                source.pop('retrieved', None)
                newvote.add_source(**source)

            # A vote without sources of its own falls back to the bill's.
            if not newvote.sources:
                newvote.sources = new.sources

            for k in vote_extras:
                v = vote.pop(k, None)
                if v:
                    newvote.extras[k.replace('+', '')] = v

            # Anything still left on the vote is a key this converter does
            # not know about.
            assert not vote, vote.keys()
            yield newvote

        assert not old, old.keys()

        yield new
Ejemplo n.º 28
0
    def scrape(self):
        """Yield one Vote for every (date, motion, vote block) triple found in
        the ``ContainerPanel`` sections of each page from ``self.iterpages``.

        Entries with an empty motion are skipped with a warning. Each vote
        block is read as groups of three ``div`` children: the first holds
        the voter, the second the choice ("Y", "N" or anything else).
        """
        choice_to_option = {"Y": "yes", "N": "no"}

        for page in self.iterpages():
            for panel in page.xpath('//div[@class="ContainerPanel"]'):
                dates = panel.xpath(".//font[@color='#276598']/b/text()")
                motion_texts = panel.xpath(".//div[@style='width:260px; float:left;']/text()")
                motions = [text.strip() for text in motion_texts]
                vote_blocks = panel.xpath(".//div[@style='width:150px; float:right;']")
                headers = panel.xpath(".//div[@class='HeaderContent']/b/text()")
                # First header mentioning a docket, if any.
                docket = next((h for h in headers if "docket" in h.lower()), None)

                for date, motion, block in zip(dates, motions, vote_blocks):
                    when = dt.datetime.strptime(date, "%m/%d/%Y")
                    motion = motion.strip()

                    if not motion:
                        self.warning("Skipping vote.")
                        continue

                    v = Vote(
                        session=self.session,
                        organization="Boston City Council",
                        type="other",
                        passed=False,
                        date=when.strftime("%Y-%m-%d"),
                        motion=motion,
                        yes_count=0,
                        no_count=0,
                    )

                    if docket:
                        v.set_bill(docket)

                    tally = {"yes": 0, "no": 0, "other": 0}

                    # Consume the child divs three at a time.
                    cells = iter(block.xpath("./div"))
                    for name_cell, choice_cell, _ in zip(cells, cells, cells):
                        how = choice_cell.text
                        who = name_cell.text
                        option = choice_to_option.get(how, "other")
                        if option == "yes":
                            v.yes(who)
                        elif option == "no":
                            v.no(who)
                        else:
                            v.other(who)
                        tally[option] += 1

                    # Replace the placeholder counts with the real tallies.
                    for count in v.vote_counts:
                        count["count"] = tally[count["vote_type"]]

                    v.add_source(DURL, note="root")
                    yield v
Ejemplo n.º 29
0
    def scrape_votes(self, session, zip_url):
        """Build Votes from the two roll-call tables in ``self.zf`` and yield
        them.

        ``tblrollcallsummary.txt`` supplies one pipe-delimited row per roll
        call (session year, body, vote number, timestamp, bill id, yeas,
        nays, ..., motion). A Vote is created for every row of ``session``
        whose bill is in ``self.bills_by_id``, keyed by body + vote number.

        ``tblrollcallhistory.txt`` supplies one row per member per roll call;
        each is recorded on the matching Vote as yes, no or other. The
        "other" count is the number of such rows that are neither "Yea" nor
        "Nay" and is only set on roll calls that received at least one
        history row.
        """
        votes = {}
        last_line = []

        for line in self.zf.open("tblrollcallsummary.txt"):
            if line.strip() == "":
                continue

            line = line.split("|")
            if len(line) < 14:
                # NOTE(review): short rows look like records broken across
                # two physical lines. The fragment is still parsed below, and
                # a continuation re-uses the remembered first fragment.
                # Behaviour kept as-is; confirm against real data.
                if len(last_line + line[1:]) == 14:
                    line = last_line
                    self.warning("used bad vote line")
                else:
                    last_line = line
                    self.warning("bad vote line %s" % "|".join(line))
            session_yr = line[0]
            body = line[1]
            vote_num = line[2]
            timestamp = line[3]
            bill_id = line[4].strip()
            yeas = int(line[5])
            nays = int(line[6])
            # present = int(line[7])
            # absent = int(line[8])
            motion = line[11].strip() or "[not available]"

            if session_yr == session and bill_id in self.bills_by_id:
                actor = "lower" if body == "H" else "upper"
                when = dt.datetime.strptime(timestamp, "%m/%d/%Y %I:%M:%S %p")
                # TODO: stop faking passed somehow
                passed = yeas > nays
                vote = Vote(
                    chamber=actor,
                    start_date=when.strftime("%Y-%m-%d"),
                    motion_text=motion,
                    result="pass" if passed else "fail",
                    classification="passage",
                    bill=self.bills_by_id[bill_id],
                )
                vote.set_count("yes", yeas)
                vote.set_count("no", nays)
                vote.add_source(zip_url)
                votes[body + vote_num] = vote

        # Running "other" tally per roll call. Resetting a single counter on
        # every row left each vote with a count of 0 or 1, decided by the
        # last row seen.
        other_counts = {}

        for line in self.zf.open("tblrollcallhistory.txt"):
            # 2012    | H   | 2    | 330795  | HB309  | Yea |1/4/2012 8:27:03 PM
            session_yr, body, v_num, employee, bill_id, how, _date = line.split(
                "|")

            if not bill_id:
                continue

            if session_yr == session and bill_id.strip() in self.bills_by_id:
                try:
                    leg = self.legislators[employee]["name"]
                except KeyError:
                    self.warning("Error, can't find person %s" % employee)
                    continue

                how = how.strip()
                key = body + v_num
                if key not in votes:
                    self.warning("Skipping processing this vote:")
                    self.warning("Bad ID: %s" % key)
                    continue

                other_counts.setdefault(key, 0)
                # code = self.legislators[employee]['seat']
                if how == "Yea":
                    votes[key].yes(leg)
                elif how == "Nay":
                    votes[key].no(leg)
                else:
                    votes[key].other(leg)
                    other_counts[key] += 1

        for key, other_count in other_counts.items():
            votes[key].set_count("other", other_count)

        yield from votes.values()
Ejemplo n.º 30
0
    def parse_html_vote(self, bill, actor, date, motion, url, uniqid):
        """Parse the HTML roll-call page at ``url`` and yield one Vote.

        The first ``<b>`` (or, when it is empty, the first ``<center>``)
        describes the vote. Voice votes and pages with no result are skipped,
        and committee votes are handed to ``self.scrape_committee_vote``.
        Otherwise each remaining ``<b>`` heading ("<label> - <count>") is
        paired with the table of names at the same position.

        Raises:
            NotImplementedError: the description says neither "Passed" nor
                "Failed" and is not one of the recognised no-result pages.
        """
        try:
            html = self.get(url).text
        except scrapelib.HTTPError:
            self.warning("A vote page not found for bill {}".format(
                bill.identifier))
            return
        doc = lxml.html.fromstring(html)
        doc.make_links_absolute(url)

        descr = doc.xpath("//b")[0].text_content()
        if descr == '':
            # New page method
            descr = doc.xpath("//center")[0].text

        if "on voice vote" in descr:
            return

        if "committee" in descr.lower():
            yield from self.scrape_committee_vote(bill, actor, date, motion,
                                                  doc, url, uniqid)
            return

        if "Passed" in descr:
            passed = True
        elif "Failed" in descr:
            passed = False
        elif "UTAH STATE LEGISLATURE" in descr or descr.strip() == '-':
            # Pages that carry no result at all.
            return
        else:
            self.warning(descr)
            raise NotImplementedError("Can't see if we passed or failed")

        headings = doc.xpath("//b")[1:]
        tables = doc.xpath("//table")
        tallies = {}
        for heading, table in zip(headings, tables):
            pieces = [piece.strip() for piece in heading.text_content().split("-", 1)]
            if len(pieces) != 2:
                # Not a "<label> - <count>" heading.
                continue
            label, number = pieces
            tallies[label] = {
                "count": int(number),
                "people": [
                    cell.text_content().strip()
                    for cell in table.xpath(".//font[@face='Arial']")
                ],
            }

        outcome = 'pass' if passed else 'fail'
        vote = Vote(chamber=actor,
                    start_date=date,
                    motion_text=motion,
                    result=outcome,
                    bill=bill,
                    classification='passage',
                    identifier=str(uniqid))
        for option, label in (('yes', 'Yeas'),
                              ('no', 'Nays'),
                              ('other', 'Absent or not voting')):
            vote.set_count(option, tallies[label]['count'])
        vote.add_source(url)

        for person in tallies['Yeas']['people']:
            vote.yes(person)
        for person in tallies['Nays']['people']:
            vote.no(person)
        for person in tallies['Absent or not voting']['people']:
            vote.vote('other', person)

        yield vote
Ejemplo n.º 31
0
    def scrape_votes(self, bill, url):
        """Yield one Vote per distinct roll call (RCS#) found on a vote page.

        The page at ``url`` is searched for "OKLAHOMA HOUSE" / "OKLAHOMA STATE
        SENATE" header paragraphs. The paragraphs that follow each header
        supply the motion, the RCS number, the date, the tallies and the
        voter names.

        A header without motion text ends the whole scrape. A roll call whose
        tally line carries trailing text (after YEAS has been seen) is logged
        and skipped. A roll-call number already seen on this page is skipped.

        Raises:
            Exception: if a name recorded under "no" contains a colon.
        """
        html = self.get(url).text.replace(u"\xa0", " ")
        page = lxml.html.fromstring(html)

        handled_rcs = set()

        namespaces = {"re": "http://exslt.org/regular-expressions"}
        header_path = r"//p[re:test(text(), 'OKLAHOMA\s+(HOUSE|STATE\s+SENATE)')]"
        # Tally lines: a category label, a colon, a number; group 3 captures
        # whatever trails the number on the same line.
        tally_pattern = (
            r"(YEAS|NAYS|EXCUSED|VACANT|CONSTITUTIONAL "
            r"PRIVILEGE|NOT VOTING|N/V)\s*:\s*(\d+)(.*)"
        )

        for header in page.xpath(header_path, namespaces=namespaces):
            # Each chamber has the motion name on a different line of the file
            if "HOUSE" in header.xpath("string()"):
                chamber, motion_index = "lower", 8
            else:
                chamber, motion_index = "upper", 13

            motion_path = "string(following-sibling::p[%d])" % motion_index
            motion = re.sub(r"\s+", " ", header.xpath(motion_path).strip())
            if not motion.strip():
                self.warning("Motion text not found")
                return

            # A trailing PASSED/FAILED is split off the motion and used as
            # the result; otherwise the result is derived from the tallies.
            passed = None
            outcome = re.match(r"^(.*) (PASSED|FAILED)$", motion)
            if outcome:
                motion = outcome.group(1)
                passed = outcome.group(2) == "PASSED"

            rcs_p = header.xpath("following-sibling::p[contains(., 'RCS#')]")[0]
            rcs_text = rcs_p.xpath("string()").replace(u"\xa0", " ")
            rcs = re.search(r"RCS#\s+(\d+)", rcs_text).group(1)

            # Report each roll-call number only once per page.
            if rcs in handled_rcs:
                continue
            handled_rcs.add(rcs)

            date_text = rcs_p.getnext().xpath("string()")
            date = datetime.datetime.strptime(
                re.search(r"\d+/\d+/\d+", date_text).group(0), "%m/%d/%Y"
            ).date()

            option = None
            counts = collections.defaultdict(int)
            voters = collections.defaultdict(list)
            seen_yes = False
            malformed = False

            for sib in header.xpath("following-sibling::p")[13:]:
                line = sib.xpath("string()").replace("\r\n", " ").strip()
                if "*****" in line:
                    break

                tally = re.match(tally_pattern, line)
                if not tally:
                    # A line of names: only meaningful once YEAS was seen.
                    if seen_yes:
                        for chunk in line.split("   "):
                            if not chunk:
                                continue
                            if "HOUSE" in chunk or "SENATE " in chunk:
                                continue
                            voters[option].append(chunk.strip())
                    continue

                label = tally.group(1)
                if label == "YEAS" and "RCS#" not in line:
                    option = "yes"
                    seen_yes = True
                elif label == "NAYS" and seen_yes:
                    option = "no"
                elif label == "VACANT":
                    continue  # skip these
                elif seen_yes:
                    option = "other"

                if seen_yes and tally.group(3).strip():
                    self.warning("Bad vote format, skipping.")
                    malformed = True
                counts[option] += int(tally.group(2))

            if malformed:
                continue

            if passed is None:
                passed = counts["yes"] > (counts["no"] + counts["other"])

            vote = Vote(
                chamber=chamber,
                start_date=date.strftime("%Y-%m-%d"),
                motion_text=motion,
                result="pass" if passed else "fail",
                bill=bill,
                classification="passage",
            )
            for choice in ("yes", "no", "other"):
                vote.set_count(choice, counts[choice])
            vote.pupa_id = url + "#" + rcs

            vote.add_source(url)

            for voter in voters["yes"]:
                vote.yes(voter)
            for voter in voters["no"]:
                # A colon here means a tally line was mistaken for a name.
                if ":" in voter:
                    raise Exception(voter)
                vote.no(voter)
            for voter in voters["other"]:
                vote.vote("other", voter)

            yield vote
Ejemplo n.º 32
0
    def scrape(self):
        """Yield a Vote for each dated motion in every ContainerPanel.

        For each page from ``self.iterpages()``, every panel's dates, motion
        texts and ballot blocks are paired up positionally. Motions with empty
        text are logged and skipped. If one of the panel's header strings
        mentions a docket, the first such string is attached as the bill.
        """
        for page in self.iterpages():
            for panel in page.xpath('//div[@class="ContainerPanel"]'):
                dates = panel.xpath(".//font[@color='#276598']/b/text()")
                motions = [
                    text.strip()
                    for text in panel.xpath(
                        ".//div[@style='width:260px; float:left;']/text()")
                ]
                ballots = panel.xpath(
                    ".//div[@style='width:150px; float:right;']")
                headings = panel.xpath(
                    ".//div[@class='HeaderContent']/b/text()")
                # First header string mentioning a docket, or None.
                docket = next(
                    (text for text in headings if "docket" in text.lower()),
                    None)

                for date, motion, ballot in zip(dates, motions, ballots):
                    when = dt.datetime.strptime(date, "%m/%d/%Y")
                    motion = motion.strip()

                    if motion == "":
                        self.warning("Skipping vote.")
                        continue

                    v = Vote(session=self.session,
                             organization="Boston City Council",
                             type='other',
                             passed=False,
                             date=when.strftime("%Y-%m-%d"),
                             motion=motion,
                             yes_count=0,
                             no_count=0,)

                    if docket:
                        v.set_bill(docket)

                    tally = {"yes": 0, "no": 0, "other": 0}

                    # The ballot's child divs come in groups of three: the
                    # first holds the voter, the second the vote, the third is
                    # ignored. Incomplete trailing groups are dropped.
                    cells = ballot.xpath("./div")
                    triples = zip(cells[0::3], cells[1::3], cells[2::3])
                    for who_cell, how_cell, _ in triples:
                        how = how_cell.text
                        who = who_cell.text

                        if how == 'Y':
                            v.yes(who)
                            tally["yes"] += 1
                        elif how == 'N':
                            v.no(who)
                            tally["no"] += 1
                        else:
                            v.other(who)
                            tally["other"] += 1

                    # Overwrite the counts the Vote was created with.
                    for count in v.vote_counts:
                        count['count'] = tally[count['vote_type']]

                    v.add_source(DURL, note='root')
                    yield v
Ejemplo n.º 33
0
    def scrape_votes(self, session):
        """Yield a Vote for each roll call on a known bill in ``session``.

        Two pipe-delimited files are read. RollCallSummary.txt supplies one
        record per roll call (session, body, vote number, timestamp, bill id,
        yea/nay totals, motion). RollCallHistory.txt supplies one record per
        legislator per roll call and is used to attach individual votes.

        Only records whose session equals ``session`` and whose bill id is in
        ``self.bills_by_id`` are used.

        Summary records with fewer than 14 fields are treated as one record
        split over two lines: the fragment is remembered and skipped, and the
        next short line is joined to it if that gives exactly 14 fields.
        """
        votes = {}
        other_counts = defaultdict(int)
        last_line = []
        vote_url = 'http://gencourt.state.nh.us/dynamicdatafiles/RollCallSummary.txt'
        lines = self.get(vote_url).content.decode('utf-8').splitlines()

        for line in lines:

            if len(line) < 2:
                continue

            if line.strip() == "":
                continue

            line = line.split('|')
            if len(line) < 14:
                # line[0] is dropped when joining, as in the original length
                # check. NOTE(review): presumably it is the tail of the field
                # the line break fell in -- confirm against real data.
                merged = last_line + line[1:]
                if len(merged) == 14:
                    line = merged
                    last_line = []
                    self.warning('used bad vote line')
                else:
                    # Cannot be parsed on its own: remember it and move on
                    # instead of indexing into missing fields.
                    last_line = line
                    self.warning('bad vote line %s' % '|'.join(line))
                    continue
            # After decoding as UTF-8 a byte-order mark shows up as U+FEFF;
            # the legacy three-character form is still stripped as well.
            session_yr = line[0].replace('\xef\xbb\xbf', '').replace('\ufeff', '')
            body = line[1]
            vote_num = line[2]
            timestamp = line[3]
            bill_id = line[4].strip()
            yeas = int(line[5])
            nays = int(line[6])
            # present = int(line[7])
            # absent = int(line[8])
            motion = line[11].strip() or '[not available]'

            if session_yr == session and bill_id in self.bills_by_id:
                actor = 'lower' if body == 'H' else 'upper'
                time = dt.datetime.strptime(timestamp, '%m/%d/%Y %I:%M:%S %p')
                time = pytz.timezone('America/New_York').localize(
                    time).isoformat()
                # TODO: stop faking passed somehow
                passed = yeas > nays
                vote = Vote(chamber=actor,
                            start_date=time,
                            motion_text=motion,
                            result='pass' if passed else 'fail',
                            classification='passage',
                            bill=self.bills_by_id[bill_id])
                vote.set_count('yes', yeas)
                vote.set_count('no', nays)
                vote.add_source(vote_url)
                vote.pupa_id = session_yr + body + vote_num  # unique ID for vote
                votes[body + vote_num] = vote

        for line in self.get('http://gencourt.state.nh.us/dynamicdatafiles/RollCallHistory.txt') \
                        .content.decode('utf-8').splitlines():
            if len(line) < 2:
                continue

            # 2016|H|2|330795||Yea|
            # 2012    | H   | 2    | 330795  | 964 |  HB309  | Yea | 1/4/2012 8:27:03 PM
            session_yr, body, v_num, _, employee, bill_id, vote, date = \
                line.split('|')

            if not bill_id:
                continue

            if session_yr == session and bill_id.strip() in self.bills_by_id:
                try:
                    # Collapse runs of whitespace in the legislator's name.
                    leg = " ".join(self.legislators[employee]['name'].split())
                except KeyError:
                    self.warning("Error, can't find person %s" % employee)
                    continue

                vote = vote.strip()
                if body + v_num not in votes:
                    self.warning("Skipping processing this vote:")
                    self.warning("Bad ID: %s" % (body + v_num))
                    continue
                # code = self.legislators[employee]['seat']

                if vote == 'Yea':
                    votes[body + v_num].yes(leg)
                elif vote == 'Nay':
                    votes[body + v_num].no(leg)
                else:
                    votes[body + v_num].vote('other', leg)
                    # hack-ish, but will keep the vote count sync'd
                    other_counts[body + v_num] += 1
                    votes[body + v_num].set_count('other',
                                                  other_counts[body + v_num])
        for vote in votes.values():
            yield vote
Ejemplo n.º 34
0
    def scrape_chamber(self, chamber, session):
        """Yield a Vote for every roll call found in a chamber's journal PDFs.

        The session's journal index page is fetched, each linked PDF is
        downloaded and converted to text, and the text is walked line by line
        with a small state machine:

        * idle, until a line reading exactly "ROLL CALL";
        * collecting the motion text, until a line ending in "VOTING"
          (or "VOTING.");
        * collecting voter names per "LABEL: name; name; ..." group, until a
          decision line containing one of the outcome words.

        One Vote is yielded per bill referenced by the roll call.

        Args:
            chamber: 'lower' selects the house journal, anything else the
                senate journal.
            session: key into the session-slug table ('62' .. '65').

        Raises:
            KeyError: if ``session`` is not in the slug table.
            AssertionError: if the state machine is in both collecting states
                at once, or if "YEAS:"/"NAYS:" ends up in the motion text.
        """
        chamber_name = 'house' if chamber == 'lower' else 'senate'
        session_slug = {
            '62': '62-2011',
            '63': '63-2013',
            '64': '64-2015',
            '65': '65-2017',
        }[session]

        # Words whose presence marks the closing "decision" line of a vote.
        decision_words = ('passed', 'adopted', 'sustained', 'prevailed',
                          'lost', 'failed')
        # Bill references such as "HB 1001" or "SCR 4001".
        bill_re = r"(?i)(H|S|J)(C?)(B|R|M) (\d+)"

        # Open the index page of the session's Registers, and open each
        url = "http://www.legis.nd.gov/assembly/%s/journals/%s-journal.html" % (
            session_slug, chamber_name)
        page = self.lxmlize(url)
        pdfs = page.xpath("//a[contains(@href, '.pdf')]")
        for pdf in pdfs:

            # Initialize information about the vote parsing
            results = {}
            in_motion = False
            cur_vote = None
            in_vote = False
            cur_motion = ""
            bills = []

            # Determine which URLs the information was pulled from
            pdf_url = pdf.attrib['href']

            try:
                (path, response) = self.urlretrieve(pdf_url)
            except requests.exceptions.ConnectionError:
                continue

            # Convert the PDF to text
            data = convert_pdf(path, type='text').decode('utf-8')
            os.unlink(path)

            # Determine the date of the document
            date = re.findall(date_re, data)
            if date:
                date = date[0][0]
                cur_date = datetime.datetime.strptime(date, "%A, %B %d, %Y")
            else:
                # If no date is found anywhere, do not process the document
                self.warning("No date was found for the document; skipping.")
                continue

            # Check each line of the text for motion and vote information
            lines = data.splitlines()
            for line in lines:
                # Ignore lines with no information
                if re.search(chamber_re, line) or \
                        re.search(date_re, line) or \
                        re.search(page_re, line) or \
                        line.strip() == "":
                    pass

                # Ensure that motion and vote capturing are not _both_ active
                elif in_motion and in_vote:
                    raise AssertionError(
                        "Scraper should not be simultaneously processing " +
                        "motion name and votes, as it is for this motion: " +
                        cur_motion)

                # Start capturing motion text after a ROLL CALL header
                elif not in_motion and not in_vote:
                    if line.strip() == "ROLL CALL":
                        in_motion = True

                elif in_motion and not in_vote:
                    if cur_motion == "":
                        cur_motion = line.strip()
                    else:
                        cur_motion = cur_motion + " " + line.strip()

                    # ABSENT AND NOT VOTING marks the end of each motion name
                    # In this case, prepare to capture votes
                    if line.strip().endswith("VOTING") or \
                            line.strip().endswith("VOTING."):
                        in_motion = False
                        in_vote = True

                elif not in_motion and in_vote:
                    # Both facts are needed by several branches below, so
                    # they are computed once per line.
                    is_decision = any(
                        word in line.lower() for word in decision_words)
                    bill_refs = re.findall(bill_re, line)

                    # Ignore appointments and confirmations
                    if "The Senate advises and consents to the appointment" \
                            in line:
                        in_vote = False
                        cur_vote = None
                        results = {}
                        cur_motion = ""
                        bills = []

                    # If votes are being processed, record the voting members
                    elif ":" in line:
                        cur_vote, who = (x.strip() for x in line.split(":", 1))
                        who = [
                            x.strip() for x in who.split(';')
                            if x.strip() != ""
                        ]
                        results[cur_vote] = who

                        # A line not ending in ";" may have cut a name in two
                        name_may_be_continued = not line.endswith(";")

                    # Extracts bill numbers in the closing text
                    # used for when the closing text is multiple lines.
                    elif cur_vote is not None and bill_refs and \
                            not is_decision:
                        bills.extend(bill_refs)

                    # Continuation of the current group's list of names
                    elif cur_vote is not None and not is_decision:
                        who = [
                            x.strip() for x in line.split(";")
                            if x.strip() != ""
                        ]

                        if name_may_be_continued:
                            results[cur_vote][-1] = results[cur_vote][-1] + \
                                    " " + who.pop(0)

                        name_may_be_continued = not line.endswith(";")

                        results[cur_vote].extend(who)

                    # At the conclusion of a vote, save its data
                    elif is_decision:

                        in_vote = False
                        cur_vote = None

                        # Identify what is being voted on
                        # Throw a warning if improper information found
                        bills.extend(bill_refs)
                        if bills == [] or cur_motion.strip() == "":
                            # Log before clearing, so the warning still
                            # carries the motion text it complains about.
                            self.warning("No motion or bill name found: " +
                                         "motion name: " + cur_motion + "; " +
                                         "decision text: " + line.strip())
                            results = {}
                            cur_motion = ""
                            bills = []
                            continue

                        # If votes are found in the motion name, throw an error
                        if "YEAS:" in cur_motion or "NAYS:" in cur_motion:
                            raise AssertionError(
                                "Vote data found in motion name: " +
                                cur_motion)

                        # Use the collected results to determine who voted how
                        keys = {
                            "YEAS": "yes",
                            "NAYS": "no",
                            "ABSENT AND NOT VOTING": "other"
                        }
                        res = {
                            option: results.get(label, [])
                            for label, option in keys.items()
                        }

                        # Count the number of members voting each way
                        yes, no, other = \
                            len(res['yes']), \
                            len(res['no']), \
                            len(res['other'])
                        chambers = {
                            "H": "lower",
                            "S": "upper",
                            "J": "legislature"
                        }

                        # Almost all of the time, a vote only applies to one bill and this loop
                        # will only be run once.
                        # Some exceptions exist.

                        for bill in bills:

                            cur_bill_id = "%s%s%s %s" % bill

                            # Identify the source chamber for the bill
                            try:
                                bc = chambers[cur_bill_id[0]]
                            except KeyError:
                                bc = 'other'

                            # Determine whether or not the vote passed
                            if "over the governor's veto" in cur_motion.lower(
                            ):
                                VETO_SUPERMAJORITY = 2 / 3
                                passed = (yes /
                                          (yes + no) > VETO_SUPERMAJORITY)
                            else:
                                passed = (yes > no)
                            # Create a Vote object based on the scraped information
                            vote = Vote(
                                chamber=chamber,
                                start_date=cur_date.strftime('%Y-%m-%d'),
                                motion_text=cur_motion,
                                result='pass' if passed else 'fail',
                                legislative_session=session,
                                classification='passage',
                                bill=cur_bill_id,
                                bill_chamber=bc)

                            vote.add_source(pdf_url)
                            vote.add_source(url)
                            vote.set_count('yes', yes)
                            vote.set_count('no', no)
                            vote.set_count('other', other)
                            # For each category of voting members,
                            # add the individuals to the Vote object
                            for key in res:
                                for voter in res[key]:
                                    vote.vote(key, voter)

                            # Check the vote counts in the motion text against
                            # the parsed results
                            for category_name in keys:
                                # Need to search for the singular, not plural, in the text
                                # so it can find, for example,  " 1 NAY "
                                vote_re = r"(\d+)\s{}".format(
                                    category_name[:-1])
                                motion_count = int(
                                    re.findall(vote_re, cur_motion)[0])

                                for item in vote.counts:
                                    if item['option'] == keys[category_name]:
                                        vote_count = item['value']

                                if motion_count != vote_count:
                                    self.warning(
                                        "Motion text vote counts ({}) ".format(
                                            motion_count) +
                                        "differed from roll call counts ({}) ".
                                        format(vote_count) +
                                        "for {0} on {1}".format(
                                            category_name, cur_bill_id))

                                    # NOTE(review): this only rebinds the
                                    # local ``vote_count``; the stored count
                                    # in ``vote.counts`` is left unchanged.
                                    # Confirm whether item['value'] was meant
                                    # to be overwritten.
                                    for item in vote.counts:
                                        if item['option'] == keys[
                                                category_name]:
                                            vote_count = motion_count

                            yield vote

                        # With the vote successfully processed,
                        # wipe its data and continue to the next one
                        results = {}
                        cur_motion = ""
                        bills = []
    def scrape_votes(self, session, zip_url):
        """Yield a Vote for each roll call on a known bill in ``session``.

        Two pipe-delimited members of ``self.zf`` are read.
        tblrollcallsummary.txt supplies one record per roll call;
        tblrollcallhistory.txt supplies one record per legislator per roll
        call and is used to attach individual votes.

        Only records whose session equals ``session`` and whose bill id is in
        ``self.bills_by_id`` are used. ``zip_url`` is recorded as the source
        of every Vote.

        Summary records with fewer than 14 fields are treated as one record
        split over two lines: the fragment is remembered and skipped, and the
        next short line is joined to it if that gives exactly 14 fields.
        """
        votes = {}
        # Running number of non-yea/nay votes per roll call.
        other_counts = {}
        last_line = []

        for line in self.zf.open('tblrollcallsummary.txt'):
            # Archive members may be read as bytes; the fields below are
            # split and compared as text.
            if isinstance(line, bytes):
                line = line.decode('utf-8')
            if line.strip() == "":
                continue

            line = line.split('|')
            if len(line) < 14:
                # line[0] is dropped when joining, as in the original length
                # check. NOTE(review): presumably it is the tail of the field
                # the line break fell in -- confirm against real data.
                merged = last_line + line[1:]
                if len(merged) == 14:
                    line = merged
                    last_line = []
                    self.warning('used bad vote line')
                else:
                    # Cannot be parsed on its own: remember it and move on
                    # instead of indexing into missing fields.
                    last_line = line
                    self.warning('bad vote line %s' % '|'.join(line))
                    continue
            # Drop a byte-order mark in front of the first record.
            session_yr = line[0].replace(u'\ufeff', '')
            body = line[1]
            vote_num = line[2]
            timestamp = line[3]
            bill_id = line[4].strip()
            yeas = int(line[5])
            nays = int(line[6])
            # present = int(line[7])
            # absent = int(line[8])
            motion = line[11].strip() or '[not available]'

            if session_yr == session and bill_id in self.bills_by_id:
                actor = 'lower' if body == 'H' else 'upper'
                time = dt.datetime.strptime(timestamp, '%m/%d/%Y %I:%M:%S %p')
                # TODO: stop faking passed somehow
                passed = yeas > nays
                vote = Vote(chamber=actor,
                            start_date=time.strftime("%Y-%m-%d"),
                            motion_text=motion,
                            result='pass' if passed else 'fail',
                            classification='passage',
                            bill=self.bills_by_id[bill_id])
                vote.set_count('yes', yeas)
                vote.set_count('no', nays)
                vote.add_source(zip_url)
                votes[body + vote_num] = vote

        for line in self.zf.open('tblrollcallhistory.txt'):
            if isinstance(line, bytes):
                line = line.decode('utf-8')
            if line.strip() == "":
                continue

            # 2012    | H   | 2    | 330795  | HB309  | Yea |1/4/2012 8:27:03 PM
            session_yr, body, v_num, employee, bill_id, vote, date \
                    = line.split('|')

            if not bill_id:
                continue

            if session_yr == session and bill_id.strip() in self.bills_by_id:
                try:
                    leg = self.legislators[employee]['name']
                except KeyError:
                    self.warning("Error, can't find person %s" % employee)
                    continue

                vote = vote.strip()
                vote_key = body + v_num
                if vote_key not in votes:
                    self.warning("Skipping processing this vote:")
                    self.warning("Bad ID: %s" % (body + v_num))
                    continue
                # code = self.legislators[employee]['seat']
                if vote == 'Yea':
                    votes[vote_key].yes(leg)
                elif vote == 'Nay':
                    votes[vote_key].no(leg)
                else:
                    votes[vote_key].other(leg)
                    other_counts[vote_key] = other_counts.get(vote_key, 0) + 1
                # Keep the stored count equal to the running per-vote total
                # (0 while only yeas/nays have been seen).
                votes[vote_key].set_count('other',
                                          other_counts.get(vote_key, 0))
        for vote in votes.values():
            yield vote
Ejemplo n.º 36
0
    def scrape_vote(self, bill, name, url):
        """Yield a single Vote parsed from a roll-call page.

        URLs containing "VOTE/h" are read with the lower-chamber column
        layout; all others use the upper-chamber layout. Pages containing
        "BUDGET ADDRESS" produce nothing.

        Args:
            bill: bill the vote belongs to; its ``legislative_session`` is
                used as the year of the vote date.
            name: motion text for the Vote.
            url: address of the roll-call page, also recorded as the source.
        """
        if "VOTE/h" in url:
            vote_chamber, cols = "lower", (1, 5, 9, 13)
            name_offset, yes_offset, no_offset = 3, 0, 1
        else:
            vote_chamber, cols = "upper", (1, 6)
            name_offset, yes_offset, no_offset = 4, 1, 2

        html = self.get(url, verify=False).text

        if "BUDGET ADDRESS" in html:
            return

        page = lxml.html.fromstring(html)

        def span_text(phrase):
            # Text of the span(s) containing ``phrase``.
            return page.xpath("string(//span[contains(., '%s')])" % phrase)

        def first_number(phrase):
            # First run of digits in the span text for ``phrase``.
            return int(
                re.match(r"[^\d]*(\d+)[^\d]*", span_text(phrase)).group(1))

        yes_count = first_number("Those voting Yea")
        no_count = first_number("Those voting Nay")
        other_count = first_number("Those absent")
        need_count = first_number("Necessary for")

        # The page gives only month/day; the year comes from the bill.
        taken_on = span_text("Taken on")
        month_day = re.match(
            r".*Taken\s+on\s+(\d+/\s?\d+)", taken_on).group(1).replace(" ", "")
        date = datetime.datetime.strptime(
            month_day + " " + bill.legislative_session, "%m/%d %Y").date()

        # NOTE(review): a vote passes only when yeas strictly exceed the
        # "Necessary for" number -- confirm that equality should not pass.
        outcome = "pass" if yes_count > need_count else "fail"

        # not sure about classification.
        vote = Vote(
            chamber=vote_chamber,
            start_date=date,
            motion_text=name,
            result=outcome,
            classification="passage",
            bill=bill,
        )
        vote.set_count("yes", yes_count)
        vote.set_count("no", no_count)
        vote.set_count("other", other_count)
        vote.add_source(url)

        table = page.xpath("//table")[0]
        for row in table.xpath("tr"):
            # Each row holds several voters side by side, one per column group.
            for col in cols:
                voter = row.xpath(
                    "string(td[%d])" % (col + name_offset)).strip()

                if voter in ("", "VACANT"):
                    continue
                voter = string.capwords(voter)

                if "Y" in row.xpath("string(td[%d])" % (col + yes_offset)):
                    vote.yes(voter)
                elif "N" in row.xpath("string(td[%d])" % (col + no_offset)):
                    vote.no(voter)
                else:
                    vote.vote("other", voter)

        yield vote
Ejemplo n.º 37
0
def toy_vote():
    """Return a small, fixed ``Vote`` with one source attached.

    The vote is built from hard-coded sample values (session "2009", a
    passing bill-passage motion) and carries a single source URL with the
    note "foo".
    """
    sample = Vote(
        session="2009",
        motion_text="passage of the bill",
        start_date="2009-01-07",
        result="pass",
        classification="passage:bill",
    )
    sample.add_source("http://uri.example.com/", note="foo")
    return sample
Ejemplo n.º 38
0
    def scrape_vote(self, bill, name, url):
        """Scrape one roll-call tally page and yield the resulting ``Vote``.

        The chamber and the table column layout are chosen from the URL:
        a URL containing ``"VOTE/h"`` is treated as a lower-chamber page,
        anything else as an upper-chamber page.

        Args:
            bill: bill the vote belongs to; ``bill.legislative_session`` is
                appended to the page's month/day and parsed with ``%Y`` to
                form the vote date.
            name: motion text recorded on the vote.
            url: address of the tally page; also recorded as the vote source.

        Yields:
            A single ``Vote``. Nothing is yielded when the page text contains
            ``'BUDGET ADDRESS'``.

        Raises:
            AttributeError: if one of the expected count/date spans does not
                match its pattern (``re.match`` returns ``None``).
            IndexError: if the page contains no ``<table>``.
        """
        if "VOTE/h" in url:
            vote_chamber = 'lower'
            cols = (1, 5, 9, 13)
            name_offset = 3
            yes_offset = 0
            no_offset = 1
        else:
            vote_chamber = 'upper'
            cols = (1, 6)
            name_offset = 4
            yes_offset = 1
            no_offset = 2

        # Connecticut's SSL is causing problems with Scrapelib, so use Requests
        # NOTE: certificate verification is deliberately disabled here.
        html = requests.get(url, verify=False).text

        if 'BUDGET ADDRESS' in html:
            return

        doc = lxml.html.fromstring(html)

        def span_count(label):
            # First run of digits in the text of the <span> mentioning `label`.
            text = doc.xpath("string(//span[contains(., '%s')])" % label)
            return int(re.match(r'[^\d]*(\d+)[^\d]*', text).group(1))

        yes_count = span_count('Those voting Yea')
        no_count = span_count('Those voting Nay')
        other_count = span_count('Those absent')
        need_count = span_count('Necessary for')

        # The page only carries month/day; the year comes from the session.
        taken_on = doc.xpath("string(//span[contains(., 'Taken on')])")
        month_day = re.match(r'.*Taken\s+on\s+(\d+/\s?\d+)', taken_on).group(1)
        month_day = month_day.replace(' ', '')
        date = datetime.datetime.strptime(
            month_day + " " + bill.legislative_session, "%m/%d %Y").date()

        # not sure about classification.
        vote = Vote(chamber=vote_chamber,
                    start_date=date,
                    motion_text=name,
                    # need_count is the number of yeas required, so reaching
                    # it exactly is a pass (a strict '>' would misreport it).
                    result='pass' if yes_count >= need_count else 'fail',
                    classification='passage',
                    bill=bill
                    )
        vote.set_count('yes', yes_count)
        vote.set_count('no', no_count)
        vote.set_count('other', other_count)
        vote.add_source(url)

        table = doc.xpath("//table")[0]
        for row in table.xpath("tr"):
            # Each row holds several legislators side by side; `cols` gives
            # the starting cell of each group and the offsets locate the
            # name / Y / N cells relative to it.
            for i in cols:
                voter = row.xpath("string(td[%d])" % (
                    i + name_offset)).strip()

                if not voter or voter == 'VACANT':
                    continue

                if "Y" in row.xpath("string(td[%d])" % (i + yes_offset)):
                    vote.yes(voter)
                elif "N" in row.xpath("string(td[%d])" % (i + no_offset)):
                    vote.no(voter)
                else:
                    vote.vote('other', voter)

        yield vote
Ejemplo n.º 39
0
    def get_bills(self):
        """Yield sample ``Vote`` and ``Bill`` objects from hard-coded data.

        For every fixture entry a ``Bill`` is created, then each of its votes
        is built and yielded, and finally the bill itself is yielded after
        its sponsorships, version links and actions have been attached.

        Only the 'yes' and 'no' rolls are recorded on each vote; the 'other'
        roll and ``other_count`` present in the fixture data are not used.
        """
        fixtures = [
            {
                "name": "HB500",
                "title": "Makes various changes to provisions governing employment practices",
                "session": "2011",
                "versions": ["http://example.com/HB500.pdf"],
                "actions": [
                    {
                        "description": "Introduced",
                        "actor": "Committee on Pudding Pops",
                        "date": "2014-04-15",
                    },
                    {
                        "date": "2014-04-15",
                        "description": "Read first time. Referred to Committee on Commerce and Labor. To printer.",
                        "actor": "Test City Council",
                    },
                    {
                        "date": "2014-04-15",
                        "description": "From printer. To committee.",
                        "actor": "Test City Council",
                    },
                    {
                        "date": "2014-04-15",
                        "description": "From committee: Do pass.",
                        "actor": "Rules",
                    },
                    {
                        "description": "Signed into law",
                        "actor": "Fiscal Committee",
                        "date": "2014-04-19",
                    },
                ],
                "sponsors_people": [],
                "sponsors_committee": [],
                "votes": [
                    {
                        "motion": "Vote by the Committee on the Whole.",
                        "yes_count": 1,
                        "other_count": 1,
                        "no_count": 3,
                        "passed": True,
                        "type": "passage:bill",
                        "date": "2014-04-15",
                        "session": "2011",
                        "roll": {
                            "yes": [
                                "Eliana Meyer",
                            ],
                            "no": [
                                "Gunnar Luna",
                                "Regina Cruz",
                                "Makenzie Keller",
                            ],
                            "other": [
                                "Unknown Person",
                            ],
                        },
                    },
                ],
            },
            {
                "name": "HB101",
                "title": "Joint county ditch proceedings-conduct by teleconference or video conference",
                "session": "2011",
                "versions": ["http://example.com/HB101.pdf"],
                "actions": [
                    {
                        "description": "Introduced",
                        "actor": "council",
                        "date": "2014-04-15",
                    },
                    {
                        "description": "Referred to the Committee on Pudding Pops",
                        "actor": "council",
                        "date": "2014-04-16",
                    },
                    {
                        "description": "Reported favorably",
                        "actor": "council",
                        "date": "2014-04-16",
                    },
                    {
                        "description": "Referred to the Bills in the Third Read",
                        "actor": "council",
                        "date": "2014-04-17",
                    },
                    {
                        "description": "Vote by the Committee on the Whole. Do pass.",
                        "actor": "council",
                        "date": "2014-04-18",
                    },
                    {
                        "description": "Signed into law",
                        "actor": "council",
                        "date": "2014-04-19",
                    },
                ],
                "sponsors_people": [
                    "Shayla Fritz",
                    "Gunnar Luna",
                ],
                "sponsors_committee": [
                    "Standing Committee on Public Safety",
                ],
                "votes": [
                    {
                        "motion": "Vote by the Committee on the Whole.",
                        "yes_count": 3,
                        "no_count": 1,
                        "passed": True,
                        "type": "passage:bill",
                        "date": "2014-04-18",
                        "session": "2011",
                        "roll": {
                            "yes": [
                                "Gunnar Luna",
                                "Regina Cruz",
                                "Makenzie Keller",
                            ],
                            "no": [
                                "Eliana Meyer",
                            ],
                            "other": [],
                        },
                    },
                ],
            },
        ]

        # (fixture key, entity_type) pairs, processed in this order.
        sponsor_groups = (
            ('sponsors_people', 'person'),
            ('sponsors_committee', 'organization'),
        )

        for spec in fixtures:
            bill_obj = Bill(
                identifier=spec['name'],
                title=spec['title'],
                legislative_session=spec['session'],
            )
            bill_obj.add_source("ftp://example.com/some/bill")

            # Votes are emitted before the bill they were listed under.
            for tally in spec['votes']:
                if tally['passed']:
                    outcome = 'pass'
                else:
                    outcome = 'fail'

                council_id = make_psuedo_id(
                    name="Test City Council",
                    classification="legislature",
                )
                vote_obj = Vote(
                    motion_text=tally['motion'],
                    organization_id=council_id,
                    yes_count=tally['yes_count'],
                    no_count=tally['no_count'],
                    result=outcome,
                    classification=tally['type'],
                    start_date=tally['date'],
                    legislative_session=tally['session'],
                )
                vote_obj.add_source("http://example.com/votes/vote.xls")

                roll = tally['roll']
                for yes_voter in roll['yes']:
                    vote_obj.yes(yes_voter)
                for no_voter in roll['no']:
                    vote_obj.no(no_voter)

                yield vote_obj

            for key, entity_type in sponsor_groups:
                for sponsor_name in spec[key]:
                    bill_obj.add_sponsorship(
                        name=sponsor_name,
                        classification='primary',
                        entity_type=entity_type,
                        primary=True,
                    )

            for link in spec['versions']:
                bill_obj.add_version_link(note="Bill Version", url=link)

            for action in spec['actions']:
                # Swap the free-text 'actor' for an 'organization' pseudo id
                # before handing the dict over as keyword arguments.
                actor = action.pop('actor')
                action['organization'] = make_psuedo_id(name=actor)
                bill_obj.add_action(**action)

            yield bill_obj