Ejemplo n.º 1
0
def build_vote(session, bill_id, url, vote_record, chamber, motion_text):
    """Assemble a ``VoteEvent`` from an already-parsed roll call.

    ``vote_record`` must provide a ``'date'`` entry exposing ``strftime`` and
    one collection of voter names under each of ``'yes'``, ``'no'``,
    ``'excused'``, ``'absent'`` and ``'other'``. The vote is marked as passed
    when the yes collection is strictly larger than the no collection.
    ``url`` is stored both as the ``pupa_id`` and as the source of the event.

    Returns the populated ``VoteEvent``.
    """
    options = ('yes', 'no', 'excused', 'absent', 'other')

    # Substitute bills are marked as XHB in the vote; map back to the HB id.
    bill_id = bill_id.replace('XHB', 'HB')

    if len(vote_record['yes']) > len(vote_record['no']):
        outcome = 'pass'
    else:
        outcome = 'fail'
    when = vote_record['date'].strftime('%Y-%m-%d')
    # The bill's chamber is taken from the first letter of its identifier.
    if bill_id[0] == 'S':
        origin = 'upper'
    else:
        origin = 'lower'

    vote_event = VoteEvent(
        result=outcome,
        chamber=chamber,
        start_date=when,
        motion_text=motion_text,
        classification='passage',
        legislative_session=session,
        bill=bill_id,
        bill_chamber=origin,
    )
    vote_event.pupa_id = url

    # Totals first, then the individual votes, one option at a time.
    for option in options:
        vote_event.set_count(option, len(vote_record[option]))
    for option in options:
        for voter in vote_record[option]:
            vote_event.vote(option, voter)

    vote_event.add_source(url)
    return vote_event
Ejemplo n.º 2
0
    def add_vote(self, bill, chamber, date, text, url):
        """Build a ``VoteEvent`` from an action text carrying an ayes/noes tally.

        The first "Ayes n, Noes/Nays m" pair found in ``text`` supplies the
        counts (an ``IndexError`` is raised when there is none). The
        classification comes from the first ``motion_classifiers`` pattern
        matching the start of ``text``, defaulting to 'other'. When ``url`` is
        given it is added as a source and the individual votes are loaded via
        ``add_house_votes`` ('av' in the url) or ``add_senate_votes`` ('sv').

        Returns the ``VoteEvent``.
        """
        tallies = re.findall(r'Ayes,?[\s]?(\d+)[,;]\s+N(?:oes|ays),?[\s]?(\d+)', text)
        ayes_text, noes_text = tallies[0]
        yes = int(ayes_text)
        no = int(noes_text)

        # First classifier pattern that matches wins; fall back to 'other'.
        vtype = next(
            (kind for pattern, kind in motion_classifiers.items()
             if re.match(pattern, text)),
            'other',
        )

        outcome = 'pass' if yes > no else 'fail'
        v = VoteEvent(
            chamber=chamber,
            start_date=TIMEZONE.localize(date),
            motion_text=text,
            result=outcome,
            classification=vtype,
            bill=bill,
        )
        v.set_count('yes', yes)
        v.set_count('no', no)

        if not url:
            return v

        # Fetch the roll call itself from the linked page.
        v.add_source(url)
        if 'av' in url:
            self.add_house_votes(v, url)
        elif 'sv' in url:
            self.add_senate_votes(v, url)

        return v
Ejemplo n.º 3
0
    def scrape_senate_vote(self, bill, url, date):
        """Download a Senate roll-call PDF and yield the vote parsed from it.

        The file at ``url`` is fetched with ``self.urlretrieve``; an HTTP
        error is logged as a warning and nothing is yielded. Text matching the
        ``Yea: n  Nay: n  Absent: n`` layout is delegated to
        ``self.scrape_senate_vote_3col``. Otherwise the text is split on its
        ``Yea(s):`` / ``Nay(s):`` / ``Absent(s):`` headings and every name
        found after a heading is recorded as a yes, no or other vote.

        The result is 'pass' only when the yes votes outnumber the no and
        other votes combined.
        """
        try:
            filename, _resp = self.urlretrieve(url)
        except scrapelib.HTTPError:
            self.warning("missing vote file %s" % url)
            return

        vote = Vote(
            chamber='upper',
            start_date=date.strftime("%Y-%m-%d"),
            motion_text='Passage',
            # Placeholder; the real result is set once the names are counted.
            result='fail',
            classification='passage',
            bill=bill
        )
        vote.add_source(url)

        text = convert_pdf(filename, 'text').decode('utf-8')
        os.remove(filename)

        if re.search(r'Yea:\s+\d+\s+Nay:\s+\d+\s+Absent:\s+\d+', text):
            yield from self.scrape_senate_vote_3col(bill, vote, text, url, date)
            return

        # re.split keeps the captured heading, and the list is reversed so
        # that pop() consumes the pieces in document order. Empty pieces are
        # dropped into a real list: on Python 3 filter() returns a lazy
        # iterator, which is always truthy and has no pop().
        # NOTE(review): this assumes the remaining pieces alternate
        # heading / names, i.e. no text precedes the first heading - confirm.
        data = [piece for piece in
                re.split(r'(Yea|Nay|Absent)s?:', text)[::-1] if piece]
        keymap = dict(yea='yes', nay='no')
        actual_vote = collections.defaultdict(int)
        vote_count = {
            'yes': 0,
            'no': 0,
            'other': 0
        }
        while data:
            vote_val = data.pop()
            # Any heading other than Yea/Nay is counted as 'other'.
            key = keymap.get(vote_val.lower(), 'other')
            values = data.pop()
            for name in re.split(r'(?:[\s,]+and\s|[\s,]{2,})', values):
                if name.lower().strip() == 'none.':
                    continue
                # Strip dot leaders, a trailing period and surrounding
                # digits, dashes and whitespace from the name.
                name = name.replace('..', '')
                name = re.sub(r'\.$', '', name)
                name = name.strip('-1234567890 \n')
                if not name:
                    continue
                vote.vote(key, name)
                actual_vote[vote_val] += 1
                vote_count[key] += 1
            assert actual_vote[vote_val] == vote_count[key]

        for key, value in vote_count.items():
            vote.set_count(key, value)
        # Replace the placeholder result with the real outcome.
        vote.result = 'pass' if vote_count['yes'] > (vote_count['no'] +
                                                     vote_count['other']) else 'fail'

        yield vote
Ejemplo n.º 4
0
    def scrape_vote(self, bill, vote_json, session):
        """Yield a ``VoteEvent`` built from one roll-call JSON object.

        The motion text is the action, prefixed with the amendment number
        when one is present. Counts are taken verbatim from the
        ``*VotesCount`` fields and the vote passes when the yes count exceeds
        the no count. The ``*Votes`` fields are comma-separated name lists;
        every name is stripped of surrounding whitespace before it is
        recorded, and blank entries are ignored.
        """
        if vote_json['amendmentNumber']:
            motion = '{}: {}'.format(
                vote_json['amendmentNumber'], vote_json['action'])
        else:
            motion = vote_json['action']

        result = 'pass' if vote_json['yesVotesCount'] > vote_json['noVotesCount'] else 'fail'

        v = VoteEvent(
            chamber=self.chamber_abbrev_map[vote_json['chamber']],
            start_date=self.parse_local_date(vote_json['voteDate']),
            motion_text=motion,
            result=result,
            legislative_session=session,
            bill=bill,
            classification='other',
        )

        v.set_count('yes', vote_json['yesVotesCount'])
        v.set_count('no', vote_json['noVotesCount'])
        v.set_count('absent', vote_json['absentVotesCount'])
        v.set_count('excused', vote_json['excusedVotesCount'])
        v.set_count('other', vote_json['conflictVotesCount'])

        # (vote option, JSON field holding the comma-separated names).
        # Conflict votes are recorded under the 'other' option.
        name_fields = (
            ('yes', 'yesVotes'),
            ('no', 'noVotes'),
            ('absent', 'absentVotes'),
            ('excused', 'excusedVotes'),
            ('other', 'conflictVotes'),
        )
        for option, field in name_fields:
            for raw_name in vote_json[field].split(','):
                # Strip for every option so that names following a ", "
                # separator do not keep their leading space.
                name = raw_name.strip()
                if name:
                    v.vote(option, name)

        source_url = 'http://lso.wyoleg.gov/Legislation/{}/{}'.format(
            session, vote_json['billNumber'])
        v.add_source(source_url)

        yield v
Ejemplo n.º 5
0
    def scrape_vote(self, bill, date, motion, url):
        """Fetch a roll-call page and yield the ``VoteEvent`` it describes.

        Nothing is yielded when the page is flagged 'not yet official' or
        when fetching or parsing fails, since the site sometimes links to
        dead or unpublished vote pages.

        ``date`` is passed through as the vote's ``start_date``.
        NOTE(review): the previous code ignored ``date`` and stamped every
        vote with '2017-03-04'; confirm callers pass a value ``VoteEvent``
        accepts (date string, date or datetime).
        """
        try:
            page = self.get(url).text
            if 'not yet official' in page:
                # Sometimes they link to vote pages before they go live
                return

            page = lxml.html.fromstring(page)
            actor = 'upper' if url.endswith('Senate') else 'lower'

            votevals = ['yes', 'no', 'not voting', 'other']
            count_path = "string(//td[@align = 'center' and contains(., '%s: ')])"

            def read_count(label):
                # The count is the last whitespace-separated token of the cell.
                return int(page.xpath(count_path % label).split()[-1])

            yes_count = read_count("Yeas")
            no_count = read_count("Nays")
            not_voting_count = read_count("Non Voting")
            other_count = read_count("Present")
            # Yes votes must outnumber every other kind of vote combined.
            passed = yes_count > no_count + not_voting_count + other_count
            vote = VoteEvent(start_date=date, motion_text=motion,
                             result='pass' if passed else 'fail',
                             classification='passage',
                             chamber=actor,
                             bill=bill)
            try:
                excused_count = read_count("Excused")
            except (IndexError, ValueError):
                # No usable 'Excused' cell on this page; skip that option.
                pass
            else:
                vote.set_count('excused', excused_count)
                votevals.append('excused')
            vote.set_count('yes', yes_count)
            vote.set_count('no', no_count)
            vote.set_count('not voting', not_voting_count)
            vote.set_count('other', other_count)
            vote.add_source(url)

            xpath = (
                '//*[contains(@class, "ms-standardheader")]/'
                'following-sibling::table')
            divs = page.xpath(xpath)

            # The name tables are paired with the options in page order.
            for (voteval, div) in zip(votevals, divs):
                for a in div.xpath('.//a'):
                    name = a.text_content().strip()
                    if name:
                        vote.vote(voteval, name)
            yield vote
        except Exception:
            # Sometimes the link is there but is dead. Only ordinary errors
            # are swallowed; KeyboardInterrupt, SystemExit and GeneratorExit
            # propagate.
            pass
Ejemplo n.º 6
0
    def scrape_vote(self, chamber, session, bill_id, vote_url):
        """Scrape one roll-call page and yield it as a ``VoteEvent``.

        Nothing is yielded when the request ends up on the "no vote found"
        page, when the page has no motion heading, or when none of its
        paragraphs parses as an ``mm/dd/YYYY`` date. ``chamber`` is recorded
        as the bill's chamber; the vote itself is always attributed to
        'lower'.
        """
        NO_VOTE_URL = 'http://www.house.leg.state.mn.us/votes/novotefound.asp'
        response = self.get(vote_url)
        html = response.text

        # A broken link redirects to NO_VOTE_URL.
        if response.url == NO_VOTE_URL:
            return

        doc = lxml.html.fromstring(html)
        headings = doc.xpath("//div[@id='leg_PageContent']/div/h2/text()")
        if not headings:
            self.logger.warning("Bill was missing a motion number, skipping")
            return
        motion = headings[0]

        # The second h3 text holds the tally; yeas and nays are the first
        # and fourth tokens.
        tally = doc.xpath(".//div[@id='leg_PageContent']/div/h3/text()")[1].split()
        yeas = int(tally[0])
        nays = int(tally[3])

        # The date is whichever paragraph parses first as mm/dd/YYYY.
        for paragraph in doc.xpath(".//div[@id='leg_PageContent']/div/p/text()"):
            try:
                date = datetime.datetime.strptime(paragraph.strip(), '%m/%d/%Y').date()
            except ValueError:
                continue
            break
        else:
            self.logger.warning("No date could be found for vote on %s" % motion)
            return

        if yeas > nays:
            outcome = 'pass'
        else:
            outcome = 'fail'
        vote = VoteEvent(chamber='lower', start_date=date, motion_text=motion,
                         result=outcome,
                         classification='passage',
                         legislative_session=session, bill=bill_id,
                         bill_chamber=chamber)
        vote.set_count('yes', yeas)
        vote.set_count('no', nays)
        vote.add_source(vote_url)
        vote.pupa_id = vote_url

        # The first table lists the yeas, the second the nays.
        for yea_name in doc.xpath('//table[1]/tr/td/font/text()'):
            vote.yes(yea_name.strip())
        for nay_name in doc.xpath('//table[2]/tr/td/font/text()'):
            vote.no(nay_name.strip())

        yield vote
Ejemplo n.º 7
0
def test_full_vote_event():
    """Import a complete vote event and check everything that was stored.

    Scrapes two people, an organization, a bill and a vote event with one
    count and two individual votes, runs them through their importers and
    asserts that the stored ``VoteEvent`` links to the session, the bill and
    both voters.
    """
    jurisdiction = Jurisdiction.objects.create(id='jid', division_id='did')
    jurisdiction.legislative_sessions.create(name='1900', identifier='1900')

    # Scraped fixtures.
    john = ScrapePerson('John Smith', primary_org='lower')
    adam = ScrapePerson('Adam Smith', primary_org='lower')
    house = ScrapeOrganization(name='House', classification='lower')
    bill = ScrapeBill('HB 1', '1900', 'Axe & Tack Tax Act', from_organization=house._id)
    vote_event = ScrapeVoteEvent(
        legislative_session='1900',
        motion_text='passage',
        start_date='1900-04-01',
        classification='passage:bill',
        result='pass',
        bill_chamber='lower',
        bill='HB 1',
        organization=house._id,
    )
    vote_event.set_count('yes', 20)
    vote_event.yes('John Smith')
    vote_event.no('Adam Smith')

    # Import in dependency order: organization, people, memberships, bill.
    org_importer = OrganizationImporter('jid')
    org_importer.import_data([house.as_dict()])

    person_importer = PersonImporter('jid')
    person_importer.import_data([john.as_dict(), adam.as_dict()])

    membership_importer = MembershipImporter(
        'jid', person_importer, org_importer, DumbMockImporter())
    membership_importer.import_data(
        [john._related[0].as_dict(), adam._related[0].as_dict()])

    bill_importer = BillImporter('jid', org_importer, person_importer)
    bill_importer.import_data([bill.as_dict()])

    vote_importer = VoteEventImporter(
        'jid', person_importer, org_importer, bill_importer)
    vote_importer.import_data([vote_event.as_dict()])

    # The single stored event is linked to the session and the bill.
    assert VoteEvent.objects.count() == 1
    stored = VoteEvent.objects.get()
    assert stored.legislative_session == LegislativeSession.objects.get()
    assert stored.motion_classification == ['passage:bill']
    assert stored.bill == Bill.objects.get()

    tally = stored.counts.get()
    assert tally.option == 'yes'
    assert tally.value == 20

    recorded = list(stored.votes.all())
    assert len(recorded) == 2
    for ballot in stored.votes.all():
        is_john = ballot.voter_name == 'John Smith'
        if is_john:
            assert ballot.option == 'yes'
            assert ballot.voter == Person.objects.get(name='John Smith')
        else:
            assert ballot.option == 'no'
            assert ballot.voter == Person.objects.get(name='Adam Smith')
Ejemplo n.º 8
0
    def scrape_votes(self, bill, page):
        """Yield a ``VoteEvent`` for every floor action of a bill.

        For each entry in ``page['FloorHeaders']`` the floor-action API is
        queried with votes included, and every returned action other than
        'No Action' becomes one vote event. A vote passes when it was
        unanimously adopted or when the ayes outnumber the nays; missing
        (null) tallies are treated as zero.
        """
        base_url = 'https://apps.azleg.gov/api/BillStatusFloorAction'
        chamber_by_body = {
            'S': 'upper',
            'H': 'lower',
        }
        option_by_code = {
            'Y': 'yes',
            'N': 'no',
        }
        for header in page['FloorHeaders']:
            params = {
                'billStatusId': page['BillId'],
                'billStatusActionId': header['BillStatusActionId'],
                'includeVotes': 'true',
            }
            resp = self.get(base_url, params=params)
            actions = json.loads(resp.content.decode('utf-8'))

            for action in actions:
                if action['Action'] == 'No Action':
                    continue
                action_date = datetime.datetime.strptime(action['ReportDate'], '%Y-%m-%dT%H:%M:%S')

                # Coalesce null tallies before comparing them: the counts
                # below already tolerate None, and None > None would raise
                # TypeError.
                ayes = action['Ayes'] or 0
                nays = action['Nays'] or 0
                passed = action['UnanimouslyAdopted'] or ayes > nays

                vote = VoteEvent(
                    chamber=chamber_by_body[header['LegislativeBody']],
                    motion_text=action['Action'],
                    classification='passage',
                    result='pass' if passed else 'fail',
                    start_date=action_date.strftime('%Y-%m-%d'),
                    bill=bill,
                )
                vote.add_source(resp.url)
                vote.set_count('yes', ayes)
                vote.set_count('no', nays)
                vote.set_count('other', (action['Present'] or 0))
                vote.set_count('absent', (action['Absent'] or 0))
                vote.set_count('excused', (action['Excused'] or 0))
                vote.set_count('not voting', (action['NotVoting'] or 0))

                for v in action['Votes']:
                    # Anything that is not an explicit Y or N is 'other'.
                    vote_type = option_by_code.get(v['Vote'], 'other')
                    vote.vote(vote_type, v['Legislator']['FullName'])
                # The request URL plus the referral number identifies the
                # vote.
                vote.pupa_id = resp.url+str(action['ReferralNumber'])
                yield vote
Ejemplo n.º 9
0
    def parse_vote(self, bill, link):
        """Scrape the vote page at ``link`` and yield it as a ``VoteEvent``.

        The first ``h3`` of the main content supplies the chamber (first
        word), the date (last word, ``mm/dd/YYYY``) and the motion text (the
        words in between, falling back to the first ``h4``). Every ``h3``
        carrying a parenthesised number sets the yea, nay, present or absent
        count; a count that is not on the page defaults to 0. When the page
        has no ``h3`` at all a warning is logged and nothing is yielded.

        Voter links after the first one are assigned by position: the first
        ``yes_count`` are yes votes, the next ``no_count`` are no votes and
        the remainder are 'other'.
        """
        member_doc = lxml.html.fromstring(self.get(link).text)
        motion = member_doc.xpath("//div[@id='main_content']/h4/text()")
        opinions = member_doc.xpath("//div[@id='main_content']/h3/text()")
        if not opinions:
            self.warning("No Votes for: %s", link)
            return

        header = opinions[0].split()
        vote_chamber = 'upper' if header[0] == 'Senate' else 'lower'
        vote_date = datetime.datetime.strptime(header[-1], '%m/%d/%Y')
        vote_status = " ".join(header[2:-2])
        vote_status = vote_status if vote_status.strip() else motion[0]

        # Default every tally to 0 so that a heading missing from the page
        # no longer leaves its variable unbound.
        counts = {'yea': 0, 'nay': 0, 'present': 0, 'absent': 0}
        for heading in opinions:
            try:
                count = int(heading[heading.find("(") + 1:heading.find(")")])
            except ValueError:
                # No parenthesised number here; do not reuse a stale count.
                continue
            lowered = heading.lower()
            for label in ('yea', 'nay', 'present', 'absent'):
                if label in lowered:
                    counts[label] = count
                    break

        yes_count = counts['yea']
        no_count = counts['nay']

        vote = VoteEvent(
            bill=bill,
            start_date=vote_date.strftime('%Y-%m-%d'),
            chamber=vote_chamber,
            motion_text=vote_status,
            result='pass' if yes_count > no_count else 'fail',
            classification='passage',
        )
        vote.pupa_id = link

        vote.set_count('yes', yes_count)
        vote.set_count('no', no_count)
        vote.set_count('abstain', counts['present'])
        vote.set_count('absent', counts['absent'])

        vote.add_source(link)

        a_links = member_doc.xpath("//div[@id='main_content']/a/text()")
        # The first link is skipped, so positions start at 1 and line up
        # with the running totals.
        for position, link_text in enumerate(a_links[1:], start=1):
            # Commas are removed and only the first word is kept as the name.
            voter = link_text.replace(',', '').split()[0]
            if position <= yes_count:
                vote.vote('yes', voter)
            elif position <= yes_count + no_count:
                vote.vote('no', voter)
            else:
                vote.vote('other', voter)
        yield vote
Ejemplo n.º 10
0
    def scrape_committee_vote(self, bill, actor, date, motion, page, url, uniqid):
        """Yield a committee ``Vote`` read from the first table of ``page``.

        The relevant row holds up to three cells: yes, no and other voters,
        each a list of names separated by ``<br>`` elements. When fewer than
        three cells are present only the first one is used, as the yes
        votes. Counts are the number of names found and the vote passes when
        the yes names outnumber the no names.
        """
        table = page.xpath("//table")[0]
        row = table.xpath(".//tr")[0]
        if row[0].text_content() == 'Votes:':
            # New website: the voter cells live in the third row.
            row = table.xpath(".//tr")[2]
        cells = row.xpath(".//td")
        if len(cells) < 3:
            yes, no, other = cells[0], None, None
        else:
            yes, no, other = cells[:3]

        def voter_names(cell):
            # Names are the non-blank text following each <br> in the cell.
            if cell is None:
                return []
            names = []
            for br in cell.xpath(".//br"):
                name = (br.tail or '').strip()
                if name:
                    names.append(name)
            return names

        names_by_option = {
            "yes": voter_names(yes),
            "no": voter_names(no),
            "other": voter_names(other),
        }

        yes_count = len(names_by_option['yes'])
        no_count = len(names_by_option['no'])
        other_count = len(names_by_option['other'])
        vote = Vote(chamber=actor,
                    start_date=date,
                    motion_text=motion,
                    identifier=str(uniqid),
                    result='pass' if (yes_count > no_count) else 'fail',
                    classification='passage',
                    bill=bill)
        vote.extras = {'_vote_id': uniqid}
        vote.add_source(url)
        vote.set_count('yes', yes_count)
        vote.set_count('no', no_count)
        vote.set_count('other', other_count)
        for option, names in names_by_option.items():
            for voter in names:
                vote.vote(option, voter)

        yield vote
Ejemplo n.º 11
0
    def scrape_vote(self, bill, motion, url):
        """Scrape the roll-call page at ``url`` and yield a ``VoteEvent``.

        Counts, date and outcome are read from the table cell following each
        label cell ('Yeas (Y):', 'Nays (N):', 'Absent (X):', 'Excused (E):',
        'Date:', 'Outcome:'). Absent and excused are merged into 'other'.
        The vote passes only when the outcome text is exactly 'PREVAILS'.

        Raises:
            ValueError: if ``url`` contains neither 'chamber=House' nor
                'chamber=Senate', or if the date matches neither the
                '%B %d, %Y' nor the '%b. %d, %Y' format.
        """
        page = self.get(url, retry_on_404=True).text
        page = lxml.html.fromstring(page)

        def sibling_text(cell_xpath):
            # Text of the cell that follows the first matching label cell.
            cell = page.xpath(cell_xpath)[0]
            return cell.xpath("string(following-sibling::td)")

        yes_count = int(sibling_text("//td[text() = 'Yeas (Y):']"))
        no_count = int(sibling_text("//td[text() = 'Nays (N):']"))
        abs_count = int(sibling_text("//td[text() = 'Absent (X):']"))
        ex_count = int(sibling_text("//td[text() = 'Excused (E):']"))
        other_count = abs_count + ex_count

        if 'chamber=House' in url:
            chamber = 'lower'
        elif 'chamber=Senate' in url:
            chamber = 'upper'
        else:
            # Fail with a clear message rather than an unbound local below.
            raise ValueError(
                'Cannot determine chamber from vote URL: %s' % url)

        date = sibling_text("//td[text() = 'Date:']")
        try:
            date = datetime.datetime.strptime(date, "%B %d, %Y")
        except ValueError:
            # Fall back to the abbreviated month form with a trailing period.
            date = datetime.datetime.strptime(date, "%b. %d, %Y")

        outcome = sibling_text("//td[text()='Outcome:']")

        vote = VoteEvent(
            chamber=chamber,
            start_date=date.strftime('%Y-%m-%d'),
            motion_text=motion,
            result='pass' if outcome == 'PREVAILS' else 'fail',
            classification='passage',
            bill=bill,
        )
        vote.set_count('yes', yes_count)
        vote.set_count('no', no_count)
        vote.set_count('other', other_count)
        vote.add_source(url)

        # Absent (X) and excused (E) members are both recorded as 'other';
        # rows with any other code are ignored.
        option_by_code = {'Y': 'yes', 'N': 'no', 'X': 'other', 'E': 'other'}

        member_cell = page.xpath("//td[text() = 'Member']")[0]
        # Skip the header row of the member table.
        for row in member_cell.xpath("../../tr")[1:]:
            name = row.xpath("string(td[2])")
            option = option_by_code.get(row.xpath("string(td[4])"))
            if option is not None:
                vote.vote(option, name)

        yield vote
Ejemplo n.º 12
0
def record_votes(root, session, chamber):
    """Yield a ``VoteEvent`` for every valid vote element under ``root``.

    Each ``div`` matched by the combined ``vote_selectors`` is wrapped in a
    ``MaybeVote``; invalid ones are skipped. A passed vote is labelled
    'passage' (motion text and classification), anything else 'other'. The
    legislative session is the first two characters of ``session`` and no
    start date is set.
    """
    selector = '//div{}'.format(''.join(vote_selectors))
    for element in root.xpath(selector):
        candidate = MaybeVote(element)
        if not candidate.is_valid:
            continue

        if candidate.passed:
            label, outcome = 'passage', 'pass'
        else:
            label, outcome = 'other', 'fail'

        v = VoteEvent(
            chamber=chamber,
            start_date=None,
            motion_text=label,
            result=outcome,
            classification=label,
            legislative_session=session[0:2],
            bill=candidate.bill_id,
            bill_chamber=candidate.chamber,
        )

        # Missing tallies are stored as 0; 'present' is kept as 'not voting'.
        v.set_count('yes', candidate.yeas or 0)
        v.set_count('no', candidate.nays or 0)
        v.set_count('not voting', candidate.present or 0)

        for yea in candidate.votes['yeas']:
            v.yes(yea)
        for nay in candidate.votes['nays']:
            v.no(nay)
        for other_key, option in (('present', 'not voting'),
                                  ('absent', 'absent')):
            for member in candidate.votes[other_key]:
                v.vote(option, member)

        yield v
Ejemplo n.º 13
0
    def parse_roll_call(self, bill, link, chamber, date):
        """Fetch the roll call behind ``link`` and return it as a ``VoteEvent``.

        The motion is read from the page; 'FP' / 'FINAL PASSAGE' is
        classified as 'passage', a concurrence in amendments as 'amendment',
        and anything else as 'other' with the link text used as the motion.
        Leave (LVE) and not-voting (N/V) tallies are merged into 'other', and
        the vote passes only when the yeas exceed the nays and others
        combined.

        Raises ``Exception`` when a voter element carries an unrecognised
        class.
        """
        url = link.attrib['href']
        page = lxml.html.fromstring(self.get(url).text)

        raw_motion = page.xpath(
            'string(//div[@class="Column-OneFourth"]/div[3])').strip()
        motion = re.sub(r'\s+', ' ', raw_motion)

        if motion == 'FP':
            motion = 'FINAL PASSAGE'

        if motion == 'FINAL PASSAGE':
            classification = 'passage'
        elif re.match(r'CONCUR(RENCE)? IN \w+ AMENDMENTS', motion):
            classification = 'amendment'
        else:
            classification = 'other'
            motion = link.text_content()

        def tally(label):
            # The number sits in the element right after the label div.
            return int(page.xpath("//div[text() = '%s']" % label)[0].getnext().text)

        yeas = tally('YEAS')
        nays = tally('NAYS')
        lve = tally('LVE')
        nv = tally('N/V')
        other = lve + nv

        if yeas > (nays + other):
            outcome = 'pass'
        else:
            outcome = 'fail'
        vote = VoteEvent(
            chamber=chamber,
            start_date=tz.localize(date),
            motion_text=motion,
            classification=classification,
            result=outcome,
            bill=bill,
        )
        vote.add_source(url)
        vote.set_count('yes', yeas)
        vote.set_count('no', nays)
        vote.set_count('other', other)

        # Class-name fragments, checked in this order, and the vote option
        # each one maps to.
        markers = (
            ('yea', 'yes'),
            ('nay', 'no'),
            ('nvote', 'other'),
            ('lve', 'other'),
        )
        for div in page.xpath('//*[contains(@class, "RollCalls-Vote")]'):
            name = div.text_content().strip()
            # Trim leading, then trailing, whitespace and commas.
            for edge_pattern in (r'^[\s,]+', r'[\s,]+$'):
                name = re.sub(edge_pattern, '', name)
            class_attr = div.attrib['class'].lower()
            for marker, option in markers:
                if marker in class_attr:
                    voteval = option
                    break
            else:
                msg = 'Unrecognized vote val: %s' % class_attr
                raise Exception(msg)
            vote.vote(voteval, name)

        return vote
Ejemplo n.º 14
0
    def scrape_vote(self, bill, vote_id, session):
        """Request roll call ``vote_id`` and yield it as a ``Vote``.

        The roll call is posted for as a form and read from the ``'Model'``
        entry of the JSON response; an empty response yields nothing. The
        result is 'pass' only when the roll-call status is 'Passed'.
        Not-voting, vacant, absent and conflict tallies are summed into
        'other'. Voter names are looked up in ``self.legislators_by_short``;
        an unknown short name is logged and used as-is.
        """
        vote_url = 'https://legis.delaware.gov/json/RollCall/GetRollCallVoteByRollCallId'
        form = {
            'rollCallId': vote_id,
            'sort': '',
            'group': '',
            'filter': '',
        }

        page = self.post(url=vote_url, data=form, allow_redirects=True).json()
        if not page:
            return

        roll = page['Model']
        vote_chamber = self.chamber_map[roll['ChamberName']]
        # Timestamps look like "7/1/16 01:00 AM".
        taken_at = dt.datetime.strptime(roll['TakenAtDateTime'],
                                        '%m/%d/%y %I:%M %p')
        vote_date = taken_at.strftime('%Y-%m-%d')

        # TODO: What does this code mean?
        vote_motion = roll['RollCallVoteType']

        if roll['RollCallStatus'] == 'Passed':
            vote_passed = 'pass'
        else:
            vote_passed = 'fail'

        other_fields = ('NotVotingCount', 'VacantVoteCount',
                        'AbsentVoteCount', 'ConflictVoteCount')
        other_count = sum(int(roll[field]) for field in other_fields)

        vote = Vote(chamber=vote_chamber,
                    start_date=vote_date,
                    motion_text=vote_motion,
                    result=vote_passed,
                    classification='other',
                    bill=bill.identifier,
                    legislative_session=session
                    )
        vote.add_source(vote_url)
        vote.set_count('yes', roll['YesVoteCount'])
        vote.set_count('no', roll['NoVoteCount'])
        vote.set_count('other', other_count)

        for row in roll['AssemblyMemberVotes']:
            # AssemblyMemberId looks like it should work here,
            # but for some sessions it's bugged to only return session
            try:
                voter = self.legislators_by_short[str(row['ShortName'])]
                name = voter['DisplayName']
            except KeyError:
                self.warning('could not find legislator short name %s',
                             row['ShortName'])
                name = row['ShortName']

            code = row['SelectVoteTypeCode']
            if code == 'Y':
                vote.yes(name)
            elif code == 'N':
                vote.no(name)
            else:
                vote.vote('other', name)

        # bill.add_vote_event(vote)
        yield vote
Ejemplo n.º 15
0
    def handle_page(self):
        """Yield the committee ``VoteEvent`` described by ``self.doc``.

        The date, committee and action are each read from a labelled span
        that must occur exactly once. The totals come from the last nested
        table, and the vote passes when the yeas exceed the nays. Member
        cells marked 'Y', 'N' or '-' are recorded as yes, no or not voting;
        parenthesised '(Y)' / '(N)' marks are skipped.

        Raises ``ValueError`` for any other mark.
        """
        [raw_date] = self.doc.xpath(
            '//span[@id="ctl00_ContentPlaceHolder1_lblDate"]/text()')
        parsed = datetime.datetime.strptime(raw_date, '%m/%d/%Y %I:%M:%S %p')
        date = format_datetime(parsed, 'US/Eastern')

        # Collapse whitespace so that the totals regex sees a single line.
        totals = self.doc.xpath('//table//table')[-1].text_content()
        totals = re.sub(r'(?mu)\s+', " ", totals).strip()
        found = re.search(
            r'(?m)Total Yeas:\s+(\d+)\s+Total Nays:\s+(\d+)\s+'
            r'Total Missed:\s+(\d+)', totals)
        yes_count, no_count, other_count = [int(x) for x in found.groups()]
        if yes_count > no_count:
            result = 'pass'
        else:
            result = 'fail'

        [committee] = self.doc.xpath(
            '//span[@id="ctl00_ContentPlaceHolder1_lblCommittee"]/text()')
        [action] = self.doc.xpath(
            '//span[@id="ctl00_ContentPlaceHolder1_lblAction"]/text()')
        motion = "{} ({})".format(action, committee)

        vote = VoteEvent(
            start_date=date,
            bill=self.kwargs['bill'],
            chamber='lower',
            motion_text=motion,
            result=result,
            classification='committee',
        )
        vote.add_source(self.url)
        vote.set_count('yes', yes_count)
        vote.set_count('no', no_count)
        vote.set_count('not voting', other_count)

        for cell in self.doc.xpath('//table//table//table//td'):
            if not cell.text_content().strip():
                continue

            # The second span is the member's name, the first their mark.
            [member] = cell.xpath('span[2]//text()')
            [mark] = cell.xpath('span[1]//text()')

            if mark == "Y":
                vote.yes(member)
            elif mark == "N":
                vote.no(member)
            elif mark == "-":
                vote.vote('not voting', member)
            elif not re.search(r'\([YN]\)', mark):
                raise ValueError(
                    "Unknown vote type found: {}".format(mark))
            # Otherwise the mark is parenthetical: such votes appear not to
            # be counted in the totals for Yea, Nay, _or_ Missed, so they
            # are left out.

        yield vote
Ejemplo n.º 16
0
    def _parse_senate_votes(self, vote_data, bill, url):
        """Build an upper-chamber ``VoteEvent`` from one vote JSON object.

        The motion is 'Floor Vote' for FLOOR votes and '<committee name>
        Vote' for COMMITTEE votes. Members are read from
        ``vote_data['memberVotes']['items']``: AYE and AYEWR count as yes,
        NAY as no, and EXC, ABS and ABD as other. A category that is missing,
        empty or has no 'items' list contributes nothing. The vote passes
        when the yes votes outnumber the no votes.

        Raises:
            ValueError: if the vote type is neither FLOOR nor COMMITTEE, or
                the vote date is not in ``YYYY-MM-DD`` form.
        """
        vote_datetime = datetime.datetime.strptime(vote_data['voteDate'],
                                                   '%Y-%m-%d')

        if vote_data['voteType'] == 'FLOOR':
            motion = 'Floor Vote'
        elif vote_data['voteType'] == 'COMMITTEE':
            motion = '{} Vote'.format(vote_data['committee']['name'])
        else:
            raise ValueError('Unknown vote type encountered.')

        vote = VoteEvent(
            chamber='upper',
            start_date=vote_datetime.strftime('%Y-%m-%d'),
            motion_text=motion,
            classification='passage',
            # Placeholder; the real result is set once votes are counted.
            result='fail',
            bill=bill,
        )

        vote.add_source(url)

        vote_rolls = vote_data['memberVotes']['items']

        def members(code):
            # One guard for every category, so that an entry lacking
            # 'items' is skipped rather than raising KeyError.
            entry = vote_rolls.get(code) or {}
            return entry.get('items', [])

        # Roll-call code -> vote option, in the order they are recorded.
        option_by_code = (
            ('AYE', 'yes'),
            ('AYEWR', 'yes'),
            ('NAY', 'no'),
            ('EXC', 'other'),
            ('ABS', 'other'),
            ('ABD', 'other'),
        )
        counts = {'yes': 0, 'no': 0, 'other': 0}
        for code, option in option_by_code:
            for legislator in members(code):
                vote.vote(option, legislator['fullName'])
                counts[option] += 1

        vote.result = 'pass' if counts['yes'] > counts['no'] else 'fail'
        vote.set_count('yes', counts['yes'])
        vote.set_count('no', counts['no'])
        vote.set_count('other', counts['other'])

        return vote
Ejemplo n.º 17
0
    def _parse_senate_votes(self, vote_data, bill, url):
        """Build an upper-chamber ``VoteEvent`` from one vote JSON object.

        The motion is "Floor Vote" for FLOOR votes and "<committee name>
        Vote" for COMMITTEE votes. Members are read from
        ``vote_data["memberVotes"]["items"]``: AYE and AYEWR count as yes,
        NAY as no, and EXC, ABS and ABD as other. A category that is missing,
        empty or has no "items" list contributes nothing. The vote passes
        when the yes votes outnumber the no votes.

        Raises:
            ValueError: if the vote type is neither FLOOR nor COMMITTEE, or
                the vote date is not in ``YYYY-MM-DD`` form.
        """
        vote_datetime = datetime.datetime.strptime(vote_data["voteDate"],
                                                   "%Y-%m-%d")

        if vote_data["voteType"] == "FLOOR":
            motion = "Floor Vote"
        elif vote_data["voteType"] == "COMMITTEE":
            motion = "{} Vote".format(vote_data["committee"]["name"])
        else:
            raise ValueError("Unknown vote type encountered.")

        vote = VoteEvent(
            chamber="upper",
            start_date=vote_datetime.strftime("%Y-%m-%d"),
            motion_text=motion,
            classification="passage",
            # Placeholder; the real result is set once votes are counted.
            result="fail",
            bill=bill,
        )

        vote.add_source(url)

        vote_rolls = vote_data["memberVotes"]["items"]

        def members(code):
            # One guard for every category, so that an entry lacking
            # "items" is skipped rather than raising KeyError.
            entry = vote_rolls.get(code) or {}
            return entry.get("items", [])

        # Roll-call code -> vote option, in the order they are recorded.
        option_by_code = (
            ("AYE", "yes"),
            ("AYEWR", "yes"),
            ("NAY", "no"),
            ("EXC", "other"),
            ("ABS", "other"),
            ("ABD", "other"),
        )
        counts = {"yes": 0, "no": 0, "other": 0}
        for code, option in option_by_code:
            for legislator in members(code):
                vote.vote(option, legislator["fullName"])
                counts[option] += 1

        vote.result = "pass" if counts["yes"] > counts["no"] else "fail"
        vote.set_count("yes", counts["yes"])
        vote.set_count("no", counts["no"])
        vote.set_count("other", counts["other"])

        return vote
Ejemplo n.º 18
0
    def _parse_senate_votes(self, vote_data, bill, url):
        """Build an upper-chamber ``VoteEvent`` from one vote JSON object.

        The motion is 'Floor Vote' for FLOOR votes and '<committee name>
        Vote' for COMMITTEE votes. Members are read from
        ``vote_data['memberVotes']['items']``: AYE and AYEWR count as yes,
        NAY as no, and EXC, ABS and ABD as other. A category that is missing,
        empty or has no 'items' list contributes nothing. The vote passes
        when the yes votes outnumber the no votes.

        Raises:
            ValueError: if the vote type is neither FLOOR nor COMMITTEE, or
                the vote date is not in ``YYYY-MM-DD`` form.
        """
        vote_datetime = datetime.datetime.strptime(
            vote_data['voteDate'], '%Y-%m-%d')

        if vote_data['voteType'] == 'FLOOR':
            motion = 'Floor Vote'
        elif vote_data['voteType'] == 'COMMITTEE':
            motion = '{} Vote'.format(vote_data['committee']['name'])
        else:
            raise ValueError('Unknown vote type encountered.')

        vote = VoteEvent(
            chamber='upper',
            start_date=vote_datetime.strftime('%Y-%m-%d'),
            motion_text=motion,
            classification='passage',
            # Placeholder; the real result is set once votes are counted.
            result='fail',
            bill=bill,
        )

        vote.add_source(url)

        vote_rolls = vote_data['memberVotes']['items']

        def members(code):
            # One guard for every category, so that an entry lacking
            # 'items' is skipped rather than raising KeyError.
            entry = vote_rolls.get(code) or {}
            return entry.get('items', [])

        # Roll-call code -> vote option, in the order they are recorded.
        option_by_code = (
            ('AYE', 'yes'),
            ('AYEWR', 'yes'),
            ('NAY', 'no'),
            ('EXC', 'other'),
            ('ABS', 'other'),
            ('ABD', 'other'),
        )
        counts = {'yes': 0, 'no': 0, 'other': 0}
        for code, option in option_by_code:
            for legislator in members(code):
                vote.vote(option, legislator['fullName'])
                counts[option] += 1

        vote.result = 'pass' if counts['yes'] > counts['no'] else 'fail'
        vote.set_count('yes', counts['yes'])
        vote.set_count('no', counts['no'])
        vote.set_count('other', counts['other'])

        return vote
def build_vote(session, bill_id, url, vote_record, chamber, motion_text):
    """Create a ``VoteEvent`` from an already-parsed vote record.

    Args:
        session: Legislative session passed to ``VoteEvent``.
        bill_id: Bill identifier; a leading ``S`` marks an upper-chamber
            bill, anything else is treated as lower-chamber.
        url: Source URL, also used as the vote's ``pupa_id``.
        vote_record: Mapping with a ``date`` (object supporting
            ``strftime``) and voter lists under ``yes``, ``no``,
            ``excused``, ``absent`` and ``other``.
        chamber: Chamber in which the vote took place.
        motion_text: Text of the motion voted on.

    Returns:
        The populated ``VoteEvent``; it passes when yes votes outnumber no
        votes.
    """
    vote_types = ['yes', 'no', 'excused', 'absent', 'other']
    passed = len(vote_record['yes']) > len(vote_record['no'])
    vote_event = VoteEvent(
        result='pass' if passed else 'fail',
        chamber=chamber,
        start_date=vote_record['date'].strftime('%Y-%m-%d'),
        motion_text=motion_text,
        classification='passage',
        legislative_session=session,
        bill=bill_id,
        # Compare by value: ``is`` against a string literal only works by
        # accident of interning and emits a SyntaxWarning on Python 3.8+.
        bill_chamber='upper' if bill_id[0] == 'S' else 'lower'
    )
    vote_event.pupa_id = url

    # Record all totals first, then the individual voters.
    for vote_type in vote_types:
        vote_event.set_count(vote_type, len(vote_record[vote_type]))
    for vote_type in vote_types:
        for voter in vote_record[vote_type]:
            vote_event.vote(vote_type, voter)

    vote_event.add_source(url)
    return vote_event
Ejemplo n.º 20
0
    def parse_bill_actions_table(self, bill, action_table, bill_id, session, url, bill_chamber):
        """Add each action-table row to ``bill`` and yield any votes found.

        Every child of ``action_table`` after the first (presumably the
        header row) is read as date / actor code / action text. The action
        is added to ``bill``, committees named by ``categorize_action`` are
        attached as related organizations, and when ``self.parse_vote``
        finds a roll call in the action text a ``VoteEvent`` is yielded.

        Raises:
            KeyError: If a row carries an actor code not in the table below.
        """
        actor_codes = {
            "S": "upper",
            "H": "lower",
            "D": "legislature",  # "Data Systems",
            "$": "Appropriation measure",
            "CONAM": "Constitutional Amendment"
        }

        for row in action_table.xpath('*')[1:]:
            date = dt.datetime.strptime(
                row[0].text_content(), "%m/%d/%Y").strftime('%Y-%m-%d')
            actor_code = row[1].text_content().upper()
            string = row[2].text_content()
            actor = actor_codes[actor_code]
            act_type, committees = categorize_action(string)

            # Translate committee short codes to full names for the
            # matcher; codes missing from ``self.short_ids`` are dropped.
            real_committees = []
            for short_code in committees or ():
                try:
                    real_committees.append(self.short_ids[short_code]['name'])
                except KeyError:
                    continue

            action = bill.add_action(string, date, chamber=actor,
                                     classification=act_type)
            for committee_name in real_committees:
                action.add_related_entity(name=committee_name,
                                          entity_type="organization")

            parsed = self.parse_vote(string)
            if not parsed:
                continue

            tally, motion = parsed
            outcome = 'pass' if 'passed' in string.lower() else 'fail'
            vote = VoteEvent(
                start_date=date,
                chamber=actor,
                bill=bill_id,
                bill_chamber=bill_chamber,
                legislative_session=session,
                motion_text=motion,
                result=outcome,
                classification='passage',
            )
            vote.add_source(url)
            vote.set_count('yes', int(tally['n_yes'] or 0))
            vote.set_count('no', int(tally['n_no'] or 0))
            vote.set_count('not voting', int(tally['n_excused'] or 0))

            # Both the plain and the "yes_resv" lists are recorded as yes.
            for name in split_specific_votes(tally['yes']):
                vote.yes(name)
            for name in split_specific_votes(tally['yes_resv']):
                vote.yes(name)
            for name in split_specific_votes(tally['no']):
                vote.no(name)
            for name in split_specific_votes(tally['excused']):
                vote.vote('not voting', name)

            yield vote
Ejemplo n.º 21
0
    def handle_page(self):
        """Yield one committee ``VoteEvent`` parsed from ``self.doc``.

        The date, totals, committee and action are read from ``<span>``
        elements located by id fragment; individual votes come from the
        ``vote-list`` items.

        Raises:
            ValueError: If a member's vote marker is not ``Y``, ``N``,
                ``-`` or a parenthesised ``(Y)``/``(N)``.
        """
        span_text = '//span[contains(@id, "{}")]/text()'.format

        raw_date, = self.doc.xpath(span_text('lblDate'))
        parsed_date = datetime.datetime.strptime(
            raw_date, '%m/%d/%Y %I:%M:%S %p')
        date = format_datetime(parsed_date, 'US/Eastern')

        yes_count = int(self.doc.xpath(span_text('lblYeas'))[0])
        no_count = int(self.doc.xpath(span_text('lblNays'))[0])
        other_count = int(self.doc.xpath(span_text('lblMissed'))[0])
        if yes_count > no_count:
            result = 'pass'
        else:
            result = 'fail'

        committee, = self.doc.xpath(span_text('lblCommittee'))
        action, = self.doc.xpath(span_text('lblAction'))

        vote = VoteEvent(
            start_date=date,
            bill=self.kwargs['bill'],
            chamber='lower',
            motion_text="{} ({})".format(action, committee),
            result=result,
            classification='committee',
        )
        vote.add_source(self.url)
        vote.set_count('yes', yes_count)
        vote.set_count('no', no_count)
        vote.set_count('not voting', other_count)

        items = self.doc.xpath('//ul[contains(@class, "vote-list")]/li')
        for item in items:
            # Blank list items carry no vote.
            if not item.text_content().strip():
                continue

            member, = item.xpath('span[2]//text()')
            marker, = item.xpath('span[1]//text()')

            if marker == "Y":
                vote.yes(member)
            elif marker == "N":
                vote.no(member)
            elif marker == "-":
                vote.vote('not voting', member)
            elif re.search(r'\([YN]\)', marker):
                # Parenthetical votes appear to not be counted in the
                # totals for Yea, Nay, _or_ Missed, so they are skipped.
                continue
            else:
                raise ValueError(
                    "Unknown vote type found: {}".format(marker))

        yield vote
Ejemplo n.º 22
0
    def scrape_committee_vote(self, bill, actor, date, motion, page, url,
                              uniqid):
        """Yield a ``Vote`` built from the first table of a committee page.

        Voter names are the non-blank text nodes of the yes / no / other
        cells; the counts are the number of names found. When the row has
        fewer than three cells only the first cell (yes votes) is used.
        """
        def proc_block(cell, typ):
            # A missing cell yields an empty block with no count.
            if cell is None:
                return {"type": None, "count": None, "votes": []}
            names = [
                text.strip()
                for text in cell.xpath("./text()")
                if text.strip()
            ]
            return {"type": typ, "count": len(names), "votes": names}

        table = page.xpath("//table")[0]
        row = table.xpath(".//tr")[0]
        if row[0].text_content() == "Votes:":
            # New website: the vote cells are in the third row.
            row = table.xpath(".//tr")[2]

        cells = row.xpath(".//td")
        if len(cells) >= 3:
            # Every other cell is skipped; exactly five values are unpacked.
            yes, _, no, _, other = cells[:5]
        else:
            yes, no, other = cells[0], None, None

        vote_dict = {
            "yes": proc_block(yes, "yes"),
            "no": proc_block(no, "no"),
            "other": proc_block(other, "other"),
        }

        yes_count = vote_dict["yes"]["count"]
        no_count = vote_dict["no"]["count"] or 0
        other_count = vote_dict["other"]["count"] or 0
        outcome = "pass" if (yes_count > no_count) else "fail"

        vote = Vote(
            chamber=actor,
            start_date=date,
            motion_text=motion,
            identifier=str(uniqid),
            result=outcome,
            classification="passage",
            bill=bill,
        )
        vote.extras = {"_vote_id": uniqid}
        vote.add_source(url)
        vote.set_count("yes", yes_count)
        vote.set_count("no", no_count)
        vote.set_count("other", other_count)
        for option, block in vote_dict.items():
            for voter in block["votes"]:
                vote.vote(option, voter)

        yield vote
Ejemplo n.º 23
0
    def scrape_committee_vote(self, bill, actor, date, motion, page, url,
                              uniqid):
        """Yield a ``Vote`` built from the first table of a committee page.

        Voter names are the non-blank text following each ``<br>`` inside
        the yes / no / other cells; the counts are the number of names
        found. When the row has fewer than three cells only the first cell
        (yes votes) is used.

        Args:
            bill: Bill the vote is attached to.
            actor: Chamber passed to ``Vote``.
            date: Start date passed to ``Vote``.
            motion: Motion text.
            page: Parsed document supporting ``xpath``.
            url: Source URL recorded on the vote.
            uniqid: Identifier stored as the vote's ``identifier`` (as a
                string) and under ``extras['_vote_id']``.
        """
        votes = page.xpath("//table")[0]
        rows = votes.xpath(".//tr")[0]
        if rows[0].text_content() == 'Votes:':
            # New website: the vote cells are in the third row.
            rows = votes.xpath(".//tr")[2]
        yno = rows.xpath(".//td")
        if len(yno) < 3:
            yes = yno[0]
            no, other = None, None
        else:
            yes, no, other = rows.xpath(".//td")[:3]

        def proc_block(obj, typ):
            # A missing cell yields an empty block with no count.
            if obj is None:
                return {"type": None, "count": None, "votes": []}
            votes = []
            for br in obj.xpath(".//br"):
                # Each name is the text trailing a <br> element.
                name = br.tail.strip() if br.tail else ''
                if name:
                    votes.append(name)
            return {"type": typ, "count": len(votes), "votes": votes}

        vote_dict = {
            "yes": proc_block(yes, 'yes'),
            "no": proc_block(no, 'no'),
            "other": proc_block(other, 'other'),
        }

        yes_count = vote_dict['yes']['count']
        no_count = vote_dict['no']['count'] or 0
        other_count = vote_dict['other']['count'] or 0
        # The stray debug print of the motion text was removed here.
        vote = Vote(chamber=actor,
                    start_date=date,
                    motion_text=motion,
                    identifier=str(uniqid),
                    result='pass' if (yes_count > no_count) else 'fail',
                    classification='passage',
                    bill=bill)
        vote.extras = {'_vote_id': uniqid}
        vote.add_source(url)
        vote.set_count('yes', yes_count)
        vote.set_count('no', no_count)
        vote.set_count('other', other_count)
        for key in vote_dict:
            for voter in vote_dict[key]['votes']:
                vote.vote(key, voter)

        yield vote
Ejemplo n.º 24
0
    def parse_vote(self, bill, actor, date, motion, url, uniqid):
        """Fetch a roll-call page and yield the ``Vote`` it describes.

        The page text is matched for ``YEAS``, ``NAYS`` and ``ABSENT``
        (optionally ``OR NOT VOTING``) sections; each section gives a count
        followed by voter names separated by two or more whitespace
        characters. ``url`` is also added to ``bill`` as a source.
        """
        page = self.get(url).text
        bill.add_source(url)

        pattern = (
            r"YEAS -?\s?(\d+)(.*)NAYS -?\s?(\d+)"
            r"(.*)ABSENT( OR NOT VOTING)? -?\s?"
            r"(\d+)(.*)"
        )
        match = re.compile(pattern, re.MULTILINE | re.DOTALL).search(page)
        yes_count = int(match.group(1))
        no_count = int(match.group(3))
        other_count = int(match.group(6))

        # Any actor other than the two chambers is recorded with no chamber.
        vote_chamber = actor if actor in ("upper", "lower") else ""

        vote = Vote(
            chamber=vote_chamber,
            start_date=date,
            motion_text=motion,
            result="pass" if yes_count > no_count else "fail",
            identifier=str(uniqid),
            classification="passage",
            bill=bill,
        )
        vote.add_source(url)
        vote.set_count("yes", yes_count)
        vote.set_count("no", no_count)
        vote.set_count("other", other_count)

        def names(group_index):
            # Split a captured section into voter names.
            return re.split(r"\s{2,}", match.group(group_index).strip())

        yes_votes, no_votes, other_votes = names(2), names(4), names(7)

        # Empty fragments produced by the split are dropped.
        for name in filter(None, yes_votes):
            vote.yes(name)
        for name in filter(None, no_votes):
            vote.no(name)
        for name in filter(None, other_votes):
            vote.vote("other", name)

        yield vote
Ejemplo n.º 25
0
    def parse_vote(self, chamber, bill, row, action_text, action_date, url):
        """Yield a ``Vote`` built from the tallies shown in an action row.

        The four tallies are read from the spans next to the "A Favor",
        "En Contra", "Abstenido" and "Ausente" labels. The vote is
        classified as "passage" only when the action text contains
        "Votación Final"; otherwise it is "other".
        """
        def tally(label):
            # First matching span next to the given label, as an int.
            query = (
                './/div[label[contains(text(), "%s")]]'
                '/span[contains(@class,"smalltxt")]/text()' % label
            )
            return int(row.xpath(query)[0])

        yes = tally("A Favor")
        no = tally("En Contra")
        abstain = tally("Abstenido")
        absent = tally("Ausente")

        vote_chamber = self.parse_vote_chamber(chamber, action_text)

        if u"Votación Final" in action_text:
            classification = "passage"
        else:
            classification = "other"

        vote = Vote(
            chamber=vote_chamber,
            start_date=action_date,
            motion_text=action_text,
            result="pass" if (yes > no) else "fail",
            bill=bill,
            classification=classification,
        )
        vote.add_source(url)
        for option, total in (("yes", yes), ("no", no),
                              ("absent", absent), ("abstain", abstain)):
            vote.set_count(option, total)

        # The attached vote PDF is added as a document rather than as a
        # bill version.
        # TODO: maybe this should be set as the source?
        self.parse_version(bill, row, is_document=True)

        yield vote
Ejemplo n.º 26
0
    def scrape_votes(self, bill):
        """Yield a ``Vote`` for every roll call returned for ``bill``.

        The bill number is the second whitespace-separated token of
        ``bill.identifier``. Absent and excused counts are combined into
        "other", and a vote passes only when yes votes exceed no plus
        other votes.
        """
        bill_num = bill.identifier.split()[1]

        url = (
            "http://wslwebservices.leg.wa.gov/legislationservice.asmx/"
            "GetRollCalls?billNumber=%s&biennium=%s" % (bill_num, self.biennium)
        )
        response = self.get(url)
        tree = lxml.etree.fromstring(response.content)

        for rc in xpath(tree, "//wa:RollCall"):

            def text(expr, node=rc):
                # String value of ``expr`` evaluated relative to ``node``.
                return xpath(node, "string(%s)" % expr)

            motion = text("wa:Motion")
            seq_no = text("wa:SequenceNumber")

            # Keep only the date part of the timestamp.
            day = text("wa:VoteDate").split("T")[0]
            date = datetime.datetime.strptime(day, "%Y-%m-%d").date()

            yes_count = int(text("wa:YeaVotes/wa:Count"))
            no_count = int(text("wa:NayVotes/wa:Count"))
            abs_count = int(text("wa:AbsentVotes/wa:Count"))
            ex_count = int(text("wa:ExcusedVotes/wa:Count"))
            other_count = abs_count + ex_count

            agency = text("wa:Agency")
            chamber = {"House": "lower", "Senate": "upper"}[agency]

            if yes_count > (no_count + other_count):
                result = "pass"
            else:
                result = "fail"

            vote = Vote(
                chamber=chamber,
                start_date=date,
                motion_text="{} (#{})".format(motion, seq_no),
                result=result,
                classification="other",
                bill=bill,
            )
            vote.set_count("yes", yes_count)
            vote.set_count("no", no_count)
            vote.set_count("other", other_count)
            vote.add_source(url)

            for sv in xpath(rc, "wa:Votes/wa:Vote"):
                name = text("wa:Name", sv)
                # "VOte" is the element name exactly as queried upstream.
                vtype = text("wa:VOte", sv)

                if vtype == "Yea":
                    vote.yes(name)
                elif vtype == "Nay":
                    vote.no(name)
                else:
                    vote.vote("other", name)

            yield vote
Ejemplo n.º 27
0
def viva_voce_votes(root, session, chamber):
    """Yield a zero-count ``VoteEvent`` for each valid viva voce notice.

    Candidate notices are the ``<div>`` elements whose text starts with
    "All Members are deemed"; each is wrapped in ``MaybeViva`` and skipped
    unless it reports itself valid. Only the first two characters of
    ``session`` are used as the legislative session.
    """
    zeroed_options = ('yes', 'no', 'absent', 'not voting')
    notices = root.xpath(u'//div[starts-with(., "All Members are deemed")]')

    for el in notices:
        mv = MaybeViva(el)
        if mv.is_valid:
            v = VoteEvent(
                chamber=chamber,
                start_date=None,
                motion_text='passage' if mv.passed else 'other',
                result='pass' if mv.passed else 'fail',
                classification='passage' if mv.passed else 'other',
                legislative_session=session[0:2],
                bill=mv.bill_id,
                bill_chamber=mv.chamber,
            )

            # No individual tallies are recorded, so every count is zero.
            for option in zeroed_options:
                v.set_count(option, 0)

            yield v
Ejemplo n.º 28
0
    def handle_page(self):
        """Yield one committee ``VoteEvent`` parsed from ``self.doc``.

        The date, committee and action are read from spans with fixed ids;
        the totals come from the text of the last nested table; individual
        votes come from the cells of the triple-nested tables.

        Raises:
            ValueError: If a member's vote marker is not ``Y``, ``N``,
                ``-`` or a parenthesised ``(Y)``/``(N)``.
        """
        (date, ) = self.doc.xpath('//span[@id="ctl00_ContentPlaceHolder1_lblDate"]/text()')
        date = datetime.datetime.strptime(date, '%m/%d/%Y %I:%M:%S %p'
                                          ).isoformat().replace('T', ' ')

        totals = self.doc.xpath('//table//table')[-1].text_content()
        # Collapse all whitespace runs so the totals sit on one line.
        totals = re.sub(r'(?mu)\s+', " ", totals).strip()
        # Both pattern fragments must be raw strings: the second one used to
        # be a plain literal whose "\s" and "\d" are invalid string escapes.
        (yes_count, no_count, other_count) = [int(x) for x in re.search(
            r'(?m)Total Yeas:\s+(\d+)\s+Total Nays:\s+(\d+)\s+'
            r'Total Missed:\s+(\d+)', totals).groups()]
        result = 'pass' if yes_count > no_count else 'fail'

        (committee, ) = self.doc.xpath(
            '//span[@id="ctl00_ContentPlaceHolder1_lblCommittee"]/text()')
        (action, ) = self.doc.xpath('//span[@id="ctl00_ContentPlaceHolder1_lblAction"]/text()')
        motion = "{} ({})".format(action, committee)

        vote = VoteEvent(start_date=date,
                         bill=self.kwargs['bill'],
                         chamber='lower',
                         motion_text=motion,
                         result=result,
                         classification='committee',
                         )
        vote.add_source(self.url)
        vote.set_count('yes', yes_count)
        vote.set_count('no', no_count)
        vote.set_count('not voting', other_count)

        for member_vote in self.doc.xpath('//table//table//table//td'):
            # Blank cells carry no vote.
            if not member_vote.text_content().strip():
                continue

            (member, ) = member_vote.xpath('span[2]//text()')
            (member_vote, ) = member_vote.xpath('span[1]//text()')

            if member_vote == "Y":
                vote.yes(member)
            elif member_vote == "N":
                vote.no(member)
            elif member_vote == "-":
                vote.vote('not voting', member)
            # Parenthetical votes appear to not be counted in the
            # totals for Yea, Nay, _or_ Missed
            elif re.search(r'\([YN]\)', member_vote):
                continue
            else:
                raise ValueError("Unknown vote type found: {}".format(member_vote))

        yield vote
Ejemplo n.º 29
0
    def parse_vote(self, actor, date, row, session, bill_id, bill_chamber,
                   source):
        """
        Take the actor, date and row element and yield one VoteEvent.

        The first span holds "<outcome>-<yes>-<no>-<other>"; the text after
        the second and third spans lists the yes and no voters, and later
        spans starting with "Absent" or "Excused" list the remaining voters.
        """
        spans = row.xpath(".//span")

        # Motion text with non-breaking spaces and dashes removed; an empty
        # result falls back to "passage".
        motion = row.text.replace(u"\u00a0", " ").replace("-", "").strip()
        if not motion:
            motion = "passage"

        summary = spans[0].text_content()
        passed, yes_count, no_count, other_count = summary.rsplit("-", 3)

        yes_votes = self.get_names(spans[1].tail)
        no_votes = self.get_names(spans[2].tail)
        other_votes = []
        for span in spans[3:]:
            if span.text.startswith(("Absent", "Excused")):
                other_votes.extend(self.get_names(span.tail))

        # Map the outcome wording to a result; unrecognised wording is
        # passed through unchanged.
        outcome_words = (
            ("adopted", "pass"),
            ("passed", "pass"),
            ("failed", "fail"),
        )
        for word, outcome in outcome_words:
            if word in passed.lower():
                passed = outcome
                break

        vote = VoteEvent(
            chamber=actor,
            start_date=date,
            motion_text=motion,
            bill=bill_id,
            bill_chamber=bill_chamber,
            result=passed,
            classification="passage",
            legislative_session=session,
        )
        vote.add_source(source)
        vote.set_count("yes", int(yes_count))
        vote.set_count("no", int(no_count))
        vote.set_count("absent", int(other_count))

        def real_names(names):
            # Skip empty entries and the literal placeholder "None".
            return [name for name in names if name and name != "None"]

        for name in real_names(yes_votes):
            vote.yes(name)
        for name in real_names(no_votes):
            vote.no(name)
        for name in real_names(other_votes):
            vote.vote("absent", name)
        yield vote
Ejemplo n.º 30
0
    def scrape_chamber_votes(self, chamber, session):
        """Yield a ``VoteEvent`` for every vote found for ``chamber``.

        Args:
            chamber: ``"upper"`` or ``"lower"``; selects the votes URL.
            session: Legislative session, passed to the date lookup, the
                page parser and each ``VoteEvent``.

        Votes whose bill cannot be found in ``self._bill_id_by_type`` are
        logged with ``self.warning`` and skipped.

        Raises:
            KeyError: If ``chamber`` is not ``"upper"``/``"lower"`` or a
                parsed vote reports a chamber code other than ``H``/``S``.
        """
        url = {
            "upper": "%s/%s" % (RI_URL_BASE, "SVotes"),
            "lower": "%s/%s" % (RI_URL_BASE, "HVotes")
        }[chamber]
        action = "%s/%s" % (url, "votes.asp")
        dates = self.get_vote_dates(url, session)
        for date in dates:
            votes = self.parse_vote_page(self.post_to(action, date), url,
                                         session)
            for vote_dict in votes:
                for vote in vote_dict.values():
                    count = vote['count']
                    # Chamber reported by the vote itself; kept in its own
                    # name so the ``chamber`` argument is not overwritten.
                    vote_chamber = {
                        "H": "lower",
                        "S": "upper"
                    }[vote['meta']['chamber']]

                    try:
                        bill_id = self._bill_id_by_type[(
                            vote_chamber, vote['meta']['bill'])]
                    except KeyError:
                        # Only a failed lookup is expected here; a bare
                        # ``except`` would also hide unrelated errors and
                        # swallow KeyboardInterrupt.
                        self.warning('no such bill_id %s %s', vote_chamber,
                                     vote['meta']['bill'])
                        continue

                    v = VoteEvent(
                        chamber=vote_chamber,
                        start_date=vote['time'].strftime('%Y-%m-%d'),
                        motion_text=vote['meta']['extra']['motion'],
                        result='pass' if count['passage'] else 'fail',
                        classification='passage',
                        legislative_session=session,
                        bill=bill_id,
                        bill_chamber=vote_chamber,
                    )
                    v.set_count('yes', int(count['YEAS']))
                    v.set_count('no', int(count['NAYS']))
                    v.set_count('other', int(count['NOT VOTING']))
                    v.add_source(vote['source'])
                    v.pupa_id = vote['source']

                    for vt in vote['votes']:
                        # Anything other than Y/N is recorded as "other".
                        key = {
                            'Y': 'yes',
                            'N': 'no',
                        }.get(vt['vote'], 'other')
                        v.vote(key, vt['name'])
                    yield v
Ejemplo n.º 31
0
    def parse_vote(self, bill, actor, date, motion, url, uniqid):
        """Fetch a roll-call page and yield the ``Vote`` it describes.

        Args:
            bill: Bill the vote belongs to; ``url`` is added to it as a
                source.
            actor: ``'upper'`` or ``'lower'``; any other value is recorded
                as an empty chamber.
            date: Start date passed to ``Vote``.
            motion: Motion text.
            url: Address of the roll-call page.
            uniqid: Identifier stored on the vote as a string.

        The page text is matched for ``YEAS``, ``NAYS`` and ``ABSENT``
        (optionally ``OR NOT VOTING``) sections; each gives a count followed
        by voter names separated by two or more whitespace characters.
        """
        page = self.get(url).text
        bill.add_source(url)
        # Raw strings keep "\s" and "\d" as regex escapes; in plain literals
        # they are invalid string escape sequences.
        vote_re = re.compile(r'YEAS -?\s?(\d+)(.*)NAYS -?\s?(\d+)'
                             r'(.*)ABSENT( OR NOT VOTING)? -?\s?'
                             r'(\d+)(.*)',
                             re.MULTILINE | re.DOTALL)
        match = vote_re.search(page)
        yes_count = int(match.group(1))
        no_count = int(match.group(3))
        other_count = int(match.group(6))

        passed = yes_count > no_count

        if actor == 'upper' or actor == 'lower':
            vote_chamber = actor
        else:
            vote_chamber = ''

        vote = Vote(chamber=vote_chamber,
                    start_date=date,
                    motion_text=motion,
                    result='pass' if passed else 'fail',
                    identifier=str(uniqid),
                    classification='passage',
                    bill=bill)
        vote.add_source(url)
        vote.set_count('yes', yes_count)
        vote.set_count('no', no_count)
        vote.set_count('other', other_count)

        yes_votes = re.split(r'\s{2,}', match.group(2).strip())
        no_votes = re.split(r'\s{2,}', match.group(4).strip())
        other_votes = re.split(r'\s{2,}', match.group(7).strip())

        # Empty fragments produced by the split are skipped.
        for yes in yes_votes:
            if yes:
                vote.yes(yes)
        for no in no_votes:
            if no:
                vote.no(no)
        for other in other_votes:
            if other:
                vote.vote('other', other)

        yield vote
Ejemplo n.º 32
0
    def parse_vote(self, bill, actor, date, motion, url, uniqid):
        """Download a roll-call page and yield the ``Vote`` parsed from it.

        Counts and voter names are taken from the ``YEAS``, ``NAYS`` and
        ``ABSENT`` (optionally ``OR NOT VOTING``) sections of the page text.
        ``url`` is recorded as a source on both ``bill`` and the vote.
        """
        page = self.get(url).text
        bill.add_source(url)

        flags = re.MULTILINE | re.DOTALL
        vote_re = re.compile(
            r'YEAS -?\s?(\d+)(.*)NAYS -?\s?(\d+)'
            r'(.*)ABSENT( OR NOT VOTING)? -?\s?'
            r'(\d+)(.*)', flags)
        match = vote_re.search(page)

        yes_count = int(match.group(1))
        no_count = int(match.group(3))
        other_count = int(match.group(6))
        result = 'pass' if yes_count > no_count else 'fail'

        # Actors other than the two chambers get an empty chamber.
        vote_chamber = actor if actor in ('upper', 'lower') else ''

        vote = Vote(
            chamber=vote_chamber,
            start_date=date,
            motion_text=motion,
            result=result,
            identifier=str(uniqid),
            classification='passage',
            bill=bill,
        )
        vote.add_source(url)
        vote.set_count('yes', yes_count)
        vote.set_count('no', no_count)
        vote.set_count('other', other_count)

        # Names within a section are separated by runs of 2+ whitespace.
        splitter = re.compile(r'\s{2,}')
        yes_votes = splitter.split(match.group(2).strip())
        no_votes = splitter.split(match.group(4).strip())
        other_votes = splitter.split(match.group(7).strip())

        for voter in (name for name in yes_votes if name):
            vote.yes(voter)
        for voter in (name for name in no_votes if name):
            vote.no(voter)
        for voter in (name for name in other_votes if name):
            vote.vote('other', voter)

        yield vote
Ejemplo n.º 33
0
    def scrape_votes(self, bill):
        """Yield one ``Vote`` per roll call returned for ``bill``.

        The roll calls are fetched by bill number (second token of
        ``bill.identifier``) and ``self.biennium``. Absent and excused
        counts are summed into "other"; the vote passes only when yes
        votes exceed no plus other votes.
        """
        bill_num = bill.identifier.split()[1]
        url = ("http://wslwebservices.leg.wa.gov/legislationservice.asmx/"
               "GetRollCalls?billNumber=%s&biennium=%s" % (
                   bill_num, self.biennium))
        page = lxml.etree.fromstring(self.get(url).content)

        for rc in xpath(page, "//wa:RollCall"):
            motion = xpath(rc, "string(wa:Motion)")
            seq_no = xpath(rc, "string(wa:SequenceNumber)")

            # Only the date portion of the timestamp is kept.
            date_text = xpath(rc, "string(wa:VoteDate)").split("T")[0]
            date = datetime.datetime.strptime(date_text, "%Y-%m-%d").date()

            yes_count = int(xpath(rc, "string(wa:YeaVotes/wa:Count)"))
            no_count = int(xpath(rc, "string(wa:NayVotes/wa:Count)"))
            abs_count = int(xpath(rc, "string(wa:AbsentVotes/wa:Count)"))
            ex_count = int(xpath(rc, "string(wa:ExcusedVotes/wa:Count)"))
            other_count = abs_count + ex_count

            agency = xpath(rc, "string(wa:Agency)")
            chamber = {'House': 'lower', 'Senate': 'upper'}[agency]

            passed = yes_count > (no_count + other_count)
            vote = Vote(
                chamber=chamber,
                start_date=date,
                motion_text='{} (#{})'.format(motion, seq_no),
                result='pass' if passed else 'fail',
                classification='other',
                bill=bill,
            )
            vote.set_count('yes', yes_count)
            vote.set_count('no', no_count)
            vote.set_count('other', other_count)
            vote.add_source(url)

            # Yea/Nay go to the dedicated recorders; anything else is
            # recorded as "other".
            recorders = {'Yea': vote.yes, 'Nay': vote.no}
            for sv in xpath(rc, "wa:Votes/wa:Vote"):
                name = xpath(sv, "string(wa:Name)")
                vtype = xpath(sv, "string(wa:VOte)")

                record = recorders.get(vtype)
                if record is None:
                    vote.vote('other', name)
                else:
                    record(name)

            yield vote
Ejemplo n.º 34
0
    def scrape_votes(self, bill):
        """Yield one ``Vote`` per roll call returned for ``bill``.

        The motion text is used unchanged as the vote's motion. Absent and
        excused counts are summed into "other"; the vote passes only when
        yes votes exceed no plus other votes.
        """
        bill_num = bill.identifier.split()[1]

        base = "http://wslwebservices.leg.wa.gov/legislationservice.asmx/"
        url = (base +
               "GetRollCalls?billNumber=%s&biennium=%s" %
               (bill_num, self.biennium))
        page = self.get(url)
        page = lxml.etree.fromstring(page.content)

        chamber_by_agency = {'House': 'lower', 'Senate': 'upper'}

        for rc in xpath(page, "//wa:RollCall"):
            motion = xpath(rc, "string(wa:Motion)")

            # Drop the time portion before parsing the date.
            stamp = xpath(rc, "string(wa:VoteDate)")
            date = datetime.datetime.strptime(
                stamp.split("T")[0], "%Y-%m-%d").date()

            yes_count = int(xpath(rc, "string(wa:YeaVotes/wa:Count)"))
            no_count = int(xpath(rc, "string(wa:NayVotes/wa:Count)"))
            abs_count = int(xpath(rc, "string(wa:AbsentVotes/wa:Count)"))
            ex_count = int(xpath(rc, "string(wa:ExcusedVotes/wa:Count)"))

            other_count = abs_count + ex_count

            agency = xpath(rc, "string(wa:Agency)")
            chamber = chamber_by_agency[agency]

            if yes_count > (no_count + other_count):
                result = 'pass'
            else:
                result = 'fail'

            vote = Vote(chamber=chamber,
                        start_date=date,
                        motion_text=motion,
                        result=result,
                        classification='other',
                        bill=bill)
            for option, total in (('yes', yes_count),
                                  ('no', no_count),
                                  ('other', other_count)):
                vote.set_count(option, total)
            vote.add_source(url)

            for sv in xpath(rc, "wa:Votes/wa:Vote"):
                name = xpath(sv, "string(wa:Name)")
                vtype = xpath(sv, "string(wa:VOte)")

                # Anything that is not an explicit Yea or Nay is "other".
                if vtype == 'Yea':
                    vote.yes(name)
                elif vtype == 'Nay':
                    vote.no(name)
                else:
                    vote.vote('other', name)

            yield vote
Ejemplo n.º 35
0
    def parse_committee_votes(self, bill, url):
        """Yield a ``VoteEvent`` for each vote-summary link on a page.

        The chamber is "upper" when the page's ``<h1>`` text contains
        "Senate", otherwise "lower". The roll call for each link is
        obtained from ``self.parse_upper_committee_vote_rollcall``.
        ``url`` is added to ``bill`` as a source.
        """
        bill.add_source(url)
        doc = lxml.html.fromstring(self.get(url).text)
        doc.make_links_absolute(url)

        heading = doc.xpath('string(//h1)')
        chamber = 'upper' if 'Senate' in heading else 'lower'
        # Committee name: second-to-last text fragment of the first <h2>.
        committee = tuple(doc.xpath('//h2')[0].itertext())[-2].strip()

        summary_links = doc.xpath("//a[contains(@href, 'listVoteSummary.cfm')]")
        for link in summary_links:

            # Date: try each known format in turn. If none matches, the
            # raw cell text is left in ``date`` unchanged.
            date = link.xpath('../../td')[0].text_content()
            for fmt in ("%m/%d/%Y", "%m-%d-%Y"):
                try:
                    date = datetime.datetime.strptime(date, fmt)
                except ValueError:
                    continue
                break

            # Motion: the text after the last " - " in the link.
            motion = link.text_content().split(' - ')[-1].strip()
            motion = 'Committee vote (%s): %s' % (committee, motion)

            # Roll call
            vote_url = link.attrib['href']
            rollcall = self.parse_upper_committee_vote_rollcall(bill, vote_url)

            outcome = 'pass' if rollcall['passed'] else 'fail'
            vote = VoteEvent(
                chamber=chamber,
                start_date=tz.localize(date),
                motion_text=motion,
                classification='other',
                result=outcome,
                bill=bill,
            )
            vote.pupa_id = vote_url
            for option in ('yes', 'no', 'other'):
                vote.set_count(option, rollcall[option + '_count'])

            # Voter lists are optional in the roll call.
            for option in ('yes', 'no', 'other'):
                for name in rollcall.get(option + '_votes', []):
                    vote.vote(option, name)

            vote.add_source(url)
            vote.add_source(vote_url)

            yield vote
Ejemplo n.º 36
0
def viva_voce_votes(root, session, chamber):
    """Yield a zero-count ``VoteEvent`` for each valid viva voce notice.

    Every ``<div>`` whose text starts with "All Members are deemed" is
    wrapped in ``MaybeViva``; invalid ones are skipped. The first two
    characters of ``session`` are used as the legislative session.
    """
    query = u'//div[starts-with(., "All Members are deemed")]'
    for el in root.xpath(query):
        mv = MaybeViva(el)
        if not mv.is_valid:
            continue

        # Motion text and classification share the same wording.
        if mv.passed:
            label, result = "passage", "pass"
        else:
            label, result = "other", "fail"

        v = VoteEvent(
            chamber=chamber,
            start_date=None,
            motion_text=label,
            result=result,
            classification=label,
            legislative_session=session[0:2],
            bill=mv.bill_id,
            bill_chamber=mv.chamber,
        )

        # No individual tallies are recorded, so every count is zero.
        for option in ("yes", "no", "absent", "not voting"):
            v.set_count(option, 0)

        yield v
Ejemplo n.º 37
0
def viva_voce_votes(root, session, chamber):
    """Generate VoteEvents for voice votes announced in ``root``.

    The document is searched for ``div`` elements beginning with
    "All Members are deemed"; each one is wrapped in ``MaybeViva`` and
    dropped unless ``is_valid`` is true.  The resulting events have no
    date and every tally set to zero.

    :param root: parsed document exposing ``xpath``.
    :param session: session identifier, truncated to two characters.
    :param chamber: chamber recorded on the event.
    """
    zeroed_options = ('yes', 'no', 'absent', 'not voting')
    announcements = root.xpath(
        u'//div[starts-with(., "All Members are deemed")]')

    for announcement in announcements:
        parsed = MaybeViva(announcement)
        if not parsed.is_valid:
            continue

        carried = parsed.passed
        kind = 'passage' if carried else 'other'

        record = VoteEvent(
            chamber=chamber,
            start_date=None,
            motion_text=kind,
            result='pass' if carried else 'fail',
            classification=kind,
            legislative_session=session[:2],
            bill=parsed.bill_id,
            bill_chamber=parsed.chamber
        )

        for option in zeroed_options:
            record.set_count(option, 0)

        yield record
Ejemplo n.º 38
0
    def parse_vote(self, actor, date, row, session, bill_id, bill_chamber,
                   source):
        """
        Build a VoteEvent from one vote row and yield it.

        The row's own text (non-breaking spaces and dashes removed) is the
        motion, defaulting to "passage" when empty.  The first span holds
        "<outcome>-<yes>-<no>-<other>"; the second and third spans are
        followed by the yes and no voter names, and any later span whose
        text starts with "Absent" or "Excused" is followed by names that
        are recorded as absent.
        """
        spans = row.xpath('.//span')
        motion = row.text.replace(u'\u00a0', " ").replace("-", "").strip() \
            or "passage"

        summary = spans[0].text_content()
        outcome, yes_count, no_count, other_count = summary.rsplit('-', 3)

        yes_votes = self.get_names(spans[1].tail)
        no_votes = self.get_names(spans[2].tail)
        other_votes = []
        for extra in spans[3:]:
            if extra.text.startswith(('Absent', 'Excused')):
                other_votes.extend(self.get_names(extra.tail))

        # Translate the free-text outcome; unrecognised text is passed on
        # unchanged.
        keywords = (('adopted', 'pass'), ('passed', 'pass'), ('failed', 'fail'))
        lowered = outcome.lower()
        outcome = next(
            (verdict for word, verdict in keywords if word in lowered),
            outcome)

        vote = VoteEvent(chamber=actor,
                         start_date=date,
                         motion_text=motion,
                         bill=bill_id,
                         bill_chamber=bill_chamber,
                         result=outcome,
                         classification="passage",
                         legislative_session=session)
        vote.add_source(source)

        tallies = (('yes', yes_count), ('no', no_count), ('absent', other_count))
        for option, raw in tallies:
            vote.set_count(option, int(raw))

        recorders = (
            (yes_votes, vote.yes),
            (no_votes, vote.no),
            (other_votes, lambda voter: vote.vote('absent', voter)),
        )
        for names, record in recorders:
            for name in names:
                # Skip blanks and the literal placeholder 'None'.
                if name and name != 'None':
                    record(name)
        yield vote
Ejemplo n.º 39
0
    def parse_committee_votes(self, bill, url):
        """Yield a VoteEvent for each committee roll call linked from ``url``.

        The page is fetched with ``self.get`` and added to ``bill`` as a
        source.  The chamber is 'upper' when the first ``h1`` mentions
        "Senate", otherwise 'lower'; the committee name is the
        second-to-last text node of the first ``h2``.  Each
        ``listVoteSummary.cfm`` link becomes one vote whose roll call is
        read by ``parse_upper_committee_vote_rollcall``.

        :param bill: bill the votes belong to.
        :param url: committee vote listing page.
        :raises ValueError: if a vote's date cell matches neither
            ``%m/%d/%Y`` nor ``%m-%d-%Y``.
        """
        bill.add_source(url)
        html = self.get(url).text
        doc = lxml.html.fromstring(html)
        doc.make_links_absolute(url)
        chamber = 'upper' if 'Senate' in doc.xpath('string(//h1)') else 'lower'
        committee = tuple(doc.xpath('//h2')[0].itertext())[-2].strip()
        for link in doc.xpath("//a[contains(@href, 'listVoteSummary.cfm')]"):

            # Date: first cell of the table row holding the link.
            date_text = link.xpath('../../td')[0].text_content()
            for fmt in ("%m/%d/%Y", "%m-%d-%Y"):
                try:
                    date = datetime.datetime.strptime(date_text, fmt)
                except ValueError:
                    continue
                break
            else:
                # Fail with the offending text instead of handing a str to
                # tz.localize() below.
                raise ValueError(
                    'Unrecognized committee vote date %r at %s'
                    % (date_text, url))

            # Motion: the part of the link text after the last ' - '.
            motion = link.text_content().split(' - ')[-1].strip()
            motion = 'Committee vote (%s): %s' % (committee, motion)

            # Roll call
            vote_url = link.attrib['href']
            rollcall = self.parse_upper_committee_vote_rollcall(bill, vote_url)

            vote = VoteEvent(
                chamber=chamber,
                start_date=tz.localize(date),
                motion_text=motion,
                classification='other',
                result='pass' if rollcall['passed'] else 'fail',
                bill=bill,
            )
            vote.pupa_id = vote_url
            for voteval in ('yes', 'no', 'other'):
                vote.set_count(voteval, rollcall[voteval + '_count'])

            # Name lists are optional in the roll call mapping.
            for voteval in ('yes', 'no', 'other'):
                for name in rollcall.get(voteval + '_votes', []):
                    vote.vote(voteval, name)

            vote.add_source(url)
            vote.add_source(vote_url)

            yield vote
Ejemplo n.º 40
0
    def parse_committee_votes(self, bill, url):
        """Scrape a committee vote listing and yield one VoteEvent per vote.

        ``url`` is fetched via ``self.get`` and recorded as a source on
        ``bill``.  "Senate" in the first ``h1`` selects the "upper" chamber
        (else "lower"); the committee name is taken from the second-to-last
        text node of the first ``h2``.  Every link to
        ``listVoteSummary.cfm`` is followed through
        ``parse_upper_committee_vote_rollcall`` to obtain counts and names.

        :param bill: bill the votes are attached to.
        :param url: address of the committee vote listing.
        :raises ValueError: when a vote's date text is in neither
            ``%m/%d/%Y`` nor ``%m-%d-%Y`` form.
        """
        date_formats = ("%m/%d/%Y", "%m-%d-%Y")

        def parse_date(text):
            # Try each known layout; report the raw text when none fits
            # rather than letting a str reach tz.localize().
            for fmt in date_formats:
                try:
                    return datetime.datetime.strptime(text, fmt)
                except ValueError:
                    pass
            raise ValueError(
                "Unrecognized committee vote date %r at %s" % (text, url)
            )

        bill.add_source(url)
        html = self.get(url).text
        doc = lxml.html.fromstring(html)
        doc.make_links_absolute(url)
        chamber = "upper" if "Senate" in doc.xpath("string(//h1)") else "lower"
        committee = tuple(doc.xpath("//h2")[0].itertext())[-2].strip()
        for link in doc.xpath("//a[contains(@href, 'listVoteSummary.cfm')]"):

            # Date: first cell of the row that contains the link.
            date = parse_date(link.xpath("../../td")[0].text_content())

            # Motion: text after the last " - " in the link.
            motion = link.text_content().split(" - ")[-1].strip()
            motion = "Committee vote (%s): %s" % (committee, motion)

            # Roll call
            vote_url = link.attrib["href"]
            rollcall = self.parse_upper_committee_vote_rollcall(bill, vote_url)

            vote = VoteEvent(
                chamber=chamber,
                start_date=tz.localize(date),
                motion_text=motion,
                classification="other",
                result="pass" if rollcall["passed"] else "fail",
                bill=bill,
            )
            vote.pupa_id = vote_url
            vote.set_count("yes", rollcall["yes_count"])
            vote.set_count("no", rollcall["no_count"])
            vote.set_count("other", rollcall["other_count"])

            # Per-member votes are optional in the roll call mapping.
            for voteval in ("yes", "no", "other"):
                for name in rollcall.get(voteval + "_votes", []):
                    vote.vote(voteval, name)

            vote.add_source(url)
            vote.add_source(vote_url)

            yield vote
Ejemplo n.º 41
0
    def scrape_chamber_votes(self, chamber, session):
        """Yield a VoteEvent for every vote listed for ``chamber``.

        Vote dates come from ``get_vote_dates``; each date is posted to the
        chamber's ``votes.asp`` page and the response parsed by
        ``parse_vote_page``.  Votes whose bill cannot be found in
        ``self._bill_id_by_type`` are logged and skipped.

        :param chamber: "upper" or "lower"; selects the listing URL.
        :param session: legislative session stored on each event.
        """
        listing_pages = {"upper": "SVotes", "lower": "HVotes"}
        chamber_codes = {"H": "lower", "S": "upper"}
        choice_names = {"Y": "yes", "N": "no"}

        url = "%s/%s" % (RI_URL_BASE, listing_pages[chamber])
        action = "%s/%s" % (url, "votes.asp")

        for date in self.get_vote_dates(url, session):
            response = self.post_to(action, date)
            for vote_dict in self.parse_vote_page(response, url, session):
                for vote in vote_dict.values():
                    count = vote["count"]
                    # The chamber recorded is the one named in the vote's
                    # own metadata.
                    vote_chamber = chamber_codes[vote["meta"]["chamber"]]

                    try:
                        bill_key = (vote_chamber, vote["meta"]["bill"])
                        bill_id = self._bill_id_by_type[bill_key]
                    except KeyError:
                        self.warning("no such bill_id %s %s", vote_chamber,
                                     vote["meta"]["bill"])
                        continue

                    event = VoteEvent(
                        chamber=vote_chamber,
                        start_date=vote["time"].strftime("%Y-%m-%d"),
                        motion_text=vote["meta"]["extra"]["motion"],
                        result="pass" if count["passage"] else "fail",
                        classification="passage",
                        legislative_session=session,
                        bill=bill_id,
                        bill_chamber=vote_chamber,
                    )
                    event.set_count("yes", int(count["YEAS"]))
                    event.set_count("no", int(count["NAYS"]))
                    event.set_count("other", int(count["NOT VOTING"]))
                    event.add_source(vote["source"])
                    event.pupa_id = vote["source"]

                    # Anything other than Y/N is recorded as "other".
                    for ballot in vote["votes"]:
                        option = choice_names.get(ballot["vote"], "other")
                        event.vote(option, ballot["name"])
                    yield event
Ejemplo n.º 42
0
    def scrape_chamber_votes(self, chamber, session):
        """Scrape all recorded votes for one chamber, yielding VoteEvents.

        The chamber's vote listing is queried once per date returned by
        ``get_vote_dates``.  A vote is dropped (with a warning) when its
        bill is not present in ``self._bill_id_by_type``.

        :param chamber: "upper" or "lower"; any other value raises KeyError.
        :param session: legislative session recorded on each event.
        """
        url = "%s/%s" % (
            RI_URL_BASE,
            {"upper": "SVotes", "lower": "HVotes"}[chamber],
        )
        action = "%s/%s" % (url, "votes.asp")
        to_chamber = {"H": "lower", "S": "upper"}
        to_option = {'Y': 'yes', 'N': 'no'}

        for vote_date in self.get_vote_dates(url, session):
            page = self.post_to(action, vote_date)
            for group in self.parse_vote_page(page, url, session):
                for item in group.values():
                    totals = item['count']
                    # Use the chamber named by the vote's metadata.
                    acting = to_chamber[item['meta']['chamber']]

                    try:
                        bill_id = self._bill_id_by_type[(acting, item['meta']['bill'])]
                    except KeyError:
                        self.warning('no such bill_id %s %s', acting, item['meta']['bill'])
                        continue

                    v = VoteEvent(
                        chamber=acting,
                        start_date=item['time'].strftime('%Y-%m-%d'),
                        motion_text=item['meta']['extra']['motion'],
                        result='pass' if totals['passage'] else 'fail',
                        classification='passage',
                        legislative_session=session,
                        bill=bill_id,
                        bill_chamber=acting,
                    )
                    for option, label in (('yes', 'YEAS'),
                                          ('no', 'NAYS'),
                                          ('other', 'NOT VOTING')):
                        v.set_count(option, int(totals[label]))
                    v.add_source(item['source'])
                    v.pupa_id = item['source']

                    # Ballots other than Y/N fall into 'other'.
                    for cast in item['votes']:
                        v.vote(to_option.get(cast['vote'], 'other'), cast['name'])
                    yield v
Ejemplo n.º 43
0
    def parse_vote(self, actor, date, row, session, bill_id, bill_chamber, source):
        """
        Turn a vote row element into a VoteEvent and yield it.

        The motion is the row's leading text with non-breaking spaces and
        dashes stripped ("passage" if nothing remains).  The first span's
        text is split from the right into outcome and yes/no/other counts;
        voter names follow the second span (yes), the third span (no) and
        any later span starting with "Absent" or "Excused" (recorded as
        absent).
        """
        def wanted(voter):
            # Drop empty entries and the literal placeholder 'None'.
            return voter and voter != 'None'

        spans = row.xpath('.//span')

        motion = row.text.replace(u'\u00a0', " ").replace("-", "").strip()
        if not motion:
            motion = "passage"

        result, yes_count, no_count, other_count = \
            spans[0].text_content().rsplit('-', 3)

        yes_votes = self.get_names(spans[1].tail)
        no_votes = self.get_names(spans[2].tail)
        other_votes = []
        for span in spans[3:]:
            if not span.text.startswith(('Absent', 'Excused')):
                continue
            other_votes += self.get_names(span.tail)

        # Map the outcome text onto pass/fail; unmatched text is kept as is.
        result_text = result.lower()
        if 'adopted' in result_text:
            result = 'pass'
        elif 'passed' in result_text:
            result = 'pass'
        elif 'failed' in result_text:
            result = 'fail'

        vote = VoteEvent(chamber=actor,
                         start_date=date,
                         motion_text=motion,
                         bill=bill_id,
                         bill_chamber=bill_chamber,
                         result=result,
                         classification="passage",
                         legislative_session=session)
        vote.add_source(source)
        vote.set_count('yes', int(yes_count))
        vote.set_count('no', int(no_count))
        vote.set_count('absent', int(other_count))

        for voter in filter(wanted, yes_votes):
            vote.yes(voter)
        for voter in filter(wanted, no_votes):
            vote.no(voter)
        for voter in filter(wanted, other_votes):
            vote.vote('absent', voter)
        yield vote
Ejemplo n.º 44
0
    def asvote(self):
        """Return this roll call as a VoteEvent.

        Chamber, date, motion, outcome, counts and voter names are all read
        through this object's accessor methods; ``self.url`` is attached as
        the source.
        """
        details = dict(
            chamber=self.chamber(),
            start_date=self.date(),
            motion_text=self.motion(),
            result='pass' if self.passed() else 'fail',
            classification='passage',
            bill=self.bill,
        )
        event = VoteEvent(**details)

        event.set_count('yes', self.yes_count())
        event.set_count('no', self.no_count())
        event.set_count('other', self.other_count())

        # Each getter is called only when its turn comes, in yes/no/other order.
        for record, voters in ((event.yes, self.yes_votes),
                               (event.no, self.no_votes)):
            for voter in voters():
                record(voter)
        for voter in self.other_votes():
            event.vote('other', voter)

        event.add_source(self.url)
        return event
Ejemplo n.º 45
0
    def asvote(self):
        """Build the VoteEvent that represents this parsed vote.

        All values come from the object's own accessors; the event is
        classified as a passage vote and sourced to ``self.url``.
        """
        chamber = self.chamber()
        when = self.date()
        motion = self.motion()
        if self.passed():
            outcome = 'pass'
        else:
            outcome = 'fail'

        vote_event = VoteEvent(
            chamber=chamber,
            start_date=when,
            motion_text=motion,
            result=outcome,
            classification='passage',
            bill=self.bill,
        )

        # Totals first, then the individual ballots.
        vote_event.set_count('yes', self.yes_count())
        vote_event.set_count('no', self.no_count())
        vote_event.set_count('other', self.other_count())

        for member in self.yes_votes():
            vote_event.yes(member)
        for member in self.no_votes():
            vote_event.no(member)
        for member in self.other_votes():
            vote_event.vote('other', member)

        vote_event.add_source(self.url)
        return vote_event
Ejemplo n.º 46
0
    def parse_vote(
        self, bill, journal_entry_number, action, act_chamber, act_date, url
    ):
        """Yield a VoteEvent built from the tallies embedded in ``action``.

        ``action`` is split on whitespace; each token that starts with a
        non-digit followed by a digit is read as a tally, the text after
        its first character being the number.  Tokens containing "Y" set
        the yes count, "N" the no count, and "E" or "A" add to the other
        count.  The result follows "PASSED"/"FAILED" in the action text
        when present, otherwise yes > no.
        """
        tally = {"yes": 0, "no": 0, "other": 0}

        for token in action.split():
            if not re.match(r"[\D][\d]", token):
                continue
            if "Y" in token:
                tally["yes"] = int(token[1:])
            elif "N" in token:
                tally["no"] = int(token[1:])
            elif "E" in token or "A" in token:
                # Excused and absent are pooled together.
                tally["other"] += int(token[1:])

        if "PASSED" in action:
            result = "pass"
        elif "FAILED" in action:
            result = "fail"
        elif tally["yes"] > tally["no"]:
            result = "pass"
        else:
            result = "fail"

        vote = VoteEvent(
            bill=bill,
            start_date=act_date.strftime("%Y-%m-%d"),
            chamber=act_chamber,
            motion_text=action + " #" + journal_entry_number,
            result=result,
            classification="passage",
        )

        for option in ("yes", "no", "other"):
            vote.set_count(option, tally[option])
        vote.add_source(url)

        yield vote
Ejemplo n.º 47
0
    def asvote(self):
        """Assemble a VoteEvent from this object's accessors.

        ``self.url`` doubles as the event's ``pupa_id`` and as its source.
        """
        passed_label = "pass" if self.passed() else "fail"
        event_fields = {
            "chamber": self.chamber(),
            "start_date": self.date(),
            "motion_text": self.motion(),
            "result": passed_label,
            "classification": "passage",
            "bill": self.bill,
        }
        ballot = VoteEvent(**event_fields)
        ballot.pupa_id = self.url  # URL contains sequence number

        ballot.set_count("yes", self.yes_count())
        ballot.set_count("no", self.no_count())
        ballot.set_count("other", self.other_count())

        for legislator in self.yes_votes():
            ballot.yes(legislator)
        for legislator in self.no_votes():
            ballot.no(legislator)
        for legislator in self.other_votes():
            ballot.vote("other", legislator)

        ballot.add_source(self.url)
        return ballot
Ejemplo n.º 48
0
    def process_committee_vote(self, committee_action, bill):
        """Build a VoteEvent holding the totals of one committee action.

        :param committee_action: mapping with an "ActionDate" entry and a
            "Vote" list; each list item has "VoteType" and "VoteCount".
        :param bill: bill the vote is attached to.
        :returns: the VoteEvent, or None (after a warning) when either the
            date or the vote list is missing.
        """
        try:
            date = committee_action["ActionDate"]
            vote_info = committee_action["Vote"]

        except KeyError:
            self.logger.warning("Committee vote has no data. Skipping.")
            return
        date = self.date_format(date)

        # Every tally starts at zero so that a vote list lacking a "Yes" or
        # "No" entry no longer fails on an unbound local.
        yes_count = no_count = other_count = 0
        for entry in vote_info:
            # An empty string stands for zero votes.
            vote_count = 0 if entry["VoteCount"] == "" else int(entry["VoteCount"])

            if entry["VoteType"] == "Yes":
                yes_count = vote_count
            elif entry["VoteType"] == "No":
                no_count = vote_count
            else:
                other_count += vote_count

        result = "pass" if yes_count > no_count else "fail"

        event = VoteEvent(
            chamber="legislature",
            start_date=date,
            motion_text="Committee Vote",
            result=result,
            classification="committee",
            bill=bill,
        )
        event.set_count("yes", yes_count)
        event.set_count("no", no_count)
        event.set_count("other", other_count)

        return event
Ejemplo n.º 49
0
    def process_committee_vote(self, committee_action, bill):
        """Summarise a committee action's vote as a VoteEvent.

        :param committee_action: mapping providing "ActionDate" and "Vote";
            "Vote" is iterated for items with "VoteType" and "VoteCount".
        :param bill: bill to attach the vote to.
        :returns: a VoteEvent, or None when "ActionDate" or "Vote" is
            absent (a warning is logged in that case).
        """
        try:
            date = committee_action["ActionDate"]
            vote_info = committee_action["Vote"]

        except KeyError:
            self.logger.warning("Committee vote has no data. Skipping.")
            return
        date = self.date_format(date)

        # Default all three totals: previously a missing "Yes" or "No" row
        # left its counter undefined and the comparison below crashed.
        yes_count = 0
        no_count = 0
        other_count = 0
        for row in vote_info:
            raw_count = row["VoteCount"]
            vote_count = int(raw_count) if raw_count != "" else 0

            vote_type = row["VoteType"]
            if vote_type == "Yes":
                yes_count = vote_count
            elif vote_type == "No":
                no_count = vote_count
            else:
                # Any other vote type is pooled into "other".
                other_count += vote_count

        result = 'fail'
        if yes_count > no_count:
            result = 'pass'

        vote_event = VoteEvent(chamber='legislature',
                               start_date=date,
                               motion_text='Committee Vote',
                               result=result,
                               classification='committee',
                               bill=bill
                               )
        vote_event.set_count('yes', yes_count)
        vote_event.set_count('no', no_count)
        vote_event.set_count('other', other_count)

        return vote_event
Ejemplo n.º 50
0
    def process_committee_vote(self, committee_action, bill):
        """Create a VoteEvent from the vote totals of a committee action.

        :param committee_action: mapping that must contain "ActionDate" and
            "Vote"; every item of "Vote" supplies "VoteType" and
            "VoteCount".
        :param bill: bill the resulting event refers to.
        :returns: the VoteEvent; None if either required key is missing
            (logged as a warning).
        """
        try:
            date = committee_action["ActionDate"]
            vote_info = committee_action["Vote"]

        except KeyError:
            self.logger.warning("Committee vote has no data. Skipping.")
            return
        date = self.date_format(date)

        # Keyed totals, all starting at zero, so an absent "Yes"/"No" entry
        # counts as 0 instead of raising UnboundLocalError.
        totals = {'yes': 0, 'no': 0, 'other': 0}
        for item in vote_info:
            vote_count = 0 if item["VoteCount"] == "" else int(item["VoteCount"])

            if item["VoteType"] == "Yes":
                totals['yes'] = vote_count
            elif item["VoteType"] == "No":
                totals['no'] = vote_count
            else:
                totals['other'] += vote_count

        result = 'pass' if totals['yes'] > totals['no'] else 'fail'

        vote = VoteEvent(chamber='legislature',
                         start_date=date,
                         motion_text='Committee Vote',
                         result=result,
                         classification='committee',
                         bill=bill)
        for option in ('yes', 'no', 'other'):
            vote.set_count(option, totals[option])

        return vote
Ejemplo n.º 51
0
    def _process_votes(self, rollcalls, bill_id, original_chamber, session, proxy):
        result_types = {
            'FAILED': False,
            'DEFEATED': False,
            'PREVAILED': True,
            'PASSED': True,
            'SUSTAINED': True,
            'NOT SECONDED': False,
            'OVERRIDDEN': True,
            'ADOPTED': True,
        }

        for r in rollcalls:
            proxy_link = proxy["url"] + r["link"]
            (path, resp) = self.urlretrieve(proxy_link)
            text = convert_pdf(path, 'text').decode("utf-8")
            lines = text.split("\n")
            os.remove(path)

            chamber = "lower" if "house of representatives" in lines[0].lower() else "upper"
            date_parts = lines[1].strip().split()[-3:]
            date_str = " ".join(date_parts).title() + " " + lines[2].strip()

            vote_date = datetime.datetime.strptime(date_str, "%b %d, %Y %I:%M:%S %p")
            vote_date = pytz.timezone('America/Indiana/Indianapolis').localize(vote_date)
            vote_date = vote_date.isoformat()

            passed = None

            for res, val in result_types.items():
                # We check multiple lines now because the result of the
                # roll call vote as parsed can potentially be split.
                # PDF documents suck.
                for line in lines[3:5]:
                    if res in line.upper():
                        passed = val
                        break

            if passed is None:
                raise AssertionError("Missing bill passage type")

            motion = " ".join(lines[4].split()[:-2])
            try:
                yeas = int(lines[4].split()[-1])
                nays = int(lines[5].split()[-1])
                excused = int(lines[6].split()[-1])
                not_voting = int(lines[7].split()[-1])
            except ValueError:
                self.logger.warning("Vote format is weird, skipping")
                continue

            vote = VoteEvent(chamber=chamber,
                             legislative_session=session,
                             bill=bill_id,
                             bill_chamber=original_chamber,
                             start_date=vote_date,
                             motion_text=motion,
                             result="pass" if passed else "fail",
                             classification="passage")

            vote.set_count('yes', yeas)
            vote.set_count('no', nays)
            vote.set_count('excused', excused)
            vote.set_count('not voting', not_voting)
            vote.add_source(proxy_link)

            currently_counting = ""

            possible_vote_lines = lines[8:]
            for l in possible_vote_lines:
                l = l.replace("NOT\xc2\xa0VOTING", "NOT VOTING")
                l = l.replace("\xc2\xa0", " -")
                if "yea-" in l.lower().replace(" ", ""):
                    currently_counting = "yes"
                elif "nay-" in l.lower().replace(" ", ""):
                    currently_counting = "no"
                elif "excused-" in l.lower().replace(" ", ""):
                    currently_counting = "excused"
                elif "notvoting-" in l.lower().replace(" ", ""):
                    currently_counting = "not voting"
                elif currently_counting == "":
                    pass
                elif re.search(r'v\. \d\.\d', l):
                    # this gets rid of the version number
                    # which is often found at the bottom of the doc
                    pass
                else:
                    voters = l.split("  ")
                    for v in voters:
                        if v.strip():
                            vote.vote(currently_counting, v.strip())

            yield vote
Ejemplo n.º 52
0
    def parse_vote_pdf(self, vote_url, bill):
        """Parse a roll call PDF into a VoteEvent for ``bill``.

        The PDF at ``vote_url`` is downloaded and converted to text.  The
        chamber comes from the URL, the date from the first line, the
        totals from the first line containing both "Yeas" and "Nays", the
        motion from lines a few rows above that totals line, and the voter
        names from the lines following it.

        :param vote_url: address of the roll call PDF.
        :param bill: bill the vote belongs to; its ``identifier`` is used
            in the event's ``pupa_id``.
        :returns: the populated VoteEvent.
        :raises ValueError: when the totals line is not found, or when the
            number of names parsed differs from the reported totals.
        """

        filename, response = self.urlretrieve(vote_url)

        text = convert_pdf(filename, type="text").decode()
        lines = text.splitlines()

        # The chamber is inferred from the URL path.
        if "Senate" in vote_url:
            chamber = "upper"
        else:
            chamber = "lower"

        # First line: everything after "Calendar Date:" is the timestamp.
        date_string = lines[0].split("Calendar Date:")[1].strip()
        date = datetime.datetime.strptime(date_string, "%b %d, %Y %I:%M (%p)")

        # Locate the totals line; later indexing is relative to it.
        page_index = None
        for index, line in enumerate(lines):
            if "Yeas" in line and "Nays" in line:
                page_index = index
                break

        vote_counts = 5 * [0]
        vote_types = ["yes", "no", "not voting", "excused", "absent"]

        # NOTE(review): a totals line at index 0 is treated as "not found"
        # by this truthiness test.
        if page_index:

            # Totals are separated by runs of two or more whitespace
            # characters, each of the form "<number> <label>".
            # NOTE(review): assumes they appear in vote_types order -- confirm.
            counts = re.split(r"\s{2,}", lines[page_index].strip())

            for index, count in enumerate(counts):
                number, string = count.split(" ", 1)
                number = int(number)
                vote_counts[index] = number
        else:
            raise ValueError("Vote Counts Not found at %s" % vote_url)

        passed = vote_counts[0] > vote_counts[1]

        # Consent calendar votes address multiple bills in one VoteEvent
        # eg, http://mgaleg.maryland.gov/2018RS/votes/Senate/0478.pdf
        is_consent_calendar = any(
            ["Consent Calendar" in line for line in lines[:page_index]]
        )
        consent_calendar_bills = None
        motion = ""
        if is_consent_calendar:
            motion = re.split(r"\s{2,}", lines[page_index - 4].strip())[0]
            consent_calendar_bills = re.split(r"\s{2,}", lines[page_index - 1].strip())
            # NOTE(review): re.split returns at least one element, so this
            # assertion presumably never fires.
            assert (
                consent_calendar_bills
            ), "Could not find bills for consent calendar vote"

        motion_keywords = [
            "favorable",
            "reading",
            "amendment",
            "motion",
            "introduced",
            "bill pass",
            "committee",
        ]
        motion_lines = [
            3,
            2,
            4,
            5,
        ]  # Relative LineNumbers to be checked for existence of motion

        # Try each candidate line above the totals until the current motion
        # text contains a keyword.  The keyword test runs before each new
        # candidate is read, so the last candidate is only checked in the
        # else branch (reached when the loop ends without a break).
        for i in motion_lines:
            if any(
                motion_keyword in motion.lower() for motion_keyword in motion_keywords
            ):
                break
            motion = re.split(r"\s{2,}", lines[page_index - i].strip())[0]
        else:
            if not any(
                motion_keyword in motion.lower() for motion_keyword in motion_keywords
            ):
                # This condition covers for the bad formating in SB 1260
                motion = lines[page_index - 3]
            if not any(
                motion_keyword in motion.lower() for motion_keyword in motion_keywords
            ):
                # Check this one for SB 747
                motion = "No motion given"
                self.warning("No motion given")

        vote = VoteEvent(
            bill=bill,
            chamber=chamber,
            start_date=date.strftime("%Y-%m-%d"),
            motion_text=motion,
            classification="passage",
            result="pass" if passed else "fail",
        )

        # Include bill ID to avoid duplication for consent calendars
        vote.pupa_id = "{}#{}".format(vote_url, bill.identifier)

        for index, vote_type in enumerate(vote_types):
            vote.set_count(vote_type, vote_counts[index])
        # Name scanning starts two lines below the totals line.
        page_index = page_index + 2

        # Keywords for identifying where names are located in the pdf
        show_stoppers = [
            "Voting Nay",
            "Not Voting",
            "COPY",
            "Excused",
            "indicates vote change",
            "Indicates Vote Change",
        ]
        # Index into vote_types of the section currently being read.
        vote_index = 0

        # For matching number of names extracted with vote counts(extracted independently)
        vote_name_counts = 5 * [0]

        while page_index < len(lines):

            current_line = lines[page_index].strip()

            # Blank lines and the "Voting Yea" header carry no names.
            if not current_line or "Voting Yea" in current_line:
                page_index += 1
                continue

            # Any show-stopper line moves on to the next vote type.
            if any(show_stopper in current_line for show_stopper in show_stoppers):
                page_index += 1
                vote_index = vote_index + 1
                continue

            # Names on a line are separated by two or more whitespace characters.
            names = re.split(r"\s{2,}", current_line)

            vote_name_counts[vote_index] += len(names)

            for name in names:
                vote.vote(vote_types[vote_index], name)
            page_index += 1

        # Cross-check the names parsed against the totals reported.
        if vote_counts != vote_name_counts:
            raise ValueError("Votes Count and Number of Names don't match")

        return vote
Ejemplo n.º 53
0
    def scrape_vote(self, bill, name, url):
        """Scrape one roll call page and yield the resulting vote.

        The column layout of the tally table depends on the chamber, which
        is chosen by whether ``url`` contains "VOTE/h".  Pages containing
        "BUDGET ADDRESS" yield nothing.  The vote passes when the yea count
        reaches the page's "Necessary for ..." figure.

        :param bill: bill being voted on; its ``legislative_session``
            supplies the year of the vote date.
        :param name: motion text for the vote.
        :param url: address of the roll call page.
        """
        if "VOTE/h" in url:
            vote_chamber = 'lower'
            cols = (1, 5, 9, 13)
            name_offset = 3
            yes_offset = 0
            no_offset = 1
        else:
            vote_chamber = 'upper'
            cols = (1, 6)
            name_offset = 4
            yes_offset = 1
            no_offset = 2

        # Connecticut's SSL is causing problems with Scrapelib, so use Requests
        # NOTE(review): verify=False disables certificate checking for this
        # request; kept as a deliberate workaround.
        page = requests.get(url, verify=False).text

        if 'BUDGET ADDRESS' in page:
            return

        page = lxml.html.fromstring(page)

        def labelled_count(label):
            # First run of digits in the span whose text contains `label`.
            text = page.xpath("string(//span[contains(., '%s')])" % label)
            return int(re.match(r'[^\d]*(\d+)[^\d]*', text).group(1))

        yes_count = labelled_count('Those voting Yea')
        no_count = labelled_count('Those voting Nay')
        other_count = labelled_count('Those absent')
        need_count = labelled_count('Necessary for')

        # The page gives only month/day; the year is taken from the session.
        date = page.xpath("string(//span[contains(., 'Taken on')])")
        date = re.match(r'.*Taken\s+on\s+(\d+/\s?\d+)', date).group(1)
        date = date.replace(' ', '')
        date = datetime.datetime.strptime(date + " " + bill.legislative_session,
                                          "%m/%d %Y").date()

        # not sure about classification.
        # Exactly the number of yeas necessary is enough to pass, hence >=.
        vote = Vote(chamber=vote_chamber,
                    start_date=date,
                    motion_text=name,
                    result='pass' if yes_count >= need_count else 'fail',
                    classification='passage',
                    bill=bill
                    )
        vote.set_count('yes', yes_count)
        vote.set_count('no', no_count)
        vote.set_count('other', other_count)
        vote.add_source(url)
        table = page.xpath("//table")[0]
        for row in table.xpath("tr"):
            # Each row holds several voter groups; `i` is the first cell of
            # a group and the offsets locate its name / Y / N cells.
            for i in cols:
                voter = row.xpath("string(td[%d])" % (i + name_offset)).strip()

                if not voter or voter == 'VACANT':
                    continue

                if "Y" in row.xpath("string(td[%d])" % (i + yes_offset)):
                    vote.yes(voter)
                elif "N" in row.xpath("string(td[%d])" % (i + no_offset)):
                    vote.no(voter)
                else:
                    vote.vote('other', voter)

        yield vote
Ejemplo n.º 54
0
    def parse_vote(self, bill, link):
        """Fetch a roll call page and yield a VoteEvent for ``bill``.

        Nothing is yielded when the request raises ``HTTPError``, when the
        page is a Varnish / "Page Not Found" / "Page Unavailable" response,
        or when the page has no headings after the first ``h3``.

        Counts are read from headings of the form "<label> (<n>)"; a
        category with no heading counts as 0.  Member names are the link
        texts under ``main_content`` (the first link is skipped) and are
        assigned to yes / no / other purely by position.

        :param bill: bill the vote is attached to.
        :param link: address of the roll call page.
        """
        # Server sometimes sends proper error headers,
        # sometimes not
        # NOTE(review): requests.get() does not raise HTTPError for error
        # status codes unless raise_for_status() is called -- confirm this
        # handler is reachable.
        try:
            self.info("Get {}".format(link))
            text = requests.get(link).text
        except requests.exceptions.HTTPError as err:
            self.warning("{} fetching vote {}, skipping".format(err, link))
            return

        if "Varnish cache server" in text:
            self.warning("Scrape rate is too high, try re-scraping with "
                         "The --rpm set to a lower number")
            return

        if "Page Not Found" in text or "Page Unavailable" in text:
            self.warning("missing vote, skipping")
            return
        member_doc = lxml.html.fromstring(text)
        motion = member_doc.xpath("//div[@id='main_content']/h4/text()")
        chamber_date_line = "".join(
            member_doc.xpath("//div[@id='main_content']/h3[1]//text()"))
        chamber_date_line_words = chamber_date_line.split()
        vote_chamber = chamber_date_line_words[0]
        vote_date = datetime.datetime.strptime(chamber_date_line_words[-1],
                                               "%m/%d/%Y")
        vote_status = " ".join(chamber_date_line_words[2:-2])
        opinions = member_doc.xpath(
            "//div[@id='main_content']/h3[position() > 1]/text()")
        if not opinions:
            self.warning("No Votes for: %s", link)
            return

        vote_status = vote_status if vote_status.strip() else motion[0]
        vote_chamber = "upper" if vote_chamber == "Senate" else "lower"

        # Default every category to zero so that a page without, say, a
        # "Nays" heading no longer fails on an unbound local below.
        counts = {"yea": 0, "nay": 0, "present": 0, "absent": 0}
        for heading in opinions:
            try:
                count = int(heading[heading.find("(") + 1:heading.find(")")])
            except ValueError:
                # This is likely not a vote-count text chunk
                # It's probably '`On roll call the vote was:`
                continue
            lowered = heading.lower()
            for keyword in ("yea", "nay", "present", "absent"):
                if keyword in lowered:
                    counts[keyword] = count
                    break

        yes_count = counts["yea"]
        no_count = counts["nay"]

        vote = VoteEvent(
            bill=bill,
            start_date=vote_date.strftime("%Y-%m-%d"),
            chamber=vote_chamber,
            motion_text=vote_status,
            result="pass" if yes_count > no_count else "fail",
            classification="passage",
        )
        vote.pupa_id = link

        vote.set_count("yes", yes_count)
        vote.set_count("no", no_count)
        vote.set_count("abstain", counts["present"])
        vote.set_count("absent", counts["absent"])

        vote.add_source(link)

        a_links = member_doc.xpath("//div[@id='main_content']/a/text()")
        # Links 1..yes_count are yeas, the next no_count are nays and any
        # remaining ones are recorded as "other".
        for position, link_text in enumerate(a_links[1:], start=1):
            voter = re.sub(",", "", link_text).split()[0]
            if position <= yes_count:
                vote.vote("yes", voter)
            elif position <= yes_count + no_count:
                vote.vote("no", voter)
            else:
                vote.vote("other", voter)
        yield vote
Ejemplo n.º 55
0
    def scrape_vote(self, bill, vote_chamber, bill_id, vote_id, vote_date,
                    action_text):
        """Fetch one roll-call result page and yield the VoteEvent for it.

        Member names and their vote codes ('Y', 'N', 'P', 'A') are read as
        alternating text cells from the result table.  Where the page lists
        "Total Yea" / "Total Nay" / "Total Absent" figures they are checked
        against the number of names collected.
        """
        url = ('http://alisondb.legislature.state.al.us/Alison/'
               'GetRollCallVoteResults.aspx?'
               'VOTE={0}&BODY={1}&INST={2}&SESS={3}'.format(
                   vote_id, vote_chamber, bill_id, self.session_id))
        doc = lxml.html.fromstring(self.get(url=url).text)

        voters = {'Y': [], 'N': [], 'P': [], 'A': []}

        # Cells alternate name, vote code, name, vote code, ...; a trailing
        # name without a code is ignored.
        cells = doc.xpath('//table/tr/td/font/text()')
        for member, code in zip(cells[::2], cells[1::2]):
            placeholder = (member.endswith(", Vacant")
                           or member.startswith("Total ")
                           or not member.strip())
            if not placeholder:
                voters[code].append(member)

        # Check name counts against totals listed on the site
        yea_text = doc.xpath('//*[starts-with(text(), "Total Yea")]/text()')
        if not yea_text:
            total_yea = len(voters['Y'])
        else:
            total_yea = int(yea_text[0].split(":")[-1])
            assert total_yea == len(voters['Y']), "Yea count incorrect"

        nay_text = doc.xpath('//*[starts-with(text(), "Total Nay")]/text()')
        if not nay_text:
            total_nay = len(voters['N'])
        else:
            total_nay = int(nay_text[0].split(":")[-1])
            assert total_nay == len(voters['N']), "Nay count incorrect"

        # The absent total is only verified; the reported "other" count is
        # always derived from the collected names.
        absent_text = doc.xpath(
            '//*[starts-with(text(), "Total Absent")]/text()')
        if absent_text:
            listed_absent = int(absent_text[0].split(":")[-1])
            assert listed_absent == len(voters['A']), "Absent count incorrect"
        total_other = len(voters['P']) + len(voters['A'])

        passed = total_yea > total_nay
        vote = VoteEvent(
            chamber=self.CHAMBERS[vote_chamber[0]],
            start_date=vote_date,
            motion_text=action_text,
            result='pass' if passed else 'fail',
            classification='passage',
            bill=bill,
        )
        vote.set_count('yes', total_yea)
        vote.set_count('no', total_nay)
        vote.set_count('other', total_other)
        vote.add_source(url)

        recorded = (
            ('yes', voters['Y']),
            ('no', voters['N']),
            ('other', voters['A'] + voters['P']),
        )
        for option, members in recorded:
            for member in members:
                vote.vote(option, member)

        yield vote
Ejemplo n.º 56
0
    def parse_roll_call(self, bill, link, chamber, date):
        """Build and return a VoteEvent from the roll-call page behind *link*.

        :param bill: bill the vote is attached to; its ``identifier`` is
            appended to the vote's ``pupa_id``.
        :param link: anchor element whose ``href`` is the roll-call URL and
            whose text is the fallback motion text.
        :param chamber: chamber passed through to the VoteEvent.
        :param date: naive datetime, localized with ``tz`` for the start date.
        :raises Exception: if a member cell's class names no known vote value.
        """
        url = link.attrib['href']
        doc = lxml.html.fromstring(self.get(url).text)

        motion = doc.xpath(
            'string(//div[@class="Column-OneFourth"]/div[3])').strip()
        motion = re.sub(r'\s+', ' ', motion)
        if motion == 'FP':
            motion = 'FINAL PASSAGE'

        if motion == 'FINAL PASSAGE':
            classification = 'passage'
        elif re.match(r'CONCUR(RENCE)? IN \w+ AMENDMENTS', motion):
            classification = 'amendment'
        else:
            # Unrecognized motion: fall back to the link text.
            classification = 'other'
            motion = link.text_content()

        def tally(label_xpath):
            # The count sits in the element right after the label <div>.
            return int(doc.xpath(label_xpath)[0].getnext().text)

        yeas = tally("//div[text() = 'YEAS']")
        nays = tally("//div[text() = 'NAYS']")
        lve = tally("//div[text() = 'LVE']")
        nv = tally("//div[text() = 'N/V']")
        other = lve + nv

        # Yeas must outnumber nays and non-voters combined to count as a pass.
        outcome = 'pass' if yeas > (nays + other) else 'fail'
        vote = VoteEvent(
            chamber=chamber,
            start_date=tz.localize(date),
            motion_text=motion,
            classification=classification,
            result=outcome,
            bill=bill,
        )
        # pupa_id situation here is a bit weird, same vote can be used for
        # multiple bills see:
        # http://www.legis.state.pa.us/CFDOCS/Legis/RC/Public/rc_view_action2.cfm?sess_yr=2017&sess_ind=0&rc_body=H&rc_nbr=11       # noqa
        # so we toss the bill id onto the end of the URL
        vote.pupa_id = url + '#' + bill.identifier
        vote.add_source(url)
        vote.set_count('yes', yeas)
        vote.set_count('no', nays)
        vote.set_count('other', other)

        # Checked in order; the first marker found in the class wins.
        class_markers = (
            ('yea', 'yes'),
            ('nay', 'no'),
            ('nvote', 'other'),
            ('lve', 'other'),
        )
        for cell in doc.xpath('//*[contains(@class, "RollCalls-Vote")]'):
            member = cell.text_content().strip()
            member = re.sub(r'^[\s,]+', '', member)
            member = re.sub(r'[\s,]+$', '', member)
            class_attr = cell.attrib['class'].lower()
            for marker, voteval in class_markers:
                if marker in class_attr:
                    break
            else:
                raise Exception('Unrecognized vote val: %s' % class_attr)
            vote.vote(voteval, member)

        return vote
Ejemplo n.º 57
0
    def scrape_votes(self, session, zip_url):
        """Yield the votes described by the two roll-call tables in ``self.zf``.

        ``tblrollcallsummary.txt`` supplies one pipe-delimited record per roll
        call (totals, motion, bill); ``tblrollcallhistory.txt`` supplies one
        record per member vote.  Only records for *session* whose bill is in
        ``self.bills_by_id`` are used.

        :param session: session year string compared against each record.
        :param zip_url: URL recorded as the source of every vote.
        """
        # NOTE(review): if self.zf is a zipfile.ZipFile, open() yields bytes
        # on Python 3 and the str comparisons/splits below would need a
        # decode first -- confirm against the code that sets self.zf.
        votes = {}
        last_line = []

        for line in self.zf.open('tblrollcallsummary.txt'):
            if line.strip() == "":
                continue

            line = line.split('|')
            if len(line) < 14:
                if len(last_line + line[1:]) == 14:
                    # This fragment completes the one remembered earlier:
                    # stitch the two together (the first piece of the
                    # continuation is the tail of the split field).
                    line = last_line + line[1:]
                    self.warning('used bad vote line')
                else:
                    last_line = line
                    self.warning('bad vote line %s' % '|'.join(line))
                    if len(line) < 12:
                        # Too short to hold the fields read below; wait for
                        # the continuation instead of raising IndexError.
                        continue
            session_yr = line[0]
            body = line[1]
            vote_num = line[2]
            timestamp = line[3]
            bill_id = line[4].strip()
            yeas = int(line[5])
            nays = int(line[6])
            # present = int(line[7])
            # absent = int(line[8])
            motion = line[11].strip() or '[not available]'

            if session_yr == session and bill_id in self.bills_by_id:
                actor = 'lower' if body == 'H' else 'upper'
                time = dt.datetime.strptime(timestamp,
                                            '%m/%d/%Y %I:%M:%S %p')
                # TODO: stop faking passed somehow
                passed = yeas > nays
                vote = Vote(chamber=actor,
                            start_date=time.strftime("%Y-%m-%d"),
                            motion_text=motion,
                            result='pass' if passed else 'fail',
                            classification='passage',
                            bill=self.bills_by_id[bill_id])
                vote.set_count('yes', yeas)
                vote.set_count('no', nays)
                vote.add_source(zip_url)
                votes[body+vote_num] = vote

        # Running number of non-yea/non-nay votes per roll call.  This used
        # to be reset on every row, so the stored count was only ever 0 or 1.
        other_counts = {}
        for line in self.zf.open('tblrollcallhistory.txt'):
            # 2012    | H   | 2    | 330795  | HB309  | Yea |1/4/2012 8:27:03 PM
            session_yr, body, v_num, employee, bill_id, vote, _date \
                    = line.split('|')

            if not bill_id:
                continue

            if session_yr == session and bill_id.strip() in self.bills_by_id:
                try:
                    leg = self.legislators[employee]['name']
                except KeyError:
                    self.warning("Error, can't find person %s" % employee)
                    continue

                vote = vote.strip()
                vote_key = body + v_num
                if vote_key not in votes:
                    self.warning("Skipping processing this vote:")
                    self.warning("Bad ID: %s" % (body+v_num))
                    continue
                other_counts.setdefault(vote_key, 0)
                # code = self.legislators[employee]['seat']
                if vote == 'Yea':
                    votes[vote_key].yes(leg)
                elif vote == 'Nay':
                    votes[vote_key].no(leg)
                else:
                    votes[vote_key].other(leg)
                    other_counts[vote_key] += 1
                votes[vote_key].set_count('other', other_counts[vote_key])
        for vote in votes.values():
            yield vote
Ejemplo n.º 58
0
    def scrape(self, session=None):
        """Scrape bills and resolutions for a session from legislature.vermont.gov.

        For every entry returned by the bill/resolution listing endpoints this
        builds a ``Bill`` (sponsors, text versions, actions) and one
        ``VoteEvent`` per roll call, yielding each vote as it is built and
        the bill after its votes.  A bill without an internal detail ID is
        yielded with only its basic data, sponsors and versions.

        :param session: session to scrape; defaults to
            ``self.latest_session()`` when ``None``.
        :raises AssertionError: on an unknown bill-number prefix, an amendment
            not tied to a chamber, an action with an unknown chamber code, or
            a roll call whose status text names no recognized outcome.
        """
        # Non-greedy pattern used to strip markup from status strings.
        HTML_TAGS_RE = r'<.*?>'

        if session is None:
            session = self.latest_session()

        year_slug = self.jurisdiction.get_year_slug(session)

        # Load all bills and resolutions via the private API
        bills_url = \
            'http://legislature.vermont.gov/bill/loadBillsReleased/{}/'.\
            format(year_slug)
        bills_json = self.get(bills_url).text
        bills = json.loads(bills_json)['data'] or []

        # NOTE(review): bills_url is rebound here, so the source recorded on
        # non-resolution bills further down is always the "Introduced"
        # listing, including for entries that came from "Released".
        bills_url = \
            'http://legislature.vermont.gov/bill/loadBillsIntroduced/{}/'.\
            format(year_slug)
        bills_json = self.get(bills_url).text
        bills.extend(json.loads(bills_json)['data'] or [])

        resolutions_url = \
            'http://legislature.vermont.gov/bill/loadAllResolutionsByChamber/{}/both'.\
            format(year_slug)
        resolutions_json = self.get(resolutions_url).text
        bills.extend(json.loads(resolutions_json)['data'] or [])

        # Parse the information from each bill
        for info in bills:
            # Strip whitespace from strings
            # (calls .strip() on every value, so all values must be strings)
            info = {k: v.strip() for k, v in info.items()}

            # Identify the bill type and chamber
            # The longer prefixes are tested first so that e.g. 'H.C.R.' and
            # 'H.R.' are not swallowed by the plain 'H.' bill branch.
            if info['BillNumber'].startswith('J.R.H.'):
                bill_type = 'joint resolution'
                bill_chamber = 'lower'
            elif info['BillNumber'].startswith('J.R.S.'):
                bill_type = 'joint resolution'
                bill_chamber = 'upper'

            elif info['BillNumber'].startswith('H.C.R.'):
                bill_type = 'concurrent resolution'
                bill_chamber = 'lower'
            elif info['BillNumber'].startswith('S.C.R.'):
                bill_type = 'concurrent resolution'
                bill_chamber = 'upper'

            elif info['BillNumber'].startswith('H.R.'):
                bill_type = 'resolution'
                bill_chamber = 'lower'
            elif info['BillNumber'].startswith('S.R.'):
                bill_type = 'resolution'
                bill_chamber = 'upper'

            elif info['BillNumber'].startswith('PR.'):
                # The number itself names no chamber; use the 'Body' field.
                bill_type = 'constitutional amendment'
                if info['Body'] == 'H':
                    bill_chamber = 'lower'
                elif info['Body'] == 'S':
                    bill_chamber = 'upper'
                else:
                    raise AssertionError("Amendment not tied to chamber")

            elif info['BillNumber'].startswith('H.'):
                bill_type = 'bill'
                bill_chamber = 'lower'
            elif info['BillNumber'].startswith('S.'):
                bill_type = 'bill'
                bill_chamber = 'upper'

            else:
                raise AssertionError(
                    "Unknown bill type found: '{}'".
                    format(info['BillNumber'])
                )

            # Normalize the identifier: drop dots and spaces ...
            bill_id = info['BillNumber'].replace('.', '').replace(' ', '')
            # put one space back in between type and number
            bill_id = re.sub(r'([a-zA-Z]+)(\d+)', r'\1 \2', bill_id)

            # Create the bill using its basic information
            bill = Bill(
                identifier=bill_id,
                legislative_session=session,
                chamber=bill_chamber,
                title=info['Title'],
                classification=bill_type
            )
            # Every '... resolution' type contains the word 'resolution'.
            if 'resolution' in bill_type:
                bill.add_source(resolutions_url)
            else:
                bill.add_source(bills_url)

            # Load the bill's information page to access its metadata
            bill_url = 'http://legislature.vermont.gov/bill/status/{0}/{1}'.\
                format(year_slug, info['BillNumber'])
            doc = self.lxmlize(bill_url)
            bill.add_source(bill_url)

            # Capture sponsors
            sponsors = doc.xpath(
                '//dl[@class="summary-table"]/dt[text()="Sponsor(s)"]/'
                'following-sibling::dd[1]/ul/li'
            )
            # Entries are primary sponsors until the 'Additional Sponsors'
            # marker item is seen; everything after it is a cosponsor.
            sponsor_type = 'primary'
            for sponsor in sponsors:
                if sponsor.xpath('span/text()') == ['Additional Sponsors']:
                    sponsor_type = 'cosponsor'
                    continue

                sponsor_name = sponsor.xpath('a/text()')[0].\
                    replace("Rep.", "").replace("Sen.", "").strip()
                # NOTE(review): the "Less" test below can never be true -- a
                # five-character slice equal to "Less" implies the name is
                # four characters long, which contradicts len(...) == 5.
                # Presumably meant to skip a "Less" toggle link; confirm the
                # intended text before changing it.
                if sponsor_name and not \
                        (sponsor_name[:5] == "Less" and len(sponsor_name) == 5):
                    bill.add_sponsorship(
                        name=sponsor_name,
                        classification=sponsor_type,
                        entity_type='person',
                        primary=(sponsor_type == 'primary')
                    )

            # Capture bill text versions
            # Warning: There's a TODO in VT's source code saying 'move this to where it used to be'
            # so leave in the old and new positions
            versions = doc.xpath(
                '//dl[@class="summary-table"]/dt[text()="Bill/Resolution Text"]/'
                'following-sibling::dd[1]/ul/li/a |'
                '//ul[@class="bill-path"]//a'
            )

            for version in versions:
                # Links without text are skipped; spaces in the href are
                # percent-encoded.
                if version.xpath('text()'):
                    bill.add_version_link(
                        note=version.xpath('text()')[0],
                        url=version.xpath('@href')[0].replace(' ', '%20'),
                        media_type='application/pdf'
                    )

            # Identify the internal bill ID, used for actions and votes
            # If there is no internal bill ID, then it has no extra information
            # (re.search returns None in that case, hence AttributeError)
            try:
                internal_bill_id = re.search(
                    r'"bill/loadBillDetailedStatus/.+?/(\d+)"',
                    lxml.etree.tostring(doc).decode('utf-8')
                ).group(1)
            except AttributeError:
                self.warning("Bill {} appears to have no activity".format(info['BillNumber']))
                yield bill
                continue

            # Capture actions
            actions_url = 'http://legislature.vermont.gov/bill/loadBillDetailedStatus/{0}/{1}'.\
                format(year_slug, internal_bill_id)
            actions_json = self.get(actions_url).text
            actions = json.loads(actions_json)['data']
            bill.add_source(actions_url)

            # Chambers that recorded a passage action; only collected here
            # (the assertion that used it is commented out below).
            chambers_passed = set()
            for action in actions:
                # Drop null fields; a key whose value was null is therefore
                # absent from the dict for the lookups below.
                action = {k: v for k, v in action.items() if v is not None}

                if "Signed by Governor" in action['FullStatus']:
                    actor = 'executive'
                elif action['ChamberCode'] == 'H':
                    actor = 'lower'
                elif action['ChamberCode'] == 'S':
                    actor = 'upper'
                else:
                    raise AssertionError("Unknown actor for bill action")

                # Categorize action
                if "Signed by Governor" in action['FullStatus']:
                    # assert chambers_passed == set("HS")
                    action_type = 'executive-signature'
                elif "Vetoed by the Governor" in action['FullStatus']:
                    action_type = 'executive-veto'
                elif "Read first time" in action['FullStatus'] \
                        or "Read 1st time" in action['FullStatus']:
                    action_type = 'introduction'
                elif "Reported favorably" in action['FullStatus']:
                    action_type = 'committee-passage-favorable'
                elif actor == 'lower' and any(x.lower().startswith('aspassed')
                                              for x in action['keywords'].split(';')):
                    action_type = 'passage'
                    chambers_passed.add("H")
                elif actor == 'upper' and any(x.lower().startswith(' aspassed')
                                              or x.lower().startswith('aspassed')
                                              for x in action['keywords'].split(';')):
                    action_type = 'passage'
                    chambers_passed.add("S")
                else:
                    action_type = None

                # Description has markup stripped; the date is converted from
                # mm/dd/YYYY to YYYY-mm-dd.
                bill.add_action(
                    description=re.sub(HTML_TAGS_RE, "", action['FullStatus']),
                    date=datetime.datetime.strftime(
                        datetime.datetime.strptime(action['StatusDate'], '%m/%d/%Y'),
                        '%Y-%m-%d'
                    ),
                    chamber=actor,
                    classification=action_type
                )

            # Capture votes
            votes_url = 'http://legislature.vermont.gov/bill/loadBillRollCalls/{0}/{1}'.format(
                year_slug, internal_bill_id)
            votes_json = self.get(votes_url).text
            votes = json.loads(votes_json)['data']
            bill.add_source(votes_url)

            for vote in votes:
                # Each roll call needs one more request for member-level data.
                roll_call_id = vote['VoteHeaderID']
                roll_call_url = ('http://legislature.vermont.gov/bill/'
                                 'loadBillRollCallDetails/{0}/{1}'.format(
                                     year_slug, roll_call_id))
                roll_call_json = self.get(roll_call_url).text
                roll_call = json.loads(roll_call_json)['data']

                roll_call_yea = []
                roll_call_nay = []
                roll_call_not_voting = []
                for member in roll_call:
                    # 'MemberName' is split on " of " into exactly two parts;
                    # only the first (the name) is kept.
                    (member_name, _district) = member['MemberName'].split(" of ")
                    member_name = member_name.strip()

                    if member['MemberVote'] == "Yea":
                        roll_call_yea.append(member_name)
                    elif member['MemberVote'] == "Nay":
                        roll_call_nay.append(member_name)
                    else:
                        roll_call_not_voting.append(member_name)

                # The outcome is taken from the status text, not the tallies.
                if ("Passed -- " in vote['FullStatus'] or
                        "Veto of Governor overridden" in vote['FullStatus']):
                    did_pass = True
                elif ("Failed -- " in vote['FullStatus'] or
                      'Veto of the Governor sustained' in vote['FullStatus']):
                    did_pass = False
                else:
                    raise AssertionError("Roll call vote result is unclear")

                # Check vote counts
                # (yes/no totals come from the status text; the 'not voting'
                # total is the number of members collected above)
                yea_count = int(re.search(r'Yeas = (\d+)', vote['FullStatus']).group(1))
                nay_count = int(re.search(r'Nays = (\d+)', vote['FullStatus']).group(1))

                vote_to_add = VoteEvent(
                    bill=bill,
                    chamber=('lower' if vote['ChamberCode'] == 'H' else 'upper'),
                    start_date=datetime.datetime.strftime(
                        datetime.datetime.strptime(vote['StatusDate'], '%m/%d/%Y'),
                        '%Y-%m-%d'
                    ),
                    motion_text=re.sub(HTML_TAGS_RE, "", vote['FullStatus']).strip(),
                    result='pass' if did_pass else 'fail',
                    classification='passage',
                    legislative_session=session,
                )
                vote_to_add.add_source(roll_call_url)

                vote_to_add.set_count('yes', yea_count)
                vote_to_add.set_count('no', nay_count)
                vote_to_add.set_count('not voting', len(roll_call_not_voting))

                for member in roll_call_yea:
                    vote_to_add.yes(member)
                for member in roll_call_nay:
                    vote_to_add.no(member)
                for member in roll_call_not_voting:
                    vote_to_add.vote('not voting', member)

                yield vote_to_add

            # Capture extra information-  Not yet implemented
            # Witnesses:
            #   http://legislature.vermont.gov/bill/loadBillWitnessList/{year_slug}/{internal_bill_id}
            # Conference committee members:
            #   http://legislature.vermont.gov/bill/loadBillConference/{year_slug}/{bill_number}
            # Committee meetings:
            #   http://legislature.vermont.gov/committee/loadHistoryByBill/{year_slug}?LegislationId={internal_bill_id}

            yield bill
Ejemplo n.º 59
0
    def scrape_action_page(self, bill, page):
        """Record every row of the action table on *bill* and yield its votes.

        Each ``<tbody>`` row is added to *bill* as an action.  Rows whose text
        contains "Supplement" (House) or "Roll Call" (Senate) additionally
        produce a VoteEvent, which is yielded after its member votes have been
        loaded from the matching PDF.

        :param bill: bill object receiving the actions and owning the votes.
        :param page: parsed HTML document holding the action table.
        """
        action_rows = page.xpath('//tbody/tr')
        for row in action_rows:
            action_date = row.xpath('td[1]/text()')[0]
            action_date = datetime.strptime(action_date, '%m/%d/%Y')
            action_year = action_date.year
            action_date = action_date.strftime('%Y-%m-%d')

            # A row without actor text keeps the actor of the previous row.
            # NOTE(review): if the very first row has no actor text,
            # action_actor is unbound when add_action is reached below.
            if row.xpath('td[2]/text()'):
                action_actor = row.xpath('td[2]/text()')[0]
                action_actor = self.chamber_map_reverse[action_actor.strip()]

            action_name = row.xpath('string(td[3])')

            # House votes
            if "Supplement" in action_name:
                actor = "lower"
                vote_action = action_name.split(' -')[0]
                # Counts are cut out of the text around the words
                # 'YEAS to' and 'NAYS'.
                y = int(action_name.strip().split('-')[1].split('YEAS')[0])
                n = int(action_name.strip().split('YEAS to')[1].split('NAYS')[0])

                # get supplement number
                n_supplement = int(action_name.strip().split('No. ')[1].split(r')')[0])
                cached_vote = VoteEvent(
                    chamber=actor,
                    start_date=action_date,
                    motion_text=vote_action,
                    result='pass' if y > n else 'fail',
                    classification='passage',
                    bill=bill,
                )
                cached_vote.set_count('yes', y)
                cached_vote.set_count('no', n)

                housevote_pdf = 'http://www.mass.gov/legis/journal/combined{}RCs.pdf'.format(
                    action_year
                )
                # note: 2014-2015 different format and no data on website for years prior to 2014
                self.scrape_house_vote(cached_vote, housevote_pdf, n_supplement)
                cached_vote.add_source(housevote_pdf)

                # One PDF holds a whole year of roll calls, so the supplement
                # number is appended to keep the id unique per vote.
                cached_vote.pupa_id = '{}#{}'.format(housevote_pdf, n_supplement)

                yield cached_vote

            # Senate votes
            if "Roll Call" in action_name:
                actor = "upper"
                # placeholder
                vote_action = action_name.split(' -')[0]
                # Try the '<n> yeas ... <n> nays' wording first and fall back
                # to 'yeas <n>' / 'nays <n>' when it does not match
                # (re.search returns None, hence AttributeError).
                # Raw string: '\d' in a plain literal is an invalid escape.
                try:
                    y, n = re.search(r'(\d+) yeas .*? (\d+) nays', action_name.lower()).groups()
                    y = int(y)
                    n = int(n)
                except AttributeError:
                    y = int(re.search(r"yeas\s*(\d*)", action_name.lower()).group(1))
                    n = int(re.search(r"nays\s*(\d*)", action_name.lower()).group(1))

                # TODO: other count isn't included, set later
                cached_vote = VoteEvent(
                    chamber=actor,
                    start_date=action_date,
                    motion_text=vote_action,
                    result='pass' if y > n else 'fail',
                    classification='passage',
                    bill=bill,
                )
                cached_vote.set_count('yes', y)
                cached_vote.set_count('no', n)

                rollcall_pdf = 'http://malegislature.gov' + row.xpath('string(td[3]/a/@href)')
                self.scrape_senate_vote(cached_vote, rollcall_pdf)
                cached_vote.add_source(rollcall_pdf)
                yield cached_vote

            # Every row, vote or not, is recorded as an action on the bill.
            attrs = self.categorizer.categorize(action_name)
            action = bill.add_action(
                action_name.strip(),
                action_date,
                chamber=action_actor,
                classification=attrs['classification'],
            )
            for com in attrs.get('committees', []):
                action.add_related_entity(com, entity_type='organization')