Beispiel #1
0
    def parse_bill_actions_table(self, bill, action_table, bill_id, session,
                                 url, bill_chamber):
        """Attach each action-table row to ``bill`` and yield votes found in it.

        The first child element of ``action_table`` is skipped (presumably a
        header row -- confirm against the page markup).  Every other row is
        read as three cells: a ``MM/DD/YYYY`` date, an actor code and the
        action text.  The action is added to ``bill`` together with any
        committees that ``categorize_action`` reports and that are known to
        ``self.short_ids``.  When ``self.parse_vote`` recognises a vote in the
        action text, a ``VoteEvent`` is built and yielded.

        Raises:
            KeyError: if the actor code is not in the lookup table below.
            ValueError: if the date cell does not match ``%m/%d/%Y``.
        """
        actor_by_code = {
            "S": "upper",
            "H": "lower",
            "D": "Data Systems",
            "$": "Appropriation measure",
            "ConAm": "Constitutional Amendment",
        }
        # (vote option, key of the matching tally in the parsed vote)
        count_fields = (
            ('yes', 'n_yes'),
            ('no', 'n_no'),
            ('not voting', 'n_excused'),
        )

        for row in action_table.xpath('*')[1:]:
            parsed_date = dt.datetime.strptime(row[0].text_content(),
                                               "%m/%d/%Y")
            date = parsed_date.strftime('%Y-%m-%d')
            actor_code = row[1].text_content()
            description = row[2].text_content()
            actor = actor_by_code[actor_code]
            act_type, committees = categorize_action(description)

            # XXX: Translate short-code to full committee name for the
            #      matcher.  Short codes missing from self.short_ids are
            #      dropped without complaint.
            committee_names = []
            for short_code in committees or ():
                try:
                    committee_names.append(
                        self.short_ids[short_code]['name'])
                except KeyError:
                    continue

            recorded = bill.add_action(description,
                                       date,
                                       chamber=actor,
                                       classification=act_type)
            for committee_name in committee_names:
                recorded.add_related_entity(name=committee_name,
                                            entity_type="organization")

            parsed_vote = self.parse_vote(description)
            if not parsed_vote:
                continue

            tallies, motion = parsed_vote
            # The outcome is inferred from the wording of the action text.
            outcome = 'pass' if 'passed' in description.lower() else 'fail'
            vote = VoteEvent(
                start_date=date,
                chamber=actor,
                bill=bill_id,
                bill_chamber=bill_chamber,
                legislative_session=session,
                motion_text=motion,
                result=outcome,
                classification='passage')
            vote.add_source(url)
            for option, tally_key in count_fields:
                vote.set_count(option, int(tallies[tally_key] or 0))

            # "yes_resv" voters are recorded as plain yes votes.
            for voter in split_specific_votes(tallies['yes']):
                vote.yes(voter)
            for voter in split_specific_votes(tallies['yes_resv']):
                vote.yes(voter)
            for voter in split_specific_votes(tallies['no']):
                vote.no(voter)
            for voter in split_specific_votes(tallies['excused']):
                vote.vote('not voting', voter)

            yield vote
Beispiel #2
0
def record_votes(root, session, chamber):
    """Yield a ``VoteEvent`` for every valid vote element under ``root``.

    Candidate ``div`` elements are selected with the module-level
    ``vote_selectors`` and wrapped in ``MaybeVote``; wrappers that report
    ``is_valid`` as false are skipped.  Only the first two characters of
    ``session`` are used as the legislative session.
    """
    query = '//div{}'.format(''.join(vote_selectors))
    for node in root.xpath(query):
        parsed = MaybeVote(node)
        if not parsed.is_valid:
            continue

        # Motion text and classification share the same value.
        kind = 'passage' if parsed.passed else 'other'
        outcome = 'pass' if parsed.passed else 'fail'
        event = VoteEvent(chamber=chamber,
                          start_date=None,
                          motion_text=kind,
                          result=outcome,
                          classification=kind,
                          legislative_session=session[0:2],
                          bill=parsed.bill_id,
                          bill_chamber=parsed.chamber)

        # Missing tallies are stored as zero.
        event.set_count('yes', parsed.yeas or 0)
        event.set_count('no', parsed.nays or 0)
        event.set_count('not voting', parsed.present or 0)

        roll = parsed.votes
        for name in roll['yeas']:
            event.yes(name)
        for name in roll['nays']:
            event.no(name)
        # "present" members are recorded as not voting.
        for name in roll['present']:
            event.vote('not voting', name)
        for name in roll['absent']:
            event.vote('absent', name)

        yield event
Beispiel #3
0
def record_votes(root, session, chamber):
    """Turn the vote elements found under ``root`` into ``VoteEvent`` objects.

    Each ``div`` matched by the module-level ``vote_selectors`` is handed to
    ``MaybeVote``; only those flagged ``is_valid`` produce an event.  The
    legislative session is the first two characters of ``session``.
    """
    for element in root.xpath('//div{}'.format(''.join(vote_selectors))):
        candidate = MaybeVote(element)
        if candidate.is_valid:
            passed = candidate.passed
            label = 'passage' if passed else 'other'

            vote_event = VoteEvent(
                chamber=chamber,
                start_date=None,
                motion_text=label,
                result='pass' if passed else 'fail',
                classification=label,
                legislative_session=session[0:2],
                bill=candidate.bill_id,
                bill_chamber=candidate.chamber
            )

            # A tally that is missing or zero is stored as 0.
            vote_event.set_count('yes', candidate.yeas or 0)
            vote_event.set_count('no', candidate.nays or 0)
            vote_event.set_count('not voting', candidate.present or 0)

            for member in candidate.votes['yeas']:
                vote_event.yes(member)
            for member in candidate.votes['nays']:
                vote_event.no(member)
            # Members listed as "present" count as not voting.
            for member in candidate.votes['present']:
                vote_event.vote('not voting', member)
            for member in candidate.votes['absent']:
                vote_event.vote('absent', member)

            yield vote_event
Beispiel #4
0
def record_votes(root, session, chamber):
    """Generate one ``VoteEvent`` per valid vote ``div`` below ``root``.

    The XPath is assembled from the module-level ``vote_selectors``.  Every
    match is wrapped in ``MaybeVote`` and skipped unless ``is_valid`` is
    true.  ``session[0:2]`` is used as the legislative session.
    """
    matches = root.xpath("//div{}".format("".join(vote_selectors)))
    # Wrapped lazily, so each MaybeVote is built just before it is examined.
    wrapped = (MaybeVote(match) for match in matches)

    for mv in wrapped:
        if not mv.is_valid:
            continue

        if mv.passed:
            motion, result, classification = "passage", "pass", "passage"
        else:
            motion, result, classification = "other", "fail", "other"

        v = VoteEvent(
            chamber=chamber,
            start_date=None,
            motion_text=motion,
            result=result,
            classification=classification,
            legislative_session=session[0:2],
            bill=mv.bill_id,
            bill_chamber=mv.chamber,
        )

        # Falsy tallies (missing or zero) are recorded as 0.
        v.set_count("yes", mv.yeas or 0)
        v.set_count("no", mv.nays or 0)
        v.set_count("not voting", mv.present or 0)

        for legislator in mv.votes["yeas"]:
            v.yes(legislator)
        for legislator in mv.votes["nays"]:
            v.no(legislator)
        # "present" is stored under the "not voting" option.
        for legislator in mv.votes["present"]:
            v.vote("not voting", legislator)
        for legislator in mv.votes["absent"]:
            v.vote("absent", legislator)

        yield v
Beispiel #5
0
    def _parse_senate_votes(self, vote_data, bill, url):
        vote_datetime = datetime.datetime.strptime(vote_data["voteDate"],
                                                   "%Y-%m-%d")
        if vote_data["voteType"] == "FLOOR":
            motion = "Floor Vote"
        elif vote_data["voteType"] == "COMMITTEE":
            motion = "{} Vote".format(vote_data["committee"]["name"])
        else:
            raise ValueError("Unknown vote type encountered.")

        if vote_data["version"]:
            motion += " - Version: " + vote_data["version"]

        vote = VoteEvent(
            chamber="upper",
            start_date=vote_datetime.strftime("%Y-%m-%d"),
            motion_text=motion,
            classification="passage",
            result="fail",
            bill=bill,
        )

        vote.add_source(url)

        vote_rolls = vote_data["memberVotes"]["items"]

        yes_count, no_count, other_count = 0, 0, 0

        # Count all yea votes.
        if "items" in vote_rolls.get("AYE", {}):
            for legislator in vote_rolls["AYE"]["items"]:
                vote.yes(legislator["fullName"])
                yes_count += 1

        if "items" in vote_rolls.get("AYEWR", {}):
            for legislator in vote_rolls["AYEWR"]["items"]:
                vote.yes(legislator["fullName"])
                yes_count += 1

        # Count all nay votes.
        if "items" in vote_rolls.get("NAY", {}):
            for legislator in vote_rolls["NAY"]["items"]:
                vote.no(legislator["fullName"])
                no_count += 1

        # Count all other types of votes.
        other_vote_types = ("EXC", "ABS", "ABD")
        for vote_type in other_vote_types:
            if vote_rolls.get(vote_type, []):
                for legislator in vote_rolls[vote_type]["items"]:
                    vote.vote("other", legislator["fullName"])
                    other_count += 1

        vote.result = "pass" if yes_count > no_count else "fail"
        vote.set_count("yes", yes_count)
        vote.set_count("no", no_count)
        vote.set_count("other", other_count)

        return vote
Beispiel #6
0
    def scrape_votes_old(self, bill, billname, session):
        """Scrape roll-call votes for a bill from the archived bill page.

        The archive page for ``session`` and ``billname`` is fetched, and one
        ``VoteEvent`` is yielded per link whose ``href`` contains
        ``JournalText``.  The link text is the vote date (``MM/DD/YYYY``); the
        second cell of the link's table row holds ``"<chamber> - <motion>"``;
        the following row holds the yea and nay name tables.

        Raises:
            ScrapeError: if the chamber is neither ``House`` nor ``Senate``.
        """
        vote_url = ("http://archives.legislature.state.oh.us/bills.cfm?ID=" +
                    session + "_" + billname)

        page = lxml.html.fromstring(self.get(vote_url).text)

        def voter_names(row, div_xpath):
            """Return the non-blank, stripped names listed in one result div."""
            div = row.xpath(div_xpath)[0]
            names = []
            for td in div.xpath("table/tr/td"):
                # Cells holding only whitespace are layout padding, not voters.
                name = td.xpath("string()").strip()
                if name:
                    names.append(name)
            return names

        for jlink in page.xpath("//a[contains(@href, 'JournalText')]"):
            date = self._tz.localize(
                datetime.datetime.strptime(jlink.text, "%m/%d/%Y")).date()
            date = "{:%Y-%m-%d}".format(date)

            # details reads "<chamber> - <motion>", possibly followed by more
            # lines of text.
            details = jlink.xpath("string(../../../td[2])")
            parts = details.split(" - ")

            chamber = parts[0]
            if chamber == "House":
                chamber = "lower"
            elif chamber == "Senate":
                chamber = "upper"
            else:
                raise ScrapeError("Bad chamber: %s" % chamber)

            motion = parts[1].split("\n")[0].strip()

            # The roll call sits in the table row right after the link's row.
            vote_row = jlink.xpath("../../..")[0].getnext()
            yeas = voter_names(vote_row, "td/font/div[contains(@id, 'Yea')]")
            nays = voter_names(vote_row, "td/font/div[contains(@id, 'Nay')]")

            yes_count = len(yeas)
            no_count = len(nays)

            vote = VoteEvent(
                chamber=chamber,
                start_date=date,
                motion_text=motion,
                result="pass" if yes_count > no_count else "fail",
                bill=bill,
                # NOTE(review): other scrapers use "passage" here; "passed"
                # is kept unchanged -- confirm which value is intended.
                classification="passed",
            )
            # Store the tallies alongside the individual votes.
            vote.set_count("yes", yes_count)
            vote.set_count("no", no_count)

            for yes in yeas:
                vote.yes(yes)
            for no in nays:
                vote.no(no)

            vote.add_source(vote_url)

            yield vote
Beispiel #7
0
    def scrape_vote(self, bill, vote_id, session):
        """Fetch one roll call by id and yield it as a vote event.

        Args:
            bill: bill object; its ``identifier`` is attached to the vote.
            vote_id: roll call id posted to the roll-call JSON endpoint.
            session: legislative session stored on the vote.

        Yields:
            A single ``Vote``, or nothing when the endpoint returns an empty
            payload.
        """
        vote_url = 'https://legis.delaware.gov/json/RollCall/GetRollCallVoteByRollCallId'
        form = {
            'rollCallId': vote_id,
            'sort': '',
            'group': '',
            'filter': '',
        }

        page = self.post(url=vote_url, data=form, allow_redirects=True).json()
        if not page:
            # Empty payload: there is no roll call to report.
            return

        roll = page['Model']
        vote_chamber = self.chamber_map[roll['ChamberName']]
        # "7/1/16 01:00 AM"
        vote_date = dt.datetime.strptime(roll['TakenAtDateTime'],
                                         '%m/%d/%y %I:%M %p').strftime('%Y-%m-%d')

        # TODO: What does this code mean?
        vote_motion = roll['RollCallVoteType']

        vote_passed = 'pass' if roll['RollCallStatus'] == 'Passed' else 'fail'
        # Everything that is neither yes nor no is folded into "other".
        other_count = (int(roll['NotVotingCount']) +
                       int(roll['VacantVoteCount']) +
                       int(roll['AbsentVoteCount']) +
                       int(roll['ConflictVoteCount'])
                       )
        vote = Vote(chamber=vote_chamber,
                    start_date=vote_date,
                    motion_text=vote_motion,
                    result=vote_passed,
                    classification='other',
                    bill=bill.identifier,
                    legislative_session=session
                    )
        vote.add_source(vote_url)
        # Coerce the same way as the counts summed above, so all three
        # tallies are stored as integers.
        vote.set_count('yes', int(roll['YesVoteCount']))
        vote.set_count('no', int(roll['NoVoteCount']))
        vote.set_count('other', other_count)

        for row in roll['AssemblyMemberVotes']:
            # AssemblyMemberId looks like it should work here,
            # but for some sessions it's bugged to only return session
            try:
                voter = self.legislators_by_short[str(row['ShortName'])]
                name = voter['DisplayName']
            except KeyError:
                # Fall back to the short name when the legislator is unknown.
                self.warning('could not find legislator short name %s',
                             row['ShortName'])
                name = row['ShortName']

            vote_code = row['SelectVoteTypeCode']
            if vote_code == 'Y':
                vote.yes(name)
            elif vote_code == 'N':
                vote.no(name)
            else:
                vote.vote('other', name)

        # bill.add_vote_event(vote)
        yield vote
Beispiel #8
0
    def _parse_senate_votes(self, vote_data, bill, url):
        vote_datetime = datetime.datetime.strptime(
            vote_data['voteDate'], '%Y-%m-%d')

        if vote_data['voteType'] == 'FLOOR':
            motion = 'Floor Vote'
        elif vote_data['voteType'] == 'COMMITTEE':
            motion = '{} Vote'.format(vote_data['committee']['name'])
        else:
            raise ValueError('Unknown vote type encountered.')

        vote = VoteEvent(
            chamber='upper',
            start_date=vote_datetime.strftime('%Y-%m-%d'),
            motion_text=motion,
            classification='passage',
            result='fail',
            bill=bill,
        )

        vote.add_source(url)

        vote_rolls = vote_data['memberVotes']['items']

        yes_count, no_count, other_count = 0, 0, 0

        # Count all yea votes.
        if 'items' in vote_rolls.get('AYE', {}):
            for legislator in vote_rolls['AYE']['items']:
                vote.yes(legislator['fullName'])
                yes_count += 1

        if 'items' in vote_rolls.get('AYEWR', {}):
            for legislator in vote_rolls['AYEWR']['items']:
                vote.yes(legislator['fullName'])
                yes_count += 1

        # Count all nay votes.
        if 'items' in vote_rolls.get('NAY', {}):
            for legislator in vote_rolls['NAY']['items']:
                vote.no(legislator['fullName'])
                no_count += 1

        # Count all other types of votes.
        other_vote_types = ('EXC', 'ABS', 'ABD')
        for vote_type in other_vote_types:
            if vote_rolls.get(vote_type, []):
                for legislator in vote_rolls[vote_type]['items']:
                    vote.vote('other', legislator['fullName'])
                    other_count += 1

        vote.result = 'pass' if yes_count > no_count else 'fail'
        vote.set_count('yes', yes_count)
        vote.set_count('no', no_count)
        vote.set_count('other', other_count)

        return vote
Beispiel #9
0
    def _parse_senate_votes(self, vote_data, bill, url):
        vote_datetime = datetime.datetime.strptime(vote_data['voteDate'],
                                                   '%Y-%m-%d')

        if vote_data['voteType'] == 'FLOOR':
            motion = 'Floor Vote'
        elif vote_data['voteType'] == 'COMMITTEE':
            motion = '{} Vote'.format(vote_data['committee']['name'])
        else:
            raise ValueError('Unknown vote type encountered.')

        vote = VoteEvent(
            chamber='upper',
            start_date=vote_datetime.strftime('%Y-%m-%d'),
            motion_text=motion,
            classification='passage',
            result='fail',
            bill=bill,
        )

        vote.add_source(url)

        vote_rolls = vote_data['memberVotes']['items']

        yes_count, no_count, other_count = 0, 0, 0

        # Count all yea votes.
        if 'items' in vote_rolls.get('AYE', {}):
            for legislator in vote_rolls['AYE']['items']:
                vote.yes(legislator['fullName'])
                yes_count += 1

        if 'items' in vote_rolls.get('AYEWR', {}):
            for legislator in vote_rolls['AYEWR']['items']:
                vote.yes(legislator['fullName'])
                yes_count += 1

        # Count all nay votes.
        if 'items' in vote_rolls.get('NAY', {}):
            for legislator in vote_rolls['NAY']['items']:
                vote.no(legislator['fullName'])
                no_count += 1

        # Count all other types of votes.
        other_vote_types = ('EXC', 'ABS', 'ABD')
        for vote_type in other_vote_types:
            if vote_rolls.get(vote_type, []):
                for legislator in vote_rolls[vote_type]['items']:
                    vote.vote('other', legislator['fullName'])
                    other_count += 1

        vote.result = 'pass' if yes_count > no_count else 'fail'
        vote.set_count('yes', yes_count)
        vote.set_count('no', no_count)
        vote.set_count('other', other_count)

        return vote
Beispiel #10
0
    def scrape_vote(self, bill, vote_json, session):
        """Yield a ``VoteEvent`` built from one vote record.

        Args:
            bill: passed to ``VoteEvent`` as the bill the vote belongs to.
            vote_json: mapping with the motion (``action``,
                ``amendmentNumber``), ``chamber``, ``voteDate``,
                ``billNumber``, the per-option counts (``*VotesCount``) and
                the comma-separated voter lists (``yesVotes``, ``noVotes``,
                ``absentVotes``, ``excusedVotes``, ``conflictVotes``).
            session: legislative session; also used in the source URL.

        The vote passes only when the yes count strictly exceeds the no
        count.  Conflict votes are recorded under the ``other`` option.
        """

        def split_names(key):
            """Return the stripped, non-blank names of a comma-separated list.

            An empty or null list yields no names.
            """
            raw = vote_json[key] or ''
            return [part.strip() for part in raw.split(',') if part.strip()]

        if vote_json['amendmentNumber']:
            motion = '{}: {}'.format(
                vote_json['amendmentNumber'], vote_json['action'])
        else:
            motion = vote_json['action']

        result = 'pass' if vote_json['yesVotesCount'] > vote_json['noVotesCount'] else 'fail'

        v = VoteEvent(
            chamber=self.chamber_abbrev_map[vote_json['chamber']],
            start_date=self.parse_local_date(vote_json['voteDate']),
            motion_text=motion,
            result=result,
            legislative_session=session,
            bill=bill,
            classification='other',
        )

        v.set_count(option='yes', value=vote_json['yesVotesCount'])
        v.set_count('no', vote_json['noVotesCount'])
        v.set_count('absent', vote_json['absentVotesCount'])
        v.set_count('excused', vote_json['excusedVotesCount'])
        v.set_count('other', vote_json['conflictVotesCount'])

        for name in split_names('yesVotes'):
            v.yes(name)

        for name in split_names('noVotes'):
            v.no(name)

        # add votes with other classifications
        # option can be 'yes', 'no', 'absent',
        # 'abstain', 'not voting', 'paired', 'excused'
        # Names are stripped here too, so a voter is spelled the same way
        # whichever list they appear in.
        for name in split_names('absentVotes'):
            v.vote(option="absent", voter=name)

        for name in split_names('excusedVotes'):
            v.vote(option="excused", voter=name)

        for name in split_names('conflictVotes'):
            v.vote(option="other", voter=name)

        source_url = 'http://lso.wyoleg.gov/Legislation/{}/{}'.format(
            session, vote_json['billNumber'])
        v.add_source(source_url)

        yield v
Beispiel #11
0
    def scrape_vote(self, bill, vote_json, session):
        """Build and yield the ``VoteEvent`` for one vote record.

        Args:
            bill: passed to ``VoteEvent`` as the bill the vote belongs to.
            vote_json: mapping with ``action``, ``amendmentNumber``,
                ``chamber``, ``voteDate``, ``billNumber``, a ``*VotesCount``
                entry per option and a comma-separated ``*Votes`` name list
                per option.
            session: legislative session; also used in the source URL.

        The result is ``'pass'`` only when the yes count strictly exceeds the
        no count.
        """
        amendment = vote_json['amendmentNumber']
        if amendment:
            motion = '{}: {}'.format(amendment, vote_json['action'])
        else:
            motion = vote_json['action']

        result = 'pass' if vote_json['yesVotesCount'] > vote_json['noVotesCount'] else 'fail'

        v = VoteEvent(
            chamber=self.chamber_abbrev_map[vote_json['chamber']],
            start_date=self.parse_local_date(vote_json['voteDate']),
            motion_text=motion,
            result=result,
            legislative_session=session,
            bill=bill,
            classification='other',
        )

        v.set_count(option='yes', value=vote_json['yesVotesCount'])
        v.set_count('no', vote_json['noVotesCount'])
        v.set_count('absent', vote_json['absentVotesCount'])
        v.set_count('excused', vote_json['excusedVotesCount'])
        v.set_count('other', vote_json['conflictVotesCount'])

        # (name-list key, recorder); conflict votes are stored as 'other'.
        # option can be 'yes', 'no', 'absent',
        # 'abstain', 'not voting', 'paired', 'excused'
        recorders = (
            ('yesVotes', v.yes),
            ('noVotes', v.no),
            ('absentVotes', lambda who: v.vote(option="absent", voter=who)),
            ('excusedVotes', lambda who: v.vote(option="excused", voter=who)),
            ('conflictVotes', lambda who: v.vote(option="other", voter=who)),
        )
        for key, record in recorders:
            # A null or empty list contributes no voters.
            for raw_name in (vote_json[key] or '').split(','):
                # Every list is stripped, so a voter's name never carries the
                # whitespace that follows the separating comma.
                name = raw_name.strip()
                if name:
                    record(name)

        source_url = 'http://lso.wyoleg.gov/Legislation/{}/{}'.format(
            session, vote_json['billNumber'])
        v.add_source(source_url)

        yield v
    def handle_page(self):
        """Parse a committee vote page and yield it as a ``VoteEvent``.

        The date, committee and action are read from their labelled spans,
        the totals from the text of the last nested table, and the individual
        votes from the cells of the innermost tables.  The vote passes only
        when yeas strictly outnumber nays.

        Raises:
            ValueError: if a labelled span is not present exactly once, the
                totals cannot be found, or a member's vote marker is not
                recognised.
        """
        (date, ) = self.doc.xpath(
            '//span[@id="ctl00_ContentPlaceHolder1_lblDate"]/text()')
        date = format_datetime(
            datetime.datetime.strptime(date, '%m/%d/%Y %I:%M:%S %p'),
            'US/Eastern')

        totals = self.doc.xpath('//table//table')[-1].text_content()
        # Collapse all whitespace runs so the totals sit on one line.
        totals = re.sub(r'(?mu)\s+', " ", totals).strip()
        # Both fragments are raw strings so "\s" and "\d" reach the regex
        # engine without relying on invalid string escapes.
        totals_match = re.search(
            r'(?m)Total Yeas:\s+(\d+)\s+Total Nays:\s+(\d+)\s+'
            r'Total Missed:\s+(\d+)', totals)
        if totals_match is None:
            raise ValueError(
                "Could not find vote totals in: {}".format(totals))
        (yes_count, no_count, other_count) = [
            int(x) for x in totals_match.groups()
        ]
        result = 'pass' if yes_count > no_count else 'fail'

        (committee, ) = self.doc.xpath(
            '//span[@id="ctl00_ContentPlaceHolder1_lblCommittee"]/text()')
        (action, ) = self.doc.xpath(
            '//span[@id="ctl00_ContentPlaceHolder1_lblAction"]/text()')
        motion = "{} ({})".format(action, committee)

        vote = VoteEvent(
            start_date=date,
            bill=self.kwargs['bill'],
            chamber='lower',
            motion_text=motion,
            result=result,
            classification='committee',
        )
        vote.add_source(self.url)
        vote.set_count('yes', yes_count)
        vote.set_count('no', no_count)
        vote.set_count('not voting', other_count)

        for cell in self.doc.xpath('//table//table//table//td'):
            if not cell.text_content().strip():
                continue

            # The second span holds the member, the first the vote marker.
            (member, ) = cell.xpath('span[2]//text()')
            (member_vote, ) = cell.xpath('span[1]//text()')

            if member_vote == "Y":
                vote.yes(member)
            elif member_vote == "N":
                vote.no(member)
            elif member_vote == "-":
                vote.vote('not voting', member)
            # Parenthetical votes appear to not be counted in the
            # totals for Yea, Nay, _or_ Missed
            elif re.search(r'\([YN]\)', member_vote):
                continue
            else:
                raise ValueError(
                    "Unknown vote type found: {}".format(member_vote))

        yield vote
Beispiel #13
0
    def scrape_vote(self, chamber, session, bill_id, vote_url):
        """Scrape one House vote page and yield it as a ``VoteEvent``.

        Nothing is yielded when the request lands on the "no vote found"
        page, when the page has no motion heading, or when none of its
        paragraphs parses as a ``MM/DD/YYYY`` date.  The vote passes only
        when yeas strictly outnumber nays.
        """
        NO_VOTE_URL = 'http://www.house.leg.state.mn.us/votes/novotefound.asp'
        response = self.get(vote_url)
        markup = response.text

        # sometimes the link is broken, will redirect to NO_VOTE_URL
        if response.url == NO_VOTE_URL:
            return

        doc = lxml.html.fromstring(markup)

        headings = doc.xpath("//div[@id='leg_PageContent']/div/h2/text()")
        if not headings:
            self.logger.warning("Bill was missing a motion number, skipping")
            return
        motion = headings[0]

        # The second h3 holds the tallies; the yea count is its first token
        # and the nay count its fourth.
        tally_tokens = doc.xpath(
            ".//div[@id='leg_PageContent']/div/h3/text()")[1].split()
        yeas = int(tally_tokens[0])
        nays = int(tally_tokens[3])

        # The date is the first paragraph that parses as MM/DD/YYYY.
        for text in doc.xpath(".//div[@id='leg_PageContent']/div/p/text()"):
            try:
                date = datetime.datetime.strptime(
                    text.strip(), '%m/%d/%Y').date()
            except ValueError:
                continue
            break
        else:
            self.logger.warning("No date could be found for vote on %s" %
                                motion)
            return

        outcome = 'pass' if yeas > nays else 'fail'
        vote = VoteEvent(chamber='lower',
                         start_date=date,
                         motion_text=motion,
                         result=outcome,
                         classification='passage',
                         legislative_session=session,
                         bill=bill_id,
                         bill_chamber=chamber)
        vote.set_count('yes', yeas)
        vote.set_count('no', nays)
        vote.add_source(vote_url)
        vote.pupa_id = vote_url

        # first table has YEAs, second table is nays
        for table_xpath, record in (
                ('//table[1]/tr/td/font/text()', vote.yes),
                ('//table[2]/tr/td/font/text()', vote.no)):
            for raw_name in doc.xpath(table_xpath):
                record(raw_name.strip())

        yield vote
Beispiel #14
0
    def scrape_vote(self, bill, vote_json, session):
        """Yield the ``VoteEvent`` described by one vote record.

        Args:
            bill: passed to ``VoteEvent`` as the bill the vote belongs to.
            vote_json: mapping with ``action``, ``amendmentNumber``,
                ``chamber``, ``voteDate``, ``billNumber``, the counts
                (``yesVotesCount`` etc.) and comma-separated name lists
                (``yesVotes`` etc.).
            session: legislative session; also used in the source URL.

        The vote passes only when the yes count strictly exceeds the no
        count.  Conflict votes are recorded under the "other" option.
        """

        def voters(key):
            """Yield each non-blank name of a comma-separated list, stripped.

            A null or empty list yields nothing.
            """
            for chunk in (vote_json[key] or "").split(","):
                cleaned = chunk.strip()
                if cleaned:
                    yield cleaned

        if vote_json["amendmentNumber"]:
            motion = "{}: {}".format(vote_json["amendmentNumber"],
                                     vote_json["action"])
        else:
            motion = vote_json["action"]

        result = ("pass"
                  if vote_json["yesVotesCount"] > vote_json["noVotesCount"]
                  else "fail")

        v = VoteEvent(
            chamber=self.chamber_abbrev_map[vote_json["chamber"]],
            start_date=self.parse_local_date(vote_json["voteDate"]),
            motion_text=motion,
            result=result,
            legislative_session=session,
            bill=bill,
            classification="other",
        )

        v.set_count(option="yes", value=vote_json["yesVotesCount"])
        v.set_count("no", vote_json["noVotesCount"])
        v.set_count("absent", vote_json["absentVotesCount"])
        v.set_count("excused", vote_json["excusedVotesCount"])
        v.set_count("other", vote_json["conflictVotesCount"])

        for name in voters("yesVotes"):
            v.yes(name)

        for name in voters("noVotes"):
            v.no(name)

        # add votes with other classifications
        # option can be 'yes', 'no', 'absent',
        # 'abstain', 'not voting', 'paired', 'excused'
        # These names are stripped like the yes/no names, so no voter is
        # stored with the whitespace that follows a comma.
        for option, key in (("absent", "absentVotes"),
                            ("excused", "excusedVotes"),
                            ("other", "conflictVotes")):
            for name in voters(key):
                v.vote(option=option, voter=name)

        source_url = "http://lso.wyoleg.gov/Legislation/{}/{}".format(
            session, vote_json["billNumber"])
        v.add_source(source_url)

        yield v
Beispiel #15
0
    def parse_bill_actions_table(self, bill, action_table, bill_id, session, url, bill_chamber):
        """Add the rows of a bill's action table to ``bill``; yield its votes.

        The first child of ``action_table`` is skipped (presumably a header
        row -- confirm against the page markup).  Each remaining row provides
        a ``MM/DD/YYYY`` date, an actor code (matched case-insensitively) and
        the action text.  Committees reported by ``categorize_action`` are
        attached to the action when ``self.short_ids`` knows them.  A
        ``VoteEvent`` is yielded whenever ``self.parse_vote`` finds a vote in
        the action text.

        Raises:
            KeyError: if the upper-cased actor code is not a known code.
            ValueError: if the date cell does not match ``%m/%d/%Y``.
        """
        chamber_by_code = {
            "S": "upper",
            "H": "lower",
            "D": "legislature",  # "Data Systems",
            "$": "Appropriation measure",
            "CONAM": "Constitutional Amendment",
        }

        for entry in action_table.xpath('*')[1:]:
            date = dt.datetime.strptime(
                entry[0].text_content(), "%m/%d/%Y").strftime('%Y-%m-%d')
            code = entry[1].text_content().upper()
            text = entry[2].text_content()
            actor = chamber_by_code[code]
            act_type, committees = categorize_action(text)

            # XXX: Translate short-code to full committee name for the
            #      matcher.  Unknown short codes are silently ignored.
            full_names = []
            if committees:
                for short_id in committees:
                    try:
                        full_name = self.short_ids[short_id]['name']
                    except KeyError:
                        continue
                    full_names.append(full_name)

            act = bill.add_action(text, date, chamber=actor,
                                  classification=act_type)
            for full_name in full_names:
                act.add_related_entity(name=full_name, entity_type="organization")

            found = self.parse_vote(text)
            if found:
                counts, motion = found
                # Pass/fail is inferred from the wording of the action text.
                passed = 'passed' in text.lower()
                vote = VoteEvent(start_date=date,
                                 chamber=actor,
                                 bill=bill_id,
                                 bill_chamber=bill_chamber,
                                 legislative_session=session,
                                 motion_text=motion,
                                 result='pass' if passed else 'fail',
                                 classification='passage')
                vote.add_source(url)
                vote.set_count('yes', int(counts['n_yes'] or 0))
                vote.set_count('no', int(counts['n_no'] or 0))
                vote.set_count('not voting', int(counts['n_excused'] or 0))

                # 'yes_resv' voters are recorded as ordinary yes votes.
                for key in ('yes', 'yes_resv'):
                    for voter in split_specific_votes(counts[key]):
                        vote.yes(voter)
                for voter in split_specific_votes(counts['no']):
                    vote.no(voter)
                for voter in split_specific_votes(counts['excused']):
                    vote.vote('not voting', voter)

                yield vote
Beispiel #16
0
    def handle_page(self):
        """Parse a committee vote page and yield it as a ``VoteEvent``.

        Date, tallies, committee and action come from spans whose ids contain
        the matching ``lbl...`` fragment; individual votes come from the
        items of the ``vote-list`` lists.  The vote passes only when yeas
        strictly outnumber nays.

        Raises:
            ValueError: if a single-valued span is not present exactly once,
                or a member's vote marker is not recognised.
        """
        [date_text] = self.doc.xpath('//span[contains(@id, "lblDate")]/text()')
        parsed = datetime.datetime.strptime(date_text, '%m/%d/%Y %I:%M:%S %p')
        date = format_datetime(parsed, 'US/Eastern')

        yea_texts = self.doc.xpath('//span[contains(@id, "lblYeas")]/text()')
        yes_count = int(yea_texts[0])
        nay_texts = self.doc.xpath('//span[contains(@id, "lblNays")]/text()')
        no_count = int(nay_texts[0])
        missed_texts = self.doc.xpath(
            '//span[contains(@id, "lblMissed")]/text()')
        other_count = int(missed_texts[0])

        if yes_count > no_count:
            result = 'pass'
        else:
            result = 'fail'

        [committee] = self.doc.xpath(
            '//span[contains(@id, "lblCommittee")]/text()')
        [action] = self.doc.xpath('//span[contains(@id, "lblAction")]/text()')

        vote = VoteEvent(
            start_date=date,
            bill=self.kwargs['bill'],
            chamber='lower',
            motion_text="{} ({})".format(action, committee),
            result=result,
            classification='committee',
        )
        vote.add_source(self.url)
        # The "missed" total is stored under the 'not voting' option.
        for option, total in (('yes', yes_count),
                              ('no', no_count),
                              ('not voting', other_count)):
            vote.set_count(option, total)

        entries = self.doc.xpath('//ul[contains(@class, "vote-list")]/li')
        for entry in entries:
            if not entry.text_content().strip():
                continue

            # The second span holds the member, the first the vote marker.
            [member] = entry.xpath('span[2]//text()')
            [marker] = entry.xpath('span[1]//text()')

            if marker == "Y":
                vote.yes(member)
                continue
            if marker == "N":
                vote.no(member)
                continue
            if marker == "-":
                vote.vote('not voting', member)
                continue
            # Parenthetical votes appear to not be counted in the
            # totals for Yea, Nay, _or_ Missed
            if re.search(r'\([YN]\)', marker):
                continue
            raise ValueError(
                "Unknown vote type found: {}".format(marker))

        yield vote
Beispiel #17
0
    def parse_vote(self, bill, actor, date, motion, url, uniqid):
        """Fetch a roll-call page and yield the vote it describes.

        The page text is searched for its ``YEAS``, ``NAYS`` and ``ABSENT``
        (optionally ``ABSENT OR NOT VOTING``) sections; each section supplies
        a count followed by voter names separated by two or more whitespace
        characters.  ``url`` is added as a source to both ``bill`` and the
        vote.  The vote passes only when yeas strictly outnumber nays.
        """
        text = self.get(url).text
        bill.add_source(url)

        pattern = re.compile(
            r"YEAS -?\s?(\d+)(.*)NAYS -?\s?(\d+)"
            r"(.*)ABSENT( OR NOT VOTING)? -?\s?"
            r"(\d+)(.*)",
            re.MULTILINE | re.DOTALL,
        )
        match = pattern.search(text)
        yes_count = int(match.group(1))
        no_count = int(match.group(3))
        other_count = int(match.group(6))

        # Any actor other than a chamber leaves the vote's chamber blank.
        vote_chamber = actor if actor in ("upper", "lower") else ""

        vote = Vote(
            chamber=vote_chamber,
            start_date=date,
            motion_text=motion,
            result="pass" if yes_count > no_count else "fail",
            identifier=str(uniqid),
            classification="passage",
            bill=bill,
        )
        vote.add_source(url)
        for option, total in (("yes", yes_count),
                              ("no", no_count),
                              ("other", other_count)):
            vote.set_count(option, total)

        # Names within a section are separated by runs of 2+ whitespace.
        yea_names = re.split(r"\s{2,}", match.group(2).strip())
        nay_names = re.split(r"\s{2,}", match.group(4).strip())
        other_names = re.split(r"\s{2,}", match.group(7).strip())

        # Splitting an empty section leaves an empty string; skip those.
        for name in filter(None, yea_names):
            vote.yes(name)
        for name in filter(None, nay_names):
            vote.no(name)
        for name in filter(None, other_names):
            vote.vote("other", name)

        yield vote
Beispiel #18
0
def test_full_vote_event():
    """Import one fully populated vote event and check what was stored."""
    jurisdiction = Jurisdiction.objects.create(id='jid', division_id='did')
    jurisdiction.legislative_sessions.create(name='1900', identifier='1900')

    john = ScrapePerson('John Smith', primary_org='lower')
    adam = ScrapePerson('Adam Smith', primary_org='lower')
    house = ScrapeOrganization(name='House', classification='lower')
    scraped_bill = ScrapeBill(
        'HB 1', '1900', 'Axe & Tack Tax Act', from_organization=house._id)
    scraped_vote = ScrapeVoteEvent(
        legislative_session='1900',
        motion_text='passage',
        start_date='1900-04-01',
        classification='passage:bill',
        result='pass',
        bill_chamber='lower',
        bill='HB 1',
        organization=house._id,
    )
    scraped_vote.set_count('yes', 20)
    scraped_vote.yes('John Smith')
    scraped_vote.no('Adam Smith')

    # Import the dependencies first: organization, people, memberships, bill.
    org_importer = OrganizationImporter('jid')
    org_importer.import_data([house.as_dict()])

    person_importer = PersonImporter('jid')
    person_importer.import_data([john.as_dict(), adam.as_dict()])

    membership_importer = MembershipImporter(
        'jid', person_importer, org_importer, DumbMockImporter())
    membership_importer.import_data(
        [john._related[0].as_dict(), adam._related[0].as_dict()])

    bill_importer = BillImporter('jid', org_importer, person_importer)
    bill_importer.import_data([scraped_bill.as_dict()])

    vote_importer = VoteEventImporter(
        'jid', person_importer, org_importer, bill_importer)
    vote_importer.import_data([scraped_vote.as_dict()])

    assert VoteEvent.objects.count() == 1
    stored = VoteEvent.objects.get()
    assert stored.legislative_session == LegislativeSession.objects.get()
    assert stored.motion_classification == ['passage:bill']
    assert stored.bill == Bill.objects.get()

    tally = stored.counts.get()
    assert tally.option == 'yes'
    assert tally.value == 20

    assert len(list(stored.votes.all())) == 2
    for cast in stored.votes.all():
        # John voted yes; the only other voter, Adam, voted no.
        is_john = cast.voter_name == 'John Smith'
        assert cast.option == ('yes' if is_john else 'no')
        expected_name = 'John Smith' if is_john else 'Adam Smith'
        assert cast.voter == Person.objects.get(name=expected_name)
Beispiel #19
0
    def scrape_vote(self, chamber, session, bill_id, vote_url):
        """Scrape a single vote page and yield it as a ``VoteEvent``.

        Nothing is yielded when the request ends up on the "no vote found"
        page, when the page has no motion heading, or when none of its
        paragraphs parses as an ``MM/DD/YYYY`` date.  The vote is always
        recorded for the lower chamber; ``chamber`` is only used as the
        bill's chamber.
        """
        missing_vote_page = 'http://www.house.leg.state.mn.us/votes/novotefound.asp'
        response = self.get(vote_url)
        markup = response.text

        # a broken link redirects to the "no vote found" page
        if response.url == missing_vote_page:
            return

        doc = lxml.html.fromstring(markup)

        headings = doc.xpath("//div[@id='leg_PageContent']/div/h2/text()")
        if not headings:
            self.logger.warning("Bill was missing a motion number, skipping")
            return
        motion = headings[0]

        # the second h3 holds the tallies; words 0 and 3 are the yea/nay numbers
        tally_words = doc.xpath(
            ".//div[@id='leg_PageContent']/div/h3/text()")[1].split()
        yeas, nays = int(tally_words[0]), int(tally_words[3])

        # the date is the first paragraph that parses as one
        for text in doc.xpath(".//div[@id='leg_PageContent']/div/p/text()"):
            try:
                vote_date = datetime.datetime.strptime(
                    text.strip(), '%m/%d/%Y').date()
            except ValueError:
                continue
            break
        else:
            self.logger.warning("No date could be found for vote on %s" % motion)
            return

        outcome = 'pass' if yeas > nays else 'fail'
        vote = VoteEvent(
            chamber='lower',
            start_date=vote_date,
            motion_text=motion,
            result=outcome,
            classification='passage',
            legislative_session=session,
            bill=bill_id,
            bill_chamber=chamber,
        )
        vote.set_count('yes', yeas)
        vote.set_count('no', nays)
        vote.add_source(vote_url)
        vote.pupa_id = vote_url

        # the first table lists the yeas, the second the nays
        name_tables = (
            (vote.yes, '//table[1]/tr/td/font/text()'),
            (vote.no, '//table[2]/tr/td/font/text()'),
        )
        for record, cells in name_tables:
            for cell in doc.xpath(cells):
                record(cell.strip())

        yield vote
Beispiel #20
0
    def scrape_votes(self, bill):
        """Yield one vote per roll call the web service reports for ``bill``.

        The bill number is the second whitespace-separated token of
        ``bill.identifier``; together with ``self.biennium`` it forms the
        ``GetRollCalls`` query.  Absent and excused tallies are summed into
        a single "other" count, and a roll call passes only when the yes
        count exceeds the no and other counts combined.
        """
        number = bill.identifier.split()[1]
        endpoint = (
            "http://wslwebservices.leg.wa.gov/legislationservice.asmx/"
            "GetRollCalls?billNumber=%s&biennium=%s"
        )
        url = endpoint % (number, self.biennium)
        response = self.get(url)
        tree = lxml.etree.fromstring(response.content)

        for roll_call in xpath(tree, "//wa:RollCall"):
            motion = xpath(roll_call, "string(wa:Motion)")
            seq_no = xpath(roll_call, "string(wa:SequenceNumber)")

            # keep only the date part of the timestamp
            day = xpath(roll_call, "string(wa:VoteDate)").split("T")[0]
            vote_date = datetime.datetime.strptime(day, "%Y-%m-%d").date()

            yes_count = int(xpath(roll_call, "string(wa:YeaVotes/wa:Count)"))
            no_count = int(xpath(roll_call, "string(wa:NayVotes/wa:Count)"))
            absent = int(xpath(roll_call, "string(wa:AbsentVotes/wa:Count)"))
            excused = int(xpath(roll_call, "string(wa:ExcusedVotes/wa:Count)"))
            other_count = absent + excused

            agency = xpath(roll_call, "string(wa:Agency)")
            chamber = {"House": "lower", "Senate": "upper"}[agency]
            carried = yes_count > (no_count + other_count)

            vote = Vote(
                chamber=chamber,
                start_date=vote_date,
                motion_text="{} (#{})".format(motion, seq_no),
                result="pass" if carried else "fail",
                classification="other",
                bill=bill,
            )
            for option, total in (
                ("yes", yes_count),
                ("no", no_count),
                ("other", other_count),
            ):
                vote.set_count(option, total)
            vote.add_source(url)

            for member in xpath(roll_call, "wa:Votes/wa:Vote"):
                name = xpath(member, "string(wa:Name)")
                # NOTE(review): the "VOte" capitalisation is kept exactly as
                # written; presumably it mirrors the service's element name.
                cast = xpath(member, "string(wa:VOte)")

                if cast not in ("Yea", "Nay"):
                    vote.vote("other", name)
                    continue
                (vote.yes if cast == "Yea" else vote.no)(name)

            yield vote
    def handle_page(self):
        """Build the committee vote recorded on this page and yield it.

        Reads the vote date, the committee and action labels, the
        yea/nay/missed totals and each member's individual vote from
        ``self.doc``.  The vote passes only when yeas strictly outnumber
        nays, and the "missed" total is stored as the 'not voting' count.

        Yields:
            A single ``VoteEvent`` for the bill in ``self.kwargs['bill']``,
            with ``self.url`` as its source.

        Raises:
            ValueError: if a member cell holds a marker other than ``Y``,
                ``N``, ``-`` or a parenthesised ``(Y)``/``(N)``; the
                single-element unpackings also raise it when an expected
                label or span is missing or duplicated.
        """
        (date, ) = self.doc.xpath('//span[@id="ctl00_ContentPlaceHolder1_lblDate"]/text()')
        # reformat the timestamp as ISO with a space instead of the 'T'
        date = datetime.datetime.strptime(date, '%m/%d/%Y %I:%M:%S %p'
                                          ).isoformat().replace('T', ' ')

        # the totals live in the last nested table; collapse all whitespace
        # so the pattern below can match across the original line breaks
        totals = self.doc.xpath('//table//table')[-1].text_content()
        totals = re.sub(r'(?mu)\s+', " ", totals).strip()
        # Both halves of the pattern are raw strings so that \s and \d reach
        # the regex engine without going through string-escape processing.
        (yes_count, no_count, other_count) = [int(x) for x in re.search(
            r'(?m)Total Yeas:\s+(\d+)\s+Total Nays:\s+(\d+)\s+'
            r'Total Missed:\s+(\d+)', totals).groups()]
        result = 'pass' if yes_count > no_count else 'fail'

        (committee, ) = self.doc.xpath(
            '//span[@id="ctl00_ContentPlaceHolder1_lblCommittee"]/text()')
        (action, ) = self.doc.xpath('//span[@id="ctl00_ContentPlaceHolder1_lblAction"]/text()')
        motion = "{} ({})".format(action, committee)

        vote = VoteEvent(start_date=date,
                         bill=self.kwargs['bill'],
                         chamber='lower',
                         motion_text=motion,
                         result=result,
                         classification='committee',
                         )
        vote.add_source(self.url)
        vote.set_count('yes', yes_count)
        vote.set_count('no', no_count)
        vote.set_count('not voting', other_count)

        for cell in self.doc.xpath('//table//table//table//td'):
            # skip empty padding cells
            if not cell.text_content().strip():
                continue

            # second span is the member's name, first span is the vote marker
            (member, ) = cell.xpath('span[2]//text()')
            (member_vote, ) = cell.xpath('span[1]//text()')

            if member_vote == "Y":
                vote.yes(member)
            elif member_vote == "N":
                vote.no(member)
            elif member_vote == "-":
                vote.vote('not voting', member)
            # Parenthetical votes appear to not be counted in the
            # totals for Yea, Nay, _or_ Missed
            elif re.search(r'\([YN]\)', member_vote):
                continue
            else:
                raise ValueError("Unknown vote type found: {}".format(member_vote))

        yield vote
Beispiel #22
0
    def parse_vote(self, actor, date, row, session, bill_id, bill_chamber,
                   source):
        """
        Build a VoteEvent from an action row and yield it.

        The row's own text, with non-breaking spaces normalised and dashes
        removed, is the motion ("passage" when that leaves nothing).  The
        first span holds "<outcome>-<yes>-<no>-<other>"; the text trailing
        the second and third spans is handed to ``self.get_names`` for the
        yes and no voters, and every later span whose text starts with
        "Absent" or "Excused" contributes absent voters.
        """
        spans = row.xpath(".//span")
        cleaned = row.text.replace(u"\u00a0", " ").replace("-", "").strip()
        motion = cleaned or "passage"

        outcome, yes_total, no_total, absent_total = (
            spans[0].text_content().rsplit("-", 3))
        yes_names = self.get_names(spans[1].tail)
        no_names = self.get_names(spans[2].tail)

        absent_names = []
        for extra in spans[3:]:
            if extra.text.startswith(("Absent", "Excused")):
                absent_names.extend(self.get_names(extra.tail))

        # Map the outcome wording onto a result; when no keyword is present
        # the raw outcome text is passed through unchanged.
        lowered = outcome.lower()
        keywords = (("adopted", "pass"), ("passed", "pass"), ("failed", "fail"))
        result = next(
            (label for word, label in keywords if word in lowered),
            outcome,
        )

        vote = VoteEvent(
            chamber=actor,
            start_date=date,
            motion_text=motion,
            bill=bill_id,
            bill_chamber=bill_chamber,
            result=result,
            classification="passage",
            legislative_session=session,
        )
        vote.add_source(source)
        vote.set_count("yes", int(yes_total))
        vote.set_count("no", int(no_total))
        vote.set_count("absent", int(absent_total))

        ballots = (
            (vote.yes, yes_names),
            (vote.no, no_names),
            (lambda who: vote.vote("absent", who), absent_names),
        )
        for record, names in ballots:
            for who in names:
                # empty entries and the literal "None" are not voters
                if who and who != "None":
                    record(who)
        yield vote
Beispiel #23
0
    def parse_vote(self, bill, actor, date, motion, url, uniqid):
        """Download the roll-call page at ``url`` and yield the vote it lists.

        The page text is searched for ``YEAS``, ``NAYS`` and ``ABSENT``
        (optionally ``ABSENT OR NOT VOTING``) headings.  The number after
        each heading becomes the tally for that option, and the text that
        follows it is split on runs of two or more whitespace characters to
        obtain the individual voter names.

        Args:
            bill: the bill voted on; ``url`` is added to it as a source.
            actor: the vote's chamber when it is 'upper' or 'lower';
                any other value leaves the chamber blank.
            date: start date given to the vote.
            motion: motion text given to the vote.
            url: address of the roll-call page.
            uniqid: stringified and used as the vote identifier.

        Yields:
            A single vote, passing only when the yes tally is strictly
            greater than the no tally.
        """
        page = self.get(url).text
        bill.add_source(url)
        # Raw strings: \s and \d are regex escapes, not string escapes.
        vote_re = re.compile(r'YEAS -?\s?(\d+)(.*)NAYS -?\s?(\d+)'
                             r'(.*)ABSENT( OR NOT VOTING)? -?\s?'
                             r'(\d+)(.*)',
                             re.MULTILINE | re.DOTALL)
        # NOTE(review): ``match`` is None when the headings are absent, in
        # which case the next line raises AttributeError.
        match = vote_re.search(page)
        yes_count = int(match.group(1))
        no_count = int(match.group(3))
        other_count = int(match.group(6))

        passed = yes_count > no_count

        if actor == 'upper' or actor == 'lower':
            vote_chamber = actor
        else:
            vote_chamber = ''

        vote = Vote(chamber=vote_chamber,
                    start_date=date,
                    motion_text=motion,
                    result='pass' if passed else 'fail',
                    identifier=str(uniqid),
                    classification='passage',
                    bill=bill)
        vote.add_source(url)
        vote.set_count('yes', yes_count)
        vote.set_count('no', no_count)
        vote.set_count('other', other_count)

        # names in each section are separated by two or more whitespace chars
        yes_votes = re.split(r'\s{2,}', match.group(2).strip())
        no_votes = re.split(r'\s{2,}', match.group(4).strip())
        other_votes = re.split(r'\s{2,}', match.group(7).strip())

        # splitting an empty section leaves a single empty string; skip it
        for yes in yes_votes:
            if yes:
                vote.yes(yes)
        for no in no_votes:
            if no:
                vote.no(no)
        for other in other_votes:
            if other:
                vote.vote('other', other)

        yield vote
Beispiel #24
0
    def parse_vote(self, bill, actor, date, motion, url, uniqid):
        """Yield the vote described by the roll-call page at ``url``.

        Tallies are the numbers following the ``YEAS``, ``NAYS`` and
        ``ABSENT`` / ``ABSENT OR NOT VOTING`` headings; the text after each
        heading, split on two or more whitespace characters, gives the
        voters for that option.  ``url`` is recorded as a source on both
        ``bill`` and the vote, and ``str(uniqid)`` is the vote identifier.
        """
        body = self.get(url).text
        bill.add_source(url)

        pattern = (
            r'YEAS -?\s?(\d+)(.*)NAYS -?\s?(\d+)'
            r'(.*)ABSENT( OR NOT VOTING)? -?\s?'
            r'(\d+)(.*)'
        )
        hit = re.compile(pattern, re.MULTILINE | re.DOTALL).search(body)
        # NOTE(review): ``hit`` is None for a page without the headings, so
        # the lookups below raise AttributeError.
        totals = {
            'yes': int(hit.group(1)),
            'no': int(hit.group(3)),
            'other': int(hit.group(6)),
        }

        # only the two chambers are accepted; anything else is left blank
        vote_chamber = actor if actor in ('upper', 'lower') else ''
        outcome = 'pass' if totals['yes'] > totals['no'] else 'fail'

        vote = Vote(
            chamber=vote_chamber,
            start_date=date,
            motion_text=motion,
            result=outcome,
            identifier=str(uniqid),
            classification='passage',
            bill=bill,
        )
        vote.add_source(url)
        vote.set_count('yes', totals['yes'])
        vote.set_count('no', totals['no'])
        vote.set_count('other', totals['other'])

        def names_in(group_number):
            """Return the non-empty names listed in the given match group."""
            chunks = re.split(r'\s{2,}', hit.group(group_number).strip())
            return [chunk for chunk in chunks if chunk]

        yes_names = names_in(2)
        no_names = names_in(4)
        other_names = names_in(7)

        for who in yes_names:
            vote.yes(who)
        for who in no_names:
            vote.no(who)
        for who in other_names:
            vote.vote('other', who)

        yield vote
Beispiel #25
0
    def scrape_votes(self, bill):
        """Query the roll-call service for ``bill`` and yield each vote.

        The second token of ``bill.identifier`` and ``self.biennium`` fill
        in the ``GetRollCalls`` URL.  Each roll call's motion is suffixed
        with its sequence number, absent and excused members are counted
        together as "other", and the result is a pass only when the yes
        count is greater than the no and other counts combined.
        """
        bill_number = bill.identifier.split()[1]
        template = ("http://wslwebservices.leg.wa.gov/legislationservice.asmx/"
                    "GetRollCalls?billNumber=%s&biennium=%s")
        url = template % (bill_number, self.biennium)
        reply = self.get(url)
        root = lxml.etree.fromstring(reply.content)

        def text_of(node, expression):
            """Evaluate an XPath expression against ``node``."""
            return xpath(node, expression)

        for rc in text_of(root, "//wa:RollCall"):
            motion = text_of(rc, "string(wa:Motion)")
            seq_no = text_of(rc, "string(wa:SequenceNumber)")

            # drop the time portion of the timestamp before parsing
            stamp = text_of(rc, "string(wa:VoteDate)").split("T")[0]
            when = datetime.datetime.strptime(stamp, "%Y-%m-%d").date()

            yeas = int(text_of(rc, "string(wa:YeaVotes/wa:Count)"))
            nays = int(text_of(rc, "string(wa:NayVotes/wa:Count)"))
            absent = int(text_of(rc, "string(wa:AbsentVotes/wa:Count)"))
            excused = int(text_of(rc, "string(wa:ExcusedVotes/wa:Count)"))
            others = absent + excused

            agency = text_of(rc, "string(wa:Agency)")
            chamber = {'House': 'lower', 'Senate': 'upper'}[agency]

            if yeas > (nays + others):
                outcome = 'pass'
            else:
                outcome = 'fail'

            vote = Vote(
                chamber=chamber,
                start_date=when,
                motion_text='{} (#{})'.format(motion, seq_no),
                result=outcome,
                classification='other',
                bill=bill,
            )
            vote.set_count('yes', yeas)
            vote.set_count('no', nays)
            vote.set_count('other', others)
            vote.add_source(url)

            for ballot in text_of(rc, "wa:Votes/wa:Vote"):
                member = text_of(ballot, "string(wa:Name)")
                # NOTE(review): "VOte" is reproduced exactly as written;
                # presumably it matches the service's element name.
                choice = text_of(ballot, "string(wa:VOte)")

                if choice == 'Yea':
                    vote.yes(member)
                    continue
                if choice == 'Nay':
                    vote.no(member)
                    continue
                vote.vote('other', member)

            yield vote
Beispiel #26
0
    def scrape_votes(self, bill):
        """Yield a vote for every roll call the service lists for ``bill``.

        The request URL is built from the second token of
        ``bill.identifier`` and ``self.biennium``.  Absent and excused
        tallies are added together as the "other" count; a roll call passes
        only when yes votes outnumber no and other votes combined.
        """
        number = bill.identifier.split()[1]
        url = (
            "http://wslwebservices.leg.wa.gov/legislationservice.asmx/"
            "GetRollCalls?billNumber=%s&biennium=%s"
        ) % (number, self.biennium)
        fetched = self.get(url)
        document = lxml.etree.fromstring(fetched.content)

        for roll_call in xpath(document, "//wa:RollCall"):
            motion = xpath(roll_call, "string(wa:Motion)")

            # only the part of the timestamp before the "T" is the date
            raw_date = xpath(roll_call, "string(wa:VoteDate)").split("T")[0]
            vote_date = datetime.datetime.strptime(raw_date, "%Y-%m-%d").date()

            yes_count = int(xpath(roll_call, "string(wa:YeaVotes/wa:Count)"))
            no_count = int(xpath(roll_call, "string(wa:NayVotes/wa:Count)"))
            missing = [
                int(xpath(roll_call, "string(wa:AbsentVotes/wa:Count)")),
                int(xpath(roll_call, "string(wa:ExcusedVotes/wa:Count)")),
            ]
            other_count = missing[0] + missing[1]

            agency = xpath(roll_call, "string(wa:Agency)")
            chamber = {'House': 'lower', 'Senate': 'upper'}[agency]

            carried = yes_count > (no_count + other_count)
            vote = Vote(
                chamber=chamber,
                start_date=vote_date,
                motion_text=motion,
                result='pass' if carried else 'fail',
                classification='other',
                bill=bill,
            )
            for option, total in (
                ('yes', yes_count),
                ('no', no_count),
                ('other', other_count),
            ):
                vote.set_count(option, total)
            vote.add_source(url)

            for entry in xpath(roll_call, "wa:Votes/wa:Vote"):
                voter = xpath(entry, "string(wa:Name)")
                # NOTE(review): "VOte" is kept verbatim; presumably it is
                # the element name the service actually uses.
                cast = xpath(entry, "string(wa:VOte)")

                if cast not in ('Yea', 'Nay'):
                    vote.vote('other', voter)
                elif cast == 'Yea':
                    vote.yes(voter)
                else:
                    vote.no(voter)

            yield vote
Beispiel #27
0
    def parse_vote(self, actor, date, row, session, bill_id, bill_chamber,
                   source):
        """
        Turn an action row into a VoteEvent and yield it.

        The motion is the row's text with non-breaking spaces normalised
        and dashes stripped, or "passage" if nothing remains.  The first
        span is split from the right into outcome, yes, no and other
        tallies; voter names come from ``self.get_names`` applied to the
        text trailing the later spans.
        """
        spans = row.xpath('.//span')
        motion = row.text.replace(u'\u00a0', " ").replace("-", "").strip()
        if not motion:
            motion = "passage"

        summary = spans[0].text_content()
        outcome, yes_total, no_total, absent_total = summary.rsplit('-', 3)
        yes_names = self.get_names(spans[1].tail)
        no_names = self.get_names(spans[2].tail)

        # spans after the third list absent and excused members
        absent_names = [
            name
            for span in spans[3:]
            if span.text.startswith(('Absent', 'Excused'))
            for name in self.get_names(span.tail)
        ]

        # Translate the outcome wording; unrecognised text is kept as-is.
        result = outcome
        outcome_words = outcome.lower()
        for word, label in (('adopted', 'pass'),
                            ('passed', 'pass'),
                            ('failed', 'fail')):
            if word in outcome_words:
                result = label
                break

        vote = VoteEvent(
            chamber=actor,
            start_date=date,
            motion_text=motion,
            bill=bill_id,
            bill_chamber=bill_chamber,
            result=result,
            classification="passage",
            legislative_session=session,
        )
        vote.add_source(source)
        vote.set_count('yes', int(yes_total))
        vote.set_count('no', int(no_total))
        vote.set_count('absent', int(absent_total))

        def real(names):
            """Iterate over names that are neither empty nor the text 'None'."""
            return (name for name in names if name and name != 'None')

        for name in real(yes_names):
            vote.yes(name)
        for name in real(no_names):
            vote.no(name)
        for name in real(absent_names):
            vote.vote('absent', name)
        yield vote
def test_full_vote_event():
    """A vote event with a bill, a count and two voters imports completely."""
    juris = Jurisdiction.objects.create(id='jid', division_id='did')
    juris.legislative_sessions.create(name='1900', identifier='1900')

    people = [
        ScrapePerson('John Smith', primary_org='lower'),
        ScrapePerson('Adam Smith', primary_org='lower'),
    ]
    chamber = ScrapeOrganization(name='House', classification='lower')
    tax_bill = ScrapeBill('HB 1', '1900', 'Axe & Tack Tax Act',
                          from_organization=chamber._id)
    event = ScrapeVoteEvent(
        legislative_session='1900',
        motion_text='passage',
        start_date='1900-04-01',
        classification='passage:bill',
        result='pass',
        bill_chamber='lower',
        bill='HB 1',
        organization=chamber._id,
    )
    event.set_count('yes', 20)
    event.yes('John Smith')
    event.no('Adam Smith')

    # Everything the vote event refers to has to be imported before it.
    orgs = OrganizationImporter('jid')
    orgs.import_data([chamber.as_dict()])

    persons = PersonImporter('jid')
    persons.import_data([person.as_dict() for person in people])

    memberships = MembershipImporter('jid', persons, orgs, DumbMockImporter())
    memberships.import_data(
        [person._related[0].as_dict() for person in people])

    bills = BillImporter('jid', orgs, persons)
    bills.import_data([tax_bill.as_dict()])

    VoteEventImporter('jid', persons, orgs, bills).import_data(
        [event.as_dict()])

    assert VoteEvent.objects.count() == 1
    saved = VoteEvent.objects.get()
    assert saved.legislative_session == LegislativeSession.objects.get()
    assert saved.motion_classification == ['passage:bill']
    assert saved.bill == Bill.objects.get()

    only_count = saved.counts.get()
    assert only_count.option == 'yes'
    assert only_count.value == 20

    cast_votes = list(saved.votes.all())
    assert len(cast_votes) == 2
    for ballot in saved.votes.all():
        # anyone who is not John Smith must be Adam Smith, who voted no
        if ballot.voter_name != 'John Smith':
            assert ballot.option == 'no'
            assert ballot.voter == Person.objects.get(name='Adam Smith')
            continue
        assert ballot.option == 'yes'
        assert ballot.voter == Person.objects.get(name='John Smith')
Beispiel #29
0
    def parse_vote(self, actor, date, row, session, bill_id, bill_chamber, source):
        """
        Yield the VoteEvent described by an action row.

        ``row``'s text (non-breaking spaces normalised, dashes removed) is
        the motion, falling back to "passage".  Its first span carries
        "<outcome>-<yes>-<no>-<other>", the second and third spans are
        followed by the yes and no voters, and later spans whose text
        starts with "Absent" or "Excused" are followed by absent voters.
        Names are extracted with ``self.get_names``.
        """
        spans = row.xpath('.//span')
        motion = (
            row.text.replace(u'\u00a0', " ").replace("-", "").strip()
            or "passage"
        )
        tally_parts = spans[0].text_content().rsplit('-', 3)
        outcome, yes_total, no_total, absent_total = tally_parts
        yes_names = self.get_names(spans[1].tail)
        no_names = self.get_names(spans[2].tail)

        absent_names = []
        for trailing in spans[3:]:
            if not trailing.text.startswith(('Absent', 'Excused')):
                continue
            absent_names.extend(self.get_names(trailing.tail))

        # First keyword found in the outcome text decides the result; with
        # no keyword the outcome text itself is used.
        text = outcome.lower()
        if 'adopted' in text:
            result = 'pass'
        elif 'passed' in text:
            result = 'pass'
        elif 'failed' in text:
            result = 'fail'
        else:
            result = outcome

        vote = VoteEvent(
            chamber=actor,
            start_date=date,
            motion_text=motion,
            bill=bill_id,
            bill_chamber=bill_chamber,
            result=result,
            classification="passage",
            legislative_session=session,
        )
        vote.add_source(source)
        for option, raw_total in (
            ('yes', yes_total),
            ('no', no_total),
            ('absent', absent_total),
        ):
            vote.set_count(option, int(raw_total))

        for who in yes_names:
            # skip blanks and the literal placeholder 'None'
            if not who or who == 'None':
                continue
            vote.yes(who)
        for who in no_names:
            if not who or who == 'None':
                continue
            vote.no(who)
        for who in absent_names:
            if not who or who == 'None':
                continue
            vote.vote('absent', who)
        yield vote
Beispiel #30
0
    def asvote(self):
        """Assemble and return a ``VoteEvent`` from this object's accessors.

        Chamber, date, motion and outcome come from the corresponding
        accessor methods; the event is classified as a passage vote on
        ``self.bill`` and ``self.url`` is recorded as its source.
        """
        chamber = self.chamber()
        when = self.date()
        motion = self.motion()
        outcome = 'pass' if self.passed() else 'fail'
        event = VoteEvent(
            chamber=chamber,
            start_date=when,
            motion_text=motion,
            result=outcome,
            classification='passage',
            bill=self.bill,
        )

        event.set_count('yes', self.yes_count())
        event.set_count('no', self.no_count())
        event.set_count('other', self.other_count())

        for member in self.yes_votes():
            event.yes(member)
        for member in self.no_votes():
            event.no(member)
        for member in self.other_votes():
            event.vote('other', member)

        event.add_source(self.url)
        return event
Beispiel #31
0
    def asvote(self):
        """Return this roll call as a ``VoteEvent``.

        The event takes its chamber, date, motion and pass/fail result from
        the accessor methods, its tallies from the ``*_count`` methods and
        its individual votes from the ``*_votes`` methods.  ``self.url`` is
        added as the source.
        """
        fields = {
            'chamber': self.chamber(),
            'start_date': self.date(),
            'motion_text': self.motion(),
        }
        fields['result'] = 'pass' if self.passed() else 'fail'
        fields['classification'] = 'passage'
        fields['bill'] = self.bill
        vote_event = VoteEvent(**fields)

        vote_event.set_count('yes', self.yes_count())
        vote_event.set_count('no', self.no_count())
        vote_event.set_count('other', self.other_count())

        for name in self.yes_votes():
            vote_event.yes(name)
        for name in self.no_votes():
            vote_event.no(name)
        for name in self.other_votes():
            vote_event.vote('other', name)

        vote_event.add_source(self.url)
        return vote_event
Beispiel #32
0
    def asvote(self):
        """Convert this roll call into a ``VoteEvent`` and return it.

        ``self.url`` serves both as the event's ``pupa_id`` and as its
        source.  Tallies and individual votes are copied from the
        ``*_count`` and ``*_votes`` accessors for yes, no and other.
        """
        chamber = self.chamber()
        start = self.date()
        motion_text = self.motion()
        if self.passed():
            result = "pass"
        else:
            result = "fail"

        event = VoteEvent(
            chamber=chamber,
            start_date=start,
            motion_text=motion_text,
            result=result,
            classification="passage",
            bill=self.bill,
        )
        event.pupa_id = self.url  # URL contains sequence number

        event.set_count("yes", self.yes_count())
        event.set_count("no", self.no_count())
        event.set_count("other", self.other_count())

        for person in self.yes_votes():
            event.yes(person)
        for person in self.no_votes():
            event.no(person)
        for person in self.other_votes():
            event.vote("other", person)

        event.add_source(self.url)
        return event
Beispiel #33
0
    def scrape_bill(self, chamber, session, bill_id):
        """Scrape one bill's status page, yielding its votes and then the bill.

        The page is requested under the first four characters of
        ``session`` and, if that page is missing, under the last four.  If
        neither exists a warning is logged and nothing is yielded.

        Args:
            chamber: chamber given to the ``Bill`` and to each sponsorship.
            session: session identifier; also used in roll-call journal URLs.
            bill_id: space-separated bill identifier.  Spaces become dashes
                in the URL, and the first token minus its first character
                is looked up in ``bill_types``.

        Yields:
            A ``VoteEvent`` for every action that mentions a roll call and
            has a journal link, followed by the ``Bill`` itself.
        """
        # try and get bill for the first year of the session biennium
        url = 'http://legislature.mi.gov/doc.aspx?%s-%s' % (
            session[:4], bill_id.replace(' ', '-'))
        html = self.get(url).text
        # Otherwise, try second year of the session biennium
        if ('Page Not Found' in html or
                'The bill you are looking for is not available yet' in html):
            url = 'http://legislature.mi.gov/doc.aspx?%s-%s' % (
                session[-4:], bill_id.replace(' ', '-'))
            html = self.get(url).text
            if ('Page Not Found' in html or
                    'The bill you are looking for is not available yet' in html):
                self.warning("Cannot open bill page for {}; skipping".format(bill_id))
                return

        doc = lxml.html.fromstring(html)
        doc.make_links_absolute('http://legislature.mi.gov')

        title = doc.xpath('//span[@id="frg_billstatus_ObjectSubject"]')[0].text_content()

        # get B/R/JR/CR part and look up bill type
        bill_type = bill_types[bill_id.split(' ')[0][1:]]

        bill = Bill(bill_id, session, title, chamber=chamber,
                    classification=bill_type)
        bill.add_source(url)

        def add_version(note, link):
            """Attach a version link, labelling its media type when known."""
            lowered = link.lower()
            if lowered.endswith('.pdf'):
                bill.add_version_link(note, link, media_type='application/pdf')
            elif lowered.endswith('.htm'):
                bill.add_version_link(note, link, media_type='text/html')
            else:
                # An unrecognized extension used to leave the media type
                # unbound (or stale from an earlier row); add the link
                # without one instead, as the document links below do.
                self.warning("unrecognized version file type for %s: %s" %
                             (bill_id, link))
                bill.add_version_link(note, link)

        # sponsors
        sponsors = doc.xpath('//span[@id="frg_billstatus_SponsorList"]/a')
        for sponsor in sponsors:
            name = sponsor.text.replace(u'\xa0', ' ')
            # sometimes district gets added as a link
            if name.isnumeric():
                continue

            if len(sponsors) > 1:
                # with several sponsors, the text after the link marks the primary
                classification = (
                    'primary'
                    if sponsor.tail and 'primary' in sponsor.tail
                    else 'cosponsor'
                )
            else:
                classification = 'primary'
            bill.add_sponsorship(
                name=name,
                chamber=chamber,
                entity_type='person',
                primary=classification == 'primary',
                classification=classification,
            )

        bill.subject = doc.xpath('//span[@id="frg_billstatus_CategoryList"]/a/text()')

        # actions (skip header)
        for row in doc.xpath('//table[@id="frg_billstatus_HistoriesGridView"]/tr')[1:]:
            tds = row.xpath('td')  # date, journal link, action
            date = tds[0].text_content()
            journal = tds[1].text_content()
            action = tds[2].text_content()
            date = TIMEZONE.localize(datetime.datetime.strptime(date, "%m/%d/%Y"))
            # instead of trusting upper/lower case, use journal for actor
            actor = 'upper' if 'SJ' in journal else 'lower'
            classification = categorize_action(action)
            bill.add_action(action, date, chamber=actor, classification=classification)

            # check if action mentions a sub
            submatch = re.search(r'WITH SUBSTITUTE\s+([\w\-\d]+)', action, re.IGNORECASE)
            if submatch and tds[2].xpath('a'):
                version_url = tds[2].xpath('a/@href')[0]
                version_name = tds[2].xpath('a/text()')[0].strip()
                version_name = 'Substitute {}'.format(version_name)
                self.info("Found Substitute {}".format(version_url))
                add_version(version_name, version_url)

            # check if action mentions a vote
            rcmatch = re.search(r'Roll Call # (\d+)', action, re.IGNORECASE)
            if rcmatch:
                rc_num = rcmatch.groups()[0]
                # in format mileg.aspx?page=getobject&objectname=2011-SJ-02-10-011
                journal_link = tds[1].xpath('a/@href')
                if journal_link:
                    objectname = journal_link[0].rsplit('=', 1)[-1]
                    chamber_name = {'upper': 'Senate', 'lower': 'House'}[actor]
                    vote_url = BASE_URL + '/documents/%s/Journal/%s/htm/%s.htm' % (
                        session, chamber_name, objectname)
                    results = self.parse_roll_call(vote_url, rc_num)
                    vote = VoteEvent(
                        start_date=date,
                        chamber=actor,
                        bill=bill,
                        motion_text=action,
                        result='pass' if len(results['yes']) > len(results['no']) else 'fail',
                        classification='passage',
                    )

                    # check the expected counts vs actual
                    count = re.search(r'YEAS (\d+)', action, re.IGNORECASE)
                    count = int(count.groups()[0]) if count else 0
                    if count != len(results['yes']):
                        self.warning('vote count mismatch for %s %s, %d != %d' %
                                     (bill_id, action, count, len(results['yes'])))
                    count = re.search(r'NAYS (\d+)', action, re.IGNORECASE)
                    count = int(count.groups()[0]) if count else 0
                    if count != len(results['no']):
                        self.warning('vote count mismatch for %s %s, %d != %d' %
                                     (bill_id, action, count, len(results['no'])))

                    # the recorded counts are the parsed name lists' lengths,
                    # not the numbers quoted in the action text
                    vote.set_count('yes', len(results['yes']))
                    vote.set_count('no', len(results['no']))
                    vote.set_count('other', len(results['other']))

                    for name in results['yes']:
                        vote.yes(name)
                    for name in results['no']:
                        vote.no(name)
                    for name in results['other']:
                        vote.vote('other', name)

                    vote.add_source(vote_url)
                    yield vote
                else:
                    self.warning("missing journal link for %s %s" %
                                 (bill_id, journal))

        # versions
        for row in doc.xpath('//table[@id="frg_billstatus_DocumentGridTable"]/tr'):
            parsed = self.parse_doc_row(row)
            if parsed:
                name, link = parsed
                add_version(name, link)

        # documents
        for row in doc.xpath('//table[@id="frg_billstatus_HlaTable"]/tr'):
            document = self.parse_doc_row(row)
            if document:
                name, link = document
                bill.add_document_link(name, link)
        for row in doc.xpath('//table[@id="frg_billstatus_SfaTable"]/tr'):
            document = self.parse_doc_row(row)
            if document:
                name, link = document
                bill.add_document_link(name, link)

        yield bill
Beispiel #34
0
    def scrape_votes(self, bill, url):
        """Yield a vote for each distinct roll call on a bill's vote page.

        The page at ``url`` is fetched, non-breaking spaces are replaced by
        plain spaces, and every ``<p>`` whose text matches the
        "OKLAHOMA HOUSE" / "OKLAHOMA STATE SENATE" banner is treated as the
        start of one roll call.

        Args:
            bill: bill object passed through to each ``Vote``.
            url: address of the vote page; also used as the vote source and
                as the prefix of each ``pupa_id``.

        Yields:
            One ``Vote`` per roll call with its counts and individual votes
            attached.  Roll calls whose RCS# was already seen, or whose
            tally lines carry unexpected trailing text, are skipped.

        Note:
            An empty motion line ends the scrape of the whole page (the
            generator returns), not just the current roll call.
        """
        page = lxml.html.fromstring(self.get(url).text.replace(u'\xa0', ' '))

        seen_rcs = set()

        re_ns = "http://exslt.org/regular-expressions"
        # Raw string: the backslashes belong to the EXSLT regex, not Python.
        path = r"//p[re:test(text(), 'OKLAHOMA\s+(HOUSE|STATE\s+SENATE)')]"
        # Tally lines look like "<LABEL>: <count><rest>"; compiled once
        # instead of being rebuilt for every paragraph.
        tally_re = re.compile(
            r'(YEAS|NAYS|EXCUSED|VACANT|CONSTITUTIONAL '
            r'PRIVILEGE|NOT VOTING|N/V)\s*:\s*(\d+)(.*)')

        for header in page.xpath(path, namespaces={'re': re_ns}):
            bad_vote = False
            # Each chamber has the motion name on a different line of the file
            if 'HOUSE' in header.xpath("string()"):
                chamber = 'lower'
                motion_index = 8
            else:
                chamber = 'upper'
                motion_index = 13

            motion = header.xpath(
                "string(following-sibling::p[%d])" % motion_index).strip()
            motion = re.sub(r'\s+', ' ', motion)
            if not motion.strip():
                self.warning("Motion text not found")
                return
            # A trailing PASSED/FAILED on the motion line gives the outcome
            # directly; otherwise it is derived from the counts further down.
            match = re.match(r'^(.*) (PASSED|FAILED)$', motion)
            if match:
                motion = match.group(1)
                passed = match.group(2) == 'PASSED'
            else:
                passed = None

            rcs_p = header.xpath(
                "following-sibling::p[contains(., 'RCS#')]")[0]
            rcs_line = rcs_p.xpath("string()").replace(u'\xa0', ' ')
            rcs = re.search(r'RCS#\s+(\d+)', rcs_line).group(1)

            # The same roll call can appear more than once on a page.
            if rcs in seen_rcs:
                continue
            else:
                seen_rcs.add(rcs)

            # The date is read from the element right after the RCS# line.
            date_line = rcs_p.getnext().xpath("string()")
            date = re.search(r'\d+/\d+/\d+', date_line).group(0)
            date = datetime.datetime.strptime(date, "%m/%d/%Y").date()

            vtype = None
            counts = collections.defaultdict(int)
            votes = collections.defaultdict(list)

            # Nothing is recorded as a vote until the first YEAS tally line.
            seen_yes = False

            for sib in header.xpath("following-sibling::p")[13:]:
                line = sib.xpath("string()").replace('\r\n', ' ').strip()
                if "*****" in line:
                    break
                match = tally_re.match(line)
                if match:
                    if match.group(1) == 'YEAS' and 'RCS#' not in line:
                        vtype = 'yes'
                        seen_yes = True
                    elif match.group(1) == 'NAYS' and seen_yes:
                        vtype = 'no'
                    elif match.group(1) == 'VACANT':
                        continue  # skip these
                    elif seen_yes:
                        vtype = 'other'
                    # Text after the count on a tally line is unexpected.
                    if seen_yes and match.group(3).strip():
                        self.warning("Bad vote format, skipping.")
                        bad_vote = True
                    counts[vtype] += int(match.group(2))
                elif seen_yes:
                    # Names on one line are separated by runs of three spaces.
                    for name in line.split('   '):
                        if not name:
                            continue
                        if 'HOUSE' in name or 'SENATE ' in name:
                            continue
                        votes[vtype].append(name.strip())

            if bad_vote:
                continue

            if passed is None:
                passed = counts['yes'] > (counts['no'] + counts['other'])

            vote = Vote(chamber=chamber,
                        start_date=date.strftime('%Y-%m-%d'),
                        motion_text=motion,
                        result='pass' if passed else 'fail',
                        bill=bill,
                        classification='passage')
            vote.set_count('yes', counts['yes'])
            vote.set_count('no', counts['no'])
            vote.set_count('other', counts['other'])
            vote.pupa_id = url + '#' + rcs

            vote.add_source(url)

            for name in votes['yes']:
                vote.yes(name)
            for name in votes['no']:
                # NOTE(review): presumably a colon means a tally line leaked
                # into the name list; the scrape aborts rather than store it.
                if ':' in name:
                    raise Exception(name)
                vote.no(name)
            for name in votes['other']:
                vote.vote('other', name)

            yield vote
Beispiel #35
0
    def process_vote(self, votes, url, base_url, bill, legislators, chamber_dict, vote_results):
        """Yield a ``VoteEvent`` for each usable entry in ``votes["items"]``.

        Args:
            votes: decoded JSON object whose ``"items"`` list holds the votes.
            url: source URL recorded on every vote.
            base_url: prefix joined with an item's ``"link"`` when the vote
                details have to be fetched separately.
            bill: bill object passed through to each ``VoteEvent``.
            legislators: mapping indexed by the voter ids in the vote lists;
                the looked-up value is what gets recorded as the voter.
            chamber_dict: mapping from an item's ``"chamber"`` value to the
                chamber passed to ``VoteEvent``.
            vote_results: mapping from a lower-cased result string to a
                truthy (passed) or falsy (failed) value.

        Yields:
            One ``VoteEvent`` per item.  Items with no vote lists, no date,
            or malformed JSON are skipped with a warning.
        """
        for v in votes["items"]:
            if "yeas" not in v:
                # sometimes the actual vote is buried a second layer deep
                v = self.get(base_url+v["link"]).json()
                if "yeas" not in v:
                    self.logger.warning("No vote info available, skipping")
                    continue

            try:
                chamber = chamber_dict[v["chamber"]]
            except KeyError:
                chamber = "lower" if "house" in v["apn"] else "upper"

            # The date lives under "date" or, failing that, "occurred".
            try:
                raw_date = v["date"]
            except KeyError:
                try:
                    raw_date = v["occurred"]
                except KeyError:
                    self.logger.warning("No date found for vote, skipping")
                    continue
            date = self._tz.localize(datetime.datetime.strptime(raw_date, "%m/%d/%y"))
            date = "{:%Y-%m-%d}".format(date)

            try:
                motion = v["action"]
            except KeyError:
                motion = v["motiontype"]

            # Sometimes Ohio's SOLAR will only return part of the JSON, so in that case skip
            if (not motion and isinstance(v['yeas'], str)
               and isinstance(v['nays'], str)):
                warning_text = 'Malformed JSON found for vote ("revno" of {}); skipping'
                self.warning(warning_text.format(v['revno']))
                continue

            result = v.get("results") or v.get("passed")
            if result is None:
                # No explicit outcome: fall back to comparing the list sizes.
                if len(v['yeas']) > len(v['nays']):
                    result = "passed"
                else:
                    result = "failed"

            passed = vote_results[result.lower()]
            # Items with a "committee" key are built exactly like the others;
            # the committee is not attached as the organization.
            vote = VoteEvent(chamber=chamber,
                             start_date=date,
                             motion_text=motion,
                             result='pass' if passed else 'fail',
                             classification='passed',
                             bill=bill
                             )
            vote.pupa_id = str(v['revno'])
            # the yea and nay counts are not displayed, but vote totals are
            # and passage status is.
            yes_count = 0
            no_count = 0
            absent_count = 0
            excused_count = 0
            for voter_id in v["yeas"]:
                vote.yes(legislators[voter_id])
                yes_count += 1
            for voter_id in v["nays"]:
                vote.no(legislators[voter_id])
                no_count += 1
            if "absent" in v:
                for voter_id in v["absent"]:
                    vote.vote('absent', legislators[voter_id])
                    absent_count += 1
            if "excused" in v:
                for voter_id in v["excused"]:
                    vote.vote('excused', legislators[voter_id])
                    excused_count += 1

            vote.set_count('yes', yes_count)
            vote.set_count('no', no_count)
            vote.set_count('absent', absent_count)
            vote.set_count('excused', excused_count)
            # check to see if there are any other things that look
            # like vote categories, throw a warning if so
            for key, val in v.items():
                if (isinstance(val, list) and len(val) > 0 and
                   key not in ["yeas", "nays", "absent", "excused"]):
                    if val[0] in legislators:
                        self.logger.warning("{k} looks like a vote type that's not being counted."
                                            " Double check it?".format(k=key))
            vote.add_source(url)

            yield vote
Beispiel #36
0
    def scrape_pdf_for_votes(self, session, actor, date, motion, href):
        """Build a ``VoteEvent`` from the roll-call PDF at ``href``.

        The pass/fail word and the printed tallies are read from the
        document itself; the individual votes come from the lines that
        follow the tally line.

        Args:
            session: legislative session; also the key into
                ``session_details`` for the presiding officers' names.
            actor: organization passed to ``VoteEvent``; its
                ``'classification'`` entry is read when no pass/fail word
                is found.
            date: start date passed to ``VoteEvent``.
            motion: motion text, also used to classify the vote.
            href: PDF address, used as source and as ``pupa_id``.

        Returns:
            ``False`` when no lines could be fetched, otherwise the
            populated ``VoteEvent``.

        Raises:
            Exception: when the PDF contains two different pass/fail words.
        """
        warned = False
        COUNT_RE = re.compile(
            r'^(\d+)\s+YEAS?\s+(\d+)\s+NAYS?\s+(\d+)\s+PRESENT(?:\s+(\d+)\s+NOT\sVOTING)?\s*$'
        )
        # NOTE(review): 'PREVAILED' maps to 'fail' -- confirm this is intended.
        PASS_FAIL_WORDS = {
            'PASSED': 'pass',
            'PREVAILED': 'fail',
            'ADOPTED': 'pass',
            'CONCURRED': 'pass',
            'FAILED': 'fail',
            'LOST': 'fail',
        }

        pdflines = self.fetch_pdf_lines(href)
        if not pdflines:
            return False

        yes_count = no_count = present_count = 0
        passed = None
        counts_found = False
        vote_lines = []
        for line in pdflines:
            stripped = line.strip()
            if not stripped:
                continue

            # Pass/fail is a property of the document, not of the tallies.
            if stripped in PASS_FAIL_WORDS:
                outcome = PASS_FAIL_WORDS[stripped]
                # Crash on a second status word that contradicts the first.
                if passed is not None and passed != outcome:
                    raise Exception("Duplicate pass/fail matches in [%s]" % href)
                passed = outcome
                continue

            # The tallies are taken from the document, not from name counts.
            tally = COUNT_RE.match(line)
            if tally:
                raw_yes, raw_no, raw_present, _ = tally.groups()
                yes_count = int(raw_yes)
                no_count = int(raw_no)
                present_count = int(raw_present)
                counts_found = True
                continue

            # Only lines after the tally line can be vote lines.
            if counts_found and any(
                    re.search(r'^\s*({})\s+\w'.format(value), line)
                    for value in VOTE_VALUES):
                vote_lines.append(line)

        yes_votes = []
        no_votes = []
        present_votes = []
        excused_votes = []
        not_voting = []
        absent_votes = []
        by_code = {
            'Y': yes_votes,
            'N': no_votes,
            'P': present_votes,
            'E': excused_votes,
            'NV': not_voting,
            'A': absent_votes,
        }
        for name, vcode in find_columns_and_parse(vote_lines).items():
            if name == 'Mr. Speaker':
                name = session_details[session]['speaker']
            elif name == 'Mr. President':
                name = session_details[session]['president']
            else:
                # Converts "Davis,William" to "Davis, William".
                name = re.sub(r'\,([a-zA-Z])', r', \1', name)

            # Unknown vote codes are dropped.
            if vcode in by_code:
                by_code[vcode].append(name)

        if yes_count == 0 and no_count == 0 and present_count == 0:
            # fake the counts
            yes_count = len(yes_votes)
            no_count = len(no_votes)
        else:
            # audit the printed tallies against the parsed names
            if yes_count != len(yes_votes):
                self.warning("Mismatched yes count [expect: %i] [have: %i]" %
                             (yes_count, len(yes_votes)))
                warned = True
            if no_count != len(no_votes):
                self.warning("Mismatched no count [expect: %i] [have: %i]" %
                             (no_count, len(no_votes)))
                warned = True

        if passed is None:
            if actor['classification'] == 'lower':  # senate doesn't have these lines
                self.warning("No pass/fail word found; fall back to comparing yes and no vote.")
                warned = True
            passed = 'pass' if yes_count > no_count else 'fail'

        classification, _ = _categorize_action(motion)
        vote_event = VoteEvent(legislative_session=session,
                               motion_text=motion,
                               classification=classification,
                               organization=actor,
                               start_date=date,
                               result=passed)
        for name in yes_votes:
            vote_event.yes(name)
        for name in no_votes:
            vote_event.no(name)
        for option, names in (('other', present_votes),
                              ('excused', excused_votes),
                              ('not voting', not_voting),
                              ('absent', absent_votes)):
            for name in names:
                vote_event.vote(option, name)

        for option, total in (('yes', yes_count),
                              ('no', no_count),
                              ('other', present_count),
                              ('excused', len(excused_votes)),
                              ('absent', len(absent_votes)),
                              ('not voting', len(not_voting))):
            vote_event.set_count(option, total)

        vote_event.add_source(href)

        # for distinguishing between votes with the same id and on same day
        vote_event.pupa_id = href

        if warned:
            self.warning("Warnings were issued. Best to check %s" % href)
        return vote_event
Beispiel #37
0
    def scrape_vote(self, bill, name, url):
        """Yield the vote described by the roll-call page at ``url``.

        Args:
            bill: bill the vote belongs to; its ``legislative_session`` is
                used as the year when parsing the vote date.
            name: motion text for the vote.
            url: roll-call page address; a URL containing ``"VOTE/H"`` is
                treated as a lower-chamber vote, anything else as upper.

        Yields:
            A single ``Vote``, or nothing when the page contains
            ``'BUDGET ADDRESS'``.
        """
        # The two chambers lay their tables out differently: ``cols`` are the
        # starting columns of each name group, the offsets locate the name
        # and the yes/no marker cells relative to that start.
        if "VOTE/H" in url:
            vote_chamber = 'lower'
            cols = (1, 5, 9, 13)
            name_offset = 3
            yes_offset = 0
            no_offset = 1
        else:
            vote_chamber = 'upper'
            cols = (1, 6)
            name_offset = 4
            yes_offset = 1
            no_offset = 2

        # Connecticut's SSL is causing problems with Scrapelib, so use Requests
        # NOTE(security): verify=False disables TLS certificate checking, so
        # the response is not authenticated.  Left as-is because it is a
        # deliberate workaround; revisit once the certificate issue is gone.
        page = requests.get(url, verify=False).text

        if 'BUDGET ADDRESS' in page:
            return

        page = lxml.html.fromstring(page)

        def span_number(label):
            """Return the first integer in the <span> containing ``label``."""
            text = page.xpath("string(//span[contains(., '%s')])" % label)
            return int(re.match(r'[^\d]*(\d+)[^\d]*', text).group(1))

        yes_count = span_number('Those voting Yea')
        no_count = span_number('Those voting Nay')
        other_count = span_number('Those absent')
        need_count = span_number('Necessary for')

        # The page only gives month/day; the year comes from the session.
        date = page.xpath("string(//span[contains(., 'Taken on')])")
        date = re.match(r'.*Taken\s+on\s+(\d+/\s?\d+)', date).group(1)
        date = date.replace(' ', '')
        date = datetime.datetime.strptime(date + " " + bill.legislative_session,
                                          "%m/%d %Y").date()

        # not sure about classification.
        # ``need_count`` is the number of yeas necessary, so reaching it
        # exactly is a pass (hence >=, not >).
        vote = Vote(chamber=vote_chamber,
                    start_date=date,
                    motion_text=name,
                    result='pass' if yes_count >= need_count else 'fail',
                    classification='passage',
                    bill=bill
                    )
        vote.set_count('yes', yes_count)
        vote.set_count('no', no_count)
        vote.set_count('other', other_count)
        vote.add_source(url)

        table = page.xpath("//table")[0]
        for row in table.xpath("tr"):
            for i in cols:
                # Separate local so the ``name`` parameter is not shadowed.
                voter = row.xpath("string(td[%d])" % (
                    i + name_offset)).strip()

                if not voter or voter == 'VACANT':
                    continue

                if "Y" in row.xpath("string(td[%d])" %
                                    (i + yes_offset)):
                    vote.yes(voter)
                elif "N" in row.xpath("string(td[%d])" %
                                      (i + no_offset)):
                    vote.no(voter)
                else:
                    vote.vote('other', voter)

        yield vote
Beispiel #38
0
    def scrape_vote(self, bill, date, url):
        """Yield the vote described by the vote page at ``url``.

        The ``hdVote`` heading is split on ``', '``: the second piece names
        the location (House / Senate / Joint) and everything from the third
        piece on is the motion.

        Args:
            bill: bill the vote belongs to; its ``identifier`` is appended
                to the ``pupa_id``.
            date: start date passed to ``VoteEvent``.
            url: vote page address, used as source and ``pupa_id`` prefix.

        Yields:
            A single ``VoteEvent``, or nothing when the heading contains
            'No Bill Action' or no motion text can be found.

        Raises:
            ScrapeError: when the location is not House, Senate or Joint.
        """
        page = self.get(url).text
        page = lxml.html.fromstring(page)

        header = page.xpath("string(//h3[contains(@id, 'hdVote')])")

        if 'No Bill Action' in header:
            self.warning("bad vote header -- skipping")
            return
        header_parts = header.split(', ')
        location = header_parts[1]

        if location.startswith('House'):
            chamber = 'lower'
        elif location.startswith('Senate'):
            chamber = 'upper'
        elif location.startswith('Joint'):
            chamber = 'legislature'
        else:
            raise ScrapeError("Bad chamber: %s" % location)

        motion = ', '.join(header_parts[2:]).strip()
        if motion:
            # If we can't detect a motion, skip this vote
            yes_count = int(
                page.xpath("string(//span[contains(@id, 'tdAyes')])"))
            no_count = int(
                page.xpath("string(//span[contains(@id, 'tdNays')])"))
            excused_count = int(
                page.xpath("string(//span[contains(@id, 'tdExcused')])"))
            absent_count = int(
                page.xpath("string(//span[contains(@id, 'tdAbsent')])"))

            passed = yes_count > no_count

            # ``vote_type`` rather than ``type`` so the builtin stays usable.
            if motion.startswith('Do Pass'):
                vote_type = 'passage'
            elif motion == 'Concurred in amendments':
                vote_type = 'amendment'
            elif motion == 'Veto override':
                vote_type = 'veto_override'
            else:
                vote_type = 'other'

            vote = VoteEvent(chamber=chamber,
                             start_date=date,
                             motion_text=motion,
                             result='pass' if passed else 'fail',
                             classification=vote_type,
                             bill=bill)
            # The vote page URL has a unique ID
            # However, some votes are "consent calendar" events,
            # and relate to the passage of _multiple_ bills
            # These can't be modeled yet in Pupa, but for now we can
            # append a bill ID to the URL that forms the `pupa_id`
            # https://github.com/opencivicdata/pupa/issues/308
            vote.pupa_id = '{}#{}'.format(url,
                                          bill.identifier.replace(' ', ''))

            vote.add_source(url)
            vote.set_count('yes', yes_count)
            vote.set_count('no', no_count)
            vote.set_count('excused', excused_count)
            vote.set_count('absent', absent_count)

            # Each option cell is preceded by the cell naming the voter.
            for td in page.xpath("//table[@id='tblVoteTotals']/tbody/tr/td"):
                # ``td.text`` is None for a cell with no leading text; treat
                # it as empty instead of crashing on ``None.strip()``.
                option_or_person = (td.text or '').strip()
                if option_or_person in ('Aye', 'Yea'):
                    vote.yes(td.getprevious().text.strip())
                elif option_or_person == 'Nay':
                    vote.no(td.getprevious().text.strip())
                elif option_or_person == 'Excused':
                    vote.vote('excused', td.getprevious().text.strip())
                elif option_or_person == 'Absent':
                    vote.vote('absent', td.getprevious().text.strip())

            yield vote
Beispiel #39
0
    def process_vote(self, vote, bill, member_ids):
        """Turn one vote record into a ``VoteEvent`` and a bill action.

        Besides building the vote, this adds the motion as an action on
        ``bill``, writes a ``"type"`` key back into ``vote`` for amendment
        and reading motions, and hands any ``"AttachmentPath"`` to
        ``self.add_documents``.

        Args:
            vote: dict for one vote; ``"ReadingDescription"``,
                ``"DateOfVote"`` and ``"MemberVotes"`` are read, and
                ``"VoteResult"``, ``"AttachmentPath"`` and
                ``"DocumentType"`` are used when present.
            bill: bill object the vote and action are attached to.
            member_ids: mapping from integer member id to member name.

        Returns:
            The populated ``VoteEvent``, or ``None`` when the motion or the
            result cannot be determined.
        """
        try:
            motion = vote["ReadingDescription"]
        except KeyError:
            self.logger.warning("Can't even figure out what we're voting on. Skipping.")
            return

        motion_lower = motion.lower()

        if "VoteResult" not in vote:
            # No explicit result: infer it from the motion text.  The status
            # is 'pass' because we're talking about the motion (to postpone
            # or table), not the amendment.
            if "postponed" in motion_lower:
                result = "Postponed"
                status = 'pass'
            elif "tabled" in motion_lower:
                result = "Tabled"
                status = 'pass'
            else:
                self.logger.warning("Could not find result of vote, skipping.")
                return
        else:

            result = vote["VoteResult"].strip().lower()
            statuses = {"approved": 'pass',
                        "disapproved": 'fail',
                        "failed": 'fail',
                        "declined": 'fail',
                        "passed": 'pass'}

            try:
                status = statuses[result]
            except KeyError:
                self.logger.warning("Unexpected vote result '{result},' skipping vote.".format(
                    result=result)
                )
                return

        date = self.date_format(vote["DateOfVote"])

        leg_votes = vote["MemberVotes"]
        v = VoteEvent(chamber='legislature',
                      start_date=date,
                      motion_text=motion,
                      result=status,
                      classification='passage',
                      bill=bill
                      )
        yes_count = no_count = other_count = 0
        for leg_vote in leg_votes:
            mem_name = member_ids[int(leg_vote["MemberId"])]
            # "1" is recorded as a yes, "2" as a no, anything else as other.
            if leg_vote["Vote"] == "1":
                yes_count += 1
                v.yes(mem_name)
            elif leg_vote["Vote"] == "2":
                no_count += 1
                v.no(mem_name)
            else:
                other_count += 1
                v.vote('other', mem_name)

        v.set_count('yes', yes_count)
        v.set_count('no', no_count)
        v.set_count('other', other_count)

        # the documents for the readings are inside the vote
        # level in the json, so we'll deal with them here
        # and also add relevant actions

        if "amendment" in motion_lower:
            # Deferral has to be tested first: a tabled or postponed motion
            # carries status 'pass' and would otherwise count as passage.
            if result in ("Tabled", "Postponed"):
                t = "amendment-deferral"
            elif status == 'pass':
                t = "amendment-passage"
            else:
                t = "amendment-failure"
        else:
            t = None
            for phrases, reading in (
                    (("first reading", "1st reading"), "reading-1"),
                    (("second reading", "2nd reading"), "reading-2"),
                    (("third reading", "3rd reading", "final reading"),
                     "reading-3")):
                if any(phrase in motion_lower for phrase in phrases):
                    t = reading
                    break

        bill.add_action(motion, date, classification=t)

        # ``t`` stays None for motions that are neither amendments nor
        # readings; a membership test on None would raise TypeError.
        if t is not None:
            if "amendment" in t:
                vote["type"] = "amendment"
            elif "reading" in t:
                vote["type"] = t.replace("bill:", "")

        # some documents/versions are hiding in votes.
        if "AttachmentPath" in vote:
            is_version = False
            try:
                if vote["DocumentType"] in ["enrollment", "engrossment", "introduction"]:
                    is_version = True
            except KeyError:
                pass

            if motion in ["enrollment", "engrossment", "introduction"]:
                is_version = True

            self.add_documents(vote["AttachmentPath"], bill, is_version)

        return v
Beispiel #40
0
    def process_vote(self, votes, url, base_url, bill, legislators, chamber_dict, vote_results):
        """Yield a ``VoteEvent`` for each usable entry in ``votes["items"]``.

        Args:
            votes: decoded JSON object whose ``"items"`` list holds the votes.
            url: source URL recorded on every vote.
            base_url: prefix joined with an item's ``"link"`` when the vote
                details have to be fetched separately.
            bill: bill object passed through to each ``VoteEvent``; its
                ``identifier`` becomes part of the ``pupa_id``.
            legislators: mapping indexed by the voter ids in the vote lists;
                the looked-up value is what gets recorded as the voter.
            chamber_dict: mapping from an item's ``"chamber"`` value to the
                chamber passed to ``VoteEvent``.
            vote_results: mapping from a lower-cased result string to a
                truthy (passed) or falsy (failed) value.

        Yields:
            One ``VoteEvent`` per item.  Items with no vote lists, no date,
            or malformed JSON are skipped with a warning.
        """
        for v in votes["items"]:
            if "yeas" not in v:
                # sometimes the actual vote is buried a second layer deep
                v = self.get(base_url+v["link"]).json()
                if "yeas" not in v:
                    self.logger.warning("No vote info available, skipping")
                    continue

            try:
                chamber = chamber_dict[v["chamber"]]
            except KeyError:
                chamber = "lower" if "house" in v["apn"] else "upper"

            # The date lives under "date" or, failing that, "occurred".
            try:
                raw_date = v["date"]
            except KeyError:
                try:
                    raw_date = v["occurred"]
                except KeyError:
                    self.logger.warning("No date found for vote, skipping")
                    continue
            date = self._tz.localize(datetime.datetime.strptime(raw_date, "%m/%d/%y"))
            date = "{:%Y-%m-%d}".format(date)

            try:
                motion = v["action"]
            except KeyError:
                motion = v["motiontype"]

            # Sometimes Ohio's SOLAR will only return part of the JSON, so in that case skip
            if (not motion and isinstance(v['yeas'], str)
               and isinstance(v['nays'], str)):
                warning_text = 'Malformed JSON found for vote ("revno" of {}); skipping'
                self.warning(warning_text.format(v['revno']))
                continue

            result = v.get("results") or v.get("passed")
            if result is None:
                # No explicit outcome: fall back to comparing the list sizes.
                if len(v['yeas']) > len(v['nays']):
                    result = "passed"
                else:
                    result = "failed"

            passed = vote_results[result.lower()]
            # Items with a "committee" key are built exactly like the others;
            # the committee is not attached as the organization.
            vote = VoteEvent(chamber=chamber,
                             start_date=date,
                             motion_text=motion,
                             result='pass' if passed else 'fail',
                             classification='passed',
                             bill=bill
                             )
            # Concatenate the bill identifier and vote identifier to avoid collisions
            vote.pupa_id = '{}:{}'.format(bill.identifier.replace(' ', ''), v['revno'])
            # the yea and nay counts are not displayed, but vote totals are
            # and passage status is.
            yes_count = 0
            no_count = 0
            absent_count = 0
            excused_count = 0
            for voter_id in v["yeas"]:
                vote.yes(legislators[voter_id])
                yes_count += 1
            for voter_id in v["nays"]:
                vote.no(legislators[voter_id])
                no_count += 1
            if "absent" in v:
                for voter_id in v["absent"]:
                    vote.vote('absent', legislators[voter_id])
                    absent_count += 1
            if "excused" in v:
                for voter_id in v["excused"]:
                    vote.vote('excused', legislators[voter_id])
                    excused_count += 1

            vote.set_count('yes', yes_count)
            vote.set_count('no', no_count)
            vote.set_count('absent', absent_count)
            vote.set_count('excused', excused_count)
            # check to see if there are any other things that look
            # like vote categories, throw a warning if so
            for key, val in v.items():
                if (isinstance(val, list) and len(val) > 0 and
                   key not in ["yeas", "nays", "absent", "excused"]):
                    if val[0] in legislators:
                        self.logger.warning("{k} looks like a vote type that's not being counted."
                                            " Double check it?".format(k=key))
            vote.add_source(url)

            yield vote
Beispiel #41
0
    def scrape_votes_old(self, bill, billname, session):
        """Yield votes for ``bill`` from the archived bill page.

        Every link whose href contains 'JournalText' is one vote: the link
        text is the date, the second cell of its row gives
        "<chamber> - <motion>", and the following row holds the yea and nay
        name tables.

        Args:
            bill: bill object passed through to each ``VoteEvent``.
            billname: bill name used to build the archive URL.
            session: session string used to build the archive URL.

        Yields:
            One ``VoteEvent`` per journal link, passing when there are more
            yea names than nay names.

        Raises:
            ScrapeError: when the chamber is neither 'House' nor 'Senate'.
        """
        vote_url = ('http://archives.legislature.state.oh.us/bills.cfm?ID=' +
                    session + '_' + billname)
        chambers = {'House': 'lower', 'Senate': 'upper'}

        def voter_names(row, marker):
            """Return the non-empty cell texts of the div whose id has ``marker``."""
            div = row.xpath("td/font/div[contains(@id, '%s')]" % marker)[0]
            texts = (cell.xpath("string()") for cell in div.xpath("table/tr/td"))
            return [text for text in texts if text]

        doc = lxml.html.fromstring(self.get(vote_url).text)

        for jlink in doc.xpath("//a[contains(@href, 'JournalText')]"):
            when = self._tz.localize(
                datetime.datetime.strptime(jlink.text, "%m/%d/%Y")).date()
            date = "{:%Y-%m-%d}".format(when)

            pieces = jlink.xpath("string(../../../td[2])").split(" - ")
            label = pieces[0]
            if label not in chambers:
                raise ScrapeError("Bad chamber: %s" % label)
            chamber = chambers[label]

            # Only the first line after the dash is the motion.
            motion = pieces[1].split("\n")[0].strip()

            # The names sit in the row after the one holding the link.
            vote_row = jlink.xpath("../../..")[0].getnext()
            yeas = voter_names(vote_row, 'Yea')
            nays = voter_names(vote_row, 'Nay')

            outcome = 'pass' if len(yeas) > len(nays) else 'fail'
            vote = VoteEvent(
                chamber=chamber,
                start_date=date,
                motion_text=motion,
                result=outcome,
                bill=bill,
                classification='passed',
            )

            for voter in yeas:
                vote.yes(voter)
            for voter in nays:
                vote.no(voter)

            vote.add_source(vote_url)

            yield vote
Beispiel #42
0
    def scrape_vote(self, bill, date, url):
        """Yield the vote described by the vote page at ``url``.

        The ``hdVote`` heading is split on ``', '``: the second piece names
        the location (House / Senate / Joint) and everything from the third
        piece on is the motion.

        Args:
            bill: bill object passed through to ``VoteEvent``.
            date: start date passed to ``VoteEvent``.
            url: vote page address, used as source and as ``pupa_id``.

        Yields:
            A single ``VoteEvent``, or nothing when the heading contains
            'No Bill Action' or no motion text can be found.

        Raises:
            ScrapeError: when the location is not House, Senate or Joint.
        """
        page = lxml.html.fromstring(self.get(url).text)

        header = page.xpath("string(//h4[contains(@id, 'hdVote')])")
        if 'No Bill Action' in header:
            self.warning("bad vote header -- skipping")
            return

        header_parts = header.split(', ')
        location = header_parts[1]
        for prefix, chamber in (('House', 'lower'),
                                ('Senate', 'upper'),
                                ('Joint', 'legislature')):
            if location.startswith(prefix):
                break
        else:
            raise ScrapeError("Bad chamber: %s" % location)

        motion = ', '.join(header_parts[2:]).strip()
        if not motion:
            # If we can't detect a motion, skip this vote
            return

        def cell_total(cell_id):
            """Return the integer text of the <td> whose id has ``cell_id``."""
            return int(page.xpath("string(//td[contains(@id, '%s')])" % cell_id))

        yes_count = cell_total('tdAyes')
        no_count = cell_total('tdNays')
        excused_count = cell_total('tdExcused')
        absent_count = cell_total('tdAbsent')

        if motion.startswith('Do Pass'):
            vote_type = 'passage'
        else:
            vote_type = {
                'Concurred in amendments': 'amendment',
                'Veto override': 'veto_override',
            }.get(motion, 'other')

        outcome = 'pass' if yes_count > no_count else 'fail'
        vote = VoteEvent(chamber=chamber,
                         start_date=date,
                         motion_text=motion,
                         result=outcome,
                         classification=vote_type,
                         bill=bill
                         )
        vote.pupa_id = url      # vote id is in URL

        vote.add_source(url)
        for option, total in (('yes', yes_count),
                              ('no', no_count),
                              ('excused', excused_count),
                              ('absent', absent_count)):
            vote.set_count(option, total)

        # Each option cell is preceded by the cell naming the voter.
        for td in page.xpath("//table[contains(@id, 'tblVotes')]/tr/td"):
            option = td.text
            if option not in ('Aye', 'Yea', 'Nay', 'Excused', 'Absent'):
                continue
            voter = td.getprevious().text.strip()
            if option == 'Nay':
                vote.no(voter)
            elif option in ('Excused', 'Absent'):
                vote.vote(option.lower(), voter)
            else:
                vote.yes(voter)

        yield vote
Beispiel #43
0
    def scrape_votes(self, bill_page, page_url, bill, insert, year):
        """Yield a VoteEvent for each passage vote row found on a bill page.

        Parameters:
            bill_page: HTML text of the bill page.
            page_url: URL of the bill page; added as a vote source once the
                per-member details have been read.
            bill: the bill the votes belong to.
            insert: session path fragment used to build the vote detail URL.
            year: unused here; kept for interface compatibility.

        Rows without a "Passage" link are logged and skipped. A vote whose
        detail URL is already in ``self._seen_votes`` is skipped as well.
        If the detail page cannot be fetched, the vote is still yielded with
        counts only.
        """
        root = lxml.html.fromstring(bill_page)
        trs = root.xpath('/html/body/div/table[6]//tr')
        assert len(trs) >= 1, "Didn't find the Final Passage Votes' table"

        for tr in trs[1:]:
            links = tr.xpath('td/a[contains(text(), "Passage")]')
            if len(links) == 0:
                self.warning("Non-passage vote found for {}; ".format(bill.identifier) +
                             "probably a motion for the calendar. It will be skipped.")
                # Actually skip the row: without this, ``link`` below is
                # either undefined or left over from the previous row.
                continue

            assert len(links) == 1, \
                "Too many votes found for XPath query, on bill {}".format(bill.identifier)
            link = links[0]

            motion = link.text
            if 'Assembly' in motion:
                chamber = 'lower'
            else:
                chamber = 'upper'

            # Each cell holds either the vote date or a "<category> <count>"
            # pair; collect the counts by category.
            votes = {}
            tds = tr.xpath('td')
            for td in tds:
                if td.text:
                    text = td.text.strip()
                    date = re.match('... .*?, ....', text)
                    count = re.match('(?P<category>.*?) (?P<votes>[0-9]+)[,]?', text)
                    if date:
                        vote_date = datetime.strptime(text, '%b %d, %Y')
                    elif count:
                        votes[count.group('category')] = int(count.group('votes'))

            yes = votes['Yea']
            no = votes['Nay']
            excused = votes['Excused']
            not_voting = votes['Not Voting']
            absent = votes['Absent']
            other = excused + not_voting + absent
            passed = yes > no

            vote = VoteEvent(chamber=chamber, start_date=self._tz.localize(vote_date),
                             motion_text=motion, result='pass' if passed else 'fail',
                             classification='passage', bill=bill,
                             )
            vote.set_count('yes', yes)
            vote.set_count('no', no)
            vote.set_count('other', other)
            vote.set_count('not voting', not_voting)
            vote.set_count('absent', absent)
            # try to get vote details
            try:
                vote_url = 'http://www.leg.state.nv.us/Session/%s/Reports/%s' % (
                    insert, link.get('href'))
                vote.pupa_id = vote_url
                vote.add_source(vote_url)

                if vote_url in self._seen_votes:
                    self.warning('%s is included twice, skipping second', vote_url)
                    continue
                else:
                    self._seen_votes.add(vote_url)

                page = self.get(vote_url).text
                page = page.replace(u"\xa0", " ")
                root = lxml.html.fromstring(page)

                # Second cell is the member name, third is how they voted.
                for el in root.xpath('//table[2]/tr'):
                    tds = el.xpath('td')
                    name = tds[1].text_content().strip()
                    vote_result = tds[2].text_content().strip()

                    if vote_result == 'Yea':
                        vote.yes(name)
                    elif vote_result == 'Nay':
                        vote.no(name)
                    else:
                        vote.vote('other', name)
                vote.add_source(page_url)
            except scrapelib.HTTPError:
                self.warning("failed to fetch vote page, adding vote without details")

            yield vote
Beispiel #44
0
    def _parse_votes(self, url, vote, bill):
        '''Given a vote url and a vote object, extract the voters and
        the vote counts from the vote page and build a VoteEvent.

        ``vote`` is read as a mapping with the keys 'action', 'vote_url',
        'chamber' and 'date'; its 'motion' key is set here as a side effect.

        Returns the VoteEvent, or None when the document could not be
        fetched/parsed or the page has no vote table. URLs ending in
        '.pdf' are delegated to PDFCommitteeVote.

        Raises Exception when pass/fail cannot be determined from the
        action text, or when passage and failure indicators are both
        present while the yeas outnumber the nays.
        '''
        if url.lower().endswith('.pdf'):

            try:
                resp = self.get(url)
            except HTTPError:
                # This vote document wasn't found.
                msg = 'No document found at url %r' % url
                self.logger.warning(msg)
                return

            try:
                v = PDFCommitteeVote(url, resp.content, bill)
                return v.asvote()
            except PDFCommitteeVoteParseError:
                # Warn and skip.
                self.warning("Could't parse committee vote at %r" % url)
                return

        html = self.get(url).text
        doc = lxml.html.fromstring(html)
        doc.make_links_absolute(url)

        # Yes, no, excused, absent.
        try:
            vals = doc.xpath('//table')[1].xpath('tr/td/text()')
        except IndexError:
            # Most likely was a bogus link lacking vote data.
            return

        yes_count, no_count, excused_count, absent_count = map(int, vals)

        # Get the motion.
        try:
            # ``tail`` is None when no text follows the last <br>; treat
            # that like an empty motion instead of crashing on .strip().
            motion = (doc.xpath('//br')[-1].tail or '').strip()
        except IndexError:
            # Some of them mysteriously have no motion listed.
            motion = vote['action']

        if not motion:
            motion = vote['action']

        vote['motion'] = motion

        action = vote['action']
        vote_url = vote['vote_url']

        vote = VoteEvent(
            chamber=vote['chamber'],
            start_date=vote['date'],
            motion_text=vote['motion'],
            result='fail',      # placeholder
            classification='passage',
            bill=bill,
            bill_action=vote['action'],
        )
        vote.pupa_id = vote_url         # URL contains sequence number
        vote.add_source(vote_url)
        vote.set_count('yes', yes_count)
        vote.set_count('no', no_count)
        vote.set_count('excused', excused_count)
        vote.set_count('absent', absent_count)

        # Compiled once, outside the loop.
        # Text inside the first pair of parentheses is taken as a short name.
        short_name_re = re.compile(r".*?\((.*?)\)")
        # Any (...) or [...] group is removed from the recorded name.
        bracketed_re = re.compile(r"[\(\[].*?[\)\]]")

        for text in doc.xpath('//table')[2].xpath('tr/td/text()'):
            if not text.strip(u'\xa0'):
                continue
            # Cell text is "<code>\xa0<name>" with the code being Y/N/E/A.
            v, name = filter(None, text.split(u'\xa0'))
            short_name = short_name_re.findall(name)
            if len(short_name) > 0:
                note = 'Short Name: ' + short_name[0]
            else:
                note = ''
            # Name without brackets like 'Kary, Douglas'
            name = bracketed_re.sub("", name)
            if v == 'Y':
                vote.yes(name, note=note)
            elif v == 'N':
                vote.no(name, note=note)
            elif v == 'E':
                vote.vote('excused', name, note=note)
            elif v == 'A':
                vote.vote('absent', name, note=note)

        # code to determine value of `passed`
        passed = None

        # some actions take a super majority, so we aren't just
        # comparing the yeas and nays here.
        for i in vote_passage_indicators:
            if i in action:
                passed = True
                break
        for i in vote_failure_indicators:
            if i in action and passed:
                # a quick explanation:  originally an exception was
                # thrown if both passage and failure indicators were
                # present because I thought that would be a bug in my
                # lists.  Then I found 2007 HB 160.
                # Now passed = False if the nays outnumber the yays..
                # I won't automatically mark it as passed if the yays
                # outnumber the nays because I don't know what requires
                # a supermajority in MT.
                if no_count >= yes_count:
                    passed = False
                    break
                else:
                    raise Exception("passage and failure indicator "
                                    "both present at: %s" % url)
            if i in action and passed is None:
                passed = False
                break
        for i in vote_ambiguous_indicators:
            if i in action:
                passed = yes_count > no_count
                break
        if passed is None:
            raise Exception("Unknown passage at: %s" % url)

        vote.result = 'pass' if passed else 'fail'

        return vote
Beispiel #45
0
    def _parse_votes(self, url, vote, bill):
        '''Given a vote url and a vote object, extract the voters and
        the vote counts from the vote page and build a VoteEvent.

        ``vote`` is read as a mapping with the keys 'action', 'vote_url',
        'chamber' and 'date'; its 'motion' key is set here as a side effect.

        Returns the VoteEvent, or None when the document could not be
        fetched/parsed or the page has no vote table. URLs ending in
        '.pdf' are delegated to PDFCommitteeVote.

        Raises Exception when pass/fail cannot be determined from the
        action text, or when passage and failure indicators are both
        present while the yeas outnumber the nays.
        '''
        if url.lower().endswith('.pdf'):

            try:
                resp = self.get(url)
            except HTTPError:
                # This vote document wasn't found.
                msg = 'No document found at url %r' % url
                self.logger.warning(msg)
                return

            try:
                v = PDFCommitteeVote(url, resp.content, bill)
                return v.asvote()
            except PDFCommitteeVoteParseError:
                # Warn and skip.
                self.warning("Could't parse committee vote at %r" % url)
                return

        html = self.get(url).text
        doc = lxml.html.fromstring(html)
        doc.make_links_absolute(url)

        # Yes, no, excused, absent.
        try:
            vals = doc.xpath('//table')[1].xpath('tr/td/text()')
        except IndexError:
            # Most likely was a bogus link lacking vote data.
            return

        yes_count, no_count, excused_count, absent_count = map(int, vals)

        # Get the motion.
        try:
            # ``tail`` is None when no text follows the last <br>; treat
            # that like an empty motion instead of crashing on .strip().
            motion = (doc.xpath('//br')[-1].tail or '').strip()
        except IndexError:
            # Some of them mysteriously have no motion listed.
            motion = vote['action']

        if not motion:
            motion = vote['action']

        vote['motion'] = motion

        action = vote['action']
        vote_url = vote['vote_url']

        vote = VoteEvent(
            chamber=vote['chamber'],
            start_date=vote['date'],
            motion_text=vote['motion'],
            result='fail',      # placeholder
            classification='passage',
            bill=bill,
            bill_action=vote['action'],
        )
        vote.pupa_id = vote_url         # URL contains sequence number
        vote.add_source(vote_url)
        vote.set_count('yes', yes_count)
        vote.set_count('no', no_count)
        vote.set_count('excused', excused_count)
        vote.set_count('absent', absent_count)

        # Raw strings keep the regex backslashes from being read as
        # (invalid) string escapes; compiled once, outside the loop.
        # Text inside the first pair of parentheses is taken as a short name.
        short_name_re = re.compile(r".*?\((.*?)\)")
        # Any (...) or [...] group is removed from the recorded name.
        bracketed_re = re.compile(r"[\(\[].*?[\)\]]")

        for text in doc.xpath('//table')[2].xpath('tr/td/text()'):
            if not text.strip(u'\xa0'):
                continue
            # Cell text is "<code>\xa0<name>" with the code being Y/N/E/A.
            v, name = filter(None, text.split(u'\xa0'))
            short_name = short_name_re.findall(name)
            if len(short_name) > 0:
                note = 'Short Name: ' + short_name[0]
            else:
                note = ''
            # Name without brackets like 'Kary, Douglas'
            name = bracketed_re.sub("", name)
            if v == 'Y':
                vote.yes(name, note=note)
            elif v == 'N':
                vote.no(name, note=note)
            elif v == 'E':
                vote.vote('excused', name, note=note)
            elif v == 'A':
                vote.vote('absent', name, note=note)

        # code to determine value of `passed`
        passed = None

        # some actions take a super majority, so we aren't just
        # comparing the yeas and nays here.
        for i in vote_passage_indicators:
            if i in action:
                passed = True
                break
        for i in vote_failure_indicators:
            if i in action and passed:
                # a quick explanation:  originally an exception was
                # thrown if both passage and failure indicators were
                # present because I thought that would be a bug in my
                # lists.  Then I found 2007 HB 160.
                # Now passed = False if the nays outnumber the yays..
                # I won't automatically mark it as passed if the yays
                # outnumber the nays because I don't know what requires
                # a supermajority in MT.
                if no_count >= yes_count:
                    passed = False
                    break
                else:
                    raise Exception("passage and failure indicator "
                                    "both present at: %s" % url)
            if i in action and passed is None:
                passed = False
                break
        for i in vote_ambiguous_indicators:
            if i in action:
                passed = yes_count > no_count
                break
        if passed is None:
            raise Exception("Unknown passage at: %s" % url)

        vote.result = 'pass' if passed else 'fail'

        return vote
Beispiel #46
0
    def scrape_vote(self, bill, date, url):
        """Scrape the vote page at ``url`` and yield one VoteEvent for ``bill``.

        The page header is split on ', ': the second piece is the location
        (used to pick the chamber) and everything after it is the motion.
        Nothing is yielded when the header contains 'No Bill Action' or when
        the motion text is empty.

        Raises ScrapeError if the location does not start with 'House',
        'Senate' or 'Joint'.
        """
        page = self.get(url).text
        page = lxml.html.fromstring(page)

        header = page.xpath("string(//h3[contains(@id, 'hdVote')])")

        if 'No Bill Action' in header:
            self.warning("bad vote header -- skipping")
            return
        location = header.split(', ')[1]

        if location.startswith('House'):
            chamber = 'lower'
        elif location.startswith('Senate'):
            chamber = 'upper'
        elif location.startswith('Joint'):
            chamber = 'legislature'
        else:
            raise ScrapeError("Bad chamber: %s" % location)

        motion = ', '.join(header.split(', ')[2:]).strip()
        if not motion:
            # If we can't detect a motion, skip this vote
            return

        yes_count = int(
            page.xpath("string(//span[contains(@id, 'tdAyes')])"))
        no_count = int(
            page.xpath("string(//span[contains(@id, 'tdNays')])"))
        excused_count = int(
            page.xpath("string(//span[contains(@id, 'tdExcused')])"))
        absent_count = int(
            page.xpath("string(//span[contains(@id, 'tdAbsent')])"))

        passed = yes_count > no_count

        # Named ``classification`` rather than ``type`` so the builtin is
        # not shadowed.
        if motion.startswith('Do Pass'):
            classification = 'passage'
        elif motion == 'Concurred in amendments':
            classification = 'amendment'
        elif motion == 'Veto override':
            classification = 'veto_override'
        else:
            classification = 'other'

        vote = VoteEvent(chamber=chamber,
                         start_date=date,
                         motion_text=motion,
                         result='pass' if passed else 'fail',
                         classification=classification,
                         bill=bill
                         )
        # The vote page URL has a unique ID
        # However, some votes are "consent calendar" events,
        # and relate to the passage of _multiple_ bills
        # These can't be modeled yet in Pupa, but for now we can
        # append a bill ID to the URL that forms the `pupa_id`
        # https://github.com/opencivicdata/pupa/issues/308
        vote.pupa_id = '{}#{}'.format(url, bill.identifier.replace(' ', ''))

        vote.add_source(url)
        vote.set_count('yes', yes_count)
        vote.set_count('no', no_count)
        vote.set_count('excused', excused_count)
        vote.set_count('absent', absent_count)

        for td in page.xpath("//table[@id='tblVoteTotals']/tbody/tr/td"):
            # lxml reports ``text`` as None for an empty cell or one that
            # starts with a child element; treat that as blank so the cell
            # is ignored instead of raising AttributeError.
            option_or_person = (td.text or '').strip()
            # A cell holding a vote option is preceded by the voter's name.
            if option_or_person in ('Aye', 'Yea'):
                vote.yes(td.getprevious().text.strip())
            elif option_or_person == 'Nay':
                vote.no(td.getprevious().text.strip())
            elif option_or_person == 'Excused':
                vote.vote('excused', td.getprevious().text.strip())
            elif option_or_person == 'Absent':
                vote.vote('absent', td.getprevious().text.strip())

        yield vote
Beispiel #47
0
    def scrape_vote(self, bill, vote_id, session):
        """POST for roll call ``vote_id`` and yield a VoteEvent for ``bill``.

        Nothing is yielded when the JSON response is empty. Every member
        vote other than "Y" or "N" is recorded as "other".
        """
        vote_url = (
            "https://legis.delaware.gov/json/RollCall/GetRollCallVoteByRollCallId"
        )
        form = dict(rollCallId=vote_id, sort="", group="", filter="")

        self.info("Fetching vote {} for {}".format(vote_id, bill.identifier))
        payload = self.post(url=vote_url, data=form, allow_redirects=True).json()
        if not payload:
            return

        roll = payload["Model"]
        chamber = self.chamber_map[roll["ChamberName"]]

        # Timestamp looks like "7/1/16 01:00 AM"; only the date is kept.
        taken_at = dt.datetime.strptime(roll["TakenAtDateTime"], "%m/%d/%y %I:%M %p")
        vote_date = taken_at.strftime("%Y-%m-%d")

        # TODO: What does this code mean?
        motion = roll["RollCallVoteType"]

        if roll["RollCallStatus"] == "Passed":
            outcome = "pass"
        else:
            outcome = "fail"

        # Everything that is neither a yes nor a no is lumped into "other".
        other_count = sum(
            int(roll[key])
            for key in (
                "NotVotingCount",
                "VacantVoteCount",
                "AbsentVoteCount",
                "ConflictVoteCount",
            )
        )

        vote = VoteEvent(
            chamber=chamber,
            start_date=vote_date,
            motion_text=motion,
            result=outcome,
            classification="other",
            bill=bill,
            legislative_session=session,
        )

        # Vote URL is just a generic search URL with POSTed data,
        # so provide a different link
        vote_pdf_url = (
            "https://legis.delaware.gov"
            "/json/RollCallController/GenerateRollCallPdf"
            "?rollCallId={}&chamberId={}".format(
                vote_id, self.chamber_codes[chamber]
            )
        )
        vote.add_source(vote_pdf_url)
        vote.pupa_id = vote_pdf_url
        vote.set_count("yes", roll["YesVoteCount"])
        vote.set_count("no", roll["NoVoteCount"])
        vote.set_count("other", other_count)

        for member in roll["AssemblyMemberVotes"]:
            # AssemblyMemberId looks like it should work here,
            # but for some sessions it's bugged to only return session
            try:
                legislator = self.legislators_by_short[str(member["ShortName"])]
                name = legislator["DisplayName"]
            except KeyError:
                self.warning(
                    "could not find legislator short name %s", member["ShortName"]
                )
                # Fall back to the short name as given in the roll call.
                name = member["ShortName"]

            code = member["SelectVoteTypeCode"]
            if code == "Y":
                vote.yes(name)
            elif code == "N":
                vote.no(name)
            else:
                vote.vote("other", name)

        yield vote
Beispiel #48
0
    def parse_html_vote(self, bill, actor, date, motion, url, uniqid):
        """Parse the HTML vote page at ``url`` and yield the resulting vote.

        Nothing is yielded when the page cannot be fetched, when the
        description mentions a voice vote, or when the description is the
        'UTAH STATE LEGISLATURE' banner or a lone '-'. Descriptions that
        mention a committee are handed to ``scrape_committee_vote``.

        Raises NotImplementedError when the description says neither
        'Passed' nor 'Failed' and matches none of the skip cases.
        """
        try:
            html = self.get(url).text
        except scrapelib.HTTPError:
            self.warning("A vote page not found for bill {}".
                         format(bill.identifier))
            return

        page = lxml.html.fromstring(html)
        page.make_links_absolute(url)

        descr = page.xpath("//b")[0].text_content()
        if descr == '':
            # New page method
            descr = page.xpath("//div[@id='content']/center")[0].text

        if "on voice vote" in descr:
            return

        if "committee" in descr.lower():
            yield from self.scrape_committee_vote(
                bill, actor, date, motion, page, url, uniqid
            )
            return

        if "Passed" in descr:
            passed = True
        elif "Failed" in descr:
            passed = False
        elif "UTAH STATE LEGISLATURE" in descr or descr.strip() == '-':
            return
        else:
            self.warning(descr)
            raise NotImplementedError("Can't see if we passed or failed")

        # Every <b> after the first is paired, in document order, with a
        # <table>; a heading of the form "<label> - <count>" names the tally
        # and the table lists the people in it.
        tallies = {}
        for heading, table in zip(page.xpath("//b")[1:], page.xpath("//table")):
            pieces = [piece.strip()
                      for piece in heading.text_content().split("-", 1)]
            if len(pieces) != 2:
                continue
            label, raw_count = pieces
            tallies[label.strip()] = {
                "count": int(raw_count),
                "people": [cell.text_content().strip()
                           for cell in table.xpath(".//font[@face='Arial']")],
            }

        vote = Vote(chamber=actor,
                    start_date=date,
                    motion_text=motion,
                    result='pass' if passed else 'fail',
                    bill=bill,
                    classification='passage',
                    identifier=str(uniqid))

        yeas = tallies['Yeas']
        nays = tallies['Nays']
        others = tallies['Absent or not voting']

        vote.set_count('yes', yeas['count'])
        vote.set_count('no', nays['count'])
        vote.set_count('other', others['count'])
        vote.add_source(url)

        for person in yeas['people']:
            vote.yes(person)
        for person in nays['people']:
            vote.no(person)
        for person in others['people']:
            vote.vote('other', person)

        yield vote
Beispiel #49
0
    def scrape(self, session=None):
        HTML_TAGS_RE = r'<.*?>'

        if session is None:
            session = self.latest_session()

        year_slug = self.jurisdiction.get_year_slug(session)

        # Load all bills and resolutions via the private API
        bills_url = \
            'http://legislature.vermont.gov/bill/loadBillsReleased/{}/'.\
            format(year_slug)
        bills_json = self.get(bills_url).text
        bills = json.loads(bills_json)['data'] or []

        bills_url = \
            'http://legislature.vermont.gov/bill/loadBillsIntroduced/{}/'.\
            format(year_slug)
        bills_json = self.get(bills_url).text
        bills.extend(json.loads(bills_json)['data'] or [])

        resolutions_url = \
            'http://legislature.vermont.gov/bill/loadAllResolutionsByChamber/{}/both'.\
            format(year_slug)
        resolutions_json = self.get(resolutions_url).text
        bills.extend(json.loads(resolutions_json)['data'] or [])

        # Parse the information from each bill
        for info in bills:
            # Strip whitespace from strings
            info = {k: v.strip() for k, v in info.items()}

            # Identify the bill type and chamber
            if info['BillNumber'].startswith('J.R.H.'):
                bill_type = 'joint resolution'
                bill_chamber = 'lower'
            elif info['BillNumber'].startswith('J.R.S.'):
                bill_type = 'joint resolution'
                bill_chamber = 'upper'

            elif info['BillNumber'].startswith('H.C.R.'):
                bill_type = 'concurrent resolution'
                bill_chamber = 'lower'
            elif info['BillNumber'].startswith('S.C.R.'):
                bill_type = 'concurrent resolution'
                bill_chamber = 'upper'

            elif info['BillNumber'].startswith('H.R.'):
                bill_type = 'resolution'
                bill_chamber = 'lower'
            elif info['BillNumber'].startswith('S.R.'):
                bill_type = 'resolution'
                bill_chamber = 'upper'

            elif info['BillNumber'].startswith('PR.'):
                bill_type = 'constitutional amendment'
                if info['Body'] == 'H':
                    bill_chamber = 'lower'
                elif info['Body'] == 'S':
                    bill_chamber = 'upper'
                else:
                    raise AssertionError("Amendment not tied to chamber")

            elif info['BillNumber'].startswith('H.'):
                bill_type = 'bill'
                bill_chamber = 'lower'
            elif info['BillNumber'].startswith('S.'):
                bill_type = 'bill'
                bill_chamber = 'upper'

            else:
                raise AssertionError(
                    "Unknown bill type found: '{}'".
                    format(info['BillNumber'])
                )

            bill_id = info['BillNumber'].replace('.', '').replace(' ', '')
            # put one space back in between type and number
            bill_id = re.sub(r'([a-zA-Z]+)(\d+)', r'\1 \2', bill_id)

            # Create the bill using its basic information
            bill = Bill(
                identifier=bill_id,
                legislative_session=session,
                chamber=bill_chamber,
                title=info['Title'],
                classification=bill_type
            )
            if 'resolution' in bill_type:
                bill.add_source(resolutions_url)
            else:
                bill.add_source(bills_url)

            # Load the bill's information page to access its metadata
            bill_url = 'http://legislature.vermont.gov/bill/status/{0}/{1}'.\
                format(year_slug, info['BillNumber'])
            doc = self.lxmlize(bill_url)
            bill.add_source(bill_url)

            # Capture sponsors
            sponsors = doc.xpath(
                '//dl[@class="summary-table"]/dt[text()="Sponsor(s)"]/'
                'following-sibling::dd[1]/ul/li'
            )
            sponsor_type = 'primary'
            for sponsor in sponsors:
                if sponsor.xpath('span/text()') == ['Additional Sponsors']:
                    sponsor_type = 'cosponsor'
                    continue

                sponsor_name = sponsor.xpath('a/text()')[0].\
                    replace("Rep.", "").replace("Sen.", "").strip()
                if sponsor_name and not \
                        (sponsor_name[:5] == "Less" and len(sponsor_name) == 5):
                    bill.add_sponsorship(
                        name=sponsor_name,
                        classification=sponsor_type,
                        entity_type='person',
                        primary=(sponsor_type == 'primary')
                    )

            # Capture bill text versions
            # Warning: There's a TODO in VT's source code saying 'move this to where it used to be'
            # so leave in the old and new positions
            versions = doc.xpath(
                '//dl[@class="summary-table"]/dt[text()="Bill/Resolution Text"]/'
                'following-sibling::dd[1]/ul/li/a |'
                '//ul[@class="bill-path"]//a'
            )

            for version in versions:
                if version.xpath('text()'):
                    bill.add_version_link(
                        note=version.xpath('text()')[0],
                        url=version.xpath('@href')[0].replace(' ', '%20'),
                        media_type='application/pdf'
                    )

            # Identify the internal bill ID, used for actions and votes
            # If there is no internal bill ID, then it has no extra information
            try:
                internal_bill_id = re.search(
                    r'"bill/loadBillDetailedStatus/.+?/(\d+)"',
                    lxml.etree.tostring(doc).decode('utf-8')
                ).group(1)
            except AttributeError:
                self.warning("Bill {} appears to have no activity".format(info['BillNumber']))
                yield bill
                continue

            # Capture actions
            actions_url = 'http://legislature.vermont.gov/bill/loadBillDetailedStatus/{0}/{1}'.\
                format(year_slug, internal_bill_id)
            actions_json = self.get(actions_url).text
            actions = json.loads(actions_json)['data']
            bill.add_source(actions_url)

            chambers_passed = set()
            for action in actions:
                action = {k: v for k, v in action.items() if v is not None}

                if "Signed by Governor" in action['FullStatus']:
                    actor = 'executive'
                elif action['ChamberCode'] == 'H':
                    actor = 'lower'
                elif action['ChamberCode'] == 'S':
                    actor = 'upper'
                else:
                    raise AssertionError("Unknown actor for bill action")

                # Categorize action
                if "Signed by Governor" in action['FullStatus']:
                    # assert chambers_passed == set("HS")
                    action_type = 'executive-signature'
                elif "Vetoed by the Governor" in action['FullStatus']:
                    action_type = 'executive-veto'
                elif "Read first time" in action['FullStatus'] \
                        or "Read 1st time" in action['FullStatus']:
                    action_type = 'introduction'
                elif "Reported favorably" in action['FullStatus']:
                    action_type = 'committee-passage-favorable'
                elif actor == 'lower' and any(x.lower().startswith('aspassed')
                                              for x in action['keywords'].split(';')):
                    action_type = 'passage'
                    chambers_passed.add("H")
                elif actor == 'upper' and any(x.lower().startswith(' aspassed')
                                              or x.lower().startswith('aspassed')
                                              for x in action['keywords'].split(';')):
                    action_type = 'passage'
                    chambers_passed.add("S")
                else:
                    action_type = None

                bill.add_action(
                    description=re.sub(HTML_TAGS_RE, "", action['FullStatus']),
                    date=datetime.datetime.strftime(
                        datetime.datetime.strptime(action['StatusDate'], '%m/%d/%Y'),
                        '%Y-%m-%d'
                    ),
                    chamber=actor,
                    classification=action_type
                )

            # Capture votes
            votes_url = 'http://legislature.vermont.gov/bill/loadBillRollCalls/{0}/{1}'.format(
                year_slug, internal_bill_id)
            votes_json = self.get(votes_url).text
            votes = json.loads(votes_json)['data']
            bill.add_source(votes_url)

            for vote in votes:
                roll_call_id = vote['VoteHeaderID']
                roll_call_url = ('http://legislature.vermont.gov/bill/'
                                 'loadBillRollCallDetails/{0}/{1}'.format(
                                     year_slug, roll_call_id))
                roll_call_json = self.get(roll_call_url).text
                roll_call = json.loads(roll_call_json)['data']

                roll_call_yea = []
                roll_call_nay = []
                roll_call_not_voting = []
                for member in roll_call:
                    (member_name, _district) = member['MemberName'].split(" of ")
                    member_name = member_name.strip()

                    if member['MemberVote'] == "Yea":
                        roll_call_yea.append(member_name)
                    elif member['MemberVote'] == "Nay":
                        roll_call_nay.append(member_name)
                    else:
                        roll_call_not_voting.append(member_name)

                if ("Passed -- " in vote['FullStatus'] or
                        "Veto of Governor overridden" in vote['FullStatus']):
                    did_pass = True
                elif ("Failed -- " in vote['FullStatus'] or
                      'Veto of the Governor sustained' in vote['FullStatus']):
                    did_pass = False
                else:
                    raise AssertionError("Roll call vote result is unclear")

                # Check vote counts
                yea_count = int(re.search(r'Yeas = (\d+)', vote['FullStatus']).group(1))
                nay_count = int(re.search(r'Nays = (\d+)', vote['FullStatus']).group(1))

                vote_to_add = VoteEvent(
                    bill=bill,
                    chamber=('lower' if vote['ChamberCode'] == 'H' else 'upper'),
                    start_date=datetime.datetime.strftime(
                        datetime.datetime.strptime(vote['StatusDate'], '%m/%d/%Y'),
                        '%Y-%m-%d'
                    ),
                    motion_text=re.sub(HTML_TAGS_RE, "", vote['FullStatus']).strip(),
                    result='pass' if did_pass else 'fail',
                    classification='passage',
                    legislative_session=session,
                )
                vote_to_add.add_source(roll_call_url)

                vote_to_add.set_count('yes', yea_count)
                vote_to_add.set_count('no', nay_count)
                vote_to_add.set_count('not voting', len(roll_call_not_voting))

                for member in roll_call_yea:
                    vote_to_add.yes(member)
                for member in roll_call_nay:
                    vote_to_add.no(member)
                for member in roll_call_not_voting:
                    vote_to_add.vote('not voting', member)

                yield vote_to_add

            # Capture extra information-  Not yet implemented
            # Witnesses:
            #   http://legislature.vermont.gov/bill/loadBillWitnessList/{year_slug}/{internal_bill_id}
            # Conference committee members:
            #   http://legislature.vermont.gov/bill/loadBillConference/{year_slug}/{bill_number}
            # Committee meetings:
            #   http://legislature.vermont.gov/committee/loadHistoryByBill/{year_slug}?LegislationId={internal_bill_id}

            yield bill
Beispiel #50
0
    def scrape_votes(self, bill, url):
        """Yield a ``Vote`` for each distinct roll call on the page at ``url``.

        The page is searched for ``<p>`` elements whose text matches
        ``OKLAHOMA HOUSE`` or ``OKLAHOMA STATE SENATE``; each one is treated as
        the header of a roll call, and the paragraphs that follow it are parsed
        for the motion, the ``RCS#`` number, the date, the tallies and the
        member names.

        Args:
            bill: passed through unchanged as the ``bill`` of every ``Vote``.
            url: address of the votes page; also recorded as the vote source
                and used as the prefix of each vote's ``pupa_id``.

        Yields:
            One ``Vote`` per ``RCS#`` number not already seen on this page.
            Roll calls flagged as badly formatted are skipped.

        Raises:
            Exception: if a name collected as a "no" vote contains ``:``.
            IndexError, AttributeError: if no ``RCS#`` paragraph follows a
                header, or the ``RCS#`` / date patterns do not match.
        """
        # Non-breaking spaces are normalised up front so the regexes below
        # only have to deal with ordinary whitespace.
        page = lxml.html.fromstring(self.get(url).text.replace(u"\xa0", " "))

        # RCS# numbers already handled; a repeated number is skipped.
        seen_rcs = set()

        # EXSLT regular-expression namespace, required for re:test() in XPath.
        re_ns = "http://exslt.org/regular-expressions"
        path = r"//p[re:test(text(), 'OKLAHOMA\s+(HOUSE|STATE\s+SENATE)')]"
        for header in page.xpath(path, namespaces={"re": re_ns}):
            bad_vote = False
            # Each chamber has the motion name on a different line of the file
            if "HOUSE" in header.xpath("string()"):
                chamber = "lower"
                motion_index = 8
            else:
                chamber = "upper"
                motion_index = 13

            motion = header.xpath(
                "string(following-sibling::p[%d])" % motion_index
            ).strip()
            motion = re.sub(r"\s+", " ", motion)
            if not motion.strip():
                self.warning("Motion text not found")
                # NOTE(review): this ends the whole generator, so any headers
                # after this one are not processed either -- confirm that is
                # intended rather than `continue`.
                return
            # A trailing PASSED/FAILED marker is removed from the motion text
            # and decides the result; without it the result is derived from
            # the counts further down.
            match = re.match(r"^(.*) (PASSED|FAILED)$", motion)
            if match:
                motion = match.group(1)
                passed = match.group(2) == "PASSED"
            else:
                passed = None

            # First following paragraph that mentions "RCS#".
            rcs_p = header.xpath("following-sibling::p[contains(., 'RCS#')]")[0]
            rcs_line = rcs_p.xpath("string()").replace(u"\xa0", " ")
            rcs = re.search(r"RCS#\s+(\d+)", rcs_line).group(1)

            if rcs in seen_rcs:
                continue
            else:
                seen_rcs.add(rcs)

            # The date is read from the element right after the RCS# paragraph.
            date_line = rcs_p.getnext().xpath("string()")
            date = re.search(r"\d+/\d+/\d+", date_line).group(0)
            date = datetime.datetime.strptime(date, "%m/%d/%Y").date()

            # `vtype` is the bucket ("yes"/"no"/"other") that name lines are
            # currently filed under; it stays None until a YEAS line is seen.
            vtype = None
            counts = collections.defaultdict(int)
            votes = collections.defaultdict(list)

            seen_yes = False

            # The first 13 following paragraphs are skipped before looking for
            # tallies (presumably header boilerplate -- verify against a page).
            for sib in header.xpath("following-sibling::p")[13:]:
                line = sib.xpath("string()").replace("\r\n", " ").strip()
                # Stop at the first line containing a run of asterisks.
                if "*****" in line:
                    break
                regex = (
                    r"(YEAS|NAYS|EXCUSED|VACANT|CONSTITUTIONAL "
                    r"PRIVILEGE|NOT VOTING|N/V)\s*:\s*(\d+)(.*)"
                )
                match = re.match(regex, line)
                if match:
                    if match.group(1) == "YEAS" and "RCS#" not in line:
                        vtype = "yes"
                        seen_yes = True
                    elif match.group(1) == "NAYS" and seen_yes:
                        vtype = "no"
                    elif match.group(1) == "VACANT":
                        continue  # skip these
                    elif seen_yes:
                        vtype = "other"
                    # Anything after the number on a tally line marks the
                    # whole roll call as unusable.
                    if seen_yes and match.group(3).strip():
                        self.warning("Bad vote format, skipping.")
                        bad_vote = True
                    # Tallies met before the first YEAS line are added under
                    # the key None, which is never read back.
                    counts[vtype] += int(match.group(2))
                elif seen_yes:
                    # Names on one line are separated by three spaces.
                    for name in line.split("   "):
                        if not name:
                            continue
                        # Pieces mentioning a chamber are not member names
                        # (presumably repeated page headers -- TODO confirm).
                        if "HOUSE" in name or "SENATE " in name:
                            continue
                        votes[vtype].append(name.strip())

            if bad_vote:
                continue

            # No explicit PASSED/FAILED marker: pass only if the yeas outnumber
            # the nays and "other" votes combined.
            if passed is None:
                passed = counts["yes"] > (counts["no"] + counts["other"])

            vote = Vote(
                chamber=chamber,
                start_date=date.strftime("%Y-%m-%d"),
                motion_text=motion,
                result="pass" if passed else "fail",
                bill=bill,
                classification="passage",
            )
            vote.set_count("yes", counts["yes"])
            vote.set_count("no", counts["no"])
            vote.set_count("other", counts["other"])
            # Page URL plus RCS# number distinguishes votes that share a page.
            vote.pupa_id = url + "#" + rcs

            vote.add_source(url)

            for name in votes["yes"]:
                vote.yes(name)
            for name in votes["no"]:
                # A colon in a name means a tally line was collected as a name.
                if ":" in name:
                    raise Exception(name)
                vote.no(name)
            for name in votes["other"]:
                vote.vote("other", name)

            yield vote
Beispiel #51
0
    def scrape_votes_for_chamber(self, chamber, vote_data, bill, link):
        """Yield a ``VoteEvent`` for every roll call found in ``vote_data``.

        ``vote_data`` is split into individual votes on headers of the form
        ``<word> by <name> -``; whatever precedes the first header is dropped.
        Each vote is then split on runs of ten non-breaking spaces: the first
        piece is the motion (which may contain an ``m/d/Y`` date), the
        remaining pieces carry the tallies and the lists of member names.

        Args:
            chamber: chamber value passed straight through to ``VoteEvent``.
            vote_data: raw text holding all votes for one chamber.
            bill: bill the votes are attached to.
            link: source URL recorded on every vote.

        Yields:
            One ``VoteEvent`` per vote. Motion texts already present in
            ``self._seen_votes`` get a numeric suffix so they stay distinct,
            and a member listed more than once is recorded only the first time.
        """
        # Loop-invariant patterns: compiled once per call, not once per vote.
        tally_re = re.compile(r"\d+$")
        aye_re = re.compile(r"^.+voting aye were: (.+) -")
        no_re = re.compile(r"^.+voting no were: (.+) -")
        not_voting_re = re.compile(r"^.+present and not voting were: (.+) -")
        segment_sep = u"\xa0" * 10

        motion_count = 1

        for raw_vote in re.split(r"\w+? by [\w ]+?\s+-", vote_data.strip())[1:]:
            segments = raw_vote.split(segment_sep)
            motion = segments[0]
            details = segments[1:]

            date_match = re.search(r"(\d+/\d+/\d+)", motion)
            vote_date = None
            if date_match:
                vote_date = datetime.datetime.strptime(date_match.group(), "%m/%d/%Y")

            # `details` can be empty when the separator never occurs; indexing
            # the second segment unconditionally raised IndexError in that case.
            passed = (
                "Passed" in motion
                or "Recommended for passage" in motion
                or "Rec. for pass" in motion
                or (bool(details) and "Adopted" in details[0])
            )

            yes_count = 0
            no_count = 0
            not_voting_count = 0
            ayes = []
            nos = []
            not_voting = []

            for detail in details:
                detail = detail.strip()
                tally = tally_re.search(detail)
                if tally and detail.startswith("Ayes..."):
                    yes_count = int(tally.group())
                    continue
                if tally and detail.startswith("Noes..."):
                    no_count = int(tally.group())
                    continue
                if tally and detail.startswith("Present and not voting..."):
                    not_voting_count += int(tally.group())
                    continue

                # Not a tally line: try the name lists, in the same order of
                # precedence as the tallies above.
                names = aye_re.search(detail)
                if names:
                    ayes = names.group(1).split(", ")
                    continue
                names = no_re.search(detail)
                if names:
                    nos = names.group(1).split(", ")
                    continue
                names = not_voting_re.search(detail)
                if names:
                    not_voting += names.group(1).split(", ")

            motion = motion.strip()
            motion = motion.replace("&AMP;", "&")  # un-escape ampersands
            if motion in self._seen_votes:
                motion = "{} ({})".format(motion, motion_count)
                motion_count += 1
            self._seen_votes.add(motion)

            vote = VoteEvent(
                motion_text=motion,
                start_date=vote_date.strftime("%Y-%m-%d") if vote_date else None,
                classification="passage",
                result="pass" if passed else "fail",
                chamber=chamber,
                bill=bill,
            )
            vote.set_count("yes", yes_count)
            vote.set_count("no", no_count)
            vote.set_count("not voting", not_voting_count)
            vote.add_source(link)

            # A member is recorded once only; the first list naming them wins.
            seen = set()
            for member in ayes:
                if member not in seen:
                    vote.yes(member)
                    seen.add(member)
            for member in nos:
                if member not in seen:
                    vote.no(member)
                    seen.add(member)
            for member in not_voting:
                if member not in seen:
                    vote.vote("not voting", member)
                    seen.add(member)

            yield vote
Beispiel #52
0
    def scrape_vote(self, bill, name, url):
        """Yield the ``Vote`` recorded on the roll-call page at ``url``.

        The chamber is inferred from the URL (``VOTE/h`` means lower, anything
        else upper). That choice also fixes the table layout: which ``td``
        positions start a member entry, and where the name, "Y" and "N" cells
        sit relative to that start.

        Pages containing ``BUDGET ADDRESS`` are ignored and yield nothing.

        Args:
            bill: bill the vote belongs to; ``bill.legislative_session``
                supplies the year, since only month/day is read from the page.
            name: motion text for the vote.
            url: address of the roll-call page, also recorded as the source.

        Yields:
            A single ``Vote`` with its counts and per-member votes.

        Raises:
            AttributeError: if a labelled span has no number, or the
                "Taken on" date is missing (the regex match is ``None``).
        """
        if "VOTE/h" in url:
            vote_chamber = "lower"
            cols = (1, 5, 9, 13)
            name_offset = 3
            yes_offset = 0
            no_offset = 1
        else:
            vote_chamber = "upper"
            cols = (1, 6)
            name_offset = 4
            yes_offset = 1
            no_offset = 2

        # NOTE(review): certificate verification is switched off for this request.
        html = self.get(url, verify=False).text

        if "BUDGET ADDRESS" in html:
            return

        page = lxml.html.fromstring(html)

        def labelled_count(label):
            # First run of digits in the text of the span containing `label`.
            text = page.xpath("string(//span[contains(., '%s')])" % label)
            return int(re.match(r"[^\d]*(\d+)[^\d]*", text).group(1))

        yes_count = labelled_count("Those voting Yea")
        no_count = labelled_count("Those voting Nay")
        other_count = labelled_count("Those absent")
        need_count = labelled_count("Necessary for")

        taken_on = page.xpath("string(//span[contains(., 'Taken on')])")
        month_day = re.match(r".*Taken\s+on\s+(\d+/\s?\d+)", taken_on).group(1)
        month_day = month_day.replace(" ", "")
        date = datetime.datetime.strptime(
            month_day + " " + bill.legislative_session, "%m/%d %Y"
        ).date()

        # not sure about classification.
        vote = Vote(
            chamber=vote_chamber,
            start_date=date,
            motion_text=name,
            # "Necessary for ..." is the number of yeas required, so reaching
            # it exactly is enough to pass (the previous `>` failed such votes).
            result="pass" if yes_count >= need_count else "fail",
            classification="passage",
            bill=bill,
        )
        vote.set_count("yes", yes_count)
        vote.set_count("no", no_count)
        vote.set_count("other", other_count)
        vote.add_source(url)

        table = page.xpath("//table")[0]
        for row in table.xpath("tr"):
            for i in cols:
                # `member` rather than `name`, so the motion parameter is not
                # overwritten while walking the table.
                member = row.xpath("string(td[%d])" % (i + name_offset)).strip()

                if not member or member == "VACANT":
                    continue
                member = string.capwords(member)
                if "Y" in row.xpath("string(td[%d])" % (i + yes_offset)):
                    vote.yes(member)
                elif "N" in row.xpath("string(td[%d])" % (i + no_offset)):
                    vote.no(member)
                else:
                    vote.vote("other", member)

        yield vote
Beispiel #53
0
    def handle_page(self):
        """Yield one committee ``VoteEvent`` if the page reports vote totals.

        Nothing is yielded when no span whose id contains
        ``ctl00_MainContent_lblTotal`` has text. Otherwise the date, the
        yea/nay/missed counts, the committee and the action are read from
        their labelled spans, and every non-empty entry of the ``vote-list``
        is recorded on the vote.

        Raises:
            ValueError: if a span or list entry expected to hold exactly one
                text node does not, or if a member's marker is not "Y", "N",
                "-" or a parenthesised "(Y)"/"(N)".
        """
        totals = self.doc.xpath(
            '//span[contains(@id, "ctl00_MainContent_lblTotal")]/text()'
        )
        # Without any totals on the page there is no vote to report.
        if not totals:
            return

        span_text = '//span[contains(@id, "{}")]/text()'

        [raw_date] = self.doc.xpath(span_text.format("lblDate"))
        parsed = datetime.datetime.strptime(raw_date, "%m/%d/%Y %I:%M:%S %p")
        date = format_datetime(parsed, "US/Eastern")

        yes_count = int(self.doc.xpath(span_text.format("lblYeas"))[0])
        no_count = int(self.doc.xpath(span_text.format("lblNays"))[0])
        other_count = int(self.doc.xpath(span_text.format("lblMissed"))[0])
        if yes_count > no_count:
            result = "pass"
        else:
            result = "fail"

        [committee] = self.doc.xpath(span_text.format("lblCommittee"))
        [action] = self.doc.xpath(span_text.format("lblAction"))
        motion = "{} ({})".format(action, committee)

        vote = VoteEvent(
            start_date=date,
            bill=self.kwargs["bill"],
            chamber="lower",
            motion_text=motion,
            result=result,
            classification="committee",
        )
        vote.add_source(self.url)
        vote.set_count("yes", yes_count)
        vote.set_count("no", no_count)
        vote.set_count("not voting", other_count)

        for entry in self.doc.xpath('//ul[contains(@class, "vote-list")]/li'):
            if not entry.text_content().strip():
                continue

            [member] = entry.xpath("span[2]//text()")
            [choice] = entry.xpath("span[1]//text()")

            if choice == "Y":
                vote.yes(member)
            elif choice == "N":
                vote.no(member)
            elif choice == "-":
                vote.vote("not voting", member)
            elif re.search(r"\([YN]\)", choice):
                # Parenthetical votes appear to not be counted in the
                # totals for Yea, Nay, _or_ Missed
                continue
            else:
                raise ValueError("Unknown vote type found: {}".format(choice))

        yield vote
Beispiel #54
0
    def scrape_vote(self, bill, name, url):
        """Yield the ``Vote`` recorded on the roll-call page at ``url``.

        The chamber is inferred from the URL (``VOTE/h`` means lower, anything
        else upper). That choice also fixes the table layout: which ``td``
        positions start a member entry, and where the name, "Y" and "N" cells
        sit relative to that start.

        Pages containing ``BUDGET ADDRESS`` are ignored and yield nothing.

        Args:
            bill: bill the vote belongs to; ``bill.legislative_session``
                supplies the year, since only month/day is read from the page.
            name: motion text for the vote.
            url: address of the roll-call page, also recorded as the source.

        Yields:
            A single ``Vote`` with its counts and per-member votes.

        Raises:
            AttributeError: if a labelled span has no number, or the
                "Taken on" date is missing (the regex match is ``None``).
        """
        if "VOTE/h" in url:
            vote_chamber = 'lower'
            cols = (1, 5, 9, 13)
            name_offset = 3
            yes_offset = 0
            no_offset = 1
        else:
            vote_chamber = 'upper'
            cols = (1, 6)
            name_offset = 4
            yes_offset = 1
            no_offset = 2

        # Connecticut's SSL is causing problems with Scrapelib, so use Requests
        # NOTE(review): certificate verification is switched off for this request.
        html = requests.get(url, verify=False).text

        if 'BUDGET ADDRESS' in html:
            return

        page = lxml.html.fromstring(html)

        def labelled_count(label):
            # First run of digits in the text of the span containing `label`.
            text = page.xpath("string(//span[contains(., '%s')])" % label)
            return int(re.match(r'[^\d]*(\d+)[^\d]*', text).group(1))

        yes_count = labelled_count('Those voting Yea')
        no_count = labelled_count('Those voting Nay')
        other_count = labelled_count('Those absent')
        need_count = labelled_count('Necessary for')

        taken_on = page.xpath("string(//span[contains(., 'Taken on')])")
        month_day = re.match(r'.*Taken\s+on\s+(\d+/\s?\d+)', taken_on).group(1)
        month_day = month_day.replace(' ', '')
        date = datetime.datetime.strptime(month_day + " " + bill.legislative_session,
                                          "%m/%d %Y").date()

        # not sure about classification.
        # "Necessary for ..." is the number of yeas required, so reaching it
        # exactly is enough to pass (the previous `>` failed such votes).
        vote = Vote(chamber=vote_chamber,
                    start_date=date,
                    motion_text=name,
                    result='pass' if yes_count >= need_count else 'fail',
                    classification='passage',
                    bill=bill
                    )
        vote.set_count('yes', yes_count)
        vote.set_count('no', no_count)
        vote.set_count('other', other_count)
        vote.add_source(url)

        table = page.xpath("//table")[0]
        for row in table.xpath("tr"):
            for i in cols:
                # `member` rather than `name`, so the motion parameter is not
                # overwritten while walking the table.
                member = row.xpath("string(td[%d])" % (i + name_offset)).strip()

                if not member or member == 'VACANT':
                    continue

                if "Y" in row.xpath("string(td[%d])" % (i + yes_offset)):
                    vote.yes(member)
                elif "N" in row.xpath("string(td[%d])" % (i + no_offset)):
                    vote.no(member)
                else:
                    vote.vote('other', member)

        yield vote
Beispiel #55
0
    def scrape_votes_for_chamber(self, chamber, vote_data, bill, link):
        """Yield a ``VoteEvent`` for every roll call found in ``vote_data``.

        ``vote_data`` is split into individual votes on headers of the form
        ``<word> by <name> -``; whatever precedes the first header is dropped.
        Each vote is then split on runs of ten non-breaking spaces: the first
        piece is the motion (which may contain an ``m/d/Y`` date), the
        remaining pieces carry the tallies and the lists of member names.

        Args:
            chamber: chamber value passed straight through to ``VoteEvent``.
            vote_data: raw text holding all votes for one chamber.
            bill: bill the votes are attached to.
            link: source URL recorded on every vote.

        Yields:
            One ``VoteEvent`` per vote. Motion texts already present in
            ``self._seen_votes`` get a numeric suffix so they stay distinct,
            and a member listed more than once is recorded only the first time.
        """
        # Loop-invariant patterns: compiled once per call, not once per vote.
        tally_re = re.compile(r'\d+$')
        aye_re = re.compile(r'^.+voting aye were: (.+) -')
        no_re = re.compile(r'^.+voting no were: (.+) -')
        not_voting_re = re.compile(r'^.+present and not voting were: (.+) -')
        segment_sep = u'\xa0' * 10

        motion_count = 1

        for raw_vote in re.split(r'\w+? by [\w ]+?\s+-', vote_data.strip())[1:]:
            segments = raw_vote.split(segment_sep)
            motion = segments[0]
            details = segments[1:]

            date_match = re.search(r'(\d+/\d+/\d+)', motion)
            vote_date = None
            if date_match:
                vote_date = datetime.datetime.strptime(date_match.group(), '%m/%d/%Y')

            # `details` can be empty when the separator never occurs; indexing
            # the second segment unconditionally raised IndexError in that case.
            passed = (
                'Passed' in motion or
                'Recommended for passage' in motion or
                'Rec. for pass' in motion or
                (bool(details) and 'Adopted' in details[0])
            )

            yes_count = 0
            no_count = 0
            not_voting_count = 0
            ayes = []
            nos = []
            not_voting = []

            for detail in details:
                detail = detail.strip()
                tally = tally_re.search(detail)
                if tally and detail.startswith('Ayes...'):
                    yes_count = int(tally.group())
                    continue
                if tally and detail.startswith('Noes...'):
                    no_count = int(tally.group())
                    continue
                if tally and detail.startswith('Present and not voting...'):
                    not_voting_count += int(tally.group())
                    continue

                # Not a tally line: try the name lists, in the same order of
                # precedence as the tallies above.
                names = aye_re.search(detail)
                if names:
                    ayes = names.group(1).split(', ')
                    continue
                names = no_re.search(detail)
                if names:
                    nos = names.group(1).split(', ')
                    continue
                names = not_voting_re.search(detail)
                if names:
                    not_voting += names.group(1).split(', ')

            motion = motion.strip()
            motion = motion.replace('&AMP;', '&')  # un-escape ampersands
            if motion in self._seen_votes:
                motion = '{} ({})'.format(motion, motion_count)
                motion_count += 1
            self._seen_votes.add(motion)

            vote = VoteEvent(
                motion_text=motion,
                start_date=vote_date.strftime('%Y-%m-%d') if vote_date else None,
                classification='passage',
                result='pass' if passed else 'fail',
                chamber=chamber,
                bill=bill,
            )
            vote.set_count('yes', yes_count)
            vote.set_count('no', no_count)
            vote.set_count('not voting', not_voting_count)
            vote.add_source(link)

            # A member is recorded once only; the first list naming them wins.
            seen = set()
            for member in ayes:
                if member not in seen:
                    vote.yes(member)
                    seen.add(member)
            for member in nos:
                if member not in seen:
                    vote.no(member)
                    seen.add(member)
            for member in not_voting:
                if member not in seen:
                    vote.vote('not voting', member)
                    seen.add(member)

            yield vote