Ejemplo n.º 1
0
    def scrape_senate_vote(self, bill, url, date):
        """Scrape a Senate vote PDF and yield the VoteEvent built from it.

        The PDF at ``url`` is downloaded, converted to text and the downloaded
        file is removed.  If the text matches the "Yea: N  Nay: N  Absent: N"
        layout, parsing is delegated to ``scrape_senate_vote_3col``.
        Otherwise the text is split on its Yea/Nay/Absent headings and every
        name listed under a heading is recorded.

        Args:
            bill: bill object passed through to ``VoteEvent``.
            url: location of the vote PDF; also used as source and pupa_id.
            date: object with ``strftime`` giving the day of the vote.

        Yields:
            The populated ``VoteEvent``.  Nothing is yielded when the download
            fails with ``scrapelib.HTTPError`` (a warning is logged instead).
        """
        try:
            filename, _ = self.urlretrieve(url)
        except scrapelib.HTTPError:
            self.warning("missing vote file %s" % url)
            return

        vote = VoteEvent(
            chamber="upper",
            start_date=date.strftime("%Y-%m-%d"),
            motion_text="Passage",
            # placeholder; the real result is computed after the tally below
            result="fail",
            classification="passage",
            bill=bill,
        )
        vote.add_source(url)
        vote.pupa_id = url

        # always delete the downloaded file, even if the conversion fails
        try:
            text = convert_pdf(filename, "text").decode("utf-8")
        finally:
            os.remove(filename)

        if re.search(r"Yea:\s+\d+\s+Nay:\s+\d+\s+Absent:\s+\d+", text):
            yield from self.scrape_senate_vote_3col(bill, vote, text, url, date)
            return

        # reversed so that pop() walks the pieces in document order
        data = re.split(r"(Yea|Nay|Absent)s?:", text)[::-1]
        # must be a real list: on Python 3 filter() is a lazy iterator that is
        # always truthy and has no pop()
        data = [chunk for chunk in data if chunk]
        keymap = dict(yea="yes", nay="no")
        actual_vote = collections.defaultdict(int)
        vote_count = {"yes": 0, "no": 0, "other": 0}
        while data:
            # pieces come in (heading, names) pairs
            vote_val = data.pop()
            key = keymap.get(vote_val.lower(), "other")
            values = data.pop()
            for name in re.split(r"(?:[\s,]+and\s|[\s,]{2,})", values):
                if name.lower().strip() == "none.":
                    continue
                name = name.replace("..", "")
                name = re.sub(r"\.$", "", name)
                name = name.strip("-1234567890 \n")
                if not name:
                    continue
                vote.vote(key, name)
                actual_vote[vote_val] += 1
                vote_count[key] += 1
            assert actual_vote[vote_val] == vote_count[key]

        for key, value in vote_count.items():
            vote.set_count(key, value)
        # replace the placeholder: passes only with more yes than no + other
        vote.result = (
            "pass"
            if vote_count["yes"] > (vote_count["no"] + vote_count["other"])
            else "fail"
        )

        yield vote
Ejemplo n.º 2
0
    def get_vote_event(self, bill, act, votes, result):
        """Build a VoteEvent for ``bill`` from an action, its votes and a result.

        Votes whose ``VoteValueName`` is "suspended" are skipped; every other
        option is translated through ``self.VOTE_OPTIONS`` when a mapping
        exists and used verbatim (lower-cased) otherwise.
        """
        # the organization id is a JSON document with leading '~' stripped
        org = json.loads(act['organization_id'].lstrip('~'))
        event = VoteEvent(
            legislative_session=bill.legislative_session,
            motion_text=act['description'],
            organization=org,
            classification=None,
            start_date=act['date'],
            result=result,
            bill=bill,
        )

        # the bill must carry exactly two sources, otherwise unpacking fails
        web_url, api_url = [source['url'] for source in bill.sources]
        event.add_source(web_url)
        event.add_source(api_url + '/histories')

        for ballot in votes:
            option = ballot['VoteValueName'].lower()
            if option != 'suspended':
                event.vote(self.VOTE_OPTIONS.get(option, option),
                           ballot['VotePersonName'].strip())

        return event
Ejemplo n.º 3
0
    def parse_vote_page(self, vote_url, bill):
        """Fetch a roll-call page and return the VoteEvent described on it.

        The chamber is taken from the URL ("senate" means upper), the result
        is "pass" when the yes total exceeds the no total, and the third to
        fifth totals on the page are summed into the "other" count.
        """
        doc = lxml.html.fromstring(self.get(vote_url).text)

        chamber = "upper" if "senate" in vote_url else "lower"

        # the date follows an 18-character prefix, formatted like "Mar 23, 2009"
        raw_date = doc.xpath('//td[starts-with(text(), "Legislative")]')[0].text
        raw_date = raw_date.replace(u"\xa0", " ")
        when = datetime.datetime.strptime(raw_date[18:], "%b %d, %Y")

        # motion text is the concatenation of every 23-column-wide cell
        motion = "".join(
            cell.text_content() for cell in doc.xpath('//td[@colspan="23"]')
        )
        if not motion:
            motion = "No motion given"  # XXX: Double check this. See SJ 3.
        motion = motion.replace(u"\xa0", " ")

        # totals share the CSS class of the "Yeas" cell; the first is skipped
        totals_class = doc.xpath('//td[contains(text(), "Yeas")]')[0].get("class")
        totals = doc.xpath('//td[@class="%s"]/text()' % totals_class)[1:]
        yes_count, no_count, *other_counts = (
            int(totals[index].split()[-1]) for index in range(5)
        )
        other_count = sum(other_counts)

        vote = VoteEvent(
            bill=bill,
            chamber=chamber,
            start_date=when.strftime("%Y-%m-%d"),
            motion_text=motion,
            classification="passage",
            result="pass" if yes_count > no_count else "fail",
        )
        vote.pupa_id = vote_url  # contains sequence number
        vote.set_count("yes", yes_count)
        vote.set_count("no", no_count)
        vote.set_count("other", other_count)

        # a heading cell selects the recorder used for the cells after it
        heading_recorders = (
            ("Voting Yea", "yes"),
            ("Voting Nay", "no"),
            ("Not Voting", "other"),
            ("Excused", "other"),
        )
        record = None
        for cell_text in doc.xpath("//td/text()"):
            cell_text = cell_text.replace(u"\xa0", " ")
            attr = next(
                (
                    name
                    for prefix, name in heading_recorders
                    if cell_text.startswith(prefix)
                ),
                None,
            )
            if attr is not None:
                # looked up lazily, only once a matching heading is seen
                record = getattr(vote, attr)
            elif record:
                record(cell_text.rstrip("*"))

        return vote
Ejemplo n.º 4
0
    def add_vote(self, bill, chamber, date, text, url):
        '''Build a VoteEvent from an action text such as "Ayes 10, Noes 2".

        The first Ayes/Noes pair found in ``text`` supplies the counts, the
        first matching pattern in ``motion_classifiers`` supplies the
        classification ("other" when none match), and the individual votes
        are loaded from ``url`` when one is given.
        '''
        tallies = re.findall(r'Ayes,?[\s]?(\d+)[,;]\s+N(?:oes|ays),?[\s]?(\d+)',
                             text)
        ayes, noes = map(int, tallies[0])

        classification = next(
            (kind for pattern, kind in motion_classifiers.items()
             if re.match(pattern, text)),
            'other',
        )
        outcome = 'pass' if ayes > noes else 'fail'

        event = VoteEvent(
            chamber=chamber,
            start_date=TIMEZONE.localize(date),
            motion_text=text,
            result=outcome,
            classification=classification,
            bill=bill,
        )
        event.set_count('yes', ayes)
        event.set_count('no', noes)

        if not url:
            return event

        # the URL both documents the vote and selects the roll-call parser
        event.add_source(url)
        if 'av' in url:
            self.add_house_votes(event, url)
        elif 'sv' in url:
            self.add_senate_votes(event, url)

        return event
Ejemplo n.º 5
0
def build_vote(session, bill_id, url, vote_record, chamber, motion_text):
    """Create a VoteEvent from a parsed vote record.

    ``vote_record`` maps "yes", "no", "excused", "absent" and "other" to
    collections of voter names and "date" to an object with ``strftime``.
    The vote passes when there are more yes than no voters.
    """
    # When they vote in a substitute they mark it as XHB
    bill_id = bill_id.replace("XHB", "HB")
    yes_total = len(vote_record["yes"])
    no_total = len(vote_record["no"])
    outcome = "pass" if yes_total > no_total else "fail"

    vote_event = VoteEvent(
        result=outcome,
        chamber=chamber,
        start_date=vote_record["date"].strftime("%Y-%m-%d"),
        motion_text=motion_text,
        classification="passage",
        legislative_session=session,
        bill=bill_id,
        # bill ids starting with "S" belong to the upper chamber
        bill_chamber="upper" if bill_id[0] == "S" else "lower",
    )
    vote_event.pupa_id = url

    options = ("yes", "no", "excused", "absent", "other")
    for option in options:
        vote_event.set_count(option, len(vote_record[option]))
    for option in options:
        for voter in vote_record[option]:
            vote_event.vote(option, voter)

    vote_event.add_source(url)
    return vote_event
Ejemplo n.º 6
0
    def scrape_vote(self, bill, motion, url):
        """Scrape one roll-call page and yield the resulting VoteEvent.

        Counts, date and outcome are read from the table cells that follow
        their label cells; absent and excused totals are merged into "other".
        The vote passes only when the outcome cell reads "PREVAILS".

        Args:
            bill: bill object passed through to ``VoteEvent``.
            motion: motion text for the vote.
            url: roll-call page; must contain ``chamber=House`` or
                ``chamber=Senate``.

        Yields:
            The populated ``VoteEvent``.

        Raises:
            ValueError: if the chamber cannot be determined from ``url``
                (previously surfaced later as an UnboundLocalError), or if a
                count or the date cannot be parsed.
            IndexError: if an expected label cell is missing from the page.
        """
        page = lxml.html.fromstring(self.get(url, retry_on_404=True).text)

        def labelled(label):
            # text of the cell(s) following the cell whose text equals `label`
            cell = page.xpath("//td[text() = '%s']" % label)[0]
            return cell.xpath("string(following-sibling::td)")

        yes_count = int(labelled("Yeas (Y):"))
        no_count = int(labelled("Nays (N):"))
        abs_count = int(labelled("Absent (X):"))
        ex_count = int(labelled("Excused (E):"))
        other_count = abs_count + ex_count

        if 'chamber=House' in url:
            chamber = 'lower'
        elif 'chamber=Senate' in url:
            chamber = 'upper'
        else:
            raise ValueError("cannot determine chamber from vote URL %s" % url)

        # months appear either spelled out or abbreviated with a period
        date = labelled("Date:")
        try:
            date = datetime.datetime.strptime(date, "%B %d, %Y")
        except ValueError:
            date = datetime.datetime.strptime(date, "%b. %d, %Y")

        outcome = labelled("Outcome:")

        vote = VoteEvent(
            chamber=chamber,
            start_date=date.strftime('%Y-%m-%d'),
            motion_text=motion,
            result='pass' if outcome == 'PREVAILS' else 'fail',
            classification='passage',
            bill=bill,
        )
        vote.set_count('yes', yes_count)
        vote.set_count('no', no_count)
        vote.set_count('other', other_count)
        vote.add_source(url)

        # rows after the header: name in column 2, vote code in column 4
        option_by_code = {'Y': 'yes', 'N': 'no', 'X': 'other', 'E': 'other'}
        member_cell = page.xpath("//td[text() = 'Member']")[0]
        for row in member_cell.xpath("../../tr")[1:]:
            name = row.xpath("string(td[2])")
            option = option_by_code.get(row.xpath("string(td[4])"))
            if option:
                vote.vote(option, name)

        yield vote
Ejemplo n.º 7
0
    def add_vote(self, bill, chamber, date, text, url):
        """Build a VoteEvent from an action text and an optional roll-call URL.

        The first "Ayes N, Noes M" (or "Nays") pair in ``text`` supplies the
        counts and the result; the first pattern in ``motion_classifiers``
        that matches ``text`` supplies the classification ("other" if none).

        Args:
            bill: bill object passed through to ``VoteEvent``.
            chamber: chamber in which the vote was taken.
            date: naive datetime, localized with ``TIMEZONE``.
            text: action text containing the vote totals.
            url: roll-call URL, or a falsy value when there is none.

        Returns:
            The populated ``VoteEvent``.

        Raises:
            IndexError: if ``text`` contains no Ayes/Noes totals.
        """
        votes = re.findall(r"Ayes,?[\s]?(\d+)[,;]\s+N(?:oes|ays),?[\s]?(\d+)",
                           text)
        yes, no = int(votes[0][0]), int(votes[0][1])

        vtype = "other"
        for regex, classification in motion_classifiers.items():
            if re.match(regex, text):
                vtype = classification
                break

        v = VoteEvent(
            chamber=chamber,
            start_date=TIMEZONE.localize(date),
            motion_text=text,
            result="pass" if yes > no else "fail",
            classification=vtype,
            bill=bill,
        )
        v.set_count("yes", yes)
        v.set_count("no", no)

        # fetch the vote itself
        if url:
            # the id is the last URL segment; only derived when a URL exists,
            # so a missing URL no longer crashes before this guard
            v.pupa_id = url.split("/")[-1]
            v.add_source(url)

            if "av" in url:
                self.add_house_votes(v, url)
            elif "sv" in url:
                self.add_senate_votes(v, url)

        return v
Ejemplo n.º 8
0
    def parse_bill_actions_table(self, bill, action_table, bill_id, session,
                                 url, bill_chamber):
        """Add every action row to ``bill`` and yield a VoteEvent per vote.

        The first child of ``action_table`` is skipped.  Each remaining row
        supplies a date, an actor code and the action text.  Committees
        returned by ``categorize_action`` are attached to the action when
        their short code is known to ``self.short_ids``.  When
        ``self.parse_vote`` recognises a vote in the text, a VoteEvent is
        yielded.
        """
        actor_names = {
            "S": "upper",
            "H": "lower",
            "D": "Data Systems",
            "$": "Appropriation measure",
            "ConAm": "Constitutional Amendment"
        }

        for row in action_table.xpath('*')[1:]:
            raw_date = row[0].text_content()
            action_date = dt.datetime.strptime(
                raw_date, "%m/%d/%Y").strftime('%Y-%m-%d')
            actor_code = row[1].text_content()
            string = row[2].text_content()
            actor = actor_names[actor_code]
            act_type, committees = categorize_action(string)

            # XXX: Translate short-code to full committee name for the
            #      matcher.
            committee_names = []
            for short_code in committees or ():
                try:
                    full_name = self.short_ids[short_code]['name']
                except KeyError:
                    # unknown short codes are ignored
                    continue
                committee_names.append(full_name)

            act = bill.add_action(string,
                                  action_date,
                                  chamber=actor,
                                  classification=act_type)
            for full_name in committee_names:
                act.add_related_entity(name=full_name,
                                       entity_type="organization")

            parsed = self.parse_vote(string)
            if not parsed:
                continue

            tallies, motion = parsed
            vote = VoteEvent(
                start_date=action_date,
                chamber=actor,
                bill=bill_id,
                bill_chamber=bill_chamber,
                legislative_session=session,
                motion_text=motion,
                result='pass' if 'passed' in string.lower() else 'fail',
                classification='passage')
            vote.add_source(url)
            vote.set_count('yes', int(tallies['n_yes'] or 0))
            vote.set_count('no', int(tallies['n_no'] or 0))
            vote.set_count('not voting', int(tallies['n_excused'] or 0))

            def not_voting(voter, _vote=vote):
                _vote.vote('not voting', voter)

            # "yes_resv" names are recorded as plain yes votes
            recorders = (
                ('yes', vote.yes),
                ('yes_resv', vote.yes),
                ('no', vote.no),
                ('excused', not_voting),
            )
            for key, record in recorders:
                for voter in split_specific_votes(tallies[key]):
                    record(voter)

            yield vote
Ejemplo n.º 9
0
def record_votes(root, session, chamber):
    '''Yield a VoteEvent for every valid vote element found under ``root``.

    Candidate elements are the ``div`` nodes matched by ``vote_selectors``;
    each is wrapped in ``MaybeVote`` and ignored unless it reports itself
    valid.  The first two characters of ``session`` are used as the
    legislative session.
    '''
    selector = '//div' + ''.join(vote_selectors)
    for element in root.xpath(selector):
        mv = MaybeVote(element)
        if mv.is_valid:
            event = VoteEvent(
                chamber=chamber,
                start_date=None,
                motion_text='passage' if mv.passed else 'other',
                result='pass' if mv.passed else 'fail',
                classification='passage' if mv.passed else 'other',
                legislative_session=session[0:2],
                bill=mv.bill_id,
                bill_chamber=mv.chamber,
            )

            # missing totals are stored as zero
            event.set_count('yes', mv.yeas or 0)
            event.set_count('no', mv.nays or 0)
            event.set_count('not voting', mv.present or 0)

            for voter in mv.votes['yeas']:
                event.yes(voter)
            for voter in mv.votes['nays']:
                event.no(voter)
            # "present" voters are filed under "not voting"
            for key, option in (('present', 'not voting'),
                                ('absent', 'absent')):
                for voter in mv.votes[key]:
                    event.vote(option, voter)

            yield event
Ejemplo n.º 10
0
 def scrape_votes(self, bill, bill_page, chamber):
     """Yield a VoteEvent for every "view_votes" link on a bill page.

     The date and motion come from the first two cells of the table row
     containing the link.  A motion counts as passed when its text contains
     "Passed" or "Advanced".  Individual votes are read from the linked page,
     whose calendar-table cells alternate between a name and a vote type.

     Args:
         bill: bill object passed through to ``VoteEvent``.
         bill_page: parsed bill page supporting ``xpath``.
         chamber: chamber in which the votes were taken.

     Yields:
         One populated ``VoteEvent`` per vote link.
     """
     vote_links = bill_page.xpath(
         '//div[contains(@class, "col-sm-8")]//a[contains(@href, "view_votes")]')
     for vote_link in vote_links:
         vote_url = vote_link.attrib['href']
         date_td, motion_td, *_ = vote_link.xpath('ancestor::tr/td')
         date = datetime.strptime(date_td.text, '%b %d, %Y')
         motion_text = motion_td.text_content()
         vote_page = self.lxmlize(vote_url)
         passed = (
             'Passed' in motion_text or
             'Advanced' in motion_text
         )
         cells = vote_page.xpath('//table[contains(@class, "calendar-table")]//td')
         vote = VoteEvent(
             bill=bill,
             chamber=chamber,
             start_date=TIMEZONE.localize(date),
             motion_text=motion_text,
             classification='passage',
             result='pass' if passed else 'fail',
         )
         # the KeyID query parameter identifies the vote
         query_params = urllib.parse.parse_qs(urllib.parse.urlparse(vote_url).query)
         vote.pupa_id = query_params['KeyID'][0]
         vote.add_source(vote_url)
         # pair each name cell with the vote cell after it; zip ignores a
         # trailing unpaired cell instead of raising IndexError
         for name_cell, type_cell in zip(cells[::2], cells[1::2]):
             name = name_cell.text
             vote_type = type_cell.text
             if name and vote_type:
                 vote.vote(VOTE_TYPE_MAP.get(vote_type.lower(), 'other'), name)
         yield vote
Ejemplo n.º 11
0
def record_votes(root, session, chamber):
    """Yield a VoteEvent for each valid vote element below ``root``.

    Elements are the ``div`` nodes selected by ``vote_selectors``.  Each one
    is parsed with ``MaybeVote``; invalid ones are skipped.  Only the first
    two characters of ``session`` are kept as the legislative session.
    """
    candidates = (
        MaybeVote(el)
        for el in root.xpath("//div" + "".join(vote_selectors))
    )
    for mv in candidates:
        if not mv.is_valid:
            continue

        event = VoteEvent(
            chamber=chamber,
            start_date=None,
            motion_text="passage" if mv.passed else "other",
            result="pass" if mv.passed else "fail",
            classification="passage" if mv.passed else "other",
            legislative_session=session[0:2],
            bill=mv.bill_id,
            bill_chamber=mv.chamber,
        )

        # a missing total is recorded as zero
        event.set_count("yes", mv.yeas or 0)
        event.set_count("no", mv.nays or 0)
        event.set_count("not voting", mv.present or 0)

        for voter in mv.votes["yeas"]:
            event.yes(voter)
        for voter in mv.votes["nays"]:
            event.no(voter)
        # "present" voters are recorded as "not voting"
        for voter in mv.votes["present"]:
            event.vote("not voting", voter)
        for voter in mv.votes["absent"]:
            event.vote("absent", voter)

        yield event
Ejemplo n.º 12
0
def build_vote(session, bill_id, url, vote_record, chamber, motion_text):
    '''Create a VoteEvent from a parsed vote record.

    ``vote_record`` maps 'yes', 'no', 'excused', 'absent' and 'other' to
    collections of voter names and 'date' to an object with ``strftime``.
    The vote passes when yes voters outnumber no voters.
    '''
    # When they vote in a substitute they mark it as XHB
    bill_id = bill_id.replace('XHB', 'HB')
    if len(vote_record['yes']) > len(vote_record['no']):
        outcome = 'pass'
    else:
        outcome = 'fail'

    vote_event = VoteEvent(
        result=outcome,
        chamber=chamber,
        start_date=vote_record['date'].strftime('%Y-%m-%d'),
        motion_text=motion_text,
        classification='passage',
        legislative_session=session,
        bill=bill_id,
        # bill ids starting with 'S' belong to the upper chamber
        bill_chamber='upper' if bill_id[0] == 'S' else 'lower',
    )
    vote_event.pupa_id = url

    vote_types = ['yes', 'no', 'excused', 'absent', 'other']
    for vote_type in vote_types:
        vote_event.set_count(vote_type, len(vote_record[vote_type]))
    for vote_type in vote_types:
        for voter in vote_record[vote_type]:
            vote_event.vote(vote_type, voter)

    vote_event.add_source(url)
    return vote_event
Ejemplo n.º 13
0
    def scrape_votes_old(self, bill, billname, session):
        """Yield a VoteEvent for each journal link on the archive bill page.

        The archive page for ``session`` and ``billname`` is fetched and each
        link whose href contains "JournalText" is treated as a vote.  The
        link text is the date, the neighbouring cell gives "<chamber> -
        <motion>", and the following row lists the yea and nay voters.  A
        vote passes when it has more yeas than nays.

        Raises:
            ScrapeError: if the chamber is neither "House" nor "Senate".
        """
        vote_url = ("http://archives.legislature.state.oh.us/bills.cfm?ID=" +
                    session + "_" + billname)
        page = lxml.html.fromstring(self.get(vote_url).text)

        def names_in(div):
            # non-empty text of every cell in the div's table
            texts = (td.xpath("string()") for td in div.xpath("table/tr/td"))
            return [text for text in texts if text]

        chambers = {"House": "lower", "Senate": "upper"}

        for jlink in page.xpath("//a[contains(@href, 'JournalText')]"):
            parsed = datetime.datetime.strptime(jlink.text, "%m/%d/%Y")
            date = "{:%Y-%m-%d}".format(self._tz.localize(parsed).date())

            details = jlink.xpath("string(../../../td[2])")
            parts = details.split(" - ")
            chamber_name = parts[0]
            if chamber_name not in chambers:
                raise ScrapeError("Bad chamber: %s" % chamber_name)
            chamber = chambers[chamber_name]

            # the motion is the first line after the chamber prefix
            motion = parts[1].split("\n")[0].strip()

            # voter names live in the row right after the link's row
            vote_row = jlink.xpath("../../..")[0].getnext()
            yeas = names_in(
                vote_row.xpath("td/font/div[contains(@id, 'Yea')]")[0])
            nays = names_in(
                vote_row.xpath("td/font/div[contains(@id, 'Nay')]")[0])

            vote = VoteEvent(
                chamber=chamber,
                start_date=date,
                motion_text=motion,
                result="pass" if len(yeas) > len(nays) else "fail",
                bill=bill,
                classification="passed",
            )

            for voter in yeas:
                vote.yes(voter)
            for voter in nays:
                vote.no(voter)

            vote.add_source(vote_url)

            yield vote
Ejemplo n.º 14
0
    def _parse_senate_votes(self, vote_data, bill, url):
        """Build a VoteEvent from one Senate vote record.

        AYE and AYEWR rolls count as yes, NAY as no, and EXC, ABS and ABD as
        "other".  The vote passes when yes votes outnumber no votes.  A roll
        type that is missing, empty or has no "items" contributes nothing;
        all roll types are handled the same way (previously EXC/ABS/ABD
        raised KeyError when the entry had no "items").

        Args:
            vote_data: mapping with "voteDate" (YYYY-MM-DD), "voteType",
                "version", "memberVotes" and, for committee votes,
                "committee".
            bill: bill object passed through to ``VoteEvent``.
            url: source URL recorded on the vote.

        Returns:
            The populated ``VoteEvent``.

        Raises:
            ValueError: if "voteType" is neither "FLOOR" nor "COMMITTEE", or
                the date cannot be parsed.
        """
        vote_datetime = datetime.datetime.strptime(vote_data["voteDate"],
                                                   "%Y-%m-%d")
        if vote_data["voteType"] == "FLOOR":
            motion = "Floor Vote"
        elif vote_data["voteType"] == "COMMITTEE":
            motion = "{} Vote".format(vote_data["committee"]["name"])
        else:
            raise ValueError("Unknown vote type encountered.")

        if vote_data["version"]:
            motion += " - Version: " + vote_data["version"]

        vote = VoteEvent(
            chamber="upper",
            start_date=vote_datetime.strftime("%Y-%m-%d"),
            motion_text=motion,
            classification="passage",
            # placeholder; replaced once the rolls have been counted
            result="fail",
            bill=bill,
        )

        vote.add_source(url)

        vote_rolls = vote_data["memberVotes"]["items"]

        def record_roll(roll_type, cast):
            # record every legislator of one roll type; return how many
            roll = vote_rolls.get(roll_type) or {}
            members = roll.get("items") or []
            for legislator in members:
                cast(legislator["fullName"])
            return len(members)

        def cast_other(name):
            vote.vote("other", name)

        # Count all yea votes.
        yes_count = record_roll("AYE", vote.yes) + record_roll("AYEWR", vote.yes)

        # Count all nay votes.
        no_count = record_roll("NAY", vote.no)

        # Count all other types of votes.
        other_count = sum(
            record_roll(roll_type, cast_other)
            for roll_type in ("EXC", "ABS", "ABD")
        )

        vote.result = "pass" if yes_count > no_count else "fail"
        vote.set_count("yes", yes_count)
        vote.set_count("no", no_count)
        vote.set_count("other", other_count)

        return vote
Ejemplo n.º 15
0
def test_vote_event_org_chamber():
    """A VoteEvent given only a chamber gets a pseudo-id for that chamber."""
    event = VoteEvent(
        legislative_session="2009",
        motion_text="passage of the bill",
        start_date="2009-01-07",
        result='pass',
        classification='bill-passage',
        chamber='upper',
    )
    expected = {'classification': 'upper'}
    assert get_pseudo_id(event.organization) == expected
Ejemplo n.º 16
0
def toy_vote_event():
    """Return a minimal VoteEvent carrying a single source."""
    event = VoteEvent(
        legislative_session="2009",
        motion_text="passage of the bill",
        start_date="2009-01-07",
        result='pass',
        classification='bill-passage',
    )
    event.add_source("http://uri.example.com/", note="foo")
    return event
    def scrape_vote(self, bill, date, motion, url):
        """Scrape one vote page and yield the resulting VoteEvent.

        Nothing is yielded when the page says the vote is "not yet official"
        or when fetching or parsing fails; a failure is logged as a warning
        rather than silently dropped.  The vote passes only when the yeas
        outnumber the nays, non-voting and present totals combined.

        Args:
            bill: bill object passed through to ``VoteEvent``.
            date: start date of the vote.  It was previously ignored in
                favour of a hard-coded '2017-03-04'.
                NOTE(review): passed to ``VoteEvent`` unchanged; confirm
                callers supply a value it accepts.
            motion: motion text for the vote.
            url: vote page; a URL ending in "Senate" means the upper chamber.

        Yields:
            The populated ``VoteEvent``.
        """
        try:
            page = self.get(url).text
            if 'not yet official' in page:
                # Sometimes they link to vote pages before they go live
                return

            page = lxml.html.fromstring(page)
            actor = 'upper' if url.endswith('Senate') else 'lower'

            votevals = ['yes', 'no', 'not voting', 'other']
            count_path = "string(//td[@align = 'center' and contains(., '%s: ')])"
            yes_count = int(page.xpath(count_path % "Yeas").split()[-1])
            no_count = int(page.xpath(count_path % "Nays").split()[-1])
            not_voting_count = int(
                page.xpath(count_path % "Non Voting").split()[-1])
            other_count = int(
                page.xpath(count_path % "Present").split()[-1])
            passed = yes_count > no_count + not_voting_count + other_count
            vote = VoteEvent(start_date=date,
                             motion_text=motion,
                             result='pass' if passed else 'fail',
                             classification='passage',
                             chamber=actor,
                             bill=bill)
            try:
                excused_count = int(
                    page.xpath(count_path % "Excused").split()[-1])
            except (IndexError, ValueError):
                # no usable "Excused" total on this page
                pass
            else:
                vote.set_count('excused', excused_count)
                votevals.append('excused')
            vote.set_count('yes', yes_count)
            vote.set_count('no', no_count)
            vote.set_count('not voting', not_voting_count)
            vote.set_count('other', other_count)
            vote.add_source(url)

            xpath = ('//*[contains(@class, "ms-standardheader")]/'
                     'following-sibling::table')
            divs = page.xpath(xpath)

            # the name tables appear in the same order as votevals
            for (voteval, div) in zip(votevals, divs):
                for a in div.xpath('.//a'):
                    name = a.text_content().strip()
                    if name:
                        vote.vote(voteval, name)
        except Exception as exc:
            # sometimes the link is there but is dead; keep this best-effort
            # but report the failure instead of hiding it
            self.warning("could not scrape vote %s: %s", url, exc)
            return

        yield vote
Ejemplo n.º 18
0
    def parse_roll_call(self, bill, link, chamber, date):
        """Fetch the roll-call page behind ``link`` and return its VoteEvent.

        The motion is read from the page; "FP" is expanded to "FINAL
        PASSAGE".  Final passage is classified "passage", concurrence in
        amendments "amendment", and anything else "other", in which case the
        link text replaces the motion.  LVE and N/V totals are merged into
        "other", and the vote passes only when the yeas outnumber the nays
        plus other.

        Raises:
            Exception: if a voter element has an unrecognized class.
        """
        url = link.attrib['href']
        page = lxml.html.fromstring(self.get(url).text)

        motion = page.xpath(
            'string(//div[@class="Column-OneFourth"]/div[3])').strip()
        motion = re.sub(r'\s+', ' ', motion)
        if motion == 'FP':
            motion = 'FINAL PASSAGE'

        if motion == 'FINAL PASSAGE':
            classification = 'passage'
        elif re.match(r'CONCUR(RENCE)? IN \w+ AMENDMENTS', motion):
            classification = 'amendment'
        else:
            classification = 'other'
            motion = link.text_content()

        def tally(label):
            # the total is in the element right after the label div
            return int(
                page.xpath("//div[text() = '%s']" % label)[0].getnext().text)

        yeas = tally('YEAS')
        nays = tally('NAYS')
        lve = tally('LVE')
        nv = tally('N/V')
        other = lve + nv

        vote = VoteEvent(
            chamber=chamber,
            start_date=tz.localize(date),
            motion_text=motion,
            classification=classification,
            result='pass' if yeas > (nays + other) else 'fail',
            bill=bill,
        )
        vote.add_source(url)
        vote.set_count('yes', yeas)
        vote.set_count('no', nays)
        vote.set_count('other', other)

        # first marker found in the element's class decides the vote value
        class_markers = (
            ('yea', 'yes'),
            ('nay', 'no'),
            ('nvote', 'other'),
            ('lve', 'other'),
        )
        for div in page.xpath('//*[contains(@class, "RollCalls-Vote")]'):
            name = div.text_content().strip()
            name = re.sub(r'^[\s,]+', '', name)
            name = re.sub(r'[\s,]+$', '', name)
            class_attr = div.attrib['class'].lower()
            voteval = next(
                (option for marker, option in class_markers
                 if marker in class_attr),
                None,
            )
            if voteval is None:
                msg = 'Unrecognized vote val: %s' % class_attr
                raise Exception(msg)
            vote.vote(voteval, name)

        return vote
Ejemplo n.º 19
0
    def scrape_vote(self, bill, vote_id, session):
        """Fetch one roll call by id and yield the resulting VoteEvent.

        The roll call is requested by POSTing ``vote_id`` to the roll-call
        endpoint; nothing is yielded when the response is empty.  Not-voting,
        vacant, absent and conflict totals are merged into "other".  Voters
        are named by their display name when their short name is known to
        ``self.legislators_by_short`` and by the short name otherwise.
        """
        vote_url = 'https://legis.delaware.gov/json/RollCall/GetRollCallVoteByRollCallId'
        form = {
            'rollCallId': vote_id,
            'sort': '',
            'group': '',
            'filter': '',
        }

        page = self.post(url=vote_url, data=form, allow_redirects=True).json()
        if not page:
            return

        roll = page['Model']
        vote_chamber = self.chamber_map[roll['ChamberName']]

        # timestamps look like "7/1/16 01:00 AM"; only the day is kept
        taken_at = dt.datetime.strptime(
            roll['TakenAtDateTime'], '%m/%d/%y %I:%M %p')
        vote_date = taken_at.strftime('%Y-%m-%d')

        # TODO: What does this code mean?
        vote_motion = roll['RollCallVoteType']

        if roll['RollCallStatus'] == 'Passed':
            vote_passed = 'pass'
        else:
            vote_passed = 'fail'

        other_fields = ('NotVotingCount', 'VacantVoteCount',
                        'AbsentVoteCount', 'ConflictVoteCount')
        other_count = sum(int(roll[field]) for field in other_fields)

        vote = VoteEvent(chamber=vote_chamber,
                         start_date=vote_date,
                         motion_text=vote_motion,
                         result=vote_passed,
                         classification='other',
                         bill=bill,
                         legislative_session=session)
        vote.add_source(vote_url)
        vote.set_count('yes', roll['YesVoteCount'])
        vote.set_count('no', roll['NoVoteCount'])
        vote.set_count('other', other_count)

        for row in roll['AssemblyMemberVotes']:
            # AssemblyMemberId looks like it should work here,
            # but for some sessions it's bugged to only return session
            try:
                name = self.legislators_by_short[
                    str(row['ShortName'])]['DisplayName']
            except KeyError:
                self.warning('could not find legislator short name %s',
                             row['ShortName'])
                name = row['ShortName']

            code = row['SelectVoteTypeCode']
            if code == 'Y':
                vote.yes(name)
            elif code == 'N':
                vote.no(name)
            else:
                vote.vote('other', name)

        yield vote
Ejemplo n.º 20
0
def test_vote_event_org_obj():
    """A VoteEvent given an Organization object stores that object's ``_id``."""
    committee = Organization('something', classification='committee')
    event_fields = dict(
        legislative_session="2009",
        motion_text="passage of the bill",
        start_date="2009-01-07",
        result='pass',
        classification='bill-passage',
    )
    vote_event = VoteEvent(organization=committee, **event_fields)
    assert vote_event.organization == committee._id
Ejemplo n.º 21
0
    def scrape_vote(self, chamber, session, bill_id, vote_url):
        """Scrape one roll-call page and yield a lower-chamber VoteEvent.

        Nothing is yielded when the request ends up on the "no vote found"
        page, when the page has no motion heading, or when none of the
        paragraphs parses as an ``MM/DD/YYYY`` date.
        """
        NO_VOTE_URL = 'http://www.house.leg.state.mn.us/votes/novotefound.asp'
        response = self.get(vote_url)
        page_source = response.text

        # A broken link redirects to NO_VOTE_URL; there is nothing to scrape.
        if response.url == NO_VOTE_URL:
            return

        page = lxml.html.fromstring(page_source)
        headings = page.xpath("//div[@id='leg_PageContent']/div/h2/text()")
        if not headings:
            self.logger.warning("Bill was missing a motion number, skipping")
            return
        motion = headings[0]

        # The second <h3> text holds the tally; whitespace-separated tokens
        # 0 and 3 are read as the yea and nay totals.
        tally = page.xpath(
            ".//div[@id='leg_PageContent']/div/h3/text()")[1].split()
        yeas, nays = int(tally[0]), int(tally[3])

        # Take the first paragraph that parses as a date.
        for paragraph in page.xpath(
                ".//div[@id='leg_PageContent']/div/p/text()"):
            try:
                date = datetime.datetime.strptime(
                    paragraph.strip(), '%m/%d/%Y').date()
            except ValueError:
                continue
            break
        else:
            self.logger.warning("No date could be found for vote on %s" %
                                motion)
            return

        outcome = 'pass' if yeas > nays else 'fail'
        vote = VoteEvent(
            chamber='lower',
            start_date=date,
            motion_text=motion,
            result=outcome,
            classification='passage',
            legislative_session=session,
            bill=bill_id,
            bill_chamber=chamber,
        )
        vote.set_count('yes', yeas)
        vote.set_count('no', nays)
        vote.add_source(vote_url)
        vote.pupa_id = vote_url

        # The first table lists the yeas, the second the nays.
        name_tables = (
            ('//table[1]/tr/td/font/text()', vote.yes),
            ('//table[2]/tr/td/font/text()', vote.no),
        )
        for names_xpath, record in name_tables:
            for raw_name in page.xpath(names_xpath):
                record(raw_name.strip())

        yield vote
Ejemplo n.º 22
0
    def scrape_vote(self, bill, vote_json, session):
        """Convert one vote record into a VoteEvent and yield it.

        Args:
            bill: Passed through as the ``bill`` argument of the VoteEvent.
            vote_json: Mapping for a single vote. The keys read here are
                ``amendmentNumber``, ``action``, ``chamber``, ``voteDate``,
                ``billNumber``, the five ``*VotesCount`` totals and the five
                comma-separated ``*Votes`` name strings.
            session: Legislative session; also used to build the source URL.

        Yields:
            The populated VoteEvent.
        """
        if vote_json['amendmentNumber']:
            motion = '{}: {}'.format(
                vote_json['amendmentNumber'], vote_json['action'])
        else:
            motion = vote_json['action']

        result = 'pass' if vote_json['yesVotesCount'] > vote_json['noVotesCount'] else 'fail'

        v = VoteEvent(
            chamber=self.chamber_abbrev_map[vote_json['chamber']],
            start_date=self.parse_local_date(vote_json['voteDate']),
            motion_text=motion,
            result=result,
            legislative_session=session,
            bill=bill,
            classification='other',
        )

        v.set_count(option='yes', value=vote_json['yesVotesCount'])
        v.set_count('no', vote_json['noVotesCount'])
        v.set_count('absent', vote_json['absentVotesCount'])
        v.set_count('excused', vote_json['excusedVotesCount'])
        v.set_count('other', vote_json['conflictVotesCount'])

        def clean_names(key):
            """Return the non-blank, whitespace-stripped names under *key*."""
            stripped = (name.strip() for name in vote_json[key].split(','))
            return [name for name in stripped if name]

        for name in clean_names('yesVotes'):
            v.yes(name)

        for name in clean_names('noVotes'):
            v.no(name)

        # Votes with other classifications. Names are stripped here as well:
        # splitting "A, B" on ',' leaves a leading space on every name after
        # the first, which previously was recorded as part of the name.
        other_options = (
            ('absent', 'absentVotes'),
            ('excused', 'excusedVotes'),
            ('other', 'conflictVotes'),
        )
        for option, names_key in other_options:
            for name in clean_names(names_key):
                v.vote(option=option, voter=name)

        source_url = 'http://lso.wyoleg.gov/Legislation/{}/{}'.format(
            session, vote_json['billNumber'])
        v.add_source(source_url)

        yield v
Ejemplo n.º 23
0
    def scrape_senate_vote(self, bill, url, date):
        """Parse the roll-call file at *url* and yield an upper-chamber vote.

        The file is downloaded, converted to text with ``convert_pdf`` and
        deleted. If the text contains a ``Yea: n  Nay: n  Absent: n``
        summary, parsing is delegated to ``scrape_senate_vote_3col``.
        Otherwise the text is split on ``Yea``/``Nay``/``Absent`` labels
        (optionally plural) and the names after each label are recorded.

        When the download raises ``scrapelib.HTTPError`` a warning is logged
        and nothing is yielded.
        """
        try:
            filename, resp = self.urlretrieve(url)
        except scrapelib.HTTPError:
            self.warning("missing vote file %s" % url)
            return

        vote = VoteEvent(
            chamber='upper',
            start_date=date.strftime("%Y-%m-%d"),
            motion_text='Passage',
            # Placeholder; the real result is computed from the tallies below.
            result='fail',
            classification='passage',
            bill=bill)
        vote.add_source(url)
        vote.pupa_id = url

        text = convert_pdf(filename, 'text').decode('utf-8')
        os.remove(filename)

        if re.search(r'Yea:\s+\d+\s+Nay:\s+\d+\s+Absent:\s+\d+', text):
            yield from self.scrape_senate_vote_3col(bill, vote, text, url,
                                                    date)
            return

        # Splitting on a capturing group keeps the labels in the result, so
        # the chunks alternate label, names, label, names, ...  The list is
        # reversed so that pop() consumes the chunks in document order.
        data = re.split(r'(Yea|Nay|Absent)s?:', text)[::-1]
        # Build a real list: on Python 3 filter() returns a lazy iterator,
        # which is always truthy and has no pop(), so the loop below would
        # fail with AttributeError.
        data = [chunk for chunk in data if chunk]
        keymap = dict(yea='yes', nay='no')
        actual_vote = collections.defaultdict(int)
        vote_count = {'yes': 0, 'no': 0, 'other': 0}
        # NOTE(review): the paired pops assume no non-empty text precedes the
        # first label and that every label is followed by a non-empty chunk
        # -- confirm against real roll-call files.
        while data:
            vote_val = data.pop()
            # Any label other than Yea/Nay (i.e. Absent) is counted as other.
            key = keymap.get(vote_val.lower(), 'other')
            values = data.pop()
            for name in re.split(r'(?:[\s,]+and\s|[\s,]{2,})', values):
                if name.lower().strip() == 'none.':
                    continue
                # Drop ".." runs, a trailing period, and surrounding digits,
                # dashes, spaces and newlines.
                name = name.replace('..', '')
                name = re.sub(r'\.$', '', name)
                name = name.strip('-1234567890 \n')
                if not name:
                    continue
                vote.vote(key, name)
                actual_vote[vote_val] += 1
                vote_count[key] += 1
            assert actual_vote[vote_val] == vote_count[key]

        for key, value in vote_count.items():
            vote.set_count(key, value)
        # Passing requires more yes votes than no and other votes combined.
        vote.result = 'pass' if vote_count['yes'] > (
            vote_count['no'] + vote_count['other']) else 'fail'

        yield vote
    def handle_page(self):
        """Build a committee VoteEvent from ``self.doc`` and yield it.

        The date, committee and action are read from spans with fixed ids,
        the totals from the text of the last nested table, and the
        individual votes from the cells of the triply nested tables.

        Raises:
            ValueError: if the date, committee or action span does not yield
                exactly one text node, if the date does not match
                ``%m/%d/%Y %I:%M:%S %p``, if a vote cell's ``span[1]`` or
                ``span[2]`` does not yield exactly one text node, or if a
                vote code is not recognised.
        """
        (date, ) = self.doc.xpath(
            '//span[@id="ctl00_ContentPlaceHolder1_lblDate"]/text()')
        # NOTE(review): format_datetime presumably localises the naive
        # datetime to US/Eastern -- confirm against the helper.
        date = format_datetime(
            datetime.datetime.strptime(date, '%m/%d/%Y %I:%M:%S %p'),
            'US/Eastern')

        totals = self.doc.xpath('//table//table')[-1].text_content()
        # Collapse every whitespace run so the pattern below can match.
        totals = re.sub(r'(?mu)\s+', " ", totals).strip()
        # Both halves of the pattern must be raw strings: "\s" and "\d" in a
        # plain literal are invalid escape sequences and trigger a warning.
        (yes_count, no_count, other_count) = [
            int(x) for x in re.search(
                r'(?m)Total Yeas:\s+(\d+)\s+Total Nays:\s+(\d+)\s+'
                r'Total Missed:\s+(\d+)', totals).groups()
        ]
        result = 'pass' if yes_count > no_count else 'fail'

        (committee, ) = self.doc.xpath(
            '//span[@id="ctl00_ContentPlaceHolder1_lblCommittee"]/text()')
        (action, ) = self.doc.xpath(
            '//span[@id="ctl00_ContentPlaceHolder1_lblAction"]/text()')
        motion = "{} ({})".format(action, committee)

        vote = VoteEvent(
            start_date=date,
            bill=self.kwargs['bill'],
            chamber='lower',
            motion_text=motion,
            result=result,
            classification='committee',
        )
        vote.add_source(self.url)
        vote.set_count('yes', yes_count)
        vote.set_count('no', no_count)
        vote.set_count('not voting', other_count)

        for cell in self.doc.xpath('//table//table//table//td'):
            if not cell.text_content().strip():
                continue

            # The second span holds the member, the first the vote code.
            (member, ) = cell.xpath('span[2]//text()')
            (vote_code, ) = cell.xpath('span[1]//text()')

            if vote_code == "Y":
                vote.yes(member)
            elif vote_code == "N":
                vote.no(member)
            elif vote_code == "-":
                vote.vote('not voting', member)
            # Parenthetical votes appear to not be counted in the
            # totals for Yea, Nay, _or_ Missed
            elif re.search(r'\([YN]\)', vote_code):
                continue
            else:
                raise ValueError(
                    "Unknown vote type found: {}".format(vote_code))

        yield vote
Ejemplo n.º 25
0
    def _parse_senate_votes(self, vote_data, bill, url):
        """Build and return an upper-chamber VoteEvent from *vote_data*.

        Args:
            vote_data: Mapping with ``voteDate`` (``YYYY-MM-DD``),
                ``voteType`` (``FLOOR`` or ``COMMITTEE``),
                ``memberVotes['items']`` keyed by vote code and, for
                committee votes, ``committee['name']``.
            bill: Passed through as the ``bill`` argument of the VoteEvent.
            url: Recorded as the source of the vote.

        Returns:
            The VoteEvent, with per-member votes, counts and result set.

        Raises:
            ValueError: if ``voteType`` is neither ``FLOOR`` nor
                ``COMMITTEE``, or ``voteDate`` is not ``YYYY-MM-DD``.
        """
        vote_datetime = datetime.datetime.strptime(vote_data['voteDate'],
                                                   '%Y-%m-%d')

        if vote_data['voteType'] == 'FLOOR':
            motion = 'Floor Vote'
        elif vote_data['voteType'] == 'COMMITTEE':
            motion = '{} Vote'.format(vote_data['committee']['name'])
        else:
            raise ValueError('Unknown vote type encountered.')

        vote = VoteEvent(
            chamber='upper',
            start_date=vote_datetime.strftime('%Y-%m-%d'),
            motion_text=motion,
            classification='passage',
            # Placeholder; the real result is set once votes are counted.
            result='fail',
            bill=bill,
        )

        vote.add_source(url)

        vote_rolls = vote_data['memberVotes']['items']

        def member_names(*codes):
            """Return the ``fullName`` of every member under the vote codes.

            A code that is missing, empty, or has no ``items`` entry
            contributes nothing. The same guard is applied to every code;
            previously the other-vote codes indexed ``items`` unguarded and
            could raise KeyError.
            """
            names = []
            for code in codes:
                bucket = vote_rolls.get(code) or {}
                if 'items' in bucket:
                    names.extend(
                        legislator['fullName']
                        for legislator in bucket['items'])
            return names

        # AYEWR is counted as a yes vote alongside AYE.
        yes_names = member_names('AYE', 'AYEWR')
        no_names = member_names('NAY')
        other_names = member_names('EXC', 'ABS', 'ABD')

        for name in yes_names:
            vote.yes(name)
        for name in no_names:
            vote.no(name)
        for name in other_names:
            vote.vote('other', name)

        yes_count, no_count = len(yes_names), len(no_names)
        vote.result = 'pass' if yes_count > no_count else 'fail'
        vote.set_count('yes', yes_count)
        vote.set_count('no', no_count)
        vote.set_count('other', len(other_names))

        return vote
Ejemplo n.º 26
0
    def parse_vote(self, bill, link):
        """Scrape the roll-call page at *link* and yield a VoteEvent for *bill*.

        The first ``<h3>`` text under ``main_content`` supplies the chamber
        (first word), the date (last word, ``MM/DD/YYYY``) and the motion
        text (words 2 up to the third-from-last, falling back to the first
        ``<h4>`` text when that is blank). Each ``<h3>`` text whose
        parenthesised part parses as an integer supplies the tally for
        yea, nay, present or absent; a tally that never appears is 0.

        If the page has no ``<h3>`` text, a warning is logged and nothing
        is yielded.
        """
        member_doc = lxml.html.fromstring(self.get(link).text)
        motion = member_doc.xpath("//div[@id='main_content']/h4/text()")
        opinions = member_doc.xpath("//div[@id='main_content']/h3/text()")
        if not opinions:
            self.warning("No Votes for: %s", link)
            return

        temp = opinions[0].split()
        vote_chamber = 'upper' if temp[0] == 'Senate' else 'lower'
        vote_date = datetime.datetime.strptime(temp[-1], '%m/%d/%Y')
        vote_status = " ".join(temp[2:-2])
        vote_status = vote_status if vote_status.strip() else motion[0]

        # Default every tally to 0 so that a heading missing from the page
        # no longer leaves its counter unbound (UnboundLocalError below).
        counts = {'yea': 0, 'nay': 0, 'present': 0, 'absent': 0}
        for heading in opinions:
            try:
                count = int(heading[heading.find("(") + 1:heading.find(")")])
            except ValueError:
                # This is likely not a vote-count text chunk
                # It's probably '`On roll call the vote was:`
                continue
            lowered = heading.lower()
            # Only the first matching label is used for each heading.
            for label in ('yea', 'nay', 'present', 'absent'):
                if label in lowered:
                    counts[label] = count
                    break

        yes_count = counts['yea']
        no_count = counts['nay']

        vote = VoteEvent(
            bill=bill,
            start_date=vote_date.strftime('%Y-%m-%d'),
            chamber=vote_chamber,
            motion_text=vote_status,
            result='pass' if yes_count > no_count else 'fail',
            classification='passage',
        )
        vote.pupa_id = link

        vote.set_count('yes', yes_count)
        vote.set_count('no', no_count)
        vote.set_count('abstain', counts['present'])
        vote.set_count('absent', counts['absent'])

        vote.add_source(link)

        # Members are classified purely by position: after skipping the
        # first link, the next yes_count links are yeas, the following
        # no_count links are nays, and the rest are recorded as other.
        # NOTE(review): presumably the first link is not a member -- confirm.
        a_links = member_doc.xpath("//div[@id='main_content']/a/text()")
        for position, link_text in enumerate(a_links[1:], start=1):
            # Commas are removed and only the first word is kept as the name.
            voter = link_text.replace(',', '').split()[0]
            if position <= yes_count:
                option = 'yes'
            elif position <= yes_count + no_count:
                option = 'no'
            else:
                option = 'other'
            vote.vote(option, voter)
        yield vote
Ejemplo n.º 27
0
def test_org_and_chamber_conflict():
    """Giving a VoteEvent both an organization and a chamber raises ValueError."""
    conflicting_fields = {
        'legislative_session': "2009",
        'motion_text': "passage of the bill",
        'start_date': "2009-01-07",
        'result': 'pass',
        'classification': 'passage',
        'organization': 'test',
        'chamber': 'lower',
    }
    with pytest.raises(ValueError):
        VoteEvent(**conflicting_fields)
Ejemplo n.º 28
0
def test_vote_event_org_dict():
    """A dict organization is stored as a pseudo id that decodes to that dict."""
    org_spec = {'name': 'Random Committee', 'classification': 'committee'}
    event_fields = dict(
        legislative_session="2009",
        motion_text="passage of the bill",
        start_date="2009-01-07",
        result='pass',
        classification='bill-passage',
    )
    vote_event = VoteEvent(organization=org_spec, **event_fields)
    assert get_pseudo_id(vote_event.organization) == org_spec
Ejemplo n.º 29
0
    def scrape_vote(self, bill, vote_json, session):
        """Convert one vote record into a VoteEvent and yield it.

        Args:
            bill: Passed through as the ``bill`` argument of the VoteEvent.
            vote_json: Mapping for a single vote. The keys read here are
                ``amendmentNumber``, ``action``, ``chamber``, ``voteDate``,
                ``billNumber``, the five ``*VotesCount`` totals and the five
                comma-separated ``*Votes`` name strings.
            session: Legislative session; also used to build the source URL.

        Yields:
            The populated VoteEvent.
        """
        if vote_json["amendmentNumber"]:
            motion = "{}: {}".format(vote_json["amendmentNumber"],
                                     vote_json["action"])
        else:
            motion = vote_json["action"]

        result = ("pass"
                  if vote_json["yesVotesCount"] > vote_json["noVotesCount"]
                  else "fail")

        v = VoteEvent(
            chamber=self.chamber_abbrev_map[vote_json["chamber"]],
            start_date=self.parse_local_date(vote_json["voteDate"]),
            motion_text=motion,
            result=result,
            legislative_session=session,
            bill=bill,
            classification="other",
        )

        v.set_count(option="yes", value=vote_json["yesVotesCount"])
        v.set_count("no", vote_json["noVotesCount"])
        v.set_count("absent", vote_json["absentVotesCount"])
        v.set_count("excused", vote_json["excusedVotesCount"])
        v.set_count("other", vote_json["conflictVotesCount"])

        def clean_names(key):
            """Return the non-blank, whitespace-stripped names under *key*."""
            stripped = (name.strip() for name in vote_json[key].split(","))
            return [name for name in stripped if name]

        for name in clean_names("yesVotes"):
            v.yes(name)

        for name in clean_names("noVotes"):
            v.no(name)

        # Votes with other classifications. Names are stripped here as well:
        # splitting "A, B" on "," leaves a leading space on every name after
        # the first, which previously was recorded as part of the name.
        other_options = (
            ("absent", "absentVotes"),
            ("excused", "excusedVotes"),
            ("other", "conflictVotes"),
        )
        for option, names_key in other_options:
            for name in clean_names(names_key):
                v.vote(option=option, voter=name)

        source_url = "http://lso.wyoleg.gov/Legislation/{}/{}".format(
            session, vote_json["billNumber"])
        v.add_source(source_url)

        yield v
Ejemplo n.º 30
0
    def handle_page(self):
        """Yield a committee VoteEvent assembled from the page in ``self.doc``.

        The date, totals, committee and action come from spans whose ids
        contain fixed fragments; individual votes come from the ``<li>``
        items of the ``vote-list`` list. An unrecognised vote code raises
        ValueError.
        """
        raw_date, = self.doc.xpath('//span[contains(@id, "lblDate")]/text()')
        parsed_date = datetime.datetime.strptime(
            raw_date, '%m/%d/%Y %I:%M:%S %p')
        date = format_datetime(parsed_date, 'US/Eastern')

        yea_text = self.doc.xpath(
            '//span[contains(@id, "lblYeas")]/text()')[0]
        yes_count = int(yea_text)
        nay_text = self.doc.xpath(
            '//span[contains(@id, "lblNays")]/text()')[0]
        no_count = int(nay_text)
        missed_text = self.doc.xpath(
            '//span[contains(@id, "lblMissed")]/text()')[0]
        other_count = int(missed_text)

        if yes_count > no_count:
            result = 'pass'
        else:
            result = 'fail'

        committee, = self.doc.xpath(
            '//span[contains(@id, "lblCommittee")]/text()')
        action, = self.doc.xpath('//span[contains(@id, "lblAction")]/text()')
        motion = "{} ({})".format(action, committee)

        vote = VoteEvent(
            chamber='lower',
            classification='committee',
            bill=self.kwargs['bill'],
            start_date=date,
            motion_text=motion,
            result=result,
        )
        vote.add_source(self.url)
        vote.set_count('yes', yes_count)
        vote.set_count('no', no_count)
        vote.set_count('not voting', other_count)

        entries = self.doc.xpath('//ul[contains(@class, "vote-list")]/li')
        for entry in entries:
            # Skip list items with no visible text.
            if not entry.text_content().strip():
                continue

            # The second span holds the member, the first the vote code.
            member, = entry.xpath('span[2]//text()')
            mark, = entry.xpath('span[1]//text()')

            if mark == "-":
                vote.vote('not voting', member)
            elif mark == "N":
                vote.no(member)
            elif mark == "Y":
                vote.yes(member)
            elif not re.search(r'\([YN]\)', mark):
                raise ValueError(
                    "Unknown vote type found: {}".format(mark))
            # Otherwise the code is parenthetical, e.g. "(Y)": such votes
            # appear not to be counted in the Yea, Nay or Missed totals, so
            # they are not recorded.

        yield vote