Exemple #1
0
    def parse_senate_vote(self, sv_text, url):
        """Create the senate passage vote and sanity-check what was parsed.

        The vote is populated by ``self.parse_visual_grid``; the totals row it
        returns is then compared with the counts stored on the vote, and the
        vote's date is checked to be a ``datetime``.

        Raises:
            ValueError: if the listed totals differ from the vote's counts,
                or if the vote's date is not a ``datetime``.
        """
        # Name overrides handed to parse_visual_grid.
        overrides = {"ONEILL": "O'NEILL"}
        # Add new columns as they appear to be safe
        vote = Vote('upper', '?', 'senate passage', False, 0, 0, 0)
        vote.add_source(url)
        # NOTE(review): sVoteHeader and rDate are not defined in this method;
        # presumably they live at module level - confirm.
        vote, rowHeads, saneRow = self.parse_visual_grid(
            vote, sv_text, overrides, sVoteHeader, rDate, 'TOTAL', 'TOTAL')

        # --- Sanity check: listed totals must equal the calculated totals ---
        listed = {'yes': 0, 'no': 0, 'other': 0}
        # Sort the header row by key so it lines up with the totals row.
        ordered_heads = sorted(rowHeads.items(), key=operator.itemgetter(0))
        column = -1
        for cell in saneRow:
            if column < 0:
                # Counting only starts after the cell carrying the TOTAL label.
                if 'TOTAL' in cell[0]:
                    column = 0
                continue
            marker = ordered_heads[column][1][0]
            if marker == 'Y':
                listed['yes'] = int(cell[0])
            elif marker == 'N':
                listed['no'] = int(cell[0])
            else:
                listed['other'] += int(cell[0])
            column += 1

        totals_match = (listed['yes'] == vote['yes_count'] and
                        listed['no'] == vote['no_count'] and
                        listed['other'] == vote['other_count'])
        if not totals_match:
            raise ValueError("Votes were not parsed correctly")

        # The date must have been parsed into a real datetime.
        if not isinstance(vote['date'], datetime):
            raise ValueError("Date was not parsed correctly")
        # --- End sanity check ---
        return vote
Exemple #2
0
    def vote(self):
        """Build and return a billy ``Vote`` for this object.

        Date, motion, outcome and counts come from the instance's own
        accessors; every count that is neither "Yeas" nor "Nays" is folded
        into the "other" total.
        """
        no_actual_votes = collections.defaultdict(list)
        date = self.date()
        motion = self.motion()
        passed = self.passed()
        counts = self.get_counts()

        yeas = int(counts.get("Yeas", 0))
        nays = int(counts.get("Nays", 0))
        others = sum(map(int, counts.values())) - (yeas + nays)

        result = Vote(self.chamber, date, motion, passed, yeas, nays, others,
                      actual_vote=dict(no_actual_votes))

        # vote_values() yields (method name on Vote, voter) pairs.
        for method_name, voter in self.vote_values():
            record = getattr(result, method_name)
            record(voter)

        result.add_source(self.url)
        return result
Exemple #3
0
    def scrape_vote(self, bill, vote_type_id, vote_type):
        """Scrape one vote page for ``bill`` and add the vote to it.

        The page URL is built from ``vote_type_id`` and the bill's
        ``bill_id``; ``vote_type`` is used as the motion text of the vote.
        """
        base_url = "http://dcclims1.dccouncil.us/lims/voting.aspx?VoteTypeID=%s&LegID=%s"
        url = base_url % (vote_type_id, bill["bill_id"])

        with self.urlopen(url) as html:
            doc = lxml.html.fromstring(html)

            vote_date = convert_date(doc.get_element_by_id("VoteDate").text)

            # The voice-vote and approved boxes are ticked with an 'x'.
            voice_box = doc.xpath('//span[@id="VoteTypeVoice"]/b/text()')[0]
            voice = voice_box == "x"
            approved_box = doc.xpath('//span[@id="VoteResultApproved"]/b/text()')[0]
            passed = approved_box == "x"

            yes_count = extract_int(doc.xpath('//span[@id="VoteCount1"]/b/text()')[0])
            no_count = extract_int(doc.xpath('//span[@id="VoteCount2"]/b/text()')[0])
            # The page's counts are occasionally wrong and would push this
            # below zero, hence the floor.
            # NOTE(review): 13 is presumably the number of members - confirm.
            remaining = 13 - (yes_count + no_count)
            other_count = max(remaining, 0)

            vote = Vote("upper", vote_date, vote_type, passed, yes_count, no_count, other_count, voice_vote=voice)

            vote.add_source(url)

            # Member names are the only text on the page inside <u> tags.
            for underlined in doc.xpath("//u"):
                member = underlined.text
                cast = underlined.xpath("../../i/text()")[0]
                if "Yes" in cast:
                    record = vote.yes
                elif "No" in cast:
                    record = vote.no
                else:
                    record = vote.other
                record(member)
        bill.add_vote(vote)
Exemple #4
0
    def scrape_senate_vote(self, bill, url):
        """Scrape a Senate roll-call PDF and add the vote to ``bill``.

        The PDF at ``url`` is downloaded, converted to text and parsed for
        the date/time, the YEAS / NAYS / EXCUSED / NOT VOTING tallies, the
        member names listed under each heading, and the motion (the line two
        below the line that equals the cleaned bill id).

        If the date, time, vote counts or motion cannot be found, the problem
        is logged and the vote is skipped.
        """
        (path, resp) = self.urlretrieve(url)
        try:
            text = convert_pdf(path, "text")
        finally:
            # Remove the downloaded file even when conversion fails.
            os.remove(path)

        lines = text.split("\n")

        date_match = re.search(r"Date:\s+(\d+/\d+/\d+)", text)
        if not date_match:
            self.log("Couldn't find date on %s" % url)
            return

        time_match = re.search(r"Time:\s+(\d+:\d+:\d+)\s+(AM|PM)", text)
        if not time_match:
            self.log("Couldn't find time on %s" % url)
            return

        date = "%s %s %s" % (date_match.group(1), time_match.group(1), time_match.group(2))
        date = datetime.datetime.strptime(date, "%m/%d/%Y %I:%M:%S %p")
        date = self._tz.localize(date)

        vote_type = None
        yes_count, no_count, other_count = None, None, 0
        votes = []
        # Parsing starts at line index 21; earlier lines are not inspected.
        for line in lines[21:]:
            line = line.strip()
            if not line:
                continue

            if line.startswith("YEAS"):
                yes_count = int(line.split(" - ")[1])
                vote_type = "yes"
            elif line.startswith("NAYS"):
                no_count = int(line.split(" - ")[1])
                vote_type = "no"
            elif line.startswith("EXCUSED") or line.startswith("NOT VOTING"):
                other_count += int(line.split(" - ")[1])
                vote_type = "other"
            else:
                # Names on one line are separated by runs of 2+ whitespace.
                votes.extend([(n.strip(), vote_type) for n in re.split(r"\s{2,}", line)])

        if yes_count is None or no_count is None:
            self.log("Couldn't find vote counts in %s" % url)
            return

        passed = yes_count > no_count + other_count

        clean_bill_id = fix_bill_id(bill["bill_id"])
        motion_line = None
        for i, line in enumerate(lines):
            if line.strip() == clean_bill_id:
                # No break: the last matching line wins.
                motion_line = i + 2

        motion = None
        if motion_line is not None and motion_line < len(lines):
            motion = lines[motion_line]
        if not motion:
            self.log("Couldn't find motion for %s" % url)
            return

        vote = Vote("upper", date, motion, passed, yes_count, no_count, other_count)
        vote.add_source(url)

        insert_specific_votes(vote, votes)
        check_vote_counts(vote)

        bill.add_vote(vote)
Exemple #5
0
    def scrape_votes(self, link, chamber, bill):
        """Scrape the votes table at ``link`` and add each vote to ``bill``.

        The first table's cells are read in groups of five after skipping the
        first three cells.  Rows whose text mentions "cow"/"COW" are skipped.
        The text up to the first "." is the motion; the remainder holds the
        outcome and a ``yes-no[-other]`` tally.

        Raises:
            AttributeError: if a row's text contains no tally.
        """
        prefix = "FINAL VOTE - "
        with self.urlopen(link) as votes_page_html:
            votes_page = lxml.html.fromstring(votes_page_html)
            votes_table = votes_page.cssselect("table")[0]
            # Eliminate table headings and unnecessary element
            votes_elements = votes_table.cssselect("td")[3:]
            for actor, date, name_and_text, name, text in grouper(5, votes_elements):
                description = text.text_content()
                if "cow" in description or "COW" in description:
                    continue
                vote_date = dt.datetime.strptime(date.text_content(), "%m/%d/%Y")

                # Remove the literal prefix only.  str.lstrip() treats its
                # argument as a set of characters and would also eat leading
                # letters of the motion itself.
                motion_and_votes = description.lstrip()
                if motion_and_votes.startswith(prefix):
                    motion_and_votes = motion_and_votes[len(prefix):]
                motion, sep, votes = motion_and_votes.partition(".")
                passed = "passed" in votes

                votes_match = re.search("([0-9]+)-([0-9]+)-?([0-9]+)?", votes)
                yes_count = int(votes_match.group(1))
                no_count = int(votes_match.group(2))
                other_group = votes_match.group(3)
                # The third number is optional in the tally.
                other_count = int(other_group) if other_group is not None else 0

                vote = Vote(chamber, vote_date, motion, passed, yes_count, no_count, other_count)
                vote.add_source(link)
                bill.add_vote(vote)
Exemple #6
0
    def parse_vote(self, bill, action, act_chamber, act_date, url,
        re_vote_text = re.compile(r'The question (?:being|to be reconsidered):\s*"(.*?\?)"', re.S),
        re_header=re.compile(r'\d{2}-\d{2}-\d{4}\s{10,}\w{,20} Journal\s{10,}\d{,6}\s{,4}')):
        """Parse the voting reports found at ``url`` and add them to ``bill``.

        The text of the second ``<pre>`` element is split on the
        "The question being/to be reconsidered" sentence; each motion and the
        text that follows it become one vote.  Nothing is done when the page
        has fewer than two ``<pre>`` elements.

        ``re_vote_text`` and ``re_header`` default to precompiled patterns
        for the motion sentence and for the page header that may be repeated
        in the middle of the content.
        """
        html = self.get(url).text
        doc = lxml.html.fromstring(html)

        # Find all chunks of text representing voting reports.
        votes_text_container = doc.xpath('//pre')
        if len(votes_text_container) < 2:
            return
        votes_text = votes_text_container[1].text_content()
        votes_text = re_vote_text.split(votes_text)
        # Odd indices hold the captured motions, even indices the text after.
        votes_data = zip(votes_text[1::2], votes_text[2::2])

        # Process each.
        for motion, text in votes_data:

            yes = no = other = 0

            tally = re.findall(r'\b([YNEA])[A-Z]+:\s{,3}(\d{,3})', text)
            for vtype, vcount in tally:
                # \d{,3} also matches the empty string (e.g. when the count
                # is shown as '-'); treat that as zero.
                vcount = int(vcount) if vcount else 0
                if vtype == 'Y':
                    yes = vcount
                elif vtype == 'N':
                    no = vcount
                else:
                    other += vcount

            vote = Vote(act_chamber, act_date, motion, yes > no, yes, no, other)

            # In lengthy documents, the "header" can be repeated in the middle
            # of content. This regex gets rid of it.
            vote_lines = re_header.sub('', text)
            vote_lines = vote_lines.split('\r\n')

            # Each name list starts with one of these prefixes; slicing by
            # len(prefix) keeps the first name intact for every prefix.
            prefixes = (
                ('Yeas: ', vote.yes),
                ('Nays: ', vote.no),
                ('Excused: ', vote.other),
                ('Absent: ', vote.other),
            )

            vote_type = None
            for vote_list in vote_lines:
                for prefix, recorder in prefixes:
                    if vote_list.startswith(prefix):
                        vote_list, vote_type = vote_list[len(prefix):], recorder
                        break
                else:
                    # A blank line ends the current list; any other line is
                    # a continuation of the list in progress.
                    if vote_list.strip() == '':
                        vote_type = None
                if vote_type:
                    for name in vote_list.split(','):
                        name = name.strip()
                        if name:
                            vote_type(name)

            vote.add_source(url)
            bill.add_vote(vote)
Exemple #7
0
    def scrape_vote(self, bill, vote_chamber, bill_id, vote_id, vote_date,
                    action_text):
        """Scrape one roll-call results page and add the vote to ``bill``.

        The page lists alternating name / vote-code text nodes.  Names are
        grouped by code (Y, N, P, A) and the groups are checked against the
        "Total ..." figures when the page provides them.
        """
        url = ('http://alisondb.legislature.state.al.us/Alison/'
               'GetRollCallVoteResults.aspx?'
               'VOTE={0}&BODY={1}&INST={2}&SESS={3}'.
               format(vote_id, vote_chamber, bill_id, self.session_id))
        doc = lxml.html.fromstring(self.get(url=url).text)

        voters = {'Y': [], 'N': [], 'P': [], 'A': []}

        # Text nodes come in (name, vote code) pairs; a trailing unpaired
        # node is ignored.
        cells = doc.xpath('//table/tr/td/font/text()')
        for voter_name, cast in zip(cells[0::2], cells[1::2]):
            placeholder = (voter_name.endswith(", Vacant") or
                           voter_name.startswith("Total ") or
                           not voter_name.strip())
            if not placeholder:
                voters[cast].append(voter_name)

        # Check name counts against totals listed on the site
        listed_yea = doc.xpath('//*[starts-with(text(), "Total Yea")]/text()')
        if not listed_yea:
            total_yea = len(voters['Y'])
        else:
            total_yea = int(listed_yea[0].split(":")[-1])
            assert total_yea == len(voters['Y']), "Yea count incorrect"

        listed_nay = doc.xpath('//*[starts-with(text(), "Total Nay")]/text()')
        if not listed_nay:
            total_nay = len(voters['N'])
        else:
            total_nay = int(listed_nay[0].split(":")[-1])
            assert total_nay == len(voters['N']), "Nay count incorrect"

        listed_absent = doc.xpath(
            '//*[starts-with(text(), "Total Absent")]/text()')
        if listed_absent:
            absent_total = int(listed_absent[0].split(":")[-1])
            assert absent_total == len(voters['A']), "Absent count incorrect"
        total_other = len(voters['P']) + len(voters['A'])

        vote = Vote(
            self.CHAMBERS[vote_chamber[0]], vote_date, action_text,
            total_yea > total_nay, total_yea, total_nay, total_other)
        vote.add_source(url)

        groups = (
            (vote.yes, voters['Y']),
            (vote.no, voters['N']),
            (vote.other, voters['A'] + voters['P']),
        )
        for record, members in groups:
            for member in members:
                record(member)

        bill.add_vote(vote)
Exemple #8
0
    def scrape_vote(self, bill, date, motion, url):
        """Scrape the vote page at ``url`` and add the vote to ``bill``.

        Pages that still say "not yet official" are skipped.  The chamber is
        taken from the URL suffix; counts and member names are read from the
        Yeas / Nays / Non Voting / Present sections.
        """
        raw_page = self.urlopen(url)

        # Sometimes they link to vote pages before they go live
        if "not yet official" in raw_page:
            return

        doc = lxml.html.fromstring(raw_page)

        actor = "upper" if url.endswith("Senate") else "lower"

        count_path = "string(//td[@align = 'center' and contains(., '%s: ')])"

        def tally(label):
            # The count is the last whitespace-separated token of the cell.
            return int(doc.xpath(count_path % label).split()[-1])

        yes_count = tally("Yeas")
        no_count = tally("Nays")
        other_count = tally("Non Voting")
        other_count += tally("Present")

        # A motion passes only when yeas outnumber everything else combined.
        passed = yes_count > no_count + other_count
        vote = Vote(actor, date, motion, passed, yes_count, no_count, other_count)
        vote.add_source(url)

        vote_path = "//h3[. = '%s']/following-sibling::table[1]/tr/td/a"
        sections = (
            ("Yeas", vote.yes),
            ("Nays", vote.no),
            ("Non Voting", vote.other),
            ("Present", vote.other),
        )
        for heading, record in sections:
            for anchor in doc.xpath(vote_path % heading):
                record(anchor.text)

        bill.add_vote(vote)
Exemple #9
0
    def parse_senate_vote(self, url):
        """Download a senate vote PDF from ``url`` and return the parsed vote.

        Before the "YES NO ABS EXC" header line, the date and the PASSED
        marker are looked for.  After it, every line is expected to hold two
        "NAME <spaces> X" entries, where the number of spaces before the X
        encodes the vote (1 = yes, 2 = no, anything else = other), until the
        TOTALS line supplies the counts.

        Raises:
            AttributeError: if a line inside the vote grid does not match
                the expected two-column layout.
        """
        vote = Vote('upper', '?', 'senate passage', False, 0, 0, 0)
        vote.add_source(url)

        fname, resp = self.urlretrieve(url)
        try:
            sv_text = convert_sv_text(convert_pdf(fname, 'text'))
        finally:
            # Remove the downloaded file even when conversion fails.
            os.remove(fname)
        in_votes = False

        for line in sv_text:
            if not in_votes:
                dmatch = re.search(r'DATE:(\d{2}-\d{2}-\d{2})', line)
                if dmatch:
                    date = dmatch.groups()[0]
                    vote['date'] = datetime.strptime(date, '%m-%d-%y')

                if 'YES NO ABS EXC' in line:
                    in_votes = True
                elif 'PASSED' in line:
                    vote['passed'] = True

            else:
                if 'TOTALS' in line:

                    # Lt. Governor voted
                    if 'GOVERNOR' in line:
                        name, spaces, line = re.match(r' ([A-Z,.]+)(\s+)X(.*)',
                                                      line).groups()
                        if len(spaces) == 1:
                            vote.yes(name)
                        else:
                            vote.no(name)

                    _, yes, no, absent, excused = line.split()
                    vote['yes_count'] = int(yes)
                    vote['no_count'] = int(no)
                    vote['other_count'] = int(absent) + int(excused)
                    # no longer in votes
                    in_votes = False
                    continue

                # pull votes out
                matches = re.match(r' ([A-Z,.]+)(\s+)X\s+([A-Z,.]+)(\s+)X',
                                   line).groups()
                name1, spaces1, name2, spaces2 = matches

                # vote can be determined by # of spaces
                for name, spaces in ((name1, spaces1), (name2, spaces2)):
                    if len(spaces) == 1:
                        vote.yes(name)
                    elif len(spaces) == 2:
                        vote.no(name)
                    else:
                        vote.other(name)
        return vote
Exemple #10
0
    def scrape_vote(self, bill, date, url):
        """Scrape the vote page at ``url`` and add the vote to ``bill``.

        The chamber, optional committee and motion are taken from the
        comma-separated ``hdVote`` heading; the counts come from the
        ``tdAyes``, ``tdNays``, ``tdExcused`` and ``tdAbsent`` cells and the
        individual votes from the ``tblVotes`` table.

        Raises:
            ScrapeError: if the heading names neither House nor Senate.
        """
        with self.urlopen(url) as page:
            page = lxml.html.fromstring(page)

            header = page.xpath("string(//h4[contains(@id, 'hdVote')])")

            location = header.split(', ')[1]

            if location.startswith('House'):
                chamber = 'lower'
            elif location.startswith('Senate'):
                chamber = 'upper'
            else:
                # Report the offending text; ``chamber`` is not bound on
                # this path, so formatting it would raise UnboundLocalError.
                raise ScrapeError("Bad chamber: %s" % location)

            # Whatever follows the chamber word is the committee name.
            committee = ' '.join(location.split(' ')[1:]).strip()
            if not committee or committee.startswith('of Representatives'):
                committee = None

            motion = ', '.join(header.split(', ')[2:]).strip()

            yes_count = int(
                page.xpath("string(//td[contains(@id, 'tdAyes')])"))
            no_count = int(
                page.xpath("string(//td[contains(@id, 'tdNays')])"))
            excused_count = int(
                page.xpath("string(//td[contains(@id, 'tdExcused')])"))
            absent_count = int(
                page.xpath("string(//td[contains(@id, 'tdAbsent')])"))
            other_count = excused_count + absent_count

            passed = yes_count > no_count

            if motion.startswith('Do Pass'):
                vote_type = 'passage'
            elif motion == 'Concurred in amendments':
                vote_type = 'amendment'
            elif motion == 'Veto override':
                vote_type = 'veto_override'
            else:
                vote_type = 'other'

            vote = Vote(chamber, date, motion, passed, yes_count, no_count,
                        other_count)
            vote['type'] = vote_type

            if committee:
                vote['committee'] = committee

            vote.add_source(url)

            # The voter's name is in the cell just before the vote cell.
            for td in page.xpath("//table[contains(@id, 'tblVotes')]/tr/td"):
                if td.text == 'Yea':
                    vote.yes(td.getprevious().text.strip())
                elif td.text == 'Nay':
                    vote.no(td.getprevious().text.strip())
                elif td.text in ('Excused', 'Absent'):
                    vote.other(td.getprevious().text.strip())

            bill.add_vote(vote)
Exemple #11
0
    def scrape_bill_details(self, url, bill):
        """Scrape summary, versions, actions and votes from a bill page.

        Everything found on the page at ``url`` is added to ``bill``: the
        first summary paragraph, each entry under "FULL TEXT" as a version,
        and each entry under "HISTORY" as an action.  History entries that
        match ``self.vote_strip_re`` also produce a vote.
        """
        html = self.get(url, retry_on_404=True).text
        doc = lxml.html.fromstring(html)

        # summary sections
        summary = doc.xpath('//h4[starts-with(text(), "SUMMARY")]/following-sibling::p/text()')
        if summary and summary[0].strip():
            bill['summary'] = summary[0].strip()

        # versions
        for va in doc.xpath('//h4[text()="FULL TEXT"]/following-sibling::ul[1]/li/a[1]'):

            # 11/16/09 \xa0House: Prefiled and ordered printed; offered 01/13/10 10100110D
            date, desc = va.text.split(u' \xa0')
            # chop off last part; the result must be assigned back because
            # rsplit does not modify the string in place
            desc = desc.rsplit(' ', 1)[0]
            link = va.get('href')
            date = datetime.datetime.strptime(date, '%m/%d/%y')

            # budget bills in VA are searchable but no full text available
            if '+men+' in link:
                self.warning('not adding budget version, bill text not available')
            else:
                # VA duplicates reprinted bills, lets keep the original name
                bill.add_version(desc, BASE_URL+link, date=date,
                                 mimetype='text/html',
                                 on_duplicate='use_old')

        # actions
        for ali in doc.xpath('//h4[text()="HISTORY"]/following-sibling::ul[1]/li'):
            date, action = ali.text_content().split(u' \xa0')
            actor, action = action.split(': ', 1)

            actor = self.actor_map[actor]
            date = datetime.datetime.strptime(date.strip(), '%m/%d/%y')

            # if action ends in (##-Y ##-N) remove that part
            vrematch = self.vote_strip_re.match(action)
            if vrematch:
                action, y, n, o = vrematch.groups()
                vote = Vote(actor, date, action, int(y) > int(n),
                            int(y), int(n), 0)
                vote_url = ali.xpath('a/@href')
                if vote_url:
                    self.parse_vote(vote, vote_url[0])
                    vote.add_source(BASE_URL + vote_url[0])
                # set other count, it isn't provided
                vote['other_count'] = len(vote['other_votes'])
                #vote.validate()
                bill.add_vote(vote)

            # categorize actions; the for/else falls back to 'other' when no
            # classifier pattern matches
            for pattern, atype in self._action_classifiers:
                if re.match(pattern, action):
                    break
            else:
                atype = 'other'

            # if matched a 'None' atype, don't add the action
            if atype:
                bill.add_action(actor, action, date, type=atype)
Exemple #12
0
    def scrape_vote(self, bill, date, motion, url):
        """Scrape the vote page at ``url`` and add the vote to ``bill``.

        The chamber is taken from the URL suffix; counts and member names
        are read from the Yeas / Nays / Non Voting / Present sections.
        """
        doc = lxml.html.fromstring(self.urlopen(url))

        actor = 'upper' if url.endswith('Senate') else 'lower'

        # Each count is the last whitespace-separated token of its cell.
        count_path = "string(//td[@align = 'center' and contains(., '%s: ')])"
        yeas = int(doc.xpath(count_path % "Yeas").split()[-1])
        nays = int(doc.xpath(count_path % "Nays").split()[-1])
        not_voting = int(doc.xpath(count_path % "Non Voting").split()[-1])
        present = int(doc.xpath(count_path % "Present").split()[-1])
        others = not_voting + present

        # Passing requires yeas to outnumber nays and others combined.
        vote = Vote(actor, date, motion, yeas > nays + others, yeas,
                    nays, others)
        vote.add_source(url)

        vote_path = "//h3[. = '%s']/following-sibling::table[1]/tr/td/a"
        for anchor in doc.xpath(vote_path % "Yeas"):
            vote.yes(anchor.text)
        for anchor in doc.xpath(vote_path % "Nays"):
            vote.no(anchor.text)
        # Both remaining sections are recorded as "other" votes.
        for heading in ("Non Voting", "Present"):
            for anchor in doc.xpath(vote_path % heading):
                vote.other(anchor.text)

        bill.add_vote(vote)
Exemple #13
0
    def scrape_vote(self, bill, date, motion, url):
        """Scrape the vote page at ``url`` and add the vote to ``bill``.

        Pages that still say "not yet official" are skipped.  Counts come
        from the centred count cells; member names come from the tables that
        follow the "ms-standardheader" elements, which are taken in the
        order yes, no, other, other.
        """
        raw_page = self.urlopen(url)

        # Sometimes they link to vote pages before they go live
        if 'not yet official' in raw_page:
            return

        doc = lxml.html.fromstring(raw_page)

        actor = 'upper' if url.endswith('Senate') else 'lower'

        count_path = "string(//td[@align = 'center' and contains(., '%s: ')])"

        def tally(label):
            # The count is the last whitespace-separated token of the cell.
            return int(doc.xpath(count_path % label).split()[-1])

        yes_count = tally("Yeas")
        no_count = tally("Nays")
        other_count = tally("Non Voting")
        other_count += tally("Present")

        passed = yes_count > no_count + other_count
        vote = Vote(actor, date, motion, passed, yes_count,
                    no_count, other_count)
        vote.add_source(url)

        tables = doc.xpath(
            '//*[contains(@class, "ms-standardheader")]/'
            'following-sibling::table')
        # zip() stops at the shorter sequence, so extra tables are ignored.
        method_names = ['yes', 'no', 'other', 'other']
        for method_name, table in zip(method_names, tables):
            for anchor in table.xpath('.//a'):
                getattr(vote, method_name)(anchor.text_content())
        bill.add_vote(vote)
Exemple #14
0
    def add_vote(self, bill, chamber, date, text, url):
        """Create a vote from the tally in ``text`` and add it to ``bill``.

        The yes/no counts are taken from the first "Ayes ..., Noes/Nays ..."
        tally in ``text``.  When ``url`` is given it is recorded as a source
        and the individual votes are loaded from it.

        Raises:
            IndexError: if ``text`` contains no tally.
        """
        tallies = re.findall(r'Ayes,? (\d+)[,;]\s+N(?:oes|ays),? (\d+)', text)
        first = tallies[0]
        yes, no = int(first[0]), int(first[1])

        # The first classifier whose pattern matches decides the vote type.
        vtype = next(
            (kind for pattern, kind in motion_classifiers.iteritems()
             if re.match(pattern, text)),
            'other')

        v = Vote(chamber, date, text, yes > no, yes, no, 0, type=vtype)

        # fetch the vote itself
        if url:
            v.add_source(url)

            if 'av' in url:
                self.add_house_votes(v, url)
            elif 'sv' in url:
                self.add_senate_votes(v, url)

        # The other count is derived from the names recorded as "other".
        v['other_count'] = len(v['other_votes'])
        v.validate()
        bill.add_vote(v)
Exemple #15
0
    def add_vote(self, bill, chamber, date, line, text):
        """Create a vote from the tally in ``text`` and add it to ``bill``.

        The yes/no counts are taken from the first "Ayes N, Noes M" tally in
        ``text``.  When a "/votes/" link is found through ``line``, it is
        recorded as a source and the individual votes are loaded from the
        downloaded file.

        Raises:
            IndexError: if ``text`` contains no tally.
        """
        tallies = re.findall(r'Ayes (\d+)\, Noes (\d+)', text)
        first = tallies[0]
        yes, no = int(first[0]), int(first[1])

        # The first classifier whose pattern matches decides the vote type.
        vtype = next(
            (kind for pattern, kind in motion_classifiers.iteritems()
             if re.match(pattern, text)),
            'other')

        v = Vote(chamber, date, text, yes > no, yes, no, 0, type=vtype)

        # fetch the vote itself
        # NOTE(review): an XPath starting with '//' searches from the
        # document root rather than from ``line`` - confirm that is intended.
        anchors = line.xpath('//a[contains(@href, "/votes/")]')
        if anchors:
            href = anchors[0].get('href')
            v.add_source(href)

            # NOTE(review): the downloaded file is not removed here - verify
            # whether the add_*_votes helpers clean it up.
            filename, resp = self.urlretrieve(href)

            if 'av' in href:
                self.add_house_votes(v, filename)
            elif 'sv' in href:
                self.add_senate_votes(v, filename)

        bill.add_vote(v)
Exemple #16
0
    def scrape_vote(self, bill, chamber, date, url):
        """Scrape a roll-call PDF at ``url`` and add the vote to ``bill``.

        The motion is the fifth line of the converted text.  Counts come
        from the "Yeas - N", "Nays - N" and "Not Voting - N" figures, and
        individual votes from the columns that start at the tenth line.
        The vote is skipped when the text has no fifth line or no "Yeas"
        figure.

        Raises:
            AttributeError: if the "Nays" or "Not Voting" figure is missing.
        """
        (path, resp) = self.urlretrieve(url)
        try:
            text = convert_pdf(path, 'text')
        finally:
            # Remove the downloaded file even when conversion fails.
            os.remove(path)

        lines = text.split('\n')

        try:
            motion = lines[4].strip()
        except IndexError:
            return

        try:
            yes_count = int(re.search(r'Yeas - (\d+)', text).group(1))
        except AttributeError:
            return

        no_count = int(re.search(r'Nays - (\d+)', text).group(1))
        other_count = int(re.search(r'Not Voting - (\d+)', text).group(1))
        passed = yes_count > (no_count + other_count)

        vote = Vote(chamber, date, motion, passed, yes_count, no_count,
                    other_count)
        vote.add_source(url)

        reached_pairs = False
        for line in lines[9:]:
            if 'after roll call' in line or 'Indication of Vote' in line:
                break
            if 'Presiding' in line:
                continue

            # Columns on a line are separated by "-<number>" markers.
            for col in re.split(r'-\d+', line):
                col = col.strip()
                if not col:
                    continue

                match = re.match(r'(Y|N|EX|\*)\s+(.+)$', col)
                if not match:
                    # A column without a vote marker is recorded as "other".
                    vote.other(col)
                    continue

                marker, name = match.group(1), match.group(2)
                if name == "PAIR":
                    # Nothing after the PAIR entry is read.
                    reached_pairs = True
                    break
                if marker == 'Y':
                    vote.yes(name)
                elif marker == 'N':
                    vote.no(name)
                else:
                    vote.other(name)

            if reached_pairs:
                break

        vote.validate()
        bill.add_vote(vote)
Exemple #17
0
    def scrape_committee_vote(self, bill, actor, date, motion, page, url, uniqid):
        """Build a committee vote from the first table on ``page``.

        The yes / no / other voters are read from the first three cells of
        the vote row, one voter per ``<br>`` tail.  Counts are the number of
        names found, and the vote is added to ``bill`` with ``uniqid`` as
        its ``_vote_id``.
        """
        table = page.xpath("//table")[0]
        row = table.xpath(".//tr")[0]
        if row[0].text_content() == 'Votes:':
            # The new website layout keeps the votes in the third row.
            row = table.xpath(".//tr")[2]

        cells = row.xpath(".//td")
        if len(cells) >= 3:
            yes, no, other = cells[:3]
        else:
            # With fewer than three cells only the first one is used.
            yes, no, other = cells[0], None, None

        def voter_names(cell):
            # Collect the non-blank text following each <br> in the cell.
            if cell is None:
                return []
            names = []
            for br in cell.xpath(".//br"):
                if not br.tail:
                    continue
                name = br.tail.strip()
                if name:
                    names.append(name)
            return names

        names_by_kind = (
            ('yes', voter_names(yes)),
            ('no', voter_names(no)),
            ('other', voter_names(other)),
        )
        yes_count, no_count, other_count = [
            len(names) for _, names in names_by_kind]

        vote = Vote(
            actor,
            date,
            motion,
            (yes_count > no_count),
            yes_count,
            no_count,
            other_count,
            _vote_id=uniqid)
        vote.add_source(url)

        for kind, names in names_by_kind:
            for voter in names:
                getattr(vote, kind)(voter)

        bill.add_vote(vote)
Exemple #18
0
    def scrape_vote(self, bill, date, url):
        """Scrape the vote page at ``url`` and add the vote to ``bill``.

        The chamber, optional committee and motion are taken from the
        comma-separated ``hdVote`` heading; the counts come from the
        ``tdAyes``, ``tdNays``, ``tdExcused`` and ``tdAbsent`` cells and the
        individual votes from the ``tblVotes`` table.  The vote is skipped
        when no motion can be detected.

        Raises:
            ScrapeError: if the heading names neither House nor Senate.
        """
        with self.urlopen(url) as page:
            page = lxml.html.fromstring(page)

            header = page.xpath("string(//h4[contains(@id, 'hdVote')])")

            location = header.split(", ")[1]

            if location.startswith("House"):
                chamber = "lower"
            elif location.startswith("Senate"):
                chamber = "upper"
            else:
                # Report the offending text; ``chamber`` is not bound on
                # this path, so formatting it would raise UnboundLocalError.
                raise ScrapeError("Bad chamber: %s" % location)

            # Whatever follows the chamber word is the committee name.
            committee = " ".join(location.split(" ")[1:]).strip()
            if not committee or committee.startswith("of Representatives"):
                committee = None

            motion = ", ".join(header.split(", ")[2:]).strip()
            if not motion:
                # If we can't detect a motion, skip this vote
                return

            yes_count = int(page.xpath("string(//td[contains(@id, 'tdAyes')])"))
            no_count = int(page.xpath("string(//td[contains(@id, 'tdNays')])"))
            excused_count = int(page.xpath("string(//td[contains(@id, 'tdExcused')])"))
            absent_count = int(page.xpath("string(//td[contains(@id, 'tdAbsent')])"))
            other_count = excused_count + absent_count

            passed = yes_count > no_count

            if motion.startswith("Do Pass"):
                vote_type = "passage"
            elif motion == "Concurred in amendments":
                vote_type = "amendment"
            elif motion == "Veto override":
                vote_type = "veto_override"
            else:
                vote_type = "other"

            vote = Vote(chamber, date, motion, passed, yes_count, no_count, other_count)
            vote["type"] = vote_type

            if committee:
                vote["committee"] = committee

            vote.add_source(url)

            # The voter's name is in the cell just before the vote cell.
            for td in page.xpath("//table[contains(@id, 'tblVotes')]/tr/td"):
                if td.text == "Yea":
                    vote.yes(td.getprevious().text.strip())
                elif td.text == "Nay":
                    vote.no(td.getprevious().text.strip())
                elif td.text in ("Excused", "Absent"):
                    vote.other(td.getprevious().text.strip())

            bill.add_vote(vote)
Exemple #19
0
    def scrape_vote(self, bill, action_text, url):
        """Scrape the vote page at ``url`` and add the vote to ``bill``.

        ``action_text`` looks like
        "Vote - Senate Floor - Third Reading Passed (46-0) - 01/16/12" and
        supplies the chamber, the motion, the outcome and provisional
        yes/no counts.  The page itself supplies the date, the full counts
        and the individual votes.

        Raises:
            Exception: if the chamber prefix or the motion outcome is not
                recognised.
        """
        doc = lxml.html.fromstring(self.urlopen(url))

        # process action_text - might look like "Vote - Senate Floor - Third Reading Passed (46-0) - 01/16/12"
        senate_prefix = 'Vote - Senate Floor - '
        house_prefix = 'Vote - House Floor - '
        if action_text.startswith(senate_prefix):
            action_text = action_text[len(senate_prefix):]
            chamber = 'upper'
        elif action_text.startswith(house_prefix):
            action_text = action_text[len(house_prefix):]
            chamber = 'lower'
        else:
            # Fail with a clear message instead of an unbound ``chamber``.
            raise Exception('unknown chamber in action: %s' % action_text)

        motion, unused_date = action_text.split(' - ')
        yes_count, no_count = re.findall(r'\((\d+)-(\d+)\)', motion)[0]
        if 'Passed' in motion:
            motion = motion.split(' Passed')[0]
            passed = True
        elif 'Adopted' in motion:
            motion = motion.split(' Adopted')[0]
            passed = True
        elif 'Rejected' in motion:
            motion = motion.split(' Rejected')[0]
            passed = False
        elif 'Floor Amendment' in motion:
            passed = int(yes_count) > int(no_count)
        else:
            raise Exception('unknown motion: %s' % motion)

        # The date is filled in below from the page's "Calendar Date" line.
        vote = Vote(chamber=chamber, date=None, motion=motion,
                    yes_count=int(yes_count), no_count=int(no_count),
                    other_count=0, passed=passed)
        vfunc = None

        nobrs = doc.xpath('//nobr/text()')
        for text in nobrs:
            text = text.replace(u'\xa0', ' ')
            if text.startswith('Calendar Date: '):
                # %I (12-hour clock) is required for %p to take effect; with
                # %H the AM/PM marker is parsed but ignored.
                vote['date'] = datetime.datetime.strptime(
                    text.split(': ', 1)[1], '%b %d, %Y %I:%M %p')
            elif 'Yeas' in text and 'Nays' in text and 'Not Voting' in text:
                self.debug(text)
                yeas, nays, nv, exc, absent = re.match(
                    r'(\d+) Yeas\s+(\d+) Nays\s+(\d+) Not Voting\s+'
                    r'(\d+) Excused \(Absent\)\s+(\d+) Absent', text).groups()
                vote['yes_count'] = int(yeas)
                vote['no_count'] = int(nays)
                vote['other_count'] = int(nv) + int(exc) + int(absent)
            elif 'Voting Yea' in text:
                vfunc = vote.yes
            elif 'Voting Nay' in text:
                vfunc = vote.no
            elif 'Not Voting' in text or 'Excused' in text:
                vfunc = vote.other
            elif vfunc:
                # Any other text after a section heading is a voter's name.
                vfunc(text)

        vote.validate()
        vote.add_source(url)
        bill.add_vote(vote)
Exemple #20
0
    def scrape_votes(self, bill, link):
        """Parse every roll call on a vote-detail page and add each to ``bill``.

        The text of the ``lblVoteData`` span is cut into one chunk per vote at
        each "<word> by <name> -" header.  Every chunk is then split on a run
        of ten non-breaking spaces into the motion followed by tally and
        name-list segments.  Returns ``bill``.
        """
        segment_sep = u"\xa0" * 10
        tally_re = re.compile(r"\d+$")
        aye_names_re = re.compile("^.+voting aye were: (.+) -")
        no_names_re = re.compile("^.+voting no were: (.+) -")
        other_names_re = re.compile("^.+present and not voting were: (.+) -")
        # The chamber is encoded in the link itself, so it is the same for
        # every vote on the page.
        chamber = "lower" if "ChamberVoting=H" in link else "upper"

        with self.urlopen(link) as page:
            page = lxml.html.fromstring(page)
            vote_text = page.xpath("//span[@id='lblVoteData']")[0].text_content()
            chunks = re.split(r"\w+? by [\w ]+?\s+-", vote_text.strip())[1:]

            for chunk in chunks:
                segments = chunk.split(segment_sep)
                motion = segments[0]

                # Stays None when the motion text carries no m/d/y date.
                vote_date = re.search(r"(\d+/\d+/\d+)", motion)
                if vote_date:
                    vote_date = datetime.datetime.strptime(vote_date.group(), "%m/%d/%Y")

                passed = (
                    "Passed" in motion
                    or "Recommended for passage" in motion
                    or "Adopted" in segments[1]
                )

                yes_count = no_count = other_count = 0
                ayes, nos, others = [], [], []

                for segment in segments[1:]:
                    segment = segment.strip()
                    tally = tally_re.search(segment)
                    if tally and segment.startswith("Ayes..."):
                        yes_count = int(tally.group())
                    elif tally and segment.startswith("Noes..."):
                        no_count = int(tally.group())
                    elif tally and segment.startswith("Present and not voting..."):
                        other_count += int(tally.group())
                    elif aye_names_re.search(segment):
                        ayes = aye_names_re.search(segment).group(1).split(", ")
                    elif no_names_re.search(segment):
                        nos = no_names_re.search(segment).group(1).split(", ")
                    elif other_names_re.search(segment):
                        others += other_names_re.search(segment).group(1).split(", ")

                vote = Vote(chamber, vote_date, motion, passed, yes_count, no_count, other_count)
                vote.add_source(link)
                for member in ayes:
                    vote.yes(member)
                for member in nos:
                    vote.no(member)
                for member in others:
                    vote.other(member)

                vote.validate()
                bill.add_vote(vote)

        return bill
Exemple #21
0
    def scrape_vote(self, bill, name, url):
        """Scrape one roll-call page and attach the vote to ``bill``.

        ``name`` is used as the vote's motion.  The column layout of the
        member table depends on the chamber, which is taken from the URL
        ("VOTE/H" means the lower chamber).  Pages containing
        "BUDGET ADDRESS" are skipped and nothing is added.
        """
        if "VOTE/H" in url:
            vote_chamber = "lower"
            cols = (1, 5, 9, 13)
            name_offset = 3
            yes_offset = 0
            no_offset = 1
        else:
            vote_chamber = "upper"
            cols = (1, 6)
            name_offset = 4
            yes_offset = 1
            no_offset = 2

        # Connecticut's SSL is causing problems with Scrapelib, so use Requests
        page = requests.get(url, verify=False).text

        if "BUDGET ADDRESS" in page:
            return

        page = lxml.html.fromstring(page)

        def labelled_count(label):
            """Return the first integer in the <span> containing ``label``."""
            span_text = page.xpath("string(//span[contains(., '%s')])" % label)
            return int(re.match(r"[^\d]*(\d+)[^\d]*", span_text).group(1))

        yes_count = labelled_count("Those voting Yea")
        no_count = labelled_count("Those voting Nay")
        other_count = labelled_count("Those absent")
        need_count = labelled_count("Necessary for")

        date = page.xpath("string(//span[contains(., 'Taken on')])")
        date = re.match(r".*Taken\s+on\s+(\d+/\s?\d+)", date).group(1)
        date = date.replace(" ", "")
        # The page gives only month/day; the year comes from the bill's session.
        date = datetime.datetime.strptime(date + " " + bill["session"], "%m/%d %Y").date()

        # Reaching the "Necessary for ..." threshold is enough to carry the
        # motion, so the comparison is inclusive.
        passed = yes_count >= need_count

        vote = Vote(vote_chamber, date, name, passed, yes_count, no_count, other_count)
        vote.add_source(url)

        table = page.xpath("//table")[0]
        for row in table.xpath("tr"):
            # Each row holds several members side by side; `cols` lists the
            # 1-based starting cell of every member group.
            for i in cols:
                member = row.xpath("string(td[%d])" % (i + name_offset)).strip()

                if not member or member == "VACANT":
                    continue

                if "Y" in row.xpath("string(td[%d])" % (i + yes_offset)):
                    vote.yes(member)
                elif "N" in row.xpath("string(td[%d])" % (i + no_offset)):
                    vote.no(member)
                else:
                    vote.other(member)

        bill.add_vote(vote)
Exemple #22
0
    def scrape_current(self, chamber, term):
        """Fetch the bill-status feed and save every bill with history in ``chamber``.

        For each bill in the feed's "content" list that has at least one
        HISTORY entry in this chamber, a ``Bill`` is built with its titles,
        document/version link, sponsors, actions and any votes found in the
        action text, and is then saved.
        """
        chamber_name = "Senate" if chamber == "upper" else "House"
        # perhaps we should save this data so we can make one request for both chambers?
        with self.urlopen(ksapi.url + "bill_status/") as bill_request:
            bill_request_json = json.loads(bill_request)
            bills = bill_request_json["content"]
            for bill_data in bills:
                # Filter out bills with no history in this chamber.  The
                # flag is recomputed per bill; it used to be initialised
                # under a different name than the one tested.
                bill_is_in_chamber = any(
                    history["chamber"] == chamber_name
                    for history in bill_data["HISTORY"]
                )
                if not bill_is_in_chamber:
                    continue

                # main
                bill = Bill(term, chamber, bill_data["BILLNO"], bill_data["SHORTTITLE"])
                bill.add_source(ksapi.url + "bill_status/" + bill_data["BILLNO"].lower())
                if bill_data["LONGTITLE"]:
                    bill.add_title(bill_data["LONGTITLE"])
                bill.add_document("apn", ksapi.ksleg + bill_data["apn"])
                bill.add_version("Latest", ksapi.ksleg + bill_data["apn"])

                # A lone sponsor is primary; with several, all are cosponsors.
                sponsor_type = "primary" if len(bill_data["SPONSOR_NAMES"]) == 1 else "cosponsor"
                for sponsor in bill_data["SPONSOR_NAMES"]:
                    bill.add_sponsor(sponsor_type, sponsor)

                for event in bill_data["HISTORY"]:
                    # Name lists are read from the same event on which their
                    # presence is tested.
                    if "committee_names" in event and "conferee_names" in event:
                        actor = " and ".join(event["committee_names"] + event["conferee_names"])
                    elif "committee_names" in event:
                        actor = " and ".join(event["committee_names"])
                    elif "conferee_names" in event:
                        actor = " and ".join(event["conferee_names"])
                    else:
                        # `chamber` is already "upper"/"lower"; comparing it
                        # with "Senate" always produced "lower".
                        actor = chamber

                    date = datetime.datetime.strptime(event["occurred_datetime"], "%Y-%m-%dT%H:%M:%S")
                    bill.add_action(actor, event["status"], date)

                    if event["action_code"] in ksapi.voted:
                        votes = votes_re.match(event["status"])
                        if votes:
                            vote = Vote(
                                chamber,
                                date,
                                votes.group(1),
                                event["action_code"] in ksapi.passed,
                                int(votes.group(2)),
                                int(votes.group(3)),
                                0,
                            )
                            vote.add_source(ksapi.ksleg + "bill_status/" + bill_data["BILLNO"].lower())
                            bill.add_vote(vote)

                self.save_bill(bill)
Exemple #23
0
    def scrape_votes(self, bill, bill_prefix, number, session):
        """Add one vote to ``bill`` for every journal link on its votes page.

        The page URL is built from ``session``, ``bill_prefix`` and
        ``number``.  Yes/no counts are the number of names listed, and the
        vote is recorded as passed when yeas outnumber nays.

        Raises ``ScrapeError`` for a chamber label other than House/Senate.
        """
        vote_url = ('http://www.legislature.state.oh.us/votes.cfm?ID=' +
                    '_'.join([session, bill_prefix, str(number)]))

        doc = lxml.html.fromstring(self.urlopen(vote_url))

        chamber_codes = {'House': 'lower', 'Senate': 'upper'}

        def names_in(container):
            """Return the non-empty cell texts of the container's name table."""
            texts = (td.xpath("string()") for td in container.xpath("table/tr/td"))
            return [text for text in texts if text]

        for jlink in doc.xpath("//a[contains(@href, 'JournalText')]"):
            date = datetime.datetime.strptime(jlink.text, "%m/%d/%Y").date()

            # "<chamber> - <motion>..." text from the second cell of the
            # link's row.
            details = jlink.xpath("string(../../../td[2])")
            parts = details.split(" - ")

            chamber = parts[0]
            if chamber not in chamber_codes:
                raise ScrapeError("Bad chamber: %s" % chamber)
            chamber = chamber_codes[chamber]

            motion = parts[1].split("\n")[0].strip()

            # The member lists sit in the element following the link's row.
            vote_row = jlink.xpath("../../..")[0].getnext()

            yeas = names_in(vote_row.xpath("td/font/div[contains(@id, 'Yea')]")[0])
            nays = names_in(vote_row.xpath("td/font/div[contains(@id, 'Nay')]")[0])

            yes_count, no_count = len(yeas), len(nays)

            vote = Vote(chamber, date, motion, yes_count > no_count,
                        yes_count, no_count, 0)

            for member in yeas:
                vote.yes(member)
            for member in nays:
                vote.no(member)

            vote.add_source(vote_url)

            bill.add_vote(vote)
Exemple #24
0
    def scrape_votes(self, bill, bill_prefix, number, session):
        """Scrape the votes page for a bill and attach each roll call found.

        One vote is created per link whose href contains "JournalText".  The
        link text is the vote date and the second cell of its row holds
        "<chamber> - <motion>".  Counts are derived from the listed names and
        ``passed`` is simply yeas > nays.

        Raises ``ScrapeError`` when the chamber is neither House nor Senate.
        """
        vote_url = ('http://www.legislature.state.oh.us/votes.cfm?ID=' +
                    session + '_' + bill_prefix + '_' + str(number))

        root = lxml.html.fromstring(self.urlopen(vote_url))

        for journal_link in root.xpath("//a[contains(@href, 'JournalText')]"):
            date = datetime.datetime.strptime(journal_link.text,
                                              "%m/%d/%Y").date()

            fields = journal_link.xpath("string(../../../td[2])").split(" - ")

            chamber = {'House': 'lower', 'Senate': 'upper'}.get(fields[0])
            if chamber is None:
                raise ScrapeError("Bad chamber: %s" % fields[0])

            # Only the first line after the chamber label is the motion.
            motion = fields[1].split("\n")[0].strip()

            names_row = journal_link.xpath("../../..")[0].getnext()

            # Gather the yea list first, then the nay list, from the divs
            # whose id contains the matching marker.
            roll = {}
            for key, marker in (('yes', 'Yea'), ('no', 'Nay')):
                holder = names_row.xpath(
                    "td/font/div[contains(@id, '%s')]" % marker)[0]
                listed = []
                for cell in holder.xpath("table/tr/td"):
                    cell_text = cell.xpath("string()")
                    if cell_text:
                        listed.append(cell_text)
                roll[key] = listed

            yes_count = len(roll['yes'])
            no_count = len(roll['no'])

            vote = Vote(chamber, date, motion, yes_count > no_count,
                        yes_count, no_count, 0)

            for voter in roll['yes']:
                vote.yes(voter)
            for voter in roll['no']:
                vote.no(voter)

            vote.add_source(vote_url)

            bill.add_vote(vote)
Exemple #25
0
    def parse_vote(self, bill, vote_date, vote_chamber, vote_status, vote_url):
        """Download a vote document, convert it to text and add the vote to ``bill``.

        The document at ``vote_url`` is converted with the external
        ``abiword`` program into /tmp/ksvote.txt.  A totals line creates the
        vote; "Yeas:", "Nays:", "Present ..." and "Absent or ..." lines list
        the members.  The vote is recorded as passed unless a line contains
        "the motion did not prevail".  Nothing is added when no totals line
        is found.
        """
        vote_chamber = 'upper' if vote_chamber == 'Senate' else 'lower'
        vote_date = datetime.datetime.strptime(vote_date, '%a %d %b %Y')

        vote_doc, resp = self.urlretrieve(vote_url)

        try:
            # Argument list instead of a shell string: the path is passed
            # verbatim and never interpreted by a shell.
            subprocess.check_call(['abiword', '--to=ksvote.txt', vote_doc],
                                  cwd='/tmp/')
            with open('/tmp/ksvote.txt') as converted:
                vote_lines = converted.readlines()
        finally:
            # Remove the download even when the conversion fails.
            os.remove(vote_doc)

        totals_re = re.compile(
            r'Yeas (\d+)[;,] Nays (\d+)[;,] '
            r'(?:Present but not voting|Present and Passing):? (\d+)[;,] '
            r'(?:Absent or not voting|Absent or Not Voting):? (\d+)')
        # Line prefix -> name of the Vote method recording the listed members.
        roll_prefixes = (
            ('Yeas:', 'yes'),
            ('Nays:', 'no'),
            ('Present ', 'other'),
            ('Absent or', 'other'),
        )

        vote = None
        passed = True
        for line in vote_lines:
            totals = totals_re.findall(line)
            if totals:
                yeas, nays, nv, absent = [int(count) for count in totals[0]]
                # default passed to true
                vote = Vote(vote_chamber, vote_date, vote_status, True, yeas,
                            nays, nv + absent)
                continue

            method = next((name for prefix, name in roll_prefixes
                           if line.startswith(prefix)), None)
            if method is not None:
                members = line.split(':', 1)[1].strip()
                for member in members.split(', '):
                    # "None." is the placeholder for an empty list.
                    if member != 'None.':
                        getattr(vote, method)(member)
            elif 'the motion did not prevail' in line:
                passed = False

        if vote:
            vote['passed'] = passed
            vote.add_source(vote_url)
            bill.add_vote(vote)
Exemple #26
0
    def scrape_votes(self, bill, votes_url):
        """Add a vote to ``bill`` for every "votehistory" link on the votes page.

        Each link points to a PDF roll call.  The PDF is converted to text
        and scanned for "<code>  <name>" pairs; Y and N are recorded as
        yes/no and every other code as "other".  Counts are derived from the
        names found, and the vote is marked passed when yeas outnumber nays.
        """
        html = self.urlopen(votes_url)
        doc = lxml.html.fromstring(html)
        doc.make_links_absolute(votes_url)

        # vote indicator, a few spaces, a name, newline or multiple spaces
        # NOTE(review): lines come from splitlines() and never contain '\n',
        # so a name at the very end of a line matches only when followed by
        # two whitespace characters -- confirm against real PDFs.
        VOTE_RE = re.compile(r'(Y|N|E|NV|A|P|-)\s{2,5}(\w.+?)(?:\n|\s{2})')

        for link in doc.xpath('//a[contains(@href, "votehistory")]'):

            # Link text is "... - <motion> - <date>" or, with fewer pieces,
            # ends in the date with no motion given.
            pieces = link.text.split(' - ')
            date = pieces[-1]
            if len(pieces) == 3:
                motion = pieces[1]
            else:
                motion = 'Third Reading'

            chamber = link.xpath('../following-sibling::td/text()')[0]
            if chamber == 'HOUSE':
                chamber = 'lower'
            elif chamber == 'SENATE':
                chamber = 'upper'
            else:
                self.warning('unknown chamber %s' % chamber)

            date = datetime.datetime.strptime(date, "%A, %B %d, %Y")

            # download the file
            fname, resp = self.urlretrieve(link.get('href'))
            try:
                pdflines = convert_pdf(fname, 'text').splitlines()
            finally:
                # Do not leave the download behind if conversion fails.
                os.remove(fname)

            vote = Vote(chamber, date, motion.strip(), False, 0, 0, 0)

            for line in pdflines:
                for vcode, name in VOTE_RE.findall(line):
                    if vcode == 'Y':
                        vote.yes(name)
                    elif vcode == 'N':
                        vote.no(name)
                    else:
                        vote.other(name)

            # fake the counts
            vote['yes_count'] = len(vote['yes_votes'])
            vote['no_count'] = len(vote['no_votes'])
            vote['other_count'] = len(vote['other_votes'])
            vote['passed'] = vote['yes_count'] > vote['no_count']
            vote.add_source(link.get('href'))

            bill.add_vote(vote)
Exemple #27
0
    def parse_vote(self, bill, vote_date, vote_chamber, vote_status, vote_url):
        """Convert a downloaded vote document to text and add the vote to ``bill``.

        ``abiword`` writes the text to /tmp/ksvote.txt.  Each stripped line
        is examined: a totals line creates the vote, member-list lines add
        names to it, and a line containing "the motion did not prevail"
        marks it as failed.  Nothing is added when no totals line is found.
        """
        vote_chamber = 'upper' if vote_chamber == 'Senate' else 'lower'
        vote_date = datetime.datetime.strptime(vote_date, '%a %d %b %Y')

        vote_doc, resp = self.urlretrieve(vote_url)

        try:
            # No shell involved: the path is handed to abiword as one
            # argument.
            subprocess.check_call(['abiword', '--to=ksvote.txt', vote_doc],
                                  cwd='/tmp/')
            with open('/tmp/ksvote.txt') as converted:
                vote_lines = converted.readlines()
        finally:
            # The download is removed whether or not the conversion worked.
            os.remove(vote_doc)

        totals_re = re.compile(
            r'Yeas (\d+)[;,] Nays (\d+)[;,] '
            r'(?:Present but not voting:|Present and Passing) (\d+)[;,] '
            r'(?:Absent or not voting:|Absent or Not Voting) (\d+)')

        vote = None
        passed = True

        def record(method_name, text):
            """Add each member listed after the first ':' via the named method."""
            for member in text.split(':', 1)[1].strip().split(', '):
                # "None." is the placeholder for an empty list.
                if member != 'None.':
                    getattr(vote, method_name)(member)

        for line in vote_lines:
            line = line.strip()
            totals = totals_re.findall(line)
            if totals:
                yeas, nays, nv, absent = [int(count) for count in totals[0]]
                # default passed to true
                vote = Vote(vote_chamber, vote_date, vote_status,
                            True, yeas, nays, nv + absent)
            elif line.startswith('Yeas:'):
                record('yes', line)
            elif line.startswith('Nays:'):
                record('no', line)
            elif line.startswith('Present '):
                record('other', line)
            elif line.startswith('Absent or'):
                record('other', line)
            elif 'the motion did not prevail' in line:
                passed = False

        if vote:
            vote['passed'] = passed
            vote.add_source(vote_url)
            bill.add_vote(vote)
Exemple #28
0
    def scrape_chamber_votes(self, chamber, session, url):
        """Save a vote for every <vote> element in the XML feed that belongs to ``chamber``.

        The chamber of the legislation is derived from its prefix (SB/SR are
        upper, HB/HR are lower).  Entries with an empty, "EX" or "ELECTION"
        prefix and entries for the other chamber are skipped.

        Raises ``Exception`` for any other prefix.
        """
        doc = lxml.etree.fromstring(self.urlopen(url))

        chamber_by_prefix = {'SB': 'upper', 'SR': 'upper',
                             'HB': 'lower', 'HR': 'lower'}
        ignored_prefixes = ('', 'EX', 'ELECTION')

        def total(node, attribute):
            """Return the named attribute of the node's <totals> as an int."""
            return int(node.xpath('totals/@' + attribute)[0])

        for vxml in doc.xpath('//vote'):
            legislation = vxml.get('legislation')
            motion = vxml.get('caption')
            timestamp = datetime.datetime.strptime(vxml.get('dateTime'),
                                                   '%Y-%m-%dT%H:%M:%S')

            leg_prefix = legislation.split(' ')[0]
            if leg_prefix in chamber_by_prefix:
                bill_chamber = chamber_by_prefix[leg_prefix]
            elif leg_prefix in ignored_prefixes:
                continue
            else:
                raise Exception('unknown legislation prefix: ' + legislation)

            # skip bills from other chamber
            if bill_chamber != chamber:
                continue

            unknown_count = total(vxml, 'unknown')
            excused_count = total(vxml, 'excused')
            nv_count = total(vxml, 'not-voting')
            no_count = total(vxml, 'nays')
            yes_count = total(vxml, 'yeas')

            vote = Vote(chamber,
                        timestamp,
                        motion,
                        passed=yes_count > no_count,
                        yes_count=yes_count,
                        no_count=no_count,
                        other_count=unknown_count + excused_count + nv_count,
                        session=session,
                        bill_id=legislation,
                        bill_chamber=bill_chamber)
            vote.add_source(url)

            # Anything that is not an explicit Y or N counts as "other".
            recorders = {'Y': vote.yes, 'N': vote.no}
            for member_node in vxml.xpath('member'):
                letter = member_node.get('vote')
                member = member_node.get('name')
                recorders.get(letter, vote.other)(member)

            self.save_vote(vote)
Exemple #29
0
    def scrape_committee_vote(self, bill, actor, date, motion, url, uniqid):
        """Scrape a committee vote page and attach the vote to ``bill``.

        The first row of the first table holds up to three cells (yes, no,
        other).  Each cell carries a bold label followed by the count, then
        one member name after every <br>.  With fewer than three cells only
        the first is read, as the yes block.  The vote is marked passed when
        the yes count exceeds the no count.
        """
        with self.urlopen(url) as page:
            page = lxml.html.fromstring(page)
        page.make_links_absolute(url)
        # The committee name is not used below; the lookup is kept so a page
        # without any <b> element still fails here.
        committee = page.xpath("//b")[0].text_content()
        first_row = page.xpath("//table")[0].xpath(".//tr")[0]
        cells = first_row.xpath(".//td")
        if len(cells) < 3:
            yes, no, other = cells[0], None, None
        else:
            yes, no, other = cells

        def summarize(cell):
            """Return the label, count and member names found in one cell."""
            if cell is None:
                return {"type": None, "count": None, "votes": []}

            label = cell.xpath("./b")[0].text_content()
            # The count is the text trailing the bold label, e.g. " - 5".
            tally = int(cell.xpath(".//b")[0].tail.replace("-", "").strip())
            names = [br.tail.strip() for br in cell.xpath(".//br") if br.tail]
            return {"type": label, "count": tally, "votes": names}

        blocks = {
            "yes": summarize(yes),
            "no": summarize(no),
            "other": summarize(other),
        }

        yes_count = blocks['yes']['count']
        no_count = blocks['no']['count'] or 0
        other_count = blocks['other']['count'] or 0

        vote = Vote(actor,
                    date,
                    motion, (yes_count > no_count),
                    yes_count,
                    no_count,
                    other_count,
                    _vote_id=uniqid)
        vote.add_source(url)

        # The block keys double as the names of the Vote recording methods.
        for kind in ('yes', 'no', 'other'):
            record = getattr(vote, kind)
            for voter in blocks[kind]['votes']:
                record(voter)

        bill.add_vote(vote)
Exemple #30
0
    def scrape_vote(self, bill, chamber, date, url):
        """Parse a PDF roll call and attach the validated vote to ``bill``.

        The motion is the fifth line of the converted text and the totals
        come from the "Yeas - N", "Nays - N" and "Not Voting - N" markers.
        The vote is marked passed only when the yeas exceed nays and
        not-voting combined.  Member entries are read from the tenth line on,
        until a stop marker or a "PAIR" entry is reached.
        """
        (path, resp) = self.urlretrieve(url)
        text = convert_pdf(path, 'text')
        os.remove(path)

        lines = text.split('\n')
        motion = lines[4].strip()

        def tally(label):
            """Return the integer that follows '<label> - ' in the text."""
            return int(re.search(label + r' - (\d+)', text).group(1))

        yes_count = tally('Yeas')
        no_count = tally('Nays')
        other_count = tally('Not Voting')
        passed = yes_count > (no_count + other_count)

        vote = Vote(chamber, date, motion, passed, yes_count, no_count,
                    other_count)
        vote.add_source(url)

        entry_re = re.compile(r'(Y|N|EX|\*)\s+(.+)$')
        reached_pair = False

        for line in lines[9:]:
            if 'after roll call' in line or 'Indication of Vote' in line:
                break
            if 'Presiding' in line:
                continue

            # Entries on a line are separated by "-<digits>" tokens.
            for col in re.split(r'-\d+', line):
                col = col.strip()
                if not col:
                    continue

                match = entry_re.match(col)
                if not match:
                    # Text without a leading vote code is recorded as other.
                    vote.other(col)
                    continue

                code, member = match.groups()
                if member == "PAIR":
                    reached_pair = True
                    break
                if code == 'Y':
                    vote.yes(member)
                elif code == 'N':
                    vote.no(member)
                else:
                    vote.other(member)

            # A PAIR entry ends the whole roll call, not just this line.
            if reached_pair:
                break

        vote.validate()
        bill.add_vote(vote)
Exemple #31
0
    def scrape_vote(self, bill, motion, url):
        """Scrape a roll-call page and attach the vote to ``bill``.

        Totals, date and outcome are read from the cell following each
        labelled cell ("Yeas (Y):", "Date:", "Outcome:", ...).  The chamber
        is taken from the URL.  The vote is recorded as passed only when the
        outcome cell reads exactly "PREVAILS".

        Raises ``ValueError`` when the URL names neither chamber or the date
        matches neither accepted format.
        """
        page = self.urlopen(url, retry_on_404=True)
        page = lxml.html.fromstring(page)

        def cell_after(label):
            """Return the text of the cell that follows the cell labelled ``label``."""
            labelled = page.xpath("//td[text() = '%s']" % label)[0]
            return labelled.xpath("string(following-sibling::td)")

        yes_count = int(cell_after('Yeas (Y):'))
        no_count = int(cell_after('Nays (N):'))
        abs_count = int(cell_after('Absent (X):'))
        ex_count = int(cell_after('Excused (E):'))

        other_count = abs_count + ex_count

        if 'chamber=House' in url:
            chamber = 'lower'
        elif 'chamber=Senate' in url:
            chamber = 'upper'
        else:
            # Without this branch `chamber` stayed unbound and the failure
            # only surfaced when the Vote was built.
            raise ValueError('cannot determine chamber from url: %s' % url)

        date = cell_after('Date:')
        try:
            date = datetime.datetime.strptime(date, "%B %d, %Y")
        except ValueError:
            # Abbreviated month with a trailing period, e.g. "Jan. 5, 2012".
            date = datetime.datetime.strptime(date, "%b. %d, %Y")

        outcome = cell_after('Outcome:')

        vote = Vote(chamber, date, motion,
                    outcome == 'PREVAILS',
                    yes_count, no_count, other_count)
        vote.add_source(url)

        member_cell = page.xpath("//td[text() = 'Member']")[0]
        # The first row of the member table is the header row.
        for row in member_cell.xpath("../../tr")[1:]:
            name = row.xpath("string(td[2])")

            vtype = row.xpath("string(td[4])")
            if vtype == 'Y':
                vote.yes(name)
            elif vtype == 'N':
                vote.no(name)
            elif vtype in ('X', 'E'):
                vote.other(name)

        bill.add_vote(vote)
Exemple #32
0
    def scrape_votes(self, bill, sponsor, link):
        """Parse every roll call on a vote-detail page and add each to ``bill``.

        The text of the ``lblVoteData`` span is cut into one chunk per vote
        at each "<bill_id> by <sponsor> - " header, and every chunk is split
        on a run of ten non-breaking spaces into the motion followed by tally
        and name-list segments.  When both tallies are present the outcome
        is yes > no; otherwise it is inferred from the text and both counts
        are recorded as 0.  Returns ``bill``.
        """
        segment_sep = u'\xa0' * 10
        vote_regex = re.compile(r'\d+$')
        aye_regex = re.compile('^.+voting aye were: (.+) -')
        no_regex = re.compile('^.+voting no were: (.+) -')

        with self.urlopen(link) as page:
            page = lxml.html.fromstring(page)
            raw_vote_data = page.xpath(
                "//span[@id='lblVoteData']")[0].text_content()
            raw_vote_data = raw_vote_data.strip().split(
                '%s by %s - ' % (bill['bill_id'], sponsor))[1:]
            for raw_vote in raw_vote_data:
                raw_vote = raw_vote.split(segment_sep)
                motion = raw_vote[0]

                # Stays None when the motion text carries no m/d/y date.
                vote_date = re.search(r'(\d+/\d+/\d+)', motion)
                if vote_date:
                    vote_date = datetime.datetime.strptime(
                        vote_date.group(), '%m/%d/%Y')

                passed = ('Passed' in motion) or ('Adopted' in raw_vote[1])
                yes_count = None
                no_count = None
                other_count = 0
                ayes = []
                nos = []

                for v in raw_vote[1:]:
                    tally = vote_regex.search(v)
                    if tally and v.startswith('Ayes...'):
                        yes_count = int(tally.group())
                    elif tally and v.startswith('Noes...'):
                        no_count = int(tally.group())
                    elif aye_regex.search(v):
                        ayes = aye_regex.search(v).group(1).split(', ')
                    elif no_regex.search(v):
                        nos = no_regex.search(v).group(1).split(', ')

                # Compare against None: a parsed tally of 0 (e.g. a
                # unanimous vote) is a real count, not a missing one.
                if yes_count is not None and no_count is not None:
                    passed = yes_count > no_count
                else:
                    yes_count = no_count = 0

                vote = Vote(bill['chamber'], vote_date, motion, passed,
                            yes_count, no_count, other_count)
                vote.add_source(link)
                for a in ayes:
                    vote.yes(a)
                for n in nos:
                    vote.no(n)
                bill.add_vote(vote)

        return bill
Exemple #33
0
    def scrape_vote(self, bill, motion, url):
        """Scrape a roll-call page and attach the vote to ``bill``.

        Each summary value (totals, date, outcome) is the text of the cell
        following its label cell.  The chamber comes from the URL, and the
        vote is recorded as passed only when the outcome is exactly
        "PREVAILS".

        Raises ``ValueError`` when the URL names neither chamber or the date
        matches neither accepted format.
        """
        page = self.get(url, retry_on_404=True).text
        page = lxml.html.fromstring(page)

        def value_for(label):
            """Return the text of the cell next to the one labelled ``label``."""
            label_cell = page.xpath("//td[text() = '%s']" % label)[0]
            return label_cell.xpath("string(following-sibling::td)")

        yes_count = int(value_for('Yeas (Y):'))
        no_count = int(value_for('Nays (N):'))
        abs_count = int(value_for('Absent (X):'))
        ex_count = int(value_for('Excused (E):'))

        other_count = abs_count + ex_count

        if 'chamber=House' in url:
            chamber = 'lower'
        elif 'chamber=Senate' in url:
            chamber = 'upper'
        else:
            # Fail here with a clear message instead of an unbound local
            # when the Vote is constructed.
            raise ValueError('cannot determine chamber from url: %s' % url)

        date = value_for('Date:')
        try:
            date = datetime.datetime.strptime(date, "%B %d, %Y")
        except ValueError:
            # Abbreviated month with a trailing period, e.g. "Jan. 5, 2012".
            date = datetime.datetime.strptime(date, "%b. %d, %Y")

        outcome = value_for('Outcome:')

        vote = Vote(chamber, date, motion,
                    outcome == 'PREVAILS',
                    yes_count, no_count, other_count)
        vote.add_source(url)

        member_cell = page.xpath("//td[text() = 'Member']")[0]
        # Skip the header row of the member table.
        for row in member_cell.xpath("../../tr")[1:]:
            name = row.xpath("string(td[2])")

            vtype = row.xpath("string(td[4])")
            if vtype == 'Y':
                vote.yes(name)
            elif vtype == 'N':
                vote.no(name)
            elif vtype in ('X', 'E'):
                vote.other(name)

        bill.add_vote(vote)
Exemple #34
0
    def parse_vote(self, bill, actor, date, motion, url, uniqid):
        """Parse a plain-text roll call and attach the vote to ``bill``.

        The page is matched against a YEAS / NAYS / ABSENT pattern.  The
        number after each heading is the count, and the text up to the next
        heading is split on runs of two or more whitespace characters to get
        the names.  The vote passes when yeas exceed nays.  When ``actor``
        is not "upper" or "lower" it is stored as the vote's ``location``
        and the chamber is left empty.
        """
        page = self.get(url).text
        bill.add_source(url)

        tally_re = re.compile(
            r"YEAS -?\s?(\d+)(.*)NAYS -?\s?(\d+)"
            r"(.*)ABSENT( OR NOT VOTING)? -?\s?"
            r"(\d+)(.*)",
            re.MULTILINE | re.DOTALL,
        )
        match = tally_re.search(page)
        yes_count = int(match.group(1))
        no_count = int(match.group(3))
        other_count = int(match.group(6))

        passed = yes_count > no_count

        if actor in ("upper", "lower"):
            vote_chamber, vote_location = actor, ""
        else:
            vote_chamber, vote_location = "", actor

        vote = Vote(
            vote_chamber,
            date,
            motion,
            passed,
            yes_count,
            no_count,
            other_count,
            location=vote_location,
            _vote_id=uniqid,
        )
        vote.add_source(url)

        # Pair each recorder with the regex group holding its name list.
        for record, group_index in ((vote.yes, 2), (vote.no, 4), (vote.other, 7)):
            for voter in re.split(r"\s{2,}", match.group(group_index).strip()):
                # Splitting an empty section yields one empty string.
                if voter:
                    record(voter)

        bill.add_vote(vote)
Exemple #35
0
    def scrape_vote(self, bill, chamber, url):
        """Scrape one roll-call page and attach the resulting Vote to ``bill``.

        Row 2 of the first table supplies, cell by cell: the date
        (``mm/dd/YYYY``), the motion text, the yes / no / other counts and
        the outcome (``'Pass'`` means passed). Rows from the fourth onward
        that have exactly two cells are read as (member name, vote cast).

        Nothing is added when the page reports that no roll-call details
        are available.
        """
        page = self.urlopen(url)
        if 'There are no details available for this roll call' in page:
            return
        page = page.replace('&nbsp;', ' ')
        page = lxml.html.fromstring(page)

        info_row = page.xpath("//table[1]/tr[2]")[0]

        date = info_row.xpath("string(td[1])")
        date = datetime.datetime.strptime(date, "%m/%d/%Y")

        motion = info_row.xpath("string(td[2])")
        yes_count = int(info_row.xpath("string(td[3])"))
        no_count = int(info_row.xpath("string(td[4])"))
        other_count = int(info_row.xpath("string(td[5])"))
        passed = info_row.xpath("string(td[6])") == 'Pass'

        # Classify the vote from the motion wording.
        if motion == 'Shall the bill pass?':
            vote_type = 'passage'
        elif motion == 'Shall the bill be read the third time?':
            vote_type = 'reading:3'
        elif 'be amended as' in motion:
            vote_type = 'amendment'
        else:
            vote_type = 'other'

        # The classification used to be computed and then thrown away (the
        # same variable was reused for each member's vote below); hand it to
        # the Vote so it is actually recorded.
        vote = Vote(chamber, date, motion, passed,
                    yes_count, no_count, other_count,
                    type=vote_type)
        vote.add_source(url)

        for tr in page.xpath("//table[1]/tr")[3:]:
            if len(tr.xpath("td")) != 2:
                continue

            # avoid splitting duplicate names
            name = tr.xpath("string(td[1])").strip()
            if not name.startswith(DOUBLED_NAMES):
                name = name.split(' of')[0]

            cast = tr.xpath("string(td[2])").strip()
            if cast.startswith('Yea'):
                vote.yes(name)
            elif cast.startswith('Nay'):
                vote.no(name)
            elif cast.startswith('Not Voting'):
                # Members who did not vote are not recorded by name.
                pass
            else:
                vote.other(name)

        bill.add_vote(vote)
Exemple #36
0
    def scrape_vote(self, bill, chamber, url):
        """Read a roll-call page and add the parsed Vote to ``bill``.

        The summary row (second row of the first table) holds the date in
        ``mm/dd/YYYY`` form, the motion, the yes / no / other totals and the
        result, where ``'Pass'`` marks a passed vote. Each later two-cell
        row pairs a member name with the vote that member cast.

        Returns without adding anything if the page says no roll-call
        details are available.
        """
        page = self.urlopen(url)
        if 'There are no details available for this roll call' in page:
            return
        page = page.replace('&nbsp;', ' ')
        page = lxml.html.fromstring(page)

        info_row = page.xpath("//table[1]/tr[2]")[0]

        date = info_row.xpath("string(td[1])")
        date = datetime.datetime.strptime(date, "%m/%d/%Y")

        motion = info_row.xpath("string(td[2])")
        yes_count = int(info_row.xpath("string(td[3])"))
        no_count = int(info_row.xpath("string(td[4])"))
        other_count = int(info_row.xpath("string(td[5])"))
        passed = info_row.xpath("string(td[6])") == 'Pass'

        # Derive the vote classification from the motion text. Previously
        # this value was never used and was clobbered by the member loop.
        if motion == 'Shall the bill pass?':
            motion_type = 'passage'
        elif motion == 'Shall the bill be read the third time?':
            motion_type = 'reading:3'
        elif 'be amended as' in motion:
            motion_type = 'amendment'
        else:
            motion_type = 'other'

        vote = Vote(chamber, date, motion, passed, yes_count, no_count,
                    other_count, type=motion_type)
        vote.add_source(url)

        for tr in page.xpath("//table[1]/tr")[3:]:
            if len(tr.xpath("td")) != 2:
                continue

            # avoid splitting duplicate names
            name = tr.xpath("string(td[1])").strip()
            if not name.startswith(DOUBLED_NAMES):
                name = name.split(' of')[0]

            member_vote = tr.xpath("string(td[2])").strip()
            if member_vote.startswith('Yea'):
                vote.yes(name)
            elif member_vote.startswith('Nay'):
                vote.no(name)
            elif member_vote.startswith('Not Voting'):
                # Non-voting members are deliberately left off the roll.
                pass
            else:
                vote.other(name)

        bill.add_vote(vote)
Exemple #37
0
    def parse_vote(self, bill, actor, date, motion, url, uniqid):
        """Download a roll-call page, parse it and add the Vote to ``bill``.

        The page must contain ``YEAS``, ``NAYS`` and ``ABSENT`` (optionally
        ``ABSENT OR NOT VOTING``) headings, each followed by a count and
        then the names, which are separated by two or more whitespace
        characters.

        When ``actor`` is ``'upper'`` or ``'lower'`` it is used as the
        vote's chamber; any other value is stored as the vote's
        ``location`` with an empty chamber. The vote passes when yeas
        strictly outnumber nays. ``uniqid`` is forwarded as ``_vote_id``.
        """
        page = self.get(url).text
        bill.add_source(url)
        # Raw strings: \s and \d are regex escapes, not string escapes.
        vote_re = re.compile(
            r'YEAS -?\s?(\d+)(.*)NAYS -?\s?(\d+)'
            r'(.*)ABSENT( OR NOT VOTING)? -?\s?'
            r'(\d+)(.*)', re.MULTILINE | re.DOTALL)
        # NOTE: if the page does not match, ``match`` is None and the
        # group() calls below raise AttributeError.
        match = vote_re.search(page)
        yes_count = int(match.group(1))
        no_count = int(match.group(3))
        other_count = int(match.group(6))

        passed = yes_count > no_count

        if actor in ('upper', 'lower'):
            vote_chamber, vote_location = actor, ''
        else:
            vote_chamber, vote_location = '', actor

        vote = Vote(vote_chamber,
                    date,
                    motion,
                    passed,
                    yes_count,
                    no_count,
                    other_count,
                    location=vote_location,
                    _vote_id=uniqid)
        vote.add_source(url)

        splitter = re.compile(r'\s{2,}')
        # (recording method, regex group holding that heading's names)
        for record, group_no in ((vote.yes, 2), (vote.no, 4),
                                 (vote.other, 7)):
            for name in splitter.split(match.group(group_no).strip()):
                if name:
                    record(name)

        bill.add_vote(vote)
Exemple #38
0
    def scrape_committee_vote(self, bill, actor, date, motion, page, url,
                              uniqid):
        """Build a committee Vote from the first table on ``page``.

        The yes / no / other voters are read from the first three cells of
        one table row; within a cell each voter name is the text that
        follows a ``<br>``. Counts are the number of names found, and the
        vote passes when yes strictly outnumbers no. The Vote is added to
        ``bill`` with ``url`` as its source.
        """
        table = page.xpath("//table")[0]
        row = table.xpath(".//tr")[0]
        if row[0].text_content() == 'Votes:':
            # New website: the names live in the third row instead.
            row = table.xpath(".//tr")[2]

        cells = row.xpath(".//td")
        if len(cells) >= 3:
            yes_cell, no_cell, other_cell = cells[:3]
        else:
            # Fewer than three cells: only a yes column is present.
            yes_cell, no_cell, other_cell = cells[0], None, None

        def names_in(cell):
            """Return the non-blank texts trailing each <br> in ``cell``."""
            if cell is None:
                return []
            found = []
            for br in cell.xpath(".//br"):
                tail = br.tail
                if not tail:
                    continue
                tail = tail.strip()
                if tail:
                    found.append(tail)
            return found

        voters = (
            ('yes', names_in(yes_cell)),
            ('no', names_in(no_cell)),
            ('other', names_in(other_cell)),
        )
        yes_count, no_count, other_count = (len(names)
                                            for _, names in voters)

        vote = Vote(actor,
                    date,
                    motion, (yes_count > no_count),
                    yes_count,
                    no_count,
                    other_count,
                    _vote_id=uniqid)
        vote.add_source(url)

        for method_name, names in voters:
            record = getattr(vote, method_name)
            for voter in names:
                record(voter)

        bill.add_vote(vote)
    def process_vote(self, data):
        """Convert one vote dict into a Vote object and save it.

        Reads ``organization``, ``bill``, ``counts``, ``start_date``,
        ``motion_text``, ``result``, ``bill_action``,
        ``legislative_session``, ``votes``, ``sources`` and ``extras`` from
        ``data``. A ``'legislature'`` chamber (for either the vote or the
        bill) is mapped to ``'upper'``. Any count or voter option other
        than ``'yes'`` / ``'no'`` is folded into "other".
        """
        org = parse_psuedo_id(data['organization'])
        chamber = org['classification']
        bill_chamber, bill_id = self.get_bill_details(data['bill'])
        if chamber == 'legislature':
            chamber = 'upper'
        if bill_chamber == 'legislature':
            bill_chamber = 'upper'

        # yes/no stay None when the data carries no such count.
        yes_total = no_total = None
        other_total = 0
        for tally in data['counts']:
            option = tally['option']
            if option == 'yes':
                yes_total = tally['value']
            elif option == 'no':
                no_total = tally['value']
            else:
                other_total += tally['value']

        vote = Vote(
            chamber=chamber,
            date=parse_date(data['start_date']),
            motion=data['motion_text'],
            passed=data['result'] == 'pass',
            yes_count=yes_total,
            no_count=no_total,
            other_count=other_total,
            action=data['bill_action'],
            # TODO: was data['motion_classification'],
            type='other',
            session=data['legislative_session'],
            bill_chamber=bill_chamber,
            bill_id=bill_id,
        )

        for ballot in data['votes']:
            option = ballot['option']
            method_name = option if option in ('yes', 'no') else 'other'
            getattr(vote, method_name)(ballot['voter_name'])

        for source in data['sources']:
            vote.add_source(source['url'])

        vote.update(**data['extras'])

        self.save_vote(vote)
Exemple #40
0
    def scrape_chamber_votes(self, chamber, session, url):
        """Save a Vote for each ``<vote>`` in the XML at ``url`` whose bill
        belongs to ``chamber``.

        The bill's chamber comes from the first word of the ``legislation``
        attribute: ``SB``/``SR`` are upper, ``HB``/``HR`` are lower. Blank,
        ``EX`` and ``ELECTION`` entries are skipped; any other prefix
        raises ``Exception``. The vote passes when yeas strictly outnumber
        nays.
        """
        doc = lxml.etree.fromstring(self.urlopen(url))

        chamber_by_prefix = {
            'SB': 'upper', 'SR': 'upper',
            'HB': 'lower', 'HR': 'lower',
        }
        ignored_prefixes = ('', 'EX', 'ELECTION')

        for vxml in doc.xpath('//vote'):
            legislation = vxml.get('legislation')
            motion = vxml.get('caption') or 'unknown'
            timestamp = datetime.datetime.strptime(vxml.get('dateTime'),
                                                   '%Y-%m-%dT%H:%M:%S')

            leg_prefix = legislation.split(' ')[0]
            if leg_prefix in ignored_prefixes:
                continue
            if leg_prefix not in chamber_by_prefix:
                raise Exception('unknown legislation prefix: ' + legislation)
            bill_chamber = chamber_by_prefix[leg_prefix]
            # skip bills from other chamber
            if bill_chamber != chamber:
                continue

            def total(attr):
                """Read one integer attribute of the <totals> element."""
                return int(vxml.xpath('totals/@' + attr)[0])

            unknown_count = total('unknown')
            excused_count = total('excused')
            nv_count = total('not-voting')
            no_count = total('nays')
            yes_count = total('yeas')
            other_count = unknown_count + excused_count + nv_count

            vote = Vote(chamber, timestamp, motion,
                        passed=yes_count > no_count, yes_count=yes_count,
                        no_count=no_count, other_count=other_count,
                        session=session, bill_id=legislation,
                        bill_chamber=bill_chamber)
            vote.add_source(url)

            for m in vxml.xpath('member'):
                letter = m.get('vote')
                member = m.get('name')
                if letter == 'Y':
                    record = vote.yes
                elif letter == 'N':
                    record = vote.no
                else:
                    record = vote.other
                record(member)

            self.save_vote(vote)
Exemple #41
0
    def scrape_votes(self, bill, sponsor, link):
        """Parse every vote listed on the page at ``link`` into ``bill``.

        The text of the ``lblVoteData`` span is split on
        ``"<bill_id> by <sponsor> - "`` to get one chunk per vote; each
        chunk is split again on a run of ten non-breaking spaces. The first
        piece is the motion (a ``d/d/d`` date is taken from it when
        present); the rest may carry ``Ayes...N`` / ``Noes...N`` totals and
        the lists of members voting aye / no.

        When both totals are found, passage is decided by
        ``yes_count > no_count``. Otherwise both totals are recorded as 0
        and passage falls back to the words 'Passed' / 'Adopted' in the
        text.

        Returns ``bill``.
        """
        # Compiled once rather than per vote; raw strings keep \d a regex
        # escape.
        date_regex = re.compile(r'(\d+/\d+/\d+)')
        vote_regex = re.compile(r'\d+$')
        aye_regex = re.compile(r'^.+voting aye were: (.+) -')
        no_regex = re.compile(r'^.+voting no were: (.+) -')

        with self.urlopen(link) as page:
            page = lxml.html.fromstring(page)
            raw_vote_data = page.xpath("//span[@id='lblVoteData']")[0].text_content()
            raw_vote_data = raw_vote_data.strip().split('%s by %s - ' % (bill['bill_id'], sponsor))[1:]
            for raw_vote in raw_vote_data:
                raw_vote = raw_vote.split(u'\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0')
                motion = raw_vote[0]

                # vote_date stays None when the motion carries no date.
                vote_date = date_regex.search(motion)
                if vote_date:
                    vote_date = datetime.datetime.strptime(vote_date.group(), '%m/%d/%Y')

                passed = ('Passed' in motion) or ('Adopted' in raw_vote[1])
                yes_count = None
                no_count = None
                other_count = 0
                ayes = []
                nos = []

                for v in raw_vote[1:]:
                    total = vote_regex.search(v)
                    if v.startswith('Ayes...') and total:
                        yes_count = int(total.group())
                        continue
                    if v.startswith('Noes...') and total:
                        no_count = int(total.group())
                        continue
                    aye_match = aye_regex.search(v)
                    if aye_match:
                        ayes = aye_match.groups()[0].split(', ')
                        continue
                    no_match = no_regex.search(v)
                    if no_match:
                        nos = no_match.groups()[0].split(', ')

                # Compare against None, not truthiness: a parsed total of 0
                # (e.g. a unanimous vote) is a real count and must not be
                # treated as "missing", which would zero out both totals.
                if yes_count is not None and no_count is not None:
                    passed = yes_count > no_count
                else:
                    yes_count = no_count = 0

                vote = Vote(bill['chamber'], vote_date, motion, passed, yes_count, no_count, other_count)
                vote.add_source(link)
                for a in ayes:
                    vote.yes(a)
                for n in nos:
                    vote.no(n)
                bill.add_vote(vote)

        return bill
Exemple #42
0
    def scrape_vote(self, bill, vote_type_id, vote_type):
        """Scrape one vote page for ``bill`` and add the Vote to it.

        The page URL is built from ``vote_type_id`` and the bill's
        ``bill_id``. The voice-vote and approved flags are true when their
        boxes contain ``'x'``. The yes and no totals come from the
        ``VoteCount1`` and ``VoteCount2`` spans; ``VoteCount3`` through
        ``VoteCount8`` are summed into the "other" total. ``vote_type`` is
        used as the motion and the vote is always recorded for the
        ``'upper'`` chamber.
        """
        base_url = 'http://dcclims1.dccouncil.us/lims/voting.aspx?VoteTypeID=%s&LegID=%s'
        url = base_url % (vote_type_id, bill['bill_id'])

        with self.urlopen(url) as html:
            doc = lxml.html.fromstring(html)

            vote_date = convert_date(doc.get_element_by_id('VoteDate').text)

            # check if voice vote / approved boxes have an 'x'
            voice = (
                doc.xpath('//span[@id="VoteTypeVoice"]/b/text()')[0] == 'x')
            passed = (doc.xpath('//span[@id="VoteResultApproved"]/b/text()')[0]
                      == 'x')

            yes_count = extract_int(
                doc.xpath('//span[@id="VoteCount1"]/b/text()')[0])
            no_count = extract_int(
                doc.xpath('//span[@id="VoteCount2"]/b/text()')[0])

            # range() rather than the Python-2-only xrange(), so this runs
            # on either major version.
            other_count = 0
            for n in range(3, 9):
                other_count += extract_int(
                    doc.xpath('//span[@id="VoteCount%s"]/b/text()' % n)[0])

            vote = Vote('upper',
                        vote_date,
                        vote_type,
                        passed,
                        yes_count,
                        no_count,
                        other_count,
                        voice_vote=voice)

            vote.add_source(url)

            # members are only text on page in a <u> tag
            for member_u in doc.xpath('//u'):
                member = member_u.text
                # normalize case
                vote_text = member_u.xpath('../../i/text()')[0].upper()
                if 'YES' in vote_text:
                    vote.yes(member)
                elif 'NO' in vote_text:
                    vote.no(member)
                else:
                    vote.other(member)
        bill.add_vote(vote)
Exemple #43
0
    def scrape_vote(self, chamber, session, bill_id, vote_url):
        """Scrape a single vote page and save it as a ``'lower'`` Vote.

        The motion is the first ``<h2>`` inside ``leg_PageContent``; the
        yea and nay totals are the 1st and 4th words of the second
        ``<h3>``; the date is the first paragraph text that parses as
        ``mm/dd/YYYY``. Names in the first table are yeas and names in the
        second table are nays.

        Returns without saving when the request was redirected to the
        "no vote found" page, or when the motion or date is missing.
        """
        NO_VOTE_URL = 'http://www.house.leg.state.mn.us/votes/novotefound.asp'
        html = self.urlopen(vote_url)

        # a broken link redirects to NO_VOTE_URL
        if html.response.url == NO_VOTE_URL:
            return

        doc = lxml.html.fromstring(html)
        headings = doc.xpath("//div[@id='leg_PageContent']/div/h2/text()")
        try:
            motion = headings[0]
        except IndexError:
            self.logger.warning("Bill was missing a motion number, skipping")
            return

        tally_words = doc.xpath(
            ".//div[@id='leg_PageContent']/div/h3/text()")[1].split()
        yeas, nays = int(tally_words[0]), int(tally_words[3])

        # take the first paragraph whose text is a date
        for text in doc.xpath(".//div[@id='leg_PageContent']/div/p/text()"):
            try:
                date = datetime.datetime.strptime(text.strip(), '%m/%d/%Y')
            except ValueError:
                continue
            break
        else:
            self.logger.warning("No date could be found for vote on %s" % motion)
            return

        vote = Vote('lower', date, motion, yeas > nays, yeas, nays, 0,
                    session=session, bill_id=bill_id, bill_chamber=chamber)
        vote.add_source(vote_url)

        # table 1 lists the yeas, table 2 the nays
        for table_no, record in ((1, vote.yes), (2, vote.no)):
            for name in doc.xpath('//table[%d]/tr/td/font/text()' % table_no):
                record(name.strip())

        self.save_vote(vote)
Exemple #44
0
    def scrape_senate_vote(self, bill, url, date):
        """Download a vote PDF, tally its names and add the Vote to ``bill``.

        The PDF text is split on ``Yea:``/``Nay:``/``Absent:`` headings
        (with an optional plural ``s``); the names after each heading are
        separated by "and" or by runs of commas/whitespace. Counts are
        derived by counting the names, and the vote passes when yes
        strictly outnumbers no.

        Text in the three-column ``Yea: n  Nay: n  Absent: n`` layout is
        delegated to ``scrape_senate_vote_3col``. A missing file (HTTP
        error) is logged and skipped.
        """
        try:
            filename, resp = self.urlretrieve(url)
        except scrapelib.HTTPError:
            self.warning("missing vote file %s" % url)
            return

        vote = Vote('upper',
                    date,
                    'Passage',
                    passed=None,
                    yes_count=0,
                    no_count=0,
                    other_count=0)
        vote.add_source(url)

        # Always remove the downloaded file, even if conversion fails.
        try:
            text = convert_pdf(filename, 'text')
        finally:
            os.remove(filename)

        if re.search(r'Yea:\s+\d+\s+Nay:\s+\d+\s+Absent:\s+\d+', text):
            return self.scrape_senate_vote_3col(bill, vote, text, url, date)

        # Reversed so that pop() yields heading, names, heading, names, ...
        # in document order.
        data = re.split(r'(Yea|Nay|Absent)s?:', text)[::-1]
        # A real list (not filter()): on Python 3 filter() returns a lazy
        # iterator that has no pop() and is always truthy.
        data = [chunk for chunk in data if chunk]
        keymap = dict(yea='yes', nay='no')
        actual_vote = collections.defaultdict(int)
        while data:
            vote_val = data.pop()
            key = keymap.get(vote_val.lower(), 'other')
            values = data.pop()
            for name in re.split(r'(?:[\s,]+and\s|[\s,]{2,})', values):
                if name.lower().strip() == 'none.':
                    continue
                name = name.replace('..', '')
                name = re.sub(r'\.$', '', name)
                # drop surrounding digits, dashes, spaces and newlines
                name = name.strip('-1234567890 \n')
                if not name:
                    continue
                getattr(vote, key)(name)
                actual_vote[vote_val] += 1
                vote[key + '_count'] += 1
            assert actual_vote[vote_val] == vote[key + '_count']

        vote['passed'] = vote['no_count'] < vote['yes_count']
        bill.add_vote(vote)
Exemple #45
0
    def scrape_vote(self, chamber, session, bill_id, vote_url):
        """Scrape a single vote page and save it as a ``'lower'`` Vote.

        The first paragraph after the ``<h1>`` carries the motion (its
        third line, or the second when the third is empty) and, on its
        last line, the yea/nay totals matched by ``self.yeanay_re``. The
        second paragraph carries the date, matched by ``self.date_re`` and
        parsed as ``mm/dd/YYYY``. Names in the first table are yeas and
        names in the second table are nays.

        Returns without saving when the request was redirected to the
        "no vote found" page.
        """
        NO_VOTE_URL = 'http://www.house.leg.state.mn.us/votes/novotefound.asp'
        html = self.urlopen(vote_url)

        # a broken link redirects to NO_VOTE_URL
        if html.response.url == NO_VOTE_URL:
            return

        doc = lxml.html.fromstring(html)
        paragraphs = doc.xpath('//h1/following-sibling::p')

        # paragraph 1: motion and vote total
        header_lines = paragraphs[0].text_content().splitlines()
        # the motion is on line 3, or line 2 when line 3 is empty
        motion = header_lines[2] or header_lines[1]
        # the final line reads "__ YEA and __ Nay"
        yea_text, nay_text = self.yeanay_re.match(header_lines[-1]).groups()
        yeas, nays = int(yea_text), int(nay_text)

        # paragraph 2: date
        date_text = self.date_re.match(paragraphs[1].text_content()).groups()[0]
        date = datetime.datetime.strptime(date_text, '%m/%d/%Y')

        vote = Vote('lower', date, motion, yeas > nays, yeas, nays, 0,
                    session=session, bill_id=bill_id, bill_chamber=chamber)
        vote.add_source(vote_url)

        # table 1 lists the yeas, table 2 the nays
        for table_no, record in ((1, vote.yes), (2, vote.no)):
            for name in doc.xpath('//table[%d]/tr/td/font/text()' % table_no):
                record(name.strip())

        self.save_vote(vote)
Exemple #46
0
    def scrape_chamber_votes(self, chamber, url):
        """Parse every ``<vote>`` in the XML at ``url`` into ``self.votes``.

        Each Vote is stored under the element's ``id`` attribute. The
        yes / no totals come from the ``<totals>`` element and the vote
        passes when yeas strictly outnumber nays. The "other" total is the
        sum of the unknown, excused and not-voting totals, but is replaced
        by the number of members actually listed as "other" when the two
        disagree (a warning is logged).
        """
        xml = self.urlopen(url)
        doc = lxml.etree.fromstring(xml)

        for vxml in doc.xpath('//vote'):
            motion = vxml.get('caption') or 'unknown'
            timestamp = datetime.datetime.strptime(vxml.get('dateTime'),
                                                   '%Y-%m-%dT%H:%M:%S')

            # legislation key is often blank, so we're ignoring it now
            #legislation = vxml.get('legislation')

            unknown_count = int(vxml.xpath('totals/@unknown')[0])
            excused_count = int(vxml.xpath('totals/@excused')[0])
            nv_count = int(vxml.xpath('totals/@not-voting')[0])
            no_count = int(vxml.xpath('totals/@nays')[0])
            yes_count = int(vxml.xpath('totals/@yeas')[0])
            other_count = unknown_count + excused_count + nv_count

            vote = Vote(chamber,
                        timestamp,
                        motion,
                        passed=yes_count > no_count,
                        yes_count=yes_count,
                        no_count=no_count,
                        other_count=other_count)
            vote.add_source(url)

            for m in vxml.xpath('member'):
                vote_letter = m.get('vote')
                member = m.get('name')
                if vote_letter == 'Y':
                    vote.yes(member)
                elif vote_letter == 'N':
                    vote.no(member)
                else:
                    vote.other(member)

            # other count is frequently wrong, not sure why they can't count
            listed_others = len(vote['other_votes'])
            if listed_others != vote['other_count']:
                # "said" is what the XML totals claimed, "got" is how many
                # members were actually listed (these were swapped before).
                self.warning("vote XML had wrong other count: said %s got %s" %
                             (vote['other_count'], listed_others))
                vote['other_count'] = listed_others

            # store vote
            self.votes[vxml.get('id')] = vote
Exemple #47
0
    def parse_house_vote(self, url):
        """Download a house vote PDF and return it as a ``'lower'`` Vote.

        House votes are PDFs that can be converted to text, and require
        some nasty regex to get the votes out reliably. The date is the
        first ``d/d/d`` match in the text; the totals come from
        ``self.HOUSE_TOTAL_RE``. Passage is faked as yeas strictly
        outnumbering nays.

        Returns ``None`` (after logging a warning) when the PDF yields no
        text or only the chamber heading.
        """
        fname, resp = self.urlretrieve(url)
        # Remove the download as soon as it has been converted, so it is
        # not left behind on the early returns below or when conversion
        # fails.
        try:
            text = convert_pdf(fname, 'text')
        finally:
            os.remove(fname)

        if not text.strip():
            self.warning('image PDF %s' % url)
            return None

        # get date
        if text.strip() == 'NEW MEXICO HOUSE OF REPRESENTATIVES':
            self.warning("What the heck: %s" % (url))
            return None

        date = re.findall(r'(\d+/\d+/\d+)', text)[0]
        date = datetime.strptime(date, '%m/%d/%Y')

        # get totals
        yea, nay, exc, absent = self.HOUSE_TOTAL_RE.findall(text)[0]

        # make vote (faked passage indicator)
        vote = Vote('lower', date, 'house passage',
                    int(yea) > int(nay), int(yea), int(nay),
                    int(absent) + int(exc))
        vote.add_source(url)

        # votes
        real_votes = False
        for v, name in HOUSE_VOTE_RE.findall(text):
            # our regex is a bit broad, wait until we see 'Nays' to start
            # and end when we see CERTIFIED or ____ signature line
            if 'Nays' in name or 'Excused' in name:
                real_votes = True
                continue
            elif 'CERTIFIED' in name or '___' in name:
                break
            elif real_votes and name.strip():
                if v == 'Y':
                    vote.yes(name)
                elif v == 'N':
                    vote.no(name)
                else:  # excused/absent
                    vote.other(name)
        return vote
Exemple #48
0
    def parse_vote(self, bill, action, act_chamber, act_date, url):
        """Build and return a Vote from an action string and its vote page.

        The totals come from ``action``: each ``Y``/``N``/``E``/``A``
        followed by a number (or ``-`` for zero) is a tally, with ``E`` and
        ``A`` summed into "other". The motion and the member names come
        from the page at ``url`` (relative to the legislature's ``basis``
        directory): the ``<pre>`` block containing 'Yeas' is split on blank
        lines into ``Yeas:``/``Nays:``/``Excused:``/``Absent:`` lists of
        comma-separated names.

        The vote passes when yes strictly outnumbers no.
        """
        url = "http://www.legis.state.ak.us/basis/%s" % url
        info_page = self.soup_parser(self.urlopen(url))

        yes = no = other = 0

        tally = re.findall(r'(?:(Y|N|E|A)(-|\d+)\s*)', action)

        for vtype, vcount in tally:
            vcount = int(vcount) if vcount != '-' else 0
            if vtype == 'Y':
                yes = vcount
            elif vtype == 'N':
                no = vcount
            else:
                other += vcount

        votes = info_page.findAll('pre', text=re.compile('Yeas'),
                                  limit=1)[0].split('\n\n')

        motion = info_page.findAll(text=re.compile('The question being'))[0]
        motion = re.findall(r'The question being:\s*"(.*)\?"', motion,
                            re.DOTALL)[0].replace('\n', ' ')

        vote = Vote(act_chamber, act_date, motion, yes > no, yes, no, other)

        # Heading prefix -> recording method. The prefix is stripped by its
        # own length; the old hard-coded offset of 9 for 'Absent: ' (8
        # characters) cut off the first letter of the first absent name.
        headings = (
            ('Yeas: ', vote.yes),
            ('Nays: ', vote.no),
            ('Excused: ', vote.other),
            ('Absent: ', vote.other),
        )
        for vote_list in votes:
            for prefix, record in headings:
                if not vote_list.startswith(prefix):
                    continue
                for name in vote_list[len(prefix):].split(','):
                    name = name.strip()
                    if name:
                        record(name)
                break

        vote.add_source(url)
        return vote
Exemple #49
0
    def scrape_vote(self, bill, vote_type_id, vote_type):
        """Scrape one vote page for ``bill`` and add the Vote to it.

        The page URL is built from ``vote_type_id`` and the bill's
        ``bill_id``. The voice-vote and approved flags are true when their
        boxes contain ``'x'``. The yes and no totals come from the
        ``VoteCount1`` and ``VoteCount2`` spans; the "other" total is
        whatever is left of 13 after yes and no, floored at 0.
        ``vote_type`` is used as the motion and the vote is always recorded
        for the ``'upper'`` chamber.
        """
        url = ('http://dcclims1.dccouncil.us/lims/voting.aspx?VoteTypeID=%s&LegID=%s'
               % (vote_type_id, bill['bill_id']))

        with self.urlopen(url) as html:
            doc = lxml.html.fromstring(html)

            vote_date = convert_date(doc.get_element_by_id('VoteDate').text)

            def bold_text(span_id):
                """Return the first bold text node inside the given span."""
                return doc.xpath('//span[@id="%s"]/b/text()' % span_id)[0]

            # a box is ticked when it holds an 'x'
            voice = bold_text('VoteTypeVoice') == 'x'
            passed = bold_text('VoteResultApproved') == 'x'

            yes_count = extract_int(bold_text('VoteCount1'))
            no_count = extract_int(bold_text('VoteCount2'))
            # the page's totals can exceed 13 (error in count), so clamp at 0
            other_count = max(0, 13 - (yes_count + no_count))

            vote = Vote('upper', vote_date, vote_type, passed,
                        yes_count, no_count, other_count,
                        voice_vote=voice)
            vote.add_source(url)

            # member names appear on the page only inside <u> tags
            for member_u in doc.xpath('//u'):
                member = member_u.text
                vote_text = member_u.xpath('../../i/text()')[0]
                if 'Yes' in vote_text:
                    record = vote.yes
                elif 'No' in vote_text:
                    record = vote.no
                else:
                    record = vote.other
                record(member)
        bill.add_vote(vote)
Exemple #50
0
    def parse_committee_votes(self, committee, chamber, bill, url):
        """Add every committee vote linked from ``url`` to ``bill``.

        Each ``listVoteSummary.cfm`` link becomes one Vote: the date is the
        first cell of the link's table row (``mm/dd/YYYY``), the motion is
        the text after the first ``-`` in the link's parent, and the roll
        call comes from ``parse_upper_committee_vote_rollcall``. Any
        ``listVotes.cfm`` links on the page are then followed recursively.
        """
        bill.add_source(url)
        doc = lxml.html.fromstring(self.urlopen(url))
        doc.make_links_absolute(url)

        for link in doc.xpath("//a[contains(@href, 'listVoteSummary.cfm')]"):
            # date: first cell of the enclosing row
            date_text = link.xpath('../../td')[0].text_content()
            date = datetime.datetime.strptime(date_text, "%m/%d/%Y")

            # motion: everything after the first dash in the parent's text
            label = link.xpath('..')[0].text_content().strip()
            _, motion = label.split('-', 1)
            motion = motion.strip()

            vote_url = link.attrib['href']

            # roll call
            rollcall = self.parse_upper_committee_vote_rollcall(bill, vote_url)

            vote = Vote(chamber,
                        date,
                        'Committee vote (%s): %s' % (committee, motion),
                        type='other',
                        committee=committee,
                        **rollcall)

            for voteval in ('yes', 'no', 'other'):
                record = getattr(vote, voteval)
                for name in rollcall.get(voteval + '_votes', []):
                    record(name)

            for source in (url, vote_url):
                vote.add_source(source)
            bill.add_vote(vote)

        # follow links to further vote listings
        for link in doc.xpath("//a[contains(@href, 'listVotes.cfm')]"):
            self.parse_committee_votes(committee, chamber, bill,
                                       link.attrib['href'])
Exemple #51
0
    def parse_committee_votes(self, bill, url):
        """Add every committee vote linked from ``url`` to ``bill``.

        The chamber is ``'upper'`` when the page's ``<h1>`` mentions
        'Senate', else ``'lower'``; the committee name is the
        second-to-last text node of the first ``<h2>``. Each
        ``listVoteSummary.cfm`` link becomes one Vote whose roll call comes
        from ``parse_upper_committee_vote_rollcall``.
        """
        bill.add_source(url)
        doc = lxml.html.fromstring(self.get(url).text)
        doc.make_links_absolute(url)

        if 'Senate' in doc.xpath('string(//h1)'):
            chamber = 'upper'
        else:
            chamber = 'lower'
        committee = tuple(doc.xpath('//h2')[0].itertext())[-2].strip()

        for link in doc.xpath("//a[contains(@href, 'listVoteSummary.cfm')]"):
            # Date: try each known format; if none matches, the raw cell
            # text is passed through unchanged.
            raw_date = link.xpath('../../td')[0].text_content()
            date = raw_date
            for fmt in ("%m/%d/%Y", "%m-%d-%Y"):
                try:
                    date = datetime.datetime.strptime(raw_date, fmt)
                except ValueError:
                    continue
                break

            # Motion: the last ' - ' separated piece of the link text.
            motion_text = link.text_content().split(' - ')[-1].strip()
            motion = 'Committee vote (%s): %s' % (committee, motion_text)

            # Roll call.
            vote_url = link.attrib['href']
            rollcall = self.parse_upper_committee_vote_rollcall(bill, vote_url)

            vote = Vote(chamber, date, motion,
                        type='other',
                        committee=committee,
                        **rollcall)

            for voteval in ('yes', 'no', 'other'):
                record = getattr(vote, voteval)
                for name in rollcall.get(voteval + '_votes', []):
                    record(name)

            for source in (url, vote_url):
                vote.add_source(source)
            bill.add_vote(vote)
Exemple #52
0
 def scrape(self, chamber, session):
     """Scrape and save every vote listed for ``chamber``.

     The vote directory is ``SVotes`` for ``"upper"`` and ``HVotes`` for
     ``"lower"`` under ``RI_URL_BASE``. For each date returned by
     ``get_dates``, ``votes.asp`` is posted to and the parsed result is
     walked; each entry becomes a saved Vote whose chamber is taken from
     the entry's own ``H``/``S`` marker.
     """
     vote_dirs = {
         "upper": "%s/%s" % (RI_URL_BASE, "SVotes"),
         "lower": "%s/%s" % (RI_URL_BASE, "HVotes")
     }
     url = vote_dirs[chamber]
     action = "%s/%s" % (url, "votes.asp")
     chamber_codes = {"H": "lower", "S": "upper"}

     for date in self.get_dates(url):
         page = self.post_to(action, date)
         for vote_dict in self.parse_vote_page(page, url, session):
             for key in vote_dict:
                 entry = vote_dict[key]
                 meta = entry['meta']
                 count = entry['count']
                 vote_chamber = chamber_codes[meta['chamber']]
                 v = Vote(
                     vote_chamber,
                     entry['time'],
                     meta['extra']['motion'],
                     count['passage'],
                     int(count['YEAS']),
                     int(count['NAYS']),
                     int(count['NOT VOTING']),
                     session=session,
                     bill_id=meta['bill'],
                     bill_chamber=vote_chamber,
                     bill_session=meta['year'],
                 )
                 v.add_source(entry['source'])
                 for ballot in entry['votes']:
                     cast = ballot['vote']
                     if cast == "Y":
                         v.yes(ballot['name'])
                     elif cast == "N":
                         v.no(ballot['name'])
                     else:
                         v.other(ballot['name'])
                 self.save_vote(v)
Exemple #53
0
    def scrape_vote(self, bill, date, motion, url):
        """Fetch the roll-call page at ``url`` and add its vote to ``bill``.

        Returns without adding anything when the page cannot be fetched
        (``scrapelib.HTTPError``) or is marked 'not yet official'.
        """
        try:
            raw_html = self.get(url).text
        except scrapelib.HTTPError:
            # Sometimes the link is there but is dead.
            return

        # Sometimes they link to vote pages before they go live.
        if 'not yet official' in raw_html:
            return

        doc = lxml.html.fromstring(raw_html)

        actor = 'upper' if url.endswith('Senate') else 'lower'

        def tally(label):
            # The count is the last whitespace-separated token of the
            # centered cell containing "<label>: ".
            cell_text = doc.xpath(
                "string(//td[@align = 'center' and contains(., '%s: ')])"
                % label)
            return int(cell_text.split()[-1])

        yes_count = tally("Yeas")
        no_count = tally("Nays")
        other_count = tally("Non Voting")
        other_count += tally("Present")

        passed = yes_count > no_count + other_count
        vote = Vote(actor, date, motion, passed, yes_count,
                    no_count, other_count)
        vote.add_source(url)

        tables = doc.xpath(
            '//*[contains(@class, "ms-standardheader")]/'
            'following-sibling::table')
        # Tables are paired positionally with yes, no, other, other.
        recorders = (vote.yes, vote.no, vote.other, vote.other)
        for record, table in zip(recorders, tables):
            for anchor in table.xpath('.//a'):
                name = anchor.text_content().strip()
                if name:
                    record(name)

        bill.add_vote(vote)
Exemple #54
0
    def scrape_vote(self, bill, moid, vote_id, body, inst, motion, chamber):
        """Fetch one roll-call result page and add the vote to ``bill``.

        The page is scanned for cells of width 33%; each such cell is
        either a label (totals, date, ignorable rows) or a legislator
        name, and the text of the second following sibling cell is the
        associated value or vote code ('Y', 'N', 'P' or 'A').
        """
        url = ("http://alisondb.legislature.state.al.us/acas/"
               "GetRollCallVoteResults.asp"
               "?MOID=%s&VOTE=%s&BODY=%s&INST=%s&SESS=%s") % (
            moid, vote_id, body, inst, self.session_id)
        doc = lxml.html.fromstring(self.urlopen(url))

        by_code = {'Y': [], 'N': [], 'P': [], 'A': []}

        for cell in doc.xpath('//td[@width="33%"]'):
            label = cell.text
            value = cell.xpath('following-sibling::td')[1].text

            if label == 'Total Yea:':
                total_yea = int(value)
                continue
            if label == 'Total Nay:':
                total_nay = int(value)
                continue
            if label == 'Total Abs:':
                total_abs = int(value)
                continue
            if label == 'Legislative Date:':
                vote_date = datetime.datetime.strptime(value, '%m/%d/%Y')
                continue
            # Rows that carry no vote information.
            if label in ('Legislative Day:', 'Vote ID:') or 'Vacant' in label:
                continue

            # Everything else is a legislator; file the name under the
            # vote code.
            by_code[value].append(label)

        # TODO: passed is faked
        total_other = total_abs + len(by_code['P'])
        passed = total_yea > total_nay
        vote = Vote(chamber, vote_date, motion, passed,
                    total_yea, total_nay, total_other)
        vote.add_source(url)

        for member in by_code['Y']:
            vote.yes(member)
        for member in by_code['N']:
            vote.no(member)
        for member in by_code['A'] + by_code['P']:
            vote.other(member)

        bill.add_vote(vote)
Exemple #55
0
    def parse_vote(self, bill, link):
        """Parse the roll-call page at ``link`` and add the vote to ``bill``.

        The first ``<h3>`` of the main content is read as
        "<chamber> ... <status words> ... <mm/dd/YYYY>"; every ``<h3>`` is
        also scanned for a parenthesised count belonging to the yea, nay,
        present or absent tally.  A tally with no matching heading stays
        at 0.  When the page has no ``<h3>`` at all, a warning is logged
        and nothing is added.
        """
        member_doc = lxml.html.fromstring(self.get(link).text)
        motion = member_doc.xpath("//div[@id='main_content']/h4/text()")
        opinions = member_doc.xpath("//div[@id='main_content']/h3/text()")
        if not opinions:
            self.warning("No Votes for: %s", link)
            return

        header = opinions[0].split()
        vote_chamber = 'upper' if header[0] == 'Senate' else 'lower'
        vote_date = datetime.datetime.strptime(header[-1], '%m/%d/%Y')
        vote_status = " ".join(header[2:-2])
        if not vote_status.strip():
            # No status words in the heading; fall back to the <h4> text.
            vote_status = motion[0]

        counts = {'yea': 0, 'nay': 0, 'present': 0, 'absent': 0}
        for heading in opinions:
            try:
                count = int(heading[heading.find("(") + 1:heading.find(")")])
            except ValueError:
                # Heading carries no "(N)" count; skip it rather than
                # reuse the count parsed from an earlier heading.
                continue
            lowered = heading.lower()
            for key in ('yea', 'nay', 'present', 'absent'):
                if key in lowered:
                    counts[key] = count
                    break

        yes_count = counts['yea']
        no_count = counts['nay']
        vote = Vote(vote_chamber, vote_date, vote_status,
                    yes_count > no_count, yes_count, no_count,
                    counts['present'] + counts['absent'])
        vote.add_source(link)

        a_links = member_doc.xpath("//div[@id='main_content']/a/text()")
        # The first link is skipped.  NOTE(review): the remaining links are
        # assumed to be listed yeas first, then nays, then everyone else --
        # this is inferred from the index arithmetic; confirm against the
        # page layout.
        for position, text in enumerate(a_links[1:], 1):
            name = text.replace(',', '').split()[0]
            if position <= yes_count:
                vote.yes(name)
            elif position <= yes_count + no_count:
                vote.no(name)
            else:
                vote.other(name)
        bill.add_vote(vote)
Exemple #56
0
    def vote(self):
        '''Return a billy vote.

        Builds a lower-chamber ``Vote`` from ``self.date()``,
        ``self.motion()`` and ``self.passed()``, then fills in the counts
        and voter names from ``self._parse()``.  The names are also grouped
        by the vote label yielded by the parser and stored on the vote
        under ``'actual_vote'``.
        '''
        actual_vote_dict = collections.defaultdict(list)
        vote = Vote('lower', self.date(), self.motion(),
                    self.passed(), 0, 0, 0,
                    actual_vote=dict(actual_vote_dict))

        for (vote_val, count), (actual_vote, _), text in self._parse():
            vote[vote_val + '_count'] = count
            for raw_name in filter(None, PlaintextColumns(text)):
                names = [raw_name]
                if 'Candelaria Reardon' in raw_name:
                    # Special case kept from the original: record the
                    # raw cell, the surname itself, and whatever is left
                    # of the cell once the surname is removed.
                    names.append('Candelaria Reardon')
                    other_name = raw_name.replace(
                        'Candelaria Reardon', '').strip()
                    names.append(other_name)
                for name in filter(None, names):
                    actual_vote_dict[actual_vote].append(name)
                    getattr(vote, vote_val)(name)

        # The constructor received a copy taken while the mapping was still
        # empty, so the collected names must be stored once the loop is done.
        vote['actual_vote'] = dict(actual_vote_dict)

        vote.add_source(self.url)
        return vote
Exemple #57
0
    def scrape_vote_history(self, bill, vurl):
        """Scrape the vote-history table at ``vurl`` and add each vote to
        ``bill``.

        The first two table rows are skipped.  Each remaining row must
        have exactly ten cells (timestamp, motion, vote link, yeas, nays,
        not voting, excused, abstained, total, result); other rows are
        reported with a warning and skipped.

        Raises ``AssertionError`` when the per-category counts of a row do
        not add up to its listed total.
        """
        html = self.urlopen(vurl)
        doc = lxml.html.fromstring(html)
        doc.make_links_absolute(vurl)

        # skip first two rows
        for row in doc.xpath('//table/tr')[2:]:
            tds = row.getchildren()
            if len(tds) != 10:
                self.warning('irregular vote row: %s' % vurl)
                continue
            (timestamp, motion, vote_cell, yeas, nays, nv, exc, abst, total,
             result) = tds

            timestamp = timestamp.text.replace(u'\xa0', ' ')
            try:
                # %p only affects the parsed hour when combined with %I;
                # with %H the AM/PM marker is silently ignored and every
                # afternoon vote would land twelve hours early.
                timestamp = datetime.datetime.strptime(timestamp,
                                                       '%m/%d/%Y %I:%M %p')
            except ValueError:
                # Hour outside 1-12: keep accepting what the previous
                # 24-hour format accepted.
                timestamp = datetime.datetime.strptime(timestamp,
                                                       '%m/%d/%Y %H:%M %p')
            yeas = int(yeas.text)
            nays = int(nays.text)
            others = int(nv.text) + int(exc.text) + int(abst.text)
            assert yeas + nays + others == int(total.text)

            passed = (result.text == 'Passed')

            vote_link = vote_cell.xpath('a')[0]
            # A '[H]' marker in the link text means lower chamber;
            # anything else is treated as upper.
            if '[H]' in vote_link.text:
                chamber = 'lower'
            else:
                chamber = 'upper'

            vote = Vote(chamber, timestamp, motion.text, passed, yeas, nays,
                        others)
            vote.add_source(vurl)

            # NOTE(review): scrape_rollcall presumably fills in the
            # individual voters from the linked PDF -- confirm.
            rollcall_pdf = vote_link.get('href')
            self.scrape_rollcall(vote, rollcall_pdf)
            vote.add_source(rollcall_pdf)

            bill.add_vote(vote)
Exemple #58
0
    def vote(self):
        '''Build and return a billy vote for this roll call.

        Yes/no totals accept both singular and plural labels in the counts
        mapping; every remaining count is lumped into the "other" total.
        '''
        recorded_by_label = collections.defaultdict(list)
        when = self.date()
        question = self.motion()
        outcome = self.passed()
        tallies = self.get_counts()

        def tally(*labels):
            # Missing labels contribute zero.
            return sum(int(tallies.get(label, 0)) for label in labels)

        yes_count = tally('Yea', 'Yeas')
        no_count = tally('Nay', 'Nays')
        grand_total = sum(int(value) for value in tallies.values())
        other_count = grand_total - (yes_count + no_count)

        vote = Vote(self.chamber, when, question, outcome,
                    yes_count, no_count, other_count,
                    actual_vote=dict(recorded_by_label))

        for method_name, voter in self.vote_values():
            record = getattr(vote, method_name)
            record(voter)

        vote.add_source(self.url)
        return vote
Exemple #59
0
    def parse_vote(self, bill, action, act_chamber, act_date, url):
        """Build a ``Vote`` from an action string and its detail page.

        ``action`` supplies the tallies ("Y<n> N<n>" optionally followed by
        up to three more letter+number groups, which are summed as
        "other").  The page at ``url`` (relative to the BASIS root)
        supplies the motion text and the member lists.

        Returns the populated ``Vote``; it is not attached to ``bill``
        here.
        """
        url = "http://www.legis.state.ak.us/basis/%s" % url
        info_page = self.soup_parser(self.urlopen(url))

        tally = re.findall(
            r'Y(\d+) N(\d+)\s*(?:\w(\d+))*\s*(?:\w(\d+))*'
            r'\s*(?:\w(\d+))*', action)[0]
        # Optional groups that did not match come back as empty strings.
        yes, no, o1, o2, o3 = [int(x) if x else 0 for x in tally]
        other = o1 + o2 + o3

        # The <pre> block holds one "Label: name, name, ..." paragraph per
        # vote category, separated by blank lines.
        votes = info_page.findAll('pre', text=re.compile('Yeas'),
                                  limit=1)[0].split('\n\n')

        motion = info_page.findAll(text=re.compile('The question being'))[0]
        motion = re.findall(r'The question being:\s*"(.*)\?"', motion,
                            re.DOTALL)[0].replace('\n', ' ')

        vote = Vote(act_chamber, act_date, motion, yes > no, yes, no, other)

        # Slice by the actual prefix length: the old hard-coded offset for
        # 'Absent: ' was one too long and dropped the first letter of the
        # first absent member.
        recorders = (
            ('Yeas: ', vote.yes),
            ('Nays: ', vote.no),
            ('Excused: ', vote.other),
            ('Absent: ', vote.other),
        )
        for vote_list in votes:
            for prefix, record in recorders:
                if not vote_list.startswith(prefix):
                    continue
                for name in vote_list[len(prefix):].split(','):
                    name = name.strip()
                    if name:
                        record(name)
                break

        vote.add_source(url)
        return vote