예제 #1
0
파일: bills.py 프로젝트: ecocity/openstates
    def add_house_votes(self, vote, filename):
        """Fill ``vote`` from a House roll-call PDF.

        The PDF at ``filename`` is converted to XML with ``convert_pdf`` and
        the strings under its ``<text>`` nodes are walked in document order:

        * a string starting with ``AYES`` is the summary line; its first two
          numbers are stored as ``yes_count`` / ``no_count`` and the
          "not voting" and "paired" numbers are summed into ``other_count``;
        * ``Y`` / ``N`` / ``x`` choose which of ``vote.yes`` / ``vote.no`` /
          ``vote.other`` the next name will be recorded with, and start a
          fresh name;
        * ``R`` / ``D`` / ``I`` closes the name and records it;
        * any other string is appended (space separated) to the name being
          built.

        ``vote`` is mutated in place; nothing is returned.
        """
        # Raw string so ``\d`` reaches the regex engine instead of being an
        # invalid string escape.  The quantifier before the last group is
        # lazy: a greedy ``.*`` there swallows every digit but the last, so a
        # paired count of "12" would be read as 2.
        vcount_re = re.compile(
            r'AYES.* (\d+).*NAYS.* (\d+).*NOT VOTING.* (\d+).* PAIRED.*?(\d+)')
        xml = convert_pdf(filename, 'xml')
        doc = lxml.html.fromstring(xml)  # use lxml.html for text_content()

        # function to call on next legislator name
        vfunc = None
        name = ''

        for textitem in doc.xpath('//text/text()'):
            if textitem.startswith('AYES'):
                ayes, nays, nv, paired = vcount_re.match(textitem).groups()
                vote['yes_count'] = int(ayes)
                vote['no_count'] = int(nays)
                vote['other_count'] = int(nv) + int(paired)
            elif textitem == 'N':
                vfunc = vote.no
                name = ''
            elif textitem == 'Y':
                vfunc = vote.yes
                name = ''
            elif textitem == 'x':
                vfunc = vote.other
                name = ''
            elif textitem in ('R', 'D', 'I'):
                # A party letter seen before any Y/N/x marker has no vote to
                # attach to; skip it rather than calling None.
                if vfunc is not None:
                    vfunc(name)
            elif name:
                name += ' ' + textitem
            else:
                name = textitem
예제 #2
0
    def scrape_votes(self, url, motion, date, chamber):
        """Download a roll-call PDF and build a ``Vote`` from the names in it.

        The PDF text is rewritten so that the section headings become
        standalone comma-separated tokens, then every token between "Yeas"
        and "DISCLAIMER" is cleaned up and handed to ``self.check_name``.
        A result of 1, 2 or 3 files the name under yes, no or other votes
        respectively; any other result is treated as a possible section
        heading ("Nays" / "Absent...") that updates the flags passed to
        later ``check_name`` calls.

        ``passed`` is true when the word "passed" occurs anywhere in the
        text.  Counts are the lengths of the collected name lists.

        Raises ``ValueError`` (from ``list.index``) when the text has no
        "Yeas" or "DISCLAIMER" token.
        """
        vote_pdf, resp = self.urlretrieve(url)
        text = convert_pdf(vote_pdf, 'text')

        # Order matters: the specific headings must be rewritten before the
        # generic "--" separator is turned into a comma.
        replacements = (
            ("Yeas--", ",Yeas, "),
            ("Nays--", ",Nays, "),
            ("Total--", ",Total, "),
            ("DISCLAIMER", ",DISCLAIMER,"),
            ("--", ","),
            ("Absent or those not voting", ",Absentorthosenotvoting,"),
        )
        for old, new in replacements:
            text = text.replace(old, new)

        passed = "passed" in text
        split_text = text.split(",")
        yea_mark = split_text.index("Yeas") + 1
        end_mark = split_text.index("DISCLAIMER")

        nays, other = False, False
        yes_votes = []
        no_votes = []
        other_votes = []

        for name in split_text[yea_mark:end_mark]:
            name = name.replace("\n", "")

            # Drop a trailing parenthesised word, keeping one or two name
            # words in front of it.
            if "(" in name:
                words = name.split()
                if len(words) == 2:
                    name = words[0]
                elif len(words) == 3:
                    name = words[0] + " " + words[1]

            # Only a single leading space is removed, as before.
            if name.startswith(" "):
                name = name[1:]

            # Tokens of more than three words are collapsed by removing
            # their spaces.
            if len(name.split()) > 3:
                name = name.replace(" ", "")

            # Classify once instead of re-calling check_name for every
            # branch.  NOTE(review): assumes check_name has no side effects
            # -- confirm against its definition.
            category = self.check_name(name, nays, other)
            if category == 1:
                yes_votes.append(name)
            elif category == 2:
                no_votes.append(name)
            elif category == 3:
                other_votes.append(name)
            else:
                if name == "Nays":
                    nays = True
                if "Absent" in name:
                    nays = False
                    other = True

        vote = Vote(chamber, date, motion, passed,
                    len(yes_votes), len(no_votes), len(other_votes))
        vote['yes_votes'] = yes_votes
        vote['no_votes'] = no_votes
        vote['other_votes'] = other_votes
        return vote
예제 #3
0
파일: bills.py 프로젝트: ecocity/openstates
    def add_senate_votes(self, vote, filename):
        """Fill ``vote`` from a Senate roll-call PDF.

        The PDF at ``filename`` is converted to XML and each ``<text>`` node
        is inspected in document order.  A node starting with ``AYES``,
        ``NAYS`` or ``NOT VOTING`` opens a section: the number after the
        " \u2212 " separator is stored as the matching count on ``vote`` and
        the following nodes are recorded with ``vote.yes`` / ``vote.no`` /
        ``vote.other``.  A node starting with ``SEQUENCE NO`` closes the
        current section.  ``vote`` is mutated in place; nothing is returned.
        """
        # lxml.html is used because its elements provide text_content()
        doc = lxml.html.fromstring(convert_pdf(filename, 'xml'))

        # heading prefix -> (name of the recording method on vote, count key)
        sections = (
            ('AYES', 'yes', 'yes_count'),
            ('NAYS', 'no', 'no_count'),
            ('NOT VOTING', 'other', 'other_count'),
        )

        # recorder for the section currently being read, if any
        record = None

        for node in doc.xpath('//text'):
            content = node.text_content().strip()

            for prefix, method_name, count_key in sections:
                if content.startswith(prefix):
                    record = getattr(vote, method_name)
                    vote[count_key] = int(content.split(u' \u2212 ')[1])
                    break
            else:
                # not a section heading
                if content.startswith('SEQUENCE NO'):
                    record = None
                elif record:
                    record(content)
예제 #4
0
파일: bills.py 프로젝트: ecocity/openstates
    def scrape_votes(self, url, motion, date, chamber):
        """Download a roll-call PDF and build a ``Vote`` from the names in it.

        ``motion`` is looked up in ``self._vote_mapping`` to obtain the
        motion text and pass/fail flag stored on the vote.  The PDF text is
        split into lines (recombined by ``_combine_lines``); a section
        heading such as "Yeas--" or "Nays--" selects the list that the
        following comma-separated names are added to, and "None." or a
        "Total--" token ends the current section.

        Returns a ``Vote`` whose counts are the lengths of the collected
        name lists.

        Raises ``KeyError`` when ``motion`` is not in ``self._vote_mapping``.
        """
        vote_pdf, resp = self.urlretrieve(url)
        text = convert_pdf(vote_pdf, 'text')

        # Deliberately an unguarded lookup: an unknown vote type raises
        # KeyError instead of being silently treated as passed.
        motion, passed = self._vote_mapping[motion]

        # process PDF text

        yes_votes = []
        no_votes = []
        other_votes = []

        # list that names are currently being added to (None = no section)
        cur_array = None

        precursors = (
            ('Yeas--', yes_votes),
            ('Nays--', no_votes),
            ('Absent or those not voting--', other_votes),
            ('Absent and those not voting--', other_votes),
            ('Voting Present--', other_votes),
            ('Present--', other_votes),
            ('DISCLAIMER', None),
        )

        # split lines on newline, recombine lines that don't end in punctuation
        lines = _combine_lines(text.split('\n'))

        for line in lines:

            # if the line contains a precursor, switch to that list and drop
            # the heading text; the last precursor found on the line wins
            for pc, arr in precursors:
                if pc in line:
                    cur_array = arr
                    line = line.replace(pc, '')

            # split names
            for name in line.split(','):
                name = name.strip()

                # None or a Total indicate the end of a section
                if 'None.' in name:
                    cur_array = None
                match = re.match(r'(.+?)\. Total--.*', name)
                if match:
                    # A Total can appear with no section open (for example
                    # "None. Total--0"); there is then no list to append to.
                    if cur_array is not None:
                        cur_array.append(match.group(1))
                    cur_array = None

                # append name if it looks ok
                if cur_array is not None and name and 'Total--' not in name:
                    # strip a single trailing period
                    if name.endswith('.'):
                        name = name[:-1]
                    cur_array.append(name)

        # return vote object
        yes_count = len(yes_votes)
        no_count = len(no_votes)
        other_count = len(other_votes)
        vote = Vote(chamber, date, motion, passed, yes_count, no_count,
                    other_count)
        vote['yes_votes'] = yes_votes
        vote['no_votes'] = no_votes
        vote['other_votes'] = other_votes
        return vote