Esempio n. 1
0
def test_fix_bill_id():
    """Check that ``bills.fix_bill_id`` normalizes bill id spellings."""
    # Every spelling in this tuple is expected to normalize to 'AB 74'.
    ab_variants = ('A.B. 74', 'A.B.74', 'AB74', 'AB 0074',
                   'AB074', 'A.B.074', 'A.B. 074', 'A.B\t074')

    cases = [(raw, 'AB 74') for raw in ab_variants]
    # A hyphenated id: only a space after the letter prefix is expected.
    cases.append(('PR19-0041', 'PR 19-0041'))

    for raw, wanted in cases:
        assert bills.fix_bill_id(raw) == wanted
Esempio n. 2
0
def test_fix_bill_id():
    """Verify ``bills.fix_bill_id`` on equivalent spellings of two ids."""
    normalize = bills.fix_bill_id

    # Dotted, spaced, tab-separated and zero-padded forms of the same bill.
    same_bill = (
        'A.B. 74',
        'A.B.74',
        'AB74',
        'AB 0074',
        'AB074',
        'A.B.074',
        'A.B. 074',
        'A.B\t074',
    )
    for spelling in same_bill:
        assert normalize(spelling) == 'AB 74'

    # A hyphenated id: only a space after the letter prefix is expected.
    hyphenated = normalize('PR19-0041')
    assert hyphenated == 'PR 19-0041'
Esempio n. 3
0
    def scrape_senate_vote(self, bill, url):
        """Scrape one senate roll-call vote PDF and attach it to ``bill``.

        Downloads ``url``, converts the PDF to text, and parses the date and
        time, the YEAS / NAYS / EXCUSED / NOT VOTING tallies together with
        the names listed under each heading, and the motion text.  On
        success a ``Vote`` for the "upper" chamber is added to ``bill``.
        If the date, time, tallies or motion cannot be found, the problem
        is logged and no vote is added.

        :param bill: bill object; ``bill["bill_id"]`` is read and
            ``bill.add_vote`` is called on it.
        :param url: location of the vote PDF.
        :returns: None
        """
        (path, resp) = self.urlretrieve(url)
        try:
            text = convert_pdf(path, "text")
        finally:
            # Always delete the downloaded file, even if conversion fails.
            os.remove(path)

        lines = text.split("\n")

        date_match = re.search(r"Date:\s+(\d+/\d+/\d+)", text)
        if not date_match:
            self.log("Couldn't find date on %s" % url)
            return

        time_match = re.search(r"Time:\s+(\d+:\d+:\d+)\s+(AM|PM)", text)
        if not time_match:
            # re.search returns None on no match; bail out like the date
            # check above instead of failing on ``None.group``.
            self.log("Couldn't find time on %s" % url)
            return

        date = "%s %s %s" % (date_match.group(1), time_match.group(1), time_match.group(2))
        date = datetime.datetime.strptime(date, "%m/%d/%Y %I:%M:%S %p")
        date = self._tz.localize(date)

        vote_type = None
        yes_count, no_count, other_count = None, None, 0
        votes = []
        # NOTE(review): the first 21 lines are skipped -- presumably the PDF
        # header; confirm against a sample document.
        for line in lines[21:]:
            line = line.strip()
            if not line:
                continue

            if line.startswith("YEAS"):
                yes_count = int(line.split(" - ")[1])
                vote_type = "yes"
            elif line.startswith("NAYS"):
                no_count = int(line.split(" - ")[1])
                vote_type = "no"
            elif line.startswith(("EXCUSED", "NOT VOTING")):
                # Both headings are folded into a single "other" tally.
                other_count += int(line.split(" - ")[1])
                vote_type = "other"
            else:
                # Any other line holds names separated by runs of 2+
                # whitespace characters; they belong to the latest heading.
                votes.extend([(n.strip(), vote_type) for n in re.split(r"\s{2,}", line)])

        if yes_count is None or no_count is None:
            self.log("Couldn't find vote counts in %s" % url)
            return

        passed = yes_count > no_count + other_count

        clean_bill_id = fix_bill_id(bill["bill_id"])
        motion_line = None
        for i, line in enumerate(lines):
            if line.strip() == clean_bill_id:
                # The motion sits two lines below the bill id line; there is
                # no break, so the last matching line wins.
                motion_line = i + 2

        # Guard against a missing bill id line or one too close to the end
        # of the text, either of which used to raise when indexing.
        if motion_line is None or motion_line >= len(lines):
            self.log("Couldn't find motion for %s" % url)
            return

        motion = lines[motion_line]
        if not motion:
            self.log("Couldn't find motion for %s" % url)
            return

        vote = Vote("upper", date, motion, passed, yes_count, no_count, other_count)
        vote.add_source(url)

        insert_specific_votes(vote, votes)
        check_vote_counts(vote)

        bill.add_vote(vote)
Esempio n. 4
0
    def scrape_senate_vote(self, bill, url):
        """Scrape one senate roll-call vote PDF and attach it to ``bill``.

        Downloads ``url``, converts the PDF to text, and parses the date and
        time, the YEAS / NAYS / EXCUSED / NOT VOTING tallies together with
        the names listed under each heading, and the motion text.  On
        success a ``Vote`` for the 'upper' chamber is added to ``bill``.
        If the date, time, tallies or motion cannot be found, the problem
        is logged and no vote is added.

        :param bill: bill object; ``bill['bill_id']`` is read and
            ``bill.add_vote`` is called on it.
        :param url: location of the vote PDF.
        :returns: None
        """
        (path, resp) = self.urlretrieve(url)
        try:
            text = convert_pdf(path, 'text')
        finally:
            # Always delete the downloaded file, even if conversion fails.
            os.remove(path)

        lines = text.split('\n')

        date_match = re.search(r'Date:\s+(\d+/\d+/\d+)', text)
        if not date_match:
            self.log("Couldn't find date on %s" % url)
            return

        time_match = re.search(r'Time:\s+(\d+:\d+:\d+)\s+(AM|PM)', text)
        if not time_match:
            # re.search returns None on no match; bail out like the date
            # check above instead of failing on ``None.group``.
            self.log("Couldn't find time on %s" % url)
            return

        date = "%s %s %s" % (date_match.group(1), time_match.group(1),
                             time_match.group(2))
        date = datetime.datetime.strptime(date, "%m/%d/%Y %I:%M:%S %p")
        date = self._tz.localize(date)

        vote_type = None
        yes_count, no_count, other_count = None, None, 0
        votes = []
        # NOTE(review): the first 21 lines are skipped -- presumably the PDF
        # header; confirm against a sample document.
        for line in lines[21:]:
            line = line.strip()
            if not line:
                continue

            if line.startswith('YEAS'):
                yes_count = int(line.split(' - ')[1])
                vote_type = 'yes'
            elif line.startswith('NAYS'):
                no_count = int(line.split(' - ')[1])
                vote_type = 'no'
            elif line.startswith(('EXCUSED', 'NOT VOTING')):
                # Both headings are folded into a single 'other' tally.
                other_count += int(line.split(' - ')[1])
                vote_type = 'other'
            else:
                # Any other line holds names separated by runs of 2+
                # whitespace characters; they belong to the latest heading.
                votes.extend([(n.strip(), vote_type)
                              for n in re.split(r'\s{2,}', line)])

        if yes_count is None or no_count is None:
            self.log("Couldn't find vote counts in %s" % url)
            return

        passed = yes_count > no_count + other_count

        clean_bill_id = fix_bill_id(bill['bill_id'])
        motion_line = None
        for i, line in enumerate(lines):
            if line.strip() == clean_bill_id:
                # The motion sits two lines below the bill id line; there is
                # no break, so the last matching line wins.
                motion_line = i + 2

        # Guard against a missing bill id line or one too close to the end
        # of the text, either of which used to raise when indexing.
        if motion_line is None or motion_line >= len(lines):
            self.log("Couldn't find motion for %s" % url)
            return

        motion = lines[motion_line]
        if not motion:
            self.log("Couldn't find motion for %s" % url)
            return

        vote = Vote('upper', date, motion, passed, yes_count, no_count,
                    other_count)
        vote.add_source(url)

        insert_specific_votes(vote, votes)
        check_vote_counts(vote)

        bill.add_vote(vote)
Esempio n. 5
0
    def scrape_senate_vote(self, bill, url):
        """Scrape one senate roll-call vote PDF and attach it to ``bill``.

        Downloads ``url``, converts the PDF to text, and parses the date and
        time, the YEAS / NAYS / EXCUSED / NOT VOTING tallies together with
        the names listed under each heading, and the motion text.  On
        success a ``Vote`` for the 'upper' chamber is added to ``bill``.
        If the download fails, or the date, time, tallies or motion cannot
        be found, the problem is logged and no vote is added.

        :param bill: bill object; ``bill['bill_id']`` is read and
            ``bill.add_vote`` is called on it.
        :param url: location of the vote PDF.
        :returns: None
        """
        try:
            (path, resp) = self.urlretrieve(url)
        except Exception as exc:
            # Still best-effort (skip this vote), but no longer a bare
            # ``except:`` -- that also swallowed KeyboardInterrupt and
            # SystemExit -- and the failure is now logged.
            self.log("Couldn't retrieve %s: %s" % (url, exc))
            return

        try:
            text = convert_pdf(path, 'text')
        finally:
            # Always delete the downloaded file, even if conversion fails.
            os.remove(path)

        lines = text.split('\n')

        date_match = re.search(r'Date:\s+(\d+/\d+/\d+)', text)
        if not date_match:
            self.log("Couldn't find date on %s" % url)
            return

        time_match = re.search(r'Time:\s+(\d+:\d+:\d+)\s+(AM|PM)', text)
        if not time_match:
            # re.search returns None on no match; bail out like the date
            # check above instead of failing on ``None.group``.
            self.log("Couldn't find time on %s" % url)
            return

        date = "%s %s %s" % (date_match.group(1), time_match.group(1),
                             time_match.group(2))
        date = datetime.datetime.strptime(date, "%m/%d/%Y %I:%M:%S %p")
        date = self._tz.localize(date)

        vote_type = None
        yes_count, no_count, other_count = None, None, 0
        votes = []
        # NOTE(review): the first 21 lines are skipped -- presumably the PDF
        # header; confirm against a sample document.
        for line in lines[21:]:
            line = line.strip()
            if not line:
                continue

            if line.startswith('YEAS'):
                yes_count = int(line.split(' - ')[1])
                vote_type = 'yes'
            elif line.startswith('NAYS'):
                no_count = int(line.split(' - ')[1])
                vote_type = 'no'
            elif line.startswith(('EXCUSED', 'NOT VOTING')):
                # Both headings are folded into a single 'other' tally.
                other_count += int(line.split(' - ')[1])
                vote_type = 'other'
            else:
                # Any other line holds names separated by runs of 2+
                # whitespace characters; they belong to the latest heading.
                votes.extend([(n.strip(), vote_type)
                              for n in re.split(r'\s{2,}', line)])

        if yes_count is None or no_count is None:
            self.log("Couldn't find vote counts in %s" % url)
            return

        passed = yes_count > no_count + other_count

        clean_bill_id = fix_bill_id(bill['bill_id'])
        motion_line = None
        for i, line in enumerate(lines):
            if line.strip() == clean_bill_id:
                # The motion sits two lines below the bill id line; there is
                # no break, so the last matching line wins.
                motion_line = i + 2

        # Guard against a missing bill id line or one too close to the end
        # of the text, either of which used to raise when indexing.
        if motion_line is None or motion_line >= len(lines):
            self.log("Couldn't find motion for %s" % url)
            return

        motion = lines[motion_line]
        if not motion:
            self.log("Couldn't find motion for %s" % url)
            return

        vote = Vote('upper', date, motion, passed, yes_count, no_count,
                    other_count)
        vote.add_source(url)

        insert_specific_votes(vote, votes)
        check_vote_counts(vote)

        bill.add_vote(vote)