def test_fix_bill_id():
    """Check that ``bills.fix_bill_id`` normalizes differently spelled ids."""
    expected = 'AB 74'
    # Every spelling below is asserted to normalize to the same id.
    variants = (
        'A.B. 74',
        'A.B.74',
        'AB74',
        'AB 0074',
        'AB074',
        'A.B.074',
        'A.B. 074',
        'A.B\t074',
    )
    for variant in variants:
        assert bills.fix_bill_id(variant) == expected

    # An id whose numeric part contains a dash.
    assert bills.fix_bill_id('PR19-0041') == 'PR 19-0041'
def test_fix_bill_id():
    """Assert the normalized form ``bills.fix_bill_id`` gives for sample ids."""
    ab_spellings = (
        'A.B. 74', 'A.B.74', 'AB74', 'AB 0074',
        'AB074', 'A.B.074', 'A.B. 074', 'A.B\t074',
    )
    # (raw id, normalized id) pairs, checked in order.
    cases = [(raw, 'AB 74') for raw in ab_spellings]
    cases.append(('PR19-0041', 'PR 19-0041'))

    for raw, wanted in cases:
        assert bills.fix_bill_id(raw) == wanted
def scrape_senate_vote(self, bill, url):
    """Scrape one Senate roll-call PDF and attach the vote to ``bill``.

    The PDF at ``url`` is downloaded, converted to text and parsed for the
    vote date and time, the YEAS / NAYS / EXCUSED / NOT VOTING tallies
    (with the names listed under each heading) and the motion text.  If the
    date, time, yes/no tallies or motion cannot be found, the problem is
    logged through ``self.log`` and no vote is added.

    Args:
        bill: Bill object; ``bill["bill_id"]`` is read and
            ``bill.add_vote`` receives the parsed vote.
        url: Location of the roll-call PDF.

    Returns:
        None.
    """
    path, _ = self.urlretrieve(url)
    try:
        text = convert_pdf(path, "text")
    finally:
        # Remove the downloaded file even when the conversion fails.
        os.remove(path)
    lines = text.split("\n")

    date_match = re.search(r"Date:\s+(\d+/\d+/\d+)", text)
    if not date_match:
        self.log("Couldn't find date on %s" % url)
        return

    time_match = re.search(r"Time:\s+(\d+:\d+:\d+)\s+(AM|PM)", text)
    if not time_match:
        # re.search returns None on no match; bail out like the date check
        # instead of failing on ``None.group``.
        self.log("Couldn't find time on %s" % url)
        return

    date = "%s %s %s" % (date_match.group(1), time_match.group(1),
                         time_match.group(2))
    date = datetime.datetime.strptime(date, "%m/%d/%Y %I:%M:%S %p")
    date = self._tz.localize(date)

    vote_type = None
    yes_count, no_count, other_count = None, None, 0
    votes = []
    # The first 21 lines are skipped -- presumably the PDF header.
    # NOTE(review): confirm against a sample document.
    for line in lines[21:]:
        line = line.strip()
        if not line:
            continue

        if line.startswith("YEAS"):
            yes_count = int(line.split(" - ")[1])
            vote_type = "yes"
        elif line.startswith("NAYS"):
            no_count = int(line.split(" - ")[1])
            vote_type = "no"
        elif line.startswith(("EXCUSED", "NOT VOTING")):
            # Both headings are accumulated into the single "other" tally.
            other_count += int(line.split(" - ")[1])
            vote_type = "other"
        else:
            # Any other line holds names separated by runs of 2+ spaces;
            # they belong to the most recently seen heading.
            votes.extend([(n.strip(), vote_type)
                          for n in re.split(r"\s{2,}", line)])

    if yes_count is None or no_count is None:
        self.log("Couldne't find vote counts in %s" % url)
        return

    # Passing requires more yeas than nays and "other" votes combined.
    passed = yes_count > no_count + other_count

    clean_bill_id = fix_bill_id(bill["bill_id"])
    motion_line = None
    for i, line in enumerate(lines):
        if line.strip() == clean_bill_id:
            # The motion is read two lines below the bill id; when the id
            # appears several times the last occurrence wins.
            motion_line = i + 2

    motion = None
    # Guard against the bill id never appearing (motion_line is None) or
    # appearing too close to the end of the text.
    if motion_line is not None and motion_line < len(lines):
        motion = lines[motion_line]
    if not motion:
        self.log("Couldn't find motion for %s" % url)
        return

    vote = Vote("upper", date, motion, passed, yes_count, no_count,
                other_count)
    vote.add_source(url)

    insert_specific_votes(vote, votes)
    check_vote_counts(vote)
    bill.add_vote(vote)
def scrape_senate_vote(self, bill, url):
    """Scrape one Senate roll-call PDF and attach the vote to ``bill``.

    The PDF at ``url`` is downloaded, converted to text and parsed for the
    vote date and time, the YEAS / NAYS / EXCUSED / NOT VOTING tallies
    (with the names listed under each heading) and the motion text.  If the
    date, time, yes/no tallies or motion cannot be found, the problem is
    logged through ``self.log`` and no vote is added.

    Args:
        bill: Bill object; ``bill['bill_id']`` is read and
            ``bill.add_vote`` receives the parsed vote.
        url: Location of the roll-call PDF.

    Returns:
        None.
    """
    path, _ = self.urlretrieve(url)
    try:
        text = convert_pdf(path, 'text')
    finally:
        # Remove the downloaded file even when the conversion fails.
        os.remove(path)
    lines = text.split('\n')

    date_match = re.search(r'Date:\s+(\d+/\d+/\d+)', text)
    if not date_match:
        self.log("Couldn't find date on %s" % url)
        return

    time_match = re.search(r'Time:\s+(\d+:\d+:\d+)\s+(AM|PM)', text)
    if not time_match:
        # re.search returns None on no match; bail out like the date check
        # instead of failing on ``None.group``.
        self.log("Couldn't find time on %s" % url)
        return

    date = "%s %s %s" % (date_match.group(1), time_match.group(1),
                         time_match.group(2))
    date = datetime.datetime.strptime(date, "%m/%d/%Y %I:%M:%S %p")
    date = self._tz.localize(date)

    vote_type = None
    yes_count, no_count, other_count = None, None, 0
    votes = []
    # The first 21 lines are skipped -- presumably the PDF header.
    # NOTE(review): confirm against a sample document.
    for line in lines[21:]:
        line = line.strip()
        if not line:
            continue

        if line.startswith('YEAS'):
            yes_count = int(line.split(' - ')[1])
            vote_type = 'yes'
        elif line.startswith('NAYS'):
            no_count = int(line.split(' - ')[1])
            vote_type = 'no'
        elif line.startswith(('EXCUSED', 'NOT VOTING')):
            # Both headings are accumulated into the single "other" tally.
            other_count += int(line.split(' - ')[1])
            vote_type = 'other'
        else:
            # Any other line holds names separated by runs of 2+ spaces;
            # they belong to the most recently seen heading.
            votes.extend([(n.strip(), vote_type)
                          for n in re.split(r'\s{2,}', line)])

    if yes_count is None or no_count is None:
        self.log("Couldne't find vote counts in %s" % url)
        return

    # Passing requires more yeas than nays and "other" votes combined.
    passed = yes_count > no_count + other_count

    clean_bill_id = fix_bill_id(bill['bill_id'])
    motion_line = None
    for i, line in enumerate(lines):
        if line.strip() == clean_bill_id:
            # The motion is read two lines below the bill id; when the id
            # appears several times the last occurrence wins.
            motion_line = i + 2

    motion = None
    # Guard against the bill id never appearing (motion_line is None) or
    # appearing too close to the end of the text.
    if motion_line is not None and motion_line < len(lines):
        motion = lines[motion_line]
    if not motion:
        self.log("Couldn't find motion for %s" % url)
        return

    vote = Vote('upper', date, motion, passed, yes_count, no_count,
                other_count)
    vote.add_source(url)

    insert_specific_votes(vote, votes)
    check_vote_counts(vote)
    bill.add_vote(vote)
def scrape_senate_vote(self, bill, url):
    """Scrape one Senate roll-call PDF and attach the vote to ``bill``.

    The PDF at ``url`` is downloaded, converted to text and parsed for the
    vote date and time, the YEAS / NAYS / EXCUSED / NOT VOTING tallies
    (with the names listed under each heading) and the motion text.  If the
    download fails, or the date, time, yes/no tallies or motion cannot be
    found, the problem is logged through ``self.log`` and no vote is added.

    Args:
        bill: Bill object; ``bill['bill_id']`` is read and
            ``bill.add_vote`` receives the parsed vote.
        url: Location of the roll-call PDF.

    Returns:
        None.
    """
    try:
        path, _ = self.urlretrieve(url)
    except Exception as exc:
        # Best effort: a vote that cannot be downloaded is skipped, but the
        # failure is recorded.  Only ``Exception`` is caught so that
        # KeyboardInterrupt / SystemExit still propagate.
        self.log("Couldn't download %s: %s" % (url, exc))
        return

    try:
        text = convert_pdf(path, 'text')
    finally:
        # Remove the downloaded file even when the conversion fails.
        os.remove(path)
    lines = text.split('\n')

    date_match = re.search(r'Date:\s+(\d+/\d+/\d+)', text)
    if not date_match:
        self.log("Couldn't find date on %s" % url)
        return

    time_match = re.search(r'Time:\s+(\d+:\d+:\d+)\s+(AM|PM)', text)
    if not time_match:
        # re.search returns None on no match; bail out like the date check
        # instead of failing on ``None.group``.
        self.log("Couldn't find time on %s" % url)
        return

    date = "%s %s %s" % (date_match.group(1), time_match.group(1),
                         time_match.group(2))
    date = datetime.datetime.strptime(date, "%m/%d/%Y %I:%M:%S %p")
    date = self._tz.localize(date)

    vote_type = None
    yes_count, no_count, other_count = None, None, 0
    votes = []
    # The first 21 lines are skipped -- presumably the PDF header.
    # NOTE(review): confirm against a sample document.
    for line in lines[21:]:
        line = line.strip()
        if not line:
            continue

        if line.startswith('YEAS'):
            yes_count = int(line.split(' - ')[1])
            vote_type = 'yes'
        elif line.startswith('NAYS'):
            no_count = int(line.split(' - ')[1])
            vote_type = 'no'
        elif line.startswith(('EXCUSED', 'NOT VOTING')):
            # Both headings are accumulated into the single "other" tally.
            other_count += int(line.split(' - ')[1])
            vote_type = 'other'
        else:
            # Any other line holds names separated by runs of 2+ spaces;
            # they belong to the most recently seen heading.
            votes.extend([(n.strip(), vote_type)
                          for n in re.split(r'\s{2,}', line)])

    if yes_count is None or no_count is None:
        self.log("Couldne't find vote counts in %s" % url)
        return

    # Passing requires more yeas than nays and "other" votes combined.
    passed = yes_count > no_count + other_count

    clean_bill_id = fix_bill_id(bill['bill_id'])
    motion_line = None
    for i, line in enumerate(lines):
        if line.strip() == clean_bill_id:
            # The motion is read two lines below the bill id; when the id
            # appears several times the last occurrence wins.
            motion_line = i + 2

    motion = None
    # Guard against the bill id never appearing (motion_line is None) or
    # appearing too close to the end of the text.
    if motion_line is not None and motion_line < len(lines):
        motion = lines[motion_line]
    if not motion:
        self.log("Couldn't find motion for %s" % url)
        return

    vote = Vote('upper', date, motion, passed, yes_count, no_count,
                other_count)
    vote.add_source(url)

    insert_specific_votes(vote, votes)
    check_vote_counts(vote)
    bill.add_vote(vote)