def scrape(self):
    """Yield one ``Vote`` per dated motion found on the scraped pages.

    Every page from ``self.iterpages()`` is searched for ``ContainerPanel``
    divs.  Inside a panel the date texts, motion texts and roll-call
    containers are paired up positionally with ``zip``, so surplus items in
    the longer lists are dropped.  A motion whose text is blank is skipped
    after logging a warning.  The first header text containing "docket"
    (case-insensitive), if there is one, is attached via ``set_bill``.
    """
    for page in self.iterpages():
        for panel in page.xpath('//div[@class="ContainerPanel"]'):
            dates = panel.xpath(".//font[@color='#276598']/b/text()")
            motions = [
                text.strip()
                for text in panel.xpath(
                    ".//div[@style='width:260px; float:left;']/text()")
            ]
            rolls = panel.xpath(".//div[@style='width:150px; float:right;']")

            headers = panel.xpath(".//div[@class='HeaderContent']/b/text()")
            dockets = [text for text in headers if "docket" in text.lower()]
            docket = dockets[0] if dockets else None

            for date, motion, roll in zip(dates, motions, rolls):
                when = dt.datetime.strptime(date, "%m/%d/%Y")
                motion = motion.strip()
                if not motion:
                    self.warning("Skipping vote.")
                    continue

                ballot = Vote(
                    session=self.session,
                    organization="Boston City Council",
                    type="other",
                    passed=False,
                    date=when.strftime("%Y-%m-%d"),
                    motion=motion,
                    yes_count=0,
                    no_count=0,
                )
                if docket:
                    ballot.set_bill(docket)

                tally = {"yes": 0, "no": 0, "other": 0}
                # The child divs are consumed three at a time: the first
                # cell's text is passed on as the voter, the second holds
                # the choice ('Y', 'N' or anything else), the third is unused.
                cells = iter(roll.xpath("./div"))
                for voter_cell, choice_cell, _ in zip(cells, cells, cells):
                    choice = choice_cell.text
                    voter = voter_cell.text
                    if choice == "Y":
                        ballot.yes(voter)
                        tally["yes"] += 1
                    elif choice == "N":
                        ballot.no(voter)
                        tally["no"] += 1
                    else:
                        ballot.other(voter)
                        tally["other"] += 1

                # Overwrite the placeholder counts with what was tallied.
                for count in ballot.vote_counts:
                    count["count"] = tally[count["vote_type"]]

                ballot.add_source(DURL, note="root")
                yield ballot
def scrape(self):
    """Generate ``Vote`` objects from the panels of each scraped page.

    For each ``ContainerPanel`` div on each page from ``self.iterpages()``,
    dates, motions and roll-call containers are matched by position.  Blank
    motions are reported with a warning and skipped.  Individual choices are
    recorded as yes ('Y'), no ('N') or other (anything else), and the vote's
    counts are then replaced with the totals gathered while recording.
    """
    date_path = ".//font[@color='#276598']/b/text()"
    motion_path = ".//div[@style='width:260px; float:left;']/text()"
    roll_path = ".//div[@style='width:150px; float:right;']"
    header_path = ".//div[@class='HeaderContent']/b/text()"

    for page in self.iterpages():
        for subject in page.xpath('//div[@class="ContainerPanel"]'):
            dates = subject.xpath(date_path)
            motions = [raw.strip() for raw in subject.xpath(motion_path)]
            rolls = subject.xpath(roll_path)

            # Keep only header texts mentioning a docket; use the first one.
            matches = [
                header
                for header in subject.xpath(header_path)
                if "docket" in header.lower()
            ]
            docket = matches[0] if matches else None

            for date, motion, roll in zip(dates, motions, rolls):
                when = dt.datetime.strptime(date, "%m/%d/%Y")
                motion = motion.strip()
                if not motion:
                    self.warning("Skipping vote.")
                    continue

                record = Vote(
                    session=self.session,
                    organization="Boston City Council",
                    type="other",
                    passed=False,
                    date=when.strftime("%Y-%m-%d"),
                    motion=motion,
                    yes_count=0,
                    no_count=0,
                )
                if docket:
                    record.set_bill(docket)

                totals = {"yes": 0, "no": 0, "other": 0}
                # Sharing one iterator three ways walks the child divs in
                # consecutive groups of three; the last cell of each group
                # is ignored.
                cells = iter(roll.xpath("./div"))
                for voter_cell, choice_cell, _ in zip(*[cells] * 3):
                    choice, voter = choice_cell.text, voter_cell.text
                    bucket = (
                        "yes" if choice == "Y"
                        else "no" if choice == "N"
                        else "other"
                    )
                    # record.yes / record.no / record.other, chosen by name.
                    getattr(record, bucket)(voter)
                    totals[bucket] += 1

                for entry in record.vote_counts:
                    entry["count"] = totals[entry["vote_type"]]

                record.add_source(DURL, note="root")
                yield record
def test_full_vote():
    """Import one fully-populated scraped vote and verify the stored result.

    Sets up a jurisdiction, session, person, chamber, bill and committee,
    imports a single ``ScrapeVote`` through ``VoteImporter``, then checks the
    resulting ``VoteEvent``: its session, classification, linked bill, the
    single 'yes' count of 20, and the two individual votes.
    """
    j = Jurisdiction.objects.create(id='jid', division_id='did')
    session = j.legislative_sessions.create(name='1900', identifier='1900')
    # Created for its database row only; the returned object is not needed.
    Person.objects.create(id='person-id', name='Adam Smith')
    org = Organization.objects.create(id='org-id', name='House',
                                      classification='lower')
    bill = Bill.objects.create(id='bill-id', identifier='HB 1',
                               legislative_session=session,
                               from_organization=org)
    # Likewise created only for the row it inserts.
    Organization.objects.create(id='com-id', name='Arbitrary Committee',
                                parent=org)

    vote = ScrapeVote(legislative_session='1900', motion_text='passage',
                      start_date='1900-04-01', classification='passage:bill',
                      result='pass', bill_chamber='lower',
                      bill=bill.identifier)
    vote.set_count('yes', 20)
    vote.yes('John Smith')
    vote.no('Adam Smith')

    dmi = DumbMockImporter()
    bi = BillImporter('jid', dmi, dmi)
    VoteImporter('jid', dmi, dmi, bi).import_data([vote.as_dict()])

    assert VoteEvent.objects.count() == 1
    ve = VoteEvent.objects.get()
    assert ve.legislative_session_id == session.id
    assert ve.motion_classification == ['passage:bill']
    assert ve.bill_id == bill.id

    count = ve.counts.get()
    assert count.option == 'yes'
    assert count.value == 20

    votes = list(ve.votes.all())
    assert len(votes) == 2
    # Reuse the rows already fetched instead of querying a second time.
    for v in votes:
        if v.voter_name == 'John Smith':
            assert v.option == 'yes'
        else:
            assert v.option == 'no'
def test_full_vote():
    """Round-trip a complete scraped vote through ``VoteImporter``.

    After importing a ``ScrapeVote`` that carries a 'yes' count of 20 plus
    one yes and one no voter, exactly one ``VoteEvent`` must exist, tied to
    the created session and bill, with that count and both votes stored.
    """
    # --- database fixtures -------------------------------------------------
    jurisdiction = Jurisdiction.objects.create(id='jid', division_id='did')
    session = jurisdiction.legislative_sessions.create(
        name='1900', identifier='1900')
    Person.objects.create(id='person-id', name='Adam Smith')
    house = Organization.objects.create(
        id='org-id', name='House', classification='lower')
    bill = Bill.objects.create(
        id='bill-id',
        identifier='HB 1',
        legislative_session=session,
        from_organization=house,
    )
    Organization.objects.create(
        id='com-id', name='Arbitrary Committee', parent=house)

    # --- scraped input -----------------------------------------------------
    scraped = ScrapeVote(
        legislative_session='1900',
        motion_text='passage',
        start_date='1900-04-01',
        classification='passage:bill',
        result='pass',
        bill_chamber='lower',
        bill=bill.identifier,
    )
    scraped.set_count('yes', 20)
    scraped.yes('John Smith')
    scraped.no('Adam Smith')

    # --- import ------------------------------------------------------------
    mock_importer = DumbMockImporter()
    bill_importer = BillImporter('jid', mock_importer, mock_importer)
    vote_importer = VoteImporter(
        'jid', mock_importer, mock_importer, bill_importer)
    vote_importer.import_data([scraped.as_dict()])

    # --- verification ------------------------------------------------------
    assert VoteEvent.objects.count() == 1
    event = VoteEvent.objects.get()
    assert event.legislative_session_id == session.id
    assert event.motion_classification == ['passage:bill']
    assert event.bill_id == bill.id

    tally = event.counts.get()
    assert tally.option == 'yes'
    assert tally.value == 20

    recorded = list(event.votes.all())
    assert len(recorded) == 2
    for cast in event.votes.all():
        # John Smith voted yes; the only other voter voted no.
        expected = 'yes' if cast.voter_name == 'John Smith' else 'no'
        assert cast.option == expected
def get_bills(self):
    """Yield example ``Vote`` and ``Bill`` objects from hard-coded data.

    For each entry in the inline ``bills`` list a ``Bill`` is built.  Every
    vote listed for that bill is yielded first, as soon as it is built; the
    bill itself is yielded last, after its sponsorships, version links and
    actions have been added.

    NOTE: only ``yes_count``/``no_count`` and the "yes"/"no" rolls are
    applied to a ``Vote``; the ``other_count`` value and the "other" roll in
    the data are not read.
    """
    bills = [
        {
            "name": "HB500",
            "title": "Makes various changes to provisions governing employment practices",
            "session": "2011",
            "versions": ["http://example.com/HB500.pdf"],
            "actions": [
                {
                    "description": "Introduced",
                    "actor": "Committee on Pudding Pops",
                    "date": "2014-04-15",
                },
                {
                    "date": "2014-04-15",
                    "description": "Read first time. Referred to Committee on Commerce and Labor. To printer.",
                    "actor": "Test City Council",
                },
                {
                    "date": "2014-04-15",
                    "description": "From printer. To committee.",
                    "actor": "Test City Council",
                },
                {
                    "date": "2014-04-15",
                    "description": "From committee: Do pass.",
                    "actor": "Rules",
                },
                {
                    "description": "Signed into law",
                    "actor": "Fiscal Committee",
                    "date": "2014-04-19",
                },
            ],
            "sponsors_people": [],
            "sponsors_committee": [],
            "votes": [
                {
                    "motion": "Vote by the Committee on the Whole.",
                    "yes_count": 1,
                    "other_count": 1,
                    "no_count": 3,
                    "passed": True,
                    "type": "passage:bill",
                    "date": "2014-04-15",
                    "session": "2011",
                    "roll": {
                        "yes": [
                            "Eliana Meyer",
                        ],
                        "no": [
                            "Gunnar Luna",
                            "Regina Cruz",
                            "Makenzie Keller",
                        ],
                        "other": [
                            "Unknown Person",
                        ],
                    },
                },
            ],
        },
        {
            "name": "HB101",
            "title": "Joint county ditch proceedings-conduct by teleconference or video conference",
            "session": "2011",
            "versions": ["http://example.com/HB101.pdf"],
            "actions": [
                {
                    "description": "Introduced",
                    "actor": "council",
                    "date": "2014-04-15",
                },
                {
                    "description": "Referred to the Committee on Pudding Pops",
                    "actor": "council",
                    "date": "2014-04-16",
                },
                {
                    "description": "Reported favorably",
                    "actor": "council",
                    "date": "2014-04-16",
                },
                {
                    "description": "Referred to the Bills in the Third Read",
                    "actor": "council",
                    "date": "2014-04-17",
                },
                {
                    # The line break is spelled as an escape: a raw newline
                    # cannot appear inside a single-quoted string literal.
                    "description": "Vote by the Committee on the Whole. \nDo pass.",
                    "actor": "council",
                    "date": "2014-04-18",
                },
                {
                    "description": "Signed into law",
                    "actor": "council",
                    "date": "2014-04-19",
                },
            ],
            "sponsors_people": [
                "Shayla Fritz",
                "Gunnar Luna",
            ],
            "sponsors_committee": [
                "Standing Committee on Public Safety",
            ],
            "votes": [
                {
                    "motion": "Vote by the Committee on the Whole.",
                    "yes_count": 3,
                    "no_count": 1,
                    "passed": True,
                    "type": "passage:bill",
                    "date": "2014-04-18",
                    "session": "2011",
                    "roll": {
                        "yes": [
                            "Gunnar Luna",
                            "Regina Cruz",
                            "Makenzie Keller",
                        ],
                        "no": [
                            "Eliana Meyer",
                        ],
                        "other": [],
                    },
                },
            ],
        },
    ]

    for bill in bills:
        b = Bill(identifier=bill['name'],
                 title=bill['title'],
                 legislative_session=bill['session'])
        b.add_source("ftp://example.com/some/bill")

        for vote in bill['votes']:
            v = Vote(
                motion_text=vote['motion'],
                organization_id=make_psuedo_id(
                    name="Test City Council", classification="legislature"),
                yes_count=vote['yes_count'],
                no_count=vote['no_count'],
                result='pass' if vote['passed'] else 'fail',
                classification=vote['type'],
                start_date=vote['date'],
                legislative_session=vote['session'],
            )
            v.add_source("http://example.com/votes/vote.xls")

            for yv in vote['roll']['yes']:
                v.yes(yv)

            for nv in vote['roll']['no']:
                v.no(nv)

            yield v

        for sponsor in bill['sponsors_people']:
            b.add_sponsorship(name=sponsor, classification='primary',
                              entity_type='person', primary=True)

        for sponsor in bill['sponsors_committee']:
            b.add_sponsorship(name=sponsor, classification='primary',
                              entity_type='organization', primary=True)

        for version in bill['versions']:
            b.add_version_link(note="Bill Version", url=version)

        for action in bill['actions']:
            # Swap the 'actor' name for an 'organization' pseudo id so the
            # dict can be passed straight through as keyword arguments.
            action['organization'] = make_psuedo_id(
                name=action.pop('actor'))
            b.add_action(**action)

        yield b
def get_bills(self):
    """Generate sample votes and bills from an inline fixture list.

    Each fixture entry becomes a ``Bill``.  The votes belonging to a bill
    are constructed and yielded before the bill; the bill is yielded once
    its sponsorships, version links and actions have been attached.

    NOTE: the "other" roll and ``other_count`` present in the fixture data
    are not used when building a ``Vote``; only the yes/no counts and rolls
    are read.
    """
    bills = [
        {
            "name": "HB500",
            "title": "Makes various changes to provisions governing employment practices",
            "session": "2011",
            "versions": ["http://example.com/HB500.pdf"],
            "actions": [
                {"description": "Introduced",
                 "actor": "Committee on Pudding Pops",
                 "date": "2014-04-15"},
                {"date": "2014-04-15",
                 "description": "Read first time. Referred to Committee on Commerce and Labor. To printer.",
                 "actor": "Test City Council"},
                {"date": "2014-04-15",
                 "description": "From printer. To committee.",
                 "actor": "Test City Council"},
                {"date": "2014-04-15",
                 "description": "From committee: Do pass.",
                 "actor": "Rules"},
                {"description": "Signed into law",
                 "actor": "Fiscal Committee",
                 "date": "2014-04-19"},
            ],
            "sponsors_people": [],
            "sponsors_committee": [],
            "votes": [
                {"motion": "Vote by the Committee on the Whole.",
                 "yes_count": 1,
                 "other_count": 1,
                 "no_count": 3,
                 "passed": True,
                 "type": "passage:bill",
                 "date": "2014-04-15",
                 "session": "2011",
                 "roll": {
                     "yes": ["Eliana Meyer"],
                     "no": ["Gunnar Luna", "Regina Cruz", "Makenzie Keller"],
                     "other": ["Unknown Person"],
                 }},
            ],
        },
        {
            "name": "HB101",
            "title": "Joint county ditch proceedings-conduct by teleconference or video conference",
            "session": "2011",
            "versions": ["http://example.com/HB101.pdf"],
            "actions": [
                {"description": "Introduced",
                 "actor": "council",
                 "date": "2014-04-15"},
                {"description": "Referred to the Committee on Pudding Pops",
                 "actor": "council",
                 "date": "2014-04-16"},
                {"description": "Reported favorably",
                 "actor": "council",
                 "date": "2014-04-16"},
                {"description": "Referred to the Bills in the Third Read",
                 "actor": "council",
                 "date": "2014-04-17"},
                # A physical line break is not allowed inside a
                # single-quoted literal, so it is written as "\n" here.
                {"description": "Vote by the Committee on the Whole. \nDo pass.",
                 "actor": "council",
                 "date": "2014-04-18"},
                {"description": "Signed into law",
                 "actor": "council",
                 "date": "2014-04-19"},
            ],
            "sponsors_people": ["Shayla Fritz", "Gunnar Luna"],
            "sponsors_committee": ["Standing Committee on Public Safety"],
            "votes": [
                {"motion": "Vote by the Committee on the Whole.",
                 "yes_count": 3,
                 "no_count": 1,
                 "passed": True,
                 "type": "passage:bill",
                 "date": "2014-04-18",
                 "session": "2011",
                 "roll": {
                     "yes": ["Gunnar Luna", "Regina Cruz", "Makenzie Keller"],
                     "no": ["Eliana Meyer"],
                     "other": [],
                 }},
            ],
        },
    ]

    for bill in bills:
        b = Bill(
            identifier=bill['name'],
            title=bill['title'],
            legislative_session=bill['session'],
        )
        b.add_source("ftp://example.com/some/bill")

        for vote in bill['votes']:
            v = Vote(
                motion_text=vote['motion'],
                organization_id=make_psuedo_id(
                    name="Test City Council",
                    classification="legislature",
                ),
                yes_count=vote['yes_count'],
                no_count=vote['no_count'],
                result='pass' if vote['passed'] else 'fail',
                classification=vote['type'],
                start_date=vote['date'],
                legislative_session=vote['session'],
            )
            v.add_source("http://example.com/votes/vote.xls")

            for yv in vote['roll']['yes']:
                v.yes(yv)
            for nv in vote['roll']['no']:
                v.no(nv)

            yield v

        for sponsor in bill['sponsors_people']:
            b.add_sponsorship(
                name=sponsor,
                classification='primary',
                entity_type='person',
                primary=True,
            )

        for sponsor in bill['sponsors_committee']:
            b.add_sponsorship(
                name=sponsor,
                classification='primary',
                entity_type='organization',
                primary=True,
            )

        for version in bill['versions']:
            b.add_version_link(note="Bill Version", url=version)

        for action in bill['actions']:
            # 'actor' is removed and replaced by an 'organization' pseudo id
            # before the remaining keys are expanded as keyword arguments.
            action['organization'] = make_psuedo_id(name=action.pop('actor'))
            b.add_action(**action)

        yield b