def toy_vote():
    """Return a small sample ``Vote`` with one source attached.

    The vote is for the "2009" legislative session, passed, and is
    classified as ``bill-passage``.
    """
    sample = Vote(
        legislative_session="2009",
        motion_text="passage of the bill",
        start_date="2009-01-07",
        result="pass",
        classification="bill-passage",
    )
    sample.add_source("http://uri.example.com/", note="foo")
    return sample
def scrape(self):
    """Yield one ``Vote`` per roll call found on the pages of ``self.iterpages()``.

    Inside every ``ContainerPanel`` div the dates, motion texts and roll-call
    containers are matched up by position. Rows whose motion text is blank
    are skipped with a warning. Each yielded vote has ``DURL`` as its source
    and, when a panel header string mentions a docket, is attached to that
    docket through ``set_bill``.
    """
    for page in self.iterpages():
        for panel in page.xpath('//div[@class="ContainerPanel"]'):
            dates = panel.xpath(".//font[@color='#276598']/b/text()")
            raw_motions = panel.xpath(
                ".//div[@style='width:260px; float:left;']/text()")
            motions = [text.strip() for text in raw_motions]
            ballots = panel.xpath(".//div[@style='width:150px; float:right;']")

            # Keep the first header string that mentions a docket, if any.
            headers = panel.xpath(".//div[@class='HeaderContent']/b/text()")
            dockets = [text for text in headers if "docket" in text.lower()]
            docket = dockets[0] if dockets else None

            for date, motion, ballot in zip(dates, motions, ballots):
                when = dt.datetime.strptime(date, "%m/%d/%Y")
                motion = motion.strip()
                if not motion:
                    self.warning("Skipping vote.")
                    continue

                # Counts start at zero and are filled in once the roll has
                # been tallied below.
                # NOTE(review): ``passed`` is always False here — confirm
                # that this is intended.
                v = Vote(
                    session=self.session,
                    organization="Boston City Council",
                    type="other",
                    passed=False,
                    date=when.strftime("%Y-%m-%d"),
                    motion=motion,
                    yes_count=0,
                    no_count=0,
                )
                if docket:
                    v.set_bill(docket)

                tally = {"yes": 0, "no": 0, "other": 0}
                # The child divs are consumed three at a time: voter name,
                # recorded choice, and a third div that is ignored.
                cells = iter(ballot.xpath("./div"))
                for name_cell, choice_cell, _ in zip(cells, cells, cells):
                    how = choice_cell.text
                    who = name_cell.text
                    if how == "Y":
                        option, record = "yes", v.yes
                    elif how == "N":
                        option, record = "no", v.no
                    else:
                        option, record = "other", v.other
                    record(who)
                    tally[option] += 1

                for count in v.vote_counts:
                    count["count"] = tally[count["vote_type"]]

                v.add_source(DURL, note="root")
                yield v
def scrape(self):
    """Generate ``Vote`` objects from the roll calls on each listing page.

    For every ``ContainerPanel`` div on every page from ``self.iterpages()``,
    dates, motions and roll-call containers are zipped together positionally.
    A row with an empty motion is reported via ``self.warning`` and skipped.
    Votes are sourced to ``DURL`` and linked to the panel's docket (the first
    header string containing "docket") when one exists.
    """
    for page in self.iterpages():
        for container in page.xpath('//div[@class="ContainerPanel"]'):
            dates = container.xpath(".//font[@color='#276598']/b/text()")
            motions = [
                fragment.strip()
                for fragment in container.xpath(
                    ".//div[@style='width:260px; float:left;']/text()"
                )
            ]
            rolls = container.xpath(".//div[@style='width:150px; float:right;']")

            header_texts = container.xpath(".//div[@class='HeaderContent']/b/text()")
            docket_texts = [t for t in header_texts if "docket" in t.lower()]
            docket = docket_texts[0] if docket_texts else None

            for date, motion, roll in zip(dates, motions, rolls):
                when = dt.datetime.strptime(date, "%m/%d/%Y")
                motion = motion.strip()
                if not motion:
                    self.warning("Skipping vote.")
                    continue

                # Placeholder counts; the real totals are written into
                # ``vote_counts`` after the roll is read.
                # NOTE(review): ``passed`` is hard-coded to False — confirm.
                v = Vote(
                    session=self.session,
                    organization="Boston City Council",
                    type="other",
                    passed=False,
                    date=when.strftime("%Y-%m-%d"),
                    motion=motion,
                    yes_count=0,
                    no_count=0,
                )
                if docket:
                    v.set_bill(docket)

                totals = {"yes": 0, "no": 0, "other": 0}
                # Walk the child divs in groups of three; only the first
                # (voter) and second (choice) of each group are used.
                divs = iter(roll.xpath("./div"))
                for voter_div, choice_div, _ in zip(divs, divs, divs):
                    how = choice_div.text
                    who = voter_div.text
                    if how == "Y":
                        v.yes(who)
                        totals["yes"] += 1
                    elif how == "N":
                        v.no(who)
                        totals["no"] += 1
                    else:
                        v.other(who)
                        totals["other"] += 1

                for count in v.vote_counts:
                    count["count"] = totals[count["vote_type"]]

                v.add_source(DURL, note="root")
                yield v
def addBillHistory(self, bill, history_table):
    """Add every history row to ``bill`` as an action and collect its votes.

    ``history_table`` is parsed with ``self.parseDataTable``. A row is
    skipped when its ``'Date'`` value has no ``.date()`` method or when its
    ``'Action'`` text is empty. When a row's action details carry a ``'url'``,
    ``self.extractVotes`` is called on it, and a ``Vote`` is built if both a
    result and individual votes come back.

    Returns the list of ``Vote`` objects that were built.
    """
    collected = []

    for row, _, _ in self.parseDataTable(history_table):
        description = row['Action']
        try:
            acted_on = row['Date'].date().isoformat()
        except AttributeError:
            # https://chicago.legistar.com/LegislationDetail.aspx?ID=1424866&GUID=CEC53337-B991-4268-AE8A-D4D174F8D492
            continue

        if not description:
            continue

        bill.add_action(description,
                        acted_on,
                        organization=row['Action\xa0By'],
                        classification=ACTION_CLASSIFICATION[description])

        details = row['Action\xa0Details']
        if 'url' not in details:
            continue

        detail_url = details['url']
        result, votes = self.extractVotes(detail_url)
        if not (votes and result):
            # see https://github.com/datamade/municipal-scrapers-us/issues/15
            continue

        action_vote = Vote(legislative_session=bill.legislative_session,
                           motion_text=description,
                           classification=None,
                           start_date=acted_on,
                           result=result,
                           bill=bill.identifier)
        action_vote.add_source(detail_url)
        for option, voter in votes:
            action_vote.vote(option, voter)
        collected.append(action_vote)

    return collected
def toy_vote():
    """Return a small sample ``Vote`` with one source attached.

    The vote is for session "2009", passed, and is classified as
    ``passage:bill``.
    """
    sample = Vote(
        session="2009",
        motion_text="passage of the bill",
        start_date="2009-01-07",
        result="pass",
        classification="passage:bill",
    )
    sample.add_source("http://uri.example.com/", note="foo")
    return sample
def get_bills(self):
    """Yield sample votes and bills built from hard-coded fixture data.

    For each fixture bill, its votes are yielded first (one ``Vote`` per
    entry in ``"votes"``), followed by the ``Bill`` itself once sponsors,
    versions and actions have been attached.
    """
    bills = [
        {
            "name": "HB500",
            "title": "Makes various changes to provisions governing employment practices",
            "session": "2011",
            "versions": ["http://example.com/HB500.pdf"],
            "actions": [
                {
                    "description": "Introduced",
                    "actor": "Committee on Pudding Pops",
                    "date": "2014-04-15",
                },
                {
                    "date": "2014-04-15",
                    "description": "Read first time. Referred to Committee on Commerce and Labor. To printer.",
                    "actor": "Test City Council",
                },
                {
                    "date": "2014-04-15",
                    "description": "From printer. To committee.",
                    "actor": "Test City Council",
                },
                {
                    "date": "2014-04-15",
                    "description": "From committee: Do pass.",
                    "actor": "Rules",
                },
                {
                    "description": "Signed into law",
                    "actor": "Fiscal Committee",
                    "date": "2014-04-19",
                },
            ],
            "sponsors_people": [],
            "sponsors_committee": [],
            "votes": [
                {
                    "motion": "Vote by the Committee on the Whole.",
                    "yes_count": 1,
                    "other_count": 1,
                    "no_count": 3,
                    "passed": True,
                    "type": "passage:bill",
                    "date": "2014-04-15",
                    "session": "2011",
                    "roll": {
                        "yes": [
                            "Eliana Meyer",
                        ],
                        "no": [
                            "Gunnar Luna",
                            "Regina Cruz",
                            "Makenzie Keller",
                        ],
                        "other": [
                            "Unknown Person",
                        ],
                    },
                },
            ],
        },
        {
            "name": "HB101",
            "title": "Joint county ditch proceedings-conduct by teleconference or video conference",
            "session": "2011",
            "versions": ["http://example.com/HB101.pdf"],
            "actions": [
                {
                    "description": "Introduced",
                    "actor": "council",
                    "date": "2014-04-15",
                },
                {
                    "description": "Referred to the Committee on Pudding Pops",
                    "actor": "council",
                    "date": "2014-04-16",
                },
                {
                    "description": "Reported favorably",
                    "actor": "council",
                    "date": "2014-04-16",
                },
                {
                    "description": "Referred to the Bills in the Third Read",
                    "actor": "council",
                    "date": "2014-04-17",
                },
                {
                    # A raw line-break character used to sit inside this
                    # literal; it is written as plain single-line text so the
                    # description is unambiguous in the source.
                    "description": "Vote by the Committee on the Whole. Do pass.",
                    "actor": "council",
                    "date": "2014-04-18",
                },
                {
                    "description": "Signed into law",
                    "actor": "council",
                    "date": "2014-04-19",
                },
            ],
            "sponsors_people": [
                "Shayla Fritz",
                "Gunnar Luna",
            ],
            "sponsors_committee": [
                "Standing Committee on Public Safety",
            ],
            "votes": [
                {
                    "motion": "Vote by the Committee on the Whole.",
                    "yes_count": 3,
                    "no_count": 1,
                    "passed": True,
                    "type": "passage:bill",
                    "date": "2014-04-18",
                    "session": "2011",
                    "roll": {
                        "yes": [
                            "Gunnar Luna",
                            "Regina Cruz",
                            "Makenzie Keller",
                        ],
                        "no": [
                            "Eliana Meyer",
                        ],
                        "other": [],
                    },
                },
            ],
        },
    ]

    for bill in bills:
        b = Bill(identifier=bill['name'],
                 title=bill['title'],
                 legislative_session=bill['session'])
        b.add_source("ftp://example.com/some/bill")

        for vote in bill['votes']:
            v = Vote(
                motion_text=vote['motion'],
                organization_id=make_psuedo_id(
                    name="Test City Council", classification="legislature"),
                yes_count=vote['yes_count'],
                no_count=vote['no_count'],
                result='pass' if vote['passed'] else 'fail',
                classification=vote['type'],
                start_date=vote['date'],
                legislative_session=vote['session'],
            )
            v.add_source("http://example.com/votes/vote.xls")

            # NOTE(review): only the "yes" and "no" rolls are recorded; the
            # "other" roll and "other_count" in the fixture are not used —
            # confirm that is intended.
            for yv in vote['roll']['yes']:
                v.yes(yv)
            for nv in vote['roll']['no']:
                v.no(nv)

            yield v

        for sponsor in bill['sponsors_people']:
            b.add_sponsorship(name=sponsor, classification='primary',
                              entity_type='person', primary=True)

        for sponsor in bill['sponsors_committee']:
            b.add_sponsorship(name=sponsor, classification='primary',
                              entity_type='organization', primary=True)

        for version in bill['versions']:
            b.add_version_link(note="Bill Version", url=version)

        for action in bill['actions']:
            # The fixture's "actor" name is swapped for an "organization"
            # pseudo-id before the dict is passed on as keyword arguments.
            action['organization'] = make_psuedo_id(
                name=action.pop('actor'))
            b.add_action(**action)

        yield b
def scrape_bill(self, bill_id):
    """Convert one legacy API bill record into ``Bill`` and ``Vote`` objects.

    The record is fetched with ``self.api('bills/<bill_id>?')`` and consumed
    key by key with ``pop``; the trailing ``assert not old`` (and the matching
    ``assert not vote`` per vote) fails when a key was left unhandled.

    Yields each converted ``Vote`` first, then the ``Bill``. Yields nothing
    when the first remaining classification is ``miscellaneous``, ``jres``
    or ``cres``.

    Raises:
        ValueError: if the record lists companion bills for a state whose
            relation type is not known (anything other than nj, ny, mn).
        KeyError: if a required key is missing from the record.
    """
    old = self.api('bills/' + bill_id + '?')

    # not needed
    for key in ('id', 'state', 'created_at', 'updated_at', 'action_dates'):
        old.pop(key)
    for key in ('level', 'country', '+subject', '+scraped_subjects', 'subjects'):
        old.pop(key, None)

    classification = old.pop('type')

    # ca weirdness
    for flag in ('fiscal committee', 'urgency', 'local program', 'tax levy'):
        if flag in classification:
            classification.remove(flag)

    if classification[0] in ['miscellaneous', 'jres', 'cres']:
        return

    # State-specific renames, applied only when the classification is exactly
    # the single listed value.
    renames = {
        ('ar', 'memorial resolution'): 'memorial',
        ('ar', 'concurrent memorial resolution'): 'concurrent memorial',
        ('il', 'joint session resolution'): 'joint resolution',
        ('ny', 'legislative resolution'): 'resolution',
    }
    if len(classification) == 1:
        renamed = renames.get((self.state, classification[0]))
        if renamed is not None:
            classification = [renamed]

    if not old['title'] and self.state == 'me':
        old['title'] = '(unknown)'

    chamber = old.pop('chamber')
    if chamber == 'upper' and self.state in ('ne', 'dc'):
        chamber = 'legislature'
    elif chamber in ('joint', 'conference'):
        chamber = 'legislature'

    new = Bill(old.pop('bill_id'), old.pop('session'), old.pop('title'),
               chamber=chamber, classification=classification)

    abstract = old.pop('summary', None)
    if abstract:
        new.add_abstract(abstract, note='')

    for title in old.pop('alternate_titles'):
        new.add_title(title)

    for doc in old.pop('documents'):
        new.add_document_link(doc['name'], doc['url'], on_duplicate='ignore')

    for doc in old.pop('versions'):
        new.add_version_link(doc['name'], doc['url'],
                             media_type=doc.pop('mimetype', ''))

    for subj in old.pop('scraped_subjects', []):
        if subj:
            new.add_subject(subj)

    for spon in old.pop('sponsors'):
        if spon.get('committee_id') is not None:
            entity_type = 'organization'
        elif spon.get('leg_id') is not None:
            entity_type = 'person'
        else:
            entity_type = ''
        new.add_sponsorship(spon['name'], spon['type'], entity_type,
                            spon['type'] == 'primary')

    for act in old.pop('actions'):
        actor = act['actor']
        lowered = actor.lower()
        in_ca = self.state == 'ca'
        if lowered in ('governor', 'mayor', 'secretary of state'):
            actor = 'executive'
        elif lowered == 'house' or (lowered.startswith('lower (') and in_ca):
            actor = 'lower'
        # NOTE(review): 'upper`' (with a trailing backtick) presumably
        # matches a quirk in the legacy data; left unchanged — confirm
        # before "fixing" it.
        elif lowered in ('senate', 'upper`') or (lowered.startswith('upper (') and in_ca):
            actor = 'upper'
        elif actor in ('joint', 'other', 'Data Systems', 'Speaker', 'clerk',
                       'Office of the Legislative Fiscal Analyst',
                       'Became Law w', 'conference') or (
                           lowered.startswith('legislature (') and in_ca):
            actor = 'legislature'

        if actor in ('committee', 'sponsor') and self.state == 'pr':
            actor = 'legislature'

        # nebraska & DC
        if actor == 'upper' and self.state in ('ne', 'dc'):
            actor = 'legislature'

        if act['action']:
            newact = new.add_action(
                act['action'], act['date'][:10], chamber=actor,
                classification=[action_types[c] for c in act['type'] if c != 'other'])
            for entity in act.get('related_entities', []):
                if entity['type'] == 'committee':
                    entity['type'] = 'organization'
                elif entity['type'] == 'legislator':
                    entity['type'] = 'person'
                newact.add_related_entity(entity['name'], entity['type'])

    for comp in old.pop('companions', []):
        # Only these states have a known relation type. Fail with a clear
        # message instead of tripping over an unbound local variable.
        if self.state not in ('nj', 'ny', 'mn'):
            raise ValueError(
                'unknown relation type for companion bills in state %r'
                % (self.state,))
        new.add_related_bill(comp['bill_id'], comp['session'], 'companion')

    for abid in old.pop('alternate_bill_ids', []) + old.pop('+alternate_bill_ids', []):
        new.add_identifier(abid)

    # generic OpenStates stuff
    for openstates_id in old.pop('all_ids'):
        new.add_identifier(openstates_id, scheme='openstates')

    for source in old.pop('sources'):
        source.pop('retrieved', None)
        new.add_source(**source)

    ext_title = old.pop('+extended_title', None)
    if ext_title:
        new.add_title(ext_title, note='Extended Title')
    official_title = old.pop('+official_title', None)
    if official_title:
        new.add_title(official_title, note='Official Title')

    # Remaining truthy values under these keys are copied into the bill's
    # extras with any '+' removed from the key.
    bill_extras = ['+status', '+final_disposition', '+volume_chapter',
                   '+ld_number', '+referral', '+companion', '+description',
                   '+fiscal_note_probable:', '+preintroduction_required:',
                   '+drafter', '+category:', '+chapter', '+requester',
                   '+transmittal_date:', '+by_request_of',
                   '+bill_draft_number:', '+bill_lr', '+bill_url',
                   '+rcs_num', '+fiscal_note', '+impact_clause',
                   '+fiscal_notes', '+short_title', '+type_',
                   '+conference_committee', 'conference_committee',
                   '+companion_bill_ids']
    for key in bill_extras:
        value = old.pop(key, None)
        if value:
            new.extras[key.replace('+', '')] = value

    # votes
    vote_required = ('id', 'state', 'bill_id', 'vote_id')
    vote_ignored = ('bill_chamber', '+state', '+country', '+level',
                    '+vacant', '+not_voting', '+amended', '+excused',
                    '+NV', '+AB', '+P', '+V', '+E', '+EXC', '+EMER',
                    '+present', '+absent', '+seconded', '+moved',
                    '+vote_type', '+actual_vote', '+skip_votes',
                    '+bill_chamber', '+session', '+bill_id',
                    '+bill_session', 'committee', 'committee_id')
    vote_extras = ['+record', '+method', 'method', '+filename', 'record',
                   '+action', '+location', '+rcs_num', '+type_',
                   '+threshold', '+other_vote_detail', '+voice_vote']

    for vote_no, vote in enumerate(old.pop('votes'), 1):
        for key in vote_required:
            vote.pop(key)
        for key in vote_ignored:
            vote.pop(key, None)

        vtype = vote.pop('type', 'passage')
        if vtype == 'veto_override':
            vtype = ['veto-override']
        elif vtype == 'amendment':
            vtype = ['amendment-passage']
        elif vtype == 'other':
            vtype = ''
        else:
            vtype = ['bill-passage']

        # most states need identifiers for uniqueness, just do it everywhere
        identifier = vote['date'] + '-' + str(vote_no)

        vote_chamber = vote.pop('chamber')
        if vote_chamber == 'upper' and self.state in ('ne', 'dc'):
            vote_chamber = 'legislature'
        elif vote_chamber == 'joint':
            vote_chamber = 'legislature'

        newvote = Vote(legislative_session=vote.pop('session'),
                       motion_text=vote.pop('motion'),
                       result='pass' if vote.pop('passed') else 'fail',
                       chamber=vote_chamber,
                       start_date=vote.pop('date'),
                       classification=vtype,
                       bill=new,
                       identifier=identifier)

        for vt in ('yes', 'no', 'other'):
            newvote.set_count(vt, vote.pop(vt + '_count'))
            for name in vote.pop(vt + '_votes'):
                newvote.vote(vt, name['name'])

        for source in vote.pop('sources'):
            source.pop('retrieved', None)
            newvote.add_source(**source)

        # A vote without its own sources reuses the bill's source list.
        if not newvote.sources:
            newvote.sources = new.sources

        for key in vote_extras:
            value = vote.pop(key, None)
            if value:
                newvote.extras[key.replace('+', '')] = value

        # Every key must have been consumed by now; a leftover means the
        # record has a field this conversion does not know about.
        assert not vote, vote.keys()
        yield newvote

    assert not old, old.keys()
    yield new
def get_bills(self):
    """Yield sample votes and bills built from hard-coded fixture data.

    For each fixture bill, its votes are yielded first (one ``Vote`` per
    entry in ``"votes"``), followed by the ``Bill`` itself once sponsors,
    versions and actions have been attached.
    """
    bills = [
        {"name": "HB500",
         "title": "Makes various changes to provisions governing employment practices",
         "session": "2011",
         "versions": ["http://example.com/HB500.pdf"],
         "actions": [
             {"description": "Introduced",
              "actor": "Committee on Pudding Pops",
              "date": "2014-04-15"},
             {"date": "2014-04-15",
              "description": "Read first time. Referred to Committee on Commerce and Labor. To printer.",
              "actor": "Test City Council"},
             {"date": "2014-04-15",
              "description": "From printer. To committee.",
              "actor": "Test City Council"},
             {"date": "2014-04-15",
              "description": "From committee: Do pass.",
              "actor": "Rules"},
             {"description": "Signed into law",
              "actor": "Fiscal Committee",
              "date": "2014-04-19"},
         ],
         "sponsors_people": [],
         "sponsors_committee": [],
         "votes": [
             {"motion": "Vote by the Committee on the Whole.",
              "yes_count": 1,
              "other_count": 1,
              "no_count": 3,
              "passed": True,
              "type": "passage:bill",
              "date": "2014-04-15",
              "session": "2011",
              "roll": {
                  "yes": ["Eliana Meyer"],
                  "no": ["Gunnar Luna", "Regina Cruz", "Makenzie Keller"],
                  "other": ["Unknown Person"],
              }},
         ]},
        {"name": "HB101",
         "title": "Joint county ditch proceedings-conduct by teleconference or video conference",
         "session": "2011",
         "versions": ["http://example.com/HB101.pdf"],
         "actions": [
             {"description": "Introduced",
              "actor": "council",
              "date": "2014-04-15"},
             {"description": "Referred to the Committee on Pudding Pops",
              "actor": "council",
              "date": "2014-04-16"},
             {"description": "Reported favorably",
              "actor": "council",
              "date": "2014-04-16"},
             {"description": "Referred to the Bills in the Third Read",
              "actor": "council",
              "date": "2014-04-17"},
             # A raw line-break character used to sit inside this literal;
             # it is written as plain single-line text so the description is
             # unambiguous in the source.
             {"description": "Vote by the Committee on the Whole. Do pass.",
              "actor": "council",
              "date": "2014-04-18"},
             {"description": "Signed into law",
              "actor": "council",
              "date": "2014-04-19"},
         ],
         "sponsors_people": ["Shayla Fritz", "Gunnar Luna"],
         "sponsors_committee": ["Standing Committee on Public Safety"],
         "votes": [
             {"motion": "Vote by the Committee on the Whole.",
              "yes_count": 3,
              "no_count": 1,
              "passed": True,
              "type": "passage:bill",
              "date": "2014-04-18",
              "session": "2011",
              "roll": {
                  "yes": ["Gunnar Luna", "Regina Cruz", "Makenzie Keller"],
                  "no": ["Eliana Meyer"],
                  "other": [],
              }},
         ]},
    ]

    for bill in bills:
        b = Bill(identifier=bill['name'],
                 title=bill['title'],
                 legislative_session=bill['session'])
        b.add_source("ftp://example.com/some/bill")

        for vote in bill['votes']:
            v = Vote(motion_text=vote['motion'],
                     organization_id=make_psuedo_id(
                         name="Test City Council",
                         classification="legislature"
                     ),
                     yes_count=vote['yes_count'],
                     no_count=vote['no_count'],
                     result='pass' if vote['passed'] else 'fail',
                     classification=vote['type'],
                     start_date=vote['date'],
                     legislative_session=vote['session'],
                     )
            v.add_source("http://example.com/votes/vote.xls")

            # NOTE(review): only the "yes" and "no" rolls are recorded; the
            # "other" roll and "other_count" in the fixture are not used —
            # confirm that is intended.
            roll = vote['roll']
            for yv in roll['yes']:
                v.yes(yv)
            for nv in roll['no']:
                v.no(nv)

            yield v

        for sponsor in bill['sponsors_people']:
            b.add_sponsorship(name=sponsor, classification='primary',
                              entity_type='person', primary=True)

        for sponsor in bill['sponsors_committee']:
            b.add_sponsorship(name=sponsor, classification='primary',
                              entity_type='organization', primary=True)

        for version in bill['versions']:
            b.add_version_link(note="Bill Version", url=version)

        for action in bill['actions']:
            # The fixture's "actor" name is swapped for an "organization"
            # pseudo-id before the dict is passed on as keyword arguments.
            action['organization'] = make_psuedo_id(name=action.pop('actor'))
            b.add_action(**action)

        yield b