def bos_scrape_committees(self):
    """Yield one ``Committee`` per committee link found on ``COMMITTEE_LIST``.

    Anchors whose ``href`` contains both ``committee`` and ``asp`` are
    treated as committee links; anchors without text are skipped.  Each
    linked page is handed to ``self.scrape_committee_page`` and the mapping
    it returns supplies the members, chair, vice-chair and e-mail address.

    Yields:
        Committee: populated with sources, members and (when present)
        an e-mail contact detail.
    """
    listing = self.lxmlize(COMMITTEE_LIST)
    anchors = listing.xpath(
        "//a[contains(@href, 'committee') and contains(@href, 'asp')]"
    )

    for anchor in anchors:
        if anchor.text is None:
            continue

        name = clean_name(anchor.text)
        homepage = anchor.attrib["href"]
        info = self.scrape_committee_page(homepage)

        committee = Committee(name, classification="committee")
        for url in (COMMITTEE_LIST, homepage):
            committee.add_source(url)

        for raw_member in info["members"]:
            committee.add_member(clean_name(raw_member), role="member")

        # Officer values are indexable; only element 0 is used.  Officers
        # are added on top of whatever plain "member" entries were recorded.
        for role in ("chair", "vice-chair"):
            holders = info.get(role)
            if holders:
                committee.add_member(clean_name(holders[0]), role=role)

        emails = info.get("email")
        if emails:
            committee.add_contact_detail(
                type="email",
                value=emails[0],
                note="committee email",
            )

        yield committee
def get_committees(self):
    """Yield a ``Committee`` for each entry of a hard-coded sample roster.

    Every committee gets the same placeholder source URL and e-mail
    contact.  The first listed member is added with the ``chair`` role;
    the remaining members are added without an explicit role.
    """
    rosters = [
        {
            "name": "Ways and Means",
            "members": [
                "Mckenzie A. Cannon",
                "Yandel V. Watkins",
            ],
        },
        {
            "name": "Committee on Pudding Pops",
            "members": [
                "Kohen Dudley",
                "Cayden Norman",
                "Shayla Fritz",
                "Gunnar Luna",
            ],
        },
        {
            "name": "Fiscal Committee",
            "members": [
                "Gunnar Luna",
                "Regina Cruz",
                "Makenzie Keller",
                "Eliana Meyer",
                "Taylor Parrish",
                "Callie Craig",
            ],
        },
        {
            "name": "Bills in the Third Read",
            "members": [
                "Mckenzie A. Cannon",
                "Yandel V. Watkins",
                "Adrien A. Coffey",
                "Natasha Moon",
                "Ramon Harmon",
                "Sam Sellers",
            ],
        },
        {
            "name": "Rules",
            "members": [
                "Shayla Fritz",
                "Gunnar Luna",
                "Regina Cruz",
            ],
        },
        {
            "name": "Standing Committee on Public Safety",
            "members": [
                "Adrien A. Coffey",
                "Natasha Moon",
                "Ramon Harmon",
                "Sam Sellers",
                "Estrella Hahn",
                "Teagan Rojas",
                "Barrett Adams",
                "Kayla Shelton",
            ],
        },
    ]

    for entry in rosters:
        org = Committee(name=entry["name"])
        org.add_source("http://example.com")
        org.add_contact_detail(
            type="email",
            value="*****@*****.**",
            note="committee email",
        )

        # Every roster above is non-empty, so the head/tail split is safe.
        head, *others = entry["members"]
        org.add_member(name_or_person=head, role="chair")
        for person in others:
            org.add_member(name_or_person=person)

        yield org
def bos_scrape_committees(self):
    """Generate ``Committee`` objects from the links on ``COMMITTEE_LIST``.

    The listing page is parsed with ``self.lxmlize``; every ``<a>`` whose
    ``href`` mentions both ``committee`` and ``asp`` and which has text is
    followed via ``self.scrape_committee_page``.  The resulting mapping is
    read for ``members`` (required key) and the optional ``chair``,
    ``vice-chair`` and ``email`` keys.

    Yields:
        Committee: one per usable link, in document order.
    """
    link_query = "//a[contains(@href, 'committee') and contains(@href, 'asp')]"
    links = self.lxmlize(COMMITTEE_LIST).xpath(link_query)

    # Links without text cannot be named, so they are dropped up front.
    for link in (a for a in links if a.text is not None):
        title = clean_name(link.text)
        homepage = link.attrib['href']
        details = self.scrape_committee_page(homepage)

        committee = Committee(title, classification='committee')
        committee.add_source(COMMITTEE_LIST)
        committee.add_source(homepage)

        for person in details['members']:
            cleaned = clean_name(person)
            committee.add_member(cleaned, role='member')

        # For the optional keys only the first element of the value is used.
        chairs = details.get('chair')
        if chairs:
            committee.add_member(clean_name(chairs[0]), role='chair')

        vice_chairs = details.get('vice-chair')
        if vice_chairs:
            committee.add_member(clean_name(vice_chairs[0]), role='vice-chair')

        addresses = details.get('email')
        if addresses:
            committee.add_contact_detail(
                type='email',
                value=addresses[0],
                note='committee email',
            )

        yield committee
def get_committees(self):
    """Generate ``Committee`` objects from a fixed, in-code list.

    Each committee is given the source ``http://example.com`` and a
    placeholder e-mail contact detail.  The member listed first becomes
    the chair; all later members are added with no role argument.
    """
    sample_data = [
        {
            "name": "Ways and Means",
            "members": [
                "Mckenzie A. Cannon",
                "Yandel V. Watkins",
            ],
        },
        {
            "name": "Committee on Pudding Pops",
            "members": [
                "Kohen Dudley",
                "Cayden Norman",
                "Shayla Fritz",
                "Gunnar Luna",
            ],
        },
        {
            "name": "Fiscal Committee",
            "members": [
                "Gunnar Luna",
                "Regina Cruz",
                "Makenzie Keller",
                "Eliana Meyer",
                "Taylor Parrish",
                "Callie Craig",
            ],
        },
        {
            "name": "Bills in the Third Read",
            "members": [
                "Mckenzie A. Cannon",
                "Yandel V. Watkins",
                "Adrien A. Coffey",
                "Natasha Moon",
                "Ramon Harmon",
                "Sam Sellers",
            ],
        },
        {
            "name": "Rules",
            "members": [
                "Shayla Fritz",
                "Gunnar Luna",
                "Regina Cruz",
            ],
        },
        {
            "name": "Standing Committee on Public Safety",
            "members": [
                "Adrien A. Coffey",
                "Natasha Moon",
                "Ramon Harmon",
                "Sam Sellers",
                "Estrella Hahn",
                "Teagan Rojas",
                "Barrett Adams",
                "Kayla Shelton",
            ],
        },
    ]

    for record in sample_data:
        roster = record['members']

        group = Committee(name=record['name'])
        group.add_source("http://example.com")
        group.add_contact_detail(
            type='email',
            value="*****@*****.**",
            note='committee email',
        )

        # Position 0 is the chair; the lists above are never empty.
        group.add_member(name_or_person=roster[0], role='chair')
        for colleague in roster[1:]:
            group.add_member(name_or_person=colleague)

        yield group