def test_committee_add_member_person():
    """Adding a Person object records a membership linking person and committee."""
    committee = Committee('Defense')
    adams = Person('John Adams')
    committee.add_member(adams, role='chairman')
    membership = committee._related[0]
    assert membership.person_id == adams._id
    assert membership.organization_id == committee._id
    assert membership.role == 'chairman'
def get_committees(self):
    """Yield a fixed set of sample committees.

    Each committee gets a source, a contact email, and its members; the
    first listed member is added with role 'chair', the rest as plain
    members.
    """
    rosters = (
        ("Ways and Means", (
            "Mckenzie A. Cannon",
            "Yandel V. Watkins",
        )),
        ("Committee on Pudding Pops", (
            "Kohen Dudley",
            "Cayden Norman",
            "Shayla Fritz",
            "Gunnar Luna",
        )),
        ("Fiscal Committee", (
            "Gunnar Luna",
            "Regina Cruz",
            "Makenzie Keller",
            "Eliana Meyer",
            "Taylor Parrish",
            "Callie Craig",
        )),
        ("Bills in the Third Read", (
            "Mckenzie A. Cannon",
            "Yandel V. Watkins",
            "Adrien A. Coffey",
            "Natasha Moon",
            "Ramon Harmon",
            "Sam Sellers",
        )),
        ("Rules", (
            "Shayla Fritz",
            "Gunnar Luna",
            "Regina Cruz",
        )),
        ("Standing Committee on Public Safety", (
            "Adrien A. Coffey",
            "Natasha Moon",
            "Ramon Harmon",
            "Sam Sellers",
            "Estrella Hahn",
            "Teagan Rojas",
            "Barrett Adams",
            "Kayla Shelton",
        )),
    )
    for committee_name, members in rosters:
        committee = Committee(name=committee_name)
        committee.add_source("http://example.com")
        committee.add_contact_detail(type='email',
                                     value="*****@*****.**",
                                     note='committee email')
        # First listed member chairs the committee; everyone else is a member.
        committee.add_member(name_or_person=members[0], role='chair')
        for member in members[1:]:
            committee.add_member(name_or_person=member)
        yield committee
def nyc_scrape_committees(self):
    """Yield a Committee for each scraped NYC committee.

    Committees whose names appear in BAD_CTTIES are skipped with a
    warning. Member names have ", Chair" suffixes stripped before being
    added.
    """
    SUBS = [(r", Chair", "")]
    for committee in self.get_committees():
        name, url = [committee[x] for x in ["name", "url"]]
        info = self.scrape_committee_homepage(url)
        if name in BAD_CTTIES:
            # Fix: the original used a Python-2-only print statement, which
            # is a SyntaxError on Python 3; print() works on both.
            print("WARNING: Bad Cttie: %s" % (name))
            continue
        c = Committee(name=name, classification='committee')
        c.add_source(url)
        for x in info['people']:
            # Apply each cleanup substitution before adding the member.
            for reg, repl in SUBS:
                x = re.sub(reg, repl, x)
            c.add_member(x)
        yield c
def _scrape_committees(self):
    """Scrape the cabq.gov council committees page, yielding Committee objects.

    Each <h3> heading names a committee; the parallel <ul> holds its
    members. A ", Chair" suffix on a member's name marks the chair.
    """
    url = "http://www.cabq.gov/council/committees"
    document = self.lxmlize(url)
    container = document.xpath("//div[@id='parent-fieldname-text']")[0]
    headings = container.xpath("./h3")
    # The first two paragraphs are page boilerplate, not committee text.
    paragraphs = container.xpath("./p")[2:]
    member_lists = container.xpath("./ul")
    for heading, _paragraph, member_list in zip(headings, paragraphs, member_lists):
        committee = Committee(name=heading.text_content())
        committee.add_source(url)
        for item in member_list.xpath(".//li"):
            member_name = item.text_content()
            if ", chair" in member_name.lower():
                title = "chair"
                member_name = member_name.replace(", Chair", "")
            else:
                title = "member"
            committee.add_member(name=member_name, role=title)
        yield committee
def scrape(self):
    """Scrape the member list page, yielding a Legislator per person and a
    Committee per table row.

    Each row carries a role cell, a names cell, and a term-expiry cell.
    People marked "TBA" or "Resigned" are skipped. Role keywords found in
    the address text are stripped out and recorded as the position.
    """
    # Hoisted out of the per-person loop: this mapping never changes.
    roles = {"Vice Chair": "Vice Chair",
             "Chair": "Chair",
             "CHAIR": "Chair",
             "Appt": "member"}
    page = self.lxmlize(MEMBER_LIST)
    for row in page.xpath("//table[@frame='void']/tbody/tr")[1:]:
        role, whos, expire = row.xpath("./*")
        people = zip([x.text_content() for x in whos.xpath(".//font")],
                     [x.text_content() for x in expire.xpath(".//font")])
        thing = role.text_content()
        comm = Committee(name=thing)
        url = role.xpath(".//a")[0].attrib['href']
        comm.add_link(url=url, note='homepage')
        for person, expire in people:
            if "TBA" in person:
                continue
            # Fix: raw strings — "\d"/"\w" in plain strings are invalid
            # escape sequences (SyntaxWarning on modern Python).
            # Prefer the comma-separated "Name, 123 Street" form; fall back
            # to space-separated when the comma match fails.
            try:
                info = re.match(r"(?P<name>.*), (?P<addr>\d+\w* .*)",
                                person).groupdict()
            except AttributeError:
                info = re.match(r"(?P<name>.*) (?P<addr>\d+\w* .*)",
                                person).groupdict()
            addr = info['addr']
            position = "member"
            if "Resigned" in addr:
                continue
            # Fix: renamed loop variable; the original reused `role`, which
            # shadowed the role table cell from the outer loop.
            for role_key in roles:
                if role_key in addr:
                    addr, chair = [x.strip() for x in addr.rsplit(role_key, 1)]
                    position = roles[role_key]
            addr = clean_address(addr)
            # NOTE(review): storing the role string as `district` looks
            # wrong, but is preserved — confirm against the Legislator API.
            leg = Legislator(name=info['name'], district=position)
            leg.add_contact_detail(type="address", value=addr, note="Address")
            leg.add_source(MEMBER_LIST)
            # Fix: attach the membership BEFORE yielding; the original added
            # it after `yield leg`, so a consumer processing each object
            # immediately saw the legislator without its committee.
            leg.add_membership(comm)
            yield leg
        comm.add_source(MEMBER_LIST)
        yield comm
def nyc_scrape_committees(self):
    """Yield a Committee for each scraped NYC committee.

    Names listed in BAD_CTTIES are skipped with a warning; member names
    have ", Chair" suffixes stripped before being added.
    """
    SUBS = [(r", Chair", "")]
    for committee in self.get_committees():
        name, url = [committee[x] for x in ["name", "url"]]
        info = self.scrape_committee_homepage(url)
        if name in BAD_CTTIES:
            # Fix: the original used a Python-2-only print statement, which
            # is a SyntaxError on Python 3; print() works on both.
            print("WARNING: Bad Cttie: %s" % (name))
            continue
        c = Committee(name=name, classification='committee')
        c.add_source(url)
        for x in info['people']:
            # Apply each cleanup substitution before adding the member.
            for reg, repl in SUBS:
                x = re.sub(reg, repl, x)
            c.add_member(x)
        yield c
def _scrape_committees(self):
    """Yield a Committee per <h3> heading on the cabq.gov committees page.

    Members come from the <ul> parallel to each heading; a ", chair"
    suffix (case-insensitive) marks that member as the chair.
    """
    url = "http://www.cabq.gov/council/committees"
    tree = self.lxmlize(url)
    body = tree.xpath("//div[@id='parent-fieldname-text']")[0]
    # Headings, descriptive paragraphs (first two are boilerplate), and
    # member lists appear in parallel order.
    triples = zip(body.xpath("./h3"),
                  body.xpath("./p")[2:],
                  body.xpath("./ul"))
    for heading, _description, roster in triples:
        org = Committee(name=heading.text_content())
        org.add_source(url)
        for entry in roster.xpath(".//li"):
            person = entry.text_content()
            role = 'chair' if ", chair" in person.lower() else 'member'
            if role == 'chair':
                person = person.replace(", Chair", "")
            org.add_member(name=person, role=role)
        yield org
def bos_scrape_committees(self):
    """Scrape the Boston committee list and yield populated Committee objects.

    Follows every committee link, scrapes each homepage, and records
    members, chair, vice-chair, and contact email when present.
    """
    listing = self.lxmlize(COMMITTEE_LIST)
    anchors = listing.xpath(
        "//a[contains(@href, 'committee') and contains(@href, 'asp')]")
    for anchor in anchors:
        # Image-only or empty anchors carry no committee name.
        if anchor.text is None:
            continue
        name = clean_name(anchor.text)
        homepage = anchor.attrib['href']
        details = self.scrape_committee_page(homepage)
        committee = Committee(name, classification='committee')
        committee.add_source(COMMITTEE_LIST)
        committee.add_source(homepage)
        for member in details['members']:
            committee.add_member(clean_name(member), role='member')
        chair = details.get('chair')
        if chair:
            committee.add_member(clean_name(chair[0]), role='chair')
        vice_chair = details.get('vice-chair')
        if vice_chair:
            committee.add_member(clean_name(vice_chair[0]), role='vice-chair')
        email = details.get('email')
        if email:
            committee.add_contact_detail(type='email', value=email[0],
                                         note='committee email')
        yield committee
def get_people(self):
    """Return two three-level committee chains (root -> sub -> subsub),
    shuffled into random order."""
    organizations = []
    for index in range(2):
        root = Committee(name='FOO ROOT %s' % (index))
        root.add_source("fake-data")
        organizations.append(root)
        grandchild = Committee(name='FOO SUBSUB %s' % (index))
        grandchild.add_source("fake-data")
        child = Committee(name='FOO SUB %s' % (index))
        child.add_source("fake-data")
        child.parent = root
        organizations.append(child)
        grandchild.parent = child
        organizations.append(grandchild)
    # Shuffle so downstream consumers can't rely on creation order.
    random.shuffle(organizations)
    return organizations
def get_committees(self):
    """Yield sample committees; the first listed member of each is its chair.

    Every committee gets the same example source URL and contact email.
    """
    committees = [
        {
            "name": "Ways and Means",
            "members": [
                "Mckenzie A. Cannon",
                "Yandel V. Watkins",
            ]
        },
        {
            "name": "Committee on Pudding Pops",
            "members": [
                "Kohen Dudley",
                "Cayden Norman",
                "Shayla Fritz",
                "Gunnar Luna",
            ]
        },
        {
            "name": "Fiscal Committee",
            "members": [
                "Gunnar Luna",
                "Regina Cruz",
                "Makenzie Keller",
                "Eliana Meyer",
                "Taylor Parrish",
                "Callie Craig",
            ]
        },
        {
            "name": "Bills in the Third Read",
            "members": [
                "Mckenzie A. Cannon",
                "Yandel V. Watkins",
                "Adrien A. Coffey",
                "Natasha Moon",
                "Ramon Harmon",
                "Sam Sellers",
            ]
        },
        {
            "name": "Rules",
            "members": [
                "Shayla Fritz",
                "Gunnar Luna",
                "Regina Cruz",
            ]
        },
        {
            "name": "Standing Committee on Public Safety",
            "members": [
                "Adrien A. Coffey",
                "Natasha Moon",
                "Ramon Harmon",
                "Sam Sellers",
                "Estrella Hahn",
                "Teagan Rojas",
                "Barrett Adams",
                "Kayla Shelton",
            ]
        },
    ]
    for entry in committees:
        committee = Committee(name=entry['name'])
        committee.add_source("http://example.com")
        committee.add_contact_detail(type='email',
                                     value="*****@*****.**",
                                     note='committee email')
        roster = entry['members']
        # Head of the roster chairs; the tail are ordinary members.
        committee.add_member(name_or_person=roster[0], role='chair')
        for name in roster[1:]:
            committee.add_member(name_or_person=name)
        yield committee
def test_committee_add_member_name():
    """Adding a member by bare name stores a pseudo-id and defaults the
    role to 'member'."""
    committee = Committee('Defense')
    committee.add_member('John Adams')
    membership = committee._related[0]
    assert get_psuedo_id(membership.person_id) == {'name': 'John Adams'}
    assert membership.organization_id == committee._id
    assert membership.role == 'member'