def get_people(self):
    """Yield a sample committee, its subcommittee, and the committee chair.

    Three objects are yielded, in this order: the 'Technology'
    organization (with a 'Chairman' post), the 'Subcommittee on
    E-Commerce' organization whose parent is that committee, and one
    person who holds the chairman role on the committee.
    """
    source_url = 'https://example.com'

    # Parent committee.
    committee = Organization('Technology')
    committee.add_post('Chairman', 'chairman')
    committee.add_source(source_url)
    yield committee

    # Subcommittee attached to the committee above.
    subcommittee = Organization('Subcommittee on E-Commerce',
                                parent=committee)
    subcommittee.add_source(source_url)
    yield subcommittee

    # The chair is a member of the parent committee only.
    chair = Person('Paul Tagliamonte', district='6', chamber='upper')
    chair.add_membership(committee, role='chairman')
    chair.add_source(source_url)
    yield chair
def test_add_contact_information():
    """ Check that a person validates before and after adding a contact """
    person = Person(
        "John Q. Public, Esq.",
        gender="male",
        image="http://example.com/john.jpg",
        summary="Some person",
    )
    person.add_source(url='foo')

    # The freshly built person must already be valid ...
    person.validate()

    # ... and must stay valid once a contact detail is attached.
    person.add_contact_detail(
        type='voice',
        value='876-5309',
        note='Jenny Cell',
    )
    person.validate()
def get_people(self):
    """Yield the Temecula City Council followed by its members.

    The council organization is yielded first.  Every row after the
    first in the second table of the listing page then produces one
    Person carrying a council membership, a work e-mail contact detail
    on that membership, and the listing and detail pages as sources.
    """
    urls = Urls(dict(list=legislators_url), self)

    council = Organization('Temecula City Council',
                           classification='legislature')
    council.add_source(urls.list.url)
    yield council

    rows = urls.list.xpath('//table[2]//tr')
    # The first row is skipped (presumably a header row -- confirm
    # against the page); each remaining row is one member.
    for row in rows[1:]:
        # Exactly two text nodes are expected: the name and the role.
        name, role = row.xpath('td/p[1]//font/text()')
        image = row.xpath('td/img/@src').pop()

        person = Person(name, image=image)
        membership = person.add_membership(council, role=role)

        # The row's two links are unpacked as (e-mail href, detail page).
        mailto_href, detail_url = row.xpath('td//a/@href')
        # Drop the first seven characters of the href (presumably the
        # "mailto:" scheme -- confirm against the page).
        address = mailto_href[7:]
        membership.contact_details.append(
            {'type': 'email', 'value': address, 'note': 'work'})

        person.add_source(urls.list.url)
        person.add_source(detail_url)
        yield person
def get_people(self):
    """Scrape the Temecula City Council listing page.

    Yields the council organization, then one Person per table row
    (all rows but the first of the page's second table).  Each person
    gets a membership on the council with a work e-mail contact detail,
    and is sourced to both the listing page and their detail link.
    """
    urls = Urls(dict(list=legislators_url), self)

    council = Organization('Temecula City Council',
                           classification='legislature')
    council.add_source(urls.list.url)
    yield council

    member_rows = urls.list.xpath('//table[2]//tr')[1:]
    for member_row in member_rows:
        # Name and role come from the two text nodes of the first <p>.
        full_name, role_title = member_row.xpath('td/p[1]//font/text()')
        photo = member_row.xpath('td/img/@src').pop()

        legislator = Person(full_name, image=photo)
        seat = legislator.add_membership(council, role=role_title)

        # Two hrefs are expected per row: the e-mail link and the
        # member's detail page.
        email_href, detail_url = member_row.xpath('td//a/@href')
        email_contact = dict(
            type='email',
            # Slice off the leading seven characters of the href
            # (presumably "mailto:" -- confirm against the page).
            value=email_href[7:],
            note='work',
        )
        seat.contact_details.append(email_contact)

        legislator.add_source(urls.list.url)
        legislator.add_source(detail_url)
        yield legislator
def test_basic_invalid_person():
    """ Check that validation fails once a person's name is cleared """
    person = Person("Bob B. Johnson")
    person.add_source(url='foo')

    # Sanity check: the person is valid as constructed.
    person.validate()

    # Clearing the name must make the next validation raise.
    person.name = None

    @raises(ValidationError)
    def _():
        person.validate()

    _()
def get_people(self):
    """Yield the Boise City Council followed by its members.

    The council organization is yielded first.  Each link found on the
    listing page, except the first, is then loaded as a detail page and
    turned into a Person with a council membership, a work e-mail
    contact detail, and the listing and detail pages as sources.
    """
    urls = Urls(dict(list=legislators_url), self)

    council = Organization('Boise City Council')
    council.add_source(legislators_url)
    yield council

    member_links = urls.list.xpath('//div[@id="content"]/div/a/@href')

    # Skip the mayor because his page has no name or email.
    for member_url in member_links[1:]:
        urls.add(detail=member_url)

        # Photo and heading come from the member's detail page.
        image = urls.detail.xpath('//div[@id="content"]/p/img/@src').pop()
        heading = urls.detail.xpath('//h1/text()').pop()

        # With 'Council ' removed, the heading's first word is used as
        # the role and everything after the first space as the name.
        heading = heading.replace('Council ', '')
        role, _, name = heading.partition(' ')

        person = Person(name, image=image)

        membership = person.add_membership(council, role=role)
        membership.add_source(urls.detail.url)

        # Take the last mailto link and drop its first seven characters
        # (the length of "mailto:").
        mailto_href = urls.detail.xpath(
            '//a[contains(@href, "mailto")]/@href').pop()
        membership.contact_details.append(
            {'type': 'email', 'value': mailto_href[7:], 'note': 'work'})

        person.add_source(urls.list.url)
        person.add_source(urls.detail.url)
        yield person
def test_magic_methods():
    """ Check that add_link and add_name record what they are given """
    person = Person(
        "John Q. Public, Esq.",
        gender="male",
        image="http://example.com/john.jpg",
        summary="Some person",
    )
    person.add_source(url='foo')
    person.validate()

    # add_link stores the url together with its note.
    person.add_link("http://twitter.com/ev", "Twitter Account")
    expected_links = [
        {"note": "Twitter Account", "url": "http://twitter.com/ev"},
    ]
    assert person.links == expected_links

    # A name added with only a note stores just those two keys.
    nickname = {"name": "Thiston", "note": "What my friends call me"}
    person.add_name("Thiston", note="What my friends call me")
    assert person.other_names == [nickname]

    # A second name is appended after the first, with its date range.
    birth_name = {
        "name": "Johnseph Q. Publico",
        "note": "Birth name",
        "start_date": "1920-01",
        "end_date": "1949-12-31",
    }
    person.add_name(
        "Johnseph Q. Publico",
        note="Birth name",
        start_date="1920-01",
        end_date="1949-12-31",
    )
    assert person.other_names == [nickname, birth_name]
def get_people(self):
    """Yield the Denver City Council followed by its members.

    Image urls, names, detail urls and post labels are collected from
    the listing page and zipped together positionally.  The council is
    yielded once all posts have been added to it; each member is then
    yielded as a Person with a membership on the matching post.

    A phone number and an e-mail address are searched for in the text
    of the member's detail page.  A contact detail is only added when
    the corresponding pattern matches; a member whose page lacks one
    is still yielded rather than aborting the scrape with an
    AttributeError.
    """
    urls = Urls(dict(list=legislators_url), self)

    council = Organization('Denver City Council')
    council.add_source(legislators_url)

    # Get image urls, names, detail urls, and districts.
    image_xpath = '//a[contains(@href, "councildistrict")]/img/@src'
    image_urls = urls.list.xpath(image_xpath)

    name_xpath = '//a[contains(@href, "councildistrict")]'
    names = [a.text_content() for a in urls.list.xpath(name_xpath)][:-1]
    # Drop empty link texts so names line up with the other lists.
    names = [name for name in names if name]

    person_urls_xpath = '//a[contains(@href, "councildistrict")]/@href'
    person_urls = urls.list.xpath(person_urls_xpath)

    post_ids = []
    xpath = '//a[contains(@href, "councildistrict")]/img/ancestor::td'
    for td in urls.list.xpath(xpath):
        text = td.text_content()
        # Raw string: '\d' in a plain literal is an invalid escape.
        m = re.search(r'Council District \d+', text)
        if m:
            post_ids.append(m.group())
        elif 'Council At-Large' in text:
            post_ids.append('Council At-Large')

    for post_id in post_ids:
        council.add_post(post_id, post_id)

    yield council

    # (contact type, pattern) pairs, searched in this order.
    # NOTE(review): as written the e-mail pattern only matches the
    # literal text "[email protected]"; it looks like an artifact of
    # e-mail obfuscation -- confirm the intended pattern.
    contact_patterns = (
        ('phone', r'\(\d{3}\)[ -]*\d{3}-\d{4}'),
        ('email', r'\[email protected]'),
    )

    data = zip(image_urls, names, person_urls, post_ids)
    for image_url, name, person_url, post_id in data:

        # Create legislator.
        person = Person(name, image=image_url)

        # Add sources.
        urls.add(detail=person_url)
        person.add_source(urls.list.url, note='list')
        person.add_source(urls.detail.url, note='detail')

        # Add membership on council.
        memb = person.add_membership(council, post_id=post_id.strip())
        memb.add_source(urls.detail.url)

        # Contact text lives in one of two containers; fall back to
        # the second when the first is blank.
        xpath = '//div[@id="dnn_column3"]'
        contact_text = urls.detail.xpath(xpath)[0].text_content()
        if not contact_text.strip():
            xpath = '//div[contains(@id, "dnn_RightPaneWide")]'
            contact_text = urls.detail.xpath(xpath)[0].text_content()

        for detail_type, pattern in contact_patterns:
            match = re.search(pattern, contact_text)
            if match is None:
                # Nothing of this kind on the page: skip the detail
                # instead of calling .group() on None.
                continue
            memb.contact_details.append(
                dict(type=detail_type, value=match.group(), note='work'))

        yield person
def migrate_people(self, state):
    """Convert billy legislator records into saved people and memberships.

    Every document returned by ``self.billy_db.legislators.find`` is
    turned into a ``Person`` and saved with ``self.save_object``.
    Memberships are then saved for the person's party, for their
    current legislature seat (with contact details), and for each
    committee / district role listed under ``old_roles``.

    Entries whose chamber is ``"joint"`` are skipped entirely.

    Args:
        state: when truthy, only legislators whose ``state`` field
            equals this value are migrated; otherwise the query is
            unfiltered.

    Raises:
        Exception: when an entry has a chamber but no matching
            legislature is found in ``_hot_cache``, or when a role's
            term name is not present in the jurisdiction metadata.
    """
    spec = {}
    if state:
        spec["state"] = state

    for entry in self.billy_db.legislators.find(spec, timeout=False):
        jurisdiction_id = openstates_to_jid(entry['_id'])

        who = Person(entry['full_name'])
        who.identifiers = [{'scheme': 'openstates',
                           'identifier': entry['_id']}]
        who._openstates_id = entry['_id']
        who.created_at = entry['created_at']
        who.updated_at = entry['updated_at']
        # Keep the scraped spelling as an alternate name when it differs.
        if who.name != entry['_scraped_name']:
            who.add_name(entry['_scraped_name'])

        # Copy simple fields across: billy key -> Person attribute.
        for k, v in {
            "photo_url": "image",
            "chamber": "chamber",
            "district": "district",
        }.items():
            if entry.get(k, None):
                setattr(who, v, entry[k])

        who.sources = entry['sources']

        home = entry.get('url', None)
        if home:
            who.add_link(home, "Homepage")

        vsid = entry.get('votesmart_id')
        if vsid:
            who.add_identifier(vsid, 'votesmart-id')

        tdid = entry.get('transparencydata_id')
        if tdid:
            who.add_identifier(tdid, 'transparencydata-id')

        blacklist = ["photo_url", "chamber", "district", "url",
                     "roles", "offices", "party", "state", "sources",
                     "active", "old_roles", "_locked_fields", "created_at",
                     "updated_at", "transparencydata_id", "votesmart_id",
                     "leg_id", "email", "phone", "fax", "_scraped_name",
                     "_id", "_all_ids", "full_name", "country", "level",
                     "office_address", "suffixes", "_type"]
        # The above is a list of keys we move over by hand later.

        # Every other non-empty field is carried over as an extra.
        for key, value in entry.items():
            if key in blacklist or not value:  # or key.startswith("_"):
                continue
            who.extras[key] = value

        who.add_meta('locked_fields', entry.get('_locked_fields', []))

        chamber = entry.get('chamber')
        # Joint-chamber entries are not migrated at all.
        if chamber == "joint":
            continue

        legislature = None
        if chamber:
            # _hot_cache is defined outside this block; here it is
            # looked up by a "<state>-<chamber>" key.
            legislature = _hot_cache.get('{state}-{chamber}'.format(
                state=entry['state'],
                chamber=chamber,
            ))

            # A chamber without a cached legislature is treated as fatal.
            # Entries with no chamber leave legislature as None and are
            # handled by the `if legislature:` guard further down.
            if legislature is None:
                print(chamber, entry['state'], entry['_id'])
                raise Exception("Someone's in the void.")

        self.save_object(who)  # gives who an id, btw.

        party = entry.get('party', None)
        # Clear memberships already stored for this person before the
        # new ones are saved below.
        # NOTE(review): `db` is not defined in this block -- presumably
        # a module-level handle; confirm it is the intended database.
        db.memberships.remove({"person_id": who._id}, safe=True)

        if party:
            m = Membership(who._id,
                           self.create_or_get_party(entry['party']))
            self.save_object(m)

        if legislature:
            # Current seat: starts at the current term's start year.
            term = get_current_term(jurisdiction_id)
            m = Membership(who._id, legislature,
                           chamber=chamber,
                           start_date=str(term['start_year']))
            m.add_extra('term', term['name'])

            chamber, district = (entry.get(x, None)
                                 for x in ['chamber', 'district'])

            if chamber:
                m.chamber = chamber

            if district:
                m.post_id = district

            # Contact details hang off the legislature membership.
            for key in ['email', 'fax', 'phone']:
                if key in entry and entry[key]:
                    m.add_contact_detail(type=key,
                                         value=entry[key],
                                         note=key)

            if entry.get("office_address"):
                m.add_contact_detail(type='office',
                                     value=entry['office_address'],
                                     note='Office Address')

            # Each non-empty office field (other than name/type) becomes
            # a contact detail noted with the office's name.
            for office in entry.get('offices', []):
                note = office['name']
                for key, value in office.items():
                    if not value or key in ["name", "type"]:
                        continue

                    m.add_contact_detail(type=key, value=value, note=note)

            self.save_object(m)

        for session in entry.get('old_roles', []):
            roles = entry['old_roles'][session]
            meta = get_metadata(obj_to_jid(who))
            term = None

            for role in roles:
                # `term` starts as the term's name and is replaced by the
                # matching term record from the metadata.
                term = role['term']
                to = None
                for to in meta['terms']:
                    if term == to['name']:
                        term = to
                        break
                else:
                    # Loop finished without a break: no such term.
                    raise Exception("No term found?")

                start_year = term['start_year']
                end_year = term['end_year']

                if 'committee' in role:
                    cid = role.get('committee_id')
                    if cid:
                        # Committee roles are only migrated when the
                        # committee id is present in _hot_cache.
                        jid = _hot_cache.get(cid)
                        if jid:
                            m = Membership(who._id, jid,
                                           role='member',
                                           start_date=str(start_year),
                                           end_date=str(end_year),
                                           chamber=role['chamber'])
                            m.add_extra('term', term['name'])

                            if "position" in role:
                                m.role = role['position']

                            self.save_object(m)

                            if m.role != 'member':
                                # In addition to being the (chair|vice-chair),
                                # they should also be noted as a member.
                                m = Membership(who._id, jid,
                                               role='member',
                                               start_date=str(start_year),
                                               end_date=str(end_year),
                                               chamber=role['chamber'])
                                m.add_extra('term', term['name'])
                                self.save_object(m)

                if 'district' in role:
                    # Past seat: same "<state>-<chamber>" cache key as
                    # above, built from the role's own fields.
                    oid = "{state}-{chamber}".format(**role)
                    leg_ocdid = _hot_cache.get(oid)
                    if leg_ocdid:
                        m = Membership(who._id, leg_ocdid,
                                       start_date=str(start_year),
                                       end_date=str(end_year),
                                       post_id=role['district'],
                                       chamber=role['chamber'])
                        m.add_extra('term', term['name'])
                        self.save_object(m)