def test_multiple_posts_class():
    """One person holding two posts in one organization gets two memberships."""
    create_jurisdiction()
    foundation = Organization.objects.create(
        id="fnd",
        name="Foundation",
        classification="foundation",
        jurisdiction_id="fnd-jid",
    )
    seldon = Person.objects.create(id="hs", name="Hari Seldon")
    founder_post = Post.objects.create(
        id='f', label="founder", role="Founder", organization=foundation)
    chair_post = Post.objects.create(
        id='c', label="chair", role="Chair", organization=foundation)

    # one scraped membership per post, both for the same person
    scraped = [
        ScrapeMembership(person_id=seldon.id,
                         organization_id=foundation.id,
                         post_id=held_post.id)
        for held_post in (founder_post, chair_post)
    ]

    stub = DumbMockImporter()
    importer = MembershipImporter('fnd-jid', stub, stub, stub)
    importer.import_data([item.as_dict() for item in scraped])

    # ensure that the memberships attached in the right places
    assert foundation.memberships.count() == 2
    assert seldon.memberships.count() == 2
    assert founder_post.memberships.count() == 1
    assert chair_post.memberships.count() == 1
def test_full_membership():
    """Import one post-based and one direct membership and check the result.

    Verifies that both memberships attach to the organization, that each
    person gets exactly one, that the post gets the post-based one, and that
    the contact detail and link added to the first membership are imported.
    """
    # Set up the jurisdiction first, as the sibling tests do, because the
    # organization below references jurisdiction_id "fnd-jid".
    create_jurisdiction()
    org = Organization.objects.create(id="fnd", name="Foundation",
                                      classification="foundation",
                                      jurisdiction_id="fnd-jid")
    hari = Person.objects.create(id="hs", name="Hari Seldon")
    robot = Person.objects.create(id="robot", name="R. Daneel Olivaw")
    post = Post.objects.create(id='f', label="founder", role="Founder",
                               organization=org)

    # add a membership through a post
    m1 = ScrapeMembership(person_id=hari.id, organization_id=org.id,
                          post_id=post.id)
    m1.add_contact_detail(type='phone', value='555-555-1234',
                          note='this is fake')
    m1.add_link('http://example.com/link')

    # add a membership direct to an organization
    m2 = ScrapeMembership(person_id=robot.id, organization_id=org.id,
                          label='member', role='member')

    dumb_imp = DumbMockImporter()
    memimp = MembershipImporter('fnd-jid', dumb_imp, dumb_imp, dumb_imp)
    memimp.import_data([m1.as_dict(), m2.as_dict()])

    # ensure that the memberships attached in the right places
    assert org.memberships.count() == 2
    assert hari.memberships.count() == 1
    assert robot.memberships.count() == 1
    assert post.memberships.count() == 1

    # ensure that the first membership has contact details and links
    m = hari.memberships.get()
    cd = m.contact_details.get()
    assert cd.type == 'phone'
    assert cd.value == '555-555-1234'
    assert cd.note == 'this is fake'
    assert m.links.all()[0].url == 'http://example.com/link'
def test_full_membership():
    """Import a post-based and a direct membership; check where they land."""
    create_jurisdiction()
    foundation = Organization.objects.create(
        id="fnd",
        name="Foundation",
        classification="foundation",
        jurisdiction_id="fnd-jid",
    )
    seldon = Person.objects.create(id="hs", name="Hari Seldon")
    daneel = Person.objects.create(id="robot", name="R. Daneel Olivaw")
    founder_post = Post.objects.create(
        id='f', label="founder", role="Founder", organization=foundation)

    # membership held through a post, carrying a contact detail and a link
    via_post = ScrapeMembership(person_id=seldon.id,
                                organization_id=foundation.id,
                                post_id=founder_post.id)
    via_post.add_contact_detail(
        type='phone', value='555-555-1234', note='this is fake')
    via_post.add_link('http://example.com/link')

    # membership attached straight to the organization
    direct = ScrapeMembership(person_id=daneel.id,
                              organization_id=foundation.id,
                              label='member',
                              role='member')

    stub = DumbMockImporter()
    importer = MembershipImporter('fnd-jid', stub, stub, stub)
    importer.import_data([via_post.as_dict(), direct.as_dict()])

    # ensure that the memberships attached in the right places
    assert foundation.memberships.count() == 2
    assert seldon.memberships.count() == 1
    assert daneel.memberships.count() == 1
    assert founder_post.memberships.count() == 1

    # ensure that the first membership has contact details and links
    imported = seldon.memberships.get()
    detail = imported.contact_details.get()
    assert detail.type == 'phone'
    assert detail.value == '555-555-1234'
    assert detail.note == 'this is fake'
    assert imported.links.all()[0].url == 'http://example.com/link'
def scrape(self):
    """Yield legislators, then committees, then committee memberships.

    The state metadata is fetched first and stored on ``self.metadata``.
    Memberships are built from the tuples in ``self._roles``; a role whose
    committee id is not in ``self._committees`` is skipped.
    """
    self.metadata = self.api('metadata/{}?'.format(self.state))

    legislator_query = 'legislators/?state={}&fields=id&active=False'.format(
        self.state)
    for entry in self.api(legislator_query):
        # NJL000013 is never scraped; its roles are redirected below
        if entry['id'] in ('NJL000013', ):
            continue
        yield self.scrape_legislator(entry['id'])

    def parent_sort_key(committee):
        # a missing or None parent_id sorts as the empty string
        return committee.get('parent_id', '') or ''

    committee_query = 'committees/?state={}&fields=id,parent_id'.format(
        self.state)
    committees = list(self.api(committee_query))
    committees.sort(key=parent_sort_key)
    for entry in committees:
        yield self.scrape_committee(entry['id'])

    for leg_id, com_id, role, start, end in self._roles:
        # resolve OpenStates committee id to JSON ids
        if com_id not in self._committees:
            continue
        organization_id = self._committees[com_id]._id

        person_key = 'NJL000015' if leg_id == 'NJL000013' else leg_id
        yield Membership(person_id=self._people[person_key]._id,
                         organization_id=organization_id,
                         role=role,
                         label=role,
                         start_date=str(start),
                         end_date=str(end))
def test_unmatched_person():
    """A membership scraped with only a person name imports without a person_id."""
    create_jurisdiction()
    foundation = Organization.objects.create(
        id="fnd",
        name="Foundation",
        classification="foundation",
        jurisdiction_id="fnd-jid",
    )

    # not a real person, won't have a person_id after import
    nameless = ScrapeMembership(
        person_name='Harry Seldom',
        organization_id=foundation.id,
        person_id=None,
    )

    stub = DumbMockImporter()
    importer = MembershipImporter('fnd-jid', stub, stub, stub)
    importer.import_data([nameless.as_dict()])

    # ensure that the memberships attached in the right places
    assert foundation.memberships.count() == 1

    imported = foundation.memberships.get()
    assert imported.person_id is None
    assert imported.person_name == 'Harry Seldom'
def test_unmatched_person():
    """Importing a name-only membership keeps the name and leaves person_id unset."""
    create_jurisdiction()
    organization = Organization.objects.create(id="fnd",
                                               name="Foundation",
                                               classification="foundation",
                                               jurisdiction_id="fnd-jid")

    # not a real person, won't have a person_id after import
    scraped = ScrapeMembership(person_name='Harry Seldom',
                               organization_id=organization.id,
                               person_id=None)

    mock_importer = DumbMockImporter()
    membership_importer = MembershipImporter(
        'fnd-jid', mock_importer, mock_importer, mock_importer)
    membership_importer.import_data([scraped.as_dict()])

    # ensure that the memberships attached in the right places
    assert organization.memberships.count() == 1

    stored = organization.memberships.get()
    assert stored.person_id is None
    assert stored.person_name == 'Harry Seldom'
def test_basic_invalid_membership():
    """validate() must raise ValueError for a membership built with an integer person_id."""
    invalid = Membership(
        person_id=33,
        organization_id="orga_id",
    )

    with pytest.raises(ValueError):
        invalid.validate()
def scrape_legislator(self, legislator_id):
    """Convert one OpenStates legislator record into a ``Person``.

    The record is fetched through ``self.api`` and consumed destructively:
    each key is popped as it is handled, and the method asserts at the end
    that nothing is left, so an unrecognised key in the response fails
    loudly instead of being dropped silently.

    Side effects: every id in the record's ``all_ids`` (and the record's own
    id) is registered in ``self._people``, and each role is handed to
    ``self.process_role``.

    Args:
        legislator_id: OpenStates legislator id, appended to the
            ``legislators/`` API path.

    Returns:
        The populated ``Person``.

    Raises:
        KeyError: if a key popped without a default (``state``,
            ``full_name``, ``all_ids``, ``offices``, ``sources``, ...) is
            missing from the record.
        AssertionError: if unhandled keys remain in the record.
    """
    old = self.api('legislators/' + legislator_id + '?')

    # just not needed
    leg_id = old.pop('id')
    old.pop('created_at')
    old.pop('updated_at')
    old.pop('country', None)
    old.pop('level', None)
    old.pop('state')
    old.pop('leg_id')
    old.pop('active')

    # junk keys
    for junk_key in ('suffix', 'notice', 'csrfmiddlewaretoken',
                     'office_address', 'office_phone'):
        old.pop(junk_key, None)

    # translated
    district = old.pop('district', None)
    chamber = old.pop('chamber', None)
    image = old.pop('photo_url', '')
    name = old.pop('full_name')
    party = old.pop('party', None)

    if party in ('Nonpartisan', 'unknown', 'Unknown', 'Unaffiliated',
                 "Non Affiliated", " "):
        party = None
    elif party == 'Democrat':
        party = 'Democratic'

    # chamber is only consulted by the Puerto Rico remap just below
    if self.state in ('ne', 'dc'):
        chamber = 'legislature'

    if chamber == 'upper' and self.state == 'pr':
        # PR senate districts are translated to roman numerals
        pr_district = {
            '1': 'I',
            '2': 'II',
            '3': 'III',
            '4': 'IV',
            '5': 'V',
            '6': 'VI',
            '7': 'VII',
            '8': 'VIII',
        }
        if district in pr_district:
            district = pr_district[district]

    if '2008-2011' in old:
        # setdefault: the record may carry a stray term key without having
        # an 'old_roles' mapping to merge it into
        old.setdefault('old_roles', {})['2008-2011'] = old.pop('2008-2011')

    old_roles = old.pop('old_roles', {})

    if old['roles'] and any(r['type'] == 'Lt. Governor'
                            for r in old['roles']):
        new = Person(name=name, district=district, party=party, image=image)
        self.jurisdiction._executive.add_post('Lt. Governor', 'lt-gov')
        membership = Membership(
            person_id=new._id,
            role="Lt. Governor",
            organization_id=self.jurisdiction._executive._id)
        new._related.append(membership)
    else:
        new = Person(name=name, party=party, image=image)

    if leg_id in birthdays:
        new.birth_date = birthdays[leg_id]

    # various ids
    id_types = {
        'votesmart_id': 'votesmart',
        'transparencydata_id': 'influence-explorer',
        'nimsp_id': 'nimsp',
        'nimsp_candidate_id': 'nimsp-candidate',
    }
    for idname, scheme in id_types.items():
        val = old.pop(idname, None)
        if val:
            new.add_identifier(val, scheme=scheme)

    # A distinct loop variable keeps leg_id intact; the original loop
    # rebound it, so roles below were keyed by the last alias.
    for alias_id in old.pop('all_ids'):
        new.add_identifier(alias_id, scheme='openstates')
        self._people[alias_id] = new
    # Roles below are recorded under leg_id, so make sure that id resolves to
    # this person even if the record's all_ids does not list it.
    self._people[leg_id] = new

    # contact details
    email = old.pop('email', None)
    if email:
        new.add_contact_detail(type='email', value=email, note='')

    office_keys = {
        'fax': 'fax',
        'phone': 'voice',
        'email': 'email',
        'address': 'address',
    }
    for office in old.pop('offices'):
        for key, contact_type in office_keys.items():
            value = office.get(key)
            if not value:
                continue
            if 'Office Hours' in value and self.state == 'pa':
                # PA packs office hours into the same field; emit each
                # non-empty piece as its own contact detail
                for piece in value.split('Office Hours: '):
                    if piece:
                        new.add_contact_detail(type=contact_type,
                                               value=piece,
                                               note=office['name'])
            else:
                new.add_contact_detail(type=contact_type,
                                       value=value,
                                       note=office['name'])

    # links
    link = old.pop('url', None)
    if link:
        new.add_link(link)

    # for utah, conflict of interest is in links
    if self.state == 'ut':
        for extra_link in old.pop('+links', []):
            new.add_link(note="conflict of interest form", url=extra_link)

    # sources
    for source in old.pop('sources'):
        source.pop('retrieved', None)
        source.pop('+page', None)
        new.add_source(**source)

    # roles
    for role in old.pop('roles'):
        self.process_role(new, role, leg_id=leg_id)

    for role_list in old_roles.values():
        for role in role_list:
            self.process_role(new, role, leg_id=leg_id)

    # ignore most of the names for now
    old.pop('first_name')
    old.pop('middle_name')
    old.pop('suffixes')
    old.pop('nickname', None)
    new.sort_name = old.pop('last_name')

    # some places have legacy names without underscores
    old.pop('+firstname', None)
    old.pop('+lastname', None)

    gender = old.pop('+gender', None)
    if gender:
        new.gender = gender
    biography = old.pop('+biography', None)
    if biography:
        new.biography = biography
    birth_date = old.pop('+birth_date', None)
    if birth_date:
        new.birth_date = birth_date

    # keys to keep
    to_extras = [
        '+occupation', '+twitter', '+facebook_url', '+sworn_in_date',
        '+profession', '+secretary', '+office_hours', '+resident_county',
        '+district_name', '+leg_status', '+legal_position', '+title',
        '+start_year', '+end_date', 'occupation', '+oregon_member_id',
        '+facebook', '+youtube', '+instagram',
    ]
    for k in to_extras:
        v = old.pop(k, None)
        if v:
            new.extras[k.replace('+', '')] = v

    # keys not to keep
    to_pop = [
        '+office_fax', '+phone', '+room', '+fax', '+email', '+url',
        '+photo', '+notice', '+page', '+suffix', '+city', '+address',
        '+additional_info_url', '+contact_form', '+fax_number',
        '+phone_number', '+business_phone', '+email_address', '+img_url',
        '+office_phone', '+disctict_name', '+office_loc', '+leg_url',
        '+office', '+district_address', '+capital_address', '+bis_phone',
        '+capital_phone', '+org_info', '+role', '+other_phone',
        '+home_phone', '+zip', '+zipcode', '+county', '+capitol_phone',
        '+image_url', '+header', '+town_represented', '+full_address',
        '+capitol_address', '+website', '+district_phone',
        '+district_offices', '+party', '+district', '+capitol_office',
        '+office_address',
    ]
    for k in to_pop:
        old.pop(k, None)

    # ensure we got it all
    assert not old, old.keys()

    return new
def test_full_membership():
    """Import two memberships, then re-import edited copies and expect updates only."""
    create_jurisdiction()
    foundation = Organization.objects.create(
        id="fnd",
        name="Foundation",
        classification="foundation",
        jurisdiction_id="fnd-jid",
    )
    seldon = Person.objects.create(id="hs", name="Hari Seldon")
    daneel = Person.objects.create(id="robot", name="R. Daneel Olivaw")
    founder_post = Post.objects.create(
        id='f', label="founder", role="Founder", organization=foundation)

    # add a membership through a post, with a start date
    via_post = ScrapeMembership(person_id=seldon.id,
                                organization_id=foundation.id,
                                post_id=founder_post.id,
                                start_date='2020-03-10',
                                end_date='2021-06-30')
    via_post.add_contact_detail(
        type='phone', value='555-555-1234', note='this is fake')
    via_post.add_link('http://example.com/link')

    # add a membership direct to an organization, with an end date
    direct = ScrapeMembership(person_id=daneel.id,
                              organization_id=foundation.id,
                              label='member',
                              role='member',
                              end_date='2019-11-09')

    stub = DumbMockImporter()
    first_importer = MembershipImporter('fnd-jid', stub, stub, stub)
    first_importer.import_data([via_post.as_dict(), direct.as_dict()])

    # ensure that the memberships attached in the right places
    assert foundation.memberships.count() == 2
    assert seldon.memberships.count() == 1
    assert daneel.memberships.count() == 1
    assert founder_post.memberships.count() == 1

    # ensure that the first membership has contact details and links
    imported = seldon.memberships.get()
    detail = imported.contact_details.get()
    assert detail.type == 'phone'
    assert detail.value == '555-555-1234'
    assert detail.note == 'this is fake'
    assert imported.links.all()[0].url == 'http://example.com/link'

    # update the imported memberships (i.e., change attributes that are not
    # in the spec) and confirm they resolve correctly
    second_importer = MembershipImporter('fnd-jid', stub, stub, stub)
    via_post.end_date = '2022-03-10'
    direct.extras = {'note': 'bleep blorp'}
    import_log = second_importer.import_data(
        [via_post.as_dict(), direct.as_dict()])

    membership_counts = import_log['membership']
    assert membership_counts['insert'] == 0
    assert membership_counts['update'] == 2

    # confirm the membership resolved based on start date and its end date was updated
    assert seldon.memberships.count() == 1
    assert seldon.memberships.get().end_date == '2022-03-10'

    # confirm the membership resolved based on end date and its extras were updated
    assert daneel.memberships.count() == 1
    assert daneel.memberships.get().extras == {'note': 'bleep blorp'}
def test_basic_membership():
    """str() of a membership mentions both its person id and its organization id."""
    membership = Membership(
        person_id='person',
        organization_id='org',
    )

    assert all(fragment in str(membership) for fragment in ('person', 'org'))
def scrape_current_legislators(self, repos):
    """Yield posts, memberships and people parsed from legislator YAML files.

    For each repo, the URL from ``self.get_url(repo)`` is parsed with
    ``self.yamlize`` into a list of person records. For every term of every
    person this yields:

    * the chamber ``Post`` for the term's division, the first time it is seen;
    * a ``Membership`` in that post;
    * a ``Membership`` in the party organization, when the term has a party.

    A representative term without a ``district`` yields no post and no
    chamber membership. The ``Person`` itself is yielded last, and only if
    the record had at least one term.

    Args:
        repos: iterable of values accepted by ``self.get_url``.

    Raises:
        KeyError: if a term's ``type`` is neither ``'rep'`` nor ``'sen'``,
            or a required key (``name``, ``bio``, ``start``, ``end``,
            ``state``, ``type``) is missing.
    """
    role_names = {
        'rep': 'Representative',
        'sen': 'Senator',
    }

    for repo in repos:
        source_url = self.get_url(repo)
        people = self.yamlize(source_url)

        # Keyed by (term type, division id): an at-large representative and
        # a senator share the same state-level division id, and must not
        # share a post.
        posts = {}
        # NOTE(review): nothing ever stores into this cache, so the lookup
        # below always returns None — confirm whether people were meant to
        # be de-duplicated by (name, birth date).
        person_cache = defaultdict(lambda: defaultdict(lambda: None))

        for person in people:
            name = person['name'].get('official_full')
            if name is None:
                name = "{name[first]} {name[last]}".format(**person)

            # Always rebind per person so a missing birthday cannot inherit
            # the previous person's value (or be undefined for the first).
            # NOTE(review): '' is assumed to be Person's "unknown" value for
            # birth_date — confirm.
            birth_date = person['bio'].get('birthday', '')

            who = person_cache[name][birth_date]
            has_term = False

            if who is None:
                who = Person(name=name, birth_date=birth_date)
                who.add_source(url=source_url,
                               note="unitedstates project on GitHub")

            for term in person.get('terms', []):
                has_term = True
                start_date = term['start']
                end_date = term['end']
                state = term['state']
                type_ = term['type']
                district = term.get('district', None)
                party = term.get('party', None)

                # raises KeyError for any type other than 'rep' / 'sen'
                role = role_names[type_]

                division_id = None
                label = None
                if type_ == "rep" and district is not None:
                    label = "%s for District %s in %s" % (role, district,
                                                          state)
                    if district == 0:
                        # at-large seat: the division is the whole state
                        division_id = (
                            "ocd-division/country:us/state:{state}".format(
                                state=state.lower()))
                    else:
                        division_id = (
                            "ocd-division/country:us/"
                            "state:{state}/cd:{district}".format(
                                state=state.lower(), district=district))
                elif type_ == "sen":
                    division_id = (
                        "ocd-division/country:us/state:{state}".format(
                            state=state.lower()))
                    label = "Senator for %s" % (state)

                if division_id is not None:
                    chamber_id = {
                        "rep": self.house,
                        "sen": self.senate,
                    }[type_]._id

                    post_key = (type_, division_id)
                    post = posts.get(post_key)
                    if post is None:
                        post = Post(organization_id=chamber_id,
                                    division_id=division_id,
                                    label=label,
                                    role=role)
                        posts[post_key] = post
                        yield post

                    yield Membership(post_id=post._id,
                                     role=role,
                                     label=label,
                                     start_date=start_date,
                                     end_date=end_date,
                                     person_id=who._id,
                                     organization_id=chamber_id)

                if party == "Democrat":
                    party = "Democratic"

                if party:
                    yield Membership(role='member',
                                     start_date=start_date,
                                     end_date=end_date,
                                     person_id=who._id,
                                     organization_id=make_pseudo_id(
                                         classification="party",
                                         name=party))

            for key, value in person.get('id', {}).items():
                # an id scheme may carry a single value or a list of values
                if isinstance(value, list):
                    for v in value:
                        who.add_identifier(str(v), scheme=key)
                else:
                    who.add_identifier(str(value), scheme=key)

            if has_term:
                yield who
def test_basic_invalid_membership():
    """validate() must raise ValueError when the first positional argument is the integer 33."""
    positional_args = (33, "orga_id")
    invalid = Membership(*positional_args)

    with pytest.raises(ValueError):
        invalid.validate()
def test_basic_membership():
    """str() of a membership mentions both positional constructor arguments."""
    expected_fragments = ('person', 'org')
    membership = Membership(*expected_fragments)

    assert all(fragment in str(membership) for fragment in expected_fragments)