def test_legislator_related_chamber_district():
    """Check the single related object built for district='1', primary_org='upper'."""
    person = Person('John Adams', district='1', primary_org='upper')
    person.pre_save('jurisdiction-id')

    # exactly one related object is expected after pre_save
    assert len(person._related) == 1

    membership = person._related[0]
    assert membership.person_id == person._id

    expected_org = {'classification': 'upper'}
    assert get_pseudo_id(membership.organization_id) == expected_org

    expected_post = {"organization__classification": "upper", "label": "1"}
    assert get_pseudo_id(membership.post_id) == expected_post
def test_legislator_related_district():
    """Check the related object built for district='1', primary_org='legislature'."""
    leg = Person('John Adams', district='1', primary_org='legislature')
    leg.pre_save('jurisdiction-id')

    assert len(leg._related) == 1

    membership = leg._related[0]
    assert membership.person_id == leg._id

    expected_org = {'classification': 'legislature'}
    assert get_pseudo_id(membership.organization_id) == expected_org

    # this revision expects the role to be part of the post pseudo id
    expected_post = {
        "organization__classification": "legislature",
        "label": "1",
        "role": "member",
    }
    assert get_pseudo_id(membership.post_id) == expected_post
    assert membership.role == 'member'
def test_legislator_related_district():
    """Check organization and post pseudo ids for a 'legislature' district seat."""
    leg = Person('John Adams', district='1', primary_org='legislature')
    leg.pre_save('jurisdiction-id')

    assert len(leg._related) == 1

    membership = leg._related[0]
    assert membership.person_id == leg._id

    org_spec = get_pseudo_id(membership.organization_id)
    assert org_spec == {'classification': 'legislature'}

    post_spec = get_pseudo_id(membership.post_id)
    assert post_spec == {
        "organization__classification": "legislature",
        "label": "1",
    }
def test_legislator_related_chamber_district_role():
    """Check that an explicit role ('Speaker') shows up on the related object."""
    leg = Person('John Adams', district='1', primary_org='lower', role='Speaker')
    leg.pre_save('jurisdiction-id')

    assert len(leg._related) == 1

    membership = leg._related[0]
    assert membership.person_id == leg._id

    expected_org = {'classification': 'lower'}
    assert get_pseudo_id(membership.organization_id) == expected_org

    expected_post = {
        "organization__classification": "lower",
        "label": "1",
        "role": "Speaker",
    }
    assert get_pseudo_id(membership.post_id) == expected_post
    assert membership.role == 'Speaker'
def test_set_bill_pseudo_id():
    """Check the pseudo id stored by set_bill, including the session identifier."""
    vote_event = toy_vote_event()
    vote_event.set_bill('HB 1', chamber='lower')

    expected = {
        'identifier': 'HB 1',
        'from_organization__classification': 'lower',
        'legislative_session__identifier': '2009',
    }
    assert get_pseudo_id(vote_event.bill) == expected
def test_set_bill_pseudo_id():
    """Check the pseudo id stored by set_bill for an identifier plus chamber."""
    vote_event = toy_vote_event()
    vote_event.set_bill('HB 1', chamber='lower')

    bill_spec = get_pseudo_id(vote_event.bill)
    assert bill_spec == {
        'identifier': 'HB 1',
        'from_organization__classification': 'lower',
    }
def test_simple_vote_event():
    """Record one count and three votes, then check the stored data and validate."""
    vote_event = toy_vote_event()
    vote_event.set_count('yes', 2)
    vote_event.yes('James')
    vote_event.no('Paul')
    vote_event.vote('abstain', 'Thom')

    assert len(vote_event.votes) == 3
    assert len(vote_event.counts) == 1
    assert get_pseudo_id(vote_event.organization) == {'classification': 'legislature'}

    # voters are expected in the order the votes were recorded
    for position, voter_name in enumerate(('James', 'Paul', 'Thom')):
        voter_spec = get_pseudo_id(vote_event.votes[position]['voter_id'])
        assert voter_spec == {'name': voter_name}

    assert vote_event.bill is None
    vote_event.validate()
    assert 'we get here'
def test_person_add_party():
    """Check that add_party creates a valid related object pointing at the party."""
    person = Person('Groot')
    person.add_party('Green')

    party_membership = person._related[0]
    party_membership.validate()

    expected_org = {'name': 'Green', 'classification': 'party'}
    assert get_pseudo_id(party_membership.organization_id) == expected_org
def test_vote_event_org_chamber():
    """Check that chamber='upper' yields an organization pseudo id for 'upper'."""
    vote_event = VoteEvent(
        legislative_session="2009",
        motion_text="passage of the bill",
        start_date="2009-01-07",
        result='pass',
        classification='bill-passage',
        chamber='upper',
    )

    organization_spec = get_pseudo_id(vote_event.organization)
    assert organization_spec == {'classification': 'upper'}
def test_person_add_term():
    """Check the related object created by add_term, including its dates."""
    person = Person('Eternal')
    person.add_term('eternal', 'council', start_date='0001', end_date='9999')

    term = person._related[0]
    term.validate()

    expected_org = {'classification': 'council'}
    assert get_pseudo_id(term.organization_id) == expected_org
    assert term.start_date == '0001'
    assert term.end_date == '9999'
def test_vote_event_org_dict():
    """Check that a dict passed as organization round-trips through the pseudo id."""
    org_spec = {'name': 'Random Committee', 'classification': 'committee'}
    vote_event = VoteEvent(
        legislative_session="2009",
        motion_text="passage of the bill",
        start_date="2009-01-07",
        result='pass',
        classification='bill-passage',
        organization=org_spec,
    )

    assert get_pseudo_id(vote_event.organization) == org_spec
def resolve_json_id(self, json_id, allow_no_match=False):
    """
    Translate an id found in scraped JSON into a database id.

    params:
        json_id:        id from json; ids starting with '~' are treated as
                        pseudo ids and looked up through the model class
        allow_no_match: for pseudo ids only, report the failure through
                        self.error and return None instead of raising

    returns:
        database id, or None when json_id is empty or an unresolved
        pseudo id was tolerated

    raises:
        UnresolvedIdError if the id could not be resolved
    """
    if not json_id:
        return None

    if not json_id.startswith('~'):
        # plain id: follow the duplicates mapping, falling back to the id itself
        canonical_id = self.duplicates.get(json_id, json_id)
        try:
            return self.json_to_db_id[canonical_id]
        except KeyError:
            raise UnresolvedIdError('cannot resolve id: {}'.format(canonical_id))

    # pseudo ids are cached to avoid doing 1000s of lookups during import
    if json_id in self.pseudo_id_cache:
        return self.pseudo_id_cache[json_id]

    spec = self.limit_spec(get_pseudo_id(json_id))
    if isinstance(spec, Q):
        matches = self.model_class.objects.filter(spec)
    else:
        matches = self.model_class.objects.filter(**spec)
    ids = {match.id for match in matches}

    if len(ids) == 1:
        self.pseudo_id_cache[json_id] = ids.pop()
    else:
        if ids:
            errmsg = 'multiple objects returned for {} pseudo id {}: {}'.format(
                self.model_class.__name__, json_id, ids)
        else:
            errmsg = 'cannot resolve pseudo id to {}: {}'.format(
                self.model_class.__name__, json_id)

        # either raise, or log the error and remember the miss
        if not allow_no_match:
            raise UnresolvedIdError(errmsg)
        self.error(errmsg)
        self.pseudo_id_cache[json_id] = None

    return self.pseudo_id_cache[json_id]
def test_person_add_membership_name():
    """Check the related object created by add_membership with an organization name."""
    person = Person('Leonardo DiCaprio')
    person.add_membership(
        'Academy of Motion Picture Arts and Sciences',
        role='winner',
        start_date='2016',
    )

    membership = person._related[0]
    membership.validate()

    expected_org = {'name': 'Academy of Motion Picture Arts and Sciences'}
    assert get_pseudo_id(membership.organization_id) == expected_org
    assert membership.person_id == person._id
    assert membership.role == 'winner'
    assert membership.start_date == '2016'
def test_legislator_related_party():
    """Check that the party argument produces a single party membership on pre_save."""
    leg = Person('John Adams', party='Democratic-Republican')
    leg.pre_save('jurisdiction-id')

    # a party membership
    assert len(leg._related) == 1

    party_membership = leg._related[0]
    assert party_membership.person_id == leg._id

    expected_org = {'classification': 'party', 'name': 'Democratic-Republican'}
    assert get_pseudo_id(party_membership.organization_id) == expected_org
    assert party_membership.role == 'member'
def test_legislator_related_party():
    """Check the party membership that pre_save attaches to a person with a party."""
    person = Person('John Adams', party='Democratic-Republican')
    person.pre_save('jurisdiction-id')

    # a party membership
    assert len(person._related) == 1

    related = person._related[0]
    assert related.person_id == person._id

    org_spec = get_pseudo_id(related.organization_id)
    assert org_spec == {'classification': 'party', 'name': 'Democratic-Republican'}
    assert related.role == 'member'
def test_simple_vote():
    """Record one count and three votes on a toy vote, then check and validate it."""
    vote = toy_vote()
    vote.set_count('yes', 2)
    vote.yes('James')
    vote.no('Paul')
    vote.vote('abstain', 'Thom')

    assert len(vote.votes) == 3
    assert len(vote.counts) == 1

    organization_spec = get_pseudo_id(vote.organization)
    assert organization_spec == {'classification': 'legislature'}

    assert vote.bill is None
    vote.validate()
    assert 'we get here'
def prepare_for_db(self, data):
    """
    Replace the scraped references in a vote event dict with database ids.

    The 'legislative_session', 'organization', 'bill' and 'bill_action'
    keys are popped from ``data``; '*_id' keys are written in their place
    and every vote's 'voter_id' is resolved. ``data`` is modified in place
    and returned.
    """
    session_identifier = data.pop('legislative_session')
    data['legislative_session_id'] = self.get_session_id(session_identifier)

    organization = data.pop('organization')
    data['organization_id'] = self.org_importer.resolve_json_id(organization)

    bill = data.pop('bill')
    if bill and bill.startswith('~'):
        # unpack psuedo id and apply filter in case there are any that alter it
        bill_spec = get_pseudo_id(bill)
        self.bill_importer.apply_transformers(bill_spec)
        bill = _make_pseudo_id(**bill_spec)
    data['bill_id'] = self.bill_importer.resolve_json_id(bill)

    bill_action = data.pop('bill_action')
    if bill_action:
        try:
            action = BillAction.objects.get(
                bill_id=data['bill_id'],
                description=bill_action,
                date=data['start_date'],
                organization_id=data['organization_id'],
            )
            # seen_action_ids is for ones being added in this import
            # action.vote is already set if action was set on prior import
            already_matched = (action.id in self.seen_action_ids
                               or hasattr(action, 'vote'))
            if not already_matched:
                data['bill_action_id'] = action.id
                self.seen_action_ids.add(action.id)
            else:
                self.warning('can not match two VoteEvents to %s: %s',
                             action.id, bill_action)
        except BillAction.DoesNotExist:
            self.warning('could not match VoteEvent to %s %s %s',
                         bill, bill_action, data['start_date'])
        except BillAction.MultipleObjectsReturned as e:
            self.warning('could not match VoteEvent to %s %s %s: %s',
                         bill, bill_action, data['start_date'], e)

    # voters that cannot be matched are tolerated (allow_no_match)
    for vote in data['votes']:
        scraped_voter = vote['voter_id']
        vote['voter_id'] = self.person_importer.resolve_json_id(
            scraped_voter, allow_no_match=True)

    return data
def resolve_json_id(self, json_id):
    """
    Translate an id found in scraped JSON into a database id.

    params:
        json_id:    id from json; ids starting with '~' are treated as
                    pseudo ids and looked up through the model class

    returns:
        database id, or None when json_id is empty

    raises:
        UnresolvedIdError if the id could not be resolved
    """
    if not json_id:
        return None

    if not json_id.startswith('~'):
        # plain id: follow the duplicates mapping, falling back to the id itself
        canonical_id = self.duplicates.get(json_id, json_id)
        try:
            return self.json_to_db_id[canonical_id]
        except KeyError:
            raise UnresolvedIdError('cannot resolve id: {}'.format(canonical_id))

    # pseudo ids are cached to avoid doing 1000s of lookups during import
    if json_id not in self.pseudo_id_cache:
        spec = self.limit_spec(get_pseudo_id(json_id))
        try:
            match = self.model_class.objects.get(**spec)
            self.pseudo_id_cache[json_id] = match.id
        except self.model_class.DoesNotExist:
            message = 'cannot resolve pseudo id to {}: {}'.format(
                self.model_class.__name__, json_id)
            raise UnresolvedIdError(message)
        except self.model_class.MultipleObjectsReturned:
            message = 'multiple objects returned for pseudo id to {}: {}'.format(
                self.model_class.__name__, json_id)
            raise UnresolvedIdError(message)

    return self.pseudo_id_cache[json_id]
def prepare_for_db(self, data):
    """
    Replace the scraped references in an event dict with database ids.

    Sets the jurisdiction, converts the location through get_location,
    defaults 'end_date' to an empty string, and resolves the ids found on
    participants and on each agenda item's related entities. Unmatched ids
    are tolerated (allow_no_match=True). ``data`` is modified in place and
    returned.
    """
    data['jurisdiction_id'] = self.jurisdiction_id
    data['location'] = self.get_location(data['location'])
    data['start_date'] = data['start_date']
    data['end_date'] = data.get('end_date', "")

    for participant in data['participants']:
        # a participant carries at most one reference that gets resolved
        if 'person_id' in participant:
            resolve = self.person_importer.resolve_json_id
            key = 'person_id'
        elif 'organization_id' in participant:
            resolve = self.org_importer.resolve_json_id
            key = 'organization_id'
        else:
            continue
        participant[key] = resolve(participant[key], allow_no_match=True)

    for agenda_item in data['agenda']:
        for entity in agenda_item['related_entities']:
            if 'person_id' in entity:
                resolved = self.person_importer.resolve_json_id(
                    entity['person_id'], allow_no_match=True)
                entity['person_id'] = resolved
            elif 'organization_id' in entity:
                resolved = self.org_importer.resolve_json_id(
                    entity['organization_id'], allow_no_match=True)
                entity['organization_id'] = resolved
            elif 'bill_id' in entity:
                # unpack and repack bill psuedo id in case filters alter it
                bill_spec = get_pseudo_id(entity['bill_id'])
                self.bill_importer.apply_transformers(bill_spec)
                repacked = _make_pseudo_id(**bill_spec)
                resolved = self.bill_importer.resolve_json_id(
                    repacked, allow_no_match=True)
                entity['bill_id'] = resolved
            elif 'vote_event_id' in entity:
                resolved = self.vote_event_importer.resolve_json_id(
                    entity['vote_event_id'], allow_no_match=True)
                entity['vote_event_id'] = resolved

    return data
def prepare_for_db(self, data):
    """
    Replace the scraped references in an event dict with database ids.

    Sets the jurisdiction, converts a truthy location through get_location
    (a falsy one is left untouched), defaults 'end_date' to an empty
    string, and resolves the ids found on participants and on each agenda
    item's related entities. Unmatched ids are tolerated
    (allow_no_match=True). ``data`` is modified in place and returned.
    """
    data['jurisdiction_id'] = self.jurisdiction_id

    if data['location']:
        data['location'] = self.get_location(data['location'])

    data['start_date'] = data['start_date']
    data['end_date'] = data.get('end_date', "")

    for participant in data['participants']:
        # a participant carries at most one reference that gets resolved
        if 'person_id' in participant:
            resolve = self.person_importer.resolve_json_id
            key = 'person_id'
        elif 'organization_id' in participant:
            resolve = self.org_importer.resolve_json_id
            key = 'organization_id'
        else:
            continue
        participant[key] = resolve(participant[key], allow_no_match=True)

    for agenda_item in data['agenda']:
        for entity in agenda_item['related_entities']:
            if 'person_id' in entity:
                resolved = self.person_importer.resolve_json_id(
                    entity['person_id'], allow_no_match=True)
                entity['person_id'] = resolved
            elif 'organization_id' in entity:
                resolved = self.org_importer.resolve_json_id(
                    entity['organization_id'], allow_no_match=True)
                entity['organization_id'] = resolved
            elif 'bill_id' in entity:
                # unpack and repack bill psuedo id in case filters alter it
                bill_spec = get_pseudo_id(entity['bill_id'])
                self.bill_importer.apply_transformers(bill_spec)
                repacked = _make_pseudo_id(**bill_spec)
                resolved = self.bill_importer.resolve_json_id(
                    repacked, allow_no_match=True)
                entity['bill_id'] = resolved
            elif 'vote_event_id' in entity:
                resolved = self.vote_event_importer.resolve_json_id(
                    entity['vote_event_id'], allow_no_match=True)
                entity['vote_event_id'] = resolved

    return data
def resolve_json_id(self, json_id):
    """
    Map an id taken from scraped JSON onto the matching database id.

    params:
        json_id:    id from json; a leading '~' marks a pseudo id that is
                    looked up through the model class

    returns:
        database id, or None when json_id is empty

    raises:
        UnresolvedIdError if the id could not be resolved
    """
    if not json_id:
        return None

    is_pseudo_id = json_id.startswith('~')

    if is_pseudo_id and json_id in self.pseudo_id_cache:
        # cache hit: avoids doing 1000s of lookups during import
        return self.pseudo_id_cache[json_id]

    if is_pseudo_id:
        spec = get_pseudo_id(json_id)
        spec = self.limit_spec(spec)
        try:
            found = self.model_class.objects.get(**spec)
            self.pseudo_id_cache[json_id] = found.id
        except self.model_class.DoesNotExist:
            raise UnresolvedIdError(
                'cannot resolve pseudo id to {}: {}'.format(
                    self.model_class.__name__, json_id))
        except self.model_class.MultipleObjectsReturned:
            raise UnresolvedIdError(
                'multiple objects returned for pseudo id to {}: {}'.format(
                    self.model_class.__name__, json_id))
        return self.pseudo_id_cache[json_id]

    # get the id that the duplicate points to, or use the id itself
    target_id = self.duplicates.get(json_id, json_id)
    try:
        return self.json_to_db_id[target_id]
    except KeyError:
        raise UnresolvedIdError('cannot resolve id: {}'.format(target_id))
def prepare_for_db(self, data):
    """
    Replace the scraped references in a vote event dict with database ids.

    The 'legislative_session', 'organization', 'bill' and 'bill_action'
    keys are popped from ``data``; '*_id' keys are written in their place
    and every vote's 'voter_id' is resolved. ``data`` is modified in place
    and returned.
    """
    session_identifier = data.pop('legislative_session')
    data['legislative_session_id'] = self.get_session_id(session_identifier)

    organization = data.pop('organization')
    data['organization_id'] = self.org_importer.resolve_json_id(organization)

    bill = data.pop('bill')
    if bill and bill.startswith('~'):
        # pass the identifier through fix_bill_id, then rebuild the pseudo id
        bill_spec = get_pseudo_id(bill)
        bill_spec['identifier'] = fix_bill_id(bill_spec['identifier'])
        bill = _make_pseudo_id(**bill_spec)
    data['bill_id'] = self.bill_importer.resolve_json_id(bill)

    bill_action = data.pop('bill_action')
    if bill_action:
        try:
            action = BillAction.objects.get(
                bill_id=data['bill_id'],
                description=bill_action,
                date=data['start_date'],
                organization_id=data['organization_id'],
            )
            # an action may only be claimed by one vote event per import
            if action.id not in self.seen_action_ids:
                data['bill_action_id'] = action.id
                self.seen_action_ids.add(action.id)
            else:
                self.warning('can not match two VoteEvents to %s: %s',
                             action.id, bill_action)
        except BillAction.DoesNotExist:
            self.warning('could not match VoteEvent to %s %s %s',
                         bill, bill_action, data['start_date'])
        except BillAction.MultipleObjectsReturned as e:
            self.warning('could not match VoteEvent to %s %s %s: %s',
                         bill, bill_action, data['start_date'], e)

    # voters that cannot be matched are tolerated (allow_no_match)
    for vote in data['votes']:
        scraped_voter = vote['voter_id']
        vote['voter_id'] = self.person_importer.resolve_json_id(
            scraped_voter, allow_no_match=True)

    return data
def prepare_for_db(self, data):
    """
    Turn the scraped references of a vote event dict into database ids.

    Pops 'legislative_session', 'organization', 'bill' and 'bill_action'
    from ``data``, stores the resolved '*_id' values instead, and resolves
    each vote's 'voter_id'. ``data`` is modified in place and returned.
    """
    scraped_session = data.pop('legislative_session')
    data['legislative_session_id'] = self.get_session_id(scraped_session)

    scraped_org = data.pop('organization')
    data['organization_id'] = self.org_importer.resolve_json_id(scraped_org)

    bill = data.pop('bill')
    if bill and bill.startswith('~'):
        # unpack psuedo id and apply filter in case there are any that alter it
        unpacked = get_pseudo_id(bill)
        self.bill_importer.apply_transformers(unpacked)
        bill = _make_pseudo_id(**unpacked)
    data['bill_id'] = self.bill_importer.resolve_json_id(bill)

    bill_action = data.pop('bill_action')
    if bill_action:
        try:
            action = BillAction.objects.get(
                bill_id=data['bill_id'],
                description=bill_action,
                date=data['start_date'],
                organization_id=data['organization_id'],
            )
            # seen_action_ids is for ones being added in this import
            # action.vote is already set if action was set on prior import
            is_free = not (action.id in self.seen_action_ids
                           or hasattr(action, 'vote'))
            if is_free:
                data['bill_action_id'] = action.id
                self.seen_action_ids.add(action.id)
            else:
                self.warning('can not match two VoteEvents to %s: %s',
                             action.id, bill_action)
        except BillAction.DoesNotExist:
            self.warning('could not match VoteEvent to %s %s %s',
                         bill, bill_action, data['start_date'])
        except BillAction.MultipleObjectsReturned as e:
            self.warning('could not match VoteEvent to %s %s %s: %s',
                         bill, bill_action, data['start_date'], e)

    # voters that cannot be matched are tolerated (allow_no_match)
    for vote in data['votes']:
        voter_ref = vote['voter_id']
        vote['voter_id'] = self.person_importer.resolve_json_id(
            voter_ref, allow_no_match=True)

    return data
def prepare_for_db(self, data):
    """
    Replace the scraped references in an event dict with database ids.

    Sets the jurisdiction, converts the location through get_location,
    defaults 'end_date' to an empty string, and resolves the ids found on
    participants and on each agenda item's related entities. Unmatched ids
    are tolerated (allow_no_match=True). ``data`` is modified in place and
    returned.
    """
    data['jurisdiction_id'] = self.jurisdiction_id
    data['location'] = self.get_location(data['location'])
    data['start_date'] = data['start_date']
    data['end_date'] = data.get('end_date', "")

    for participant in data['participants']:
        # a participant carries at most one reference that gets resolved
        if 'person_id' in participant:
            resolve = self.person_importer.resolve_json_id
            key = 'person_id'
        elif 'organization_id' in participant:
            resolve = self.org_importer.resolve_json_id
            key = 'organization_id'
        else:
            continue
        participant[key] = resolve(participant[key], allow_no_match=True)

    for agenda_item in data['agenda']:
        for entity in agenda_item['related_entities']:
            if 'person_id' in entity:
                resolved = self.person_importer.resolve_json_id(
                    entity['person_id'], allow_no_match=True)
                entity['person_id'] = resolved
            elif 'organization_id' in entity:
                resolved = self.org_importer.resolve_json_id(
                    entity['organization_id'], allow_no_match=True)
                entity['organization_id'] = resolved
            elif 'bill_id' in entity:
                # pass the identifier through fix_bill_id, then rebuild the pseudo id
                bill_spec = get_pseudo_id(entity['bill_id'])
                bill_spec['identifier'] = fix_bill_id(bill_spec['identifier'])
                repacked = _make_pseudo_id(**bill_spec)
                resolved = self.bill_importer.resolve_json_id(
                    repacked, allow_no_match=True)
                entity['bill_id'] = resolved
            elif 'vote_event_id' in entity:
                resolved = self.vote_event_importer.resolve_json_id(
                    entity['vote_event_id'], allow_no_match=True)
                entity['vote_event_id'] = resolved

    return data
def test_person_add_party():
    """Check the organization pseudo id on the related object made by add_party."""
    groot = Person('Groot')
    groot.add_party('Green')

    related = groot._related[0]
    related.validate()

    org_spec = get_pseudo_id(related.organization_id)
    assert org_spec == {'name': 'Green', 'classification': 'party'}
def test_committee_add_member_name():
    """Check the related object created by Organization.add_member with a name."""
    committee = Organization('Defense', classification='committee')
    committee.add_member('John Adams')

    membership = committee._related[0]
    assert get_pseudo_id(membership.person_id) == {'name': 'John Adams'}
    assert membership.organization_id == committee._id
    assert membership.role == 'member'
def test_set_bill_pseudo_id():
    """Check the pseudo id stored on a toy vote by set_bill."""
    vote = toy_vote()
    vote.set_bill('HB 1', chamber='lower')

    expected = {
        'identifier': 'HB 1',
        'from_organization__classification': 'lower',
    }
    assert get_pseudo_id(vote.bill) == expected