Example #1
0
def test_legislator_related_chamber_district():
    """Checks the single related record created by pre_save for an 'upper' district seat."""
    person = Person('John Adams', district='1', primary_org='upper')
    person.pre_save('jurisdiction-id')

    # exactly one related object, tied back to the person
    assert len(person._related) == 1
    related = person._related[0]
    assert related.person_id == person._id

    expected_org = {'classification': 'upper'}
    expected_post = {"organization__classification": "upper", "label": "1"}
    assert get_pseudo_id(related.organization_id) == expected_org
    assert get_pseudo_id(related.post_id) == expected_post
def test_legislator_related_district():
    """Checks the single 'member' record created by pre_save for a 'legislature' district seat."""
    leg = Person('John Adams', district='1', primary_org='legislature')
    leg.pre_save('jurisdiction-id')

    assert len(leg._related) == 1
    related = leg._related[0]
    assert related.person_id == leg._id

    expected_org = {'classification': 'legislature'}
    expected_post = {
        "organization__classification": "legislature",
        "label": "1",
        "role": "member",
    }
    assert get_pseudo_id(related.organization_id) == expected_org
    assert get_pseudo_id(related.post_id) == expected_post
    assert related.role == 'member'
Example #3
0
def test_legislator_related_district():
    """Checks the single related record created by pre_save for a 'legislature' district seat."""
    leg = Person('John Adams', district='1', primary_org='legislature')
    leg.pre_save('jurisdiction-id')

    assert len(leg._related) == 1
    related = leg._related[0]
    assert related.person_id == leg._id

    # both references are stored as pseudo ids
    org_spec = get_pseudo_id(related.organization_id)
    assert org_spec == {'classification': 'legislature'}
    post_spec = get_pseudo_id(related.post_id)
    assert post_spec == {"organization__classification": "legislature", "label": "1"}
Example #4
0
def test_legislator_related_chamber_district_role():
    """Checks that an explicit role ('Speaker') is carried onto the related record and its post."""
    speaker = Person('John Adams', district='1', primary_org='lower', role='Speaker')
    speaker.pre_save('jurisdiction-id')

    assert len(speaker._related) == 1
    related = speaker._related[0]
    assert related.person_id == speaker._id

    expected_org = {'classification': 'lower'}
    expected_post = {
        "organization__classification": "lower",
        "label": "1",
        "role": "Speaker",
    }
    assert get_pseudo_id(related.organization_id) == expected_org
    assert get_pseudo_id(related.post_id) == expected_post
    assert related.role == 'Speaker'
def test_set_bill_pseudo_id():
    """Checks the pseudo id stored by set_bill() when given an identifier and a chamber."""
    event = toy_vote_event()
    event.set_bill('HB 1', chamber='lower')

    expected = {
        'identifier': 'HB 1',
        'from_organization__classification': 'lower',
        'legislative_session__identifier': '2009',
    }
    assert get_pseudo_id(event.bill) == expected
def test_set_bill_pseudo_id():
    """Checks the pseudo id stored by set_bill() when given an identifier and a chamber."""
    event = toy_vote_event()
    event.set_bill('HB 1', chamber='lower')

    bill_spec = get_pseudo_id(event.bill)
    assert bill_spec == {'identifier': 'HB 1', 'from_organization__classification': 'lower'}
def test_simple_vote_event():
    """Checks the counts, votes and organization of a toy vote event, then validates it."""
    event = toy_vote_event()
    event.set_count('yes', 2)
    event.yes('James')
    event.no('Paul')
    event.vote('abstain', 'Thom')

    assert len(event.votes) == 3
    assert len(event.counts) == 1
    assert get_pseudo_id(event.organization) == {'classification': 'legislature'}
    # voters are expected in the order they were added
    for position, voter in enumerate(('James', 'Paul', 'Thom')):
        assert get_pseudo_id(event.votes[position]['voter_id']) == {'name': voter}
    assert event.bill is None

    event.validate()
    # reaching this line means validate() did not raise
    assert 'we get here'
Example #8
0
def test_simple_vote_event():
    """Checks the counts, votes and organization of a toy vote event, then validates it."""
    vote_event = toy_vote_event()
    vote_event.set_count('yes', 2)
    vote_event.yes('James')
    vote_event.no('Paul')
    vote_event.vote('abstain', 'Thom')

    assert len(vote_event.votes) == 3
    assert len(vote_event.counts) == 1

    org_spec = get_pseudo_id(vote_event.organization)
    assert org_spec == {'classification': 'legislature'}

    # each recorded vote should reference its voter by name, in insertion order
    first, second, third = vote_event.votes[0], vote_event.votes[1], vote_event.votes[2]
    assert get_pseudo_id(first['voter_id']) == {'name': 'James'}
    assert get_pseudo_id(second['voter_id']) == {'name': 'Paul'}
    assert get_pseudo_id(third['voter_id']) == {'name': 'Thom'}
    assert vote_event.bill is None

    vote_event.validate()
    # reaching this line means validate() did not raise
    assert 'we get here'
Example #9
0
def test_person_add_party():
    """Checks that add_party() creates a valid related record pointing at the party pseudo id."""
    groot = Person('Groot')
    groot.add_party('Green')

    related = groot._related[0]
    related.validate()

    expected_org = {'name': 'Green', 'classification': 'party'}
    assert get_pseudo_id(related.organization_id) == expected_org
Example #10
0
def test_vote_event_org_chamber():
    """Checks that chamber='upper' ends up as the organization pseudo id classification."""
    event_kwargs = {
        'legislative_session': "2009",
        'motion_text': "passage of the bill",
        'start_date': "2009-01-07",
        'result': 'pass',
        'classification': 'bill-passage',
        'chamber': 'upper',
    }
    event = VoteEvent(**event_kwargs)

    assert get_pseudo_id(event.organization) == {'classification': 'upper'}
Example #11
0
def test_person_add_term():
    """Checks the organization pseudo id and dates on the record created by add_term()."""
    person = Person('Eternal')
    person.add_term('eternal', 'council', start_date='0001', end_date='9999')

    term = person._related[0]
    term.validate()

    assert get_pseudo_id(term.organization_id) == {'classification': 'council'}
    assert term.start_date == '0001'
    assert term.end_date == '9999'
Example #12
0
def test_vote_event_org_dict():
    """Checks that a dict passed as organization round-trips through the pseudo id."""
    org_spec = {'name': 'Random Committee', 'classification': 'committee'}
    event_kwargs = {
        'legislative_session': "2009",
        'motion_text': "passage of the bill",
        'start_date': "2009-01-07",
        'result': 'pass',
        'classification': 'bill-passage',
        'organization': org_spec,
    }
    event = VoteEvent(**event_kwargs)

    assert get_pseudo_id(event.organization) == org_spec
Example #13
0
    def resolve_json_id(self, json_id, allow_no_match=False):
        """
            Given an id found in scraped JSON, return a DB id for the object.

            Ids starting with '~' are pseudo ids: they are looked up through
            self.model_class and the outcome is kept in self.pseudo_id_cache.
            Any other id is mapped through self.duplicates and then
            self.json_to_db_id.

            params:
                json_id:        id from json
                allow_no_match: pseudo ids only -- when zero or several objects
                                match, report it via self.error() and return
                                None instead of raising

            returns:
                database id, or None when json_id is empty or an unresolvable
                pseudo id was tolerated through allow_no_match

            raises:
                UnresolvedIdError if id couldn't be resolved
        """
        if not json_id:
            return None

        if not json_id.startswith('~'):
            # get the id that the duplicate points to, or use self
            json_id = self.duplicates.get(json_id, json_id)
            try:
                return self.json_to_db_id[json_id]
            except KeyError:
                raise UnresolvedIdError('cannot resolve id: {}'.format(json_id))

        # keep caches of all the pseudo-ids to avoid doing 1000s of lookups during import
        if json_id not in self.pseudo_id_cache:
            spec = self.limit_spec(get_pseudo_id(json_id))
            if isinstance(spec, Q):
                objects = self.model_class.objects.filter(spec)
            else:
                objects = self.model_class.objects.filter(**spec)
            ids = {each.id for each in objects}

            if len(ids) == 1:
                self.pseudo_id_cache[json_id] = ids.pop()
            else:
                if not ids:
                    errmsg = 'cannot resolve pseudo id to {}: {}'.format(
                        self.model_class.__name__, json_id)
                else:
                    errmsg = 'multiple objects returned for {} pseudo id {}: {}'.format(
                        self.model_class.__name__, json_id, ids)

                # either raise or log error
                if not allow_no_match:
                    raise UnresolvedIdError(errmsg)
                self.error(errmsg)
                self.pseudo_id_cache[json_id] = None

        db_id = self.pseudo_id_cache[json_id]
        if db_id is None and not allow_no_match:
            # a failure cached by an earlier lenient call must still raise for a strict
            # caller, otherwise the result would depend on the order of the calls
            raise UnresolvedIdError('cannot resolve pseudo id to {}: {}'.format(
                self.model_class.__name__, json_id))
        return db_id
Example #14
0
    def resolve_json_id(self, json_id, allow_no_match=False):
        """
            Given an id found in scraped JSON, return a DB id for the object.

            Ids starting with '~' are pseudo ids: they are looked up through
            self.model_class and the outcome is kept in self.pseudo_id_cache.
            Any other id is mapped through self.duplicates and then
            self.json_to_db_id.

            params:
                json_id:        id from json
                allow_no_match: pseudo ids only -- when zero or several objects
                                match, report it via self.error() and return
                                None instead of raising

            returns:
                database id, or None when json_id is empty or an unresolvable
                pseudo id was tolerated through allow_no_match

            raises:
                UnresolvedIdError if id couldn't be resolved
        """
        if not json_id:
            return None

        if not json_id.startswith('~'):
            # get the id that the duplicate points to, or use self
            json_id = self.duplicates.get(json_id, json_id)
            try:
                return self.json_to_db_id[json_id]
            except KeyError:
                raise UnresolvedIdError('cannot resolve id: {}'.format(json_id))

        # keep caches of all the pseudo-ids to avoid doing 1000s of lookups during import
        if json_id not in self.pseudo_id_cache:
            spec = self.limit_spec(get_pseudo_id(json_id))
            if isinstance(spec, Q):
                objects = self.model_class.objects.filter(spec)
            else:
                objects = self.model_class.objects.filter(**spec)
            ids = {each.id for each in objects}

            if len(ids) == 1:
                self.pseudo_id_cache[json_id] = ids.pop()
            else:
                if not ids:
                    errmsg = 'cannot resolve pseudo id to {}: {}'.format(
                        self.model_class.__name__, json_id)
                else:
                    errmsg = 'multiple objects returned for {} pseudo id {}: {}'.format(
                        self.model_class.__name__, json_id, ids)

                # either raise or log error
                if not allow_no_match:
                    raise UnresolvedIdError(errmsg)
                self.error(errmsg)
                self.pseudo_id_cache[json_id] = None

        db_id = self.pseudo_id_cache[json_id]
        if db_id is None and not allow_no_match:
            # a failure cached by an earlier lenient call must still raise for a strict
            # caller, otherwise the result would depend on the order of the calls
            raise UnresolvedIdError('cannot resolve pseudo id to {}: {}'.format(
                self.model_class.__name__, json_id))
        return db_id
Example #15
0
def test_person_add_membership_name():
    """Checks the record created by add_membership() when the organization is given by name."""
    actor = Person('Leonardo DiCaprio')
    actor.add_membership('Academy of Motion Picture Arts and Sciences',
                         role='winner', start_date='2016')

    membership = actor._related[0]
    membership.validate()

    expected_org = {'name': 'Academy of Motion Picture Arts and Sciences'}
    assert get_pseudo_id(membership.organization_id) == expected_org
    assert membership.person_id == actor._id
    assert membership.role == 'winner'
    assert membership.start_date == '2016'
def test_legislator_related_party():
    """Checks the single party record created by pre_save for a Person built with party=..."""
    leg = Person('John Adams', party='Democratic-Republican')
    leg.pre_save('jurisdiction-id')

    # a party membership
    assert len(leg._related) == 1
    related = leg._related[0]
    assert related.person_id == leg._id

    expected_org = {'classification': 'party', 'name': 'Democratic-Republican'}
    assert get_pseudo_id(related.organization_id) == expected_org
    assert related.role == 'member'
Example #17
0
def test_legislator_related_party():
    """Checks the single party record created by pre_save for a Person built with party=..."""
    person = Person('John Adams', party='Democratic-Republican')
    person.pre_save('jurisdiction-id')

    # a party membership
    assert len(person._related) == 1
    party_link = person._related[0]
    assert party_link.person_id == person._id

    org_spec = get_pseudo_id(party_link.organization_id)
    assert org_spec == {'classification': 'party', 'name': 'Democratic-Republican'}
    assert party_link.role == 'member'
Example #18
0
def test_simple_vote():
    """Checks the counts, votes and organization of a toy vote, then validates it."""
    vote = toy_vote()
    vote.set_count('yes', 2)
    vote.yes('James')
    vote.no('Paul')
    vote.vote('abstain', 'Thom')

    assert len(vote.votes) == 3
    assert len(vote.counts) == 1
    org_spec = get_pseudo_id(vote.organization)
    assert org_spec == {'classification': 'legislature'}
    assert vote.bill is None

    vote.validate()
    # reaching this line means validate() did not raise
    assert 'we get here'
Example #19
0
def test_simple_vote():
    """Checks the counts, votes and organization of a toy vote, then validates it."""
    toy = toy_vote()
    toy.set_count('yes', 2)
    toy.yes('James')
    toy.no('Paul')
    toy.vote('abstain', 'Thom')

    # three individual votes, one aggregate count
    assert len(toy.votes) == 3
    assert len(toy.counts) == 1

    expected_org = {'classification': 'legislature'}
    assert get_pseudo_id(toy.organization) == expected_org
    assert toy.bill is None

    toy.validate()
    # reaching this line means validate() did not raise
    assert 'we get here'
Example #20
0
    def prepare_for_db(self, data):
        """
            Swap the scraped references in a vote event dict for database ids.

            Pops 'legislative_session', 'organization', 'bill' and
            'bill_action' from data and writes 'legislative_session_id',
            'organization_id' and 'bill_id'. 'bill_action_id' is written only
            when exactly one BillAction matches and it is not already tied to
            a vote event. Each entry of data['votes'] has its 'voter_id'
            resolved in place.

            params:
                data:   vote event dict, mutated in place

            returns:
                the same data dict
        """
        session = data.pop('legislative_session')
        data['legislative_session_id'] = self.get_session_id(session)
        organization = data.pop('organization')
        data['organization_id'] = self.org_importer.resolve_json_id(organization)

        bill = data.pop('bill')
        if bill and bill.startswith('~'):
            # unpack psuedo id and apply filter in case there are any that alter it
            spec = get_pseudo_id(bill)
            self.bill_importer.apply_transformers(spec)
            bill = _make_pseudo_id(**spec)
        data['bill_id'] = self.bill_importer.resolve_json_id(bill)

        bill_action = data.pop('bill_action')
        if bill_action:
            try:
                action = BillAction.objects.get(
                    bill_id=data['bill_id'],
                    description=bill_action,
                    date=data['start_date'],
                    organization_id=data['organization_id'],
                )
            except BillAction.DoesNotExist:
                self.warning('could not match VoteEvent to %s %s %s', bill,
                             bill_action, data['start_date'])
            except BillAction.MultipleObjectsReturned as e:
                self.warning('could not match VoteEvent to %s %s %s: %s', bill,
                             bill_action, data['start_date'], e)
            else:
                # seen_action_ids is for ones being added in this import
                # action.vote is already set if action was set on prior import
                if action.id not in self.seen_action_ids and not hasattr(action, 'vote'):
                    data['bill_action_id'] = action.id
                    self.seen_action_ids.add(action.id)
                else:
                    self.warning('can not match two VoteEvents to %s: %s',
                                 action.id, bill_action)

        # voters that cannot be resolved are tolerated (allow_no_match)
        for vote in data['votes']:
            voter = vote['voter_id']
            vote['voter_id'] = self.person_importer.resolve_json_id(voter, allow_no_match=True)
        return data
Example #21
0
File: base.py Project: rshorey/pupa
    def resolve_json_id(self, json_id):
        """
            Given an id found in scraped JSON, return a DB id for the object.

            Ids starting with '~' are pseudo ids: they are looked up through
            self.model_class and the result is kept in self.pseudo_id_cache.
            Any other id is mapped through self.duplicates and then
            self.json_to_db_id.

            params:
                json_id:    id from json

            returns:
                database id, or None if json_id is empty

            raises:
                UnresolvedIdError if id couldn't be resolved
        """
        if not json_id:
            return None

        if not json_id.startswith('~'):
            # get the id that the duplicate points to, or use self
            json_id = self.duplicates.get(json_id, json_id)
            try:
                return self.json_to_db_id[json_id]
            except KeyError:
                raise UnresolvedIdError('cannot resolve id: {}'.format(json_id))

        # keep caches of all the pseudo-ids to avoid doing 1000s of lookups during import
        if json_id not in self.pseudo_id_cache:
            spec = self.limit_spec(get_pseudo_id(json_id))
            try:
                self.pseudo_id_cache[json_id] = self.model_class.objects.get(**spec).id
            except self.model_class.DoesNotExist:
                raise UnresolvedIdError(
                    'cannot resolve pseudo id to {}: {}'.format(
                        self.model_class.__name__, json_id))
            except self.model_class.MultipleObjectsReturned:
                raise UnresolvedIdError(
                    'multiple objects returned for pseudo id to {}: {}'.format(
                        self.model_class.__name__, json_id))

        # return the cached object
        return self.pseudo_id_cache[json_id]
Example #22
0
    def prepare_for_db(self, data):
        """
            Fill in jurisdiction and location on an event dict and resolve the
            ids it references.

            For each participant and each agenda item's related entity, only
            the first id key present (in the order listed below) is resolved,
            always with allow_no_match=True.

            params:
                data:   event dict, mutated in place

            returns:
                the same data dict
        """
        data['jurisdiction_id'] = self.jurisdiction_id
        data['location'] = self.get_location(data['location'])

        # self-assignment: the value is unchanged, but the key must be present
        data['start_date'] = data['start_date']
        data['end_date'] = data.get('end_date', "")

        # (id key, name of the importer attribute on self), in the order the keys are checked
        participant_kinds = (
            ('person_id', 'person_importer'),
            ('organization_id', 'org_importer'),
        )
        entity_kinds = participant_kinds + (
            ('bill_id', 'bill_importer'),
            ('vote_event_id', 'vote_event_importer'),
        )

        for participant in data['participants']:
            for key, importer_name in participant_kinds:
                if key not in participant:
                    continue
                importer = getattr(self, importer_name)
                participant[key] = importer.resolve_json_id(participant[key],
                                                            allow_no_match=True)
                break

        for item in data['agenda']:
            for entity in item['related_entities']:
                for key, importer_name in entity_kinds:
                    if key not in entity:
                        continue
                    json_id = entity[key]
                    if key == 'bill_id':
                        # unpack and repack bill psuedo id in case filters alter it
                        spec = get_pseudo_id(json_id)
                        self.bill_importer.apply_transformers(spec)
                        json_id = _make_pseudo_id(**spec)
                    importer = getattr(self, importer_name)
                    entity[key] = importer.resolve_json_id(json_id, allow_no_match=True)
                    break

        return data
Example #23
0
    def prepare_for_db(self, data):
        """
            Fill in jurisdiction and location on an event dict and resolve the
            ids it references.

            The location is passed through self.get_location only when it is
            truthy. Participant and related-entity ids are resolved with
            allow_no_match=True; for each record only the first id key
            present is handled.

            params:
                data:   event dict, mutated in place

            returns:
                the same data dict
        """
        def lenient(importer, record, key):
            # resolve record[key] in place with allow_no_match=True
            record[key] = importer.resolve_json_id(record[key], allow_no_match=True)

        data['jurisdiction_id'] = self.jurisdiction_id
        location = data['location']
        if location:
            data['location'] = self.get_location(location)

        # self-assignment: the value is unchanged, but the key must be present
        data['start_date'] = data['start_date']
        data['end_date'] = data.get('end_date', "")

        for participant in data['participants']:
            if 'person_id' in participant:
                lenient(self.person_importer, participant, 'person_id')
            elif 'organization_id' in participant:
                lenient(self.org_importer, participant, 'organization_id')

        for item in data['agenda']:
            for entity in item['related_entities']:
                if 'person_id' in entity:
                    lenient(self.person_importer, entity, 'person_id')
                elif 'organization_id' in entity:
                    lenient(self.org_importer, entity, 'organization_id')
                elif 'bill_id' in entity:
                    # unpack and repack bill psuedo id in case filters alter it
                    spec = get_pseudo_id(entity['bill_id'])
                    self.bill_importer.apply_transformers(spec)
                    repacked = _make_pseudo_id(**spec)
                    entity['bill_id'] = self.bill_importer.resolve_json_id(
                        repacked, allow_no_match=True)
                elif 'vote_event_id' in entity:
                    lenient(self.vote_event_importer, entity, 'vote_event_id')

        return data
Example #24
0
    def resolve_json_id(self, json_id):
        """
            Given an id found in scraped JSON, return a DB id for the object.

            Ids starting with '~' are pseudo ids: they are looked up through
            self.model_class and the result is kept in self.pseudo_id_cache.
            Any other id is mapped through self.duplicates and then
            self.json_to_db_id.

            params:
                json_id:    id from json

            returns:
                database id, or None if json_id is empty

            raises:
                UnresolvedIdError if id couldn't be resolved
        """
        if not json_id:
            return None

        if not json_id.startswith('~'):
            # get the id that the duplicate points to, or use self
            json_id = self.duplicates.get(json_id, json_id)
            try:
                return self.json_to_db_id[json_id]
            except KeyError:
                raise UnresolvedIdError('cannot resolve id: {}'.format(json_id))

        # keep caches of all the pseudo-ids to avoid doing 1000s of lookups during import
        if json_id not in self.pseudo_id_cache:
            spec = self.limit_spec(get_pseudo_id(json_id))
            try:
                self.pseudo_id_cache[json_id] = self.model_class.objects.get(**spec).id
            except self.model_class.DoesNotExist:
                raise UnresolvedIdError('cannot resolve pseudo id to {}: {}'.format(
                    self.model_class.__name__, json_id))
            except self.model_class.MultipleObjectsReturned:
                raise UnresolvedIdError(
                    'multiple objects returned for pseudo id to {}: {}'.format(
                        self.model_class.__name__, json_id))

        # return the cached object
        return self.pseudo_id_cache[json_id]
Example #25
0
    def prepare_for_db(self, data):
        """
            Swap the scraped references in a vote event dict for database ids.

            Pops 'legislative_session', 'organization', 'bill' and
            'bill_action' from data and writes 'legislative_session_id',
            'organization_id' and 'bill_id'. 'bill_action_id' is written only
            when exactly one BillAction matches and its id has not been seen
            earlier in this import. Each entry of data['votes'] has its
            'voter_id' resolved in place.

            params:
                data:   vote event dict, mutated in place

            returns:
                the same data dict
        """
        session = data.pop('legislative_session')
        data['legislative_session_id'] = self.get_session_id(session)
        organization = data.pop('organization')
        data['organization_id'] = self.org_importer.resolve_json_id(organization)

        bill = data.pop('bill')
        if bill and bill.startswith('~'):
            # rebuild the pseudo id with its identifier passed through fix_bill_id
            spec = get_pseudo_id(bill)
            spec['identifier'] = fix_bill_id(spec['identifier'])
            bill = _make_pseudo_id(**spec)
        data['bill_id'] = self.bill_importer.resolve_json_id(bill)

        bill_action = data.pop('bill_action')
        if bill_action:
            try:
                action = BillAction.objects.get(
                    bill_id=data['bill_id'],
                    description=bill_action,
                    date=data['start_date'],
                    organization_id=data['organization_id'],
                )
            except BillAction.DoesNotExist:
                self.warning('could not match VoteEvent to %s %s %s', bill,
                             bill_action, data['start_date'])
            except BillAction.MultipleObjectsReturned as e:
                self.warning('could not match VoteEvent to %s %s %s: %s', bill,
                             bill_action, data['start_date'], e)
            else:
                if action.id not in self.seen_action_ids:
                    data['bill_action_id'] = action.id
                    self.seen_action_ids.add(action.id)
                else:
                    self.warning('can not match two VoteEvents to %s: %s',
                                 action.id, bill_action)

        # voters that cannot be resolved are tolerated (allow_no_match)
        for vote in data['votes']:
            voter = vote['voter_id']
            vote['voter_id'] = self.person_importer.resolve_json_id(voter, allow_no_match=True)
        return data
Example #26
0
    def prepare_for_db(self, data):
        """
            Swap the scraped references in a vote event dict for database ids.

            Pops 'legislative_session', 'organization', 'bill' and
            'bill_action' from data and writes 'legislative_session_id',
            'organization_id' and 'bill_id'. 'bill_action_id' is written only
            when exactly one BillAction matches and it is not already tied to
            a vote event. Each entry of data['votes'] has its 'voter_id'
            resolved in place.

            params:
                data:   vote event dict, mutated in place

            returns:
                the same data dict
        """
        session = data.pop('legislative_session')
        data['legislative_session_id'] = self.get_session_id(session)
        organization = data.pop('organization')
        data['organization_id'] = self.org_importer.resolve_json_id(organization)

        bill = data.pop('bill')
        is_pseudo = bill and bill.startswith('~')
        if is_pseudo:
            # unpack psuedo id and apply filter in case there are any that alter it
            spec = get_pseudo_id(bill)
            self.bill_importer.apply_transformers(spec)
            bill = _make_pseudo_id(**spec)
        data['bill_id'] = self.bill_importer.resolve_json_id(bill)

        bill_action = data.pop('bill_action')
        if bill_action:
            try:
                action = BillAction.objects.get(
                    bill_id=data['bill_id'],
                    description=bill_action,
                    date=data['start_date'],
                    organization_id=data['organization_id'],
                )
            except BillAction.DoesNotExist:
                self.warning('could not match VoteEvent to %s %s %s',
                             bill, bill_action, data['start_date'])
            except BillAction.MultipleObjectsReturned as e:
                self.warning('could not match VoteEvent to %s %s %s: %s',
                             bill, bill_action, data['start_date'], e)
            else:
                # seen_action_ids is for ones being added in this import
                # action.vote is already set if action was set on prior import
                if action.id not in self.seen_action_ids and not hasattr(action, 'vote'):
                    data['bill_action_id'] = action.id
                    self.seen_action_ids.add(action.id)
                else:
                    self.warning('can not match two VoteEvents to %s: %s',
                                 action.id, bill_action)

        # voters that cannot be resolved are tolerated (allow_no_match)
        for vote in data['votes']:
            voter = vote['voter_id']
            vote['voter_id'] = self.person_importer.resolve_json_id(voter, allow_no_match=True)
        return data
Example #27
0
    def prepare_for_db(self, data):
        """
            Fill in jurisdiction and location on an event dict and resolve the
            ids it references.

            For each participant and each agenda item's related entity, only
            the first id key present (in the order listed below) is resolved,
            always with allow_no_match=True. Bill pseudo ids are rebuilt with
            their identifier passed through fix_bill_id before resolution.

            params:
                data:   event dict, mutated in place

            returns:
                the same data dict
        """
        data['jurisdiction_id'] = self.jurisdiction_id
        data['location'] = self.get_location(data['location'])

        # self-assignment: the value is unchanged, but the key must be present
        data['start_date'] = data['start_date']
        data['end_date'] = data.get('end_date', "")

        # (id key, name of the importer attribute on self), in the order the keys are checked
        participant_kinds = (
            ('person_id', 'person_importer'),
            ('organization_id', 'org_importer'),
        )
        entity_kinds = participant_kinds + (
            ('bill_id', 'bill_importer'),
            ('vote_event_id', 'vote_event_importer'),
        )

        for participant in data['participants']:
            for key, importer_name in participant_kinds:
                if key not in participant:
                    continue
                importer = getattr(self, importer_name)
                participant[key] = importer.resolve_json_id(participant[key],
                                                            allow_no_match=True)
                break

        for item in data['agenda']:
            for entity in item['related_entities']:
                for key, importer_name in entity_kinds:
                    if key not in entity:
                        continue
                    json_id = entity[key]
                    if key == 'bill_id':
                        spec = get_pseudo_id(json_id)
                        spec['identifier'] = fix_bill_id(spec['identifier'])
                        json_id = _make_pseudo_id(**spec)
                    importer = getattr(self, importer_name)
                    entity[key] = importer.resolve_json_id(json_id, allow_no_match=True)
                    break

        return data
def test_person_add_party():
    """Checks that add_party() creates a valid related record pointing at the party pseudo id."""
    person = Person('Groot')
    person.add_party('Green')

    party_link = person._related[0]
    party_link.validate()

    org_spec = get_pseudo_id(party_link.organization_id)
    assert org_spec == {'name': 'Green', 'classification': 'party'}
def test_committee_add_member_name():
    """Checks the record created by add_member() when the person is given by name."""
    committee = Organization('Defense', classification='committee')
    committee.add_member('John Adams')

    related = committee._related[0]
    assert get_pseudo_id(related.person_id) == {'name': 'John Adams'}
    assert related.organization_id == committee._id
    assert related.role == 'member'
Example #30
0
def test_set_bill_pseudo_id():
    """Checks the pseudo id stored by set_bill() when given an identifier and a chamber."""
    vote = toy_vote()
    vote.set_bill('HB 1', chamber='lower')

    expected = {
        'identifier': 'HB 1',
        'from_organization__classification': 'lower',
    }
    assert get_pseudo_id(vote.bill) == expected
Example #31
0
def test_committee_add_member_name():
    """Checks the record created by add_member() when the person is given by name."""
    defense = Organization('Defense', classification='committee')
    defense.add_member('John Adams')

    member_link = defense._related[0]
    person_spec = get_pseudo_id(member_link.person_id)
    assert person_spec == {'name': 'John Adams'}
    assert member_link.organization_id == defense._id
    assert member_link.role == 'member'
Example #32
0
def test_vote_event_org_dict():
    """Checks that a dict passed as organization round-trips through the pseudo id."""
    committee_spec = {'name': 'Random Committee', 'classification': 'committee'}
    event = VoteEvent(
        legislative_session="2009",
        motion_text="passage of the bill",
        start_date="2009-01-07",
        result='pass',
        classification='bill-passage',
        organization=committee_spec,
    )

    stored_spec = get_pseudo_id(event.organization)
    assert stored_spec == committee_spec
Example #33
0
def test_vote_event_org_chamber():
    """Checks that chamber='upper' ends up as the organization pseudo id classification."""
    event = VoteEvent(
        legislative_session="2009",
        motion_text="passage of the bill",
        start_date="2009-01-07",
        result='pass',
        classification='bill-passage',
        chamber='upper',
    )

    org_spec = get_pseudo_id(event.organization)
    assert org_spec == {'classification': 'upper'}