Ejemplo n.º 1
0
def test_full_organization():
    """Import a fully populated scraped organization and check what was stored.

    The scraped organization carries an identifier, an alternate name, a
    contact detail, a link and a source; each is verified on the single
    Organization found in the database after the import.
    """
    create_jurisdictions()

    scraped = ScrapeOrganization('United Nations',
                                 classification='international')
    scraped.add_identifier('un')
    scraped.add_name('UN', start_date='1945')
    scraped.add_contact_detail(
        type='phone', value='555-555-1234', note='this is fake')
    scraped.add_link('http://example.com/link')
    scraped.add_source('http://example.com/source')

    # run the importer on the serialized organization
    OrganizationImporter('jid1').import_data([scraped.as_dict()])

    # fetch the organization from the db and assert it imported correctly
    imported = Organization.objects.get()
    assert 'ocd-organization' in imported.id
    assert imported.name == scraped.name

    identifier = imported.identifiers.all()[0]
    assert identifier.identifier == 'un'
    assert identifier.scheme == ''

    other_name = imported.other_names.all()[0]
    assert other_name.name == 'UN'
    assert other_name.start_date == '1945'

    contact = imported.contact_details.all()[0]
    assert contact.type == 'phone'
    assert contact.value == '555-555-1234'
    assert contact.note == 'this is fake'

    assert imported.links.all()[0].url == 'http://example.com/link'
    assert imported.sources.all()[0].url == 'http://example.com/source'
Ejemplo n.º 2
0
    def scrape(self):
        """Yield one committee Organization per committee code.

        Committee instances are collected from the jurisdiction's legislative
        sessions (in reversed order) and grouped by their ``'code'`` key.  The
        first instance of each group is treated as canonical: it supplies the
        organization name, the two-letter identifier and the description.
        Every instance contributes a ``{term: decision_body_id}`` entry to
        ``extras['tmmis_decision_body_ids']``, a source URL, and an alternate
        name when its name differs from the canonical one.

        The ``'CC'`` code is skipped, as are codes with no instances.
        """
        sessions = reversed(self.jurisdiction.legislative_sessions)
        committee_term_instances = committees_from_sessions(self, sessions)
        committees_by_code = build_lookup_dict(
            self, data_list=committee_term_instances, index_key='code')

        for code, instances in committees_by_code.items():
            # TODO: Figure out how to edit city council org.
            if code == 'CC':
                continue

            # Without this guard an empty group would re-yield the previous
            # committee (or raise NameError on the very first code).
            if not instances:
                continue

            # TODO: Ensure this survives addition of new term (2017)
            #       so specific year always creates
            canonical = instances[0]
            o = Organization(name=canonical['name'],
                             classification='committee')
            extras = {
                'tmmis_decision_body_ids': [],
                'description': canonical['info'],
            }
            o.add_identifier(canonical['code'],
                             scheme=TWO_LETTER_ORG_CODE_SCHEME)

            for inst in instances:
                extras['tmmis_decision_body_ids'].append(
                    {inst['term']: inst['decision_body_id']})
                o.add_source(inst['source_url'])
                if inst['name'] != canonical['name']:
                    # TODO: Add start_date and end_date
                    o.add_name(inst['name'])

            o.extras = extras
            yield o
Ejemplo n.º 3
0
def test_full_organization():
    """Round-trip a scraped organization through the importer.

    Builds an organization with one identifier, alternate name, contact
    detail, link and source, imports it, and asserts that the stored
    Organization exposes the same values.
    """
    source_org = ScrapeOrganization(
        'United Nations', classification='international')
    source_org.add_identifier('un')
    source_org.add_name('UN', start_date='1945')
    source_org.add_contact_detail(
        type='phone',
        value='555-555-1234',
        note='this is fake',
    )
    source_org.add_link('http://example.com/link')
    source_org.add_source('http://example.com/source')

    # import the serialized organization
    importer = OrganizationImporter('jurisdiction-id')
    importer.import_data([source_org.as_dict()])

    # load the stored organization and compare it field by field
    stored = Organization.objects.get()
    assert 'ocd-organization' in stored.id
    assert stored.name == source_org.name

    first_identifier = stored.identifiers.all()[0]
    assert first_identifier.identifier == 'un'
    assert first_identifier.scheme == ''

    first_alias = stored.other_names.all()[0]
    assert first_alias.name == 'UN'
    assert first_alias.start_date == '1945'

    first_contact = stored.contact_details.all()[0]
    assert first_contact.type == 'phone'
    assert first_contact.value == '555-555-1234'
    assert first_contact.note == 'this is fake'

    assert stored.links.all()[0].url == 'http://example.com/link'
    assert stored.sources.all()[0].url == 'http://example.com/source'
Ejemplo n.º 4
0
    def scrape(self):
        """Yield one committee Organization for each committee code.

        Instances gathered from the jurisdiction's legislative sessions
        (reversed) are grouped by ``'code'``.  Within a group the first
        instance is canonical and provides the name, identifier and
        description; all instances add their ``{term: decision_body_id}``
        mapping to the extras, their source URL, and -- when the name differs
        from the canonical name -- an alternate name.

        Groups keyed ``'CC'`` and groups without instances are skipped.
        """
        sessions = reversed(self.jurisdiction.legislative_sessions)
        committee_term_instances = committees_from_sessions(self, sessions)
        committees_by_code = build_lookup_dict(self, data_list=committee_term_instances, index_key='code')

        for code, instances in committees_by_code.items():
            # TODO: Figure out how to edit city council org.
            if code == 'CC':
                continue

            # An empty group has no canonical instance; yielding here would
            # repeat the previous committee or fail with NameError.
            if not instances:
                continue

            # TODO: Ensure this survives addition of new term (2017)
            #       so specific year always creates
            canonical = instances[0]
            o = Organization(name=canonical['name'], classification='committee')
            extras = {'tmmis_decision_body_ids': [], 'description': canonical['info']}
            o.add_identifier(canonical['code'], scheme=TWO_LETTER_ORG_CODE_SCHEME)

            for inst in instances:
                extras['tmmis_decision_body_ids'].append({inst['term']: inst['decision_body_id']})
                o.add_source(inst['source_url'])
                if inst['name'] != canonical['name']:
                    # TODO: Add start_date and end_date
                    o.add_name(inst['name'])

            o.extras = extras
            yield o
def test_deduplication_other_name_overlaps():
    """Importing an org whose alternate name is 'United Nations' adds no row.

    After ``create_org()`` has seeded the database, importing
    'The United Nations' (other name 'United Nations') under 'jid1' must
    leave exactly one Organization.
    """
    create_jurisdictions()
    create_org()

    scraped = ScrapeOrganization('The United Nations', classification='international')
    scraped.add_name('United Nations')

    importer = OrganizationImporter('jid1')
    importer.import_data([scraped.as_dict()])

    assert Organization.objects.all().count() == 1
Ejemplo n.º 6
0
def test_deduplication_other_name_overlaps():
    """Check that an overlapping alternate name does not create a second org.

    The database is seeded via ``create_org()``; a scraped organization named
    'The United Nations' with the other name 'United Nations' is then
    imported for 'jid1', and the Organization count must still be 1.
    """
    create_jurisdictions()
    create_org()

    incoming = ScrapeOrganization(
        'The United Nations', classification='international')
    incoming.add_name('United Nations')
    payload = incoming.as_dict()

    OrganizationImporter('jid1').import_data([payload])

    total = Organization.objects.all().count()
    assert total == 1
Ejemplo n.º 7
0
    def scrape(self):
        """Yield one committee Organization, with members, per committee code.

        Committee instances collected from the jurisdiction's legislative
        sessions (reversed) are grouped by ``'code'``.  The first instance of
        a group is canonical: it supplies the name, the two-letter identifier,
        the description and -- when ``referenceMeetingId`` returns a meeting
        id -- the councillor members, with one post added per distinct role.
        Every instance adds a ``{term: decision_body_id}`` entry to
        ``extras['tmmis_decision_body_ids']``, its source URL, and an
        alternate name when its name differs from the canonical one.

        Groups keyed ``'CC'``, groups with a falsy code and groups without
        instances are skipped.
        """
        sessions = reversed(self.jurisdiction.legislative_sessions)
        committee_term_instances = committees_from_sessions(self, sessions)
        committees_by_code = build_lookup_dict(
            self, data_list=committee_term_instances, index_key='code')

        for code, instances in committees_by_code.items():
            # TODO: Figure out how to edit city council org.
            if code == 'CC':
                continue

            # When there are no meetings scheduled and was no way to deduce committee code.
            if not code:
                continue

            # Without this guard an empty group would re-yield the previous
            # committee (or raise NameError on the very first code).
            if not instances:
                continue

            # TODO: Ensure this survives addition of new term (2017)
            #       so specific year always creates
            canonical = instances[0]
            o = Organization(name=canonical['name'],
                             classification='committee')
            extras = {
                'tmmis_decision_body_ids': [],
                'description': canonical['info'],
            }
            o.add_identifier(canonical['code'],
                             scheme=TWO_LETTER_ORG_CODE_SCHEME)

            # TODO: Scrape non-councillor members
            meeting_id = self.referenceMeetingId(
                canonical['code'], canonical['term'])
            if meeting_id:
                # Roles that already have a post, so each gets only one.
                seen_posts = set()
                membership_url = MEMBERSHIP_URL_TEMPLATE.format(meeting_id)
                for councillor in self.councillorMembers(membership_url):
                    name = councillor['name']
                    role = councillor['role']
                    o.add_member(name, role)
                    if role not in seen_posts:
                        # TODO: More specific divisions for some committee?
                        o.add_post(
                            role=role,
                            label=role,
                            division_id=self.jurisdiction.division_id)
                        seen_posts.add(role)

            for inst in instances:
                extras['tmmis_decision_body_ids'].append(
                    {inst['term']: inst['decision_body_id']})
                o.add_source(inst['source_url'])
                if inst['name'] != canonical['name']:
                    # TODO: Add start_date and end_date
                    o.add_name(inst['name'])

            o.extras = extras
            yield o
Ejemplo n.º 8
0
    def scrape(self):
        """Yield a committee Organization (including members) per code.

        Instances from the jurisdiction's legislative sessions (reversed) are
        grouped by ``'code'``.  The first instance in each group is canonical
        and provides the name, identifier and description.  If
        ``referenceMeetingId`` returns a meeting id for it, the councillors
        listed at the membership URL are added as members and a post is added
        once per distinct role.  All instances add their
        ``{term: decision_body_id}`` mapping, their source URL and, when the
        name differs from the canonical name, an alternate name.

        Groups keyed ``'CC'``, groups with a falsy code and groups without
        instances are skipped.
        """
        sessions = reversed(self.jurisdiction.legislative_sessions)
        committee_term_instances = committees_from_sessions(self, sessions)
        committees_by_code = build_lookup_dict(self, data_list=committee_term_instances, index_key='code')

        for code, instances in committees_by_code.items():
            # TODO: Figure out how to edit city council org.
            if code == 'CC':
                continue

            # When there are no meetings scheduled and was no way to deduce committee code.
            if not code:
                continue

            # An empty group has no canonical instance; yielding here would
            # repeat the previous committee or fail with NameError.
            if not instances:
                continue

            # TODO: Ensure this survives addition of new term (2017)
            #       so specific year always creates
            canonical = instances[0]
            o = Organization(name=canonical['name'], classification='committee')
            extras = {'tmmis_decision_body_ids': [], 'description': canonical['info']}
            o.add_identifier(canonical['code'], scheme=TWO_LETTER_ORG_CODE_SCHEME)

            # TODO: Scrape non-councillor members
            meeting_id = self.referenceMeetingId(canonical['code'], canonical['term'])
            if meeting_id:
                # Roles that already have a post, so each gets only one.
                seen_posts = set()
                membership_url = MEMBERSHIP_URL_TEMPLATE.format(meeting_id)
                for councillor in self.councillorMembers(membership_url):
                    name = councillor['name']
                    role = councillor['role']
                    o.add_member(name, role)
                    if role not in seen_posts:
                        o.add_post(
                            role=role,
                            label=role,
                            # TODO: More specific divisions for some committee?
                            division_id=self.jurisdiction.division_id,
                        )
                        seen_posts.add(role)

            for inst in instances:
                extras['tmmis_decision_body_ids'].append({inst['term']: inst['decision_body_id']})
                o.add_source(inst['source_url'])
                if inst['name'] != canonical['name']:
                    # TODO: Add start_date and end_date
                    o.add_name(inst['name'])

            o.extras = extras
            yield o
def test_deduplication_error_overlaps():
    """Importing an org that overlaps two stored orgs raises SameOrgNameError.

    Two organizations exist in 'jid1': 'World Wrestling Federation' and
    'World Wildlife Fund' (other name 'WWF').  The scraped organization is
    named 'World Wrestling Federation' and also carries the other name 'WWF'.
    """
    create_jurisdictions()

    shared = {'classification': 'international', 'jurisdiction_id': 'jid1'}
    Organization.objects.create(name='World Wrestling Federation', **shared)
    wildlife_fund = Organization.objects.create(name='World Wildlife Fund', **shared)
    wildlife_fund.other_names.create(name='WWF')

    scraped = ScrapeOrganization('World Wrestling Federation', classification='international')
    scraped.add_name('WWF')
    payload = scraped.as_dict()

    importer_id = 'jid1'
    with pytest.raises(SameOrgNameError):
        OrganizationImporter(importer_id).import_data([payload])
Ejemplo n.º 10
0
def test_deduplication_error_overlaps():
    """Expect SameOrgNameError when a scraped org overlaps two stored orgs.

    'World Wrestling Federation' and 'World Wildlife Fund' (other name 'WWF')
    are stored for 'jid1'; the scraped 'World Wrestling Federation' adds the
    other name 'WWF' before being imported.
    """
    create_jurisdictions()

    # two stored organizations, the second one also known as 'WWF'
    for stored_name, alias in (('World Wrestling Federation', None),
                               ('World Wildlife Fund', 'WWF')):
        stored = Organization.objects.create(
            name=stored_name,
            classification='international',
            jurisdiction_id='jid1',
        )
        if alias is not None:
            stored.other_names.create(name=alias)

    incoming = ScrapeOrganization(
        'World Wrestling Federation', classification='international')
    incoming.add_name('WWF')
    as_dict = incoming.as_dict()

    with pytest.raises(SameOrgNameError):
        OrganizationImporter('jid1').import_data([as_dict])
Ejemplo n.º 11
0
def test_deduplication_overlap_name_distinct_juris():
    """The same scraped org is merged within 'jid1' but created anew in 'jid2'.

    'World Wrestling Federation' (other name 'WWF') is stored for 'jid1'.
    Importing a scraped 'WWF' organization through a 'jid1' importer keeps the
    count at 1; importing it through a 'jid2' importer raises it to 2.
    """
    create_jurisdictions()

    existing = Organization.objects.create(
        name='World Wrestling Federation',
        classification='international',
        jurisdiction_id='jid1',
    )
    existing.other_names.create(name='WWF')

    scraped = ScrapeOrganization(name="WWF", classification="international")
    scraped.add_name('WWF')

    # a fresh dict is serialized for every import
    OrganizationImporter('jid1').import_item(scraped.as_dict())
    assert Organization.objects.count() == 1

    OrganizationImporter('jid2').import_item(scraped.as_dict())
    assert Organization.objects.count() == 2
def test_deduplication_overlap_name_distinct_juris():
    """Name overlap merges inside one jurisdiction and not across two.

    With 'World Wrestling Federation' (other name 'WWF') stored for 'jid1',
    the scraped 'WWF' organization is imported first for 'jid1' and then for
    'jid2'; the expected Organization counts afterwards are 1 and 2.
    """
    create_jurisdictions()

    wrestling = Organization.objects.create(name='World Wrestling Federation',
                                            classification='international',
                                            jurisdiction_id='jid1')
    wrestling.other_names.create(name='WWF')

    incoming = ScrapeOrganization(name="WWF", classification="international")
    incoming.add_name('WWF')

    # import under each jurisdiction in turn, serializing a new dict each time
    for jurisdiction_id, expected_count in (('jid1', 1), ('jid2', 2)):
        importer = OrganizationImporter(jurisdiction_id)
        importer.import_item(incoming.as_dict())
        assert Organization.objects.count() == expected_count
Ejemplo n.º 13
0
def test_locked_field_subitem():
    """A locked ``other_names`` field keeps its entry across a reimport.

    'SHIELD' is imported with the other name 'S.H.I.E.L.D.', ``other_names``
    is then added to ``locked_fields``, and 'SHIELD' is imported again
    without any other name.  The stored other name must still be present.
    """
    first_pass = ScrapeOrganization('SHIELD')
    first_pass.add_name('S.H.I.E.L.D.')
    OrganizationImporter('jid').import_data([first_pass.as_dict()])

    # lock the field
    stored = Organization.objects.get()
    stored.locked_fields = ['other_names']
    stored.save()

    # reimport, this time without the other name
    second_pass = ScrapeOrganization('SHIELD').as_dict()
    OrganizationImporter('jid').import_data([second_pass])

    reloaded = Organization.objects.get()
    assert reloaded.other_names.get().name == 'S.H.I.E.L.D.'
Ejemplo n.º 14
0
def test_locked_field_subitem():
    """Reimporting must not alter a sub-item list named in ``locked_fields``.

    The first import stores 'SHIELD' with the other name 'S.H.I.E.L.D.'.
    After ``other_names`` is locked, a second import of 'SHIELD' without
    other names is run, and the original other name is expected to remain.
    """
    create_jurisdiction()

    scraped = ScrapeOrganization('SHIELD')
    scraped.add_name('S.H.I.E.L.D.')
    initial_importer = OrganizationImporter('jid')
    initial_importer.import_data([scraped.as_dict()])

    # lock the field
    organization = Organization.objects.get()
    organization.locked_fields = ['other_names']
    organization.save()

    # reimport
    bare_payload = ScrapeOrganization('SHIELD').as_dict()
    second_importer = OrganizationImporter('jid')
    second_importer.import_data([bare_payload])

    organization = Organization.objects.get()
    remaining = organization.other_names.get()
    assert remaining.name == 'S.H.I.E.L.D.'
Ejemplo n.º 15
0
    def dict_to_org(self, committee):
        """Build a committee Organization from a collected committee dict.

        The names in ``committee['name']`` are sorted and the last one becomes
        the organization's primary name; the rest are added as other names,
        as is every truthy entry of ``committee['code']``.  The organization
        is created with ``chamber`` when the dict has a ``'chamber'`` key and
        with ``parent_id=committee['parent']`` otherwise.  Each entry of
        ``committee['source']`` is added as a source and passed to
        ``self.scrape_members``.

        Returns the populated Organization.
        """
        ordered_names = sorted(committee['name'])
        primary_name = ordered_names.pop()

        # attach by chamber when one is given, otherwise by parent
        if 'chamber' in committee:
            placement = {'chamber': committee['chamber']}
        else:
            placement = {'parent_id': committee['parent']}
        org = Organization(primary_name,
                           classification='committee',
                           **placement)

        for alias in ordered_names:
            org.add_name(alias)

        # codes are recorded as additional names; falsy codes are ignored
        for code in filter(None, committee['code']):
            org.add_name(code)

        for source_url in committee['source']:
            org.add_source(source_url)
            self.scrape_members(org, source_url)

        return org
Ejemplo n.º 16
0
    def dict_to_org(self, committee):
        """Turn a committee dict into an Organization and scrape its members.

        After sorting ``committee['name']``, the last name is used as the
        primary name and the others become other names.  Truthy values in
        ``committee['code']`` are added as other names too.  A ``'chamber'``
        key selects ``chamber=...``; without it ``committee['parent']`` is
        passed as ``parent_id``.  Every source in ``committee['source']`` is
        registered and handed to ``self.scrape_members``.

        Returns the resulting Organization.
        """
        all_names = sorted(committee['name'])
        main_name = all_names.pop()

        has_chamber = 'chamber' in committee
        if not has_chamber:
            result = Organization(main_name,
                                  classification='committee',
                                  parent_id=committee['parent'])
        else:
            result = Organization(main_name,
                                  classification='committee',
                                  chamber=committee['chamber'])

        for extra_name in all_names:
            result.add_name(extra_name)

        for code in committee['code']:
            if not code:
                # skip empty codes
                continue
            result.add_name(code)

        for src in committee['source']:
            result.add_source(src)
            self.scrape_members(result, src)

        return result
Ejemplo n.º 17
0
    def dict_to_org(self, committee):
        """Create the Organization described by ``committee``.

        ``committee["name"]`` is sorted; its last element names the
        organization and the remaining elements are added as other names,
        followed by each truthy element of ``committee["code"]``.  The
        organization is tied to ``committee["chamber"]`` when that key exists
        and to ``committee["parent"]`` (as ``parent_id``) when it does not.
        For every element of ``committee["source"]`` the source is added and
        ``self.scrape_members`` is called.

        Returns the Organization.
        """
        sorted_names = sorted(committee["name"])
        chosen_name = sorted_names.pop()

        # exactly one of chamber / parent_id is passed to the constructor
        link_kwargs = (
            {"chamber": committee["chamber"]}
            if "chamber" in committee
            else {"parent_id": committee["parent"]}
        )
        committee_org = Organization(
            chosen_name, classification="committee", **link_kwargs
        )

        for remaining_name in sorted_names:
            committee_org.add_name(remaining_name)

        usable_codes = [code for code in committee["code"] if code]
        for code in usable_codes:
            committee_org.add_name(code)

        for source in committee["source"]:
            committee_org.add_source(source)
            self.scrape_members(committee_org, source)

        return committee_org
Ejemplo n.º 18
0
    def scrape(self):
        """Yield Organizations for the committees in committees-historical.yaml.

        The YAML file is read from the ``congress-legislators`` directory next
        to this module.  For every committee an Organization is yielded with
        its chamber, founding/dissolution dates (from ``duration``), a
        ``thomas_id`` identifier (the committee id suffixed with ``'00'``),
        optional ``house_committee_id`` / ``senate_committee_id`` identifiers,
        and all other recorded names.  Each subcommittee is then yielded as an
        Organization whose parent is that committee.

        The name stored under the numerically largest key of a ``names``
        mapping is used as the current name.

        Raises:
            RuntimeError: if a committee's ``type`` is neither ``'house'`` nor
                ``'senate'``.
        """
        source_url = (
            'https://github.com/unitedstates/congress-legislators/blob/master/committees-historical.yaml'
        )
        # committee type -> (chamber, prefix put in front of committee names)
        chambers = {
            'house': ('lower', 'House Committee on '),
            'senate': ('upper', 'Senate Committee on '),
        }
        subcommittee_prefix = 'Subcommittee on '

        def latest_name(names):
            # Keys are compared numerically, not as strings.
            by_number = {int(key): name for key, name in names.items()}
            return by_number[max(by_number)]

        current_path = Path(__file__)
        committee_path = current_path.parent / 'congress-legislators/committees-historical.yaml'

        # Explicit encoding so parsing does not depend on the locale default.
        with committee_path.open(encoding='utf-8') as f:
            # NOTE(review): yaml.CLoader is the full, non-safe loader and is
            # only available when PyYAML is built with LibYAML.  Presumably
            # fine for this vendored data file; consider yaml.CSafeLoader.
            committees = yaml.load(f, Loader=yaml.CLoader)

        for committee in committees:
            committee_type = committee['type']
            if committee_type not in chambers:
                # The original bare ``raise`` here also surfaced as a
                # RuntimeError, just without any useful message.
                raise RuntimeError(
                    'unexpected committee type {!r} in {!r}'.format(
                        committee_type, committee))
            chamber, prefix = chambers[committee_type]

            c = Organization(prefix + latest_name(committee['names']),
                             classification='committee',
                             chamber=chamber)

            start, end = duration(committee)

            c.founding_date = start
            if end:
                c.dissolution_date = end

            c.add_identifier(committee['thomas_id'] + '00', scheme='thomas_id')

            for scheme in ('house_committee_id', 'senate_committee_id'):
                if scheme in committee:
                    c.add_identifier(committee[scheme], scheme=scheme)

            c.add_source(source_url)

            # Sorted so the order of other names is stable between runs.
            for name in sorted(set(committee['names'].values())):
                name = prefix + name
                if name != c.name:
                    c.add_name(name)

            yield c

            for subcommittee in committee.get('subcommittees') or []:
                thomas_id = (committee['thomas_id'] +
                             subcommittee['thomas_id'])

                # SSJU15 is never yielded on its own; its id is attached to
                # SSJU12 below.
                if thomas_id == 'SSJU15':
                    continue

                sc = Organization(
                    subcommittee_prefix + latest_name(subcommittee['names']),
                    classification='committee',
                    parent_id=c)

                start, end = duration(subcommittee)

                sc.founding_date = start
                if end:
                    sc.dissolution_date = end

                sc.add_identifier(thomas_id, scheme='thomas_id')

                sc.add_source(source_url)

                if thomas_id == 'SSJU12':
                    sc.add_identifier('SSJU15', scheme='thomas_id')

                for name in sorted(set(subcommittee['names'].values())):
                    name = subcommittee_prefix + name
                    if name != sc.name:
                        sc.add_name(name)

                yield sc