Ejemplo n.º 1
0
    def scrape(self):
        """Yield one committee ``Organization`` per committee code.

        Committee records gathered across the jurisdiction's legislative
        sessions (iterated in reverse order) are grouped by their ``code``.
        The first record of each group is treated as canonical: it supplies
        the organization's name, description and two-letter identifier.
        Every record in the group contributes a ``{term: decision_body_id}``
        entry to the extras and a source URL, and a record whose name differs
        from the canonical one is added as an alternate name.

        Groups whose code is ``'CC'`` are skipped.
        """
        term_sessions = reversed(self.jurisdiction.legislative_sessions)
        term_instances = committees_from_sessions(self, term_sessions)
        by_code = build_lookup_dict(
            self, data_list=term_instances, index_key='code')

        # TODO: Ensure this survives addition of new term (2017)
        #       so specific year always creates
        canonical_idx = 0

        for code, instances in by_code.items():
            # TODO: Figure out how to edit city council org.
            if code == 'CC':
                continue

            extras = {'tmmis_decision_body_ids': []}
            for idx, instance in enumerate(instances):
                if idx == canonical_idx:
                    # The canonical record seeds the organization itself.
                    org = Organization(classification='committee',
                                       name=instance['name'])
                    extras['description'] = instance['info']
                    org.add_identifier(instance['code'],
                                       scheme=TWO_LETTER_ORG_CODE_SCHEME)

                body_id_by_term = {instance['term']: instance['decision_body_id']}
                extras['tmmis_decision_body_ids'].append(body_id_by_term)
                org.extras = extras
                org.add_source(instance['source_url'])

                canonical_name = instances[canonical_idx]['name']
                if canonical_name != instance['name']:
                    # TODO: Add start_date and end_date
                    org.add_name(instance['name'])

            yield org
Ejemplo n.º 2
0
    def scrape(self):
        """Build and yield a committee ``Organization`` for each committee code.

        Records returned by ``committees_from_sessions`` (fed the
        jurisdiction's legislative sessions in reverse order) are indexed by
        ``code``. Within each group the record at position 0 defines the
        organization (name, description, two-letter identifier); all records
        add their decision body id keyed by term, their source URL and, if
        named differently from the first record, an alternate name.

        The group with code ``'CC'`` is not yielded.
        """
        # TODO: Ensure this survives addition of new term (2017)
        #       so specific year always creates
        canonical_pos = 0

        ordered_sessions = reversed(self.jurisdiction.legislative_sessions)
        records = committees_from_sessions(self, ordered_sessions)
        groups = build_lookup_dict(self, data_list=records, index_key='code')

        for code, instances in groups.items():
            # TODO: Figure out how to edit city council org.
            if code == 'CC':
                continue

            extras = {'tmmis_decision_body_ids': []}
            for position, record in enumerate(instances):
                if position == canonical_pos:
                    committee = Organization(name=record['name'], classification='committee')
                    extras['description'] = record['info']
                    committee.add_identifier(record['code'], scheme=TWO_LETTER_ORG_CODE_SCHEME)

                extras['tmmis_decision_body_ids'].append(
                    {record['term']: record['decision_body_id']}
                )
                committee.extras = extras
                committee.add_source(record['source_url'])

                renamed = instances[canonical_pos]['name'] != record['name']
                if renamed:
                    # TODO: Add start_date and end_date
                    committee.add_name(record['name'])

            yield committee
Ejemplo n.º 3
0
    def scrape_committees(self, repos):
        """Yield committee and subcommittee organizations from YAML files.

        Args:
            repos: iterable of file paths; each is appended to the raw GitHub
                URL of the ``unitedstates/congress-legislators`` master branch
                and loaded with ``self.fetch_yaml``.

        Yields:
            For every committee record, one ``Organization`` per entry of its
            ``subcommittees`` list (when present), followed by the
            ``Organization`` of the committee itself.
        """
        url_template = "https://raw.githubusercontent.com/unitedstates/congress-legislators/master/{0}"
        # Record key -> contact-detail type it is stored under.
        contact_types = (("phone", "voice"), ("address", "address"))

        for repo in repos:
            source = url_template.format(repo)
            for committee in self.fetch_yaml(source):
                org = Organization(committee["name"], classification="committee")
                org.add_source(source)

                # Fixed tuples (rather than set intersections) keep the order
                # of the add_* calls stable from run to run.
                for key in ("url", "rss_url"):
                    if key in committee:
                        org.add_link(committee[key])

                for key, contact_type in contact_types:
                    if key in committee:
                        org.add_contact_detail(type=contact_type, value=committee[key])

                for key in ("senate_committee_id", "house_committee_id", "thomas_id"):
                    if key in committee:
                        org.add_identifier(committee[key], scheme=key)

                if "subcommittees" in committee:
                    for subcommittee in committee["subcommittees"]:
                        sub_org = Organization(
                            subcommittee["name"], classification="committee", parent_id=org._id
                        )

                        # NOTE(review): parent committees store this id under
                        # scheme "thomas_id"; confirm "thomas" is intended here.
                        sub_org.add_identifier(subcommittee["thomas_id"], scheme="thomas")
                        sub_org.add_source(source)

                        # Read the subcommittee's own contact data; the parent
                        # record may lack the key or hold a different value.
                        for key, contact_type in contact_types:
                            if key in subcommittee:
                                sub_org.add_contact_detail(
                                    type=contact_type, value=subcommittee[key]
                                )

                        yield sub_org

                yield org
Ejemplo n.º 4
0
def test_full_organization():
    """Import a fully populated scraped organization and verify every field."""
    create_jurisdictions()

    scraped = ScrapeOrganization('United Nations', classification='international')
    scraped.add_identifier('un')
    scraped.add_name('UN', start_date='1945')
    scraped.add_contact_detail(
        type='phone',
        value='555-555-1234',
        note='this is fake',
    )
    scraped.add_link('http://example.com/link')
    scraped.add_source('http://example.com/source')

    # run the scraped organization through the importer
    OrganizationImporter('jid1').import_data([scraped.as_dict()])

    # fetch the organization from the db and check it imported correctly
    imported = Organization.objects.get()
    assert 'ocd-organization' in imported.id
    assert imported.name == scraped.name

    identifier = imported.identifiers.all()[0]
    assert identifier.identifier == 'un'
    assert identifier.scheme == ''

    other_name = imported.other_names.all()[0]
    assert other_name.name == 'UN'
    assert other_name.start_date == '1945'

    contact = imported.contact_details.all()[0]
    assert contact.type == 'phone'
    assert contact.value == '555-555-1234'
    assert contact.note == 'this is fake'

    link = imported.links.all()[0]
    assert link.url == 'http://example.com/link'
    source = imported.sources.all()[0]
    assert source.url == 'http://example.com/source'
Ejemplo n.º 5
0
def test_full_organization():
    """Check that identifiers, names, contacts, links and sources survive import."""
    scraped_org = ScrapeOrganization('United Nations', classification='international')
    scraped_org.add_identifier('un')
    scraped_org.add_name('UN', start_date='1945')
    scraped_org.add_contact_detail(
        type='phone', value='555-555-1234', note='this is fake'
    )
    scraped_org.add_link('http://example.com/link')
    scraped_org.add_source('http://example.com/source')

    # import the organization
    importer = OrganizationImporter('jurisdiction-id')
    importer.import_data([scraped_org.as_dict()])

    # load the organization back from the db and assert it imported correctly
    stored = Organization.objects.get()
    assert 'ocd-organization' in stored.id
    assert stored.name == scraped_org.name

    stored_identifier = stored.identifiers.all()[0]
    assert stored_identifier.identifier == 'un'
    assert stored_identifier.scheme == ''

    stored_name = stored.other_names.all()[0]
    assert stored_name.name == 'UN'
    assert stored_name.start_date == '1945'

    stored_contact = stored.contact_details.all()[0]
    assert stored_contact.type == 'phone'
    assert stored_contact.value == '555-555-1234'
    assert stored_contact.note == 'this is fake'

    assert stored.links.all()[0].url == 'http://example.com/link'
    assert stored.sources.all()[0].url == 'http://example.com/source'
Ejemplo n.º 6
0
    def scrape(self):
        """Yield one committee ``Organization``, with members, per committee code.

        Committee records gathered across the jurisdiction's legislative
        sessions (iterated in reverse order) are grouped by ``code``. The
        first record of a group is canonical: it supplies the name,
        description and two-letter identifier, and its code and term are used
        to look up a reference meeting. When a meeting id is found, the
        councillors listed at the membership URL are added as members, and a
        post is added once per distinct role.

        Every record in the group adds a ``{term: decision_body_id}`` entry
        to the extras and a source URL, and a differing name becomes an
        alternate name. Groups with code ``'CC'`` or an empty code are
        skipped.
        """
        term_sessions = reversed(self.jurisdiction.legislative_sessions)
        term_instances = committees_from_sessions(self, term_sessions)
        by_code = build_lookup_dict(
            self, data_list=term_instances, index_key='code')

        # TODO: Ensure this survives addition of new term (2017)
        #       so specific year always creates
        canonical_idx = 0

        for code, instances in by_code.items():
            # TODO: Figure out how to edit city council org.
            # An empty code occurs when there are no meetings scheduled and
            # there was no way to deduce the committee code.
            if code == 'CC' or not code:
                continue

            extras = {'tmmis_decision_body_ids': []}
            for idx, instance in enumerate(instances):
                if idx == canonical_idx:
                    org = Organization(classification='committee',
                                       name=instance['name'])
                    extras['description'] = instance['info']
                    org.add_identifier(instance['code'],
                                       scheme=TWO_LETTER_ORG_CODE_SCHEME)

                    # TODO: Scrape non-councillor members
                    meeting_id = self.referenceMeetingId(
                        instance['code'], instance['term'])
                    if meeting_id:
                        roles_with_post = []
                        membership_url = MEMBERSHIP_URL_TEMPLATE.format(
                            meeting_id)
                        councillors = self.councillorMembers(membership_url)
                        for councillor in councillors:
                            org.add_member(councillor['name'],
                                           councillor['role'])
                            if councillor['role'] in roles_with_post:
                                continue
                            # TODO: More specific divisions for some committee?
                            org.add_post(
                                role=councillor['role'],
                                label=councillor['role'],
                                division_id=self.jurisdiction.division_id)
                            roles_with_post.append(councillor['role'])

                body_id_by_term = {instance['term']: instance['decision_body_id']}
                extras['tmmis_decision_body_ids'].append(body_id_by_term)
                org.extras = extras
                org.add_source(instance['source_url'])

                canonical_name = instances[canonical_idx]['name']
                if canonical_name != instance['name']:
                    # TODO: Add start_date and end_date
                    org.add_name(instance['name'])

            yield org
Ejemplo n.º 7
0
    def scrape(self):
        """Build and yield a committee ``Organization`` for each committee code.

        Records from ``committees_from_sessions`` (fed the jurisdiction's
        legislative sessions in reverse order) are indexed by ``code``. The
        record at position 0 of each group defines the organization and is
        used to look up a reference meeting; if one exists, the councillors
        at the derived membership URL become members and each distinct role
        gets one post.

        All records of a group add their decision body id keyed by term,
        their source URL and, when named differently from the first record,
        an alternate name. Groups coded ``'CC'`` and groups with an empty
        code are not yielded.
        """
        # TODO: Ensure this survives addition of new term (2017)
        #       so specific year always creates
        canonical_pos = 0

        ordered_sessions = reversed(self.jurisdiction.legislative_sessions)
        records = committees_from_sessions(self, ordered_sessions)
        groups = build_lookup_dict(self, data_list=records, index_key='code')

        for code, instances in groups.items():
            # TODO: Figure out how to edit city council org.
            if code == 'CC':
                continue
            # When there are no meetings scheduled and was no way to deduce committee code.
            if not code:
                continue

            extras = {'tmmis_decision_body_ids': []}
            for position, record in enumerate(instances):
                if position == canonical_pos:
                    committee = Organization(name=record['name'], classification='committee')
                    extras['description'] = record['info']
                    committee.add_identifier(record['code'], scheme=TWO_LETTER_ORG_CODE_SCHEME)

                    # TODO: Scrape non-councillor members
                    meeting_id = self.referenceMeetingId(record['code'], record['term'])
                    if meeting_id:
                        known_roles = []
                        members_url = MEMBERSHIP_URL_TEMPLATE.format(meeting_id)
                        for councillor in self.councillorMembers(members_url):
                            committee.add_member(councillor['name'], councillor['role'])
                            needs_post = councillor['role'] not in known_roles
                            if needs_post:
                                committee.add_post(
                                    role=councillor['role'],
                                    label=councillor['role'],
                                    # TODO: More specific divisions for some committee?
                                    division_id=self.jurisdiction.division_id,
                                )
                                known_roles.append(councillor['role'])

                extras['tmmis_decision_body_ids'].append(
                    {record['term']: record['decision_body_id']}
                )
                committee.extras = extras
                committee.add_source(record['source_url'])

                renamed = instances[canonical_pos]['name'] != record['name']
                if renamed:
                    # TODO: Add start_date and end_date
                    committee.add_name(record['name'])

            yield committee
Ejemplo n.º 8
0
    def scrape_committees(self, repos):
        """Yield committee and subcommittee organizations from YAML files.

        Args:
            repos: iterable of file paths; each is appended to the raw GitHub
                URL of the ``unitedstates/congress-legislators`` master branch
                and loaded with ``self.fetch_yaml``.

        Yields:
            For every committee record, one ``Organization`` per entry of its
            ``subcommittees`` list (when present), followed by the
            ``Organization`` of the committee itself.
        """
        # Record key -> contact-detail type it is stored under.
        contact_types = (('phone', 'voice'), ('address', 'address'))
        link_keys = ('url', 'rss_url')
        identifier_keys = ('senate_committee_id', 'house_committee_id',
                           'thomas_id')

        for repo in repos:
            source = "https://raw.githubusercontent.com/unitedstates/congress-legislators/master/{0}".format(
                repo)
            committees = self.fetch_yaml(source)
            for committee in committees:
                org = Organization(committee['name'],
                                   classification='committee')

                org.add_source(source)

                # Fixed tuples (rather than set intersections) keep the order
                # of the add_* calls stable from run to run.
                for key in link_keys:
                    if key in committee:
                        org.add_link(committee[key])

                for key, contact_type in contact_types:
                    if key in committee:
                        org.add_contact_detail(type=contact_type,
                                               value=committee[key])

                for key in identifier_keys:
                    if key in committee:
                        org.add_identifier(committee[key], scheme=key)

                if 'subcommittees' in committee:
                    for subcommittee in committee['subcommittees']:
                        sub_org = Organization(subcommittee['name'],
                                               classification="committee",
                                               parent_id=org._id)

                        # NOTE(review): parent committees store this id under
                        # scheme "thomas_id"; confirm "thomas" is intended here.
                        sub_org.add_identifier(subcommittee['thomas_id'],
                                               scheme="thomas")
                        sub_org.add_source(source)

                        # Use the subcommittee's own values: indexing the
                        # parent record here could raise KeyError or attach
                        # the parent's phone/address to the subcommittee.
                        for key, contact_type in contact_types:
                            if key in subcommittee:
                                sub_org.add_contact_detail(
                                    type=contact_type,
                                    value=subcommittee[key])

                        yield sub_org

                yield org
Ejemplo n.º 9
0
    def scrape(self, session=None):
        """Yield one committee ``Organization`` per entry of the session's list.

        Args:
            session: session identifier used in the API URLs; when ``None``
                the value of ``self.latest_session()`` is used and logged.

        Yields:
            An ``Organization`` for each row of the committee list, filled
            from that committee's detail endpoint with members, extras,
            identifiers and a source URL.
        """
        if session is None:
            session = self.latest_session()
            self.info('no session specified, using %s', session)

        # com_types = ['J', 'SE', 'O']
        # base_url = 'https://wyoleg.gov/LsoService/api/committeeList/2018/J'
        list_url = 'https://wyoleg.gov/LsoService/api/committees/{}'.format(session)
        detail_template = 'https://wyoleg.gov/LsoService/api/committeeDetail/{}/{}'
        source_template = 'http://wyoleg.gov/Committees/{}/{}'

        listing = json.loads(self.get(list_url).content.decode('utf-8'))

        for row in listing:
            detail_url = detail_template.format(session, row['ownerID'])
            detail = json.loads(self.get(detail_url).content.decode('utf-8'))

            # WY doesn't seem to have any house/senate only committees that I can find
            org = Organization(name=detail['commName'],
                               chamber='legislature',
                               classification='committee')

            for member in detail['commMembers']:
                if member['chairman'] == 'Chairman':
                    role = 'chairman'
                else:
                    role = 'member'
                org.add_member(member['name'], role)

            # some WY committees have non-legislators appointed to the member by the Governor
            # but the formatting is super inconsistent
            if detail['otherMembers']:
                org.extras['other_members'] = detail['otherMembers']

            # Always-copied fields: extras key <- detail field.
            for extras_key, field in (('wy_id', 'commID'),
                                      ('wy_code', 'ownerID'),
                                      ('wy_type_code', 'type'),
                                      ('budget', 'budget')):
                org.extras[extras_key] = detail[field]

            # Fields copied only when they hold a truthy value.
            for extras_key, field in (('statutory_authority', 'statAuthority'),
                                      ('seat_distribution', 'number')):
                if detail[field]:
                    org.extras[extras_key] = detail[field]

            org.add_identifier(scheme='WY Committee ID',
                               identifier=str(detail['commID']))
            org.add_identifier(scheme='WY Committee Code',
                               identifier=str(detail['ownerID']))

            if detail['description']:
                org.add_identifier(scheme='Common Name',
                                   identifier=detail['description'])

            org.add_source(source_template.format(session, detail['ownerID']))

            yield org
Ejemplo n.º 10
0
    def scrape(self, session=None):
        """Scrape the committees of a session and yield them as organizations.

        Args:
            session: session identifier placed in the API URLs. Defaults to
                ``self.latest_session()``, in which case the choice is logged.

        Yields:
            One ``Organization`` per row returned by the committee list
            endpoint, populated from the matching committee detail endpoint.
        """
        if session is None:
            session = self.latest_session()
            self.info("no session specified, using %s", session)

        # com_types = ['J', 'SE', 'O']
        # base_url = 'https://wyoleg.gov/LsoService/api/committeeList/2018/J'
        url = "https://wyoleg.gov/LsoService/api/committees/{}".format(session)
        raw_listing = self.get(url).content
        rows = json.loads(raw_listing.decode("utf-8"))

        for row in rows:
            owner_id = row["ownerID"]
            com_url = "https://wyoleg.gov/LsoService/api/committeeDetail/{}/{}".format(
                session, owner_id)
            raw_detail = self.get(com_url).content
            com = json.loads(raw_detail.decode("utf-8"))

            # WY doesn't seem to have any house/senate only committees that I can find
            committee = Organization(
                name=com["commName"],
                chamber="legislature",
                classification="committee",
            )

            for member in com["commMembers"]:
                is_chair = member["chairman"] == "Chairman"
                committee.add_member(member["name"],
                                     "chairman" if is_chair else "member")

            extras = committee.extras

            # some WY committees have non-legislators appointed to the member by the Governor
            # but the formatting is super inconsistent
            other_members = com["otherMembers"]
            if other_members:
                extras["other_members"] = other_members

            extras["wy_id"] = com["commID"]
            extras["wy_code"] = com["ownerID"]
            extras["wy_type_code"] = com["type"]
            extras["budget"] = com["budget"]

            stat_authority = com["statAuthority"]
            if stat_authority:
                extras["statutory_authority"] = stat_authority

            seat_distribution = com["number"]
            if seat_distribution:
                extras["seat_distribution"] = seat_distribution

            committee.add_identifier(identifier=str(com["commID"]),
                                     scheme="WY Committee ID")
            committee.add_identifier(identifier=str(com["ownerID"]),
                                     scheme="WY Committee Code")

            common_name = com["description"]
            if common_name:
                committee.add_identifier(identifier=common_name,
                                         scheme="Common Name")

            committee.add_source(
                "http://wyoleg.gov/Committees/{}/{}".format(session, com["ownerID"]))

            yield committee
Ejemplo n.º 11
0
    def scrape(self, session=None):
        """Yield an ``Organization`` for every committee listed for a session.

        Args:
            session: session identifier for the API URLs; ``None`` selects
                ``self.latest_session()`` and logs the chosen value.

        Yields:
            One ``Organization`` per committee row, carrying members, extras,
            identifiers and a source URL taken from the committee detail
            response.
        """
        if session is None:
            session = self.latest_session()
            self.info('no session specified, using %s', session)

        def fetch_json(target_url):
            # Fetch a URL through the scraper and decode the UTF-8 JSON body.
            return json.loads(self.get(target_url).content.decode('utf-8'))

        # com_types = ['J', 'SE', 'O']
        # base_url = 'https://wyoleg.gov/LsoService/api/committeeList/2018/J'
        url = 'https://wyoleg.gov/LsoService/api/committees/{}'.format(session)

        for row in fetch_json(url):
            com = fetch_json(
                'https://wyoleg.gov/LsoService/api/committeeDetail/{}/{}'.format(
                    session, row['ownerID']))

            # WY doesn't seem to have any house/senate only committees that I can find
            committee = Organization(
                classification='committee', chamber='legislature', name=com['commName'])

            for member in com['commMembers']:
                role = 'member'
                if member['chairman'] == 'Chairman':
                    role = 'chairman'
                committee.add_member(member['name'], role)

            # some WY committees have non-legislators appointed to the member by the Governor
            # but the formatting is super inconsistent
            if com['otherMembers']:
                committee.extras['other_members'] = com['otherMembers']

            committee.extras['wy_id'] = com['commID']
            committee.extras['wy_code'] = com['ownerID']
            committee.extras['wy_type_code'] = com['type']
            committee.extras['budget'] = com['budget']

            if com['statAuthority']:
                committee.extras['statutory_authority'] = com['statAuthority']
            if com['number']:
                committee.extras['seat_distribution'] = com['number']

            # Identifiers that are always recorded, stringified from the detail fields.
            for scheme, field in (('WY Committee ID', 'commID'),
                                  ('WY Committee Code', 'ownerID')):
                committee.add_identifier(scheme=scheme, identifier=str(com[field]))

            if com['description']:
                committee.add_identifier(
                    scheme='Common Name', identifier=com['description'])

            source_url = 'http://wyoleg.gov/Committees/{}/{}'.format(
                session, com['ownerID'])
            committee.add_source(source_url)

            yield committee
Ejemplo n.º 12
0
    def scrape_committee(self, committee_id):
        """Convert one committee record from the API into an ``Organization``.

        Fetches ``'committees/' + committee_id`` through ``self.api``, builds
        the organization (as a child of an already registered parent when the
        record is a subcommittee with a ``parent_id``), registers it in
        ``self._committees`` under every id in ``all_ids`` and queues its
        members in ``self._roles``.

        Args:
            committee_id: id of the committee to fetch.

        Returns:
            The new ``Organization``.

        Raises:
            KeyError: if a required field is missing from the record, or the
                parent committee is not in ``self._committees``.
            AssertionError: if a record without a subcommittee name carries a
                ``parent_id``, or the record has fields not handled here.
        """
        old = self.api('committees/' + committee_id + '?')
        # Kept under its own name so later loops cannot overwrite it.
        committee_key = old.pop('id')

        # Fields that must be present but are not carried over.
        for key in ('created_at', 'updated_at', 'state'):
            old.pop(key)
        # Fields that may be present and are dropped.
        for key in ('country', 'level', 'votesmart_id', '+short_name',
                    '+session', '+az_committee_id'):
            old.pop(key, None)

        com = old.pop('committee')
        sub = old.pop('subcommittee')
        parent_id = old.pop('parent_id')
        chamber = old.pop('chamber')
        if chamber == 'joint':
            chamber = ''
        if self.state in ('ne', 'dc'):
            chamber = 'legislature'

        if sub:
            if parent_id:
                parent = self._committees[parent_id]._id
                new = Organization(sub, parent_id=parent, classification='committee')
            else:
                new = Organization(com + ': ' + sub, chamber=chamber, classification='committee')
        else:
            new = Organization(com, chamber=chamber, classification='committee')
            assert parent_id is None

        # all_ids: every id is an identifier of, and a lookup key for, this org
        for alias_id in old.pop('all_ids'):
            new.add_identifier(alias_id, scheme='openstates')
            self._committees[alias_id] = new

        # sources
        for source in old.pop('sources'):
            new.add_source(**source)

        # members
        start, end = self.get_term_years()
        for role in old.pop('members'):
            # leg_id, com_id, role, start, end
            if role['leg_id']:
                self._roles.add((role['leg_id'], committee_key, role['role'], start, end))

        # Order matters: '+comm_type' and 'comm_type' (and similar pairs) map
        # to the same extras key, so the later entry wins when both are set.
        to_extras = ('+twitter', '+description', '+code', '+secretary', '+office_hours',
                     '+office_phone', '+meetings_info', '+status', '+aide', '+contact_info',
                     '+comm_type', 'comm_type', 'aide', 'contact_info', '+town_represented',
                     '+action_code')
        for k in to_extras:
            v = old.pop(k, None)
            if v:
                new.extras[k.replace('+', '')] = v

        # Anything left over is a field this conversion does not know about.
        assert not old, old.keys()

        return new
Ejemplo n.º 13
0
    def scrape_committee(self, committee_id):
        """Convert one committee record from the API into an ``Organization``.

        Fetches ``'committees/' + committee_id`` through ``self.api``, builds
        the organization (as a child of an already registered parent when the
        record is a subcommittee with a ``parent_id``), registers it in
        ``self._committees`` under every id in ``all_ids`` and queues its
        members in ``self._roles``.

        Args:
            committee_id: id of the committee to fetch.

        Returns:
            The new ``Organization``.

        Raises:
            KeyError: if a required field is missing from the record, or the
                parent committee is not in ``self._committees``.
            AssertionError: if a record without a subcommittee name carries a
                ``parent_id``, or the record has fields not handled here.
        """
        old = self.api('committees/' + committee_id + '?')
        # Kept under its own name so the all_ids loop below cannot clobber it
        # before it is used for the member roles.
        committee_key = old.pop('id')
        old.pop('created_at')
        old.pop('updated_at')
        old.pop('country', None)
        old.pop('level', None)
        old.pop('state')
        old.pop('votesmart_id', None)
        old.pop('+short_name', None)
        old.pop('+session', None)
        old.pop('+az_committee_id', None)

        com = old.pop('committee')
        sub = old.pop('subcommittee')
        parent_id = old.pop('parent_id')
        chamber = old.pop('chamber')
        if chamber == 'joint':
            chamber = ''
        if self.state in ('ne', 'dc'):
            chamber = 'legislature'

        if sub and parent_id:
            # Subcommittee attached to a parent that was scraped earlier.
            parent = self._committees[parent_id]._id
            new = Organization(sub,
                               parent_id=parent,
                               classification='committee')
        elif sub:
            # Subcommittee without a parent: fold both names into one.
            new = Organization(com + ': ' + sub,
                               chamber=chamber,
                               classification='committee')
        else:
            new = Organization(com,
                               chamber=chamber,
                               classification='committee')
            assert parent_id is None

        # all_ids: every id is an identifier of, and a lookup key for, this org
        for alias_id in old.pop('all_ids'):
            new.add_identifier(alias_id, scheme='openstates')
            self._committees[alias_id] = new

        # sources
        for source in old.pop('sources'):
            new.add_source(**source)

        # members
        start, end = self.get_term_years()
        for role in old.pop('members'):
            # leg_id, com_id, role, start, end
            if role['leg_id']:
                self._roles.add(
                    (role['leg_id'], committee_key, role['role'], start, end))

        # Order matters: keys with and without the '+' prefix map to the same
        # extras key, so the later entry wins when both are set.
        to_extras = [
            '+twitter',
            '+description',
            '+code',
            '+secretary',
            '+office_hours',
            '+office_phone',
            '+meetings_info',
            '+status',
            '+aide',
            '+contact_info',
            '+comm_type',
            'comm_type',
            'aide',
            'contact_info',
            '+town_represented',
            '+action_code',
        ]
        for k in to_extras:
            v = old.pop(k, None)
            if v:
                new.extras[k.replace('+', '')] = v

        # Anything left over is a field this conversion does not know about.
        assert not old, old.keys()

        return new
Ejemplo n.º 14
0
    def scrape(self):
        """Yield historical committees and subcommittees from the YAML file.

        Reads ``congress-legislators/committees-historical.yaml`` next to this
        module. Each committee is yielded as an ``Organization`` named after
        the name stored under its highest numeric key in ``names``, prefixed
        by chamber; its other names become alternate names. Each of its
        subcommittees is then yielded with the committee as parent.

        The subcommittee with combined thomas id ``SSJU15`` is skipped, and
        ``SSJU12`` receives ``SSJU15`` as an additional identifier.

        Raises:
            RuntimeError: if a committee's ``type`` is neither ``'house'``
                nor ``'senate'``.
        """
        source_url = ('https://github.com/unitedstates/congress-legislators'
                      '/blob/master/committees-historical.yaml')
        chamber_by_type = {'house': 'lower', 'senate': 'upper'}
        prefix_by_chamber = {
            'lower': 'House Committee on ',
            'upper': 'Senate Committee on ',
        }
        subcommittee_prefix = 'Subcommittee on '

        def latest_name(names):
            # Name stored under the numerically largest key.
            by_number = {int(key): name for key, name in names.items()}
            return by_number[max(by_number)]

        committee_path = (Path(__file__).parent /
                          'congress-legislators/committees-historical.yaml')
        with committee_path.open() as f:
            # NOTE(review): CLoader is not a safe loader; acceptable only
            # while this file is trusted.
            committees = yaml.load(f, Loader=yaml.CLoader)

        for committee in committees:
            committee_type = committee['type']
            if committee_type not in chamber_by_type:
                # Explicit error instead of a bare ``raise``, which outside an
                # ``except`` block only reports "No active exception".
                raise RuntimeError(
                    'unexpected committee type {!r} in {!r}'.format(
                        committee_type, committee))
            chamber = chamber_by_type[committee_type]
            prefix = prefix_by_chamber[chamber]

            c = Organization(prefix + latest_name(committee['names']),
                             classification='committee',
                             chamber=chamber)

            start, end = duration(committee)
            c.founding_date = start
            if end:
                c.dissolution_date = end

            c.add_identifier(committee['thomas_id'] + '00', scheme='thomas_id')
            for scheme in ('house_committee_id', 'senate_committee_id'):
                if scheme in committee:
                    c.add_identifier(committee[scheme], scheme=scheme)

            c.add_source(source_url)

            # Sorted so alternate names are added in a stable order.
            for name in sorted(set(committee['names'].values())):
                name = prefix + name
                if name != c.name:
                    c.add_name(name)

            yield c

            for subcommittee in committee.get('subcommittees', []):
                sc = Organization(
                    subcommittee_prefix + latest_name(subcommittee['names']),
                    classification='committee',
                    parent_id=c)

                start, end = duration(subcommittee)
                sc.founding_date = start
                if end:
                    sc.dissolution_date = end

                thomas_id = committee['thomas_id'] + subcommittee['thomas_id']
                sc.add_identifier(thomas_id, scheme='thomas_id')
                sc.add_source(source_url)

                if thomas_id == 'SSJU12':
                    sc.add_identifier('SSJU15', scheme='thomas_id')
                elif thomas_id == 'SSJU15':
                    # Not yielded; its id is attached to SSJU12 above.
                    continue

                # Diagnostic output kept from the original implementation.
                if 'Oversight and Investigations' in sc.name:
                    print(thomas_id)

                for name in sorted(set(subcommittee['names'].values())):
                    name = subcommittee_prefix + name
                    if name != sc.name:
                        sc.add_name(name)

                yield sc