コード例 #1
0
ファイル: utils.py プロジェクト: cmonagle/scrapers-ca
    def get_organizations(self):
        """Yield the posts of this jurisdiction's division, then its organization.

        A leader post is yielded for the division itself unless the division
        is a province or territory. One member post is then yielded for each
        child division that is neither a 'place' nor of a type listed in the
        optional ``exclude_type_ids`` attribute. When there are no such
        children and the division has a ``posts_count``, numbered seats are
        attached directly to the organization instead of being yielded.
        """
        excluded_types = getattr(self, 'exclude_type_ids', [])
        label_from_id = getattr(self, 'use_type_id', False)

        organization = Organization(self.name, classification=self.classification)

        parent = Division.get(self.division_id)
        if parent._type not in ('province', 'territory'):
            yield Post(role=styles_of_address[self.division_id]['Leader'], label=parent.name, division_id=parent.id, organization_id=organization._id)

        children = [
            division for division in parent.children()
            if not (division._type == 'place' or division._type in excluded_types)
        ]

        for division in children:
            if not division:
                continue
            if label_from_id:
                # Build the label from the last path segment of the division
                # identifier rather than from the division's name.
                label = division.id.rsplit('/', 1)[1].capitalize().replace(':', ' ')
            else:
                label = division.name
            yield Post(role=styles_of_address[self.division_id]['Member'], label=label, division_id=division.id, organization_id=organization._id)

        if not children:
            posts_count = parent.attrs['posts_count']
            if posts_count:
                # Start at 1 and stop before posts_count: exclude Mayor.
                for seat in range(1, int(posts_count)):
                    organization.add_post(role=styles_of_address[self.division_id]['Member'], label='{} (seat {})'.format(parent.name, seat), division_id=parent.id)

        yield organization
コード例 #2
0
    def get_organizations(self):
        """Yield the posts of this jurisdiction's division, then its organization.

        A leader post is yielded for the division itself unless the division
        is a province or territory. A member post is yielded for each child
        division that is neither a 'place' nor of a type in
        ``self.exclude_types`` and that passes the ``validFrom`` check below.
        When there are no such children and the division has a
        ``posts_count``, numbered seats are attached directly to the
        organization instead of being yielded.
        """
        organization = Organization(self.name, classification=self.classification)

        address_styles = styles_of_address[self.division_id]
        leader_role = address_styles['Leader']
        member_role = self.member_role or address_styles['Member']

        parent = Division.get(self.division_id)
        # Don't yield posts for premiers.
        if parent._type not in ('province', 'territory'):
            # Yield posts to allow ca_on_toronto to make changes.
            yield Post(role=leader_role, label=parent.name, division_id=parent.id, organization_id=organization._id)

        children = [
            division for division in parent.children()
            if not (division._type == 'place' or division._type in self.exclude_types)
        ]

        for division in children:
            include_undated = not self.skip_null_valid_from
            valid_from = division.attrs.get('validFrom')
            if valid_from:
                # A dated division qualifies once its date is not after today
                # (compared as strings in %Y-%m-%d form) or when the date
                # equals self.valid_from.
                eligible = valid_from <= datetime.now().strftime('%Y-%m-%d') or valid_from == self.valid_from
            else:
                # An undated division qualifies unless skip_null_valid_from is set.
                eligible = include_undated
            if not eligible:
                continue

            if self.use_type_id:
                label = division.id.rsplit('/', 1)[1].capitalize().replace(':', ' ')
            else:
                label = division.name
            # Yield posts to allow ca_on_toronto to make changes.
            yield Post(role=member_role, label=label, division_id=division.id, organization_id=organization._id)

        if not children:
            posts_count = parent.attrs['posts_count']
            if posts_count:
                # Start at 1 and stop before posts_count: exclude Mayor.
                for seat in range(1, int(posts_count)):
                    organization.add_post(role=member_role, label='{} (seat {})'.format(parent.name, seat), division_id=parent.id)

        yield organization
コード例 #3
0
def test_deduplication():
    """Posts sharing a label in different jurisdictions, or a jurisdiction
    with different labels, are all imported; re-importing a post with a new
    role updates it in place.
    """
    executive_org = '~{"classification": "executive"}'

    Organization.objects.create(
        id='us',
        name="United States Executive Branch",
        classification="executive",
        jurisdiction_id="us",
    )
    Organization.objects.create(
        id='nc',
        name="North Carolina Executive Branch",
        classification="executive",
        jurisdiction_id="nc",
    )
    president = ScrapePost(label='executive', role='President', organization_id=executive_org)
    vice_president = ScrapePost(label='vice-executive', role='Vice President', organization_id=executive_org)
    governor = ScrapePost(label='executive', role='Governor', organization_id=executive_org)

    # All three must be imported:
    #   president & governor - same label, different jurisdiction
    #   vice president & president - same jurisdiction, different label
    us_organizations = OrganizationImporter('us')
    nc_organizations = OrganizationImporter('nc')
    PostImporter('us', us_organizations).import_data([president.as_dict(), vice_president.as_dict()])
    PostImporter('nc', nc_organizations).import_data([governor.as_dict()])
    assert Post.objects.count() == 3

    # Changing the role of an existing post is allowed.
    king = ScrapePost(label='executive', role='King', organization_id=executive_org)
    PostImporter('us', us_organizations).import_data([king.as_dict()])

    # No new object, just an update of the role.
    assert Post.objects.count() == 3
    updated = Post.objects.get(organization_id='us', label='executive')
    assert updated.role == 'King'
コード例 #4
0
def test_full_post():
    """Import a fully populated post and check that every field is stored."""
    create_jurisdictions()
    org = Organization.objects.create(name="United States Executive Branch",
                                      classification="executive",
                                      jurisdiction_id="us")
    post = ScrapePost(label='executive', role='President',
                      organization_id='~{"classification": "executive"}',
                      start_date=datetime.date(2015, 5, 18),
                      end_date='2015-05-19',
                      maximum_memberships=2
                      )
    post.add_contact_detail(type='phone', value='555-555-1234', note='this is fake')
    post.add_link('http://example.com/link')

    # import post
    oi = OrganizationImporter('us')
    PostImporter('jurisdiction-id', oi).import_data([post.as_dict()])

    # get post from db and assert it imported correctly
    p = Post.objects.get()
    assert 'ocd-post' in p.id
    assert p.label == post.label
    assert p.role == post.role
    assert p.organization_id == org.id
    assert p.maximum_memberships == 2

    # Fetch the related rows once instead of re-evaluating the queryset for
    # every assertion.
    contact = p.contact_details.all()[0]
    assert contact.type == 'phone'
    assert contact.value == '555-555-1234'
    assert contact.note == 'this is fake'

    link = p.links.all()[0]
    assert link.url == 'http://example.com/link'

    # Dates are compared as ISO strings, whether the scraped value was given
    # as a date object or as a string.
    assert p.start_date == '2015-05-18'
    assert p.end_date == '2015-05-19'
コード例 #5
0
def test_full_post():
    """Import a post with a contact detail and a link, then check what was stored."""
    org = Organization.objects.create(
        name="United States Executive Branch",
        classification="executive",
        jurisdiction_id="jurisdiction-id",
    )
    scraped = ScrapePost(
        label='executive',
        role='President',
        organization_id='~{"classification": "executive"}',
    )
    scraped.add_contact_detail(type='phone', value='555-555-1234', note='this is fake')
    scraped.add_link('http://example.com/link')

    # Import the post.
    org_importer = OrganizationImporter('jurisdiction-id')
    post_importer = PostImporter('jurisdiction-id', org_importer)
    post_importer.import_data([scraped.as_dict()])

    # Fetch the stored post and check it was imported correctly.
    stored = Post.objects.get()
    assert 'ocd-post' in stored.id
    assert stored.label == scraped.label
    assert stored.role == scraped.role
    assert stored.organization_id == org.id

    assert stored.contact_details.all()[0].type == 'phone'
    assert stored.contact_details.all()[0].value == '555-555-1234'
    assert stored.contact_details.all()[0].note == 'this is fake'

    assert stored.links.all()[0].url == 'http://example.com/link'
コード例 #6
0
def test_basic_post():
    """A post with a string label shows that label in ``str()`` and validates."""
    representative = Post(
        label='1',
        role='Representative',
        organization_id='fake_org',
    )
    rendered = str(representative)
    assert '1' in rendered
    representative.validate()
コード例 #7
0
def test_basic_invalid_post():
    """A post whose label is the integer 1 fails validation with ValueError."""
    invalid = Post(
        label=1,
        role='Representative',
        organization_id='fake_org',
    )
    with pytest.raises(ValueError):
        invalid.validate()
コード例 #8
0
ファイル: people.py プロジェクト: spatialbits/scrapers-ca
    def scrape(self):
        """Yield organizations, posts and people read from the CSV at ``self.csv_url``.

        For every row accepted by ``self.is_valid_row`` the values are first
        passed through ``self.corrections``. The row's organization is yielded
        the first time its (case-insensitive) name is seen, a post is yielded
        for the row, and finally a validated ``Person`` is yielded. Any
        exception raised while handling a row is printed and the row is
        skipped.
        """
        organizations = {}
        seat_numbers = defaultdict(lambda: defaultdict(int))

        reader = self.csv_reader(
            self.csv_url,
            delimiter=self.delimiter,
            header=True,
            encoding=self.encoding,
            skip_rows=self.skip_rows,
        )
        reader.fieldnames = [self.header_converter(column) for column in reader.fieldnames]

        for row in reader:
            try:
                if not self.is_valid_row(row):
                    continue

                # A correction is either a mapping of bad value -> good value
                # or a callable applied to the value.
                for key, correction in self.corrections.items():
                    if isinstance(correction, dict):
                        if row[key] in correction:
                            row[key] = correction[row[key]]
                    else:
                        row[key] = correction(row[key])

                classification = 'legislature'

                organization_name = row['organization']
                organization_key = organization_name.lower()
                if organization_key not in organizations:
                    new_organization = Organization(organization_name, classification=classification)
                    new_organization.add_source(self.csv_url)
                    yield new_organization
                    organizations[organization_key] = new_organization
                organization = organizations[organization_key]

                # Rows without a primary role default to 'Councillor'.
                row['primary role'] = row['primary role'] or 'Councillor'
                role = row['primary role']

                yield Post(role=role, label=organization_name, organization_id=organization._id)

                name = row['name'].strip(' .,')
                district = row['district name']

                if self.many_posts_per_area and role not in self.unique_roles:
                    # Number the seats that share a role and district.
                    seats_for_role = seat_numbers[role]
                    seats_for_role[district] += 1
                    district = '{} (seat {})'.format(district, seats_for_role[district])

                person = Person(
                    primary_org=classification,
                    name=name,
                    district=district,
                    role=role,
                    party=row.get('party name'),
                )
                person.add_source(self.csv_url)

                if row.get('gender'):
                    person.gender = row['gender']
                if row.get('photo url'):
                    person.image = row['photo url']

                if row.get('source url'):
                    person.add_source(row['source url'].strip(' .,'))

                if row.get('website'):
                    person.add_link(row['website'], note='web site')
                if row.get('facebook'):
                    # Drop everything from the first '#' or '?' onwards.
                    person.add_link(re.sub(r'[#?].+', '', row['facebook']))
                if row.get('twitter'):
                    person.add_link(row['twitter'])

                # 'email' and 'address' are read with plain indexing, so a
                # row lacking either column raises and is skipped below.
                if row['email']:
                    person.add_contact('email', row['email'].strip(' .,'))
                if row['address']:
                    person.add_contact('address', row['address'], 'legislature')
                for column, contact_type in (('phone', 'voice'), ('fax', 'fax'), ('cell', 'cell')):
                    if row.get(column):
                        person.add_contact(contact_type, row[column], 'legislature')

                if row.get('birth date'):
                    person.birth_date = row['birth date']

                if row.get('incumbent'):
                    person.extras['incumbent'] = row['incumbent']

                if name in self.other_names:
                    for other_name in self.other_names[name]:
                        person.add_name(other_name)

                # Validate person entity so that we can catch the exception if needed.
                person.validate()

                yield person
            except Exception as e:
                # Best effort: report the problem and move on to the next row.
                print(repr(e))
コード例 #9
0
def test_basic_post():
    """A post with a string label shows that label in ``str()`` and validates."""
    representative = Post(
        label='1',
        role='Representative',
        organization_id='fake_org',
    )
    rendered = str(representative)
    assert '1' in rendered
    representative.validate()
コード例 #10
0
def test_basic_invalid_post():
    """A post whose label is the integer 1 fails validation with ValueError."""
    invalid = Post(
        label=1,
        role='Representative',
        organization_id='fake_org',
    )
    with pytest.raises(ValueError):
        invalid.validate()
コード例 #11
0
    def scrape_current_legislators(self, repos):
        """Yield posts, memberships and people for the legislators listed in ``repos``.

        Each item of ``repos`` is passed to ``self.get_url`` and the result is
        parsed with ``self.yamlize``. For every term of every entry this
        yields, in order: the chamber ``Post`` (only the first time that post
        is seen within the repo), a chamber ``Membership`` and, when the term
        records a party, a party ``Membership``. The ``Person`` is yielded
        after its memberships, and only if the entry has at least one term.

        Raises ``KeyError`` for a term whose ``type`` is neither ``'rep'`` nor
        ``'sen'``.
        """
        for repo in repos:
            CURRENT_LEGISLATORS = self.get_url(repo)

            people = self.yamlize(CURRENT_LEGISLATORS)
            # Posts already yielded for this repo, keyed by (term type,
            # division id). The term type is part of the key because a
            # representative with district 0 and a senator of the same state
            # share the state-level division id but must not share a post.
            posts = {}

            for person in people:
                name = person['name'].get('official_full')
                if name is None:
                    name = "{name[first]} {name[last]}".format(**person)

                # Look the birthday up per entry so that an entry without one
                # neither fails nor inherits the previous entry's date.
                birth_date = person.get('bio', {}).get('birthday')

                # NOTE(review): no de-duplication is attempted; every entry
                # gets its own Person.
                person_kwargs = {'name': name}
                if birth_date is not None:
                    person_kwargs['birth_date'] = birth_date
                who = Person(**person_kwargs)
                who.add_source(url=CURRENT_LEGISLATORS,
                               note="unitedstates project on GitHub")

                has_term = False

                for term in person.get('terms', []):
                    has_term = True
                    start_date = term['start']
                    end_date = term['end']
                    state = term['state']
                    type_ = term['type']
                    district = term.get('district', None)
                    party = term.get('party', None)

                    role = {
                        'rep': 'Representative',
                        'sen': 'Senator',
                    }[type_]

                    # Work out which seat, if any, this term occupies.
                    division_id = None
                    label = None
                    if type_ == "sen":
                        division_id = (
                            "ocd-division/country:us/state:{state}".format(
                                state=state.lower()))
                        # NOTE(review): "Senitor" is misspelled, but the label
                        # is emitted data; correct it together with any
                        # already-imported posts.
                        label = "Senitor for %s" % (state)
                    elif district is not None:
                        label = "%s for District %s in %s" % (role, district,
                                                              state)
                        if district == 0:
                            # District 0 maps to the state itself (presumably
                            # an at-large seat).
                            division_id = (
                                "ocd-division/country:us/state:{state}".format(
                                    state=state.lower()))
                        else:
                            division_id = (
                                "ocd-division/country:us/"
                                "state:{state}/cd:{district}".format(
                                    state=state.lower(), district=district))

                    # A representative term without a district gets no chamber
                    # post or membership.
                    if division_id is not None:
                        organization_id = {
                            "rep": self.house,
                            "sen": self.senate,
                        }[type_]._id

                        post_key = (type_, division_id)
                        post = posts.get(post_key)
                        if post is None:
                            post = Post(organization_id=organization_id,
                                        division_id=division_id,
                                        label=label,
                                        role=role)
                            posts[post_key] = post
                            yield post

                        yield Membership(post_id=post._id,
                                         role=role,
                                         label=label,
                                         start_date=start_date,
                                         end_date=end_date,
                                         person_id=who._id,
                                         organization_id=organization_id)

                    if party == "Democrat":
                        party = "Democratic"

                    if party:
                        yield Membership(role='member',
                                         start_date=start_date,
                                         end_date=end_date,
                                         person_id=who._id,
                                         organization_id=make_pseudo_id(
                                             classification="party",
                                             name=party))

                # An identifier value may be a single value or a list of them.
                for key, value in person.get('id', {}).items():
                    if isinstance(value, list):
                        for v in value:
                            who.add_identifier(str(v), scheme=key)
                    else:
                        who.add_identifier(str(value), scheme=key)

                if has_term:
                    yield who
コード例 #12
0
ファイル: test_popolo.py プロジェクト: paultag/pupa
def test_basic_post():
    """A post built from positional arguments shows '1' in ``str()`` and validates."""
    representative = Post('1', 'Representative', 'fake_org')
    rendered = str(representative)
    assert '1' in rendered
    representative.validate()
コード例 #13
0
ファイル: test_popolo.py プロジェクト: paultag/pupa
def test_basic_invalid_post():
    """A post whose first argument is the integer 1 fails validation with ValueError."""
    invalid = Post(1, 'Representative', 'fake_org')
    with pytest.raises(ValueError):
        invalid.validate()
コード例 #14
0
def test_basic_post():
    """A post built from positional arguments shows '1' in ``str()`` and validates."""
    representative = Post('1', 'Representative', 'fake_org')
    rendered = str(representative)
    assert '1' in rendered
    representative.validate()
コード例 #15
0
def test_basic_invalid_post():
    """A post whose first argument is the integer 1 fails validation with ValueError."""
    invalid = Post(1, 'Representative', 'fake_org')
    with pytest.raises(ValueError):
        invalid.validate()