Exemple #1
0
    def do_import(self, juris, args):
        """Import the scraped data for one jurisdiction and return the report.

        Data is read from ``settings.SCRAPED_DATA_DIR/<args.module>``. The
        jurisdiction, organization, person, post and membership importers run
        in that order, and each importer's result is merged into one dict with
        ``dict.update`` (later importers win on duplicate keys).
        """
        source_dir = os.path.join(settings.SCRAPED_DATA_DIR, args.module)
        jid = juris.jurisdiction_id

        jurisdictions = JurisdictionImporter(jid)
        organizations = OrganizationImporter(jid)
        people = PersonImporter(jid)
        posts = PostImporter(jid, organizations)
        memberships = MembershipImporter(jid, people, organizations, posts)
        # Bill, vote and event importers are not wired in here yet.

        # TODO: wrap in a transaction
        summary = {}
        for importer in (jurisdictions, organizations, people, posts, memberships):
            summary.update(importer.import_directory(source_dir))
        return summary
def do_import(stream, stransaction):
    """Import the objects in ``stream`` atomically and return the report.

    ``stream`` is materialised into a list once so it can be scanned per
    object type. For each type, the ``as_dict()`` form of every matching
    object is handed to the corresponding importer, in the order
    organizations, people, posts, memberships. All four imports run inside a
    single ``transaction.atomic()`` block.

    The jurisdiction id is taken from ``stransaction.jurisdiction.id``.
    """
    scraped = list(stream)
    jid = stransaction.jurisdiction.id

    organizations = OrganizationImporter(jid)
    people = PersonImporter(jid)
    posts = PostImporter(jid, organizations)
    memberships = MembershipImporter(jid, people, organizations, posts)

    def dicts_of(kind):
        # Lazily yield the dict form of every scraped object of type `kind`.
        return (obj.as_dict() for obj in scraped if isinstance(obj, kind))

    summary = {}
    with transaction.atomic():
        stages = (
            (organizations, Organization),
            (people, Person),
            (posts, Post),
            (memberships, Membership),
        )
        for importer, kind in stages:
            summary.update(importer.import_data(dicts_of(kind)))

    return summary
Exemple #3
0
def test_deduplication():
    """Posts are keyed by organization and label; role changes update in place."""
    # Pseudo-id selecting the executive organization of the importer's jurisdiction.
    executive_org = '~{"classification": "executive"}'

    Organization.objects.create(
        id='us',
        name="United States Executive Branch",
        classification="executive",
        jurisdiction_id="us",
    )
    Organization.objects.create(
        id='nc',
        name="North Carolina Executive Branch",
        classification="executive",
        jurisdiction_id="nc",
    )

    president = ScrapePost(label='executive', role='President',
                           organization_id=executive_org)
    vice_president = ScrapePost(label='vice-executive', role='Vice President',
                                organization_id=executive_org)
    governor = ScrapePost(label='executive', role='Governor',
                          organization_id=executive_org)

    # All three posts must be imported:
    #   president & governor share a label but live in different jurisdictions
    #   president & vice president share a jurisdiction but differ in label
    us_orgs = OrganizationImporter('us')
    nc_orgs = OrganizationImporter('nc')
    us_batch = [president.as_dict(), vice_president.as_dict()]
    PostImporter('us', us_orgs).import_data(us_batch)
    nc_batch = [governor.as_dict()]
    PostImporter('nc', nc_orgs).import_data(nc_batch)
    assert Post.objects.count() == 3

    # Re-importing the same label with a different role is allowed ...
    president = ScrapePost(label='executive', role='King',
                           organization_id=executive_org)
    PostImporter('us', us_orgs).import_data([president.as_dict()])

    # ... and must update the existing row rather than add a fourth one.
    assert Post.objects.count() == 3
    updated = Post.objects.get(organization_id='us', label='executive')
    assert updated.role == 'King'
Exemple #4
0
    def do_import(self, juris, args):
        """Run the people-related importers for ``juris`` and return the report.

        The scraped files are looked up under
        ``settings.SCRAPED_DATA_DIR/<args.module>``. Importers run in the order
        jurisdiction, organization, person, post, membership; every result is
        merged into the returned dict via ``dict.update``.
        """
        scraped_dir = os.path.join(settings.SCRAPED_DATA_DIR, args.module)
        jurisdiction_id = juris.jurisdiction_id

        juris_imp = JurisdictionImporter(jurisdiction_id)
        org_imp = OrganizationImporter(jurisdiction_id)
        person_imp = PersonImporter(jurisdiction_id)
        post_imp = PostImporter(jurisdiction_id, org_imp)
        membership_imp = MembershipImporter(
            jurisdiction_id, person_imp, org_imp, post_imp
        )
        # Bill, vote and event importers are currently disabled in this command.

        # TODO: wrap in a transaction
        merged = {}
        pipeline = [juris_imp, org_imp, person_imp, post_imp, membership_imp]
        for step in pipeline:
            merged.update(step.import_directory(scraped_dir))

        return merged
Exemple #5
0
def test_resolve_special_json_id():
    """Pseudo-ids on posts resolve within the importer's own jurisdiction."""
    organizations = (
        dict(id='us', name="United States Executive Branch",
             classification="executive", jurisdiction_id="us"),
        dict(id='nc', name="North Carolina Executive Branch",
             classification="executive", jurisdiction_id="nc"),
    )
    for fields in organizations:
        Organization.objects.create(**fields)

    posts = (
        dict(id='pres', label='executive', role='President',
             organization_id='us'),
        dict(id='vpres', label='vice-executive', role='Vice President',
             organization_id='us'),
        dict(id='gov', label='executive', role='Governor',
             organization_id='nc'),
    )
    for fields in posts:
        Post.objects.create(**fields)

    oi = OrganizationImporter('')
    # (importer jurisdiction, pseudo-id to resolve, expected post id)
    cases = (
        ('us', '~{"label": "executive"}', 'pres'),
        ('us', '~{"label": "vice-executive"}', 'vpres'),
        ('nc', '~{"label": "executive"}', 'gov'),
    )
    for jurisdiction_id, pseudo_id, expected in cases:
        resolved = PostImporter(jurisdiction_id, oi).resolve_json_id(pseudo_id)
        assert resolved == expected
Exemple #6
0
def do_import(stream, transaction):
    """Import the objects in ``stream`` and return the merged report.

    ``stream`` is turned into a list once so it can be scanned per object
    type. For each type, the ``as_dict()`` form of every matching object is
    passed to the corresponding importer, in the order jurisdictions,
    organizations, people, posts, memberships.

    NOTE(review): ``transaction`` is only used to read
    ``transaction.jurisdiction.id``. Inside this function it hides any
    module-level name ``transaction``, and the imports below are not wrapped
    in a database transaction (see pupa.cli.commands.update, which does wrap
    them).
    """
    items = list(stream)
    jid = transaction.jurisdiction.id

    jurisdictions = JurisdictionImporter(jid)
    organizations = OrganizationImporter(jid)
    people = PersonImporter(jid)
    posts = PostImporter(jid, organizations)
    memberships = MembershipImporter(jid, people, organizations, posts)

    def as_dicts(kind):
        # Lazily produce the dict form of each item that is an instance of `kind`.
        return (item.as_dict() for item in items if isinstance(item, kind))

    summary = {}
    stages = (
        (jurisdictions, Jurisdiction),
        (organizations, Organization),
        (people, Person),
        (posts, Post),
        (memberships, Membership),
    )
    for importer, kind in stages:
        summary.update(importer.import_data(as_dicts(kind)))

    return summary
Exemple #7
0
def do_import(stream, transaction):
    """Feed the scraped objects in ``stream`` to the importers, by type.

    The stream is copied into a list, then each importer receives the
    ``as_dict()`` output of the objects matching its type. Order of import:
    jurisdictions, organizations, people, posts, memberships. The combined
    report dict is returned.

    NOTE(review): the ``transaction`` argument supplies the jurisdiction id
    (``transaction.jurisdiction.id``) and nothing else; no database
    transaction is opened here, although the original comment suggests one
    should be (cf. pupa.cli.commands.update).
    """
    collected = list(stream)
    jurisdiction_id = transaction.jurisdiction.id

    juris_imp = JurisdictionImporter(jurisdiction_id)
    org_imp = OrganizationImporter(jurisdiction_id)
    person_imp = PersonImporter(jurisdiction_id)
    post_imp = PostImporter(jurisdiction_id, org_imp)
    membership_imp = MembershipImporter(
        jurisdiction_id, person_imp, org_imp, post_imp
    )

    def select(kind):
        # Generator over the dict form of every collected object of type `kind`.
        return (obj.as_dict() for obj in collected if isinstance(obj, kind))

    merged = {}
    plan = [
        (juris_imp, Jurisdiction),
        (org_imp, Organization),
        (person_imp, Person),
        (post_imp, Post),
        (membership_imp, Membership),
    ]
    for importer, kind in plan:
        merged.update(importer.import_data(select(kind)))

    return merged
Exemple #8
0
    def do_import(self, juris, args):
        """Run every enabled importer for ``juris`` and refresh session reports.

        Scraped files are read from ``settings.SCRAPED_DATA_DIR/<args.module>``.
        Jurisdictions are always imported; the other groups are gated by the
        ``ENABLE_PEOPLE_AND_ORGS``, ``ENABLE_BILLS``, ``ENABLE_EVENTS`` and
        ``ENABLE_VOTES`` settings. All imports share one atomic transaction.

        Afterwards, a data-quality report is regenerated for each session
        reported by the bill and vote-event importers, replacing any stored
        report for that session.

        Returns the dict built by merging every importer's report.
        """
        # Imported lazily to avoid loading Django code unless an import runs.
        from pupa.importers import (
            JurisdictionImporter,
            OrganizationImporter,
            PersonImporter,
            PostImporter,
            MembershipImporter,
            BillImporter,
            VoteEventImporter,
            EventImporter,
        )
        from pupa.reports import generate_session_report
        from pupa.models import SessionDataQualityReport

        source_dir = os.path.join(settings.SCRAPED_DATA_DIR, args.module)
        jid = juris.jurisdiction_id

        jurisdictions = JurisdictionImporter(jid)
        organizations = OrganizationImporter(jid)
        people = PersonImporter(jid)
        posts = PostImporter(jid, organizations)
        memberships = MembershipImporter(jid, people, organizations, posts)
        bills = BillImporter(jid, organizations, people)
        vote_events = VoteEventImporter(jid, people, organizations, bills)
        events = EventImporter(jid, organizations, people, bills, vote_events)

        summary = {}

        def run_stage(message, importer):
            # Announce the stage, then merge that importer's report.
            print(message)
            summary.update(importer.import_directory(source_dir))

        with transaction.atomic():
            run_stage('import jurisdictions...', jurisdictions)
            if settings.ENABLE_PEOPLE_AND_ORGS:
                run_stage('import organizations...', organizations)
                run_stage('import people...', people)
                run_stage('import posts...', posts)
                run_stage('import memberships...', memberships)
            if settings.ENABLE_BILLS:
                run_stage('import bills...', bills)
            if settings.ENABLE_EVENTS:
                run_stage('import events...', events)
            if settings.ENABLE_VOTES:
                run_stage('import vote events...', vote_events)

        # Gather every session touched by this run, then rebuild its report.
        touched_sessions = set(bills.get_seen_sessions())
        touched_sessions.update(vote_events.get_seen_sessions())
        for session in touched_sessions:
            fresh_report = generate_session_report(session)
            # Delete-and-save is atomic so a session is never left without a report.
            with transaction.atomic():
                stale = SessionDataQualityReport.objects.filter(legislative_session=session)
                stale.delete()
                fresh_report.save()

        return summary
Exemple #9
0
    def do_import(self, juris, args):
        """Import all scraped data for ``juris`` in one atomic transaction.

        Scraped files are read from ``settings.SCRAPED_DATA_DIR/<args.module>``.
        Stages run in this order: jurisdictions, organizations, people, posts,
        memberships, bills, events, disclosures, votes. A progress line is
        printed before each stage and each stage's report is merged into the
        returned dict.
        """
        # Imported lazily to avoid loading Django code unless an import runs.
        from pupa.importers import (
            JurisdictionImporter,
            OrganizationImporter,
            PersonImporter,
            PostImporter,
            MembershipImporter,
            BillImporter,
            VoteImporter,
            EventImporter,
            DisclosureImporter,
        )

        source_dir = os.path.join(settings.SCRAPED_DATA_DIR, args.module)
        jid = juris.jurisdiction_id

        jurisdictions = JurisdictionImporter(jid)
        organizations = OrganizationImporter(jid)
        people = PersonImporter(jid)
        posts = PostImporter(jid, organizations)
        memberships = MembershipImporter(jid, people, organizations, posts)
        bills = BillImporter(jid, organizations, people)
        votes = VoteImporter(jid, people, organizations, bills)
        events = EventImporter(jid, organizations, people)
        disclosures = DisclosureImporter(jid, organizations, people, events)

        # (progress message, importer) pairs in execution order.
        stages = (
            ('import jurisdictions...', jurisdictions),
            ('import organizations...', organizations),
            ('import people...', people),
            ('import posts...', posts),
            ('import memberships...', memberships),
            ('import bills...', bills),
            ('import events...', events),
            ('import disclosures...', disclosures),
            ('import votes...', votes),
        )

        summary = {}
        with transaction.atomic():
            for message, importer in stages:
                print(message)
                summary.update(importer.import_directory(source_dir))

        return summary
Exemple #10
0
def test_full_post():
    """A scraped post with a contact detail and a link round-trips through import."""
    executive = Organization.objects.create(
        name="United States Executive Branch",
        classification="executive",
        jurisdiction_id="jurisdiction-id",
    )
    scraped = ScrapePost(
        label='executive',
        role='President',
        organization_id='~{"classification": "executive"}',
    )
    scraped.add_contact_detail(type='phone', value='555-555-1234', note='this is fake')
    scraped.add_link('http://example.com/link')

    # Run the import.
    org_importer = OrganizationImporter('jurisdiction-id')
    post_importer = PostImporter('jurisdiction-id', org_importer)
    post_importer.import_data([scraped.as_dict()])

    # Fetch the single stored post and compare it with what was scraped.
    stored = Post.objects.get()
    assert 'ocd-post' in stored.id
    assert stored.label == scraped.label
    assert stored.role == scraped.role
    assert stored.organization_id == executive.id

    contact = stored.contact_details.all()[0]
    assert contact.type == 'phone'
    assert contact.value == '555-555-1234'
    assert contact.note == 'this is fake'

    link = stored.links.all()[0]
    assert link.url == 'http://example.com/link'
Exemple #11
0
def test_full_post():
    """A fully populated scraped post is stored with all of its fields.

    Covers label, role, organization resolution via pseudo-id, the
    ``maximum_memberships`` value, one contact detail, one link, and start/end
    dates given once as a ``datetime.date`` and once as an ISO string.
    """
    create_jurisdictions()
    org = Organization.objects.create(name="United States Executive Branch",
                                      classification="executive",
                                      jurisdiction_id="us")
    post = ScrapePost(label='executive', role='President',
                      organization_id='~{"classification": "executive"}',
                      start_date=datetime.date(2015, 5, 18),
                      end_date='2015-05-19',
                      maximum_memberships=2
                      )
    post.add_contact_detail(type='phone', value='555-555-1234', note='this is fake')
    post.add_link('http://example.com/link')

    # import post
    oi = OrganizationImporter('us')
    # NOTE(review): the post importer is given 'jurisdiction-id' while the
    # organization importer uses 'us' -- presumably a leftover; confirm before
    # aligning the two, since it may affect how the importer scopes lookups.
    PostImporter('jurisdiction-id', oi).import_data([post.as_dict()])

    # get post from db and assert it imported correctly
    p = Post.objects.get()
    assert 'ocd-post' in p.id
    assert p.label == post.label
    assert p.role == post.role
    assert p.organization_id == org.id
    assert p.maximum_memberships == 2

    assert p.contact_details.all()[0].type == 'phone'
    assert p.contact_details.all()[0].value == '555-555-1234'
    assert p.contact_details.all()[0].note == 'this is fake'

    assert p.links.all()[0].url == 'http://example.com/link'

    # Both date inputs are compared against ISO-formatted strings.
    assert p.start_date == '2015-05-18'
    assert p.end_date == '2015-05-19'
Exemple #12
0
    def do_import(self, juris, args):
        """Import all scraped data for ``juris`` in one atomic transaction.

        Scraped files are read from ``settings.SCRAPED_DATA_DIR/<args.module>``.
        Stages run in this order: jurisdictions, organizations, people, posts,
        memberships, bills, events, vote events. A progress line is printed
        before each stage and each stage's report is merged into the returned
        dict.
        """
        # Imported lazily to avoid loading Django code unless an import runs.
        from pupa.importers import (
            JurisdictionImporter,
            OrganizationImporter,
            PersonImporter,
            PostImporter,
            MembershipImporter,
            BillImporter,
            VoteEventImporter,
            EventImporter,
        )

        source_dir = os.path.join(settings.SCRAPED_DATA_DIR, args.module)
        jid = juris.jurisdiction_id

        jurisdictions = JurisdictionImporter(jid)
        organizations = OrganizationImporter(jid)
        people = PersonImporter(jid)
        posts = PostImporter(jid, organizations)
        memberships = MembershipImporter(jid, people, organizations, posts)
        bills = BillImporter(jid, organizations, people)
        vote_events = VoteEventImporter(jid, people, organizations, bills)
        events = EventImporter(jid, organizations, people, bills, vote_events)

        # (progress message, importer) pairs in execution order.
        stages = (
            ('import jurisdictions...', jurisdictions),
            ('import organizations...', organizations),
            ('import people...', people),
            ('import posts...', posts),
            ('import memberships...', memberships),
            ('import bills...', bills),
            ('import events...', events),
            ('import vote events...', vote_events),
        )

        summary = {}
        with transaction.atomic():
            for message, importer in stages:
                print(message)
                summary.update(importer.import_directory(source_dir))

        return summary
Exemple #13
0
def do_import(stream, stransaction):
    """Atomically import the scraped objects in ``stream``; return the report.

    The stream is copied into a list so it can be filtered once per object
    type. Organizations, people, posts and memberships are imported in that
    order, each importer receiving the ``as_dict()`` form of its objects, all
    inside one ``transaction.atomic()`` block.

    ``stransaction.jurisdiction.id`` provides the jurisdiction id used by
    every importer.
    """
    collected = list(stream)
    jurisdiction_id = stransaction.jurisdiction.id

    org_imp = OrganizationImporter(jurisdiction_id)
    person_imp = PersonImporter(jurisdiction_id)
    post_imp = PostImporter(jurisdiction_id, org_imp)
    membership_imp = MembershipImporter(
        jurisdiction_id, person_imp, org_imp, post_imp
    )

    def select(kind):
        # Generator over the dict form of every collected object of type `kind`.
        return (obj.as_dict() for obj in collected if isinstance(obj, kind))

    merged = {}
    with transaction.atomic():
        plan = [
            (org_imp, Organization),
            (person_imp, Person),
            (post_imp, Post),
            (membership_imp, Membership),
        ]
        for importer, kind in plan:
            merged.update(importer.import_data(select(kind)))

    return merged
Exemple #14
0
    def do_import(self, juris, args):
        """Import the enabled data types for ``juris`` and rebuild session reports.

        Scraped files come from ``settings.SCRAPED_DATA_DIR/<args.module>``.
        Inside one atomic transaction, jurisdictions are imported
        unconditionally, followed by the groups switched on through
        ``settings.ENABLE_PEOPLE_AND_ORGS``, ``settings.ENABLE_BILLS``,
        ``settings.ENABLE_EVENTS`` and ``settings.ENABLE_VOTES``.

        Once the import has finished, each session reported by the bill and
        vote-event importers gets a newly generated data-quality report that
        replaces the stored one.

        Returns the dict built by merging every importer's report.
        """
        # Imported lazily to avoid loading Django code unless an import runs.
        from pupa.importers import (
            JurisdictionImporter,
            OrganizationImporter,
            PersonImporter,
            PostImporter,
            MembershipImporter,
            BillImporter,
            VoteEventImporter,
            EventImporter,
        )
        from pupa.reports import generate_session_report
        from pupa.models import SessionDataQualityReport

        scraped_dir = os.path.join(settings.SCRAPED_DATA_DIR, args.module)
        jurisdiction_id = juris.jurisdiction_id

        juris_imp = JurisdictionImporter(jurisdiction_id)
        org_imp = OrganizationImporter(jurisdiction_id)
        person_imp = PersonImporter(jurisdiction_id)
        post_imp = PostImporter(jurisdiction_id, org_imp)
        membership_imp = MembershipImporter(
            jurisdiction_id, person_imp, org_imp, post_imp
        )
        bill_imp = BillImporter(jurisdiction_id, org_imp, person_imp)
        vote_event_imp = VoteEventImporter(
            jurisdiction_id, person_imp, org_imp, bill_imp
        )
        event_imp = EventImporter(
            jurisdiction_id, org_imp, person_imp, bill_imp, vote_event_imp
        )

        merged = {}

        def step(message, importer):
            # Print the progress line, then fold in that importer's report.
            print(message)
            merged.update(importer.import_directory(scraped_dir))

        with transaction.atomic():
            step('import jurisdictions...', juris_imp)
            if settings.ENABLE_PEOPLE_AND_ORGS:
                step('import organizations...', org_imp)
                step('import people...', person_imp)
                step('import posts...', post_imp)
                step('import memberships...', membership_imp)
            if settings.ENABLE_BILLS:
                step('import bills...', bill_imp)
            if settings.ENABLE_EVENTS:
                step('import events...', event_imp)
            if settings.ENABLE_VOTES:
                step('import vote events...', vote_event_imp)

        # Collect the sessions updated during this run and refresh their reports.
        sessions = set(bill_imp.get_seen_sessions())
        sessions.update(vote_event_imp.get_seen_sessions())
        for session in sessions:
            replacement = generate_session_report(session)
            # Swap old for new atomically so a failure cannot drop the report.
            with transaction.atomic():
                outdated = SessionDataQualityReport.objects.filter(
                    legislative_session=session)
                outdated.delete()
                replacement.save()

        return merged