Esempio n. 1
0
    def do_import(self, juris, args):
        """Import the scraped data for ``args.module`` and return the merged report.

        One importer is built per object type for ``juris.jurisdiction_id``.
        Each importer's ``import_directory`` is then run against the module's
        directory under ``settings.SCRAPED_DATA_DIR``, in the order
        jurisdiction, organization, person, post, membership, and every
        result is folded into a single report dict.
        """
        datadir = os.path.join(settings.SCRAPED_DATA_DIR, args.module)
        jid = juris.jurisdiction_id

        juris_importer = JurisdictionImporter(jid)
        org_importer = OrganizationImporter(jid)
        person_importer = PersonImporter(jid)
        post_importer = PostImporter(jid, org_importer)
        membership_importer = MembershipImporter(jid, person_importer, org_importer,
                                                 post_importer)

        # NOTE: bill, vote and event imports are currently disabled here.
        # TODO: wrap in a transaction
        report = {}
        pipeline = (juris_importer, org_importer, person_importer, post_importer,
                    membership_importer)
        for importer in pipeline:
            report.update(importer.import_directory(datadir))

        return report
def test_jurisdiction_merge_related():
    """Re-importing a jurisdiction never deletes legislative sessions.

    Sessions missing from a later import are kept, new identifiers are added,
    and an existing identifier has its other fields updated.
    """
    Division.objects.create(id='ocd-division/country:us', name='USA')
    importer = JurisdictionImporter('jurisdiction-id')
    fake = FakeJurisdiction()

    def reimport():
        # Push the current state of the fake jurisdiction through the importer.
        importer.import_item(fake.as_dict())

    def session_count():
        return LegislativeSession.objects.count()

    reimport()
    assert session_count() == 2

    # dropping a session from the scrape must not delete it (bills hang off it)
    fake.legislative_sessions.pop()
    reimport()
    assert session_count() == 2

    # a brand-new identifier adds a third session
    new_session = {'identifier': '2017', 'name': '2017 Session'}
    fake.legislative_sessions.append(new_session)
    reimport()
    assert session_count() == 3

    # an existing identifier with a new name updates in place
    renamed_session = {'identifier': '2016', 'name': 'updated'}
    fake.legislative_sessions.append(renamed_session)
    reimport()
    assert session_count() == 3
    assert LegislativeSession.objects.get(identifier='2016').name == 'updated'
Esempio n. 3
0
def do_import(stream, transaction):
    """Import every scraped object in ``stream`` and return the combined report.

    ``stream`` is materialised into a list so it can be scanned once per
    object type. The jurisdiction id is read from
    ``transaction.jurisdiction.id``. Objects are handed to the importers as
    dicts (via ``as_dict()``), in the order jurisdiction, organization,
    person, post, membership.
    """
    scraped = list(stream)
    jid = transaction.jurisdiction.id

    juris_importer = JurisdictionImporter(jid)
    org_importer = OrganizationImporter(jid)
    person_importer = PersonImporter(jid)
    post_importer = PostImporter(jid, org_importer)
    membership_importer = MembershipImporter(
        jid, person_importer, org_importer, post_importer)

    def dicts_of(otype):
        # Lazily produce the dict form of each scraped object of this type.
        return (obj.as_dict() for obj in scraped if isinstance(obj, otype))

    # This basically relates to Pupa's pupa.clu.commands.update:113
    # (From there - wrap this in a transaction.)
    report = {}
    report.update(juris_importer.import_data(dicts_of(Jurisdiction)))
    report.update(org_importer.import_data(dicts_of(Organization)))
    report.update(person_importer.import_data(dicts_of(Person)))
    report.update(post_importer.import_data(dicts_of(Post)))
    report.update(membership_importer.import_data(dicts_of(Membership)))
    return report
Esempio n. 4
0
    def do_import(self, juris, args):
        """Load scraped jurisdiction, organization, person, post and membership data.

        Reads from the ``args.module`` directory under
        ``settings.SCRAPED_DATA_DIR`` and returns a dict merging the result of
        each importer's ``import_directory`` call.
        """
        datadir = os.path.join(settings.SCRAPED_DATA_DIR, args.module)

        # Importers run in the order they are constructed; the post and
        # membership importers are handed the earlier importers they rely on.
        jurisdictions = JurisdictionImporter(juris.jurisdiction_id)
        organizations = OrganizationImporter(juris.jurisdiction_id)
        people = PersonImporter(juris.jurisdiction_id)
        posts = PostImporter(juris.jurisdiction_id, organizations)
        memberships = MembershipImporter(
            juris.jurisdiction_id, people, organizations, posts,
        )

        # NOTE: bill, vote and event imports are currently disabled here.
        # TODO: wrap in a transaction
        report = {}
        for importer in [jurisdictions, organizations, people, posts, memberships]:
            results = importer.import_directory(datadir)
            report.update(results)

        return report
Esempio n. 5
0
def do_import(stream, transaction):
    """Feed the scraped objects in ``stream`` to the importers, type by type.

    The stream is copied into a list first so that it can be filtered
    repeatedly. Returns a dict merging the result of every importer's
    ``import_data`` call.
    """
    items = list(stream)
    jurisdiction_id = transaction.jurisdiction.id

    juris_importer = JurisdictionImporter(jurisdiction_id)
    org_importer = OrganizationImporter(jurisdiction_id)
    person_importer = PersonImporter(jurisdiction_id)
    post_importer = PostImporter(jurisdiction_id, org_importer)
    membership_importer = MembershipImporter(jurisdiction_id, person_importer,
                                             org_importer, post_importer)

    def only(kind):
        # Yield the dict form of each collected item that is an instance of `kind`.
        for item in items:
            if isinstance(item, kind):
                yield item.as_dict()

    # This basically relates to Pupa's pupa.clu.commands.update:113
    # (From there - wrap this in a transaction.)
    plan = (
        (juris_importer, Jurisdiction),
        (org_importer, Organization),
        (person_importer, Person),
        (post_importer, Post),
        (membership_importer, Membership),
    )

    report = {}
    for importer, kind in plan:
        report.update(importer.import_data(only(kind)))

    return report
Esempio n. 6
0
    def do_import(self, juris, args):
        """Run the enabled importers for ``args.module`` and refresh session reports.

        Every import stage runs inside a single ``transaction.atomic()`` block.
        Jurisdictions are always imported; the remaining stages are gated by
        the ``settings.ENABLE_*`` flags. Afterwards, for each session returned
        by the bill and vote event importers' ``get_seen_sessions()``, a new
        report is generated and replaces the stored
        ``SessionDataQualityReport`` rows for that session.

        Returns the dict merging every ``import_directory`` result.
        """
        # imported here, rather than at module level, to avoid loading Django
        # code unnecessarily
        from pupa.importers import (JurisdictionImporter, OrganizationImporter, PersonImporter,
                                    PostImporter, MembershipImporter, BillImporter,
                                    VoteEventImporter, EventImporter)
        from pupa.reports import generate_session_report
        from pupa.models import SessionDataQualityReport

        datadir = os.path.join(settings.SCRAPED_DATA_DIR, args.module)
        jid = juris.jurisdiction_id

        juris_importer = JurisdictionImporter(jid)
        org_importer = OrganizationImporter(jid)
        person_importer = PersonImporter(jid)
        post_importer = PostImporter(jid, org_importer)
        membership_importer = MembershipImporter(jid, person_importer, org_importer,
                                                 post_importer)
        bill_importer = BillImporter(jid, org_importer, person_importer)
        vote_event_importer = VoteEventImporter(jid, person_importer, org_importer,
                                                bill_importer)
        event_importer = EventImporter(jid, org_importer, person_importer, bill_importer,
                                       vote_event_importer)

        report = {}

        def run(label, importer):
            # Announce the stage, then merge its results into the report.
            print(label)
            report.update(importer.import_directory(datadir))

        with transaction.atomic():
            run('import jurisdictions...', juris_importer)
            if settings.ENABLE_PEOPLE_AND_ORGS:
                run('import organizations...', org_importer)
                run('import people...', person_importer)
                run('import posts...', post_importer)
                run('import memberships...', membership_importer)
            if settings.ENABLE_BILLS:
                run('import bills...', bill_importer)
            if settings.ENABLE_EVENTS:
                run('import events...', event_importer)
            if settings.ENABLE_VOTES:
                run('import vote events...', vote_event_importer)

        # compile info on all sessions that were updated in this run
        seen_sessions = set()
        for importer in (bill_importer, vote_event_importer):
            seen_sessions.update(importer.get_seen_sessions())

        for session in seen_sessions:
            new_report = generate_session_report(session)
            # swap the old report rows for the new one atomically, per session
            with transaction.atomic():
                stale = SessionDataQualityReport.objects.filter(legislative_session=session)
                stale.delete()
                new_report.save()

        return report
def test_jurisdiction_merge_related():
    """Legislative sessions survive re-imports: they are added or updated, not removed."""
    Division.objects.create(id='ocd-division/country:us', name='USA')
    juris_importer = JurisdictionImporter('jurisdiction-id')
    scraped = FakeJurisdiction()
    sessions = LegislativeSession.objects

    # initial import
    juris_importer.import_item(scraped.as_dict())
    assert sessions.count() == 2

    # removing a session from the scrape must not delete it, since that
    # could remove bills; the count stays at two
    scraped.legislative_sessions.pop()
    juris_importer.import_item(scraped.as_dict())
    assert sessions.count() == 2

    # an unseen identifier brings the total to three
    scraped.legislative_sessions.append(
        {'identifier': '2017', 'name': '2017 Session'}
    )
    juris_importer.import_item(scraped.as_dict())
    assert sessions.count() == 3

    # non-identifier fields of an existing session are updated in place
    scraped.legislative_sessions.append(
        {'identifier': '2016', 'name': 'updated'}
    )
    juris_importer.import_item(scraped.as_dict())
    assert sessions.count() == 3
    session_2016 = sessions.get(identifier='2016')
    assert session_2016.name == 'updated'
Esempio n. 8
0
    def do_import(self, juris, args):
        """Import all scraped object types for ``args.module`` in one transaction.

        Builds the importers for ``juris.jurisdiction_id``, then runs each one's
        ``import_directory`` over the module's directory under
        ``settings.SCRAPED_DATA_DIR`` inside a single ``transaction.atomic()``
        block, printing a progress line before each stage. Returns the dict
        merging every stage's result.
        """
        # imported here, rather than at module level, to avoid loading Django
        # code unnecessarily
        from pupa.importers import (JurisdictionImporter, OrganizationImporter, PersonImporter,
                                    PostImporter, MembershipImporter, BillImporter,
                                    VoteImporter, EventImporter,
                                    DisclosureImporter)

        datadir = os.path.join(settings.SCRAPED_DATA_DIR, args.module)
        jid = juris.jurisdiction_id

        juris_importer = JurisdictionImporter(jid)
        org_importer = OrganizationImporter(jid)
        person_importer = PersonImporter(jid)
        post_importer = PostImporter(jid, org_importer)
        membership_importer = MembershipImporter(jid, person_importer, org_importer,
                                                 post_importer)
        bill_importer = BillImporter(jid, org_importer, person_importer)
        vote_importer = VoteImporter(jid, person_importer, org_importer, bill_importer)
        event_importer = EventImporter(jid, org_importer, person_importer)
        disclosure_importer = DisclosureImporter(jid, org_importer, person_importer,
                                                 event_importer)

        # (progress message, importer) pairs, in execution order
        stages = (
            ('import jurisdictions...', juris_importer),
            ('import organizations...', org_importer),
            ('import people...', person_importer),
            ('import posts...', post_importer),
            ('import memberships...', membership_importer),
            ('import bills...', bill_importer),
            ('import events...', event_importer),
            ('import disclosures...', disclosure_importer),
            ('import votes...', vote_importer),
        )

        report = {}
        with transaction.atomic():
            for message, importer in stages:
                print(message)
                report.update(importer.import_directory(datadir))

        return report
Esempio n. 9
0
def test_jurisdiction_update():
    """Importing the same jurisdiction inserts once, then no-ops, then updates."""
    fake = FakeJurisdiction()
    importer = JurisdictionImporter('jurisdiction-id')

    # first import creates the record
    _ignored, action = importer.import_item(fake.as_dict())
    assert action == 'insert'

    # identical data a second time changes nothing
    _ignored, action = importer.import_item(fake.as_dict())
    assert action == 'noop'
    assert Jurisdiction.objects.count() == 1

    # a changed field updates the existing record rather than adding one
    fake.name = 'different name'
    updated, action = importer.import_item(fake.as_dict())
    assert action == 'update'
    assert Jurisdiction.objects.count() == 1
    assert updated.name == 'different name'
def test_jurisdiction_update():
    """``import_item`` reports insert, noop and update as the scraped data changes."""
    scraped = FakeJurisdiction()
    juris_importer = JurisdictionImporter('jurisdiction-id')

    def reimport():
        # Import the fake jurisdiction in its current state.
        return juris_importer.import_item(scraped.as_dict())

    _, outcome = reimport()
    assert outcome == 'insert'

    _, outcome = reimport()
    assert outcome == 'noop'
    assert Jurisdiction.objects.count() == 1

    scraped.name = 'different name'
    obj, outcome = reimport()
    assert outcome == 'update'
    assert Jurisdiction.objects.count() == 1
    # the new name must be what is stored in the database
    stored = Jurisdiction.objects.get()
    assert stored.name == 'different name'
def test_jurisdiction_update():
    """A jurisdiction is inserted once, left alone when unchanged, and updated on change."""
    Division.objects.create(id='ocd-division/country:us', name='USA')
    fake = FakeJurisdiction()
    importer = JurisdictionImporter('jurisdiction-id')

    # the first pass inserts, an identical second pass is a no-op
    for expected in ('insert', 'noop'):
        _, what = importer.import_item(fake.as_dict())
        assert what == expected
    assert Jurisdiction.objects.count() == 1

    # renaming the jurisdiction triggers an update of the single stored row
    fake.name = 'different name'
    obj, what = importer.import_item(fake.as_dict())
    assert what == 'update'
    assert Jurisdiction.objects.count() == 1
    assert Jurisdiction.objects.get().name == 'different name'
def test_jurisdiction_update():
    """Check the action reported by ``import_item`` across three successive imports."""
    Division.objects.create(id='ocd-division/country:us', name='USA')
    source = FakeJurisdiction()
    juris_importer = JurisdictionImporter('jurisdiction-id')
    stored = Jurisdiction.objects

    # new data -> insert
    _unused, verb = juris_importer.import_item(source.as_dict())
    assert verb == 'insert'

    # same data again -> noop, still a single row
    _unused, verb = juris_importer.import_item(source.as_dict())
    assert verb == 'noop'
    assert stored.count() == 1

    # changed name -> update of that single row
    source.name = 'different name'
    obj, verb = juris_importer.import_item(source.as_dict())
    assert verb == 'update'
    assert stored.count() == 1
    assert stored.get().name == 'different name'
Esempio n. 13
0
    def do_import(self, juris, args):
        """Import every scraped object type for ``args.module`` atomically.

        All stages run inside one ``transaction.atomic()`` block, in the order
        jurisdictions, organizations, people, posts, memberships, bills,
        events, vote events. A progress line is printed before each stage.
        Returns the dict merging each importer's ``import_directory`` result.
        """
        # imported here, rather than at module level, to avoid loading Django
        # code unnecessarily
        from pupa.importers import (JurisdictionImporter, OrganizationImporter,
                                    PersonImporter, PostImporter,
                                    MembershipImporter, BillImporter,
                                    VoteEventImporter, EventImporter)

        datadir = os.path.join(settings.SCRAPED_DATA_DIR, args.module)
        jid = juris.jurisdiction_id

        juris_importer = JurisdictionImporter(jid)
        org_importer = OrganizationImporter(jid)
        person_importer = PersonImporter(jid)
        post_importer = PostImporter(jid, org_importer)
        membership_importer = MembershipImporter(
            jid, person_importer, org_importer, post_importer)
        bill_importer = BillImporter(jid, org_importer, person_importer)
        vote_event_importer = VoteEventImporter(
            jid, person_importer, org_importer, bill_importer)
        event_importer = EventImporter(
            jid, org_importer, person_importer, bill_importer,
            vote_event_importer)

        report = {}

        def step(message, importer):
            # Print the progress line, then merge this stage's results.
            print(message)
            report.update(importer.import_directory(datadir))

        with transaction.atomic():
            step('import jurisdictions...', juris_importer)
            step('import organizations...', org_importer)
            step('import people...', person_importer)
            step('import posts...', post_importer)
            step('import memberships...', membership_importer)
            step('import bills...', bill_importer)
            step('import events...', event_importer)
            step('import vote events...', vote_event_importer)

        return report
def test_jurisdiction_import():
    """A jurisdiction imported via ``import_data`` is stored with matching fields."""
    fake = FakeJurisdiction()
    payload = [fake.as_dict()]
    importer = JurisdictionImporter('jurisdiction-id')
    importer.import_data(payload)

    # exactly one jurisdiction is expected; .get() raises otherwise
    stored = Jurisdiction.objects.get()
    assert stored.id == fake.jurisdiction_id
    assert stored.division_id == fake.division_id
    assert stored.name == fake.name
    assert stored.url == fake.url
def test_jurisdiction_import():
    """Importing a fake jurisdiction stores its id, division, name and url."""
    Division.objects.create(id='ocd-division/country:us', name='USA')
    scraped = FakeJurisdiction()
    as_dict = scraped.as_dict()
    JurisdictionImporter('jurisdiction-id').import_data([as_dict])

    saved = Jurisdiction.objects.get()
    # the database primary key is the scraped object's jurisdiction_id
    assert saved.id == scraped.jurisdiction_id
    # the remaining fields share their attribute name on both objects
    for field in ('division_id', 'name', 'url'):
        assert getattr(saved, field) == getattr(scraped, field)
Esempio n. 16
0
    def do_import(self, juris, args):
        """Import the enabled object types for ``args.module``, then rebuild session reports.

        Import stages run inside one ``transaction.atomic()`` block.
        Jurisdictions are always imported; people/orgs, bills, events and votes
        are each gated by their ``settings.ENABLE_*`` flag. Once the import
        transaction has finished, a fresh report is generated for every session
        returned by the bill and vote event importers' ``get_seen_sessions()``.
        Each fresh report replaces the existing ``SessionDataQualityReport``
        rows for its session.

        Returns the dict merging every ``import_directory`` result.
        """
        # imported here, rather than at module level, to avoid loading Django
        # code unnecessarily
        from pupa.importers import (JurisdictionImporter, OrganizationImporter,
                                    PersonImporter, PostImporter,
                                    MembershipImporter, BillImporter,
                                    VoteEventImporter, EventImporter)
        from pupa.reports import generate_session_report
        from pupa.models import SessionDataQualityReport

        datadir = os.path.join(settings.SCRAPED_DATA_DIR, args.module)
        jid = juris.jurisdiction_id

        juris_importer = JurisdictionImporter(jid)
        org_importer = OrganizationImporter(jid)
        person_importer = PersonImporter(jid)
        post_importer = PostImporter(jid, org_importer)
        membership_importer = MembershipImporter(
            jid, person_importer, org_importer, post_importer)
        bill_importer = BillImporter(jid, org_importer, person_importer)
        vote_event_importer = VoteEventImporter(
            jid, person_importer, org_importer, bill_importer)
        event_importer = EventImporter(
            jid, org_importer, person_importer, bill_importer,
            vote_event_importer)

        report = {}

        def run_all(*stages):
            # Each stage is a (progress message, importer) pair, run in order.
            for message, importer in stages:
                print(message)
                report.update(importer.import_directory(datadir))

        with transaction.atomic():
            run_all(('import jurisdictions...', juris_importer))
            if settings.ENABLE_PEOPLE_AND_ORGS:
                run_all(
                    ('import organizations...', org_importer),
                    ('import people...', person_importer),
                    ('import posts...', post_importer),
                    ('import memberships...', membership_importer),
                )
            if settings.ENABLE_BILLS:
                run_all(('import bills...', bill_importer))
            if settings.ENABLE_EVENTS:
                run_all(('import events...', event_importer))
            if settings.ENABLE_VOTES:
                run_all(('import vote events...', vote_event_importer))

        # compile info on all sessions that were updated in this run
        seen_sessions = set()
        for source in (bill_importer, vote_event_importer):
            seen_sessions.update(source.get_seen_sessions())

        for session in seen_sessions:
            new_report = generate_session_report(session)
            # replace the session's old report rows with the new one atomically
            with transaction.atomic():
                outdated = SessionDataQualityReport.objects.filter(
                    legislative_session=session)
                outdated.delete()
                new_report.save()

        return report