def do_import(self, juris, args):
    """Import the scraped data for ``args.module`` and return a report dict.

    The data directory is ``settings.SCRAPED_DATA_DIR`` joined with
    ``args.module``.  Jurisdictions, organizations, people, posts and
    memberships are imported in that order, and whatever each
    ``import_directory`` call returns is merged into one dict via
    ``dict.update``.
    """
    scraped_dir = os.path.join(settings.SCRAPED_DATA_DIR, args.module)

    jurisdictions = JurisdictionImporter(juris.jurisdiction_id)
    organizations = OrganizationImporter(juris.jurisdiction_id)
    people = PersonImporter(juris.jurisdiction_id)
    posts = PostImporter(juris.jurisdiction_id, organizations)
    memberships = MembershipImporter(
        juris.jurisdiction_id, people, organizations, posts
    )
    # Not enabled yet: BillImporter, VoteImporter and EventImporter, together
    # with their import_from_json(datadir) passes.

    # TODO: wrap in a transaction
    report = {}
    for importer in (jurisdictions, organizations, people, posts, memberships):
        report.update(importer.import_directory(scraped_dir))
    return report
def do_import(stream, stransaction):
    """Import scraped objects from ``stream`` inside one atomic transaction.

    ``stream`` is materialised into a list so that it can be scanned once per
    object type.  Organizations, people, posts and memberships are imported,
    in that order, for the jurisdiction ``stransaction.jurisdiction.id``; the
    result of every ``import_data`` call is merged into the returned dict.
    """
    scraped = list(stream)
    jid = stransaction.jurisdiction.id

    organizations = OrganizationImporter(jid)
    people = PersonImporter(jid)
    posts = PostImporter(jid, organizations)
    memberships = MembershipImporter(jid, people, organizations, posts)

    def dicts_of(kind):
        # Lazily produce the dict form of each scraped object of type ``kind``.
        return (item.as_dict() for item in scraped if isinstance(item, kind))

    report = {}
    with transaction.atomic():
        passes = (
            (organizations, Organization),
            (people, Person),
            (posts, Post),
            (memberships, Membership),
        )
        for importer, kind in passes:
            report.update(importer.import_data(dicts_of(kind)))
    return report
def test_deduplication():
    """Posts are keyed by organization and label; a role change is an update."""
    for org_id, org_name in (
        ('us', "United States Executive Branch"),
        ('nc', "North Carolina Executive Branch"),
    ):
        Organization.objects.create(id=org_id, name=org_name,
                                    classification="executive",
                                    jurisdiction_id=org_id)

    executive_org = '~{"classification": "executive"}'
    pres = ScrapePost(label='executive', role='President',
                      organization_id=executive_org)
    vp = ScrapePost(label='vice-executive', role='Vice President',
                    organization_id=executive_org)
    gov = ScrapePost(label='executive', role='Governor',
                     organization_id=executive_org)

    # All three posts must be imported:
    #   pres & gov share a label but live in different jurisdictions,
    #   vp & pres share a jurisdiction but have different labels.
    us_oi = OrganizationImporter('us')
    nc_oi = OrganizationImporter('nc')
    us_batch = [pres.as_dict(), vp.as_dict()]
    PostImporter('us', us_oi).import_data(us_batch)
    nc_batch = [gov.as_dict()]
    PostImporter('nc', nc_oi).import_data(nc_batch)
    assert Post.objects.count() == 3

    # Re-importing the same label with a new role is allowed ...
    pres = ScrapePost(label='executive', role='King',
                      organization_id=executive_org)
    PostImporter('us', us_oi).import_data([pres.as_dict()])

    # ... and it updates the existing row instead of adding a new one.
    assert Post.objects.count() == 3
    updated = Post.objects.get(organization_id='us', label='executive')
    assert updated.role == 'King'
def test_resolve_special_json_id():
    """``~{...}`` pseudo ids resolve to the post of the importer's jurisdiction."""
    for org_id, org_name in (
        ('us', "United States Executive Branch"),
        ('nc', "North Carolina Executive Branch"),
    ):
        Organization.objects.create(id=org_id, name=org_name,
                                    classification="executive",
                                    jurisdiction_id=org_id)

    for post_id, label, role, org_id in (
        ('pres', 'executive', 'President', 'us'),
        ('vpres', 'vice-executive', 'Vice President', 'us'),
        ('gov', 'executive', 'Governor', 'nc'),
    ):
        Post.objects.create(id=post_id, label=label, role=role,
                            organization_id=org_id)

    oi = OrganizationImporter('')
    # (jurisdiction, pseudo id, expected post id); the same label resolves to
    # a different post depending on the importer's jurisdiction.
    expectations = (
        ('us', '~{"label": "executive"}', 'pres'),
        ('us', '~{"label": "vice-executive"}', 'vpres'),
        ('nc', '~{"label": "executive"}', 'gov'),
    )
    for jurisdiction, pseudo_id, expected in expectations:
        resolved = PostImporter(jurisdiction, oi).resolve_json_id(pseudo_id)
        assert resolved == expected
def do_import(stream, transaction):
    """Import every scraped object in ``stream`` and return a report dict.

    Mirrors the import step of Pupa's update command (originally noted as
    pupa.clu.commands.update:113).  As noted there, this should eventually be
    wrapped in a transaction; currently each pass runs on its own.

    ``stream`` is materialised into a list and scanned once per object type.
    Jurisdictions, organizations, people, posts and memberships are imported
    in that order for ``transaction.jurisdiction.id``.
    """
    # NOTE(review): the ``transaction`` parameter shadows any module-level
    # name ``transaction`` inside this function.
    scraped = list(stream)
    jid = transaction.jurisdiction.id

    jurisdictions = JurisdictionImporter(jid)
    organizations = OrganizationImporter(jid)
    people = PersonImporter(jid)
    posts = PostImporter(jid, organizations)
    memberships = MembershipImporter(jid, people, organizations, posts)

    report = {}

    def run_pass(importer, kind):
        # Feed the importer the dict form of each scraped ``kind`` instance
        # (generated lazily) and merge its result into the report.
        matching = (obj.as_dict() for obj in scraped if isinstance(obj, kind))
        report.update(importer.import_data(matching))

    run_pass(jurisdictions, Jurisdiction)
    run_pass(organizations, Organization)
    run_pass(people, Person)
    run_pass(posts, Post)
    run_pass(memberships, Membership)
    return report
def do_import(stream, transaction):
    """Run all importers over ``stream`` and return their combined report.

    Corresponds to the import step in Pupa's update command (originally
    noted as pupa.clu.commands.update:113), where the same work is marked as
    needing a surrounding transaction.

    The stream is copied into a list first because it is filtered once for
    each of Jurisdiction, Organization, Person, Post and Membership, in that
    order.  The jurisdiction id comes from ``transaction.jurisdiction.id``.
    """
    # NOTE(review): inside this function ``transaction`` is the argument, so
    # a module-level ``transaction`` (if any) is not reachable by that name.
    objects = list(stream)
    jurisdiction_id = transaction.jurisdiction.id

    importer_for_juris = JurisdictionImporter(jurisdiction_id)
    importer_for_orgs = OrganizationImporter(jurisdiction_id)
    importer_for_people = PersonImporter(jurisdiction_id)
    importer_for_posts = PostImporter(jurisdiction_id, importer_for_orgs)
    importer_for_memberships = MembershipImporter(
        jurisdiction_id,
        importer_for_people,
        importer_for_orgs,
        importer_for_posts,
    )

    def only(otype):
        """Yield ``as_dict()`` for each object in the stream of type ``otype``."""
        for candidate in objects:
            if isinstance(candidate, otype):
                yield candidate.as_dict()

    report = {}
    juris_result = importer_for_juris.import_data(only(Jurisdiction))
    report.update(juris_result)
    org_result = importer_for_orgs.import_data(only(Organization))
    report.update(org_result)
    person_result = importer_for_people.import_data(only(Person))
    report.update(person_result)
    post_result = importer_for_posts.import_data(only(Post))
    report.update(post_result)
    membership_result = importer_for_memberships.import_data(only(Membership))
    report.update(membership_result)
    return report
def do_import(self, juris, args):
    """Import scraped data for ``args.module`` and refresh session reports.

    All enabled importers run inside one ``transaction.atomic()`` block.
    Jurisdictions are always imported; the other groups are gated by the
    ``settings.ENABLE_PEOPLE_AND_ORGS``, ``ENABLE_BILLS``, ``ENABLE_EVENTS``
    and ``ENABLE_VOTES`` flags.  Afterwards, for every session reported by
    the bill and vote-event importers, the stored
    ``SessionDataQualityReport`` rows are replaced by a new report.

    Returns the dict built by merging every ``import_directory`` result.
    """
    # Imported here rather than at module level to avoid loading Django code
    # unnecessarily.
    from pupa.importers import (JurisdictionImporter, OrganizationImporter,
                                PersonImporter, PostImporter,
                                MembershipImporter, BillImporter,
                                VoteEventImporter, EventImporter)
    from pupa.reports import generate_session_report
    from pupa.models import SessionDataQualityReport

    datadir = os.path.join(settings.SCRAPED_DATA_DIR, args.module)

    juris_importer = JurisdictionImporter(juris.jurisdiction_id)
    org_importer = OrganizationImporter(juris.jurisdiction_id)
    person_importer = PersonImporter(juris.jurisdiction_id)
    post_importer = PostImporter(juris.jurisdiction_id, org_importer)
    membership_importer = MembershipImporter(
        juris.jurisdiction_id, person_importer, org_importer, post_importer
    )
    bill_importer = BillImporter(
        juris.jurisdiction_id, org_importer, person_importer
    )
    vote_event_importer = VoteEventImporter(
        juris.jurisdiction_id, person_importer, org_importer, bill_importer
    )
    event_importer = EventImporter(
        juris.jurisdiction_id, org_importer, person_importer, bill_importer,
        vote_event_importer
    )

    # NOTE(review): the nesting below (which steps sit under which ENABLE_*
    # flag, and the session-report section running after the main
    # transaction) should be confirmed against the upstream layout.
    #
    # Each stage is (settings flag or None for "always", steps); the flag is
    # read only when its stage is reached.
    stages = (
        (None, (
            ('import jurisdictions...', juris_importer),
        )),
        ('ENABLE_PEOPLE_AND_ORGS', (
            ('import organizations...', org_importer),
            ('import people...', person_importer),
            ('import posts...', post_importer),
            ('import memberships...', membership_importer),
        )),
        ('ENABLE_BILLS', (
            ('import bills...', bill_importer),
        )),
        ('ENABLE_EVENTS', (
            ('import events...', event_importer),
        )),
        ('ENABLE_VOTES', (
            ('import vote events...', vote_event_importer),
        )),
    )

    report = {}
    with transaction.atomic():
        for flag, steps in stages:
            if flag is not None and not getattr(settings, flag):
                continue
            for message, importer in steps:
                print(message)
                report.update(importer.import_directory(datadir))

    # Compile info on all sessions that were updated in this run.
    touched_sessions = set()
    for importer in (bill_importer, vote_event_importer):
        touched_sessions.update(importer.get_seen_sessions())

    for session in touched_sessions:
        fresh_report = generate_session_report(session)
        # Swap old report rows for the new one atomically.
        with transaction.atomic():
            stale_reports = SessionDataQualityReport.objects.filter(
                legislative_session=session)
            stale_reports.delete()
            fresh_report.save()

    return report
def do_import(self, juris, args):
    """Import all scraped data for ``args.module`` in one atomic transaction.

    Import order: jurisdictions, organizations, people, posts, memberships,
    bills, events, disclosures, votes.  A progress line is printed before
    each pass, and every ``import_directory`` result is merged into the
    returned report dict.
    """
    # Imported here rather than at module level to avoid loading Django code
    # unnecessarily.
    from pupa.importers import (JurisdictionImporter, OrganizationImporter,
                                PersonImporter, PostImporter,
                                MembershipImporter, BillImporter,
                                VoteImporter, EventImporter,
                                DisclosureImporter)

    datadir = os.path.join(settings.SCRAPED_DATA_DIR, args.module)

    juris_importer = JurisdictionImporter(juris.jurisdiction_id)
    org_importer = OrganizationImporter(juris.jurisdiction_id)
    person_importer = PersonImporter(juris.jurisdiction_id)
    post_importer = PostImporter(juris.jurisdiction_id, org_importer)
    membership_importer = MembershipImporter(
        juris.jurisdiction_id, person_importer, org_importer, post_importer
    )
    bill_importer = BillImporter(
        juris.jurisdiction_id, org_importer, person_importer
    )
    vote_importer = VoteImporter(
        juris.jurisdiction_id, person_importer, org_importer, bill_importer
    )
    event_importer = EventImporter(
        juris.jurisdiction_id, org_importer, person_importer
    )
    disclosure_importer = DisclosureImporter(
        juris.jurisdiction_id, org_importer, person_importer, event_importer
    )

    # (progress message, importer) pairs in execution order; votes run last.
    pipeline = [
        ('import jurisdictions...', juris_importer),
        ('import organizations...', org_importer),
        ('import people...', person_importer),
        ('import posts...', post_importer),
        ('import memberships...', membership_importer),
        ('import bills...', bill_importer),
        ('import events...', event_importer),
        ('import disclosures...', disclosure_importer),
        ('import votes...', vote_importer),
    ]

    report = {}
    with transaction.atomic():
        for message, importer in pipeline:
            print(message)
            report.update(importer.import_directory(datadir))
    return report
def test_full_post():
    """A scraped post with a contact detail and a link imports intact."""
    org = Organization.objects.create(name="United States Executive Branch",
                                      classification="executive",
                                      jurisdiction_id="jurisdiction-id")

    post = ScrapePost(label='executive', role='President',
                      organization_id='~{"classification": "executive"}')
    post.add_contact_detail(type='phone', value='555-555-1234',
                            note='this is fake')
    post.add_link('http://example.com/link')

    # Run the import.
    oi = OrganizationImporter('jurisdiction-id')
    importer = PostImporter('jurisdiction-id', oi)
    importer.import_data([post.as_dict()])

    # Fetch the single stored post and compare it with what was scraped.
    stored = Post.objects.get()
    assert 'ocd-post' in stored.id
    assert stored.label == post.label
    assert stored.role == post.role
    assert stored.organization_id == org.id

    contact = stored.contact_details.all()[0]
    assert contact.type == 'phone'
    assert contact.value == '555-555-1234'
    assert contact.note == 'this is fake'

    link = stored.links.all()[0]
    assert link.url == 'http://example.com/link'
def test_full_post():
    """A fully populated scraped post (dates, limits, contacts) imports intact."""
    create_jurisdictions()
    org = Organization.objects.create(name="United States Executive Branch",
                                      classification="executive",
                                      jurisdiction_id="us")

    # start_date is given as a date object and end_date as a string; both
    # are expected back as ISO-formatted strings below.
    post = ScrapePost(
        label='executive',
        role='President',
        organization_id='~{"classification": "executive"}',
        start_date=datetime.date(2015, 5, 18),
        end_date='2015-05-19',
        maximum_memberships=2,
    )
    post.add_contact_detail(type='phone', value='555-555-1234',
                            note='this is fake')
    post.add_link('http://example.com/link')

    # Run the import.
    # NOTE(review): the PostImporter is built with 'jurisdiction-id' while the
    # organization and its importer use 'us' - confirm this is intentional.
    oi = OrganizationImporter('us')
    importer = PostImporter('jurisdiction-id', oi)
    importer.import_data([post.as_dict()])
    print(post.as_dict())

    # Fetch the single stored post and compare it with what was scraped.
    stored = Post.objects.get()
    assert 'ocd-post' in stored.id
    assert stored.label == post.label
    assert stored.role == post.role
    assert stored.organization_id == org.id
    assert stored.maximum_memberships == 2

    contact = stored.contact_details.all()[0]
    assert contact.type == 'phone'
    assert contact.value == '555-555-1234'
    assert contact.note == 'this is fake'

    link = stored.links.all()[0]
    assert link.url == 'http://example.com/link'

    assert stored.start_date == '2015-05-18'
    assert stored.end_date == '2015-05-19'
def do_import(self, juris, args):
    """Import all scraped data for ``args.module`` in one atomic transaction.

    Import order: jurisdictions, organizations, people, posts, memberships,
    bills, events, vote events.  Each pass announces itself on stdout and
    its ``import_directory`` result is merged into the returned report dict.
    """
    # Imported here rather than at module level to avoid loading Django code
    # unnecessarily.
    from pupa.importers import (JurisdictionImporter, OrganizationImporter,
                                PersonImporter, PostImporter,
                                MembershipImporter, BillImporter,
                                VoteEventImporter, EventImporter)

    datadir = os.path.join(settings.SCRAPED_DATA_DIR, args.module)

    juris_importer = JurisdictionImporter(juris.jurisdiction_id)
    org_importer = OrganizationImporter(juris.jurisdiction_id)
    person_importer = PersonImporter(juris.jurisdiction_id)
    post_importer = PostImporter(juris.jurisdiction_id, org_importer)
    membership_importer = MembershipImporter(
        juris.jurisdiction_id, person_importer, org_importer, post_importer
    )
    bill_importer = BillImporter(
        juris.jurisdiction_id, org_importer, person_importer
    )
    vote_event_importer = VoteEventImporter(
        juris.jurisdiction_id, person_importer, org_importer, bill_importer
    )
    event_importer = EventImporter(
        juris.jurisdiction_id, org_importer, person_importer, bill_importer,
        vote_event_importer
    )

    report = {}

    def run(what, importer):
        # Announce the pass, run it, and merge its result into the report.
        print('import %s...' % what)
        report.update(importer.import_directory(datadir))

    with transaction.atomic():
        run('jurisdictions', juris_importer)
        run('organizations', org_importer)
        run('people', person_importer)
        run('posts', post_importer)
        run('memberships', membership_importer)
        run('bills', bill_importer)
        run('events', event_importer)
        run('vote events', vote_event_importer)

    return report
def do_import(stream, stransaction):
    """Atomically import the scraped objects in ``stream``; return a report.

    The stream is copied into a list because it is filtered once for each of
    Organization, Person, Post and Membership (imported in that order).  The
    jurisdiction id is read from ``stransaction.jurisdiction.id``.  All four
    passes share a single ``transaction.atomic()`` block.
    """
    objects = list(stream)
    jurisdiction_id = stransaction.jurisdiction.id

    orgs = OrganizationImporter(jurisdiction_id)
    persons = PersonImporter(jurisdiction_id)
    posts = PostImporter(jurisdiction_id, orgs)
    memberships = MembershipImporter(jurisdiction_id, persons, orgs, posts)

    def select(otype):
        """Yield ``as_dict()`` for every object in the stream of type ``otype``."""
        for candidate in objects:
            if isinstance(candidate, otype):
                yield candidate.as_dict()

    report = {}
    with transaction.atomic():
        org_result = orgs.import_data(select(Organization))
        report.update(org_result)
        person_result = persons.import_data(select(Person))
        report.update(person_result)
        post_result = posts.import_data(select(Post))
        report.update(post_result)
        membership_result = memberships.import_data(select(Membership))
        report.update(membership_result)
    return report
def do_import(self, juris, args):
    """Run the enabled importers for ``args.module``, then rebuild reports.

    Importers read from ``settings.SCRAPED_DATA_DIR`` joined with
    ``args.module`` and run inside a single ``transaction.atomic()`` block.
    Jurisdictions are always imported; people/orgs, bills, events and vote
    events each depend on their ``settings.ENABLE_*`` flag.  Once the import
    is done, each session reported by the bill and vote-event importers gets
    its ``SessionDataQualityReport`` rows deleted and a new report saved.

    Returns the dict accumulated from all ``import_directory`` results.
    """
    # Imported here rather than at module level to avoid loading Django code
    # unnecessarily.
    from pupa.importers import (JurisdictionImporter, OrganizationImporter,
                                PersonImporter, PostImporter,
                                MembershipImporter, BillImporter,
                                VoteEventImporter, EventImporter)
    from pupa.reports import generate_session_report
    from pupa.models import SessionDataQualityReport

    datadir = os.path.join(settings.SCRAPED_DATA_DIR, args.module)

    juris_importer = JurisdictionImporter(juris.jurisdiction_id)
    org_importer = OrganizationImporter(juris.jurisdiction_id)
    person_importer = PersonImporter(juris.jurisdiction_id)
    post_importer = PostImporter(juris.jurisdiction_id, org_importer)
    membership_importer = MembershipImporter(
        juris.jurisdiction_id, person_importer, org_importer, post_importer
    )
    bill_importer = BillImporter(
        juris.jurisdiction_id, org_importer, person_importer
    )
    vote_event_importer = VoteEventImporter(
        juris.jurisdiction_id, person_importer, org_importer, bill_importer
    )
    event_importer = EventImporter(
        juris.jurisdiction_id, org_importer, person_importer, bill_importer,
        vote_event_importer
    )

    report = {}

    def run(message, importer):
        # Announce the pass, run it, and merge its result into the report.
        print(message)
        report.update(importer.import_directory(datadir))

    # NOTE(review): confirm against the upstream layout that all four
    # people/org passes belong under ENABLE_PEOPLE_AND_ORGS and that the
    # session-report section runs after the main transaction has closed.
    with transaction.atomic():
        run('import jurisdictions...', juris_importer)

        if settings.ENABLE_PEOPLE_AND_ORGS:
            run('import organizations...', org_importer)
            run('import people...', person_importer)
            run('import posts...', post_importer)
            run('import memberships...', membership_importer)

        if settings.ENABLE_BILLS:
            run('import bills...', bill_importer)

        if settings.ENABLE_EVENTS:
            run('import events...', event_importer)

        if settings.ENABLE_VOTES:
            run('import vote events...', vote_event_importer)

    # Compile info on all sessions that were updated in this run.
    updated_sessions = set()
    for source in (bill_importer, vote_event_importer):
        updated_sessions.update(source.get_seen_sessions())

    for session in updated_sessions:
        replacement = generate_session_report(session)
        # Delete-then-save happens in its own transaction per session.
        with transaction.atomic():
            outdated = SessionDataQualityReport.objects.filter(
                legislative_session=session)
            outdated.delete()
            replacement.save()

    return report