def do_import(juris, args):
    """Import the scraped data for one jurisdiction and return the merged report.

    Reads from ``settings.SCRAPED_DATA_DIR / args.module``.  Jurisdictions are
    always imported; bills and vote events are imported only when
    ``settings.ENABLE_BILLS`` / ``settings.ENABLE_VOTES`` are truthy.  All
    imports run inside a single ``transaction.atomic()`` block, after which a
    session report is generated for every session the bill and vote event
    importers saw.
    """
    # deferred import: avoids loading Django code unless an import actually runs
    from openstates.importers import (
        JurisdictionImporter,
        BillImporter,
        VoteEventImporter,
    )

    datadir = os.path.join(settings.SCRAPED_DATA_DIR, args.module)

    juris_importer = JurisdictionImporter(juris.jurisdiction_id)
    bill_importer = BillImporter(juris.jurisdiction_id)
    vote_event_importer = VoteEventImporter(juris.jurisdiction_id, bill_importer)

    # (banner, name of the settings flag gating the step or None, importer)
    steps = (
        ("import jurisdictions...", None, juris_importer),
        ("import bills...", "ENABLE_BILLS", bill_importer),
        ("import vote events...", "ENABLE_VOTES", vote_event_importer),
    )

    report = {}
    with transaction.atomic():
        for banner, flag, importer in steps:
            if flag is not None and not getattr(settings, flag):
                continue
            print(banner)
            report.update(importer.import_directory(datadir))

    # compile info on all sessions that were updated in this run
    seen_sessions = set()
    for importer in (bill_importer, vote_event_importer):
        seen_sessions.update(importer.get_seen_sessions())
    for session in seen_sessions:
        generate_session_report(session)

    return report
def test_vote_event_bill_clearing():
    """Re-importing a vote event with corrected motion text must not add a third event.

    Guards against vote events sitting around forever on a bill when a change
    makes an existing vote event look like a new one.
    """
    jurisdiction = create_jurisdiction()
    session = jurisdiction.legislative_sessions.create(name="1900", identifier="1900")
    house = Organization.objects.create(
        id="org-id", name="House", classification="lower", jurisdiction=jurisdiction
    )
    target_bill, _other_bill = [
        Bill.objects.create(
            id=bill_id,
            identifier=identifier,
            legislative_session=session,
            from_organization=house,
        )
        for bill_id, identifier in (("bill-1", "HB 1"), ("bill-2", "HB 2"))
    ]

    org_importer = OrganizationImporter("jid")
    mock_importer = DumbMockImporter()
    bill_importer = BillImporter("jid", mock_importer, org_importer)

    # everything the two scraped vote events have in common
    shared = dict(
        legislative_session="1900",
        start_date="2013",
        classification="anything",
        result="passed",
        bill=target_bill.identifier,
        bill_chamber="lower",
        chamber="lower",
    )
    # the misspelling below is intentional; it gets corrected before the re-import
    first = ScrapeVoteEvent(motion_text="a vote on somthing", **shared)
    second = ScrapeVoteEvent(motion_text="a vote on something else", **shared)

    def import_both():
        # import_data (rather than import_item) is needed so postimport is called
        VoteEventImporter("jid", mock_importer, org_importer, bill_importer).import_data(
            [first.as_dict(), second.as_dict()]
        )
        return VoteEvent.objects.count()

    assert import_both() == 2

    # the typo is fixed; we still want two vote events, not three
    first.motion_text = "a vote on something"
    assert import_both() == 2
def test_vote_event_bill_id_dedupe():
    """Vote events that differ only by the bill they reference are distinct records.

    Importing the same scraped vote event repeatedly yields insert, noop and
    update in turn; an otherwise identical vote event on another bill is
    inserted as a second record.
    """
    create_jurisdiction()
    first_bill, second_bill = [
        Bill.objects.create(
            id=bill_id,
            identifier=identifier,
            legislative_session=LegislativeSession.objects.get(),
            from_organization=Organization.objects.get(classification="lower"),
        )
        for bill_id, identifier in (("bill-1", "HB 1"), ("bill-2", "HB 2"))
    ]

    def scraped_vote_on(target):
        # identical vote event apart from the referenced bill
        return ScrapeVoteEvent(
            legislative_session="1900",
            start_date="2013",
            classification="anything",
            result="passed",
            motion_text="a vote on something",
            bill=target.identifier,
            bill_chamber="lower",
            chamber="lower",
        )

    bill_importer = BillImporter("jid")

    def import_one(scraped):
        # returns (what the importer did, number of stored vote events)
        _, outcome = VoteEventImporter("jid", bill_importer).import_item(scraped.as_dict())
        return outcome, VoteEvent.objects.count()

    scraped = scraped_vote_on(first_bill)
    assert import_one(scraped) == ("insert", 1)

    # same exact vote event, no changes
    assert import_one(scraped) == ("noop", 1)

    # new info, update
    scraped.result = "failed"
    assert import_one(scraped) == ("update", 1)

    # same details on the other bill: a new vote event, insert
    assert import_one(scraped_vote_on(second_bill)) == ("insert", 2)
def test_vote_event_pupa_identifier_dedupe():
    """A vote event carrying a ``pupa_id`` is deduplicated on that id.

    Changing the result or the identifier while keeping the same ``pupa_id``
    updates the stored record; changing the ``pupa_id`` inserts a new one.
    """
    jurisdiction = create_jurisdiction()
    jurisdiction.legislative_sessions.create(name="1900", identifier="1900")
    Organization.objects.create(
        id="org-id",
        name="Legislature",
        classification="legislature",
        jurisdiction=jurisdiction,
    )

    scraped = ScrapeVoteEvent(
        legislative_session="1900",
        start_date="2013",
        classification="anything",
        result="passed",
        motion_text="a vote on something",
        identifier="Roll Call No. 1",
    )
    scraped.pupa_id = "foo"

    mock_importer = DumbMockImporter()
    org_importer = OrganizationImporter("jid")
    bill_importer = BillImporter("jid", mock_importer, org_importer)

    def import_current():
        # returns (what the importer did, number of stored vote events)
        importer = VoteEventImporter("jid", mock_importer, org_importer, bill_importer)
        _, outcome = importer.import_item(scraped.as_dict())
        return outcome, VoteEvent.objects.count()

    assert import_current() == ("insert", 1)

    # same exact vote event, no changes
    assert import_current() == ("noop", 1)

    # new info, update
    scraped.result = "failed"
    assert import_current() == ("update", 1)

    # new vote event identifier but same pupa_id, update
    scraped.identifier = "First Roll Call"
    assert import_current() == ("update", 1)

    # new pupa_id, insert
    scraped.pupa_id = "bar"
    assert import_current() == ("insert", 2)
def test_vote_event_bill_actions_two_stage():
    """Two vote events claiming one bill action must not conflict across imports.

    Very similar to the ve3/ve4 case in test_vote_event_bill_actions: two vote
    events that reference the same action must not clash on the OneToOneField.
    Here the imports happen in two stages, so the conflict has to be detected
    even when the votes were not part of the same scrape.
    """
    create_jurisdiction()
    scraped_bill = ScrapeBill("HB 1", "1900", "Axe & Tack Tax Act", chamber="lower")
    scraped_bill.add_action(description="passage", date="1900-04-02", chamber="lower")

    scraped_votes = []
    # the pupa_id is the only thing that tells the two vote events apart
    for pupa_id in ("one", "two"):
        scraped = ScrapeVoteEvent(
            legislative_session="1900",
            motion_text="passage",
            start_date="1900-04-02",
            classification="passage:bill",
            result="pass",
            bill_chamber="lower",
            bill="HB 1",
            bill_action="passage",
            chamber="lower",
        )
        scraped.pupa_id = pupa_id
        scraped_votes.append(scraped)
    first, second = scraped_votes

    bill_importer = BillImporter("jid")
    bill_importer.import_data([scraped_bill.as_dict()])

    # first imports just fine
    VoteEventImporter("jid", bill_importer).import_data([first.as_dict()])
    stored = list(VoteEvent.objects.all())
    assert len(stored) == 1
    assert stored[0].bill_action is not None

    # when the second is imported, the action stays pinned to the first, just
    # as it would have if both had been in the same import
    VoteEventImporter("jid", bill_importer).import_data(
        [first.as_dict(), second.as_dict()]
    )
    stored = list(VoteEvent.objects.all())
    assert len(stored) == 2
    assert stored[0].bill_action is not None
    assert stored[1].bill_action is None
def test_full_vote_event():
    """Import an organization, people, memberships, a bill and one vote event end to end.

    Afterwards the stored vote event must point at the session and bill, carry
    a single "yes" count of 20, and hold two votes resolved to the right people.
    """
    jurisdiction = create_jurisdiction()
    jurisdiction.legislative_sessions.create(name="1900", identifier="1900")

    scraped_people = [
        ScrapePerson(name, primary_org="lower") for name in ("John Smith", "Adam Smith")
    ]
    house = ScrapeOrganization(name="House", classification="lower")
    scraped_bill = ScrapeBill(
        "HB 1", "1900", "Axe & Tack Tax Act", from_organization=house._id
    )
    scraped_vote = ScrapeVoteEvent(
        legislative_session="1900",
        motion_text="passage",
        start_date="1900-04-01",
        classification="passage:bill",
        result="pass",
        bill_chamber="lower",
        bill="HB 1",
        organization=house._id,
    )
    scraped_vote.set_count("yes", 20)
    scraped_vote.yes("John Smith")
    scraped_vote.no("Adam Smith")

    org_importer = OrganizationImporter("jid")
    org_importer.import_data([house.as_dict()])

    person_importer = PersonImporter("jid")
    person_importer.import_data([person.as_dict() for person in scraped_people])

    membership_importer = MembershipImporter(
        "jid", person_importer, org_importer, DumbMockImporter()
    )
    # the first related object of each scraped person is imported as a membership
    membership_importer.import_data(
        [person._related[0].as_dict() for person in scraped_people]
    )

    bill_importer = BillImporter("jid", org_importer, person_importer)
    bill_importer.import_data([scraped_bill.as_dict()])

    VoteEventImporter("jid", person_importer, org_importer, bill_importer).import_data(
        [scraped_vote.as_dict()]
    )

    assert VoteEvent.objects.count() == 1
    stored = VoteEvent.objects.get()
    assert stored.legislative_session == LegislativeSession.objects.get()
    assert stored.motion_classification == ["passage:bill"]
    assert stored.bill == Bill.objects.get()

    tally = stored.counts.get()
    assert tally.option == "yes"
    assert tally.value == 20

    cast_votes = list(stored.votes.all())
    assert len(cast_votes) == 2
    for cast in stored.votes.all():
        # anyone who is not John Smith is expected to be Adam Smith voting "no"
        if cast.voter_name == "John Smith":
            expected_option, expected_name = "yes", "John Smith"
        else:
            expected_option, expected_name = "no", "Adam Smith"
        assert cast.option == expected_option
        assert cast.voter == Person.objects.get(name=expected_name)
def do_import(juris: State, args: argparse.Namespace) -> dict[str, typing.Any]:
    """Import the scraped data for one jurisdiction and return the merged report.

    Reads from ``settings.SCRAPED_DATA_DIR / args.module`` and imports
    jurisdictions, bills, vote events and events, in that order, inside a
    single ``transaction.atomic()`` block.  The same block stamps the database
    jurisdiction's ``latest_bill_update``.  Afterwards a session report is
    generated for every session the bill and vote event importers saw.
    """
    # deferred import: avoids loading Django code unless an import actually runs
    from openstates.importers import (
        JurisdictionImporter,
        BillImporter,
        VoteEventImporter,
        EventImporter,
    )
    from openstates.data.models import Jurisdiction as DatabaseJurisdiction

    datadir = os.path.join(settings.SCRAPED_DATA_DIR, args.module)

    juris_importer = JurisdictionImporter(juris.jurisdiction_id)
    bill_importer = BillImporter(juris.jurisdiction_id)
    vote_event_importer = VoteEventImporter(juris.jurisdiction_id, bill_importer)
    event_importer = EventImporter(juris.jurisdiction_id, vote_event_importer)

    # (banner, importer) pairs, listed in the order they must run
    steps = (
        ("import jurisdictions...", juris_importer),
        ("import bills...", bill_importer),
        ("import vote events...", vote_event_importer),
        ("import events...", event_importer),
    )

    report = {}
    with transaction.atomic():
        for banner, importer in steps:
            print(banner)
            report.update(importer.import_directory(datadir))

        # NOTE(review): utcnow() returns a naive datetime and is deprecated
        # since Python 3.12 -- confirm whether an aware timestamp is wanted here
        stamp = datetime.datetime.utcnow()
        DatabaseJurisdiction.objects.filter(id=juris.jurisdiction_id).update(
            latest_bill_update=stamp
        )

    # compile info on all sessions that were updated in this run
    seen_sessions = set()
    for importer in (bill_importer, vote_event_importer):
        seen_sessions.update(importer.get_seen_sessions())
    for session in seen_sessions:
        generate_session_report(session)

    return report
def test_fix_bill_id():
    """A vote event citing "HB1" must resolve to the bill stored as "HB 1".

    A bill identifier transformer is installed in ``IMPORT_TRANSFORMERS`` for
    the duration of the imports; the vote event refers to the bill without the
    space, and the stored vote event is expected to point at "HB 1".
    """
    j = create_jurisdiction()
    j.legislative_sessions.create(name="1900", identifier="1900")

    org1 = ScrapeOrganization(name="House", classification="lower")
    bill = ScrapeBill(
        "HB 1", "1900", "Test Bill ID", classification="bill", chamber="lower"
    )

    oi = OrganizationImporter("jid")
    oi.import_data([org1.as_dict()])

    from openstates.settings import IMPORT_TRANSFORMERS

    IMPORT_TRANSFORMERS["bill"] = {
        # normalise e.g. "HB1" / "HB 001" to "HB 1"; count is passed by keyword
        # because passing it positionally is deprecated in recent Python versions
        "identifier": lambda x: re.sub(r"([A-Z]*)\s*0*([-\d]+)", r"\1 \2", x, count=1)
    }
    try:
        bi = BillImporter("jid", oi, DumbMockImporter())
        bi.import_data([bill.as_dict()])

        ve = ScrapeVoteEvent(
            legislative_session="1900",
            motion_text="passage",
            start_date="1900-04-02",
            classification="passage:bill",
            result="fail",
            bill_chamber="lower",
            bill="HB1",
            identifier="4",
            bill_action="passage",
            organization=org1._id,
        )
        VoteEventImporter("jid", DumbMockImporter(), oi, bi).import_data([ve.as_dict()])
    finally:
        # always undo the global transformer so a failure here cannot leak
        # into other tests
        IMPORT_TRANSFORMERS["bill"] = {}

    ve = VoteEvent.objects.get()
    # previously a bare comparison whose result was discarded; it must be asserted
    assert ve.bill.identifier == "HB 1"
def test_full_vote_event():
    """Import a bill and one vote event and check everything stored for the vote.

    The two voters exist as Person rows with a membership in the "lower"
    organization before the import; afterwards the vote event must point at
    the session and bill, carry a single "yes" count of 20, and hold two votes
    resolved to the right people.
    """
    create_jurisdiction()
    scraped_bill = ScrapeBill("HB 1", "1900", "Axe & Tack Tax Act", chamber="lower")
    scraped_vote = ScrapeVoteEvent(
        legislative_session="1900",
        motion_text="passage",
        start_date="1900-04-01",
        classification="passage:bill",
        result="pass",
        bill_chamber="lower",
        bill="HB 1",
        chamber="lower",
    )
    scraped_vote.set_count("yes", 20)
    scraped_vote.yes("John Smith")
    scraped_vote.no("Adam Smith")

    for voter_name in ("John Smith", "Adam Smith"):
        Person.objects.create(name=voter_name)
    # give every person a membership in the lower chamber
    for person in Person.objects.all():
        lower = Organization.objects.get(classification="lower")
        person.memberships.create(organization=lower)

    bill_importer = BillImporter("jid")
    bill_importer.import_data([scraped_bill.as_dict()])
    VoteEventImporter("jid", bill_importer).import_data([scraped_vote.as_dict()])

    assert VoteEvent.objects.count() == 1
    stored = VoteEvent.objects.get()
    assert stored.legislative_session == LegislativeSession.objects.get()
    assert stored.motion_classification == ["passage:bill"]
    assert stored.bill == Bill.objects.get()

    tally = stored.counts.get()
    assert tally.option == "yes"
    assert tally.value == 20

    cast_votes = list(stored.votes.all())
    assert len(cast_votes) == 2
    for cast in stored.votes.all():
        # anyone who is not John Smith is expected to be Adam Smith voting "no"
        if cast.voter_name == "John Smith":
            expected_option, expected_name = "yes", "John Smith"
        else:
            expected_option, expected_name = "no", "Adam Smith"
        assert cast.option == expected_option
        assert cast.voter == Person.objects.get(name=expected_name)
def test_fix_bill_id():
    """A vote event citing "HB1" must resolve to the bill stored as "HB 1".

    ``fix_bill_id`` is installed as the bill identifier transformer in
    ``IMPORT_TRANSFORMERS`` for the duration of the imports; the vote event
    refers to the bill without the space, and the stored vote event is
    expected to point at "HB 1".
    """
    create_jurisdiction()
    bill = ScrapeBill(
        "HB 1", "1900", "Test Bill ID", classification="bill", chamber="lower"
    )

    from openstates.settings import IMPORT_TRANSFORMERS

    IMPORT_TRANSFORMERS["bill"] = {
        "identifier": fix_bill_id,
    }
    try:
        bi = BillImporter("jid")
        bi.import_data([bill.as_dict()])

        ve = ScrapeVoteEvent(
            legislative_session="1900",
            motion_text="passage",
            start_date="1900-04-02",
            classification="passage:bill",
            result="fail",
            bill_chamber="lower",
            bill="HB1",
            identifier="4",
            bill_action="passage",
            chamber="lower",
        )
        VoteEventImporter("jid", bi).import_data([ve.as_dict()])
    finally:
        # always undo the global transformer so a failure here cannot leak
        # into other tests
        IMPORT_TRANSFORMERS["bill"] = {}

    ve = VoteEvent.objects.get()
    # previously a bare comparison whose result was discarded; it must be asserted
    assert ve.bill.identifier == "HB 1"
def test_vote_event_bill_actions_errors():
    """Vote events that cannot be matched to exactly one free action stay unlinked.

    Covers three failure modes: a vote matching two identical actions, a vote
    matching no action, and two votes competing for the same action (only the
    first one gets it).
    """
    jurisdiction = create_jurisdiction()
    jurisdiction.legislative_sessions.create(name="1900", identifier="1900")
    house = ScrapeOrganization(name="House", classification="lower")
    senate = ScrapeOrganization(name="Senate", classification="upper")
    scraped_bill = ScrapeBill(
        "HB 1", "1900", "Axe & Tack Tax Act", from_organization=house._id
    )

    # two identical actions on 04-01 (vote matching will fail for them), then
    # one good action on 04-02 that two votes will try to match
    for action_date in ("1900-04-01", "1900-04-01", "1900-04-02"):
        scraped_bill.add_action(description="passage", date=action_date, chamber="lower")

    # (identifier, start_date, motion_text, result, bill_action)
    vote_specs = (
        # will match two actions
        ("1", "1900-04-01", "passage", "pass", "passage"),
        # will match no actions
        ("2", "1900-04-01", "passage", "pass", "committee result"),
        # these two votes will both match the same action
        ("3", "1900-04-02", "passage", "pass", "passage"),
        ("4", "1900-04-02", "passage-syz", "fail", "passage"),
    )
    scraped_votes = [
        ScrapeVoteEvent(
            legislative_session="1900",
            motion_text=motion_text,
            start_date=start_date,
            classification="passage:bill",
            result=result,
            bill_chamber="lower",
            bill="HB 1",
            identifier=identifier,
            bill_action=bill_action,
            organization=house._id,
        )
        for identifier, start_date, motion_text, result, bill_action in vote_specs
    ]

    org_importer = OrganizationImporter("jid")
    org_importer.import_data([house.as_dict(), senate.as_dict()])

    bill_importer = BillImporter("jid", org_importer, DumbMockImporter())
    bill_importer.import_data([scraped_bill.as_dict()])

    VoteEventImporter(
        "jid", DumbMockImporter(), org_importer, bill_importer
    ).import_data([scraped.as_dict() for scraped in scraped_votes])

    # fetching with get() only succeeds when exactly one bill was stored
    Bill.objects.get()
    stored = list(VoteEvent.objects.all().order_by("identifier"))

    # isn't matched, was ambiguous across two actions
    assert stored[0].bill_action is None
    # isn't matched, no match in actions
    assert stored[1].bill_action is None
    # these both try to match the same action, only first will succeed
    assert stored[2].bill_action is not None
    assert stored[3].bill_action is None
def test_vote_event_bill_actions():
    """Vote events are linked to bill actions by description, chamber and date.

    The bill gets passage actions in both chambers on 04-01, an unrelated
    action, and an upper-chamber passage on 04-02.  Four passage votes (one per
    chamber and day) are imported; the lower-chamber vote on 04-02 has no
    corresponding action and must stay unlinked.
    """
    jurisdiction = create_jurisdiction()
    jurisdiction.legislative_sessions.create(name="1900", identifier="1900")
    house = ScrapeOrganization(name="House", classification="lower")
    senate = ScrapeOrganization(name="Senate", classification="upper")
    scraped_bill = ScrapeBill(
        "HB 1", "1900", "Axe & Tack Tax Act", from_organization=house._id
    )

    # passage of upper & lower on the same day, something else, then passage
    # in upper again on a different day
    action_specs = (
        ("passage", "1900-04-01", "upper"),
        ("passage", "1900-04-01", "lower"),
        ("other event", "1900-04-01", "lower"),
        ("passage", "1900-04-02", "upper"),
    )
    for description, action_date, chamber in action_specs:
        scraped_bill.add_action(description=description, date=action_date, chamber=chamber)

    # four passage votes: one per chamber on 04-01 and one per chamber on 04-02
    scraped_votes = [
        ScrapeVoteEvent(
            legislative_session="1900",
            motion_text="passage",
            start_date=start_date,
            classification="passage:bill",
            result="pass",
            bill_chamber="lower",
            bill="HB 1",
            bill_action="passage",
            organization=voting_org._id,
        )
        for start_date, voting_org in (
            ("1900-04-01", house),
            ("1900-04-01", senate),
            ("1900-04-02", house),
            ("1900-04-02", senate),
        )
    ]

    org_importer = OrganizationImporter("jid")
    org_importer.import_data([house.as_dict(), senate.as_dict()])

    bill_importer = BillImporter("jid", org_importer, DumbMockImporter())
    bill_importer.import_data([scraped_bill.as_dict()])

    VoteEventImporter(
        "jid", DumbMockImporter(), org_importer, bill_importer
    ).import_data([scraped.as_dict() for scraped in scraped_votes])

    stored_bill = Bill.objects.get()
    stored_votes = list(VoteEvent.objects.all())
    actions = list(stored_bill.actions.all())
    assert len(actions) == 4
    assert len(stored_votes) == 4

    # index the linked action by (chamber of the voting organization, date)
    linked = {
        (vote.organization.classification, vote.start_date): vote.bill_action
        for vote in stored_votes
    }

    # ensure that votes are matched using action, chamber, and date
    assert linked[("upper", "1900-04-01")] == actions[0]
    assert linked[("lower", "1900-04-01")] == actions[1]
    assert linked[("upper", "1900-04-02")] == actions[3]
    assert linked[("lower", "1900-04-02")] is None
def ge():
    """Return a new ScrapeEvent with fixed name, start date and location, flagged all-day."""
    event = ScrapeEvent(
        name="America's Birthday",
        start_date="2014-07-04T05:00Z",
        location_name="America",
        all_day=True,
    )
    return event


# Importers created once at module level, all for jurisdiction id "jid".
oi = OrganizationImporter("jid")
pi = PersonImporter("jid")
# the bill importer is built on the organization and person importers
bi = BillImporter("jid", oi, pi)
# the vote event importer is built on the person, organization and bill importers
vei = VoteEventImporter("jid", pi, oi, bi)


@pytest.mark.django_db
def test_related_people_event():
    # Fixture: two people, each with a membership in one organization of
    # jurisdiction "jid".
    create_jurisdiction()
    george = Person.objects.create(id="gw", name="George Washington")
    john = Person.objects.create(id="jqp", name="John Q. Public")
    o = Organization.objects.create(name="Merica", jurisdiction_id="jid")

    Membership.objects.create(person=george, organization=o)
    Membership.objects.create(person=john, organization=o)

    # two separate event objects built from the same fixture data
    event1 = ge()
    event2 = ge()
    # NOTE(review): the visible part of this test ends here with no assertions;
    # presumably the body continues beyond this excerpt -- confirm.
def ge():
    """Return a new ScrapeEvent with fixed name, start date and location, flagged all-day."""
    event = ScrapeEvent(
        name="America's Birthday",
        start_date="2014-07-04T05:00Z",
        location_name="America",
        all_day=True,
    )
    return event


# Importers created once at module level, all for jurisdiction id "jid".
oi = OrganizationImporter("jid")
pi = PersonImporter("jid")
bi = BillImporter("jid")
# the vote event importer is built on the bill importer
vei = VoteEventImporter("jid", bi)


@pytest.mark.django_db
def test_related_people_event():
    # Fixture: two people, each with a membership in one organization of
    # jurisdiction "jid".
    create_jurisdiction()
    george = Person.objects.create(id="gw", name="George Washington")
    john = Person.objects.create(id="jqp", name="John Q. Public")
    o = Organization.objects.create(name="Merica", jurisdiction_id="jid")

    Membership.objects.create(person=george, organization=o)
    Membership.objects.create(person=john, organization=o)

    # two separate event objects built from the same fixture data
    event1 = ge()
    event2 = ge()
    # NOTE(review): the visible part of this test ends here with no assertions;
    # presumably the body continues beyond this excerpt -- confirm.
def do_import(juris, args):
    """Import the scraped data for one jurisdiction and return the merged report.

    Reads from ``settings.SCRAPED_DATA_DIR / args.module``.  Jurisdictions are
    always imported; the remaining stages are gated by the
    ``ENABLE_PEOPLE_AND_ORGS``, ``ENABLE_BILLS``, ``ENABLE_EVENTS`` and
    ``ENABLE_VOTES`` settings.  All imports run inside a single
    ``transaction.atomic()`` block, after which a session report is generated
    for every session the bill and vote event importers saw.
    """
    # deferred import: avoids loading Django code unless an import actually runs
    from openstates.importers import (
        JurisdictionImporter,
        OrganizationImporter,
        PersonImporter,
        PostImporter,
        MembershipImporter,
        BillImporter,
        VoteEventImporter,
        EventImporter,
    )

    datadir = os.path.join(settings.SCRAPED_DATA_DIR, args.module)

    juris_importer = JurisdictionImporter(juris.jurisdiction_id)
    org_importer = OrganizationImporter(juris.jurisdiction_id)
    person_importer = PersonImporter(juris.jurisdiction_id)
    post_importer = PostImporter(juris.jurisdiction_id, org_importer)
    membership_importer = MembershipImporter(
        juris.jurisdiction_id, person_importer, org_importer, post_importer
    )
    bill_importer = BillImporter(juris.jurisdiction_id, org_importer, person_importer)
    vote_event_importer = VoteEventImporter(
        juris.jurisdiction_id, person_importer, org_importer, bill_importer
    )
    event_importer = EventImporter(
        juris.jurisdiction_id,
        org_importer,
        person_importer,
        bill_importer,
        vote_event_importer,
    )

    # (settings flag gating the stage or None, ((banner, importer), ...)),
    # listed in execution order; note that events run before vote events
    stages = (
        (None, (("import jurisdictions...", juris_importer),)),
        (
            "ENABLE_PEOPLE_AND_ORGS",
            (
                ("import organizations...", org_importer),
                ("import people...", person_importer),
                ("import posts...", post_importer),
                ("import memberships...", membership_importer),
            ),
        ),
        ("ENABLE_BILLS", (("import bills...", bill_importer),)),
        ("ENABLE_EVENTS", (("import events...", event_importer),)),
        ("ENABLE_VOTES", (("import vote events...", vote_event_importer),)),
    )

    report = {}
    with transaction.atomic():
        for flag, steps in stages:
            if flag is not None and not getattr(settings, flag):
                continue
            for banner, importer in steps:
                print(banner)
                report.update(importer.import_directory(datadir))

    # compile info on all sessions that were updated in this run
    seen_sessions = set()
    for importer in (bill_importer, vote_event_importer):
        seen_sessions.update(importer.get_seen_sessions())
    for session in seen_sessions:
        generate_session_report(session)

    return report