Example #1
0
def test_full_vote_event():
    """Import a complete vote event and verify what ends up in the database.

    An organization, two people, each person's first related object (through
    MembershipImporter) and a bill are imported first so the vote event can
    refer to them. The stored session, classification, bill, count and
    individual votes are then checked.
    """
    j = create_jurisdiction()
    j.legislative_sessions.create(name="1900", identifier="1900")
    sp1 = ScrapePerson("John Smith", primary_org="lower")
    sp2 = ScrapePerson("Adam Smith", primary_org="lower")
    org = ScrapeOrganization(name="House", classification="lower")
    bill = ScrapeBill("HB 1",
                      "1900",
                      "Axe & Tack Tax Act",
                      from_organization=org._id)
    vote_event = ScrapeVoteEvent(
        legislative_session="1900",
        motion_text="passage",
        start_date="1900-04-01",
        classification="passage:bill",
        result="pass",
        bill_chamber="lower",
        bill="HB 1",
        organization=org._id,
    )
    vote_event.set_count("yes", 20)
    vote_event.yes("John Smith")
    vote_event.no("Adam Smith")

    oi = OrganizationImporter("jid")
    oi.import_data([org.as_dict()])

    pi = PersonImporter("jid")
    pi.import_data([sp1.as_dict(), sp2.as_dict()])

    mi = MembershipImporter("jid", pi, oi, DumbMockImporter())
    mi.import_data([sp1._related[0].as_dict(), sp2._related[0].as_dict()])

    bi = BillImporter("jid", oi, pi)
    bi.import_data([bill.as_dict()])

    VoteEventImporter("jid", pi, oi, bi).import_data([vote_event.as_dict()])

    assert VoteEvent.objects.count() == 1
    ve = VoteEvent.objects.get()
    assert ve.legislative_session == LegislativeSession.objects.get()
    assert ve.motion_classification == ["passage:bill"]
    assert ve.bill == Bill.objects.get()
    count = ve.counts.get()
    assert count.option == "yes"
    assert count.value == 20

    # Materialize the votes once and reuse the list instead of querying again.
    votes = list(ve.votes.all())
    assert len(votes) == 2

    # Pin both voter names explicitly so an unexpected name cannot slip
    # through a catch-all branch.
    expected_options = {"John Smith": "yes", "Adam Smith": "no"}
    assert {v.voter_name for v in votes} == set(expected_options)
    for v in votes:
        assert v.option == expected_options[v.voter_name]
        assert v.voter == Person.objects.get(name=v.voter_name)
def test_same_name_people_other_name():
    """An alternate name that matches another person's name raises SameNameError."""
    create_jurisdiction()
    Organization.objects.create(name="WWE", jurisdiction_id="jid")

    # other_names have to be taken into account by the name collision code
    named = ScrapePerson("Dwayne Johnson", image="http://example.com/1")
    aliased = ScrapePerson("Rock", image="http://example.com/2")
    aliased.add_name("Dwayne Johnson")

    importer = PersonImporter("jid")
    payload = [named.as_dict(), aliased.as_dict()]

    # same name (via the alias), yet apparently two different people
    with pytest.raises(SameNameError):
        importer.import_data(payload)
def test_multiple_orgs_of_same_class():
    """
    Memberships can be set on several organizations that share one
    classification inside the same jurisdiction.
    """
    create_jurisdiction()
    for org_id, org_name in (("fnd", "Foundation"), ("fdr", "Federation")):
        Organization.objects.create(
            id=org_id,
            name=org_name,
            classification="foundation",
            jurisdiction_id="fnd-jid",
        )

    shared = {"primary_org": "foundation", "role": "founder"}
    hari = ScrapePerson("Hari Seldon", primary_org_name="Foundation", **shared)
    picard = ScrapePerson(
        "Jean Luc Picard", primary_org_name="Federation", **shared
    )
    founders = (hari, picard)

    # each person goes through its own import_data call
    people = PersonImporter("fnd-jid")
    for scraped in founders:
        people.import_data([scraped.as_dict()])

    # now import the first related object of each person in a single batch
    memberships = MembershipImporter(
        "fnd-jid", people, OrganizationImporter("fnd-jid"), DumbMockImporter()
    )
    memberships.import_data(
        [scraped._related[0].as_dict() for scraped in founders]
    )

    expected = {"Hari Seldon": "Foundation", "Jean Luc Picard": "Federation"}
    for person_name, org_name in expected.items():
        membership = Person.objects.get(name=person_name).memberships.get()
        assert membership.organization.name == org_name
def test_full_person():
    """Import a person carrying every kind of related item and read it back."""
    scraped = ScrapePerson("Tom Sawyer")
    scraped.add_identifier("1")
    scraped.add_name("Tommy", start_date="1880")
    scraped.add_contact_detail(
        type="phone", value="555-555-1234", note="this is fake"
    )
    scraped.add_link("http://example.com/link")
    scraped.add_source("http://example.com/source")

    # run the import
    PersonImporter("jid").import_data([scraped.as_dict()])

    # fetch the single stored person and compare it field by field
    stored = Person.objects.get()
    assert "ocd-person" in stored.id
    assert stored.name == scraped.name

    identifier = stored.identifiers.all()[0]
    assert identifier.identifier == "1"
    assert identifier.scheme == ""

    other_name = stored.other_names.all()[0]
    assert other_name.name == "Tommy"
    assert other_name.start_date == "1880"

    contact = stored.contact_details.all()[0]
    assert contact.type == "phone"
    assert contact.value == "555-555-1234"
    assert contact.note == "this is fake"

    assert stored.links.all()[0].url == "http://example.com/link"
    assert stored.sources.all()[0].url == "http://example.com/source"
def test_bill_sponsor_by_identifier():
    """A sponsorship added by identifier and scheme resolves to the stored person."""
    create_jurisdiction()
    org = create_org()

    # the same identifier/scheme pair is used on the bill and on the person
    ident = {"identifier": "TOTALLY_REAL_ID", "scheme": "TOTALLY_REAL_SCHEME"}

    bill = ScrapeBill(
        "HB 1",
        "1900",
        "Axe & Tack Tax Act",
        classification="tax bill",
        chamber="lower",
    )
    bill.add_sponsorship_by_identifier(
        name="SNODGRASS",
        classification="sponsor",
        entity_type="person",
        primary=True,
        **ident
    )

    org_importer = OrganizationImporter("jid")
    person_importer = PersonImporter("jid")

    scraped = ScrapePerson(name="Zadock Snodgrass")
    scraped.add_identifier(**ident)
    person_importer.import_data([scraped.as_dict()])
    stored = Person.objects.get()
    Membership.objects.create(person_id=stored.id, organization_id=org.id)

    bill_importer = BillImporter("jid", org_importer, person_importer)
    bill_importer.import_data([bill.as_dict()])

    # unpacking fails unless there is exactly one sponsorship
    [sponsorship] = Bill.objects.get().sponsorships.all()
    assert sponsorship.person.name == "Zadock Snodgrass"
def test_no_membership_for_person_including_party():
    """
    Specifying a party must still end in a no memberships error: a party does
    not bind the person to a jurisdiction, which would cause duplication.
    """
    create_jurisdiction()
    Organization.objects.create(
        id="fnd",
        name="Foundation",
        classification="foundation",
        jurisdiction_id="fnd-jid",
    )
    Organization.objects.create(
        id="dem", name="Democratic", classification="party"
    )

    # a person whose only affiliation is the party
    scraped = ScrapePerson("a man without a country", party="Democratic")
    people = PersonImporter("fnd-jid")
    orgs = OrganizationImporter("fnd-jid")
    people.import_data([scraped.as_dict()])

    # importing the person's first related object has to be rejected
    memberships = MembershipImporter("fnd-jid", people, orgs, DumbMockImporter())
    related = scraped._related[0].as_dict()

    with pytest.raises(NoMembershipsError):
        memberships.import_data([related])
def test_same_name_people():
    """Exercise the same-name handling of PersonImporter one step at a time."""
    create_jurisdiction()
    wwe = Organization.objects.create(name="WWE", jurisdiction_id="jid")

    def run_import(*scraped_people):
        # every step uses a brand-new importer for the "jid" jurisdiction
        return PersonImporter("jid").import_data(
            [scraped.as_dict() for scraped in scraped_people]
        )

    def fake_memberships():
        # create fake memberships so that future lookups work on the imported people
        for stored in Person.objects.all():
            Membership.objects.create(person=stored, organization=wwe)

    first = ScrapePerson("Dwayne Johnson", image="http://example.com/1")
    second = ScrapePerson("Dwayne Johnson", image="http://example.com/2")

    # two people with the same name into a pristine database: error
    with pytest.raises(SameNameError):
        run_import(first, second)

    # one person alone is accepted
    run_import(first)
    fake_memberships()

    # bringing in the namesake alongside the stored person: error
    with pytest.raises(SameNameError):
        run_import(first, second)

    # once birth dates are set the import goes through
    first.birth_date = "1970"
    second.birth_date = "1930"
    stats = run_import(first, second)["person"]
    assert stats["insert"] == 1
    assert stats["noop"] == 0
    assert stats["update"] == 1
    assert Person.objects.count() == 2
    fake_memberships()

    # a third namesake without a birthday: error
    third = ScrapePerson("Dwayne Johnson", image="http://example.com/3")
    with pytest.raises(SameNameError):
        run_import(third)

    # an update works, and a changed birth_date means a new insert
    first.image = "http://example.com/1.jpg"
    second.birth_date = "1931"
    stats = run_import(first, second)["person"]
    assert Person.objects.count() == 3
    assert stats["insert"] == 1
    assert stats["noop"] == 0
    assert stats["update"] == 1
def test_deduplication_no_jurisdiction_overlap():
    """Importing under a different jurisdiction id leaves two people stored."""
    create_jurisdiction()
    create_person()

    # a different jurisdiction should give us a new person
    scraped = ScrapePerson("Dwayne Johnson")
    importer = PersonImporter("new-jurisdiction-id")
    importer.import_data([scraped.as_dict()])

    assert Person.objects.count() == 2
def test_deduplication_same_name():
    """Importing "Dwayne Johnson" after create_person() leaves one person stored."""
    create_jurisdiction()
    create_person()

    # the simplest case: nothing but the same name
    scraped = ScrapePerson("Dwayne Johnson")
    importer = PersonImporter("jid")
    importer.import_data([scraped.as_dict()])

    assert Person.objects.count() == 1
def test_deduplication_other_name_exists():
    """Importing "Rocky" after create_person() leaves one person stored."""
    create_jurisdiction()
    create_person()

    # "Rocky" is already saved in other_names
    scraped = ScrapePerson("Rocky")
    importer = PersonImporter("jid")
    importer.import_data([scraped.as_dict()])

    assert Person.objects.count() == 1
Example #11
0
def test_invalid_fields_related_item():
    """An extra key on a person's link makes the import raise DataImportError."""
    scraped = ScrapePerson("Dwayne")
    scraped.add_link("http://example.com")

    # add an extra key to the first link of the serialized person
    payload = scraped.as_dict()
    payload["links"][0]["test"] = 3

    with pytest.raises(DataImportError):
        PersonImporter("jid").import_data([payload])
def test_deduplication_no_name_overlap():
    """Importing "CM Punk" after create_person() leaves two people stored."""
    create_jurisdiction()
    create_person()

    # guard against simply refusing to import anything in the same org
    scraped = ScrapePerson("CM Punk")
    importer = PersonImporter("jid")
    importer.import_data([scraped.as_dict()])

    assert Person.objects.count() == 2
def test_deduplication_other_name_overlaps():
    """Importing a person whose other name is "Dwayne Johnson" leaves one person stored."""
    create_jurisdiction()
    create_person()

    # the new person's other_name overlaps with an existing name
    scraped = ScrapePerson("The Rock")
    scraped.add_name("Dwayne Johnson")
    importer = PersonImporter("jid")
    importer.import_data([scraped.as_dict()])

    assert Person.objects.count() == 1
def test_same_name_second_import():
    """A later namesake without a birth date raises SameNameError."""
    create_jurisdiction()
    wwe = Organization.objects.create(name="WWE", jurisdiction_id="jid")

    # two namesakes, told apart only by their birth dates
    older = ScrapePerson("Dwayne Johnson", image="http://example.com/1")
    younger = ScrapePerson("Dwayne Johnson", image="http://example.com/2")
    older.birth_date = "1970"
    younger.birth_date = "1930"

    # with birth dates in place the pair imports without trouble
    PersonImporter("jid").import_data([older.as_dict(), younger.as_dict()])

    # fake some memberships so future lookups work on these people
    for stored in Person.objects.all():
        Membership.objects.create(person=stored, organization=wwe)

    # a third namesake, this time without a birthday
    newcomer = ScrapePerson("Dwayne Johnson", image="http://example.com/3")
    payload = [newcomer.as_dict()]

    with pytest.raises(SameNameError):
        PersonImporter("jid").import_data(payload)
Example #15
0
def test_save_object_basics():
    """save_object dumps the object once and records its output file name."""
    scraper = Scraper(juris, "/tmp/")
    person = Person("Michael Jordan")
    person.add_source("http://example.com")

    # patch json.dump so the call made by save_object can be inspected
    with mock.patch("json.dump") as dump_mock:
        scraper.save_object(person)

    # the file name is derived from the object's id
    assert "person_" + person._id + ".json" in scraper.output_names["person"]
    dump_mock.assert_called_once_with(person.as_dict(), mock.ANY, cls=mock.ANY)
def test_bill_sponsor_limit_lookup():
    """Sponsor lookup by identifier must stay inside the bill's jurisdiction.

    Two people share a name and an identifier but differ in birth date. The
    first is imported under "jid" and given a membership there; the second
    is imported under "another-jurisdiction". The bill's sponsorship is
    expected to resolve to the first person only.
    """
    create_jurisdiction()
    org = create_org()

    bill = ScrapeBill(
        "HB 1", "1900", "Axe & Tack Tax Act", classification="tax bill", chamber="lower"
    )
    bill.add_sponsorship_by_identifier(
        name="SNODGRASS",
        classification="sponsor",
        entity_type="person",
        primary=True,
        identifier="TOTALLY_REAL_ID",
        scheme="TOTALLY_REAL_SCHEME",
    )

    oi = OrganizationImporter("jid")
    pi = PersonImporter("jid")

    zs = ScrapePerson(name="Zadock Snodgrass", birth_date="1800-01-01")
    zs.add_identifier(identifier="TOTALLY_REAL_ID", scheme="TOTALLY_REAL_SCHEME")
    pi.import_data([zs.as_dict()])

    za_db = Person.objects.get()
    Membership.objects.create(person_id=za_db.id, organization_id=org.id)

    zs2 = ScrapePerson(name="Zadock Snodgrass", birth_date="1900-01-01")
    zs2.add_identifier(identifier="TOTALLY_REAL_ID", scheme="TOTALLY_REAL_SCHEME")

    # This is contrived and perhaps broken, but we're going to check this.
    # We *really* don't want to *ever* cross jurisdiction bounds.
    # Import the look-alike (zs2), not zs again: only its different birth
    # date lets the final assertion tell which person the lookup picked.
    PersonImporter("another-jurisdiction").import_data([zs2.as_dict()])

    BillImporter("jid", oi, pi).import_data([bill.as_dict()])

    obj = Bill.objects.get()
    (entry,) = obj.sponsorships.all()
    assert entry.person.name == "Zadock Snodgrass"
    assert entry.person.birth_date == "1800-01-01"
Example #17
0
def test_save_related():
    """Saving an object also dumps what sits in its _related list, in order."""
    scraper = Scraper(juris, "/tmp/")

    person = Person("Michael Jordan")
    person.add_source("http://example.com")

    team = Organization("Chicago Bulls", classification="committee")
    team.add_source("http://example.com")
    person._related.append(team)

    with mock.patch("json.dump") as dump_mock:
        scraper.save_object(person)

    # one json.dump call per object: the person first, then the related one
    expected_calls = [
        mock.call(saved.as_dict(), mock.ANY, cls=mock.ANY)
        for saved in (person, team)
    ]
    assert dump_mock.mock_calls == expected_calls
def test_multiple_memberships():
    """Deduplication still works for a person with two memberships in one jurisdiction."""
    create_jurisdiction()

    # regression guard: two or more memberships to the same jurisdiction
    # used to cause an ORM error
    wrestler = Person.objects.create(name="Dwayne Johnson")
    for org_name in ("WWE", "WWF"):
        promotion = Organization.objects.create(
            name=org_name, jurisdiction_id="jid"
        )
        Membership.objects.create(person=wrestler, organization=promotion)

    scraped = ScrapePerson("Dwayne Johnson")
    PersonImporter("jid").import_data([scraped.as_dict()])

    # still a single person after the import
    assert Person.objects.count() == 1
def test_no_membership_for_person():
    """Importing an empty membership list after a person import raises NoMembershipsError."""
    create_jurisdiction()
    Organization.objects.create(
        id="fnd",
        name="Foundation",
        classification="foundation",
        jurisdiction_id="fnd-jid",
    )

    # bring in a person that has no memberships
    scraped = ScrapePerson("a man without a country")
    people = PersonImporter("fnd-jid")
    people.import_data([scraped.as_dict()])

    # the same mock importer fills both remaining constructor arguments
    stand_in = DumbMockImporter()
    memberships = MembershipImporter("fnd-jid", people, stand_in, stand_in)

    no_memberships = []
    with pytest.raises(NoMembershipsError):
        memberships.import_data(no_memberships)
def test_full_bill():
    """Import a bill carrying every kind of related item and check each one."""
    create_jurisdiction()
    adam = ScrapePerson("Adam Smith")
    house = ScrapeOrganization(name="House", classification="lower")
    committee = ScrapeOrganization(
        name="Arbitrary Committee",
        classification="committee",
        parent_id=house._id,
    )

    # both bills share title, classification and originating organization
    common = {"classification": "tax bill", "from_organization": house._id}
    previous_bill = ScrapeBill("HB 99", "1899", "Axe & Tack Tax Act", **common)
    bill = ScrapeBill("HB 1", "1900", "Axe & Tack Tax Act", **common)

    bill.subject = ["taxes", "axes"]
    bill.add_identifier("SB 9")
    bill.add_title("Tack & Axe Tax Act")
    bill.add_action("introduced in house", "1900-04-01", chamber="lower")
    referral = bill.add_action(
        "sent to arbitrary committee", "1900-04-04", chamber="lower"
    )
    referral.add_related_entity(
        "arbitrary committee", "organization", committee._id
    )
    bill.add_related_bill(
        "HB 99", legislative_session="1899", relation_type="prior-session"
    )
    # one sponsor linked to a known person, one given by name only
    bill.add_sponsorship(
        "Adam Smith",
        classification="extra sponsor",
        entity_type="person",
        primary=False,
        entity_id=adam._id,
    )
    bill.add_sponsorship(
        "Jane Smith",
        classification="lead sponsor",
        entity_type="person",
        primary=True,
    )
    bill.add_abstract(
        "This is an act about axes and taxes and tacks.",
        note="official",
        date="1969-10-20",
    )
    # two links added under the same document note
    for url, media_type in (
        ("http://example.com/fn.pdf", "application/pdf"),
        ("http://example.com/fn.html", "text/html"),
    ):
        bill.add_document_link("Fiscal Note", url, media_type=media_type)
    bill.add_version_link(
        "Fiscal Note", "http://example.com/v/1", media_type="text/html"
    )
    bill.add_source("http://example.com/source")

    # run the imports: organizations, then people, then both bills
    org_importer = OrganizationImporter("jid")
    org_importer.import_data([house.as_dict(), committee.as_dict()])

    person_importer = PersonImporter("jid")
    person_importer.import_data([adam.as_dict()])

    bill_importer = BillImporter("jid", org_importer, person_importer)
    bill_importer.import_data([previous_bill.as_dict(), bill.as_dict()])

    # read the bill back and compare its own fields
    stored = Bill.objects.get(identifier="HB 1")
    assert stored.from_organization.classification == "lower"
    assert stored.identifier == bill.identifier
    assert stored.title == bill.title
    assert stored.classification == bill.classification
    assert stored.subject == ["taxes", "axes"]
    abstract = stored.abstracts.get()
    assert abstract.note == "official"
    assert abstract.date == "1969-10-20"

    # the extra title and identifier were stored
    assert stored.other_titles.get().title == "Tack & Axe Tax Act"
    assert stored.other_identifiers.get().identifier == "SB 9"

    # actions
    actions = list(stored.actions.all())
    assert len(actions) == 2
    introduced, referred = actions
    # ensure order was preserved (if this breaks it'll be intermittent)
    assert introduced.organization == Organization.objects.get(
        classification="lower"
    )
    assert introduced.description == "introduced in house"
    assert referred.description == "sent to arbitrary committee"
    related_entity = referred.related_entities.get()
    assert related_entity.organization == Organization.objects.get(
        classification="committee"
    )

    # the related bill was added
    related = stored.related_bills.get()
    assert related.identifier == "HB 99"

    # and it got resolved to a bill
    assert related.related_bill.identifier == "HB 99"

    # sponsors: the primary one is unlinked, the other links to Adam Smith
    sponsorships = stored.sponsorships.all()
    assert len(sponsorships) == 2
    stored_adam = Person.objects.get(name="Adam Smith")
    for sponsorship in sponsorships:
        if not sponsorship.primary:
            assert sponsorship.person == stored_adam
            continue
        assert sponsorship.person is None
        assert sponsorship.organization is None

    # versions and documents together with their links
    versions = stored.versions.all()
    assert len(versions) == 1
    assert versions[0].links.count() == 1
    documents = stored.documents.all()
    assert len(documents) == 1
    assert documents[0].links.count() == 2

    # sources
    assert stored.sources.count() == 1