def test_same_name_people():
    """Walk PersonImporter through several imports of people sharing one name.

    Same-name people without birth dates must raise SameNameError; giving
    them distinct birth dates lets them coexist, and a changed birth date is
    treated as a brand-new person.
    """
    create_jurisdiction()
    org = Organization.objects.create(name="WWE", jurisdiction_id="jid")

    def run_import(*people):
        # a fresh importer per call, fed the current dict form of each person
        return PersonImporter("jid").import_data(
            [person.as_dict() for person in people]
        )

    def link_everyone_to_org():
        # fake memberships so that later lookups work on the imported people
        for person in Person.objects.all():
            Membership.objects.create(person=person, organization=org)

    first = ScrapePerson("Dwayne Johnson", image="http://example.com/1")
    second = ScrapePerson("Dwayne Johnson", image="http://example.com/2")

    # a pristine database must reject two people with the same name
    with pytest.raises(SameNameError):
        run_import(first, second)

    # a single person on their own is accepted
    run_import(first)
    link_everyone_to_org()

    # a second person with that name is still rejected
    with pytest.raises(SameNameError):
        run_import(first, second)

    # distinct birth dates disambiguate the two: one update, one insert
    first.birth_date = "1970"
    second.birth_date = "1930"
    counts = run_import(first, second)["person"]
    assert counts["insert"] == 1
    assert counts["noop"] == 0
    assert counts["update"] == 1
    assert Person.objects.count() == 2
    link_everyone_to_org()

    # a third namesake lacking a birth date cannot be matched, so it errors
    third = ScrapePerson("Dwayne Johnson", image="http://example.com/3")
    with pytest.raises(SameNameError):
        run_import(third)

    # an image change is an update; a changed birth date means a new insert
    first.image = "http://example.com/1.jpg"
    second.birth_date = "1931"
    counts = run_import(first, second)["person"]
    assert Person.objects.count() == 3
    assert counts["insert"] == 1
    assert counts["noop"] == 0
    assert counts["update"] == 1
def test_same_name_second_import():
    """A namesake without a birth date is rejected after dated ones exist."""
    create_jurisdiction()
    org = Organization.objects.create(name="WWE", jurisdiction_id="jid")

    # two people sharing a name, told apart by their birth dates
    born_1970 = ScrapePerson("Dwayne Johnson", image="http://example.com/1")
    born_1930 = ScrapePerson("Dwayne Johnson", image="http://example.com/2")
    born_1970.birth_date = "1970"
    born_1930.birth_date = "1930"

    # with birth dates present, importing both together succeeds
    PersonImporter("jid").import_data(
        [born_1970.as_dict(), born_1930.as_dict()]
    )

    # fake some memberships so future lookups work on these people
    for person in Person.objects.all():
        Membership.objects.create(person=person, organization=org)

    # a later import of an undated namesake must raise
    undated = ScrapePerson("Dwayne Johnson", image="http://example.com/3")
    with pytest.raises(SameNameError):
        PersonImporter("jid").import_data([undated.as_dict()])
Example #3
0
    def legislators(self, latest_only):
        """Scrape legislators and collect the terms seen for each of them.

        Walks every row yielded by ``self._memberships(latest_only)``, keeps
        one ``Person`` per distinct (alias-normalized) name, and enriches it
        with sources, a link, a birth date, a photo and contact details read
        from the legislator's detail page.

        Args:
            latest_only: Passed through unchanged to ``self._memberships``.

        Returns:
            dict mapping legislator name to a ``(person, terms)`` tuple, where
            ``terms`` is a list of ``(chamber, district, term, party)``
            tuples, one per membership row seen for that name.

        Raises:
            KeyError: If a row carries a party code other than D, R or I.
            IndexError: If a row's name cell has no detail-page link.
        """
        party_names = {"D": "Democratic", "R": "Republican", "I": "Independent"}
        # Text found on a detail page whose biography is not available yet.
        hotgarbage = (
            "Senate Biography Information for the 98th General "
            "Assembly is not currently available."
        )
        # (contact-detail note, XPath selecting that office's table)
        offices = (
            ("Capitol Office", '//table[contains(string(), "Springfield Office")]'),
            ("District Office", '//table[contains(string(), "District Office")]'),
        )
        legs = {}

        for member, chamber, term, url in self._memberships(latest_only):
            name, _, _, district, party = member.xpath("td")
            district = district.text
            detail_url = name.xpath("a/@href")[0]

            # a blank party cell is treated as Independent
            if party.text_content().strip() == "":
                party = "Independent"
            else:
                party = party_names[party.text]
            name = name.text_content().strip()

            # inactive legislator (name ends with "*"), skip them for now
            if name.endswith("*"):
                continue

            name = AKA.get(name, name)

            if name in legs:
                # already seen under this name: reuse the person, add the term
                p, terms = legs[name]
                terms.append((chamber, district, term, party))
            else:
                p = Person(name, party=party)
                legs[name] = p, [(chamber, district, term, party)]

            p.add_source(url)
            p.add_source(detail_url)
            p.add_link(detail_url)

            birth_date = BIRTH_DATES.get(name)
            if birth_date:
                p.birth_date = birth_date

            leg_html = self.get(detail_url).text
            leg_doc = lxml.html.fromstring(leg_html)
            leg_doc.make_links_absolute(detail_url)

            if hotgarbage in leg_html:
                # The legislator's bio isn't available yet.
                self.logger.warning("No legislator bio available for %s", name)
                continue

            # A page without a member photo should not abort the whole scrape.
            photos = leg_doc.xpath('//img[contains(@src, "/members/")]/@src')
            if photos:
                p.image = photos[0]
            else:
                self.logger.warning("No photo found for %s", name)

            # Reset so that several rows for the same person do not pile up
            # duplicate contact details (presumably the intent -- confirm).
            p.contact_details = []
            # email
            email = leg_doc.xpath('//b[text()="Email: "]')
            if email:
                # .tail is None when no text follows the <b> label
                address = (email[0].tail or "").strip()
                if address:
                    p.add_contact_detail(
                        type="email", value=address, note="Capitol Office"
                    )

            for location, xpath in offices:
                tables = leg_doc.xpath(xpath)
                if not tables:
                    continue
                # The fourth match is the one parsed; presumably the earlier
                # matches are enclosing layout tables -- confirm against the
                # live page. Guard the index so a shallower page is skipped
                # with a warning instead of raising IndexError.
                if len(tables) < 4:
                    self.logger.warning(
                        "Unexpected %s table layout for %s", location, name
                    )
                    continue

                for kind, value in self._table_to_office(tables[3]):
                    if kind in ("fax", "voice") and not validate_phone_number(value):
                        continue

                    p.add_contact_detail(type=kind, value=value, note=location)

        return legs