def test_full_organization():
    """Import a fully populated scraped organization and check each related record."""
    create_jurisdictions()

    link_url = "http://example.com/link"
    source_url = "http://example.com/source"

    # Build a scraped organization carrying one of every kind of related detail.
    scraped = ScrapeOrganization("United Nations", classification="international")
    scraped.add_identifier("un")
    scraped.add_name("UN", start_date="1945")
    scraped.add_contact_detail(
        type="phone",
        value="555-555-1234",
        note="this is fake",
    )
    scraped.add_link(link_url)
    scraped.add_source(source_url)

    # Serialize the scraped object and hand it to the importer.
    payload = scraped.as_dict()
    OrganizationImporter("jid1").import_data([payload])

    # Fetch the organization back from the database and compare field by field.
    imported = Organization.objects.get()
    assert "ocd-organization" in imported.id
    assert imported.name == scraped.name

    identifier = imported.identifiers.all()[0]
    assert identifier.identifier == "un"
    assert identifier.scheme == ""

    other_name = imported.other_names.all()[0]
    assert other_name.name == "UN"
    assert other_name.start_date == "1945"

    contact = imported.contact_details.all()[0]
    assert contact.type == "phone"
    assert contact.value == "555-555-1234"
    assert contact.note == "this is fake"

    assert imported.links.all()[0].url == link_url
    assert imported.sources.all()[0].url == source_url
Esempio n. 2
0
 def scrape_committees(self, chamber):
     """Yield one committee ``Organization`` per row of a chamber's listing page.

     The page at ``_COMMITTEE_URL % _CHAMBERS[chamber]`` is fetched and each
     row after the first is parsed into a committee with an email and a
     voice contact detail, the page URL as its source, and its members.

     Args:
         chamber: Key into ``_CHAMBERS``; also passed through to the
             ``Organization`` as its ``chamber``.

     Yields:
         An ``Organization`` with classification ``"committee"``.

     Raises:
         IndexError: If a row yields fewer text fields than the positions
             read below (name at 0, email/voice at 3/4 or 4/5).
     """
     url = _COMMITTEE_URL % _CHAMBERS[chamber]
     page = self.get(url).text
     html = lxml.html.fromstring(page)
     table = html.xpath(
         "body/section[2]/div/div/section[2]/div[2]/div/div/div/div")
     # The first matched element is skipped (presumably a header row).
     for row in table[1:]:
         # committee name, description, hours of operation,
         # secretary and office_phone
         text = row[0].xpath("div")[0].itertext()
         fields = []
         for value in text:
             # Drop bare newlines and the "Email:" / "Phone:" label nodes.
             if value == "\n" or "Email:" in value or "Phone:" in value:
                 continue
             cleaned = value.replace("\xa0", " ").replace("Secretary:", "")
             # Strip any remaining non-ASCII characters but stay in ``str``:
             # calling str() on bytes would produce the "b'...'" repr and a
             # later .decode() on that str would fail.
             cleaned = cleaned.encode("ascii", "ignore").decode("ascii")
             if "Room" in cleaned:
                 # Keep only the text before the room number.
                 cleaned = cleaned.split("Room")[0].replace(", ", " ")
             fields.append(cleaned)

         org = Organization(
             chamber=chamber,
             classification="committee",
             name=fields[0],
         )
         # Rows with more than five fields carry one extra entry ahead of
         # the contact values, shifting both positions by one.
         email_index = 4 if len(fields) > 5 else 3
         org.add_contact_detail(
             type="email",
             value=fields[email_index],
             note="District Office",
         )
         org.add_contact_detail(
             type="voice",
             value=fields[email_index + 1],
             note="District Office",
         )
         org.add_source(url)

         # membership
         td_text = []
         for td in row[1].xpath("div") + row[2].xpath("div"):
             td_text.extend(td.itertext())
         members = [value for value in td_text
                    if value not in (" ", "\n", ",")]
         # A "Chair" / "Vice Chair" text node sets the role of the next
         # non-blank name only; after that name the role reverts to "member".
         role = "member"
         for member in members:
             if member in ("Chair", "Vice Chair"):
                 role = member.lower()
                 continue
             name = member.strip()
             if name:
                 org.add_member(name, role=role)
                 role = "member"
         yield org