コード例 #1
0
    def get_organizations(self):
        """Yield the Arizona Secretary of State's office as an Organization.

        The organization is also stored on ``self._secretary_of_state``
        before it is yielded.
        """
        office = Organization(
            name="Office of the Secretary of State, State of Arizona",
            classification="office",
        )

        # Contact details are registered phone first, then street address.
        contact_details = (
            ("voice", "602-542-4285"),
            ("address", "1700 W Washington St Fl 7, Phoenix AZ 85007-2808"),
        )
        for detail_type, detail_value in contact_details:
            office.add_contact_detail(type=detail_type, value=detail_value)

        office.add_link(url="http://www.azsos.gov/", note="Home page")

        self._secretary_of_state = office
        yield office
コード例 #2
0
def test_full_organization():
    """Import a fully populated scraped organization and check every field."""
    scraped = ScrapeOrganization('United Nations', classification='international')
    scraped.add_identifier('un')
    scraped.add_name('UN', start_date='1945')
    scraped.add_contact_detail(type='phone', value='555-555-1234', note='this is fake')
    scraped.add_link('http://example.com/link')
    scraped.add_source('http://example.com/source')

    # push the scraped organization through the importer
    OrganizationImporter('jurisdiction-id').import_data([scraped.as_dict()])

    # fetch the organization back from the db and check it imported correctly
    imported = Organization.objects.get()
    assert 'ocd-organization' in imported.id
    assert imported.name == scraped.name

    identifier = imported.identifiers.all()[0]
    assert identifier.identifier == 'un'
    assert identifier.scheme == ''

    other_name = imported.other_names.all()[0]
    assert other_name.name == 'UN'
    assert other_name.start_date == '1945'

    contact = imported.contact_details.all()[0]
    assert contact.type == 'phone'
    assert contact.value == '555-555-1234'
    assert contact.note == 'this is fake'

    assert imported.links.all()[0].url == 'http://example.com/link'
    assert imported.sources.all()[0].url == 'http://example.com/source'
コード例 #3
0
    def get_organizations(self):
        """Yield the Ferguson City Council with its mayor and ward posts."""
        council = Organization(
            name="Ferguson City Council",
            classification="legislature",
        )
        council.add_contact_detail(type='email', value='*****@*****.**')

        # One mayoral post, then one council post per ward, all tied to
        # this scraper's division.
        council.add_post(label="Mayor", role="Mayor", division_id=self.division_id)

        ward_count = 3
        for ward_number in range(1, ward_count + 1):
            title = "Council Member Ward {}".format(ward_number)
            council.add_post(label=title, role=title, division_id=self.division_id)

        yield council
コード例 #4
0
def test_full_organization():
    """Round-trip a fully populated organization through the importer."""
    create_jurisdictions()

    scraped_org = ScrapeOrganization('United Nations', classification='international')
    scraped_org.add_identifier('un')
    scraped_org.add_name('UN', start_date='1945')
    scraped_org.add_contact_detail(type='phone', value='555-555-1234', note='this is fake')
    scraped_org.add_link('http://example.com/link')
    scraped_org.add_source('http://example.com/source')

    # run the scraped data through the importer
    payload = [scraped_org.as_dict()]
    OrganizationImporter('jid1').import_data(payload)

    # load the organization back from the db and verify each imported field
    saved = Organization.objects.get()
    assert 'ocd-organization' in saved.id
    assert saved.name == scraped_org.name

    first_identifier = saved.identifiers.all()[0]
    assert first_identifier.identifier == 'un'
    assert first_identifier.scheme == ''

    first_alias = saved.other_names.all()[0]
    assert first_alias.name == 'UN'
    assert first_alias.start_date == '1945'

    first_contact = saved.contact_details.all()[0]
    assert first_contact.type == 'phone'
    assert first_contact.value == '555-555-1234'
    assert first_contact.note == 'this is fake'

    assert saved.links.all()[0].url == 'http://example.com/link'
    assert saved.sources.all()[0].url == 'http://example.com/source'
コード例 #5
0
    def categorize_data(self, csv_data):
        """Turn raw contribution CSV text into donor/recipient/event triples.

        Args:
            csv_data: CSV text with one contribution per line; the columns
                follow ``self.csv_header_row``.

        Yields:
            ``(donor, recipient, transaction)`` for every row whose
            contributor could be categorized, or an empty list for rows with
            an unrecognized contributor type or an unnamed
            'Unknown/Anonymous' contributor.

        Raises:
            TypeError: if a row does not have one field per header column.
        """
        import csv

        Contribution = namedtuple('Contribution', self.csv_header_row.replace(' ', '_'))

        # Lines are split explicitly on '\n' (as before); csv.reader then
        # splits the fields so quoted values containing commas, such as
        # addresses, stay in one column instead of being cut by str.split(',').
        for row in csv.reader(csv_data.split('\n')):
            if not row:
                continue

            contribution = Contribution(*row)

            # donor is the person or organization that made the contribution
            donor = None
            contributor_type = contribution.Contributor_Type
            if contributor_type in self.business_contribution_types:
                donor = Organization(contribution.Contributor_Name)
            elif contributor_type in self.individual_contribution_types:
                donor = Person(contribution.Contributor_Name)
            elif contributor_type == 'Unknown/Anonymous' and contribution.Contributor_Name:
                # named anonymous contributors look like catch-all business
                # contributions; un-named ones are ignored
                donor = Organization(contribution.Contributor_Name)

            if not donor:
                # nothing to attach the contribution to
                yield []
                continue

            donor.add_source(url=self.search_url)
            donor.source_identified = True
            if contribution.Contributor_Address:
                donor.add_contact_detail(type='address', value=contribution.Contributor_Address)
            if contribution.Employer_Name:
                donor.extras['Employer'] = contribution.Employer_Name
            if contribution.Employer_Occupation:
                donor.extras['Occupation'] = contribution.Employer_Occupation

            # recipient is the organization that received the contribution
            recipient = Organization(contribution.Receiving_Committee)
            recipient.extras['Office'] = contribution.Office
            recipient.extras['Filing Period'] = contribution.Filing_Period
            recipient.extras['Fundtype'] = contribution.Fundtype

            # transaction is the event linking the donor and the recipient;
            # EST and Maryland because the data is from MD
            transaction = Event('Contribution', contribution.Contribution_Date, 'EST', 'Maryland')
            transaction.extras['Contribution Amount'] = contribution.Contribution_Amount
            transaction.extras['Contribution Type'] = contribution.Contribution_Type
            transaction.add_source(url=self.search_url)
            transaction.participants.append(donor.as_dict())
            transaction.participants.append(recipient.as_dict())
            yield (donor, recipient, transaction)
コード例 #6
0
    def get_organizations(self):
        """Yield the California Secretary of State's office as an Organization.

        The organization is also kept on ``self._secretary_of_state``.
        """
        sos_office = Organization(
            name="Office of the Secretary of State, State of California",
            classification="office",
        )
        sos_office.add_contact_detail(type="voice", value="916-653-6814")
        sos_office.add_contact_detail(
            type="address",
            value="1500 11th Street, Sacramento, CA 95814",
        )
        sos_office.add_link(url="http://www.sos.ca.gov", note="Home page")

        self._secretary_of_state = sos_office
        yield sos_office
コード例 #7
0
ファイル: committees.py プロジェクト: zenjen777/openstates
 def scrape_committees(self, chamber):
     """Scrape the committee listing page for one chamber.

     Args:
         chamber: key into ``_CHAMBERS``; also passed to each Organization.

     Yields:
         One Organization per committee row, carrying email/voice contact
         details, the listing URL as its source, and its members.
     """
     url = _COMMITTEE_URL % _CHAMBERS[chamber]
     page = self.get(url).text
     html = lxml.html.fromstring(page)
     # the first matched element is skipped
     table = html.xpath(
         'body/section[2]/div/div/div/section[2]/div[2]/div/div/div/div'
     )[1:]
     for row in table:
         # committee name, description, hours of operation,
         # secretary and office_phone
         text = list(row[0].xpath('div')[0].itertext())
         fields = [
             value.replace(u'\xa0', ' ').replace(
                 'Secretary:', '').encode('ascii', 'ignore')
             for value in text
             if 'Email:' not in value and value != '\n'
             and 'Phone:' not in value
         ]
         # NOTE(review): entries rewritten here become str rather than
         # bytes; the .decode() calls below presumably rely on the name,
         # email and phone entries never containing 'Room' -- confirm.
         for i, field in enumerate(fields):
             if 'Room' in str(field):
                 fields[i] = str(field).split('Room')[0].replace(', ', ' ')
         org = Organization(chamber=chamber,
                            classification="committee",
                            name=str(fields[0].decode()))
         # rows with more than five entries keep email/phone one slot later
         email_index = 4 if len(fields) > 5 else 3
         org.add_contact_detail(type='email',
                                value=str(fields[email_index].decode()),
                                note='District Office')
         org.add_contact_detail(type='voice',
                                value=str(fields[email_index + 1].decode()),
                                note='District Office')
         org.add_source(url)
         # membership: gather the text of every div, not just the last one,
         # and start from an empty list so a row without divs has no members
         td_text = []
         for td in row[1].xpath('div'):
             td_text.extend(td.itertext())
         members = [
             value for value in td_text
             if value != ' ' and value != '\n' and value != ','
         ]
         # a 'Chair' / 'Vice Chair' label applies to the next name only
         role = "member"
         for member in members:
             if member in ['Chair', 'Vice Chair']:
                 role = member.lower()
                 continue
             name = member.strip()
             if name:
                 # whitespace-only text nodes are not member names
                 org.add_member(name, role=role)
                 role = "member"
         yield org
コード例 #8
0
    def categorize_data(self, csv_data):
        """Turn raw contribution CSV text into donor/recipient/event triples.

        Args:
            csv_data: CSV text with one contribution per line; the columns
                follow ``self.csv_header_row``.

        Yields:
            ``(donor, recipient, transaction)`` for every row whose
            contributor could be categorized, or an empty list for rows with
            an unrecognized contributor type or an unnamed
            'Unknown/Anonymous' contributor. Rows whose field count does not
            match the header are logged and skipped.
        """
        import csv
        import logging

        Contribution = namedtuple('Contribution', self.csv_header_row.replace(' ', '_'))

        # Lines are split explicitly on '\n' (as before); csv.reader then
        # splits the fields so quoted values containing commas, such as
        # addresses, stay in one column instead of being cut by str.split(',').
        for row in csv.reader(csv_data.split('\n')):
            if not row:
                continue

            try:
                contribution = Contribution(*row)
            except TypeError:
                # Wrong number of columns. Skip the row instead of dropping
                # into a debugger and carrying on with a stale record.
                logging.getLogger(__name__).warning(
                    'skipping malformed contribution row: %r', row)
                continue

            # donor is the person or organization that made the contribution
            donor = None
            contributor_type = contribution.Contributor_Type
            if contributor_type in self.business_contribution_types:
                donor = Organization(contribution.Contributor_Name)
            elif contributor_type in self.individual_contribution_types:
                donor = Person(contribution.Contributor_Name)
            elif contributor_type == 'Unknown/Anonymous' and contribution.Contributor_Name:
                # named anonymous contributors look like catch-all business
                # contributions; un-named ones are ignored
                donor = Organization(contribution.Contributor_Name)

            if not donor:
                # nothing to attach the contribution to
                yield []
                continue

            donor.add_source(url=self.search_url)
            donor.source_identified = True
            if contribution.Contributor_Address:
                donor.add_contact_detail(type='address', value=contribution.Contributor_Address)
            if contribution.Employer_Name:
                donor.extras['Employer'] = contribution.Employer_Name
            if contribution.Employer_Occupation:
                donor.extras['Occupation'] = contribution.Employer_Occupation

            # recipient is the organization that received the contribution
            recipient = Organization(contribution.Receiving_Committee)
            recipient.extras['Office'] = contribution.Office
            recipient.extras['Filing Period'] = contribution.Filing_Period
            recipient.extras['Fundtype'] = contribution.Fundtype

            # transaction is the event linking the donor and the recipient;
            # EST and Maryland because the data is from MD
            transaction = Event('Contribution', contribution.Contribution_Date, 'EST', 'Maryland')
            transaction.extras['Contribution Amount'] = contribution.Contribution_Amount
            transaction.extras['Contribution Type'] = contribution.Contribution_Type
            transaction.add_source(url=self.search_url)
            transaction.participants.append(donor.as_dict())
            transaction.participants.append(recipient.as_dict())
            yield (donor, recipient, transaction)
コード例 #9
0
    def get_organizations(self):
        """Yield the Ferguson City Council along with its elected posts.

        The council gets a "Mayor" post followed by one
        "Council Member Ward N" post for each of the three wards.
        """
        city_council = Organization(
            name="Ferguson City Council",
            classification="legislature",
        )
        city_council.add_contact_detail(type='email', value='*****@*****.**')

        number_of_wards = 3
        post_titles = ["Mayor"]
        post_titles.extend(
            "Council Member Ward {}".format(ward)
            for ward in range(1, number_of_wards + 1)
        )

        # label and role are the same text for every post
        for post_title in post_titles:
            city_council.add_post(
                label=post_title,
                role=post_title,
                division_id=self.division_id,
            )

        yield city_council
コード例 #10
0
    def scrape_committees(self, repos):
        """Yield committee and subcommittee Organizations from YAML files.

        Args:
            repos: iterable of file paths appended to the raw GitHub URL of
                the unitedstates/congress-legislators repository.

        Yields:
            For every committee, each of its subcommittee Organizations
            (parented to the committee) followed by the committee
            Organization itself.
        """
        for repo in repos:
            source = "https://raw.githubusercontent.com/unitedstates/congress-legislators/master/{0}".format(repo)
            committees = self.fetch_yaml(source)
            for committee in committees:
                org = Organization(committee["name"], classification="committee")
                org.add_source(source)

                for key in committee.keys() & {"url", "rss_url"}:
                    org.add_link(committee[key])

                for key in committee.keys() & {"phone", "address"}:
                    # phone numbers are stored under the "voice" contact type
                    contact_type = "voice" if key == "phone" else key
                    org.add_contact_detail(type=contact_type, value=committee[key])

                for key in committee.keys() & {"senate_committee_id", "house_committee_id", "thomas_id"}:
                    org.add_identifier(committee[key], scheme=key)

                for subcommittee in committee.get("subcommittees", ()):
                    sub_org = Organization(subcommittee["name"], classification="committee", parent_id=org._id)

                    sub_org.add_identifier(subcommittee["thomas_id"], scheme="thomas")
                    sub_org.add_source(source)

                    for key in subcommittee.keys() & {"phone", "address"}:
                        # read the subcommittee's own value; the parent
                        # committee may lack the key or hold a different one
                        contact_type = "voice" if key == "phone" else key
                        sub_org.add_contact_detail(type=contact_type, value=subcommittee[key])

                    yield sub_org

                yield org
コード例 #11
0
    def get_organizations(self):
        """Yield Virginia's Office of the Secretary of the Commonwealth.

        The organization is also stored on
        ``self._secretary_of_the_commonwealth`` before being yielded.
        """
        office = Organization(
            name="Office of the Secretary of the Commonwealth, Commonwealth of Virginia",
            classification="office",
        )

        # phone first, then street address
        contacts = [
            {"type": "voice", "value": "804-786-2441"},
            {
                "type": "address",
                "value": "1111 East Broad Street, 4th Floor, Richmond, Virginia 23219",
            },
        ]
        for contact in contacts:
            office.add_contact_detail(**contact)

        office.add_link(url="https://commonwealth.virginia.gov/", note="Home page")

        self._secretary_of_the_commonwealth = office
        yield office
コード例 #12
0
    def get_organizations(self):
        """Yield the California Secretary of State's office.

        The same Organization is saved on ``self._secretary_of_state``.
        """
        office = Organization(
            name="Office of the Secretary of State, State of California",
            classification="office",
        )

        # (contact type, value) pairs, added in this order
        for contact_type, contact_value in (
            ("voice", "916-653-6814"),
            ("address", "1500 11th Street, Sacramento, CA 95814"),
        ):
            office.add_contact_detail(type=contact_type, value=contact_value)

        office.add_link(url="http://www.sos.ca.gov", note="Home page")

        self._secretary_of_state = office
        yield office
コード例 #13
0
ファイル: committees.py プロジェクト: neelneelpurk/openstates
 def scrape_committees(self, chamber):
     """Scrape the committee listing page for one chamber.

     Args:
         chamber: key into ``_CHAMBERS``; also passed to each Organization.

     Yields:
         One Organization per committee row, carrying email/voice contact
         details, the listing URL as its source, and its members.
     """
     url = _COMMITTEE_URL % _CHAMBERS[chamber]
     # NOTE(review): verify=False presumably disables TLS certificate
     # verification for this request -- confirm it is still required.
     page = self.get(url, verify=False).text
     html = lxml.html.fromstring(page)
     # the first matched element is skipped
     table = html.xpath('body/section[2]/div/div/div/section[2]/div[2]/div/div/div/div')[1:]
     for row in table:
         # committee name, description, hours of operation,
         # secretary and office_phone
         text = list(row[0].xpath('div')[0].itertext())
         fields = [
             value.replace(u'\xa0', ' ')
             .replace('Secretary:', '').encode('ascii', 'ignore')
             for value in text
             if 'Email:' not in value and value != '\n' and 'Phone:' not in value
         ]
         # NOTE(review): entries rewritten here become str rather than
         # bytes; the .decode() calls below presumably rely on the name,
         # email and phone entries never containing 'Room' -- confirm.
         for i, field in enumerate(fields):
             if 'Room' in str(field):
                 fields[i] = str(field).split('Room')[0].replace(', ', ' ')
         org = Organization(chamber=chamber, classification="committee",
                            name=str(fields[0].decode()))
         # rows with more than five entries keep email/phone one slot later
         email_index = 4 if len(fields) > 5 else 3
         org.add_contact_detail(type='email', value=str(fields[email_index].decode()),
                                note='District Office')
         org.add_contact_detail(type='voice', value=str(fields[email_index + 1].decode()),
                                note='District Office')
         org.add_source(url)
         # membership: gather the text of every div, not just the last one,
         # and start from an empty list so a row without divs has no members
         td_text = []
         for td in row[1].xpath('div'):
             td_text.extend(td.itertext())
         members = [value
                    for value in td_text
                    if value != ' ' and value != '\n' and value != ',']
         # a 'Chair' / 'Vice Chair' label applies to the next name only
         role = "member"
         for member in members:
             if member in ['Chair', 'Vice Chair']:
                 role = member.lower()
                 continue
             name = member.strip()
             if name:
                 # whitespace-only text nodes are not member names
                 org.add_member(name, role=role)
                 role = "member"
         yield org
コード例 #14
0
    def scrape_committees(self, repos):
        """Yield committee and subcommittee Organizations from YAML files.

        Args:
            repos: iterable of file paths appended to the raw GitHub URL of
                the unitedstates/congress-legislators repository.

        Yields:
            For every committee, each of its subcommittee Organizations
            (parented to the committee) followed by the committee
            Organization itself.
        """
        for repo in repos:
            source = "https://raw.githubusercontent.com/unitedstates/congress-legislators/master/{0}".format(
                repo)
            committees = self.fetch_yaml(source)
            for committee in committees:
                org = Organization(committee['name'],
                                   classification='committee')

                org.add_source(source)

                for key in committee.keys() & {'url', 'rss_url'}:
                    org.add_link(committee[key])

                for key in committee.keys() & {'phone', 'address'}:
                    # phone numbers are stored under the 'voice' contact type
                    contact_type = 'voice' if key == 'phone' else key
                    org.add_contact_detail(type=contact_type,
                                           value=committee[key])

                for key in committee.keys() & {
                        'senate_committee_id', 'house_committee_id',
                        'thomas_id'
                }:
                    org.add_identifier(committee[key], scheme=key)

                for subcommittee in committee.get('subcommittees', ()):
                    sub_org = Organization(subcommittee['name'],
                                           classification="committee",
                                           parent_id=org._id)

                    sub_org.add_identifier(subcommittee['thomas_id'],
                                           scheme="thomas")
                    sub_org.add_source(source)

                    for key in subcommittee.keys() & {'phone', 'address'}:
                        # read the subcommittee's own value; the parent
                        # committee may lack the key or hold a different one
                        contact_type = 'voice' if key == 'phone' else key
                        sub_org.add_contact_detail(type=contact_type,
                                                   value=subcommittee[key])

                    yield sub_org

                yield org
コード例 #15
0
    def get_organizations(self):
        """Yield Congress, its two chambers, and their disclosure offices.

        Yields, in order: the Congress, the Senate, the House, the Senate
        Office of Public Record, the Office of the Clerk of the House, and
        finally the Congress a second time. Each organization is stored on
        a private attribute of ``self`` before it is yielded.
        """
        congress = Organization("United States Congress", classification='legislature')
        self._legislature = congress
        yield congress

        upper = Organization(
            name="United States Senate",
            classification='upper',
            parent_id=congress._id,
        )
        self._senate = upper
        yield upper

        lower = Organization(
            name="United States House",
            classification='lower',
            parent_id=congress._id,
        )
        self._house = lower
        yield lower

        # Senate Office of Public Record, a child of the Senate
        public_record = Organization(
            name="Office of Public Record, US Senate",
            classification="office",
            parent_id=upper._id,
        )
        public_record.add_contact_detail(type="voice", value="202-224-0322")

        senate_pages = "http://www.senate.gov/pagelayout/legislative/"
        public_record.add_source(
            url=senate_pages + "one_item_and_teasers/opr.htm",
            note="Profile page",
        )
        public_record.add_source(
            url=senate_pages + "g_three_sections_with_teasers/lobbyingdisc.htm#lobbyingdisc=lda",
            note="Disclosure Home",
        )
        public_record.add_link(
            url="http://soprweb.senate.gov/index.cfm?event=selectfields",
            note="Disclosure Search Portal",
        )
        public_record.add_link(
            url="http://soprweb.senate.gov/",
            note="Disclosure Electronic Filing System",
        )
        self._sopr = public_record
        yield public_record

        # Office of the Clerk, a child of the House
        clerk = Organization(
            name="Office of the Clerk, US House",
            classification="office",
            parent_id=lower._id,
        )
        clerk.add_contact_detail(type="voice", value="202-225-7000")
        clerk.add_source(url="http://clerk.house.gov/", note="Home page")
        self._house_clerk = clerk
        yield clerk

        # NOTE(review): the legislature was already yielded first; confirm
        # whether yielding it again here is intentional.
        yield congress
コード例 #16
0
ファイル: committees.py プロジェクト: jtroxell1414/openstates
 def scrape_committees(self, chamber):
     """Scrape the committee listing page for one chamber.

     Yields one Organization per committee row, with email/voice contact
     details, the listing URL as its source, and the members collected
     from the second and third cells of the row.
     """
     url = _COMMITTEE_URL % _CHAMBERS[chamber]
     html = lxml.html.fromstring(self.get(url).text)
     rows = html.xpath(
         "body/section[2]/div/div/section[2]/div[2]/div/div/div/div")
     # the first matched element is skipped
     for row in rows[1:]:
         # committee name, description, hours of operation,
         # secretary and office_phone
         raw_text = list(row[0].xpath("div")[0].itertext())
         fields = []
         for value in raw_text:
             if "Email:" in value or value == "\n" or "Phone:" in value:
                 continue
             cleaned = value.replace(u"\xa0", " ").replace("Secretary:", "")
             fields.append(cleaned.encode("ascii", "ignore"))

         # keep only the part before 'Room', with ', ' flattened to spaces
         for index, field in enumerate(fields):
             as_text = str(field)
             if "Room" in as_text:
                 fields[index] = as_text.split("Room")[0].replace(", ", " ")

         org = Organization(
             chamber=chamber,
             classification="committee",
             name=str(fields[0].decode()),
         )

         # rows with more than five entries keep email/phone one slot later
         email_index = 4 if len(fields) > 5 else 3
         org.add_contact_detail(
             type="email",
             value=str(fields[email_index].decode()),
             note="District Office",
         )
         org.add_contact_detail(
             type="voice",
             value=str(fields[email_index + 1].decode()),
             note="District Office",
         )
         org.add_source(url)

         # membership: text of every div in the second and third cells
         cell_text = []
         for td in row[1].xpath("div") + row[2].xpath("div"):
             cell_text.extend(td.itertext())
         names = [piece for piece in cell_text
                  if piece != " " and piece != "\n" and piece != ","]

         # a 'Chair' / 'Vice Chair' label applies to the next name only
         role = "member"
         for name in names:
             if name in ["Chair", "Vice Chair"]:
                 role = name.lower()
                 continue
             if name.strip():
                 org.add_member(name.strip(), role=role)
                 role = "member"
         yield org
コード例 #17
0
    def get_organizations(self):
        """Yield the US Congress hierarchy used by this scraper.

        The Congress is yielded first, then the Senate and the House (both
        children of the Congress), then the Senate Office of Public Record
        and the Office of the Clerk of the House (children of their
        chambers), and lastly the Congress once more. Every organization is
        saved on ``self`` under a private attribute before being yielded.
        """
        # URLs are spelled out once here so the calls below stay short
        opr_profile_url = ("http://www.senate.gov/pagelayout/legislative/"
                           "one_item_and_teasers/opr.htm")
        opr_disclosure_url = ("http://www.senate.gov/pagelayout/legislative/"
                              "g_three_sections_with_teasers/lobbyingdisc.htm"
                              "#lobbyingdisc=lda")
        opr_search_url = "http://soprweb.senate.gov/index.cfm?event=selectfields"
        opr_filing_url = "http://soprweb.senate.gov/"

        root = Organization("United States Congress", classification='legislature')
        self._legislature = root
        yield root

        senate_org = Organization(name="United States Senate",
                                  classification='upper',
                                  parent_id=root._id)
        self._senate = senate_org
        yield senate_org

        house_org = Organization(name="United States House",
                                 classification='lower',
                                 parent_id=root._id)
        self._house = house_org
        yield house_org

        opr = Organization(name="Office of Public Record, US Senate",
                           classification="office",
                           parent_id=senate_org._id)
        opr.add_contact_detail(type="voice", value="202-224-0322")
        opr.add_source(url=opr_profile_url, note="Profile page")
        opr.add_source(url=opr_disclosure_url, note="Disclosure Home")
        opr.add_link(url=opr_search_url, note="Disclosure Search Portal")
        opr.add_link(url=opr_filing_url, note="Disclosure Electronic Filing System")
        self._sopr = opr
        yield opr

        clerk_office = Organization(name="Office of the Clerk, US House",
                                    classification="office",
                                    parent_id=house_org._id)
        clerk_office.add_contact_detail(type="voice", value="202-225-7000")
        clerk_office.add_source(url="http://clerk.house.gov/", note="Home page")
        self._house_clerk = clerk_office
        yield clerk_office

        # NOTE(review): the legislature was already yielded at the top;
        # confirm whether this second yield is intentional.
        yield root
コード例 #18
0
    def transform_parse(self, parsed_form, response):

        _source = {
            "url": response.url,
            "note": "LDA Form LD-1"
        }

        # basic disclosure fields
        _disclosure = Disclosure(
            effective_date=datetime.strptime(
                parsed_form['datetimes']['effective_date'],
                '%Y-%m-%d %H:%M:%S').replace(tzinfo=UTC),
            timezone='America/New_York',
            submitted_date=datetime.strptime(
                parsed_form['datetimes']['signature_date'],
                '%Y-%m-%d %H:%M:%S').replace(tzinfo=UTC),
            classification="lobbying"
        )

        _disclosure.add_authority(name=self.authority.name,
                                  type=self.authority._type,
                                  id=self.authority._id)

        _disclosure.add_identifier(
            identifier=parsed_form['_meta']['document_id'],
            scheme="urn:sopr:filing"
        )

        # disclosure extras
        _disclosure.extras = {}
        _disclosure.extras['registrant'] = {
            'self_employed_individual': parsed_form['registrant']['self_employed_individual'],
            'general_description': parsed_form['registrant']['registrant_general_description'],
            'signature': {
                "signature_date": parsed_form['datetimes']['signature_date'],
                "signature": parsed_form['signature']
            }
        }

        _disclosure.extras['client'] = {
            'same_as_registrant':
                parsed_form['client']['client_self'],
            'general_description':
                parsed_form['client']['client_general_description']
        }

        _disclosure.extras['registration_type'] = {
            'is_amendment':
                parsed_form['registration_type']['is_amendment'],
            'new_registrant':
                parsed_form['registration_type']['new_registrant'],
            'new_client_for_existing_registrant':
                parsed_form['registration_type'][
                    'new_client_for_existing_registrant'],
        }

        # # Registrant
        # build registrant
        _registrant_self_employment = None

        if parsed_form['registrant']['self_employed_individual']:
            n = ' '.join([p for p in [
                parsed_form['registrant']['registrant_individual_prefix'],
                parsed_form['registrant']['registrant_individual_firstname'],
                parsed_form['registrant']['registrant_individual_lastname']
            ] if len(p) > 0]).strip()

            _registrant = Person(
                name=n,
                source_identified=True
            )

            _registrant_self_employment = Organization(
                name='SELF-EMPLOYMENT of {n}'.format(n=n),
                classification='company',
                source_identified=True
            )

            _registrant.add_membership(
                organization=_registrant_self_employment,
                role='self_employed',
                label='self-employment of {n}'.format(n=n),
                start_date=_disclosure.effective_date.strftime('%Y-%m-%d')
            )
        else:
            _registrant = Organization(
                name=parsed_form['registrant']['registrant_org_name'],
                classification='company',
                source_identified=True
            )

        if len(parsed_form['registrant']['registrant_house_id']) > 0:
            _registrant.add_identifier(
                identifier=parsed_form['registrant']['registrant_house_id'],
                scheme='urn:house_clerk:registrant'
            )

        if len(parsed_form['registrant']['registrant_senate_id']) > 0:
            _registrant.add_identifier(
                identifier=parsed_form['registrant']['registrant_senate_id'],
                scheme='urn:sopr:registrant'
            )

        registrant_contact_details = [
            {
                "type": "address",
                "note": "contact address",
                "value": '; '.join([
                    p for p in [
                        parsed_form['registrant']['registrant_address_one'],
                        parsed_form['registrant']['registrant_address_two'],
                        parsed_form['registrant']['registrant_city'],
                        parsed_form['registrant']['registrant_state'],
                        parsed_form['registrant']['registrant_zip'],
                        parsed_form['registrant']['registrant_country']]
                    if len(p) > 0]).strip(),
            },
            {
                "type": "voice",
                "note": "contact phone",
                "value": parsed_form['registrant']['registrant_contact_phone'],
            },
            {
                "type": "email",
                "note": "contact email",
                "value": parsed_form['registrant']['registrant_contact_email'],
            },
        ]

        registrant_contact_ppb = {
            "type": "address",
            "note": "principal place of business",
            "value": '; '.join([
                p for p in [
                    parsed_form['registrant']['registrant_ppb_city'],
                    parsed_form['registrant']['registrant_ppb_state'],
                    parsed_form['registrant']['registrant_ppb_zip'],
                    parsed_form['registrant']['registrant_ppb_country']]
                if len(p) > 0]).strip(),
        }

        if registrant_contact_ppb["value"]:
            registrant_contact_details.append(registrant_contact_ppb)

        for cd in registrant_contact_details:
            _registrant.add_contact_detail(**cd)

        _registrant.extras = {
            "contact_details_structured": [
                {
                    "type": "address",
                    "note": "contact address",
                    "parts": [
                        {
                            "note": "address_one",
                            "value": parsed_form['registrant'][
                                'registrant_address_one'],
                        },
                        {
                            "note": "address_two",
                            "value": parsed_form['registrant'][
                                'registrant_address_two'],
                        },
                        {
                            "note": "city",
                            "value": parsed_form['registrant'][
                                'registrant_city'],
                        },
                        {
                            "note": "state",
                            "value": parsed_form['registrant'][
                                'registrant_state'],
                        },
                        {
                            "note": "zip",
                            "value": parsed_form['registrant'][
                                'registrant_zip'],
                        },
                        {
                            "note": "country",
                            "value": parsed_form['registrant'][
                                'registrant_country'],
                        }
                    ],
                },
                {
                    "type": "address",
                    "note": "principal place of business",
                    "parts": [
                        {
                            "note": "city",
                            "value": parsed_form['registrant'][
                                'registrant_ppb_city'],
                        },
                        {
                            "note": "state",
                            "value": parsed_form['registrant'][
                                'registrant_ppb_state'],
                        },
                        {
                            "note": "zip",
                            "value": parsed_form['registrant'][
                                'registrant_ppb_zip'],
                        },
                        {
                            "note": "country",
                            "value": parsed_form['registrant'][
                                'registrant_ppb_country'],
                        }
                    ],
                },
            ]
        }

        # # People
        # build contact
        _main_contact = Person(
            name=parsed_form['registrant']['registrant_contact_name'],
            source_identified=True
        )

        main_contact_contact_details = [
            {
                "type": "voice",
                "note": "contact phone",
                "value": parsed_form['registrant']['registrant_contact_phone'],
            },
            {
                "type": "email",
                "note": "contact email",
                "value": parsed_form['registrant']['registrant_contact_email'],
            }
        ]

        for cd in main_contact_contact_details:
            _main_contact.add_contact_detail(**cd)

        if _registrant._type == 'organization':
            _registrant.add_member(
                name_or_person=_main_contact,
                role='main_contact',
                label='main contact for {n}'.format(n=_registrant.name),
                start_date=_disclosure.effective_date.strftime('%Y-%m-%d')
            )
        else:
            _registrant_self_employment.add_member(
                name_or_person=_main_contact,
                role='main_contact',
                label='main contact for {n}'.format(n=_registrant.name),
                start_date=_disclosure.effective_date.strftime('%Y-%m-%d')
            )

        # # Client
        # build client
        _client = Organization(
            name=parsed_form['client']['client_name'],
            classification='company',
            source_identified=True
        )

        client_contact_details = [
            {
                "type": "address",
                "note": "contact address",
                "value": '; '.join([
                    p for p in [
                        parsed_form['client']['client_address'],
                        parsed_form['client']['client_city'],
                        parsed_form['client']['client_state'],
                        parsed_form['client']['client_zip'],
                        parsed_form['client']['client_country']]
                    if len(p) > 0]).strip(),
            },
        ]

        client_contact_ppb = {
            "type": "address",
            "note": "principal place of business",
            "value": '; '.join([
                p for p in [
                    parsed_form['client']['client_ppb_city'],
                    parsed_form['client']['client_ppb_state'],
                    parsed_form['client']['client_ppb_zip'],
                    parsed_form['client']['client_ppb_country']]
                if len(p) > 0]).strip(),
        }

        if client_contact_ppb["value"]:
            client_contact_details.append(client_contact_ppb)

        for cd in client_contact_details:
            _client.add_contact_detail(**cd)

        _client.extras = {
            "contact_details_structured": [
                {
                    "type": "address",
                    "note": "contact address",
                    "parts": [
                        {
                            "note": "address",
                            "value": parsed_form['client']['client_address'],
                        },
                        {
                            "note": "city",
                            "value": parsed_form['client']['client_city'],
                        },
                        {
                            "note": "state",
                            "value": parsed_form['client']['client_state'],
                        },
                        {
                            "note": "zip",
                            "value": parsed_form['client']['client_zip'],
                        },
                        {
                            "note": "country",
                            "value": parsed_form['client']['client_country'],
                        }
                    ],
                },
                {
                    "type": "address",
                    "note": "principal place of business",
                    "parts": [
                        {
                            "note": "city",
                            "value": parsed_form['client']['client_ppb_city'],
                        },
                        {
                            "note": "state",
                            "value": parsed_form['client']['client_ppb_state'],
                        },
                        {
                            "note": "zip",
                            "value": parsed_form['client']['client_ppb_zip'],
                        },
                        {
                            "note": "country",
                            "value": parsed_form['client'][
                                'client_ppb_country'],
                        }
                    ],
                },
            ],
        }

        # Collect Foreign Entities
        _foreign_entities = []
        _foreign_entities_by_name = {}
        for fe in parsed_form['foreign_entities']:
            fe_extras = {}
            fe_name = fe['foreign_entity_name']

            # check for name-based duplicates
            if fe_name in _foreign_entities_by_name:
                _foreign_entity = _foreign_entities_by_name[fe_name]
            else:
                _foreign_entity = Organization(
                    name=fe_name,
                    classification='company',
                    source_identified=True
                )

            # collect contact details
            foreign_entity_contact_details = [
                {
                    "type": "address",
                    "note": "contact address",
                    "value": '; '.join([
                        p for p in [
                            fe['foreign_entity_address'],
                            fe['foreign_entity_city'],
                            fe['foreign_entity_state'],
                            fe['foreign_entity_country']]
                        if len(p) > 0]).strip(),
                },
                {
                    "type": "address",
                    "note": "principal place of business",
                    "value": '; '.join([
                        p for p in [
                            fe['foreign_entity_ppb_state'],
                            fe['foreign_entity_ppb_country']]
                        if len(p) > 0]).strip(),
                },
            ]

            foreign_entity_contact_ppb = {
                "type": "address",
                "note": "principal place of business",
                "value": '; '.join([
                    p for p in [
                        fe['foreign_entity_ppb_city'],
                        fe['foreign_entity_ppb_state'],
                        fe['foreign_entity_ppb_country']]
                    if len(p) > 0]),
            }

            if foreign_entity_contact_ppb["value"]:
                foreign_entity_contact_details.append(
                    foreign_entity_contact_ppb)

            # add contact details
            for cd in foreign_entity_contact_details:
                if cd['value'] != '':
                    _foreign_entity.add_contact_detail(**cd)

            # add extras
            fe_extras["contact_details_structured"] = [
                {
                    "type": "address",
                    "note": "contact address",
                    "parts": [
                        {
                            "note": "address",
                            "value": fe['foreign_entity_address'],
                        },
                        {
                            "note": "city",
                            "value": fe['foreign_entity_city'],
                        },
                        {
                            "note": "state",
                            "value": fe['foreign_entity_state'],
                        },
                        {
                            "note": "country",
                            "value": fe['foreign_entity_country'],
                        }
                    ],
                },
                {
                    "type": "address",
                    "note": "principal place of business",
                    "parts": [
                        {
                            "note": "state",
                            "value": fe['foreign_entity_ppb_state'],
                        },
                        {
                            "note": "country",
                            "value": fe['foreign_entity_ppb_country'],
                        }
                    ],
                },
            ]

            _foreign_entity.extras = combine_dicts(_foreign_entity.extras,
                                                   fe_extras)

            _foreign_entities_by_name[fe_name] = _foreign_entity

        for unique_foreign_entity in _foreign_entities_by_name.values():
            _foreign_entities.append(unique_foreign_entity)

            # TODO: add a variant on memberships to represent inter-org
            # relationships (associations, ownership, etc)
            #
            # _client['memberships'].append({
            #     "id": _foreign_entity['id'],
            #     "classification": "organization",
            #     "name": _foreign_entity['name'],
            #     "extras": {
            #         "ownership_percentage":
            #             fe['foreign_entity_amount']
            #     }
            # })

        # Collect Lobbyists
        # TODO: deal with weird non-name line continuation cases (blanks, "continued")
        _lobbyists_by_name = {}

        for l in parsed_form['lobbyists']:
            l_extras = {}
            l_name = ' '.join([l['lobbyist_first_name'],
                               l['lobbyist_last_name'],
                               l['lobbyist_suffix']
                               ]).strip()

            if l_name in _lobbyists_by_name:
                _lobbyist = _lobbyists_by_name[l_name]
            else:
                _lobbyist = Person(
                    name=l_name,
                    source_identified=True
                )

            if l['lobbyist_covered_official_position']:
                l_extras['lda_covered_official_positions'] = [
                    {
                        'date_reported':
                            parsed_form['datetimes']['effective_date'],
                        'covered_official_position':
                            l['lobbyist_covered_official_position']
                    },
                ]

            _lobbyist.extras = combine_dicts(_lobbyist.extras, l_extras)

            _lobbyists_by_name[l_name] = _lobbyist

        _lobbyists = []
        for unique_lobbyist in _lobbyists_by_name.values():
            _lobbyists.append(unique_lobbyist)

        if _registrant._type == 'organization':
            for l in _lobbyists:
                _registrant.add_member(
                    l,
                    role='lobbyist',
                    label='lobbyist for {n}'.format(n=_registrant.name),
                    start_date=_disclosure.effective_date.strftime('%Y-%m-%d')
                )
        else:
            for l in _lobbyists:
                _registrant_self_employment.add_member(
                    l,
                    role='lobbyist',
                    label='lobbyist for {n}'.format(n=_registrant.name),
                    start_date=_disclosure.effective_date.strftime('%Y-%m-%d')
                )

        # # Document
        # build document
        _disclosure.add_document(
            note='submitted filing',
            date=parsed_form['datetimes']['effective_date'][:10],
            url=response.url
        )

        # Collect Affiliated orgs
        _affiliated_organizations = []
        _affiliated_organizations_by_name = {}
        for ao in parsed_form['affiliated_organizations']:
            ao_extras = {}
            ao_name = ao['affiliated_organization_name']
            if ao_name in _affiliated_organizations_by_name:
                # There's already one by this name
                _affiliated_organization = _affiliated_organizations_by_name[ao_name]
            else:
                # New affiliated org
                _affiliated_organization = Organization(
                    name=ao_name,
                    classification='company',
                    source_identified=True
                )

            # collect contact details
            affiliated_organization_contact_details = [
                {
                    "type": "address",
                    "note": "contact address",
                    "value": '; '.join([
                        p for p in [
                            ao['affiliated_organization_address'],
                            ao['affiliated_organization_city'],
                            ao['affiliated_organization_state'],
                            ao['affiliated_organization_zip'],
                            ao['affiliated_organization_country']]
                        if len(p) > 0]).strip(),
                },
            ]

            affiliated_organization_contact_ppb = {
                "type": "address",
                "note": "principal place of business",
                "value": '; '.join([
                    p for p in [
                        ao['affiliated_organization_ppb_city'],
                        ao['affiliated_organization_ppb_state'],
                        ao['affiliated_organization_ppb_country']]
                    if len(p) > 0]).strip(),
            }

            if affiliated_organization_contact_ppb["value"]:
                affiliated_organization_contact_details.append(
                    affiliated_organization_contact_ppb)

            # add contact details
            for cd in affiliated_organization_contact_details:
                _affiliated_organization.add_contact_detail(**cd)

            ao_extras["contact_details_structured"] = [
                {
                    "type": "address",
                    "note": "contact address",
                    "parts": [
                        {
                            "note": "address",
                            "value": ao['affiliated_organization_address'],
                        },
                        {
                            "note": "city",
                            "value": ao['affiliated_organization_city'],
                        },
                        {
                            "note": "state",
                            "value": ao['affiliated_organization_state'],
                        },
                        {
                            "note": "zip",
                            "value": ao['affiliated_organization_zip'],
                        },
                        {
                            "note": "country",
                            "value": ao['affiliated_organization_country'],
                        }
                    ],
                },
                {
                    "type": "address",
                    "note": "principal place of business",
                    "parts": [
                        {
                            "note": "city",
                            "value":
                                ao['affiliated_organization_ppb_city'],
                        },
                        {
                            "note": "state",
                            "value":
                                ao['affiliated_organization_ppb_state'],
                        },
                        {
                            "note": "country",
                            "value":
                                ao['affiliated_organization_ppb_country'],
                        }
                    ],
                },
            ],

            _affiliated_organization.extras = combine_dicts(
                _affiliated_organization.extras, ao_extras)

        for unique_affiliated_organization in _affiliated_organizations_by_name.values():
            _affiliated_organizations.append(unique_affiliated_organization)

        # # Events & Agendas
        # name
        if parsed_form['registration_type']['new_registrant']:
            registration_type = 'New Client, New Registrant'
        elif parsed_form['registration_type']['is_amendment']:
            registration_type = 'Amended Registration'
        else:
            registration_type = 'New Client for Existing Registrant'

        # Create registration event
        _event = Event(
            name="{rn} - {rt}, {cn}".format(rn=_registrant.name,
                                            rt=registration_type,
                                            cn=_client.name),
            timezone='America/New_York',
            location='United States',
            start_time=datetime.strptime(
                parsed_form['datetimes']['effective_date'],
                '%Y-%m-%d %H:%M:%S').replace(tzinfo=UTC),
            classification='registration'
        )

        # add participants
        _event.add_participant(type=_registrant._type,
                               id=_registrant._id,
                               name=_registrant.name,
                               note="registrant")

        if _registrant._type == 'person':
            _event.add_participant(type=_registrant._type,
                                   id=_registrant._id,
                                   name=_registrant.name,
                                   note="registrant")

        _event.add_participant(type=_client._type,
                               id=_client._id,
                               name=_client.name,
                               note="client")

        for l in _lobbyists:
            _event.add_participant(type=l._type,
                                   id=l._id,
                                   name=l.name,
                                   note='lobbyist')

        for fe in _foreign_entities:
            _event.add_participant(type=fe._type,
                                   id=fe._id,
                                   name=fe.name,
                                   note='foreign_entity')

        for ao in _affiliated_organizations:
            _event.add_participant(type=ao._type,
                                   id=ao._id,
                                   name=ao.name,
                                   note='affiliated_organization')

        # add agenda item
        _agenda = _event.add_agenda_item(
            description='issues lobbied on',
        )

        _agenda['notes'].append(
            parsed_form['lobbying_issues_detail']
        )

        for li in parsed_form['lobbying_issues']:
            if li['general_issue_area'] != '':
                _agenda.add_subject(li['general_issue_area'])

        _disclosure.add_disclosed_event(
            name=_event.name,
            type=_event._type,
            classification=_event.classification,
            id=_event._id
        )

        # add registrant to disclosure's _related and related_entities fields
        _disclosure.add_registrant(name=_registrant.name,
                                   type=_registrant._type,
                                   id=_registrant._id)

        _registrant.add_source(
            url=_source['url'],
            note='registrant'
        )
        yield _registrant

        if _registrant_self_employment is not None:
            _registrant_self_employment.add_source(
                url=_source['url'],
                note='registrant_self_employment'
            )

            yield _registrant_self_employment

        _client.add_source(
            url=_source['url'],
            note='client'
        )
        yield _client

        _main_contact.add_source(
            url=_source['url'],
            note='main_contact'
        )
        yield _main_contact

        for ao in _affiliated_organizations:
            ao.add_source(
                url=_source['url'],
                note='affiliated_organization'
            )
            yield ao
        for fe in _foreign_entities:
            fe.add_source(
                url=_source['url'],
                note='foreign_entity'
            )
            yield fe
        for l in _lobbyists:
            l.add_source(
                url=_source['url'],
                note='lobbyist'
            )
            yield l

        _event.add_source(**_source)
        yield _event
        _disclosure.add_source(**_source)
        yield _disclosure