def test_bill_sponsor_by_identifier():
    """Check that a sponsorship added by identifier links to the imported person.

    A person carrying the identifier/scheme pair is imported and given a
    membership in the organization returned by ``create_org()``. The bill is
    then imported and its one sponsorship must point at that person.
    """
    create_jurisdiction()
    organization = create_org()

    # The same identifier/scheme pair is attached to the sponsorship and
    # to the scraped person.
    sponsor_key = dict(identifier='TOTALLY_REAL_ID',
                       scheme='TOTALLY_REAL_SCHEME')

    scraped_bill = ScrapeBill('HB 1', '1900', 'Axe & Tack Tax Act',
                              classification='tax bill', chamber='lower')
    scraped_bill.add_sponsorship_by_identifier(name='SNODGRASS',
                                               classification='sponsor',
                                               entity_type='person',
                                               primary=True,
                                               **sponsor_key)

    org_importer = OrganizationImporter('jid')
    person_importer = PersonImporter('jid')

    scraped_person = ScrapePerson(name='Zadock Snodgrass')
    scraped_person.add_identifier(**sponsor_key)
    person_importer.import_data([scraped_person.as_dict()])

    imported_person = Person.objects.get()
    Membership.objects.create(person_id=imported_person.id,
                              organization_id=organization.id)

    bill_importer = BillImporter('jid', org_importer, person_importer)
    bill_importer.import_data([scraped_bill.as_dict()])

    # Unpacking fails unless there is exactly one sponsorship.
    [sponsorship] = Bill.objects.get().sponsorships.all()
    assert sponsorship.person.name == 'Zadock Snodgrass'
Beispiel #2
0
def test_bill_sponsor_by_identifier():
    """Importing a bill resolves an identifier-based sponsorship to a person.

    The scraped person is imported first and attached to the organization
    from ``create_org()`` through a membership; the imported bill must then
    expose exactly one sponsorship whose person is 'Zadock Snodgrass'.
    """
    create_jurisdiction()
    the_org = create_org()

    id_value = 'TOTALLY_REAL_ID'
    id_scheme = 'TOTALLY_REAL_SCHEME'

    hb1 = ScrapeBill('HB 1',
                     '1900',
                     'Axe & Tack Tax Act',
                     classification='tax bill',
                     chamber='lower')
    hb1.add_sponsorship_by_identifier(
        name='SNODGRASS',
        classification='sponsor',
        entity_type='person',
        primary=True,
        identifier=id_value,
        scheme=id_scheme,
    )

    organizations = OrganizationImporter('jid')
    people = PersonImporter('jid')

    snodgrass = ScrapePerson(name='Zadock Snodgrass')
    snodgrass.add_identifier(identifier=id_value, scheme=id_scheme)
    people.import_data([snodgrass.as_dict()])

    stored_person = Person.objects.get()
    Membership.objects.create(person_id=stored_person.id,
                              organization_id=the_org.id)

    BillImporter('jid', organizations, people).import_data([hb1.as_dict()])

    stored_bill = Bill.objects.get()
    # Tuple unpacking doubles as a check that only one sponsorship exists.
    only_sponsorship, = stored_bill.sponsorships.all()
    assert only_sponsorship.person.name == 'Zadock Snodgrass'
def test_bill_sponsor_limit_lookup():
    """Check that sponsor lookup by identifier stays inside one jurisdiction.

    Two scraped people share a name and identifier but differ in birth date.
    The first (1800-01-01) is imported under 'jid' and given a membership in
    the organization from ``create_org()``; the second (1900-01-01) is
    imported under 'another-jurisdiction'. The bill imported under 'jid'
    must resolve its sponsor to the first person.
    """
    create_jurisdiction()
    org = create_org()

    bill = ScrapeBill('HB 1',
                      '1900',
                      'Axe & Tack Tax Act',
                      classification='tax bill',
                      chamber='lower')
    bill.add_sponsorship_by_identifier(name="SNODGRASS",
                                       classification='sponsor',
                                       entity_type='person',
                                       primary=True,
                                       identifier="TOTALLY_REAL_ID",
                                       scheme="TOTALLY_REAL_SCHEME")

    oi = OrganizationImporter('jid')
    pi = PersonImporter('jid')

    zs = ScrapePerson(name='Zadock Snodgrass', birth_date="1800-01-01")
    zs.add_identifier(identifier='TOTALLY_REAL_ID',
                      scheme='TOTALLY_REAL_SCHEME')
    pi.import_data([zs.as_dict()])

    za_db = Person.objects.get()
    Membership.objects.create(person_id=za_db.id, organization_id=org.id)

    zs2 = ScrapePerson(name='Zadock Snodgrass', birth_date="1900-01-01")
    zs2.add_identifier(identifier='TOTALLY_REAL_ID',
                       scheme='TOTALLY_REAL_SCHEME')

    # This is contrived and perhaps broken, but we're going to check this.
    # We *really* don't want to *ever* cross jurisdiction bounds.
    # Import the look-alike (zs2), not zs again: only the differing birth
    # date lets the final assertion tell the two people apart.
    PersonImporter('another-jurisdiction').import_data([zs2.as_dict()])

    BillImporter('jid', oi, pi).import_data([bill.as_dict()])

    obj = Bill.objects.get()
    (entry, ) = obj.sponsorships.all()
    assert entry.person.name == "Zadock Snodgrass"
    assert entry.person.birth_date == "1800-01-01"
Beispiel #4
0
def test_bill_sponsor_limit_lookup():
    """Check that sponsor lookup by identifier stays inside one jurisdiction.

    Two scraped people share a name and identifier but differ in birth date.
    The first (1800-01-01) is imported under 'jid' and given a membership in
    the organization from ``create_org()``; the second (1900-01-01) is
    imported under 'another-jurisdiction'. The bill imported under 'jid'
    must resolve its sponsor to the first person.
    """
    create_jurisdiction()
    org = create_org()

    bill = ScrapeBill('HB 1', '1900', 'Axe & Tack Tax Act',
                      classification='tax bill', chamber='lower')
    bill.add_sponsorship_by_identifier(name="SNODGRASS",
                                       classification='sponsor',
                                       entity_type='person',
                                       primary=True,
                                       identifier="TOTALLY_REAL_ID",
                                       scheme="TOTALLY_REAL_SCHEME")

    oi = OrganizationImporter('jid')
    pi = PersonImporter('jid')

    zs = ScrapePerson(name='Zadock Snodgrass', birth_date="1800-01-01")
    zs.add_identifier(identifier='TOTALLY_REAL_ID',
                      scheme='TOTALLY_REAL_SCHEME')
    pi.import_data([zs.as_dict()])

    za_db = Person.objects.get()
    Membership.objects.create(person_id=za_db.id,
                              organization_id=org.id)

    zs2 = ScrapePerson(name='Zadock Snodgrass', birth_date="1900-01-01")
    zs2.add_identifier(identifier='TOTALLY_REAL_ID',
                       scheme='TOTALLY_REAL_SCHEME')

    # This is contrived and perhaps broken, but we're going to check this.
    # We *really* don't want to *ever* cross jurisdiction bounds.
    # Import the look-alike (zs2), not zs again: only the differing birth
    # date lets the final assertion tell the two people apart.
    PersonImporter('another-jurisdiction').import_data([zs2.as_dict()])

    BillImporter('jid', oi, pi).import_data([bill.as_dict()])

    obj = Bill.objects.get()
    (entry,) = obj.sponsorships.all()
    assert entry.person.name == "Zadock Snodgrass"
    assert entry.person.birth_date == "1800-01-01"
    def scrape_bills(self):
        """
        Does the following

        1) Scrapes bill data from unitedstates project and saves the data to path specified in UnitedStates module
        2) Iterates over bill data and converts each one to an OCD-compliant bill model.
        3) Yields the OCD-compliant bill model instance

        A data or text-version file that cannot be opened or decoded as JSON
        is reported on stdout and skipped.

        @return: yield Bill instance
        """

        # run scraper first to pull in all the bill data
        self.run_unitedstates_bill_scraper()
        # iterate over all the files and build and yield Bill objects
        # (raw string: same pattern text, without invalid escape sequences)
        for filename in find_files(settings.SCRAPED_DATA_DIR, r'.*[a-z]*\/[a-z]*[0-9]*\/data\.json'):
            # Only the open/decode step is guarded, so the file is closed
            # before the bill is built and yielded.
            try:
                with open(filename) as json_file:
                    json_data = json.load(json_file)
            except (IOError, ValueError):
                # json.load signals malformed JSON with ValueError
                print("Unable to open or parse file with path " + filename)
                continue

            # The type entry is the same for every use below; look it up once.
            type_info = self.TYPE_MAP[json_data['bill_type']]
            chamber = type_info['chamber']

            # Initialize Object
            bill = Bill(type_info['canonical'] + ' ' + json_data['number'],
                        json_data['congress'],
                        json_data['official_title'],
                        chamber=chamber)

            # Basics
            bill.type = [json_data['bill_type']]
            bill.subject = json_data['subjects']
            # the summary may be absent or null; only add it when present
            summary = json_data.get('summary')
            if summary is not None:
                bill.add_summary(summary['as'],
                                 summary['text'],
                                 summary['date'])

            # Common Fields
            bill.sources = [{'url': json_data['url'], 'note': 'all'}]

            # Other/Related Bills
            bill.other_titles = [{'note': t['type'], 'title': t['title']} for t in json_data['titles']]
            # change value of relationship_type to 'type' field from json_data when permitted by schema
            bill.related_bills = [{'session': b['session'], 'name': b['name'], 'relationship_type': 'companion'}
                                  for b in json_data['related_bills']]

            # add primary sponsor
            bill.add_sponsorship_by_identifier(json_data['sponsor']['name'], 'person', 'person', True,
                                               scheme='thomas_id',
                                               identifier=json_data['sponsor']['thomas_id'],
                                               chamber=chamber)

            # add cosponsors
            for cs in json_data['cosponsors']:
                bill.add_sponsorship_by_identifier(cs['name'], 'person', 'person', False,
                                                   scheme='thomas_id', identifier=cs['thomas_id'],
                                                   chamber=chamber)

            # add introduced_at and actions
            bill.actions.append({'date': json_data['introduced_at'], 'type': 'introduced',
                                 'description': 'date of introduction',
                                 'actor': chamber,
                                 'related_entities': []})
            for action in json_data['actions']:
                bill.actions.append({'date': action['acted_at'],
                                     'type': [action['type']],
                                     'description': action['text'],
                                     'actor': chamber,
                                     'related_entities': []
                                     })

            # add bill versions
            versions_dir = os.path.join(settings.SCRAPED_DATA_DIR,
                                        'data', bill.session, 'bills', json_data['bill_type'],
                                        json_data['bill_type'] + json_data['number'],
                                        'text-versions')
            for version_path in find_files(versions_dir, r'*\.json'):
                try:
                    with open(version_path) as version_file:
                        version_json_data = json.load(version_file)
                except (IOError, ValueError):
                    print("Unable to open or parse file with path " + version_path)
                    continue
                # .items() works on both Python 2 and 3 (iteritems is Python 2 only)
                for k, v in version_json_data['urls'].items():
                    bill.versions.append({'date': version_json_data['issued_on'],
                                          'type': version_json_data['version_code'],
                                          'name': self.VERSION_MAP[version_json_data['version_code']],
                                          'links': [{'mimetype': k, 'url': v}]})

            yield bill
Beispiel #6
0
    def scrape_bills(self):
        """
        Does the following

        1) Scrapes bill data from unitedstates project and saves the data to path specified in UnitedStates module
        2) Iterates over bill data and converts each one to an OCD-compliant bill model.
        3) Yields the OCD-compliant bill model instance

        A data or text-version file that cannot be opened or decoded as JSON
        is reported on stdout and skipped.

        @return: yield Bill instance
        """

        # run scraper first to pull in all the bill data
        self.run_unitedstates_bill_scraper()
        # iterate over all the files and build and yield Bill objects
        # (raw string: same pattern text, without invalid escape sequences)
        for filename in find_files(settings.SCRAPED_DATA_DIR,
                                   r'.*[a-z]*\/[a-z]*[0-9]*\/data\.json'):
            # Only the open/decode step is guarded, so the file is closed
            # before the bill is built and yielded.
            try:
                with open(filename) as json_file:
                    json_data = json.load(json_file)
            except (IOError, ValueError):
                # json.load signals malformed JSON with ValueError
                print("Unable to open or parse file with path " + filename)
                continue

            # The type entry is the same for every use below; look it up once.
            type_info = self.TYPE_MAP[json_data['bill_type']]
            chamber = type_info['chamber']

            # Initialize Object
            bill = Bill(
                type_info['canonical'] + ' ' + json_data['number'],
                json_data['congress'],
                json_data['official_title'],
                chamber=chamber)

            # Basics
            bill.type = [json_data['bill_type']]
            bill.subject = json_data['subjects']
            # the summary may be absent or null; only add it when present
            summary = json_data.get('summary')
            if summary is not None:
                bill.add_summary(summary['as'],
                                 summary['text'],
                                 summary['date'])

            # Common Fields
            bill.sources = [{'url': json_data['url'], 'note': 'all'}]

            # Other/Related Bills
            bill.other_titles = [{
                'note': t['type'],
                'title': t['title']
            } for t in json_data['titles']]
            # change value of relationship_type to 'type' field from json_data when permitted by schema
            bill.related_bills = [{
                'session': b['session'],
                'name': b['name'],
                'relationship_type': 'companion'
            } for b in json_data['related_bills']]

            # add primary sponsor
            bill.add_sponsorship_by_identifier(
                json_data['sponsor']['name'],
                'person',
                'person',
                True,
                scheme='thomas_id',
                identifier=json_data['sponsor']['thomas_id'],
                chamber=chamber)

            # add cosponsors
            for cs in json_data['cosponsors']:
                bill.add_sponsorship_by_identifier(
                    cs['name'],
                    'person',
                    'person',
                    False,
                    scheme='thomas_id',
                    identifier=cs['thomas_id'],
                    chamber=chamber)

            # add introduced_at and actions
            bill.actions.append({
                'date': json_data['introduced_at'],
                'type': 'introduced',
                'description': 'date of introduction',
                'actor': chamber,
                'related_entities': []
            })
            for action in json_data['actions']:
                bill.actions.append({
                    'date': action['acted_at'],
                    'type': [action['type']],
                    'description': action['text'],
                    'actor': chamber,
                    'related_entities': []
                })

            # add bill versions
            versions_dir = os.path.join(
                settings.SCRAPED_DATA_DIR, 'data',
                bill.session, 'bills', json_data['bill_type'],
                json_data['bill_type'] + json_data['number'],
                'text-versions')
            for version_path in find_files(versions_dir, r'*\.json'):
                try:
                    with open(version_path) as version_file:
                        version_json_data = json.load(version_file)
                except (IOError, ValueError):
                    print("Unable to open or parse file with path " +
                          version_path)
                    continue
                # .items() works on both Python 2 and 3 (iteritems is Python 2 only)
                for k, v in version_json_data['urls'].items():
                    bill.versions.append({
                        'date': version_json_data['issued_on'],
                        'type': version_json_data['version_code'],
                        'name': self.VERSION_MAP[
                            version_json_data['version_code']],
                        'links': [{
                            'mimetype': k,
                            'url': v
                        }]
                    })

            yield bill
Beispiel #7
0
    def _scrape_bills(self):
        """
        Does the following

        1) Scrapes bill data from unitedstates project and saves the data to path specified in UnitedStates module
        2) Iterates over bill data and converts each one to an OCD-compliant bill model.
        3) Yields the OCD-compliant bill model instance

        A data file that fails to load or convert is reported on stdout with
        its traceback and skipped; the remaining files are still processed.

        @return: generator for federal US bills in OCD-compliant format
        @rtype: generator
        """

        # run scraper first to pull in all the bill data
        self._run_unitedstates_bill_scraper()
        # iterate over all the files and build and yield Bill objects
        # (raw string: same pattern text, without invalid escape sequences)
        for filename in find_files(settings.SCRAPED_DATA_DIR, r'.*/data/[0-9]+/bills/[^\/]+/[^\/]+/data.json'):
            try:
                # read the file up front so it is closed before the bill is built
                with open(filename) as json_file:
                    json_data = json.load(json_file)

                # The type entry is the same for every use below; look it up once.
                type_info = constants.TYPE_MAP[json_data['bill_type']]
                chamber = type_info['chamber']

                # Initialize Object
                bill = Bill(type_info['canonical'] + ' ' + json_data['number'],
                            json_data['congress'],
                            json_data['official_title'],
                            chamber=chamber)

                # add source of data
                bill.add_source(json_data['url'], note='all')

                # add subjects
                for subject in json_data['subjects']:
                    bill.add_subject(subject)

                # add summary
                if 'summary' in json_data and json_data['summary'] is not None:
                    bill.add_abstract(json_data['summary']['text'],
                                      json_data['summary']['as'],
                                      json_data['summary']['date'])

                # add titles
                for item in json_data['titles']:
                    bill.add_title(item['title'], item['type'])

                # add other/related Bills
                for b in json_data['related_bills']:
                    if 'type' in b and b['type'] == 'bill':
                        # bill_id is split on '-': the first part is matched by
                        # BILL_SPLIT, the second is used as the legislative session
                        split = b['bill_id'].split('-')
                        m = UnitedStatesBillScraper.BILL_SPLIT.match(split[0])

                        bill.add_related_bill(constants.TYPE_MAP[m.group(1)]['canonical'] + ' ' + m.group(2),
                                              legislative_session=split[1],
                                              relation_type='companion')

                # add sponsor
                bill.add_sponsorship_by_identifier(json_data['sponsor']['name'], 'person', 'person', True,
                                                   scheme='thomas_id', identifier=json_data['sponsor']['thomas_id'],
                                                   chamber=chamber)

                # add cosponsors
                for cs in json_data['cosponsors']:
                    bill.add_sponsorship_by_identifier(cs['name'], 'person', 'person', False,
                                                       scheme='thomas_id', identifier=cs['thomas_id'],
                                                       chamber=chamber)

                # add introduced_at and actions
                bill.add_action('date of introduction', datetime_to_date(json_data['introduced_at']),
                                chamber=chamber,
                                related_entities=[])

                # add other actions
                for action in json_data['actions']:
                    bill.actions.append({'date': datetime_to_date(action['acted_at']),
                                         'type': [action['type']],
                                         'description': action['text'],
                                         'actor': chamber,
                                         'related_entities': []
                                         })

                # add bill versions
                versions_dir = os.path.join(settings.SCRAPED_DATA_DIR,
                                            'data', bill.legislative_session, 'bills', json_data['bill_type'],
                                            json_data['bill_type'] + json_data['number'],
                                            'text-versions')
                for version_path in find_files(versions_dir, r'/.*/*\.json'):
                    try:
                        with open(version_path) as version_file:
                            version_json_data = json.load(version_file)
                    except (IOError, ValueError):
                        # json.load signals malformed JSON with ValueError; skip
                        # just this version rather than the whole bill
                        print("Unable to open or parse file with path " + version_path)
                        continue
                    for k, v in version_json_data['urls'].items():
                        bill.versions.append({'date': datetime_to_date(version_json_data['issued_on']),
                                              'type': version_json_data['version_code'],
                                              'name': constants.VERSION_MAP[version_json_data['version_code']],
                                              'links': [{'mimetype': k, 'url': v}]})

                # finally yield bill object
                yield bill

            except IOError:
                print("Unable to open file with path " + filename)
                print(traceback.format_exc())
                continue
            except KeyError:
                print("Unable to parse file with path " + filename)
                print(traceback.format_exc())
                continue
            except Exception:
                # Not a bare except: the yield above sits inside this try, so
                # a bare except would also swallow GeneratorExit (raised when
                # the generator is closed) and KeyboardInterrupt.
                print('Unknown error with ' + filename)
                print(traceback.format_exc())
                continue