def test_fields(self):
    '''Check field values on four packages looked up by name.

    Asserts on title, notes, resources, extras, tags, licence, author and
    import source of the packages fetched through model.Package.by_name.
    '''
    q = model.Session.query(model.Package)
    # Every package name; used only as the failure message when one of the
    # expected packages cannot be found.
    names = [package.name for package in q.all()]
    pkg1 = model.Package.by_name(
        u'uk_official_holdings_of_international_reserves')
    cereals = model.Package.by_name(
        u'cereals_and_oilseeds_production_harvest')
    custody = model.Package.by_name(
        u'end_of_custody_licence_release_and_recalls')
    probation = model.Package.by_name(u'probation_statistics_brief')
    assert pkg1, names
    assert cereals, names
    assert custody, names
    assert probation, names

    # Title, notes and resources
    assert pkg1.title == 'UK Official Holdings of International Reserves', pkg1.title
    assert pkg1.notes.startswith(
        "Monthly breakdown for government's net reserves, detailing gross reserves and gross liabilities."
    ), pkg1.notes
    assert len(pkg1.resources) == 1, pkg1.resources
    assert pkg1.resources[0].url == 'http://www.hm-treasury.gov.uk/national_statistics.htm', pkg1.resources[0]
    assert_equal(pkg1.resources[0].description, 'December 2009')
    assert_equal(pkg1.resources[0].extras['hub-id'], '119-36345')
    assert len(custody.resources) == 2, custody.resources
    assert custody.resources[0].url == 'http://www.justice.gov.uk/publications/endofcustodylicence.htm', custody.resources[0]
    assert_equal(custody.resources[0].description, 'November 2009')
    assert_equal(custody.resources[0].extras['hub-id'], '119-36836')
    # The failure message reports the resource actually being checked.
    assert custody.resources[1].url == 'http://www.justice.gov.uk/publications/endofcustodylicence.htm', custody.resources[1]
    assert_equal(custody.resources[1].description, 'December 2009')
    assert_equal(custody.resources[1].extras['hub-id'], '119-36838')

    # Release dates and publishers
    assert pkg1.extras['date_released'] == u'2010-01-06', pkg1.extras['date_released']
    assert probation.extras['date_released'] == u'2010-01-04', probation.extras['date_released']
    assert_equal(strip_organisation_id(pkg1.extras['published_by']),
                 u"Her Majesty's Treasury [some_number]")
    assert_equal(strip_organisation_id(cereals.extras['published_by']),
                 u"Department for Environment, Food and Rural Affairs [some_number]")
    assert_equal(strip_organisation_id(custody.extras['published_by']),
                 u"Ministry of Justice [some_number]")
    assert u"Source agency: HM Treasury" in pkg1.notes, pkg1.notes

    # The failure message reads the same 'categories' key as the check, so
    # a mismatch surfaces as an AssertionError rather than a KeyError.
    assert pkg1.extras['categories'] == 'Economy', pkg1.extras['categories']
    assert pkg1.extras['geographic_coverage'] == '111100: United Kingdom (England, Scotland, Wales, Northern Ireland)', pkg1.extras['geographic_coverage']
    assert pkg1.extras['national_statistic'] == 'no', pkg1.extras['national_statistic']
    assert cereals.extras['national_statistic'] == 'yes', cereals.extras['national_statistic']
    assert custody.extras['national_statistic'] == 'no', custody.extras['national_statistic']
    assert 'Designation: Official Statistics not designated as National Statistics' in custody.notes
    assert pkg1.extras['geographical_granularity'] == 'UK and GB', pkg1.extras['geographical_granularity']
    assert 'Language: English' in pkg1.notes, pkg1.notes

    def check_tags(pkg, tags_list):
        # Every tag in tags_list must be among the package's tag names.
        pkg_tags = [tag.name for tag in pkg.tags]
        for tag in tags_list:
            assert tag in pkg_tags, "Couldn't find tag '%s' in tags: %s" % (tag, pkg_tags)

    check_tags(pkg1, ('economics-and-finance', 'reserves', 'currency',
                      'assets', 'liabilities', 'gold', 'economy',
                      'government-receipts-and-expenditure',
                      'public-sector-finance'))
    check_tags(cereals, ('environment', 'farming'))
    check_tags(custody, ('public-order-justice-and-rights',
                         'justice-system', 'prisons'))

    assert 'Alternative title: UK Reserves' in pkg1.notes, pkg1.notes
    assert pkg1.extras['external_reference'] == u'ONSHUB', pkg1.extras['external_reference']
    # Checked on pkg1 explicitly: the name `pkg` is only bound by the loop
    # at the end of this test, so it cannot be relied on here.
    assert 'Open Government Licence' in pkg1.license.title, pkg1.license.title
    assert pkg1.extras['update_frequency'] == u'monthly', pkg1.extras['update_frequency']
    assert custody.extras['update_frequency'] == u'monthly', custody.extras['update_frequency']
    assert pkg1.author == u"Her Majesty's Treasury", pkg1.author
    assert cereals.author == u'Department for Environment, Food and Rural Affairs', cereals.author
    assert custody.author == u'Ministry of Justice', custody.author

    for pkg in (pkg1, cereals, custody):
        assert pkg.extras['import_source'].startswith('ONS'), '%s %s' % (pkg.name, pkg.extras['import_source'])
def test_record_2_package(self):
    '''Convert one sample hub record with OnsImporter.record_2_package and
    compare the resulting package dict against the expected one.

    The organisation ids in the published_by / published_via extras are
    passed through strip_organisation_id before the comparison.
    '''
    record = OrderedDict([
        (u'title', u'UK Official Holdings of International Reserves - December 2009'),
        (u'link', u'http://www.hm-treasury.gov.uk/national_statistics.htm'),
        (u'description', u"Monthly breakdown for government's net reserves, detailing gross reserves and gross liabilities."),
        (u'pubDate', u'Wed, 06 Jan 2010 09:30:00 GMT'),
        (u'guid', u'http://www.statistics.gov.uk/hub/id/119-36345'),
        (u'hub:source-agency', u'HM Treasury'),
        (u'hub:theme', u'Economy'),
        (u'hub:coverage', u'UK'),
        (u'hub:designation', u''),
        (u'hub:geographic-breakdown', u'UK and GB'),
        (u'hub:language', u'English'),
        (u'hub:ipsv', u'Economics and finance'),
        (u'hub:keywords', u'reserves;currency;assets;liabilities;gold;reserves;currency;assets;liabilities;gold'),
        (u'hub:altTitle', u'UK Reserves'),
        (u'hub:nscl', u'Economy;Government Receipts and Expenditure;Public Sector Finance;Economy;Government Receipts and Expenditure;Public Sector Finance'),
    ])
    converter = importer.OnsImporter(filepaths=SAMPLE_FILEPATH_1)
    package_dict = converter.record_2_package(record)

    # Expected nested structures, spelled out separately for readability.
    expected_resource = OrderedDict([
        ('url', u'http://www.hm-treasury.gov.uk/national_statistics.htm'),
        ('description', u'December 2009'),
        ('hub-id', u'119-36345'),
    ])
    expected_extras = OrderedDict([
        ('geographic_coverage', u'111100: United Kingdom (England, Scotland, Wales, Northern Ireland)'),
        ('geographical_granularity', u'UK and GB'),
        ('external_reference', u'ONSHUB'),
        ('temporal_granularity', u''),
        ('date_updated', u''),
        ('precision', u''),
        ('temporal_coverage-to', u''),
        ('temporal_coverage-from', u''),
        ('national_statistic', 'no'),
        ('update_frequency', 'monthly'),
        ('import_source', 'ONS-ons_hub_sample.xml'),
        ('date_released', '2010-01-06'),
        ('categories', u'Economy'),
        ('series', u'UK Official Holdings of International Reserves'),
        ('published_by', u"Her Majesty's Treasury [some_number]"),
        ('published_via', u''),
    ])
    expected_package_dict = OrderedDict([
        ('name', u'uk_official_holdings_of_international_reserves'),
        ('title', u'UK Official Holdings of International Reserves'),
        ('version', None),
        ('url', None),
        ('author', u"Her Majesty's Treasury"),
        ('author_email', None),
        ('maintainer', None),
        ('maintainer_email', None),
        ('notes', u"Monthly breakdown for government's net reserves, detailing gross reserves and gross liabilities.\n\nSource agency: HM Treasury\n\nLanguage: English\n\nAlternative title: UK Reserves"),
        ('license_id', u'uk-ogl'),
        ('tags', [u'assets', u'currency', u'economics-and-finance',
                  u'economy', u'gold',
                  u'government-receipts-and-expenditure', u'liabilities',
                  u'public-sector-finance', u'reserves']),
        ('groups', []),
        ('resources', [expected_resource]),
        ('extras', expected_extras),
    ])

    # Normalise the organisation ids before comparing.
    actual_extras = package_dict['extras']
    for publisher_key in ('published_by', 'published_via'):
        actual_extras[publisher_key] = strip_organisation_id(
            actual_extras[publisher_key])
    PackageDictUtil.check_dict(package_dict, expected_package_dict)
def test_publishers(self):
    '''Check OnsImporter._source_to_organisations for several source
    agencies.

    For each agency the returned published_by / published_via values must
    not be None and, once passed through strip_organisation_id, must equal
    the expected strings.
    '''
    # (hub:source-agency value, published_by, published_via)
    cases = [
        ('HM Treasury',
         "Her Majesty's Treasury [some_number]",
         ''),
        ('Information Centre for Health and Social Care',
         'NHS Information Centre for Health and Social Care [some_number]',
         ''),
        ('Environment (Northern Ireland)',
         'Northern Ireland Executive [some_number]',
         'Department of the Environment [some_number]'),
        ('Office for National Statistics',
         'Office for National Statistics [some_number]',
         ''),
    ]
    for source_agency, want_by, want_via in cases:
        # The first two returned values are not checked by this test.
        _department, _agency, got_by, got_via = \
            importer.OnsImporter._source_to_organisations(source_agency)
        assert got_by is not None, source_agency
        assert got_via is not None, source_agency
        got_by = strip_organisation_id(got_by)
        got_via = strip_organisation_id(got_via)
        assert_equal(got_by, want_by or u'')
        assert_equal(got_via, want_via or u'')
def test_dept_to_organisation(self):
    '''Check that every producer from get_ons_producers() maps to a truthy
    organisation whose stripped form contains '[some_number]'.
    '''
    for producer in get_ons_producers():
        organisation = DrupalHelper.department_or_agency_to_organisation(
            producer)
        assert organisation, producer
        stripped = strip_organisation_id(organisation)
        assert '[some_number]' in stripped, stripped
def test_welsh_package(self):
    '''Check the publisher and the single resource of the package named
    'annual_survey_of_hours_and_earnings_'.
    '''
    welsh_pkg = model.Package.by_name(u'annual_survey_of_hours_and_earnings_')
    assert welsh_pkg
    publisher = strip_organisation_id(welsh_pkg.extras['published_by'])
    assert_equal(publisher, 'Welsh Government [some_number]')
    assert len(welsh_pkg.resources) == 1, welsh_pkg.resources
    first_resource = welsh_pkg.resources[0]
    assert '2008 Results' in first_resource.description, welsh_pkg.resources
def test_packages(self):
    '''Check title, publisher, resource count and dates of the
    'regional_labour_market_statistics' package.
    '''
    series_pkg = model.Package.by_name(u'regional_labour_market_statistics')
    assert series_pkg
    assert series_pkg.title == 'Regional Labour Market Statistics', series_pkg.title
    publisher = strip_organisation_id(series_pkg.extras['published_by'])
    assert_equal(publisher, 'Office for National Statistics [some_number]')
    assert len(series_pkg.resources) == 9, series_pkg.resources
    assert_equal(series_pkg.extras['date_released'], '2010-08-10')
    assert_equal(series_pkg.extras['date_updated'], '2010-08-13')
def test_reload(self):
    '''Check that self.pkg_dict kept the name of self.orig_pkg_dict and
    that the package of that name has the expected publisher.
    '''
    # Check that another package has not been created
    reloaded_name = self.pkg_dict['name']
    original_name = self.orig_pkg_dict['name']
    assert reloaded_name == original_name, reloaded_name

    package = model.Package.by_name(self.orig_pkg_dict['name'])
    publisher = strip_organisation_id(package.extras.get('published_by'))
    assert_equal(publisher, u'Office for National Statistics [some_number]')
def test_ons_package(self):
    '''Check publisher, notes and the two resources of the package named
    'annual_survey_of_hours_and_earnings'.
    '''
    ons_pkg = model.Package.by_name(u'annual_survey_of_hours_and_earnings')
    assert ons_pkg
    publisher = strip_organisation_id(ons_pkg.extras.get('published_by'))
    assert_equal(publisher, 'Office for National Statistics [some_number]')
    assert 'Office for National Statistics' in ons_pkg.notes, ons_pkg.notes
    assert len(ons_pkg.resources) == 2, ons_pkg.resources
    # Second resource is checked before the first, as in the original order.
    second_description = ons_pkg.resources[1].description
    assert '2007 Results Phase 3 Tables' in second_description, ons_pkg.resources
    first_description = ons_pkg.resources[0].description
    assert '2007 Pensions Results' in first_description, ons_pkg.resources
def test_packages(self):
    '''Check title, publisher, resource count and dates of the
    'regional_labour_market_statistics' package.
    '''
    series_pkg = model.Package.by_name(u'regional_labour_market_statistics')
    assert series_pkg
    assert series_pkg.title == 'Regional Labour Market Statistics', series_pkg.title
    publisher = strip_organisation_id(series_pkg.extras['published_by'])
    assert_equal(publisher, 'Office for National Statistics [some_number]')
    assert len(series_pkg.resources) == 9, series_pkg.resources
    assert_equal(series_pkg.extras['date_released'], '2010-08-10')
    assert_equal(series_pkg.extras['date_updated'], '2010-08-13')
def test_get_package_edit_form(self):
    '''Fetch the 'package_gov3' edit form for self.package_name and check
    that its fields match the package name and extras.

    Date-like extras are compared against the form's display strings, the
    publisher extras are passed through strip_organisation_id, and a few
    extras are left out of the comparison.
    '''
    package = self.get_package_by_name(self.package_name)
    form, ret_status = self.get_package_edit_form(
        package.id, package_form='package_gov3')
    prefix = 'Package-%s-' % package.id
    self.assert_formfield(form, prefix + 'name', package.name)

    expected_values = dict(package.extras.items())
    # Values expected in the form for the date-like fields.
    form_overrides = (
        ('temporal_coverage-to', '6/2009'),
        ('temporal_coverage-from', '12:30 24/6/2008'),
        ('date_updated', '12:30 30/7/2009'),
        ('date_released', '30/7/2009'),
        ('date_update_future', '1/7/2009'),
    )
    for field_name, shown_value in form_overrides:
        expected_values[field_name] = shown_value
    for publisher_key in ('published_by', 'published_via'):
        expected_values[publisher_key] = strip_organisation_id(
            expected_values[publisher_key])
    not_compared = (
        'national_statistic',   # restricted over form api
        'geographic_coverage',  # don't test here
        'external_reference',
        'import_source',
    )
    for skipped_key in not_compared:
        del expected_values[skipped_key]

    for field_name, field_value in expected_values.items():
        self.assert_formfield(form, prefix + field_name, field_value)
def test_ons_package(self):
    '''Check publisher, notes and the two resources of the package named
    'annual_survey_of_hours_and_earnings'.
    '''
    ons_pkg = model.Package.by_name(u'annual_survey_of_hours_and_earnings')
    assert ons_pkg
    publisher = strip_organisation_id(ons_pkg.extras.get('published_by'))
    assert_equal(publisher, 'Office for National Statistics [some_number]')
    assert 'Office for National Statistics' in ons_pkg.notes, ons_pkg.notes
    assert len(ons_pkg.resources) == 2, ons_pkg.resources
    # Second resource is checked before the first, as in the original order.
    second_description = ons_pkg.resources[1].description
    assert '2007 Results Phase 3 Tables' in second_description, ons_pkg.resources
    first_description = ons_pkg.resources[0].description
    assert '2007 Pensions Results' in first_description, ons_pkg.resources
def test_4_record_2_package(self):
    '''Convert cps_record with the importer and compare the result with
    cps_pkg_dict, including that no expected top-level key is missing.
    '''
    self.importer.clear_log()
    pkg_dict = self.importer.record_2_package(cps_record)
    extras = pkg_dict['extras']
    for publisher_key in ('published_by', 'published_via'):
        extras[publisher_key] = strip_organisation_id(extras[publisher_key])
    print(pkg_dict)
    PackageDictUtil.check_dict(pkg_dict, cps_pkg_dict)

    # Every key of the expected dict must be present in the result.
    missing_keys = set(cps_pkg_dict.keys()) - set(pkg_dict.keys())
    assert not missing_keys, missing_keys
def assert_formfield(self, form, name, expected):
    '''
    Asserts that the form has a field called `name`. When `expected` is
    not None, also asserts that the field's value, after being passed
    through strip_organisation_id, equals `expected`.
    '''
    assert name in form.fields, 'No field named %r out of:\n%s' % (
        name, '\n'.join(sorted(form.fields)))
    if expected is None:
        # Existence check only.
        return
    field = form[name]
    value = strip_organisation_id(field.value)
    assert value == expected, 'Field %r: %r != %r' % (
        field.name, value, expected)
def assert_formfield(self, form, name, expected):
    '''
    Asserts that the form has a field called `name`. When `expected` is
    not None, also asserts that the field's value, after being passed
    through strip_organisation_id, equals `expected`.
    '''
    assert name in form.fields, 'No field named %r out of:\n%s' % (
        name, '\n'.join(sorted(form.fields)))
    if expected is None:
        # Existence check only.
        return
    field = form[name]
    value = strip_organisation_id(field.value)
    assert value == expected, 'Field %r: %r != %r' % (
        field.name, value, expected)
def test_4_record_2_package(self):
    '''Convert cps_record with the importer and compare the result with
    cps_pkg_dict, including that no expected top-level key is missing.
    '''
    self.importer.clear_log()
    pkg_dict = self.importer.record_2_package(cps_record)
    extras = pkg_dict['extras']
    for publisher_key in ('published_by', 'published_via'):
        extras[publisher_key] = strip_organisation_id(extras[publisher_key])
    print(pkg_dict)
    PackageDictUtil.check_dict(pkg_dict, cps_pkg_dict)

    # Every key of the expected dict must be present in the result.
    missing_keys = set(cps_pkg_dict.keys()) - set(pkg_dict.keys())
    assert not missing_keys, missing_keys
def setup_class(self):
    '''Import sample file 6, check each resulting package dict and load
    them all with OnsLoader.

    Every package dict is expected to carry the same name and title, an
    ONS publisher and an empty published_via; the load must report no
    errors.
    '''
    super(TestNationalParkDuplicate, self).setup_class()
    ons_importer = importer.OnsImporter(sample_filepath(6))
    pkg_dicts = list(ons_importer.pkg_dict())
    self.name = u'national_park_parliamentary_constituency_and_ward_level_mid-year_population_estimates_experimental'
    for pkg_dict in pkg_dicts:
        assert pkg_dict['name'] == self.name, pkg_dict['name']
        assert pkg_dict['title'] == 'National Park, Parliamentary Constituency and Ward level mid-year population estimates (experimental)', pkg_dict
        publisher = strip_organisation_id(pkg_dict['extras']['published_by'])
        assert_equal(publisher, 'Office for National Statistics [some_number]')
        assert pkg_dict['extras']['published_via'] == '', pkg_dict
    load_result = OnsLoader(self.testclient).load_packages(pkg_dicts)
    assert load_result['num_errors'] == 0, load_result
def test_get_package_edit_form(self):
    '''Fetch the 'package_gov3' edit form for self.package_name and check
    that its fields match the package name and extras.

    Date-like extras are compared against the form's display strings, the
    publisher extras are passed through strip_organisation_id, and a few
    extras are left out of the comparison.
    '''
    package = self.get_package_by_name(self.package_name)
    form, ret_status = self.get_package_edit_form(
        package.id, package_form='package_gov3')
    prefix = 'Package-%s-' % package.id
    self.assert_formfield(form, prefix + 'name', package.name)

    expected_values = dict(package.extras.items())
    # Values expected in the form for the date-like fields.
    form_overrides = (
        ('temporal_coverage-to', '6/2009'),
        ('temporal_coverage-from', '12:30 24/6/2008'),
        ('date_updated', '12:30 30/7/2009'),
        ('date_released', '30/7/2009'),
        ('date_update_future', '1/7/2009'),
    )
    for field_name, shown_value in form_overrides:
        expected_values[field_name] = shown_value
    for publisher_key in ('published_by', 'published_via'):
        expected_values[publisher_key] = strip_organisation_id(
            expected_values[publisher_key])
    not_compared = (
        'national_statistic',   # restricted over form api
        'geographic_coverage',  # don't test here
        'external_reference',
        'import_source',
    )
    for skipped_key in not_compared:
        del expected_values[skipped_key]

    for field_name, field_value in expected_values.items():
        self.assert_formfield(form, prefix + field_name, field_value)
def setup_class(self):
    '''Import sample files 4a and 4b in turn, check each resulting package
    dict and load each file's package dicts with OnsLoader.

    Every package dict is expected to have the series title, an ONS
    publisher, a release date containing '2010-08-' and an empty
    date_updated; each load must report no errors.
    '''
    super(TestOnsLoadSeries, self).setup_class()
    for sample_id in ('4a', '4b'):
        ons_importer = importer.OnsImporter(sample_filepath(sample_id))
        pkg_dicts = list(ons_importer.pkg_dict())
        for pkg_dict in pkg_dicts:
            extras = pkg_dict['extras']
            assert pkg_dict['title'] == 'Regional Labour Market Statistics', pkg_dict
            assert_equal(strip_organisation_id(extras['published_by']),
                         'Office for National Statistics [some_number]')
            assert '2010-08-' in extras['date_released'], pkg_dict
            assert extras['date_updated'] == '', pkg_dict
        # Each file's packages are loaded before moving on to the next file.
        load_result = OnsLoader(self.testclient).load_packages(pkg_dicts)
        assert load_result['num_errors'] == 0, load_result
def test_4_record_2_package(self):
    '''Convert example_record with the importer, check that nothing was
    logged, and compare the result with example_pkg_dict, including that
    no expected top-level key is missing.
    '''
    self.importer.clear_log()
    pkg_dict = self.importer.record_2_package(example_record)
    log = self.importer.get_log()
    assert_equal(len(log), 0, log)

    extras = pkg_dict['extras']
    for publisher_key in ('published_by', 'published_via'):
        extras[publisher_key] = strip_organisation_id(extras[publisher_key])
    PackageDictUtil.check_dict(pkg_dict, example_pkg_dict)

    # Every key of the expected dict must be present in the result.
    missing_keys = set(example_pkg_dict.keys()) - set(pkg_dict.keys())
    assert not missing_keys, missing_keys
def test_4_record_2_package(self):
    '''Convert example_record with the importer, check that nothing was
    logged, and compare the result with example_pkg_dict, including that
    no expected top-level key is missing.
    '''
    self.importer.clear_log()
    pkg_dict = self.importer.record_2_package(example_record)
    log = self.importer.get_log()
    assert_equal(len(log), 0, log)

    extras = pkg_dict['extras']
    for publisher_key in ('published_by', 'published_via'):
        extras[publisher_key] = strip_organisation_id(extras[publisher_key])
    PackageDictUtil.check_dict(pkg_dict, example_pkg_dict)

    # Every key of the expected dict must be present in the result.
    missing_keys = set(example_pkg_dict.keys()) - set(pkg_dict.keys())
    assert not missing_keys, missing_keys
def setup_class(self):
    '''Import sample file 6, check each resulting package dict and load
    them all with OnsLoader.

    Every package dict is expected to carry the same name and title, an
    ONS publisher and an empty published_via; the load must report no
    errors.
    '''
    super(TestNationalParkDuplicate, self).setup_class()
    ons_importer = importer.OnsImporter(sample_filepath(6))
    pkg_dicts = list(ons_importer.pkg_dict())
    self.name = u'national_park_parliamentary_constituency_and_ward_level_mid-year_population_estimates_experimental'
    for pkg_dict in pkg_dicts:
        assert pkg_dict['name'] == self.name, pkg_dict['name']
        assert pkg_dict['title'] == 'National Park, Parliamentary Constituency and Ward level mid-year population estimates (experimental)', pkg_dict
        publisher = strip_organisation_id(pkg_dict['extras']['published_by'])
        assert_equal(publisher, 'Office for National Statistics [some_number]')
        assert pkg_dict['extras']['published_via'] == '', pkg_dict
    load_result = OnsLoader(self.testclient).load_packages(pkg_dicts)
    assert load_result['num_errors'] == 0, load_result
def setup_class(self):
    '''Import sample files 4a and 4b in turn, check each resulting package
    dict and load each file's package dicts with OnsLoader.

    Every package dict is expected to have the series title, an ONS
    publisher, a release date containing '2010-08-' and an empty
    date_updated; each load must report no errors.
    '''
    super(TestOnsLoadSeries, self).setup_class()
    for sample_id in ('4a', '4b'):
        ons_importer = importer.OnsImporter(sample_filepath(sample_id))
        pkg_dicts = list(ons_importer.pkg_dict())
        for pkg_dict in pkg_dicts:
            extras = pkg_dict['extras']
            assert pkg_dict['title'] == 'Regional Labour Market Statistics', pkg_dict
            assert_equal(strip_organisation_id(extras['published_by']),
                         'Office for National Statistics [some_number]')
            assert '2010-08-' in extras['date_released'], pkg_dict
            assert extras['date_updated'] == '', pkg_dict
        # Each file's packages are loaded before moving on to the next file.
        load_result = OnsLoader(self.testclient).load_packages(pkg_dicts)
        assert load_result['num_errors'] == 0, load_result
def test_reload(self):
    '''Check that self.pkg_dict kept the name of self.orig_pkg_dict and
    that the package of that name has the expected publisher.
    '''
    # Check that another package has not been created
    reloaded_name = self.pkg_dict['name']
    original_name = self.orig_pkg_dict['name']
    assert reloaded_name == original_name, reloaded_name

    package = model.Package.by_name(self.orig_pkg_dict['name'])
    publisher = strip_organisation_id(package.extras.get('published_by'))
    assert_equal(publisher, u'Office for National Statistics [some_number]')
def test_welsh_package(self):
    '''Check the publisher and the single resource of the package named
    'annual_survey_of_hours_and_earnings_'.
    '''
    welsh_pkg = model.Package.by_name(u'annual_survey_of_hours_and_earnings_')
    assert welsh_pkg
    publisher = strip_organisation_id(welsh_pkg.extras['published_by'])
    assert_equal(publisher, 'Welsh Government [some_number]')
    assert len(welsh_pkg.resources) == 1, welsh_pkg.resources
    first_resource = welsh_pkg.resources[0]
    assert '2008 Results' in first_resource.description, welsh_pkg.resources
def test_dept_to_organisation(self):
    '''Check that every producer from get_ons_producers() maps to a truthy
    organisation whose stripped form contains '[some_number]'.
    '''
    for producer in get_ons_producers():
        organisation = DrupalHelper.department_or_agency_to_organisation(
            producer)
        assert organisation, producer
        stripped = strip_organisation_id(organisation)
        assert '[some_number]' in stripped, stripped
def test_fields(self):
    '''Check field values on four packages looked up by name.

    Asserts on title, notes, resources, extras, tags, licence, author and
    import source of the packages fetched through model.Package.by_name.
    '''
    q = model.Session.query(model.Package)
    # Every package name; used only as the failure message when one of the
    # expected packages cannot be found.
    names = [package.name for package in q.all()]
    pkg1 = model.Package.by_name(
        u'uk_official_holdings_of_international_reserves')
    cereals = model.Package.by_name(
        u'cereals_and_oilseeds_production_harvest')
    custody = model.Package.by_name(
        u'end_of_custody_licence_release_and_recalls')
    probation = model.Package.by_name(u'probation_statistics_brief')
    assert pkg1, names
    assert cereals, names
    assert custody, names
    assert probation, names

    # Title, notes and resources
    assert pkg1.title == 'UK Official Holdings of International Reserves', pkg1.title
    assert pkg1.notes.startswith(
        "Monthly breakdown for government's net reserves, detailing gross reserves and gross liabilities."
    ), pkg1.notes
    assert len(pkg1.resources) == 1, pkg1.resources
    assert pkg1.resources[0].url == 'http://www.hm-treasury.gov.uk/national_statistics.htm', pkg1.resources[0]
    assert_equal(pkg1.resources[0].description, 'December 2009')
    assert_equal(pkg1.resources[0].extras['hub-id'], '119-36345')
    assert len(custody.resources) == 2, custody.resources
    assert custody.resources[0].url == 'http://www.justice.gov.uk/publications/endofcustodylicence.htm', custody.resources[0]
    assert_equal(custody.resources[0].description, 'November 2009')
    assert_equal(custody.resources[0].extras['hub-id'], '119-36836')
    # The failure message reports the resource actually being checked.
    assert custody.resources[1].url == 'http://www.justice.gov.uk/publications/endofcustodylicence.htm', custody.resources[1]
    assert_equal(custody.resources[1].description, 'December 2009')
    assert_equal(custody.resources[1].extras['hub-id'], '119-36838')

    # Release dates and publishers
    assert pkg1.extras['date_released'] == u'2010-01-06', pkg1.extras['date_released']
    assert probation.extras['date_released'] == u'2010-01-04', probation.extras['date_released']
    assert_equal(strip_organisation_id(pkg1.extras['published_by']),
                 u"Her Majesty's Treasury [some_number]")
    assert_equal(strip_organisation_id(cereals.extras['published_by']),
                 u"Department for Environment, Food and Rural Affairs [some_number]")
    assert_equal(strip_organisation_id(custody.extras['published_by']),
                 u"Ministry of Justice [some_number]")
    assert u"Source agency: HM Treasury" in pkg1.notes, pkg1.notes

    # The failure message reads the same 'categories' key as the check, so
    # a mismatch surfaces as an AssertionError rather than a KeyError.
    assert pkg1.extras['categories'] == 'Economy', pkg1.extras['categories']
    assert pkg1.extras['geographic_coverage'] == '111100: United Kingdom (England, Scotland, Wales, Northern Ireland)', pkg1.extras['geographic_coverage']
    assert pkg1.extras['national_statistic'] == 'no', pkg1.extras['national_statistic']
    assert cereals.extras['national_statistic'] == 'yes', cereals.extras['national_statistic']
    assert custody.extras['national_statistic'] == 'no', custody.extras['national_statistic']
    assert 'Designation: Official Statistics not designated as National Statistics' in custody.notes
    assert pkg1.extras['geographical_granularity'] == 'UK and GB', pkg1.extras['geographical_granularity']
    assert 'Language: English' in pkg1.notes, pkg1.notes

    def check_tags(pkg, tags_list):
        # Every tag in tags_list must be among the package's tag names.
        pkg_tags = [tag.name for tag in pkg.tags]
        for tag in tags_list:
            assert tag in pkg_tags, "Couldn't find tag '%s' in tags: %s" % (tag, pkg_tags)

    check_tags(pkg1, ('economics-and-finance', 'reserves', 'currency',
                      'assets', 'liabilities', 'gold', 'economy',
                      'government-receipts-and-expenditure',
                      'public-sector-finance'))
    check_tags(cereals, ('environment', 'farming'))
    check_tags(custody, ('public-order-justice-and-rights',
                         'justice-system', 'prisons'))

    assert 'Alternative title: UK Reserves' in pkg1.notes, pkg1.notes
    assert pkg1.extras['external_reference'] == u'ONSHUB', pkg1.extras['external_reference']
    # Checked on pkg1 explicitly: the name `pkg` is only bound by the loop
    # at the end of this test, so it cannot be relied on here.
    assert 'Open Government Licence' in pkg1.license.title, pkg1.license.title
    assert pkg1.extras['update_frequency'] == u'monthly', pkg1.extras['update_frequency']
    assert custody.extras['update_frequency'] == u'monthly', custody.extras['update_frequency']
    assert pkg1.author == u"Her Majesty's Treasury", pkg1.author
    assert cereals.author == u'Department for Environment, Food and Rural Affairs', cereals.author
    assert custody.author == u'Ministry of Justice', custody.author

    for pkg in (pkg1, cereals, custody):
        assert pkg.extras['import_source'].startswith('ONS'), '%s %s' % (pkg.name, pkg.extras['import_source'])