def test_record_2_package(self):
    """Convert one sample ONS hub record and compare it with the expected package.

    An ``OnsImporter`` is built for ``SAMPLE_FILEPATH_1`` and its
    ``record_2_package`` is called on a hand-written record. The
    ``published_by`` / ``published_via`` extras of the result are passed
    through ``strip_organisation_id`` before the comparison is delegated to
    ``PackageDictUtil.check_dict``.
    """
    record = OrderedDict([
        (u'title', u'UK Official Holdings of International Reserves - December 2009'),
        (u'link', u'http://www.hm-treasury.gov.uk/national_statistics.htm'),
        (u'description', u"Monthly breakdown for government's net reserves, detailing gross reserves and gross liabilities."),
        (u'pubDate', u'Wed, 06 Jan 2010 09:30:00 GMT'),
        (u'guid', u'http://www.statistics.gov.uk/hub/id/119-36345'),
        (u'hub:source-agency', u'HM Treasury'),
        (u'hub:theme', u'Economy'),
        (u'hub:coverage', u'UK'),
        (u'hub:designation', u''),
        (u'hub:geographic-breakdown', u'UK and GB'),
        (u'hub:language', u'English'),
        (u'hub:ipsv', u'Economics and finance'),
        (u'hub:keywords', u'reserves;currency;assets;liabilities;gold;reserves;currency;assets;liabilities;gold'),
        (u'hub:altTitle', u'UK Reserves'),
        (u'hub:nscl', u'Economy;Government Receipts and Expenditure;Public Sector Finance;Economy;Government Receipts and Expenditure;Public Sector Finance'),
    ])

    # Expected output, assembled from its nested parts for readability.
    expected_resource = OrderedDict([
        ('url', u'http://www.hm-treasury.gov.uk/national_statistics.htm'),
        ('description', u'December 2009'),
        ('hub-id', u'119-36345'),
    ])
    expected_extras = OrderedDict([
        ('geographic_coverage', u'111100: United Kingdom (England, Scotland, Wales, Northern Ireland)'),
        ('geographical_granularity', u'UK and GB'),
        ('external_reference', u'ONSHUB'),
        ('temporal_granularity', u''),
        ('date_updated', u''),
        ('precision', u''),
        ('temporal_coverage-to', u''),
        ('temporal_coverage-from', u''),
        ('national_statistic', 'no'),
        ('update_frequency', 'monthly'),
        ('import_source', 'ONS-ons_hub_sample.xml'),
        ('date_released', '2010-01-06'),
        ('categories', u'Economy'),
        ('series', u'UK Official Holdings of International Reserves'),
        ('published_by', u"Her Majesty's Treasury [some_number]"),
        ('published_via', u''),
    ])
    expected_pkg = OrderedDict([
        ('name', u'uk_official_holdings_of_international_reserves'),
        ('title', u'UK Official Holdings of International Reserves'),
        ('version', None),
        ('url', None),
        ('author', u"Her Majesty's Treasury"),
        ('author_email', None),
        ('maintainer', None),
        ('maintainer_email', None),
        ('notes', u"Monthly breakdown for government's net reserves, detailing gross reserves and gross liabilities.\n\nSource agency: HM Treasury\n\nLanguage: English\n\nAlternative title: UK Reserves"),
        ('license_id', u'uk-ogl'),
        ('tags', [
            u'assets',
            u'currency',
            u'economics-and-finance',
            u'economy',
            u'gold',
            u'government-receipts-and-expenditure',
            u'liabilities',
            u'public-sector-finance',
            u'reserves',
        ]),
        ('groups', []),
        ('resources', [expected_resource]),
        ('extras', expected_extras),
    ])

    hub_importer = importer.OnsImporter(filepaths=SAMPLE_FILEPATH_1)
    actual_pkg = hub_importer.record_2_package(record)

    # NOTE(review): presumably the real values carry a varying organisation
    # id, which is why the expected value reads '[some_number]' -- confirm
    # against strip_organisation_id.
    actual_extras = actual_pkg['extras']
    for field in ('published_by', 'published_via'):
        actual_extras[field] = strip_organisation_id(actual_extras[field])

    PackageDictUtil.check_dict(actual_pkg, expected_pkg)
def test_4_record_2_package(self):
    """Convert ``cps_record`` and compare the result with ``cps_pkg_dict``.

    The ``published_by`` / ``published_via`` extras are passed through
    ``strip_organisation_id`` first. After ``PackageDictUtil.check_dict``,
    the test also asserts that no top-level key of the expected dict is
    missing from the result.
    """
    self.importer.clear_log()
    actual = self.importer.record_2_package(cps_record)

    extras = actual['extras']
    for field in ('published_by', 'published_via'):
        extras[field] = strip_organisation_id(extras[field])

    print(actual)
    PackageDictUtil.check_dict(actual, cps_pkg_dict)

    # Every expected top-level key must be present in the converted package.
    missing_keys = set(cps_pkg_dict.keys()) - set(actual.keys())
    assert not missing_keys, missing_keys
def test_4_record_2_package(self):
    """Check that ``record_2_package(cps_record)`` matches ``cps_pkg_dict``.

    Both publisher extras go through ``strip_organisation_id`` before the
    ``PackageDictUtil.check_dict`` comparison; a final assertion verifies
    that all expected top-level keys exist in the converted package.
    """
    self.importer.clear_log()
    converted = self.importer.record_2_package(cps_record)

    converted_extras = converted['extras']
    for extra_name in ('published_by', 'published_via'):
        raw_value = converted_extras[extra_name]
        converted_extras[extra_name] = strip_organisation_id(raw_value)

    print(converted)
    PackageDictUtil.check_dict(converted, cps_pkg_dict)

    wanted = set(cps_pkg_dict.keys())
    present = set(converted.keys())
    absent = wanted - present
    assert not absent, absent
def test_4_record_2_package(self):
    """Convert ``example_record`` and compare the result with ``example_pkg_dict``.

    The importer log is cleared beforehand and must be empty after the
    conversion. The ``published_by`` / ``published_via`` extras are passed
    through ``strip_organisation_id`` before ``PackageDictUtil.check_dict``
    runs, and finally every expected top-level key must be present.
    """
    self.importer.clear_log()
    actual = self.importer.record_2_package(example_record)

    # The conversion is expected to produce no log entries at all.
    import_log = self.importer.get_log()
    assert_equal(len(import_log), 0, import_log)

    extras = actual['extras']
    for field in ('published_by', 'published_via'):
        extras[field] = strip_organisation_id(extras[field])

    PackageDictUtil.check_dict(actual, example_pkg_dict)

    missing_keys = set(example_pkg_dict.keys()) - set(actual.keys())
    assert not missing_keys, missing_keys
def test_4_record_2_package(self):
    """Check that ``record_2_package(example_record)`` matches ``example_pkg_dict``.

    Asserts an empty importer log after conversion, runs both publisher
    extras through ``strip_organisation_id``, delegates the comparison to
    ``PackageDictUtil.check_dict`` and verifies that no expected top-level
    key is missing.
    """
    self.importer.clear_log()
    converted = self.importer.record_2_package(example_record)

    entries = self.importer.get_log()
    assert_equal(len(entries), 0, entries)

    converted_extras = converted['extras']
    for extra_name in ('published_by', 'published_via'):
        raw_value = converted_extras[extra_name]
        converted_extras[extra_name] = strip_organisation_id(raw_value)

    PackageDictUtil.check_dict(converted, example_pkg_dict)

    wanted = set(example_pkg_dict.keys())
    present = set(converted.keys())
    absent = wanted - present
    assert not absent, absent
def assert_pkg_filtered(package_name, pkg_dict):
    """Assert the stored package matches ``pkg_dict`` with national_statistic 'no'.

    The package is looked up with ``model.Package.by_name``. ``pkg_dict`` is
    deep-copied so the caller's fixture is not modified, the copy's
    ``extras['national_statistic']`` is set to ``'no'``, and the comparison
    is delegated to ``PackageDictUtil.assert_subset``.
    """
    stored = model.Package.by_name(unicode(package_name))
    filtered_expectation = copy.deepcopy(pkg_dict)
    filtered_expectation['extras']['national_statistic'] = 'no'
    stored_as_dict = stored.as_dict()
    PackageDictUtil.assert_subset(stored_as_dict, filtered_expectation)
def assert_pkg_stayed_the_same(package_name, pkg_dict):
    """Assert the stored package still matches ``pkg_dict`` unchanged.

    The package is looked up with ``model.Package.by_name`` and its dict
    form is compared with ``pkg_dict`` via ``PackageDictUtil.assert_subset``.
    """
    stored = model.Package.by_name(unicode(package_name))
    stored_as_dict = stored.as_dict()
    PackageDictUtil.assert_subset(stored_as_dict, pkg_dict)
def test_1_row_2_package(self):
    """Convert one spreadsheet row and compare it with the expected package.

    ``self.importer.row_2_package`` is called on a hand-written row dict.
    The importer log must be empty afterwards, the result is compared via
    ``PackageDictUtil.check_dict``, and every expected top-level key must be
    present in the converted package.
    """
    # NOTE(review): the 'Contact information' value below looks like an
    # obfuscated e-mail address; it is kept exactly as found.
    row = OrderedDict([
        (u'Dataset Ref#', u'BIS-000002'),
        (u'Dataset Status', u'Proposed'),
        (u'Agency', u'Higher Education Statistics Agency'),
        (u'Primary Contact', u'*****@*****.**'),
        (u'Secondary Contact', None),
        (u'Title', u'Higher Education Statistics: All HE students by level of study, mode of study, subject of study, domicile and gender 2007/08'),
        (u'Abstract', u'This dataset provides the 2007/08 higher education statistics for all students by level of study, mode of study, subject of study, domicile and gender'),
        (u'Date Released', 2008),
        (u'Date Updated', 2008),
        (u'Update Frequency', u'Never'),
        (u'Tags', u'hesa higher-education-statistics 2007-2008'),
        (u'Department', u'Department for Business, Innovation & Skills'),
        (u'Wiki', u'-'),
        (u'Identifier', u'higher-education-statistics-all-he-students-by-level-of-study-mode-of-study-subject-of-study-meeeeeeeeeeeeeeeeeeeeeeeeeeeega-long-name-2007-2008'),
        (u'Licence', u'Higher Education Statistcs Agency Copyright with data.gov.uk rights'),
        (u'Version', u'-'),
        (u'Geographic Coverage', u'United Kingdom (England, Scotland, Wales, Northern Ireland)'),
        (u'Geographic Granularity', u'national'),
        (u'Temporal Granularity', u'years'),
        (u'Precision', u'integer to the nearest 5'),
        (u'Taxonomy url', u'-'),
        (u'Temporal Coverage From', datetime.date(2007, 8, 1)),
        (u'Temporal Coverage To', datetime.date(2008, 7, 31)),
        (u'National Statistic', u'no'),
        (u'Categories', u'-'),
        (u'Contact information', u'Higher Education Statistics Agency (HESA)\n+44 (0) 1242 211133\[email protected]'),
        (u'Data File', 1),
        (u'Reference Material', 2),
        (u'Information', 0),
        (u'Full Description', 0),
        (u'Unknown', 0),
        (u'Total', 3),
    ])

    # Expected output, assembled from its nested parts for readability.
    expected_resources = [
        OrderedDict([
            ('url', 'http://www.hesa.ac.uk/dox/dataTables/studentsAndQualifiers/download/subject0708.xls?v=1.0'),
            ('format', 'XLS'),
            ('description', 'Data File - XLS Format'),
        ]),
        OrderedDict([
            ('url', 'http://www.hesa.ac.uk/index.php/component/option,com_datatables/task,show_file/defs,1/Itemid,121/catdex,3/disp,/dld,subject0708.xls/yrStr,2007+to+2008/dfile,studefs0708.htm/area,subject/mx,0/'),
            ('format', 'HTML'),
            ('description', 'Reference Material - Data File Definition'),
        ]),
        OrderedDict([
            ('url', 'http://www.hesa.ac.uk/index.php/component/option,com_datatables/task,show_file/defs,2/Itemid,121/catdex,3/disp,/dld,subject0708.xls/yrStr,2007+to+2008/dfile,notes0708.htm/area,subject/mx,0/'),
            ('format', 'HTML'),
            ('description', 'Reference Material - Notes Regarding Data File Content'),
        ]),
    ]
    expected_extras = OrderedDict([
        ('external_reference', 'BIS-000002'),
        ('date_released', '2008'),
        ('date_updated', '2008'),
        ('temporal_granularity', 'years'),
        ('temporal_coverage_to', '2008-07-31'),
        ('temporal_coverage_from', '2007-08-01'),
        ('geographic_coverage', '111100: United Kingdom (England, Scotland, Wales, Northern Ireland)'),
        ('geographical_granularity', 'national'),
        ('agency', u'Higher Education Statistics Agency'),
        ('precision', 'integer to the nearest 5'),
        ('taxonomy_url', '-'),
        ('import_source', 'BIS-%s' % os.path.basename(self._filepath)),
        ('department', u'Department for Business, Innovation and Skills'),
        ('update_frequency', 'Never'),
        ('national_statistic', 'no'),
        ('categories', '-'),
    ])
    expected_pkg = OrderedDict([
        ('name', u'hesa-all-he-students-by-level-of-study-mode-of-study-subject-of-study-meeeeeeeeeeeeee-2007-2008'),
        ('title', u'Higher Education Statistics: All HE students by level of study, mode of study, subject of study, domicile and gender 2007/08'),
        ('version', u'-'),
        ('url', None),
        ('author', u'Higher Education Statistics Agency (HESA)'),
        ('author_email', u'*****@*****.**'),
        ('maintainer', u''),
        ('maintainer_email', u''),
        ('notes', u'This dataset provides the 2007/08 higher education statistics for all students by level of study, mode of study, subject of study, domicile and gender'),
        ('license_id', u'hesa-withrights'),
        ('tags', [
            u'2007-2008',
            u'education',
            u'hesa',
            u'higher-education',
            u'higher-education-statistics',
        ]),
        ('groups', ['ukgov']),
        ('resources', expected_resources),
        ('extras', expected_extras),
    ])

    actual_pkg = self.importer.row_2_package(row)

    # The conversion is expected to produce no log entries at all.
    import_log = self.importer.get_log()
    assert not import_log, import_log

    PackageDictUtil.check_dict(actual_pkg, expected_pkg)

    # Every expected top-level key must be present in the converted package.
    missing_keys = set(expected_pkg.keys()) - set(actual_pkg.keys())
    assert not missing_keys, missing_keys