Ejemplo n.º 1
0
 def setup_class(self):
     """Prepare the importer fixture used by the tests.

     Records the spreadsheet path (``BIS_1_FILEBASE + XL_EXTENSION``) as
     ``_filepath``, builds a ``BisImporter`` for it as ``importer`` and
     keeps every item produced by ``importer.pkg_dict()`` in ``pkg_dicts``.
     """
     spreadsheet_path = BIS_1_FILEBASE + XL_EXTENSION
     self._filepath = spreadsheet_path
     self.importer = BisImporter(filepath=spreadsheet_path)
     # Materialise the iterable so the results can be reused by several tests.
     self.pkg_dicts = list(self.importer.pkg_dict())
Ejemplo n.º 2
0
class TestImport:
    @classmethod
    def setup_class(cls):
        """Build the importer fixture shared by every test in this class.

        Sets three class attributes:

        * ``_filepath`` -- ``BIS_1_FILEBASE + XL_EXTENSION``.
        * ``importer`` -- a ``BisImporter`` constructed with that path.
        * ``pkg_dicts`` -- a list of everything ``importer.pkg_dict()``
          produces.
        """
        # The first argument of a classmethod is the class itself, so it is
        # named ``cls`` (PEP 8), consistent with ``teardown_class``.
        cls._filepath = BIS_1_FILEBASE + XL_EXTENSION
        cls.importer = BisImporter(filepath=cls._filepath)
        # list() materialises the iterable directly; no identity
        # comprehension is needed.
        cls.pkg_dicts = list(cls.importer.pkg_dict())

    @classmethod
    def teardown_class(cls):
        """Class-level teardown hook: rebuild the database via ``model.repo``."""
        repository = model.repo
        repository.rebuild_db()
        
    def test_0_munge(self):
        def test_munge(name, expected_munge):
            munge = self.importer.name_munge(name)
            assert munge == expected_munge, 'Got %s not %s' % (munge, expected_munge)
        test_munge('hesa-first-year-uk-domiciled-he-students-by-qualification-aim-mode-of-study-gender-and-disability-1994-1995', 'hesa-first-year-uk-domiciled-he-students-disability-1994-1995')


    def test_1_row_2_package(self):
        row_dict = OrderedDict([
            (u'Dataset Ref#', u'BIS-000002'),
            (u'Dataset Status', u'Proposed'),
            (u'Agency', u'Higher Education Statistics Agency'),
            (u'Primary Contact', u'*****@*****.**'),
            (u'Secondary Contact', None),
            (u'Title', u'Higher Education Statistics: All HE students by level of study, mode of study, subject of study, domicile and gender 2007/08'),
            (u'Abstract', u'This dataset provides the 2007/08 higher education statistics for all students by level of study, mode of study, subject of study, domicile and gender'),
            (u'Date Released', 2008),
            (u'Date Updated', 2008),
            (u'Update Frequency', u'Never'),
            (u'Tags', u'hesa higher-education-statistics 2007-2008'),
            (u'Department', u'Department for Business, Innovation & Skills'),
            (u'Wiki', u'-'),
            (u'Identifier', u'higher-education-statistics-all-he-students-by-level-of-study-mode-of-study-subject-of-study-meeeeeeeeeeeeeeeeeeeeeeeeeeeega-long-name-2007-2008'),
            (u'Licence', u'Higher Education Statistcs Agency Copyright with data.gov.uk rights'),
            (u'Version', u'-'),
            (u'Geographic Coverage', u'United Kingdom (England, Scotland, Wales, Northern Ireland)'),
            (u'Geographic Granularity', u'national'),
            (u'Temporal Granularity', u'years'),
            (u'Precision', u'integer to the nearest 5'),
            (u'Taxonomy url', u'-'),
            (u'Temporal Coverage From', datetime.date(2007, 8, 1)),
            (u'Temporal Coverage To', datetime.date(2008, 7, 31)),
            (u'National Statistic', u'no'),
            (u'Categories', u'-'),
            (u'Contact information', u'Higher Education Statistics Agency (HESA)\n+44 (0) 1242 211133\[email protected]'),
            (u'Data File', 1),
            (u'Reference Material', 2),
            (u'Information', 0),
            (u'Full Description', 0),
            (u'Unknown', 0),
            (u'Total', 3)
            ])
        expected_pkg_dict = OrderedDict([
            ('name', u'hesa-all-he-students-by-level-of-study-mode-of-study-subject-of-study-meeeeeeeeeeeeee-2007-2008'),
            ('title', u'Higher Education Statistics: All HE students by level of study, mode of study, subject of study, domicile and gender 2007/08'),
            ('version', u'-'),
            ('url', None),
            ('author', u'Higher Education Statistics Agency (HESA)'),
            ('author_email', u'*****@*****.**'),
            ('maintainer', u''),
            ('maintainer_email', u''),
            ('notes', u'This dataset provides the 2007/08 higher education statistics for all students by level of study, mode of study, subject of study, domicile and gender'),
            ('license_id', u'hesa-withrights'),
            ('tags', [u'2007-2008', u'education', u'hesa', \
                      u'higher-education', u'higher-education-statistics']),
            ('groups', ['ukgov']),
            ('resources', [OrderedDict([
                ('url', 'http://www.hesa.ac.uk/dox/dataTables/studentsAndQualifiers/download/subject0708.xls?v=1.0'),
                ('format', 'XLS'),
                ('description', 'Data File - XLS Format'),
                ]),
                           OrderedDict([
                               ('url', 'http://www.hesa.ac.uk/index.php/component/option,com_datatables/task,show_file/defs,1/Itemid,121/catdex,3/disp,/dld,subject0708.xls/yrStr,2007+to+2008/dfile,studefs0708.htm/area,subject/mx,0/'),
                               ('format', 'HTML'),
                               ('description', 'Reference Material - Data File Definition'),
                               ]),
                           OrderedDict([
                               ('url', 'http://www.hesa.ac.uk/index.php/component/option,com_datatables/task,show_file/defs,2/Itemid,121/catdex,3/disp,/dld,subject0708.xls/yrStr,2007+to+2008/dfile,notes0708.htm/area,subject/mx,0/'),
                               ('format', 'HTML'),
                               ('description', 'Reference Material - Notes Regarding Data File Content'),
                               ]),
                           ]),
            ('extras', OrderedDict([
                ('external_reference', 'BIS-000002'),
                ('date_released', '2008'),
                ('date_updated', '2008'),
                ('temporal_granularity', 'years'),
                ('temporal_coverage_to', '2008-07-31'),
                ('temporal_coverage_from', '2007-08-01'),
                ('geographic_coverage', '111100: United Kingdom (England, Scotland, Wales, Northern Ireland)'),
                ('geographical_granularity', 'national'),
                ('agency', u'Higher Education Statistics Agency'),
                ('precision', 'integer to the nearest 5'),
                ('taxonomy_url', '-'),
                ('import_source', 'BIS-%s' % os.path.basename(self._filepath)),
                ('department', u'Department for Business, Innovation and Skills'),
                ('update_frequency', 'Never'),
                ('national_statistic', 'no'),
                ('categories', '-'),
                ])
             ),
            ])
        pkg_dict = self.importer.row_2_package(row_dict)

        log = self.importer.get_log()
        assert not log, log

        PackageDictUtil.check_dict(pkg_dict, expected_pkg_dict)
        expected_keys = set([key for key, value in expected_pkg_dict.items()])
        keys = set(pkg_dict.keys())
        key_difference = expected_keys - keys
        assert not key_difference, key_difference

    @classmethod
    def check_dict(cls, dict_to_check, expected_dict):
        for key, value in expected_dict.items():
            if key == 'extras':
                cls.check_dict(dict_to_check['extras'], value)
            else:
                if value:
                    assert dict_to_check[key] == value, 'Key \'%s\' should be %r not: %r' % (key, value, dict_to_check[key])
                else:
                    assert not dict_to_check.get(key), 'Key \'%s\' should have no value, not: %s' % (key, dict_to_check[key])