Example #1
0
    def test_csv_pandas_header(self, tmp_path, noaa_jfk_schema):
        """Test CSVPandasLoader header options.

        Exercises the ``no_header`` format option of the
        ``jfk_weather_cleaned`` subdataset in three states: ``True`` (the
        load is expected to fail), falsy values, and absent. The last two
        states are checked by delegating to ``test_csv_pandas_loader``.
        """

        def loader_options():
            # Walk the schema afresh on every access so the mapping that is
            # currently stored in the schema is always the one we modify.
            csv_format = noaa_jfk_schema['subdatasets']['jfk_weather_cleaned']['format']
            return csv_format['options']

        # no_header=True: Pandas should error from trying to read a string
        # as another dtype.
        loader_options()['no_header'] = True
        download_only = Dataset.InitializationMode.DOWNLOAD_ONLY
        headerless_dataset = Dataset(noaa_jfk_schema, tmp_path, mode=download_only)
        with pytest.raises(ValueError) as raised:
            headerless_dataset.load()
        error_text = str(raised.value)
        assert 'could not convert string to float' in error_text
        headerless_dataset.delete()

        # Every falsy setting should be treated the same as False.
        for falsy_setting in (False, '', None):
            loader_options()['no_header'] = falsy_setting
            self.test_csv_pandas_loader(tmp_path, noaa_jfk_schema)

        # Finally, drop the option altogether and run the regular loader test.
        del loader_options()['no_header']
        self.test_csv_pandas_loader(tmp_path, noaa_jfk_schema)
Example #2
0
    def test_loading_undownloaded(self, tmp_path, gmb_schema):
        """Test loading before ``Dataset.download()`` has been called.

        Three failure modes are checked on a lazily initialized dataset:
        an unchecked load raises ``FileNotFoundError``, reading ``data``
        afterwards raises ``RuntimeError``, and a checked load raises
        ``RuntimeError`` that mentions the data directory.
        """

        lazy_dataset = Dataset(gmb_schema,
                               data_dir=tmp_path,
                               mode=Dataset.InitializationMode.LAZY)

        # Unchecked load: the missing files surface as FileNotFoundError.
        missing_files_msg = (
            'Failed to load subdataset "gmb_subset_full" because some files are not found. '
            'Did you forget to call Dataset.download()?\nCaused by:\n')
        with pytest.raises(FileNotFoundError) as not_found:
            lazy_dataset.load(check=False)
        assert missing_files_msg in str(not_found.value)

        # Half-loaded data objects should get reset to None
        assert lazy_dataset._data is None
        not_loaded_msg = (
            'Data has not been downloaded and/or loaded yet. Call Dataset.download() to download '
            'data, call Dataset.load() to load data.')
        with pytest.raises(RuntimeError) as not_loaded:
            lazy_dataset.data
        assert str(not_loaded.value) == not_loaded_msg

        # Force check undownloaded dataset should error
        with pytest.raises(RuntimeError) as check_failed:
            lazy_dataset.load(check=True)
        actual_msg = str(check_failed.value)
        expected_msg = (
            f'Downloaded data files are not present in {lazy_dataset._data_dir_} or are corrupted.'
        )
        assert actual_msg == expected_msg