Ejemplo n.º 1
0
    def test_csv_pandas_header(self, tmp_path, noaa_jfk_schema):
        """Test CSVPandasLoader header options.

        Exercises the ``no_header`` format option of the
        ``jfk_weather_cleaned`` subdataset in three states: set to ``True``
        (loading is expected to raise ``ValueError``), set to a falsy value,
        and removed from the schema altogether.
        """

        def format_options():
            # Walk the schema afresh on every use, mirroring a full lookup
            # each time an option is changed.
            subdataset = noaa_jfk_schema['subdatasets']['jfk_weather_cleaned']
            return subdataset['format']['options']

        format_options()['no_header'] = True
        download_only = Dataset(
            noaa_jfk_schema,
            tmp_path,
            mode=Dataset.InitializationMode.DOWNLOAD_ONLY)
        # Pandas should error from trying to read string as another dtype
        with pytest.raises(ValueError) as raised:
            download_only.load()
        message = str(raised.value)
        assert 'could not convert string to float' in message
        download_only.delete()

        # These should all be treated as False
        for falsy_value in (False, '', None):
            format_options()['no_header'] = falsy_value
            self.test_csv_pandas_loader(tmp_path, noaa_jfk_schema)

        # Finally, drop the option entirely and rerun the basic loader test.
        del format_options()['no_header']
        self.test_csv_pandas_loader(tmp_path, noaa_jfk_schema)
Ejemplo n.º 2
0
    def test_csv_pandas_loader(self, tmp_path, noaa_jfk_schema):
        """Test the basic functioning of CSVPandasLoader.

        Builds a ``Dataset`` in ``DOWNLOAD_AND_LOAD`` mode, checks that the
        ``jfk_weather_cleaned`` entry is a DataFrame of the expected shape,
        then deletes the dataset.
        """

        expected_shape = (75119, 16)
        loaded = Dataset(
            noaa_jfk_schema,
            tmp_path,
            mode=Dataset.InitializationMode.DOWNLOAD_AND_LOAD)
        frame = loaded.data['jfk_weather_cleaned']
        assert isinstance(frame, pd.DataFrame)
        assert frame.shape == expected_shape
        loaded.delete()
Ejemplo n.º 3
0
    def test_deleting_data_dir(self, tmp_path, gmb_schema):
        """Test ``Dataset.delete()``.

        Checks that deleting before anything was downloaded raises nothing
        and leaves the data directory absent, and that deleting after a
        download removes the data directory.
        """

        # The tmp_sub_dir fixture is deliberately not used: data_dir has to
        # be non-existing at the beginning of the test.
        target_dir = tmp_path / 'data-dir'
        lazy_dataset = Dataset(
            gmb_schema,
            data_dir=target_dir,
            mode=Dataset.InitializationMode.LAZY)

        # Sanity check: the directory is absent before and after a delete
        # issued prior to any download; that delete must not raise.
        assert not target_dir.exists()
        lazy_dataset.delete()
        assert not target_dir.exists()

        lazy_dataset.download()
        # Sanity check: files are in place
        assert lazy_dataset.is_downloaded()
        downloaded_entries = os.listdir(target_dir)
        assert len(downloaded_entries) > 0

        # Deleting now must remove the directory.
        lazy_dataset.delete()
        assert not target_dir.exists()