def test_csv_pandas_header(self, tmp_path, noaa_jfk_schema):
    """Test the header options of CSVPandasLoader.

    Loading is expected to raise ``ValueError`` when ``no_header`` is ``True``. When ``no_header`` holds a falsy
    value, or is removed from the options altogether, the basic loader test is re-run and expected to pass.
    """

    def loader_options():
        # Resolve the nested options mapping afresh on every use instead of caching a reference to it.
        return noaa_jfk_schema['subdatasets']['jfk_weather_cleaned']['format']['options']

    loader_options()['no_header'] = True
    header_less = Dataset(noaa_jfk_schema, tmp_path, mode=Dataset.InitializationMode.DOWNLOAD_ONLY)
    # Pandas should error from trying to read string as another dtype
    with pytest.raises(ValueError) as raised:
        header_less.load()
    error_text = str(raised.value)
    assert 'could not convert string to float' in error_text
    header_less.delete()

    # Each of these values should be treated as False
    for falsy_value in (False, '', None):
        loader_options()['no_header'] = falsy_value
        self.test_csv_pandas_loader(tmp_path, noaa_jfk_schema)

    # Finally, run the basic loader test with the option removed entirely
    del loader_options()['no_header']
    self.test_csv_pandas_loader(tmp_path, noaa_jfk_schema)
def test_csv_pandas_loader(self, tmp_path, noaa_jfk_schema):
    """Test the basic functioning of CSVPandasLoader.

    Builds the dataset in ``DOWNLOAD_AND_LOAD`` mode, then checks that the ``jfk_weather_cleaned`` entry of
    ``Dataset.data`` is a ``pd.DataFrame`` of the expected shape before deleting the dataset again.
    """

    expected_shape = (75119, 16)

    noaa_dataset = Dataset(noaa_jfk_schema, tmp_path, mode=Dataset.InitializationMode.DOWNLOAD_AND_LOAD)
    weather_frame = noaa_dataset.data['jfk_weather_cleaned']

    assert isinstance(weather_frame, pd.DataFrame)
    assert weather_frame.shape == expected_shape

    noaa_dataset.delete()
def test_deleting_data_dir(self, tmp_path, gmb_schema):
    """Test ``Dataset.delete()``.

    Covers two situations: deleting while the data directory does not exist (no exception expected), and deleting
    after a download (the data directory is expected to be gone afterwards).
    """

    # The tmp_sub_dir fixture is deliberately not used here: the data directory has to be non-existing at the
    # beginning of the test.
    target_dir = tmp_path / 'data-dir'
    lazy_dataset = Dataset(gmb_schema, data_dir=target_dir, mode=Dataset.InitializationMode.LAZY)

    # Sanity check: the directory doesn't exist yet
    assert not target_dir.exists()
    # Deleting at this point should not raise any exception
    lazy_dataset.delete()
    # Sanity check: the directory still doesn't exist
    assert not target_dir.exists()

    lazy_dataset.download()
    # Sanity check: files are in place
    assert lazy_dataset.is_downloaded()
    assert os.listdir(target_dir)

    # Delete the directory and confirm it is gone
    lazy_dataset.delete()
    assert not target_dir.exists()