def test_should_download_zipped_csv(self):
    """Download a zipped and a plain CSV from the local server and load both.

    Both datasets are fetched from ``http://localhost:8001`` into the
    integration resources directory, read with pandas, and expected to have
    shape ``(2, 2)``.  Downloaded artifacts are removed in ``finally``
    blocks so a failing assertion does not leave files behind for later
    runs.
    """
    # --- zipped dataset: download, extract, read ---
    zip_fs = OSFS("./tests/test_integration/resources/")
    zip_name = "test_csv_zipped"
    zip_url = 'http://localhost:8001/local_data/base_train.zip'
    zipped_ds = DataSet(zip_fs, zip_name, "test_id", zip_url, "test dataset", "zip")
    try:
        zipped_ds.download()
        zipped_ds.unzip_file()
        zipped_df = pd.read_csv(zipped_ds.uri)
        self.assertEqual((2, 2), zipped_df.shape)
    finally:
        # Guard each removal so a cleanup error cannot mask the real failure
        # when the download or extraction never produced the file.
        extracted_csv = zip_name + "/train.csv"
        if zip_fs.exists(extracted_csv):
            zip_fs.remove(extracted_csv)
        if zip_fs.exists(zip_name):
            zip_fs.removedir(zip_name)

    # --- plain dataset: only download ---
    csv_fs = OSFS("./tests/test_integration/resources/")
    csv_name = "train.csv"
    csv_url = 'http://localhost:8001/local_data/train.csv'
    plain_ds = DataSet(csv_fs, csv_name, "test_id", csv_url, "test dataset")
    try:
        plain_ds.download()
        # unzip_file() is still invoked for the non-zip dataset, as in the
        # original test; presumably it is a no-op here -- TODO confirm.
        plain_ds.unzip_file()
        plain_df = pd.read_csv(plain_ds.uri)
        self.assertEqual((2, 2), plain_df.shape)
    finally:
        if csv_fs.exists(csv_name):
            csv_fs.remove(csv_name)
def test_dont_download_if_cached(self):
    """download() must not call _download() when is_cached() returns True."""
    cached_ds = DataSet(
        OSFS("."),
        "/local/path",
        "test_id",
        "http://source/to/file",
        "test dataset",
        "zip",
    )
    # Report the dataset as already cached and spy on the real fetch.
    cached_ds.is_cached = mock.Mock(return_value=True)
    cached_ds._download = mock.Mock()

    cached_ds.download()

    cached_ds._download.assert_not_called()
def test_prepare_dataset(self):
    """prepare() calls download() and unzip_file() once each, without arguments."""
    fake_fs = mock.Mock()
    dataset = DataSet(
        fake_fs,
        "/local/path/test_id2",
        "test_id2",
        "http://source/to/file",
        "test dataset",
    )
    # Replace both steps with mocks so only the orchestration is exercised.
    dataset.unzip_file = mock.Mock()
    dataset.download = mock.Mock()

    dataset.prepare()

    dataset.download.assert_called_once_with()
    dataset.unzip_file.assert_called_once_with()
def test_zip_download(self):
    """download() on a "zip" dataset calls _download() with the path plus ".zip"."""
    zip_ds = DataSet(
        OSFS("."),
        "/local/path",
        "test_id",
        "http://source/to/file",
        "test dataset",
        "zip",
    )
    # Spy on the low-level fetch so no network access happens.
    fetch_spy = mock.Mock()
    zip_ds._download = fetch_spy

    zip_ds.download()

    fetch_spy.assert_called_with("/local/path.zip")