def test_should_download_zipped_csv(self):
    """Fetch a zipped CSV and a plain CSV from the local test server and read both.

    Each downloaded file is expected to load as a 2x2 frame; the files
    created under the resources directory are removed afterwards.
    """
    # Zipped source: download, extract, read the extracted CSV, clean up.
    archive_name = "test_csv_zipped"
    resources = OSFS("./tests/test_integration/resources/")
    zipped = DataSet(
        resources,
        archive_name,
        "test_id",
        'http://localhost:8001/local_data/base_train.zip',
        "test dataset",
        "zip",
    )
    zipped.download()
    zipped.unzip_file()
    zipped_frame = pd.read_csv(zipped.uri)
    self.assertEqual((2, 2), zipped_frame.shape)
    resources.remove(archive_name + "/train.csv")
    resources.removedir(archive_name)

    # only download
    csv_name = "train.csv"
    resources = OSFS("./tests/test_integration/resources/")
    plain = DataSet(
        resources,
        csv_name,
        "test_id",
        'http://localhost:8001/local_data/train.csv',
        "test dataset",
    )
    plain.download()
    # unzip_file() is still called although no compression was declared.
    plain.unzip_file()
    plain_frame = pd.read_csv(plain.uri)
    self.assertEqual((2, 2), plain_frame.shape)
    resources.remove(csv_name)
def test_is_ftp_source(self):
    """An ftp:// source counts as online; a relative filesystem path does not."""
    ftp_dataset = DataSet(
        OSFS("."), "/local/path", "test_id", "ftp://source/to/file", "test dataset"
    )
    local_dataset = DataSet(
        OSFS("."), "/local/path", "test_id", "./source/to/file", "test dataset"
    )

    self.assertTrue(ftp_dataset.is_online_source())
    self.assertFalse(local_dataset.is_online_source())
def test_validate_is_zip(self):
    """is_zipped() is True with "zip" compression and False when none is given."""
    test_ds_zip = DataSet(
        OSFS("."), "/local/path", "test_id", "http://source/to/file", "test dataset", "zip"
    )
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(True, test_ds_zip.is_zipped())

    test_ds = DataSet(
        OSFS("."), "/local/path", "test_id", "http://source/to/file", "test dataset"
    )
    self.assertEqual(False, test_ds.is_zipped())
def test_construct_dataset(self):
    """The compression argument is stored on the DataSet, and is None when omitted."""
    test_ds_zip = DataSet(
        OSFS("."), "/local/path", "test_id", "http://source/to/file", "test dataset", "zip"
    )
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual("zip", test_ds_zip.compression)

    test_ds = DataSet(
        OSFS("."), "/local/path", "test_id", "http://source/to/file", "test dataset"
    )
    self.assertIsNone(test_ds.compression)
def test_validade_not_cached(self):
    """is_cached() is False when the filesystem reports the local path as missing."""
    fs_stub = mock.Mock()
    # The stubbed filesystem says nothing exists at any path.
    fs_stub.exists.return_value = False

    dataset = DataSet(
        fs_stub, "/local/path", "test_id", "http://source/to/file", "test dataset", "zip"
    )

    self.assertFalse(dataset.is_cached())
    fs_stub.exists.assert_called_with("/local/path")
def test_path_to_read_in_dir(self):
    """When the dataset location is a directory, uri points at the file inside it.

    The stubbed filesystem reports every path as a directory containing a
    single file, "something.json".
    """
    fs_stub = mock.Mock()
    fs_stub.root_path = "."
    fs_stub.listdir.return_value = ["something.json"]
    fs_stub.isdir.return_value = True

    local_dataset = DataSet(
        fs_stub, "/local/path/test_id", "test_id", "./source/to/file", "test dataset"
    )
    online_dataset = DataSet(
        fs_stub, "/local/path/test_id2", "test_id2", "http://source/to/file", "test dataset"
    )

    # Local source: expected uri combines the root path, the source and the file name.
    self.assertEqual("././source/to/file/something.json", local_dataset.uri)
    # Online source: expected uri combines the local path and the file name.
    self.assertEqual("/local/path/test_id2/something.json", online_dataset.uri)
def test_dont_download_if_cached(self):
    """download() must not call _download() when is_cached() reports True."""
    cached_dataset = DataSet(
        OSFS("."), "/local/path", "test_id", "http://source/to/file", "test dataset", "zip"
    )
    # Replace the real transfer and the cache check with mocks.
    cached_dataset._download = mock.Mock()
    cached_dataset.is_cached = mock.Mock(return_value=True)

    cached_dataset.download()

    cached_dataset._download.assert_not_called()
def test_prepare_dataset(self):
    """prepare() calls download() and unzip_file() exactly once each, with no arguments."""
    fs_stub = mock.Mock()
    dataset = DataSet(
        fs_stub, "/local/path/test_id2", "test_id2", "http://source/to/file", "test dataset"
    )
    # Swap both steps for mocks so only the orchestration is exercised.
    dataset.download = mock.Mock()
    dataset.unzip_file = mock.Mock()

    dataset.prepare()

    dataset.download.assert_called_once_with()
    dataset.unzip_file.assert_called_once_with()
def test_unzip_local_data(self):
    """Unzip a local archive and check that the extracted CSV exists.

    A copy of base_train.zip is unzipped through DataSet.unzip_file(). The
    copy and everything extracted from it are removed in a ``finally``
    block, so a failing unzip does not leave files behind that would break
    later runs, and a missing CSV fails the assertion instead of raising
    from the cleanup.
    """
    fs = OSFS(".")
    zip_copy = "./tests/resources/local_data/train.zip"
    extracted_dir = "./tests/resources/local_data/train"
    extracted_csv = extracted_dir + "/train.csv"

    real_remove = fs.remove
    # remove() is stubbed while unzip_file() runs.
    # NOTE(review): presumably this stops unzip_file() from deleting the
    # archive itself - confirm against DataSet.unzip_file.
    fs.remove = mock.Mock(return_value=None)
    extracted = False
    try:
        fs.copy("./tests/resources/local_data/base_train.zip", zip_copy)
        test_local = DataSet(fs, "/local/path", "train", zip_copy, "test dataset", "zip")
        test_local.unzip_file()
        extracted = fs.exists(extracted_csv)
    finally:
        # Restore the real remove() before cleaning up, and only delete what
        # is actually there so cleanup never masks the original failure.
        fs.remove = real_remove
        for leftover in (extracted_csv, zip_copy):
            if fs.exists(leftover):
                fs.remove(leftover)
        if fs.exists(extracted_dir):
            fs.removedir(extracted_dir)

    self.assertTrue(extracted)
def get_datasets(self):
    """Returns a dict with all datasets information.

    Returns:
        dict: The key is the identifier and the value is a ``DataSet``
            built from that identifier's configuration. The identifier is
            the name of the configuration file.

    Raises:
        KeyError: If a configuration has no "source" or "description" entry.
    """
    configs = self.__get_datasets()
    dataset = {}
    for identifier in configs:
        # Pop from a shallow copy so the configuration mapping returned by
        # __get_datasets() is left untouched.
        options = dict(configs[identifier])
        source = options.pop("source")
        description = options.pop("description")
        compression = options.pop("compression", None)
        # Whatever is left in the configuration is forwarded as keyword arguments.
        dataset[identifier] = DataSet(
            self.__fs,
            os.path.join(self.__local_path, identifier),
            identifier,
            source,
            description,
            compression,
            **options
        )
    return dataset
def test_zip_download(self):
    """With "zip" compression, download() hands _download() the local path plus ".zip"."""
    zipped_dataset = DataSet(
        OSFS("."), "/local/path", "test_id", "http://source/to/file", "test dataset", "zip"
    )
    # Capture the target path instead of performing a real transfer.
    zipped_dataset._download = mock.Mock()

    zipped_dataset.download()

    zipped_dataset._download.assert_called_with("/local/path.zip")