Example #1
0
 def test_validade_not_cached(self):
     """``is_cached`` is false when the filesystem reports the path absent.

     The filesystem is a mock whose ``exists`` always answers ``False``; the
     test also checks that ``exists`` was asked about the local path.
     """
     local_path = "/local/path"
     fs_mock = mock.Mock()
     fs_mock.exists.return_value = False

     dataset = DataSet(fs_mock, local_path, "test_id",
                       "http://source/to/file", "test dataset", "zip")

     self.assertFalse(dataset.is_cached())
     fs_mock.exists.assert_called_with(local_path)
Example #2
0
    def test_construct_dataset(self):
        """``compression`` mirrors the constructor argument.

        It must be ``"zip"`` when that value is passed and ``None`` when the
        argument is omitted.
        """
        zipped = DataSet(OSFS("."), "/local/path", "test_id",
                         "http://source/to/file", "test dataset", "zip")

        # ``assertEqual`` replaces the deprecated ``assertEquals`` alias,
        # which Python 3.12 removed from ``unittest.TestCase``.
        self.assertEqual("zip", zipped.compression)

        plain = DataSet(OSFS("."), "/local/path", "test_id",
                        "http://source/to/file", "test dataset")

        self.assertIsNone(plain.compression)
Example #3
0
 def test_unzip_local_data(self):
     """Unzipping a local archive yields ``train/train.csv``.

     The fixture ``base_train.zip`` is copied to ``train.zip`` so the test
     operates on a disposable file. Cleanup runs in a ``finally`` block and
     only touches paths that exist, so a failure inside ``unzip_file`` (or a
     missing extracted file) neither leaves artifacts behind nor hides the
     real error behind a cleanup exception.
     """
     base_dir = "./tests/resources/local_data"
     archive = base_dir + "/train.zip"
     extracted_dir = base_dir + "/train"
     extracted_csv = extracted_dir + "/train.csv"

     fs = OSFS(".")
     real_remove = fs.remove
     # ``remove`` is stubbed on this filesystem instance while unzipping;
     # presumably ``unzip_file`` deletes the archive itself -- TODO confirm
     # against the DataSet implementation.
     fs.remove = mock.Mock(return_value=None)
     fs.copy(base_dir + "/base_train.zip", archive)

     extracted = False
     try:
         test_local = DataSet(fs, "/local/path", "train", archive,
                              "test dataset", "zip")
         test_local.unzip_file()
         extracted = fs.exists(extracted_csv)
     finally:
         # Put the real ``remove`` back before deleting the test artifacts.
         fs.remove = real_remove
         if fs.exists(extracted_csv):
             fs.remove(extracted_csv)
         if fs.exists(archive):
             fs.remove(archive)
         if fs.exists(extracted_dir):
             fs.removedir(extracted_dir)

     self.assertTrue(extracted)
Example #4
0
    def test_path_to_read_in_dir(self):
        """Check ``uri`` when the dataset location is a directory.

        The filesystem is mocked so that ``isdir`` is always true and
        ``listdir`` always returns the single entry ``something.json``.
        """
        fs_mock = mock.Mock()
        fs_mock.root_path = "."
        fs_mock.listdir = mock.Mock(return_value=["something.json"])
        fs_mock.isdir = mock.Mock(return_value=True)

        local_ds = DataSet(fs_mock, "/local/path/test_id", "test_id",
                           "./source/to/file", "test dataset")
        online_ds = DataSet(fs_mock, "/local/path/test_id2", "test_id2",
                            "http://source/to/file", "test dataset")

        # Local source: the file is expected below the source path.
        self.assertEqual("././source/to/file/something.json", local_ds.uri)

        # Online source: the file is expected below the local path.
        self.assertEqual("/local/path/test_id2/something.json",
                         online_ds.uri)
Example #5
0
 def test_is_ftp_source(self):
     """An ``ftp://`` source is online; a relative file path is not."""
     local_path = "/local/path"
     identifier = "test_id"
     description = "test dataset"

     ftp_ds = DataSet(OSFS("."), local_path, identifier,
                      "ftp://source/to/file", description)
     path_ds = DataSet(OSFS("."), local_path, identifier,
                       "./source/to/file", description)

     self.assertTrue(ftp_ds.is_online_source())
     self.assertFalse(path_ds.is_online_source())
Example #6
0
 def test_dont_download_if_cached(self):
     """``download`` must not call ``_download`` for a cached dataset."""
     dataset = DataSet(OSFS("."), "/local/path", "test_id",
                       "http://source/to/file", "test dataset", "zip")

     # Spy on the low-level download and report the data as already cached.
     download_spy = mock.Mock()
     dataset._download = download_spy
     dataset.is_cached = mock.Mock(return_value=True)

     dataset.download()

     download_spy.assert_not_called()
Example #7
0
    def test_validate_is_zip(self):
        """``is_zipped`` is ``True`` with "zip" compression, ``False`` without.

        Equality against the boolean literals is kept (rather than
        ``assertTrue``/``assertFalse``) so a merely truthy or falsy return
        value such as ``None`` still fails the test.
        """
        zipped = DataSet(OSFS("."), "/local/path", "test_id",
                         "http://source/to/file", "test dataset", "zip")
        # ``assertEqual`` replaces the deprecated ``assertEquals`` alias,
        # which Python 3.12 removed from ``unittest.TestCase``.
        self.assertEqual(True, zipped.is_zipped())

        plain = DataSet(OSFS("."), "/local/path", "test_id",
                        "http://source/to/file", "test dataset")
        self.assertEqual(False, plain.is_zipped())
Example #8
0
 def test_prepare_dataset(self):
     """``prepare`` calls ``download`` and ``unzip_file`` once, without args."""
     fs_mock = mock.Mock()
     dataset = DataSet(fs_mock, "/local/path/test_id2", "test_id2",
                       "http://source/to/file", "test dataset")

     # Replace both steps with spies so only the orchestration is exercised.
     dataset.download = mock.Mock()
     dataset.unzip_file = mock.Mock()

     dataset.prepare()

     for step in (dataset.download, dataset.unzip_file):
         step.assert_called_once_with()
Example #9
0
    def get_datasets(self):
        """Build a ``DataSet`` object for every known dataset configuration.

        Returns:
            dict: Maps each identifier found in the mapping returned by
            ``__get_datasets()`` to a ``DataSet`` built from that
            identifier's configuration (per the original documentation, the
            identifier is the name of the configuration file). ``source`` and
            ``description`` are required configuration entries,
            ``compression`` is optional (``None`` when absent) and all
            remaining entries are forwarded to ``DataSet`` as keyword
            arguments. The local path of each dataset is the identifier
            joined onto this object's local path.

        Raises:
            KeyError: If a configuration lacks ``source`` or ``description``.
        """

        configs = self.__get_datasets()
        datasets = {}
        for identifier in configs:
            # Pop from a shallow copy so the configuration handed back by
            # ``__get_datasets()`` keeps its entries; popping in place would
            # break a later call if that mapping is ever reused.
            options = dict(configs[identifier])
            source = options.pop("source")
            description = options.pop("description")
            compression = options.pop("compression", None)
            datasets[identifier] = DataSet(
                self.__fs, os.path.join(self.__local_path, identifier),
                identifier, source, description, compression, **options)
        return datasets
Example #10
0
 def test_zip_download(self):
     """Downloading a zip dataset targets the local path plus ``.zip``."""
     dataset = DataSet(OSFS("."), "/local/path", "test_id",
                       "http://source/to/file", "test dataset", "zip")
     download_spy = mock.Mock()
     dataset._download = download_spy

     dataset.download()

     download_spy.assert_called_with("/local/path.zip")
Example #11
0
    def test_should_download_zipped_csv(self):
        """Download a zipped and a plain CSV and load each with pandas.

        Both source URLs point at ``localhost:8001``, so a server must be
        answering there. Each downloaded file is expected to load as a 2x2
        frame and is deleted after its assertion.
        """
        # Zipped CSV: download, extract, read, then clean up.
        zip_fs = OSFS("./tests/test_integration/resources/")
        zip_target = "test_csv_zipped"
        zip_url = 'http://localhost:8001/local_data/base_train.zip'

        zipped = DataSet(zip_fs, zip_target, "test_id", zip_url,
                         "test dataset", "zip")
        zipped.download()
        zipped.unzip_file()
        zipped_frame = pd.read_csv(zipped.uri)
        self.assertEqual((2, 2), zipped_frame.shape)
        zip_fs.remove(zip_target + "/train.csv")
        zip_fs.removedir(zip_target)

        # Plain CSV: no compression is configured, yet ``unzip_file`` is
        # still called before reading.
        csv_fs = OSFS("./tests/test_integration/resources/")
        csv_target = "train.csv"
        csv_url = 'http://localhost:8001/local_data/train.csv'

        plain = DataSet(csv_fs, csv_target, "test_id", csv_url,
                        "test dataset")
        plain.download()
        plain.unzip_file()
        plain_frame = pd.read_csv(plain.uri)
        self.assertEqual((2, 2), plain_frame.shape)
        csv_fs.remove(csv_target)