def test_should_read_yaml_from_dir(self):
    """Check that a directory with one dataset file loads as a single entry.

    Builds a DatasetManager on the ``one_data`` resource directory and
    compares the mapping returned by ``get_datasets()`` with the expected
    identifier -> configuration dictionary.
    """
    manager = DatasetManager("./tests/resources/one_data")
    loaded = manager.get_datasets()

    self.assertDictEqual(
        loaded,
        {
            "one_test": {
                "source": "http://source/teste",
                "description": "my little dataset",
            },
        },
    )
def test_should_print_ascii(self):
    """Check the plain-text table produced by ``Printer.__repr__``.

    Loads the ``multiple_data`` resource directory, wraps the datasets in a
    Printer and compares its ``__repr__()`` output with the expected table.
    """
    # Disable unittest's diff truncation so a mismatch shows the whole table.
    self.maxDiff = None

    # NOTE(review): the literal below is reproduced exactly as it appears in
    # the reviewed text; if line breaks or column padding were collapsed in
    # that view, restore them from the committed file before merging.
    expected_table = """+---------------------+------------+-----------------------------------------------------------------------------+ | description | identifier | source | +---------------------+------------+-----------------------------------------------------------------------------+ | my little dataset | one_test | https://raw.githubusercontent.com/pcsanwald/kaggle-titanic/master/train.csv | | my little dataset 2 | two_test | https://raw.githubusercontent.com/pcsanwald/kaggle-titanic/master/train.csv | +---------------------+------------+-----------------------------------------------------------------------------+"""

    manager = DatasetManager("./tests/resources/multiple_data")
    table_printer = Printer(manager.get_datasets())
    rendered = table_printer.__repr__()

    self.assertEqual(expected_table, rendered)
def test_should_create_dataset_with_custom_data(self):
    """Create a dataset and check it is written to disk and can be reloaded.

    Calls ``create_dataset`` on a DatasetManager rooted at ``self.trash_dir``
    and then verifies that:

    * a ``<identifier>.yaml`` file exists in ``self.trash_dir``;
    * the directory holds exactly two entries;
    * ``get_datasets()`` returns only the new identifier, carrying the
      description and source that were passed in.
    """
    manager = DatasetManager(self.trash_dir, fs=self.os)
    identifier = "data_name_custom"
    dataset = {
        "identifier": identifier,
        "description": "description",
        "source": "/tmp/test.csv",
    }

    manager.create_dataset(**dataset)

    self.assertTrue(
        self.os.isfile("{}/{}.yaml".format(self.trash_dir, identifier)))
    # List the directory through the injected filesystem object, the same one
    # handed to DatasetManager and used for the isfile check above, instead
    # of the global ``os`` module.
    # NOTE(review): a count of 2 implies trash_dir already contains one other
    # entry when the test starts -- confirm against the fixture setup.
    self.assertEqual(len(self.os.listdir(self.trash_dir)), 2)

    loaded_dataset = manager.get_datasets()
    self.assertEqual(list(loaded_dataset.keys()), [identifier])

    datasource_configs = loaded_dataset.get(identifier)
    self.assertEqual(datasource_configs["description"],
                     dataset["description"])
    self.assertEqual(datasource_configs["source"], dataset["source"])
def test_should_read_multiple_yaml_from_dir(self):
    """Check that every dataset file in a directory is discovered.

    Loads the ``multiple_data`` resource directory and verifies that the
    identifiers returned by ``get_datasets()`` are exactly ``one_test`` and
    ``two_test``. Only the identifiers are compared; the per-dataset
    configuration values are not asserted by this test.
    """
    manager = DatasetManager("./tests/resources/multiple_data", fs=self.os)

    # Sort the keys so the comparison does not depend on the order in which
    # the datasets are returned.
    identifiers = sorted(manager.get_datasets().keys())

    self.assertListEqual(["one_test", "two_test"], identifiers)
def test_should_create_dataset(self):
    """Create a dataset and confirm it is stored and returned on reload.

    After ``create_dataset`` runs against ``self.trash_dir`` the test expects
    a ``<identifier>.yaml`` file there, two directory entries in total, and a
    reloaded configuration whose description and source equal the inputs.
    """
    name = "data_name"
    payload = {
        "identifier": name,
        "description": "description",
        "source": "/tmp/test.csv",
    }

    manager = DatasetManager(self.trash_dir, fs=self.os)
    manager.create_dataset(**payload)

    stored = manager.get_datasets()
    config = stored.get(name)

    yaml_path = "{}/{}.yaml".format(self.trash_dir, name)
    self.assertTrue(self.os.isfile(yaml_path))

    entry_count = len(self.os.listdir(self.trash_dir))
    self.assertEqual(entry_count, 2)

    first_identifier = list(stored.keys())[0]
    self.assertEqual(first_identifier, name)

    self.assertEqual(config.get("description"), payload["description"])
    self.assertEqual(config.get("source"), payload["source"])
def test_should_print_html(self):
    """Check the HTML table produced by ``Printer._repr_html_``.

    Loads the ``multiple_data`` resource directory, wraps the datasets in a
    Printer and compares its ``_repr_html_()`` output with the expected
    markup.
    """
    # Disable unittest's diff truncation so a mismatch shows the full markup.
    self.maxDiff = None

    # NOTE(review): the literal below is reproduced exactly as it appears in
    # the reviewed text; if line breaks or indentation were collapsed in that
    # view, restore them from the committed file before merging.
    expected_html = """<table> <tr> <th>description</th> <th>identifier</th> <th>source</th> </tr> <tr> <td>my little dataset</td> <td>one_test</td> <td>https://raw.githubusercontent.com/pcsanwald/kaggle-titanic/master/train.csv</td> </tr> <tr> <td>my little dataset 2</td> <td>two_test</td> <td>https://raw.githubusercontent.com/pcsanwald/kaggle-titanic/master/train.csv</td> </tr> </table>"""

    manager = DatasetManager("./tests/resources/multiple_data")
    html_printer = Printer(manager.get_datasets())
    rendered = html_printer._repr_html_()

    self.assertEqual(expected_html, rendered)