def test_loadAllDatasets(self): chalearnWrapper = ChalearnWrapper(files_loc='../data/chalearn_autoML_challenge') available_datasets = chalearnWrapper.getAvailableDatasets() available_datasets = ['dorothea', 'christine', 'jasmine', 'madeline', 'philippine', 'sylvine', 'albert', 'dilbert', 'fabert', 'robert', 'volkert'] for dataset_name in available_datasets: print("loading dataset {0}".format(dataset_name)) chalearnWrapper.get_train_test_dataset(dataset_name)
class TestChalearnWrapper(TestCase): def setUp(self): self.dataset_loc = '../data/chalearn_autoML_challenge' self.adult_dataset_loc = path.join(self.dataset_loc, 'round0/adult') self.chalearnWrapper = ChalearnWrapper(self.dataset_loc) def test_getDatasetFiles(self): datasetFiles = self.chalearnWrapper.getDatasetFiles(self.adult_dataset_loc, 'adult') def test_getAvailableDatasets(self): available_datasets = self.chalearnWrapper.getAvailableDatasets() self.assertTrue('adult' in available_datasets) def test_getDataset(self): mlProblem = self.chalearnWrapper.getMLproblem('adult') self.assertTrue(mlProblem.has_key('train_data')) def test_loadDatasetPropertiesDict(self): self.chalearnWrapper.loadDatasetPropertiesDict(path.join(self.dataset_loc, 'round0/adult/adult_public.info')) def test_loadNoHeaderDataframe(self): df = self.chalearnWrapper.loadNoHeaderDataframe(path.join(self.dataset_loc, 'round0/adult/adult_train.data')) self.assertEqual(df.values.shape[0], 34190) self.assertEqual(df.values.shape[1], 24) def test_loadDataset(self): chalearnWrapper = ChalearnWrapper(files_loc='../data/chalearn_autoML_challenge') mlProblem = chalearnWrapper.getMLproblem('adult') dataLoader = TrainTestDataLoader(train=mlProblem.train_df, test=mlProblem.test_df, train_labels=mlProblem.train_labels, try_date_parse=False) dataLoader.cleanData(max_onehot_limit=200) X, X_sub, y = dataLoader.getTrainTestData() def test_loadAllDatasets(self): chalearnWrapper = ChalearnWrapper(files_loc='../data/chalearn_autoML_challenge') available_datasets = chalearnWrapper.getAvailableDatasets() available_datasets = ['dorothea', 'christine', 'jasmine', 'madeline', 'philippine', 'sylvine', 'albert', 'dilbert', 'fabert', 'robert', 'volkert'] for dataset_name in available_datasets: print("loading dataset {0}".format(dataset_name)) chalearnWrapper.get_train_test_dataset(dataset_name)