def test_rossmannDataLoads(self):
    """Smoke-test loading and cleaning the Rossmann sample CSVs.

    Builds a TrainTestDataLoader from the train_100.csv / test_100.csv
    paths, cleans the data, fetches the train/test split and prints the
    elapsed wall-clock time. Nothing is asserted; the test only fails if
    one of the calls raises.
    """
    started_at = time()

    loader = TrainTestDataLoader(
        '../data/rossmann/train_100.csv',
        '../data/rossmann/test_100.csv',
        train_labels_column='Sales',
        test_ids_column='Id'
    )
    loader.cleanData(max_onehot_limit=200)

    # Unpacking into three names also checks that exactly three values
    # come back; the values themselves are not inspected.
    X, X_sub, y = loader.getTrainTestData()

    elapsed = time() - started_at
    print('completed in {0} seconds!'.format(elapsed))
 def test_loadDataset(self):
   """Smoke-test building a loader from the ChaLearn 'adult' problem.

   The loader is constructed from the problem object's train_df, test_df
   and train_labels attributes (rather than file paths), with date
   parsing disabled. Nothing is asserted; the test only fails if one of
   the calls raises.
   """
   wrapper = ChalearnWrapper(files_loc='../data/chalearn_autoML_challenge')
   problem = wrapper.getMLproblem('adult')

   loader = TrainTestDataLoader(
     train=problem.train_df,
     test=problem.test_df,
     train_labels=problem.train_labels,
     try_date_parse=False
   )
   loader.cleanData(max_onehot_limit=200)

   # Unpacking into three names also checks that exactly three values
   # come back; the values themselves are not inspected.
   X, X_sub, y = loader.getTrainTestData()
 def test_featureExtractionFromActualDataset(self):
   """Build a FeatureSelection over the cleaned Rossmann sample data.

   Loads the train_100.csv / test_100.csv files through
   TrainTestDataLoader, cleans them, and hands the resulting X and y to
   FeatureSelection with method='all' and a LogisticRegressionCV
   estimator.
   """
   dataLoader = TrainTestDataLoader('../data/rossmann/train_100.csv', '../data/rossmann/test_100.csv', train_labels_column='Sales', test_ids_column='Id')
   dataLoader.cleanData(max_onehot_limit=200)
   # X_sub is unpacked here but is not passed to FeatureSelection.
   X, X_sub, y = dataLoader.getTrainTestData()
   # NOTE(review): the label column is 'Sales', yet a classifier is used with
   # problem_type='classification' and lower_is_better=True -- confirm this
   # pairing is intended for this dataset.
   featureSelection = FeatureSelection(lower_is_better=True, method='all', X=X, y=y, clf=LogisticRegressionCV(), problem_type='classification')