def test_rossmannDataLoads(self):
    """Smoke-test loading and cleaning the Rossmann train_100/test_100 CSVs.

    Builds a TrainTestDataLoader from the two CSV paths, cleans the data,
    unpacks the three values returned by getTrainTestData(), and prints the
    elapsed wall-clock time. There are no assertions: the test passes as
    long as none of these calls raises.
    """
    started_at = time()

    train_csv = '../data/rossmann/train_100.csv'
    test_csv = '../data/rossmann/test_100.csv'
    loader = TrainTestDataLoader(
        train_csv,
        test_csv,
        train_labels_column='Sales',
        test_ids_column='Id',
    )
    loader.cleanData(max_onehot_limit=200)

    # Unpacking into three names also checks that exactly three values
    # come back from the loader.
    train_features, submission_features, labels = loader.getTrainTestData()

    elapsed = time() - started_at
    print('completed in {0} seconds!'.format(elapsed))
def test_loadDataset(self):
    """Smoke-test building a TrainTestDataLoader from a ChaLearn problem.

    Fetches the 'adult' problem through ChalearnWrapper, hands its
    train_df / test_df / train_labels attributes to TrainTestDataLoader
    (with date parsing disabled), cleans the data and unpacks the result
    of getTrainTestData(). There are no assertions: the test passes as
    long as none of these calls raises.
    """
    wrapper = ChalearnWrapper(files_loc='../data/chalearn_autoML_challenge')
    problem = wrapper.getMLproblem('adult')

    loader = TrainTestDataLoader(
        train=problem.train_df,
        test=problem.test_df,
        train_labels=problem.train_labels,
        try_date_parse=False,
    )
    loader.cleanData(max_onehot_limit=200)

    # Unpacking into three names also checks that exactly three values
    # come back from the loader.
    train_features, submission_features, labels = loader.getTrainTestData()
def test_featureExtractionFromActualDataset(self):
    """Smoke-test constructing FeatureSelection on the Rossmann CSV data.

    Loads and cleans the Rossmann train_100/test_100 CSVs, then builds a
    FeatureSelection object from the resulting X and y with a
    LogisticRegressionCV estimator. Only construction is exercised; there
    are no assertions, so the test passes as long as nothing raises.
    """
    train_csv = '../data/rossmann/train_100.csv'
    test_csv = '../data/rossmann/test_100.csv'
    loader = TrainTestDataLoader(
        train_csv,
        test_csv,
        train_labels_column='Sales',
        test_ids_column='Id',
    )
    loader.cleanData(max_onehot_limit=200)
    train_features, submission_features, labels = loader.getTrainTestData()

    # NOTE(review): the label column is 'Sales' yet problem_type is
    # 'classification' -- confirm this pairing is intentional.
    selector = FeatureSelection(
        lower_is_better=True,
        method='all',
        X=train_features,
        y=labels,
        clf=LogisticRegressionCV(),
        problem_type='classification',
    )