def test_getTargetType(self):
    """Check DatasetManager.getTargetType() against five bundled datasets.

    Three datasets are asserted to report 'classification' and two are
    asserted to report 'regression'. Datasets are fed to DatasetManager
    either as the loader's raw return value (labels given as the strings
    'data' / 'target') or as the ``.frame`` of an ``as_frame=True`` load
    (every column but the last as features, 'target' as the label).
    """

    def assert_raw(loader, expected):
        # Hand the loader's return value to the manager untouched.
        bundle = loader()
        manager = DatasetManager(dataset=bundle, xlabels='data',
                                 ylabels='target')
        actual = manager.getTargetType()
        self.assertEqual(actual, expected)

    def assert_frame(loader, expected):
        # Use the frame form; the final column is left out of the features.
        frame = loader(as_frame=True).frame
        feature_cols = frame.columns[:-1]
        manager = DatasetManager(dataset=frame, xlabels=feature_cols,
                                 ylabels='target')
        actual = manager.getTargetType()
        self.assertEqual(actual, expected)

    # Datasets asserted to be classification problems.
    assert_raw(load_iris, 'classification')
    assert_frame(load_breast_cancer, 'classification')
    assert_raw(load_wine, 'classification')

    # Datasets asserted to be regression problems (the string 'regression'
    # is expected here, not None).
    assert_frame(load_diabetes, 'regression')
    # NOTE(review): if these loaders come from sklearn.datasets, load_boston
    # no longer exists as of scikit-learn 1.2 - confirm the pinned version.
    assert_raw(load_boston, 'regression')
class Experiment:
    """A named experiment pairing a DatasetManager with a ModelsManager.

    The experiment owns one ``DatasetManager`` (``self.dm``) and one
    ``ModelsManager`` (``self.mm``) and forwards training, cross-validation,
    prediction, metric reporting and model comparison to them.
    """

    # Sentinel that tells "argument omitted" apart from an explicit None, so
    # createForModelsManager keeps its historical defaults (empty lists)
    # without sharing one mutable list object between calls.
    _UNSET = object()

    def __init__(self, name, dataset=None, xlabels=None, ylabels=None,
                 libs=None, models=None, modellibdict=None):
        """Create the experiment and optionally load data and models.

        Args:
            name: Experiment name; must be a string.
            dataset: Optional dataset handed to ``DatasetManager.setData``.
            xlabels: Optional feature labels. Only forwarded when both
                ``xlabels`` and ``ylabels`` are given.
            ylabels: Optional target labels (see ``xlabels``).
            libs: Optional library name(s), forwarded to ``addModels``.
            models: Optional model(s), forwarded to ``addModels``.
            modellibdict: Optional ready-made dictionary for the models
                manager, forwarded to ``addModels``.

        Raises:
            ValueError: If ``name`` is not a string.
        """
        self.name = self._checkName(name)

        # Initialize the dataset manager object
        self.dm = DatasetManager()
        if dataset is not None:
            if xlabels is None or ylabels is None:
                # Without both label sets, let the manager use its defaults.
                self.dm.setData(dataset)
            else:
                self.dm.setData(dataset=dataset, xlabels=xlabels,
                                ylabels=ylabels)

        # Initialize the models object and add the provided model information
        self.mm = ModelsManager()
        self.addModels(libs=libs, models=models, modellibdict=modellibdict)

    @staticmethod
    def _checkName(name):
        """Return ``name`` unchanged, or raise ValueError if not a string."""
        if not isinstance(name, str):
            raise ValueError(
                'Experiment \'name\' argument must be string, not {}'.format(
                    str(type(name))))
        return name

    def _resolveFullData(self, X, y):
        """Return ``(X, y, exp_type)`` for operations on the full dataset.

        When either ``X`` or ``y`` is missing, all three values come from the
        dataset manager. Otherwise the caller's data is kept and the
        experiment type is derived from ``y``.
        """
        if X is None or y is None:
            return self.dm.getX(), self.dm.gety(), self.dm.getTargetType()
        # If user provides data, we need to figure out what type of
        # experiment it is
        return X, y, self.dm.getExperimentTypeOf(y)

    def addModel(self, lib=None, model=None, modellibdict=None):
        """Singular-named alias of ``addModels``.

        This method only exists to allow for users to feel more comfortable
        with the API.
        """
        self.addModels(libs=lib, models=model, modellibdict=modellibdict)

    def addModels(self, modellibdict=None, libs=None, models=None):
        """Register models with the models manager.

        Args:
            modellibdict: Dictionary passed straight to the models manager.
                Takes precedence over ``libs`` / ``models`` when given.
            libs: Library name (string), list of library names, or None.
            models: Model(s) to register. Nothing happens when both
                ``modellibdict`` and ``models`` are None.
        """
        # Identity checks: `!= None` would invoke a custom or element-wise
        # __ne__ on array-like arguments.
        if modellibdict is not None:
            # We can hand this data over
            self.mm.addModels(modellibdict)
        elif models is not None:
            # Model values were specified, so translate them into a
            # dictionary for the model manager
            modellibdict = self.createForModelsManager(libs=libs,
                                                       models=models)
            self.mm.addModels(modellibdict)

    def createForModelsManager(self, libs=_UNSET, models=_UNSET):
        """Build the ``{library: models}`` dictionary for the models manager.

        Args:
            libs: A library name (string), a list of library names, or None.
                None is replaced by ``''`` so the receiving side can assume
                its default library. Defaults to an empty list when omitted.
            models: The model(s) to register. Defaults to an empty list when
                omitted.

        Returns:
            dict: With a string ``libs``, ``{libs: models}``. With a list
            ``libs`` and a list ``models`` of equal length, each library
            mapped to the list of models paired with it. In every other case
            (type or length mismatch) an empty dict.
        """
        # Fresh lists per call: the models list can end up inside the
        # returned dict, so a shared default would leak between calls.
        if libs is self._UNSET:
            libs = []
        if models is self._UNSET:
            models = []

        # If libs is None, then we will assume the default library on the
        # other end
        if libs is None:
            libs = ''

        return_dict = dict()
        if isinstance(libs, str):
            # A single library name applies to all the given models
            return_dict[libs] = models
        elif (isinstance(libs, list) and isinstance(models, list)
                and len(models) == len(libs)):
            for lib, model in zip(libs, models):
                return_dict.setdefault(lib, []).append(model)
        return return_dict

    def getName(self):
        """Return the experiment name."""
        return self.name

    def setName(self, name):
        """Rename the experiment.

        Raises:
            ValueError: If ``name`` is not a string.
        """
        self.name = self._checkName(name)

    def getModels(self, aslist=False):
        """Return the models manager's models; ``aslist`` is forwarded."""
        return self.mm.getModels(aslist)

    def getNumModels(self):
        """Return the number of models held by the models manager.

        Returns the manager's result directly when it is already a count,
        and its length when it is a sized collection.
        """
        count = self.mm.getNumModels()
        try:
            return len(count)
        except TypeError:
            # Already a plain number: len() cannot be applied to it.
            return count

    def train(self, X=None, y=None, *args, **kwargs):
        """Train every model on ``X`` / ``y``.

        When either ``X`` or ``y`` is missing, the dataset manager splits its
        data (extra positional and keyword arguments go to ``splitData``) and
        the training portion is used. When both are supplied, the extra
        arguments are not used.
        """
        if X is None or y is None:
            self.dm.splitData(*args, **kwargs)
            X, y = self.dm.getXtrain(), self.dm.getytrain()
            # NOTE(review): trainCV reads this through getTargetType();
            # confirm the attribute and the getter always agree.
            exp_type = self.dm.target_type
        else:
            # If user provides data, we need to figure out what type of
            # experiment it is
            exp_type = self.dm.getExperimentTypeOf(y)

        # Train the models on the provided data
        self.mm.trainModelsOnXy(X, y, exp_type)

    def trainCV(self, X=None, y=None, nfolds=-1, metrics=''):
        """Cross-validate every model.

        Args:
            X: Optional features; the dataset manager's data is used when
                either ``X`` or ``y`` is missing.
            y: Optional targets (see ``X``).
            nfolds: Fold count, forwarded to the models manager.
            metrics: Metrics to evaluate; must not be None or ``''``.

        Raises:
            ValueError: If ``metrics`` is None or an empty string.
        """
        # The isinstance guard keeps array-like metrics from being compared
        # element-wise against ''.
        if metrics is None or (isinstance(metrics, str) and metrics == ''):
            raise ValueError('Metrics must be defined for cross validation!')

        X, y, exp_type = self._resolveFullData(X, y)
        self.mm.trainCV(X, y, metrics=metrics, nfolds=nfolds,
                        exp_type=exp_type)

    def predict(self, X=None):
        """Return model predictions for ``X``.

        Falls back to the dataset manager's test features when ``X`` is None.
        """
        if X is None:
            X = self.dm.getXtest()
        return self.mm.generateModelPredictions(X)

    def getMetrics(self):
        """Return the metrics collected by the models manager."""
        return self.mm.getMetrics()

    def summarizeMetrics(self):
        """Print the experiment name followed by a table of model metrics.

        Only dict-valued entries of the metrics mapping become rows. Column
        headers are the metric names of the first non-empty such entry.
        Values that are dicts holding both 'avg' and 'std' are rendered as
        ``avg±std`` with four decimals.
        """
        # TO-DO: Make this output a lot smarter and more customizable
        print('\n' + self.name)
        metrics = self.mm.getMetrics()

        # rows holds one table row per model for tabulate
        rows = []
        headers = []
        for model_metrics in metrics.values():
            if not isinstance(model_metrics, dict):
                continue

            # Set up the headers
            if not headers:
                headers = list(model_metrics.keys())

            row = []
            for value in model_metrics.values():
                # If there is an average and standard deviation, then
                # let's output both
                if (isinstance(value, dict) and 'avg' in value
                        and 'std' in value):
                    row.append('{:.4f}\u00B1{:.4f}'.format(
                        value['avg'], value['std']))
                else:
                    row.append(value)
            rows.append(row)

        print(tabulate(rows, headers=headers))

    def compareModels_tTest(self, a, X=None, y=None):
        """Forward a t-test comparison of the models to the models manager.

        Args:
            a: Passed through to the models manager as ``a``.
            X: Optional features; the dataset manager's data is used when
                either ``X`` or ``y`` is missing.
            y: Optional targets (see ``X``).
        """
        X, y, exp_type = self._resolveFullData(X, y)
        self.mm.compareModels_tTest(X, y, exp_type=exp_type, a=a)