コード例 #1
0
    def test_setData(self):
        # Check that we can add the data into the DatasetManager object
        iris = load_iris()
        dm = DatasetManager()

        dm.setData(iris)
        self.assertEqual(dm.dataset, iris)
        self.assertEqual(dm.xlabels, None)
        self.assertEqual(dm.xlabels, None)
        self.assertEqual(dm.target_type, None)
        self.assertEqual(dm.num_classes, None)
        xlabels = 'data'
        ylabels = 'target'
        dm.setData(iris, xlabels=xlabels, ylabels=ylabels)
        self.assertEqual(dm.dataset, iris)
        self.assertEqual(dm.xlabels, xlabels)
        self.assertEqual(dm.ylabels, ylabels)
        self.assertEqual(dm.target_type, 'classification')
        self.assertEqual(dm.num_classes, 3)

        # Fail if datatypes for xlabel and ylabel are wrong
        iris = load_iris()
        try:
            xlabels = ['data']
            ylabels = 512.39
            dm = DatasetManager()
            dm.setData(dataset=iris, xlabels=xlabels, ylabels=ylabels)

            fail(self)
        except ValueError as ve:
            self.assertEqual(
                str(ve), 'ylabels argument must be string, not {}'.format(
                    type(ylabels)))

        iris = load_iris()
        try:
            xlabels = {'name': 'value'}
            ylabels = ['test1', 'test2']
            dm = DatasetManager()
            dm.setData(dataset=iris, xlabels=xlabels, ylabels=ylabels)

            fail(self)
        except ValueError as ve:
            self.assertEqual(
                str(ve),
                'xlabels argument must be string or list of strings, not {}'.
                format(type(xlabels)))
コード例 #2
0
ファイル: experiment.py プロジェクト: ibrahim85/alexandria
class Experiment:
    def __init__(self,
                 name,
                 dataset=None,
                 xlabels=None,
                 ylabels=None,
                 libs=None,
                 models=None,
                 modellibdict=None):
        if type(name) == str:
            self.name = name
        else:
            raise ValueError(
                'Experiment \'name\' argument must be string, not {}'.format(
                    str(type(name))))

        # Initialize the dataset manager object
        self.dm = DatasetManager()
        if type(dataset) != type(None):
            if type(xlabels) == type(None) or type(ylabels) == type(None):
                self.dm.setData(dataset)
            else:
                self.dm.setData(dataset=dataset,
                                xlabels=xlabels,
                                ylabels=ylabels)

        # Initialize the models object
        self.mm = ModelsManager()

        # Add the provided model information
        self.addModels(libs=libs, models=models, modellibdict=modellibdict)

    # This method only exists to allow for users to feel more comfortable with the API
    def addModel(self, lib=None, model=None, modellibdict=None):
        self.addModels(libs=lib, models=model, modellibdict=modellibdict)

    def addModels(self, modellibdict=None, libs=None, models=None):
        # We can hand this data over
        if modellibdict != None:
            self.mm.addModels(modellibdict)

        # If model values were specified, we must translate them into a dictionary
        #   for the model manager
        else:
            if models != None:
                modellibdict = self.createForModelsManager(libs=libs,
                                                           models=models)
                self.mm.addModels(modellibdict)

    def createForModelsManager(self, libs=[], models=[]):
        return_dict = dict()

        # If libs is None, then we will assume the default library on the other end
        if libs == None:
            libs = ''

        # If the libs argument is a string, then add all the models with this library
        if type(libs) == str:
            lib = libs
            return_dict[lib] = models
        elif type(libs) == list:
            if type(models) == list:
                if len(models) == len(libs):
                    for lib, model in zip(libs, models):
                        if lib in return_dict:
                            return_dict[lib].append(model)
                        else:
                            return_dict[lib] = [model]

        return return_dict

    def getName(self):
        return self.name

    def setName(self, name):
        if type(name) == str:
            self.name = name
        else:
            raise ValueError(
                'Experiment \'name\' argument must be string, not {}'.format(
                    str(type(name))))

    def getModels(self, aslist=False):
        return self.mm.getModels(aslist)

    def getNumModels(self):
        return len(self.mm.getNumModels())

    def train(self, X=None, y=None, *args, **kwargs):
        X = X
        y = y
        exp_type = ''
        if type(X) == type(None) or type(y) == type(None):
            self.dm.splitData(*args, **kwargs)
            X, y = self.dm.getXtrain(), self.dm.getytrain()
            exp_type = self.dm.target_type
        else:
            # If user provides data, we need to figure out what type of experiment it is
            exp_type = self.dm.getExperimentTypeOf(y)

        # Train the models on the provided data
        self.mm.trainModelsOnXy(X, y, exp_type)

    def trainCV(self, X=None, y=None, nfolds=-1, metrics=''):
        if type(metrics) == type(None) or metrics == '':
            raise ValueError('Metrics must be defined for cross validation!')

        X = X
        y = y
        exp_type = ''
        if type(X) == type(None) or type(y) == type(None):
            X = self.dm.getX()
            y = self.dm.gety()
            exp_type = self.dm.getTargetType()
        else:
            # If user provides data, we need to figure out what type of experiment it is
            exp_type = self.dm.getExperimentTypeOf(y)

        self.mm.trainCV(X,
                        y,
                        metrics=metrics,
                        nfolds=nfolds,
                        exp_type=exp_type)

    def predict(self, X=None):
        if type(X) != type(None):
            return self.mm.generateModelPredictions(X)
        else:
            return self.mm.generateModelPredictions(self.dm.getXtest())

    def getMetrics(self):
        return self.mm.getMetrics()

    def summarizeMetrics(self):
        # TO-DO: Make this output a lot smarter and more customizable
        print('\n' + self.name)

        metrics = self.mm.getMetrics()

        # List will hold all the rows for tabulate
        rows = []
        headers = []

        # Go through all of the metrics in the dictionary
        for model_metrics in metrics.values():
            if type(model_metrics) == dict:
                # Set up the headers
                if headers == []:
                    headers = list(model_metrics.keys())

                # Initialize the row
                row = []

                for name, value in model_metrics.items():
                    # If there is an average and standard deviation, then let's output both
                    if type(value
                            ) == dict and 'avg' in value and 'std' in value:
                        row.append('{:.4f}\u00B1{:.4f}'.format(
                            value['avg'], value['std']))
                    else:
                        row.append(value)

                rows.append(row)
        print(tabulate(rows, headers=headers))

    def compareModels_tTest(self, a, X=None, y=None):
        X = X
        y = y
        exp_type = ''
        if type(X) == type(None) or type(y) == type(None):
            X = self.dm.getX()
            y = self.dm.gety()
            exp_type = self.dm.getTargetType()
        else:
            # If user provides data, we need to figure out what type of experiment it is
            exp_type = self.dm.getExperimentTypeOf(y)

        self.mm.compareModels_tTest(X, y, exp_type=exp_type, a=a)