def execute(self, experiment: Experiment) -> ExperimentPerformance:
        """
        Run K-fold cross-validation on the experiment's data while keeping a separate test set aside.

        The instance list of the experiment's data set is split by a Partition built with ratio 0.25 and the seed of
        the experiment's parameter. The instances of part 1 of that split feed a KFoldCrossValidation with self.K folds
        and the same seed; part 0 is handed to runExperiment as its last argument (the separate test set).

        PARAMETERS
        ----------
        experiment : Experiment
            Experiment to be run. It supplies the data set, the classifier and the parameter (including the seed).

        RETURNS
        -------
        ExperimentPerformance
            The ExperimentPerformance instance that was passed to runExperiment.
        """
        parameter = experiment.getParameter()
        seed = parameter.getSeed()
        allInstances = experiment.getDataSet().getInstanceList()
        split = Partition(allInstances, 0.25, seed, True)
        foldInstances = split.get(1).getInstances()
        folds = KFoldCrossValidation(foldInstances, self.K, seed)
        performance = ExperimentPerformance()
        self.runExperiment(experiment.getClassifier(), parameter, performance, folds, split.get(0))
        return performance
Esempio n. 2
0
    def train(self, trainSet: InstanceList, parameters: Parameter):
        """
        Fit the linear discriminant analysis classifier (Introduction to Machine Learning, Alpaydin, 2015).

        A shared covariance matrix is accumulated over the lists of Partition(trainSet): every list's covariance is
        multiplied by (list size - 1) and added up, and the sum is divided by (training set size - number of lists).
        inverse() is then called on the shared matrix. For every list a weight vector and a bias are computed and
        stored under the list's class label; together with the class distribution they form the LdaModel.

        PARAMETERS
        ----------
        trainSet : InstanceList
            Training data given to the algorithm.
        parameters : Parameter
            Parameter of the Lda algorithm. It is not read by this method.
        """
        priorDistribution = trainSet.classDistribution()
        classLists = Partition(trainSet)
        classCount = classLists.size()
        dimension = trainSet.get(0).continuousAttributeSize()
        pooledCovariance = Matrix(dimension, dimension)
        for index in range(classCount):
            classList = classLists.get(index)
            classMean = Vector(classList.continuousAverage())
            scatter = classList.covariance(classMean)
            # Weight each class covariance by its degrees of freedom before pooling.
            scatter.multiplyWithConstant(classList.size() - 1)
            pooledCovariance.add(scatter)
        pooledCovariance.divideByConstant(trainSet.size() - classCount)
        # The return value is not used; presumably inverse() works in place - confirm against Matrix.
        pooledCovariance.inverse()
        weights = {}
        biases = {}
        for index in range(classCount):
            classList = classLists.get(index)
            label = classList.getClassLabel()
            classMean = Vector(classList.continuousAverage())
            weightVector = pooledCovariance.multiplyWithVectorFromRight(classMean)
            weights[label] = weightVector
            halfProjection = -0.5 * weightVector.dotProduct(classMean)
            logPrior = math.log(priorDistribution.getProbability(label))
            biases[label] = halfProjection + logPrior
        self.model = LdaModel(priorDistribution, weights, biases)
Esempio n. 3
0
 def train(self, trainSet: InstanceList, parameters: KMeansParameter):
     """
     Fit the K-means classifier by storing one average instance per list of Partition(trainSet).

     The average() of every list of the partition (presumably one list per class label) is collected into a new
     InstanceList. That list, the class distribution of the training set and the distance metric taken from the
     parameters make up the KMeansModel.

     PARAMETERS
     ----------
     trainSet : InstanceList
         Training data given to the algorithm.
     parameters : KMeansParameter
         Parameters of the K-means algorithm. Only the distance metric is read here.
     """
     priorDistribution = trainSet.classDistribution()
     means = InstanceList()
     partitions = Partition(trainSet)
     for index in range(partitions.size()):
         classList = partitions.get(index)
         means.add(classList.average())
     metric = parameters.getDistanceMetric()
     self.model = KMeansModel(priorDistribution, means, metric)
    def train(self, trainSet: InstanceList, parameters: DeepNetworkParameter):
        """
        Fit the deep network classifier.

        The training data is split by a Partition built from the cross-validation ratio and the seed of the
        parameters. Part 1 and part 0 of the split are passed, in that order, to DeepNetworkModel together with the
        parameters (presumably part 1 is the fitting data and part 0 the held-out validation data).

        PARAMETERS
        ----------
        trainSet : InstanceList
            Training data given to the algorithm.
        parameters : DeepNetworkParameter
            Parameters of the deep network algorithm. crossValidationRatio and seed are read here; the whole object
            is also forwarded to the model.
        """
        validationRatio = parameters.getCrossValidationRatio()
        seed = parameters.getSeed()
        split = Partition(trainSet, validationRatio, seed, True)
        fittingPart = split.get(1)
        heldOutPart = split.get(0)
        self.model = DeepNetworkModel(fittingPart, heldOutPart, parameters)
Esempio n. 5
0
 def execute(self, experiment: Experiment) -> ExperimentPerformance:
     """
     Run stratified K-fold cross-validation with a separate test set, repeated self.M times.

     In every repetition the instance list of the experiment's data set is split by a Partition built with ratio
     0.25 and the seed of the experiment's parameter. The lists of a Partition over part 1 of that split feed a
     StratifiedKFoldCrossValidation with self.K folds and the same seed; part 0 is handed to runExperiment as its
     last argument (the separate test set). All repetitions write into the same ExperimentPerformance.

     PARAMETERS
     ----------
     experiment : Experiment
         Experiment to be run. It supplies the data set, the classifier and the parameter (including the seed).

     RETURNS
     -------
     ExperimentPerformance
         The ExperimentPerformance instance that was passed to every runExperiment call.
     """
     performance = ExperimentPerformance()
     for _ in range(self.M):
         # The split is rebuilt in every repetition, exactly as before; it is not hoisted out of the loop.
         allInstances = experiment.getDataSet().getInstanceList()
         parameter = experiment.getParameter()
         split = Partition(allInstances, 0.25, parameter.getSeed(), True)
         classLists = Partition(split.get(1)).getLists()
         folds = StratifiedKFoldCrossValidation(classLists, self.K, parameter.getSeed())
         self.runExperiment(experiment.getClassifier(), parameter, performance, folds, split.get(0))
     return performance
Esempio n. 6
0
    def train(self, trainSet: InstanceList, parameters: MultiLayerPerceptronParameter):
        """
        Fit the multilayer perceptron classifier.

        The training data is split by a Partition built from the cross-validation ratio and the seed of the
        parameters, so the size of the held-out share is whatever that ratio says rather than a fixed percentage.
        Part 1 and part 0 of the split are passed, in that order, to MultiLayerPerceptronModel together with the
        parameters (presumably part 0 is the cross-validation data used for selecting the best weights and part 1 is
        the data trained on with gradient descent).

        PARAMETERS
        ----------
        trainSet : InstanceList
            Training data given to the algorithm.
        parameters : MultiLayerPerceptronParameter
            Parameters of the multilayer perceptron. crossValidationRatio and seed are read here; the whole object
            is also forwarded to the model.
        """
        validationRatio = parameters.getCrossValidationRatio()
        seed = parameters.getSeed()
        split = Partition(trainSet, validationRatio, seed, True)
        fittingPart = split.get(1)
        heldOutPart = split.get(0)
        self.model = MultiLayerPerceptronModel(fittingPart, heldOutPart, parameters)
Esempio n. 7
0
    def train(self, trainSet: InstanceList,
              parameters: MultiLayerPerceptronParameter):
        """
        Fit an auto encoder, a neural network which attempts to replicate its input at its output.

        The training data is split by a Partition built with the fixed ratio 0.2 and the seed of the parameters.
        Part 1 and part 0 of the split are passed, in that order, to AutoEncoderModel together with the parameters.

        PARAMETERS
        ----------
        trainSet : InstanceList
            Training data given to the algorithm.
        parameters : MultiLayerPerceptronParameter
            Parameters of the auto encoder. Only the seed is read here; the whole object is also forwarded to the
            model.
        """
        seed = parameters.getSeed()
        split = Partition(trainSet, 0.2, seed, True)
        fittingPart = split.get(1)
        heldOutPart = split.get(0)
        self.model = AutoEncoderModel(fittingPart, heldOutPart, parameters)
    def getClassInstances(self) -> list:
        """
        Build a Partition over the stored instances and return its lists.

        RETURNS
        -------
        list
            The result of getLists() on a Partition constructed from this object's instances.
        """
        classPartition = Partition(self.__instances)
        return classPartition.getLists()
Esempio n. 9
0
    def train(self, trainSet: InstanceList, parameters: C45Parameter):
        """
        Fit the C4.5 univariate decision tree classifier.

        Without pruning the tree is built from the whole training set. With pruning the training set is split by a
        Partition built from the cross-validation ratio and the seed of the parameters: the tree is constructed from
        part 1 of the split and then pruned with part 0.

        PARAMETERS
        ----------
        trainSet : InstanceList
            Training data given to the algorithm.
        parameters: C45Parameter
            Parameter of the C45 algorithm. The prune flag is always read; crossValidationRatio and seed are read only
            when pruning is enabled.
        """
        if not parameters.isPrune():
            self.model = DecisionTree(DecisionNode(trainSet))
            return
        split = Partition(trainSet, parameters.getCrossValidationRatio(), parameters.getSeed(), True)
        growingPart = split.get(1)
        prunedTree = DecisionTree(DecisionNode(growingPart))
        prunedTree.prune(split.get(0))
        self.model = prunedTree
Esempio n. 10
0
    def train(self, trainSet: InstanceList, parameters: Parameter):
        """
        Fit the quadratic discriminant analysis classifier (Introduction to Machine Learning, Alpaydin, 2015).

        For every list of Partition(trainSet) the covariance around the list's continuous average is computed, its
        determinant is taken and inverse() is called on it. Three values are then stored under the list's class label:
        a copy of that matrix multiplied by -0.5, the product of the matrix with the average vector, and a bias built
        from that product, the log determinant and the log prior probability of the class. Together with the class
        distribution they form the QdaModel.

        PARAMETERS
        ----------
        trainSet : InstanceList
            Training data given to the algorithm.
        parameters : Parameter
            Parameter of the Qda algorithm. It is not read by this method.
        """
        classLists = Partition(trainSet)
        priorDistribution = trainSet.classDistribution()
        quadraticTerms = {}
        linearTerms = {}
        biases = {}
        for index in range(classLists.size()):
            classList = classLists.get(index)
            label = classList.getClassLabel()
            classMean = Vector(classList.continuousAverage())
            classCovariance = classList.covariance(classMean)
            # Read the determinant before inverse() is called; the return value of inverse() is not used, so it
            # presumably changes the matrix in place - confirm against Matrix.
            determinant = classCovariance.determinant()
            classCovariance.inverse()
            # Scale a copy so that classCovariance itself stays unscaled for the product below.
            scaledInverse = deepcopy(classCovariance)
            scaledInverse.multiplyWithConstant(-0.5)
            quadraticTerms[label] = scaledInverse
            linearTerm = classCovariance.multiplyWithVectorFromLeft(classMean)
            linearTerms[label] = linearTerm
            projection = linearTerm.dotProduct(classMean)
            logDeterminant = math.log(determinant)
            logPrior = math.log(priorDistribution.getProbability(label))
            biases[label] = -0.5 * (projection + logDeterminant) + logPrior
        self.model = QdaModel(priorDistribution, quadraticTerms, linearTerms, biases)