def __init__(self, trainSet: InstanceList, validationSet: InstanceList,
                 parameters: MultiLayerPerceptronParameter):
        """
        Trains the autoencoder on the train set and keeps the W and V matrices that give the lowest error rate on the
        validation set. First it allocates the weights of the W and V matrices using the given
        MultiLayerPerceptronParameter and takes deep copies of them as bestW and bestV. Then, for each epoch, it
        shuffles the train set and updates W and V once per training instance, using the instance's own vector as the
        desired output. At the end of each epoch it tests the autoencoder with the validation set and, if the error
        rate is lower than the best one seen so far, it stores copies of the current W and V. The learning rate is
        multiplied by the eta decrease value after every epoch, and the best matrices are restored when all epochs
        are done.

        PARAMETERS
        ----------
        trainSet : InstanceList
            InstanceList to use as train set. It is shuffled in place at every epoch.
        validationSet : InstanceList
            InstanceList to use as validation set.
        parameters : MultiLayerPerceptronParameter
            MultiLayerPerceptronParameter is used to get the parameters; seed, learningRate, etaDecrease, epoch,
            hiddenNodes.
        """
        super().__init__(trainSet)
        # The output size is taken from the continuous attribute size of the first training instance.
        self.K = trainSet.get(0).continuousAttributeSize()
        self.__allocateWeights(parameters.getHiddenNodes(),
                               parameters.getSeed())
        bestW = copy.deepcopy(self.__W)
        bestV = copy.deepcopy(self.__V)
        # Start from a very large value so that the first epoch can replace it (presumably this is the error rate).
        bestPerformance = Performance(1000000000)
        epoch = parameters.getEpoch()
        learningRate = parameters.getLearningRate()
        for i in range(epoch):
            trainSet.shuffle(parameters.getSeed())
            for j in range(trainSet.size()):
                instance = trainSet.get(j)
                self.createInputVector(instance)
                # The desired output is the instance itself.
                self.r = instance.toVector()
                hidden = self.calculateHidden(self.x, self.__W)
                hiddenBiased = hidden.biased()
                self.y = self.__V.multiplyWithVectorFromRight(hiddenBiased)
                rMinusY = self.r.difference(self.y)
                deltaV = Matrix(rMinusY, hiddenBiased)
                oneMinusHidden = self.calculateOneMinusHidden(hidden)
                # Propagate the output error back through V (before V is updated) and drop the bias entry.
                tmph = self.__V.multiplyWithVectorFromLeft(rMinusY)
                tmph.remove(0)
                tmpHidden = oneMinusHidden.elementProduct(
                    hidden.elementProduct(tmph))
                deltaW = Matrix(tmpHidden, self.x)
                deltaV.multiplyWithConstant(learningRate)
                self.__V.add(deltaV)
                deltaW.multiplyWithConstant(learningRate)
                self.__W.add(deltaW)
            currentPerformance = self.testAutoEncoder(validationSet)
            if currentPerformance.getErrorRate(
            ) < bestPerformance.getErrorRate():
                bestPerformance = currentPerformance
                bestW = copy.deepcopy(self.__W)
                bestV = copy.deepcopy(self.__V)
            # Lower the learning rate after every epoch, as the other perceptron trainers do.
            learningRate *= parameters.getEtaDecrease()
        self.__W = bestW
        self.__V = bestV
    def __init__(self, trainSet: InstanceList, validationSet: InstanceList, parameters: MultiLayerPerceptronParameter):
        """
        Trains a perceptron with one hidden layer and keeps the weights that are most accurate on the validation set.
        After the model is initialised with the train set and the W and V matrices are allocated, every epoch shuffles
        the train set and applies one update to V and W per training instance. When an epoch ends, the classifier is
        tested on the validation set; whenever its accuracy exceeds the best one seen so far, deep copies of W and V
        are stored. The learning rate is multiplied by the eta decrease value after each epoch, and the stored best
        matrices are assigned back once all epochs are done.

        PARAMETERS
        ----------
        trainSet : InstanceList
            InstanceList that is used to train. It is shuffled in place at every epoch.
        validationSet : InstanceList
            InstanceList that is used to validate.
        parameters : MultiLayerPerceptronParameter
            Multi layer perceptron parameters; seed, learningRate, etaDecrease, crossValidationRatio, epoch,
            hiddenNodes.
        """
        super().initWithTrainSet(trainSet)
        self.__allocateWeights(parameters.getHiddenNodes(), parameters.getSeed())
        topW = copy.deepcopy(self.W)
        topV = copy.deepcopy(self.__V)
        topPerformance = ClassificationPerformance(0.0)
        epochCount = parameters.getEpoch()
        eta = parameters.getLearningRate()
        for _ in range(epochCount):
            trainSet.shuffle(parameters.getSeed())
            for index in range(trainSet.size()):
                self.createInputVector(trainSet.get(index))
                hiddenLayer = self.calculateHidden(self.x, self.W)
                hiddenWithBias = hiddenLayer.biased()
                outputError = self.calculateRMinusY(trainSet.get(index), hiddenWithBias, self.__V)
                gradientV = Matrix(outputError, hiddenWithBias)
                complement = self.calculateOneMinusHidden(hiddenLayer)
                # Back-propagate the output error through V (before V is updated) and drop the bias entry.
                backPropagated = self.__V.multiplyWithVectorFromLeft(outputError)
                backPropagated.remove(0)
                weightedHidden = hiddenLayer.elementProduct(backPropagated)
                hiddenError = complement.elementProduct(weightedHidden)
                gradientW = Matrix(hiddenError, self.x)
                # Scale each gradient by the learning rate and apply it.
                gradientV.multiplyWithConstant(eta)
                self.__V.add(gradientV)
                gradientW.multiplyWithConstant(eta)
                self.W.add(gradientW)
            validationPerformance = self.testClassifier(validationSet)
            if validationPerformance.getAccuracy() > topPerformance.getAccuracy():
                topPerformance = validationPerformance
                topW = copy.deepcopy(self.W)
                topV = copy.deepcopy(self.__V)
            eta *= parameters.getEtaDecrease()
        self.W = topW
        self.__V = topV
    def __init__(self, trainSet: InstanceList, validationSet: InstanceList,
                 parameters: LinearPerceptronParameter):
        """
        Trains a linear perceptron and keeps the weight matrix that is most accurate on the validation set. The
        weight matrix W is allocated with K rows and d + 1 columns. Every epoch shuffles the train set and applies one
        update to W per training instance. When an epoch ends, the classifier is tested on the validation set;
        whenever its accuracy exceeds the best one seen so far, a deep copy of W is stored. The learning rate is
        multiplied by the eta decrease value after each epoch, and the stored best matrix is assigned back to W once
        all epochs are done.

        PARAMETERS
        ----------
        trainSet : InstanceList
            InstanceList that is used to train. It is shuffled in place at every epoch.
        validationSet : InstanceList
            InstanceList that is used to validate.
        parameters : LinearPerceptronParameter
            Linear perceptron parameters; learningRate, etaDecrease, crossValidationRatio, epoch.
        """
        super().__init__(trainSet)
        self.W = self.allocateLayerWeights(self.K, self.d + 1, parameters.getSeed())
        topW = copy.deepcopy(self.W)
        topPerformance = ClassificationPerformance(0.0)
        epochCount = parameters.getEpoch()
        eta = parameters.getLearningRate()
        for _ in range(epochCount):
            trainSet.shuffle(parameters.getSeed())
            for index in range(trainSet.size()):
                self.createInputVector(trainSet.get(index))
                outputError = self.calculateRMinusY(trainSet.get(index), self.x, self.W)
                # The update is the error/input matrix scaled by the learning rate.
                gradient = Matrix(outputError, self.x)
                gradient.multiplyWithConstant(eta)
                self.W.add(gradient)
            validationPerformance = self.testClassifier(validationSet)
            if validationPerformance.getAccuracy() > topPerformance.getAccuracy():
                topPerformance = validationPerformance
                topW = copy.deepcopy(self.W)
            eta *= parameters.getEtaDecrease()
        self.W = topW
Exemple #4
0
 def __init__(self, instanceList: InstanceList = None, ratio=None, seed=None, stratified: bool = None):
     """
     Divides the instances of the given instance list into partitions. The way of dividing is selected by the
     types of the arguments:

     - instanceList is None: the partition is left empty.
     - ratio is None: one InstanceListOfSameClass is created per distinct class label, so that all instances of a
       class are grouped in a single partition.
     - ratio is a float: two partitions are created. If stratified is true, the instances are visited in a random
       order and each instance is put in the first partition as long as the number of instances of its class seen
       so far is below size * ratio * probability of that class; otherwise the instance list is shuffled and the
       instances whose index is below size * ratio are put in the first partition. The remaining instances are put
       in the second partition.
     - ratio is an int: ratio is used as an attribute index and seed selects the split. If seed is None, one
       partition is created per value in the attribute value list; if seed is an int, instances whose attribute
       index equals seed are put in the first of two partitions; if seed is a float, instances whose attribute
       value is less than seed are put in the first of two partitions.

     PARAMETERS
     ----------
     instanceList : InstanceList
         Instance list to be divided. In the non-stratified ratio split it is shuffled in place.
     ratio
         If float, ratio of the partition. Ratio is between 0 and 1. If the ratio is 0.2, then 20 percent of the
         instances are put in the first group, 80 percent of the instances are put in the second group. If int,
         index of the attribute used to divide the instances.
     seed
         For a ratio split, seed is used as the seed of the random shuffle. For an attribute split, it is the
         attribute value index (int) or the split value (float) described above.
     stratified : bool
         If true, the ratio split keeps the class distribution in the first partition.
     """
     self.__multilist = []
     if instanceList is None:
         return
     if ratio is None:
         classLabels = instanceList.getDistinctClassLabels()
         for classLabel in classLabels:
             self.add(InstanceListOfSameClass(classLabel))
         for instance in instanceList.getInstances():
             self.get(classLabels.index(instance.getClassLabel())).add(instance)
     elif isinstance(ratio, float):
         self.add(InstanceList())
         self.add(InstanceList())
         if stratified:
             distribution = instanceList.classDistribution()
             counts = [0] * len(distribution)
             randomArray = list(range(instanceList.size()))
             # Note: this reseeds the module-level random generator.
             random.seed(seed)
             random.shuffle(randomArray)
             for index in randomArray:
                 instance = instanceList.get(index)
                 classLabel = instance.getClassLabel()
                 classIndex = distribution.getIndex(classLabel)
                 if counts[classIndex] < instanceList.size() * ratio * distribution.getProbability(classLabel):
                     self.get(0).add(instance)
                 else:
                     self.get(1).add(instance)
                 counts[classIndex] = counts[classIndex] + 1
         else:
             instanceList.shuffle(seed)
             # Iterate over the instances, not over the partitions: self holds the two partitions added above,
             # so looping over self.size() would stop before all instances are distributed.
             for i in range(instanceList.size()):
                 instance = instanceList.get(i)
                 if i < instanceList.size() * ratio:
                     self.get(0).add(instance)
                 else:
                     self.get(1).add(instance)
     elif isinstance(ratio, int):
         attributeIndex = ratio
         if seed is None:
             valueList = instanceList.getAttributeValueList(attributeIndex)
             for _ in valueList:
                 self.add(InstanceList())
             for instance in instanceList.getInstances():
                 self.get(valueList.index(instance.getAttribute(attributeIndex).getValue())).add(instance)
         elif isinstance(seed, int):
             attributeValue = seed
             self.add(InstanceList())
             self.add(InstanceList())
             for instance in instanceList.getInstances():
                 if instance.getAttribute(attributeIndex).getIndex() == attributeValue:
                     self.get(0).add(instance)
                 else:
                     self.get(1).add(instance)
         elif isinstance(seed, float):
             splitValue = seed
             self.add(InstanceList())
             self.add(InstanceList())
             for instance in instanceList.getInstances():
                 if instance.getAttribute(attributeIndex).getValue() < splitValue:
                     self.get(0).add(instance)
                 else:
                     self.get(1).add(instance)
Exemple #5
0
    def __init__(self, trainSet: InstanceList, validationSet: InstanceList,
                 parameters: DeepNetworkParameter):
        """
        Trains a network with several hidden layers and keeps the weights that are most accurate on the validation
        set. After the weights are allocated from the given DeepNetworkParameter, every epoch shuffles the train set
        and, for each training instance, computes the hidden layer values one layer after another (each one fed with
        the biased values of the layer below it), builds the weight updates from the last layer down to the first
        one, and adds them to the weights after scaling them with the learning rate. When an epoch ends, the
        classifier is tested on the validation set; whenever its accuracy exceeds the best one seen so far, the best
        weights are stored again. The learning rate is multiplied by the eta decrease value after each epoch. At the
        end, the weights are cleared and refilled with the best weights.

        PARAMETERS
        ----------
        trainSet : InstanceList
            InstanceList to be used as trainSet. It is shuffled in place at every epoch.
        validationSet : InstanceList
            InstanceList to be used as validationSet.
        parameters : DeepNetworkParameter
            DeepNetworkParameter input.
        """
        super().__init__(trainSet)
        gradients = []
        activations = []
        biasedActivations = []
        self.__allocateWeights(parameters)
        topWeights = self.__setBestWeights()
        topPerformance = ClassificationPerformance(0.0)
        epochCount = parameters.getEpoch()
        eta = parameters.getLearningRate()
        for _ in range(epochCount):
            trainSet.shuffle(parameters.getSeed())
            for index in range(trainSet.size()):
                self.createInputVector(trainSet.get(index))
                activations.clear()
                biasedActivations.clear()
                gradients.clear()
                # Forward pass: the first layer reads the input vector, every other layer reads the biased
                # values of the layer below it.
                layerInput = self.x
                for layer in range(self.__hiddenLayerSize):
                    activations.append(self.calculateHidden(layerInput, self.__weights[layer]))
                    layerInput = activations[layer].biased()
                    biasedActivations.append(layerInput)
                outputError = self.calculateRMinusY(trainSet.get(index),
                                                    biasedActivations[self.__hiddenLayerSize - 1],
                                                    self.__weights[-1])
                gradients.insert(0, Matrix(outputError, biasedActivations[self.__hiddenLayerSize - 1]))
                # Backward pass: gradients[0] always holds the update of the layer just above the current one.
                for layer in reversed(range(len(self.__weights) - 1)):
                    complement = self.calculateOneMinusHidden(activations[layer])
                    backPropagated = gradients[0].elementProduct(self.__weights[layer + 1]).sumOfRows()
                    # Drop the bias entry.
                    backPropagated.remove(0)
                    layerError = complement.elementProduct(backPropagated)
                    lowerInput = self.x if layer == 0 else biasedActivations[layer - 1]
                    gradients.insert(0, Matrix(layerError, lowerInput))
                for weight, gradient in zip(self.__weights, gradients):
                    gradient.multiplyWithConstant(eta)
                    weight.add(gradient)
            validationPerformance = self.testClassifier(validationSet)
            if validationPerformance.getAccuracy() > topPerformance.getAccuracy():
                topPerformance = validationPerformance
                topWeights = self.__setBestWeights()
            eta *= parameters.getEtaDecrease()
        self.__weights.clear()
        self.__weights.extend(topWeights)