Ejemplo n.º 1
0
    def __checkDefinition(self, instance: Instance) -> bool:
        """
        Verifies that every attribute of the given instance agrees with the attribute type stored at the same
        position of the data definition. Attributes that are not Binary, DiscreteIndexed, Discrete or Continuous
        attributes are not checked at all.

        PARAMETERS
        ----------
        instance : Instance
            Instance whose attribute types are compared against the data definition.

        RETURNS
        -------
        bool
            False as soon as one attribute disagrees with the data definition, True otherwise.
        """
        # Order matters: the first class that matches an attribute decides which type is expected for it.
        expectedTypes = (
            (BinaryAttribute, AttributeType.BINARY),
            (DiscreteIndexedAttribute, AttributeType.DISCRETE_INDEXED),
            (DiscreteAttribute, AttributeType.DISCRETE),
            (ContinuousAttribute, AttributeType.CONTINUOUS),
        )
        for position in range(instance.attributeSize()):
            attribute = instance.getAttribute(position)
            for attributeClass, expectedType in expectedTypes:
                if not isinstance(attribute, attributeClass):
                    continue
                if self.__definition.getAttributeType(position) is not expectedType:
                    return False
                # Only the first matching class is consulted for an attribute.
                break
        return True
    def __logLikelihoodContinuous(self, classLabel: str,
                                  instance: Instance) -> float:
        """
        Computes the log likelihood score of an instance for a class label. The score starts as the logarithm of
        the class label's probability in the prior distribution. Each attribute i then adds
        -0.5 * ((xi - mi) / si) ** 2, where xi is the attribute value and mi, si are read from the class means and
        class deviations of the class label. Attributes whose si is zero add nothing.

        PARAMETERS
        ----------
        classLabel : str
            Class label to score the instance against.
        instance : Instance
            Instance whose attribute values are scored.

        RETURNS
        -------
        float
            The log likelihood of the given class label and Instance.
        """
        prior = self.priorDistribution.getProbability(classLabel)
        score = math.log(prior)
        for position in range(instance.attributeSize()):
            value = instance.getAttribute(position).getValue()
            mean = self.__classMeans[classLabel].getValue(position)
            deviation = self.__classDeviations[classLabel].getValue(position)
            if deviation == 0:
                # The standardized difference is undefined for a zero deviation; skip the attribute.
                continue
            standardized = (value - mean) / deviation
            score += -0.5 * math.pow(standardized, 2)
        return score
    def __logLikelihoodDiscrete(self, classLabel: str,
                                instance: Instance) -> float:
        """
        Computes the log likelihood score of an instance for a class label. The score starts as the logarithm of
        the class label's probability in the prior distribution. For each attribute i, the logarithm of the
        Laplace-smoothed probability of the attribute value, taken from the i'th attribute distribution of the
        class label, is added to the score.

        PARAMETERS
        ----------
        classLabel : str
            Class label to score the instance against.
        instance : Instance
            Instance whose attribute values are scored.

        RETURNS
        -------
        float
            The log likelihood of the given class label and Instance.
        """
        prior = self.priorDistribution.getProbability(classLabel)
        score = math.log(prior)
        distributions = self.__classAttributeDistributions.get(classLabel)
        for position in range(instance.attributeSize()):
            value = instance.getAttribute(position).getValue()
            smoothed = distributions[position].getProbabilityLaplaceSmoothing(
                value)
            score += math.log(smoothed)
        return score
Ejemplo n.º 4
0
    def convertInstance(self, instance: Instance):
        """
        Normalizes the continuous attributes of a single instance in place. For all continuous attributes i,
        new x_i = (x_i - m_i) / s_i, where m_i and s_i are the values at position i of the average instance and the
        standard deviation instance. When s_i is zero the division is undefined, so the attribute is only centered:
        new x_i = x_i - m_i. Non-continuous attributes are left untouched.

        PARAMETERS
        ----------
        instance : Instance
            Instance whose attributes will be normalized.
        """
        for i in range(instance.attributeSize()):
            xi = instance.getAttribute(i)
            if not isinstance(xi, ContinuousAttribute):
                continue
            mi = self.__averageInstance.getAttribute(i).getValue()
            si = self.__standardDeviationInstance.getAttribute(i).getValue()
            centered = xi.getValue() - mi
            if si != 0:
                xi.setValue(centered / si)
            else:
                # A zero deviation would raise ZeroDivisionError; treat the scale as 1 and keep the centered value.
                xi.setValue(centered)
Ejemplo n.º 5
0
 def distance(self, instance1: Instance, instance2: Instance) -> float:
     """
     Accumulates a distance value between two instances, attribute by attribute, over the attribute positions of
     the first instance. When both attributes at a position are discrete, 1 is added if the first value is not
     None and differs from the second value. Otherwise, when both attributes are continuous, the squared
     difference of their values is added. Any other combination adds nothing.

     PARAMETERS
     ----------
     instance1 : Instance
         First instance; its attribute size determines how many positions are compared.
     instance2 : Instance
         Second instance.

     RETURNS
     -------
     float
         Sum of the per-attribute contributions (no square root is taken).
     """
     total = 0
     for position in range(instance1.attributeSize()):
         first = instance1.getAttribute(position)
         bothDiscrete = (isinstance(first, DiscreteAttribute) and
                         isinstance(instance2.getAttribute(position), DiscreteAttribute))
         if bothDiscrete:
             firstValue = first.getValue()
             if firstValue is None:
                 continue
             if firstValue != instance2.getAttribute(position).getValue():
                 total += 1
             continue
         bothContinuous = (isinstance(first, ContinuousAttribute) and
                           isinstance(instance2.getAttribute(position), ContinuousAttribute))
         if bothContinuous:
             difference = first.getValue() - instance2.getAttribute(position).getValue()
             total += math.pow(difference, 2)
     return total
    def convertInstance(self, instance: Instance):
        """
        Converts the discrete attributes of a single instance to their indexed version. For every original
        attribute position whose attribute distribution is non-empty, a DiscreteIndexedAttribute built from the
        attribute's string form, its index in the distribution and the distribution size is appended to the
        instance. Afterwards removeDiscreteAttributesFromInstance is called with the original attribute count.

        PARAMETERS
        ----------
        instance : Instance
            The instance to be converted.
        """
        originalSize = instance.attributeSize()
        for position in range(originalSize):
            distribution = self.attributeDistributions[position]
            if len(distribution) == 0:
                # An empty distribution means there is nothing to index at this position.
                continue
            label = str(instance.getAttribute(position))
            labelIndex = distribution.getIndex(label)
            indexed = DiscreteIndexedAttribute(label, labelIndex,
                                               len(distribution))
            instance.addAttribute(indexed)
        self.removeDiscreteAttributesFromInstance(instance, originalSize)
Ejemplo n.º 7
0
    def __setDefinition(self, instance: Instance):
        """
        Builds the data definition from the attribute types of the given Instance. For each attribute, the
        attribute type matching its class (Binary, DiscreteIndexed, Discrete or Continuous) is appended to a list;
        attributes of any other class add no entry. The list is then wrapped in a DataDefinition.

        PARAMETERS
        ----------
        instance : Instance
            Instance whose attributes determine the data definition.
        """
        # Order matters: the first class that matches an attribute decides its type.
        typeByClass = (
            (BinaryAttribute, AttributeType.BINARY),
            (DiscreteIndexedAttribute, AttributeType.DISCRETE_INDEXED),
            (DiscreteAttribute, AttributeType.DISCRETE),
            (ContinuousAttribute, AttributeType.CONTINUOUS),
        )
        detectedTypes = []
        for position in range(instance.attributeSize()):
            attribute = instance.getAttribute(position)
            for attributeClass, attributeType in typeByClass:
                if isinstance(attribute, attributeClass):
                    detectedTypes.append(attributeType)
                    break
        self.__definition = DataDefinition(detectedTypes)
    def satisfy(self, instance: Instance):
        """
        Tests whether the given Instance satisfies this condition. The attribute of the instance at
        attributeIndex is compared with the stored Attribute value, depending on the class of the stored value:

        DiscreteIndexedAttribute: if the stored index is -1 the condition is always satisfied; otherwise the
        index of the instance's attribute must equal the stored index.

        DiscreteAttribute: the value of the instance's attribute must equal the stored value.

        ContinuousAttribute: when the comparison is "<", the instance's value must be less than or equal to the
        stored value; for any other comparison it must be strictly greater than the stored value.

        PARAMETERS
        ----------
        instance : Instance
            Instance to compare.

        RETURNS
        -------
        bool
            True if the given instance satisfies the condition; False if it does not or if the stored value is
            none of the attribute classes above.
        """
        condition = self.__value
        if isinstance(condition, DiscreteIndexedAttribute):
            expectedIndex = condition.getIndex()
            if expectedIndex == -1:
                return True
            return instance.getAttribute(self.__attributeIndex).getIndex() == expectedIndex
        if isinstance(condition, DiscreteAttribute):
            candidate = instance.getAttribute(self.__attributeIndex)
            return candidate.getValue() == condition.getValue()
        if isinstance(condition, ContinuousAttribute):
            candidate = instance.getAttribute(self.__attributeIndex)
            if self.__comparison == "<":
                # "<" is evaluated inclusively (less than or equal).
                return candidate.getValue() <= condition.getValue()
            return candidate.getValue() > condition.getValue()
        return False
Ejemplo n.º 9
0
    def discreteCheck(self, instance: Instance) -> bool:
        """
        Checks that the given instance contains no discrete attribute that is not also a discrete indexed
        attribute. Attributes that are not discrete at all do not affect the result.

        PARAMETERS
        ----------
        instance : Instance
            Instance to check.

        RETURNS
        -------
        bool
            False if at least one attribute is a DiscreteAttribute but not a DiscreteIndexedAttribute, True
            otherwise.
        """
        attributes = (instance.getAttribute(position)
                      for position in range(instance.attributeSize()))
        # any() stops at the first offending attribute, like an early return inside a loop.
        return not any(
            isinstance(attribute, DiscreteAttribute)
            and not isinstance(attribute, DiscreteIndexedAttribute)
            for attribute in attributes)
    def convertInstance(self, instance: Instance):
        """
        Converts the discrete attributes of a single instance to continuous attributes using 1-of-L encoding. For
        example, if an attribute has the values red, green, blue; it is converted to 3 continuous attributes where
        red is encoded as 1 0 0, green as 0 1 0 and blue as 0 0 1. The new attributes are appended to the
        instance, then removeDiscreteAttributesFromInstance is called with the original attribute count.

        PARAMETERS
        ----------
        instance : Instance
            The instance to be converted.
        """
        originalSize = instance.attributeSize()
        for position in range(originalSize):
            distribution = self.attributeDistributions[position]
            if len(distribution) == 0:
                # An empty distribution means there is nothing to encode at this position.
                continue
            hotIndex = distribution.getIndex(
                str(instance.getAttribute(position)))
            for slot in range(len(distribution)):
                # Exactly the slot matching the attribute's value index receives 1; all others receive 0.
                instance.addAttribute(
                    ContinuousAttribute(0 if slot != hotIndex else 1))
        self.removeDiscreteAttributesFromInstance(instance, originalSize)