Ejemplo n.º 1
0
def error(tree, instance, attr, val):
    """
    Score one prediction of the tree against a known target value. One of the
    scoring functions for incremental_evaluation.

    The target attribute is removed from the instance before the tree is
    queried, so the prediction cannot simply echo the answer. For a numeric
    target the signed difference ``val - prediction`` is returned; for any
    other target the result is 0 when the prediction equals ``val`` and 1
    otherwise.

    .. warning:: It is unclear how to compute error (or squared error) when
        a :ref:`Numeric value<val-num>` is missing (a 0-1 penalty and a
        difference on the scale of the numeric value cannot be averaged
        together). This function therefore raises an Exception when the
        target is numeric but the prediction is ``None``, and when the target
        is ``None`` but the prediction is numeric. It also raises an Exception
        for :ref:`Component Values<val-comp>` (``dict`` targets).

    :param tree: A category tree to evaluate.
    :type tree: :class:`CobwebTree <concept_formation.cobweb.CobwebTree>`,
        :class:`Cobweb3Tree <concept_formation.cobweb3.Cobweb3Tree>`, or
        :class:`TrestleTree <concept_formation.trestle.TrestleTree>`
    :param instance: An instance to query the tree with
    :type instance: {a1:v1, a2:v2, ...}
    :param attr: The target instance attribute to evaluate error on
    :type attr: :ref:`Attribute<attributes>`
    :param val: The target value of the given attr
    :type val: A :ref:`Nominal<val-nom>` or :ref:`Numeric<val-num>` value.
    :returns: The error of the given instance attribute value in the given tree
    :rtype: float, or int in the nominal case.
    """
    # Build the query without the attribute that is being predicted.
    query = instance
    if attr in instance:
        query = {a: instance[a] for a in instance if a != attr}

    concept = tree.categorize(query)

    if isinstance(val, dict):
        raise Exception(
            "Currently does not support prediction error of component attributes."
        )

    # Both remaining cases need the prediction; classify the target first so
    # the evaluation order matches the branch-by-branch formulation.
    numeric_target = isNumber(val)
    prediction = concept.predict(attr)

    if numeric_target:
        if prediction is None:
            raise Exception(
                "Not sure how to handle continuous values that are predicted to be missing."
            )
        return val - prediction

    if val is None and isNumber(prediction):
        raise Exception(
            "Not sure how to compare Continuous Values and None")

    # Nominal (or missing) target: plain 0/1 loss.
    return 0 if val == prediction else 1
Ejemplo n.º 2
0
def error(tree, instance, attr, val):
    """
    Compute the error between the value the tree predicts for an attribute
    and the attribute's actual value. One of the scoring functions for
    incremental_evaluation.

    The instance is categorized with the target attribute left out. Numeric
    targets yield the signed difference ``val - prediction``; every other
    target yields 0 for a correct prediction and 1 for an incorrect one.

    .. warning:: There is no agreed way to score a missing
        :ref:`Numeric value<val-num>` (a 0-1 penalty cannot be averaged with
        differences on the scale of the numeric value). An Exception is
        raised when a numeric target is predicted as ``None`` and when a
        ``None`` target is predicted as a number. :ref:`Component
        Values<val-comp>` (``dict`` targets) are not supported either and
        also raise an Exception.

    :param tree: A category tree to evaluate.
    :type tree: :class:`CobwebTree <concept_formation.cobweb.CobwebTree>`,
        :class:`Cobweb3Tree <concept_formation.cobweb3.Cobweb3Tree>`, or
        :class:`TrestleTree <concept_formation.trestle.TrestleTree>`
    :param instance: An instance to query the tree with
    :type instance: {a1:v1, a2:v2, ...}
    :param attr: The target instance attribute to evaluate error on
    :type attr: :ref:`Attribute<attributes>`
    :param val: The target value of the given attr
    :type val: A :ref:`Nominal<val-nom>` or :ref:`Numeric<val-num>` value.
    :returns: The error of the given instance attribute value in the given tree
    :rtype: float, or int in the nominal case.
    """
    # Never let the tree see the answer it is asked to predict.
    if attr in instance:
        stripped = {}
        for name in instance:
            if name != attr:
                stripped[name] = instance[name]
        instance = stripped

    concept = tree.categorize(instance)

    if isinstance(val, dict):
        raise Exception("Currently does not support prediction error of"
                        " component attributes.")

    # Decide the kind of target before asking for the prediction, then share
    # the single predict() call between the numeric and nominal paths.
    target_is_number = isNumber(val)
    predicted = concept.predict(attr)

    if target_is_number:
        if predicted is None:
            raise Exception("Not sure how to handle continuous values that are"
                            " predicted to be missing.")
        return val - predicted

    if val is None and isNumber(predicted):
        raise Exception(
            "Not sure how to compare Continuous Values and None")

    # 0/1 loss for nominal or missing targets.
    if val == predicted:
        return 0
    return 1
Ejemplo n.º 3
0
    def increment_counts(self, instance):
        """
        Fold a single instance into the statistics stored at this node.

        The node's instance count is incremented by one and, for every
        attribute of the instance, the attribute's table in ``self.av_counts``
        is updated (the table is created on first use):

        * a value for which ``isNumber`` is true is passed to the ``update``
          method of the ``ContinuousValue`` stored under ``cv_key``;
        * any other value has its occurrence count increased by one.

        This is the Cobweb3Node counterpart of
        :meth:`CobwebNode.increment_counts
        <concept_formation.cobweb.CobwebNode.increment_counts>`, extended to
        handle numerical attributes.

        .. warning:: An attribute name is expected to carry either numeric or
            nominal values, not both. This method does not check for that
            itself. NOTE(review): earlier documentation states that mixing
            the two raises an exception; confirm where that happens. See
            :class:`NumericToNominal
            <concept_formation.preprocessor.NumericToNominal>` for a way to
            avoid the problem.

        :param instance: A new instance to incorporate into the node.
        :type instance: :ref:`Instance<instance-rep>`

        """
        self.count += 1

        for attr in instance:
            value = instance[attr]
            # setdefault both creates and returns the per-attribute table.
            attr_counts = self.av_counts.setdefault(attr, {})

            if not isNumber(value):
                attr_counts[value] = attr_counts.get(value, 0) + 1
                continue

            # All numeric observations share one accumulator under cv_key.
            if cv_key not in attr_counts:
                attr_counts[cv_key] = ContinuousValue()
            attr_counts[cv_key].update(value)
Ejemplo n.º 4
0
def index_key(fact):
    """
    Build the full index key for a fact.

    The fact is copied structurally: tuples are rebuilt element by element
    (recursively), every element for which ``is_variable`` is true becomes
    ``'?'``, every element for which ``isNumber`` is true becomes ``'#NUM'``,
    and anything else is kept as is. The whole key is materialized, on the
    assumption that this does not blow up memory usage.

    >>> index_key('cell')
    'cell'

    >>> index_key(('cell',))
    ('cell',)

    >>> index_key(('cell', '5'))
    ('cell', '5')

    >>> index_key((('value', '?x'), '5'))
    (('value', '?'), '5')

    >>> index_key((('X', ('Position', 'Block1')), 10))
    (('X', ('Position', 'Block1')), '#NUM')

    >>> index_key((('value', ('Add', ('value', '?x'),
    ...                              ('value', '?y'))), '5'))
    (('value', ('Add', ('value', '?'), ('value', '?'))), '5')
    """
    # Compound facts: index each component independently.
    if isinstance(fact, tuple):
        return tuple(map(index_key, fact))

    # Variables are checked before numbers, so that test takes precedence.
    if is_variable(fact):
        return '?'
    if isNumber(fact):
        return '#NUM'
    return fact
Ejemplo n.º 5
0
    def increment_counts(self, instance):
        """
        Update this node's counts with the attribute values of one instance.

        ``self.count`` grows by one. Then each attribute of the instance is
        recorded in that attribute's table inside ``self.av_counts`` (created
        if it does not exist yet). Values accepted by ``isNumber`` are fed to
        the ``ContinuousValue`` kept under ``cv_key`` via its ``update``
        method; all other values are tallied as occurrence counts.

        Cobweb3Node uses this in place of
        :meth:`CobwebNode.increment_counts
        <concept_formation.cobweb.CobwebNode.increment_counts>` so that
        numerical attributes are handled.

        .. warning:: Each attribute name should hold values of one kind only
            (numeric or nominal). Nothing in this method enforces that.
            NOTE(review): the previous documentation says an exception is
            raised when the kinds are mixed; verify where. See
            :class:`NumericToNominal
            <concept_formation.preprocessor.NumericToNominal>` for a way to
            fix such data.

        :param instance: A new instance to incorporate into the node.
        :type instance: :ref:`Instance<instance-rep>`

        """
        self.count += 1

        for attr in instance:
            observed = instance[attr]
            # Fetch the attribute's table, creating an empty one if needed.
            table = self.av_counts.setdefault(attr, {})

            if isNumber(observed):
                # Numeric values accumulate in a single ContinuousValue.
                if cv_key not in table:
                    table[cv_key] = ContinuousValue()
                table[cv_key].update(observed)
            else:
                table[observed] = table.get(observed, 0) + 1
Ejemplo n.º 6
0
 def update_scales(self, instance):
     """
     Feed every numeric value of an instance into the tree's scale
     statistics so that the attributes can later be scaled.

     For each attribute whose value satisfies ``isNumber``, the attribute is
     mapped through ``self.get_inner_attr`` and the value is passed to the
     ``update`` method of the ``ContinuousValue`` stored under that key in
     ``self.attr_scales`` (one is created on first use). Non-numeric values
     are ignored.

     :param instance: the instance whose numeric values should be recorded
     """
     for attr in instance:
         value = instance[attr]
         if not isNumber(value):
             continue

         key = self.get_inner_attr(attr)
         if key not in self.attr_scales:
             self.attr_scales[key] = ContinuousValue()
         self.attr_scales[key].update(value)
Ejemplo n.º 7
0
 def update_scales(self, instance):
     """
     Walk over an instance and update the tree's per-attribute scale
     accumulators with its numeric values.

     Only values accepted by ``isNumber`` take part. Each such value is
     recorded under the key returned by ``self.get_inner_attr(attr)`` in
     ``self.attr_scales``; a fresh ``ContinuousValue`` is stored there the
     first time the key is seen, and the value is then passed to its
     ``update`` method.

     :param instance: the instance whose numeric values should be recorded
     """
     for attr in instance:
         observed = instance[attr]
         if isNumber(observed):
             inner = self.get_inner_attr(attr)
             # Lazily create the accumulator for a not-yet-seen attribute.
             if inner not in self.attr_scales:
                 self.attr_scales[inner] = ContinuousValue()
             self.attr_scales[inner].update(observed)
Ejemplo n.º 8
0
    def is_exact_match(self, instance):
        """
        Tell whether this concept describes exactly the given instance.

        Every attribute found in the instance or in ``self.attrs()`` is
        examined, except those whose first element is ``'_'``. The match
        fails as soon as one attribute

        * is present in only one of the instance and ``self.av_counts``;
        * is numeric in the instance (``isNumber``) but the concept has no
          ``cv_key`` entry for it, or has entries besides ``cv_key``, or the
          continuous value's ``num`` differs from ``self.count``, or its
          ``unbiased_std()`` is not 0.0, or its ``unbiased_mean()`` differs
          from the instance value;
        * is non-numeric in the instance and the value is unknown to the
          concept or its count differs from ``self.count``.

        :param instance: The instance currently being categorized
        :type instance: :ref:`Instance<instance-rep>`
        :return: whether the instance perfectly matches the concept
        :rtype: boolean

        .. seealso:: :meth:`CobwebNode.get_best_operation`
        """
        for attr in set(instance).union(set(self.attrs())):
            if attr[0] == '_':
                continue

            in_instance = attr in instance
            in_concept = attr in self.av_counts

            # Present on exactly one side: cannot be an exact match.
            if in_instance != in_concept:
                return False
            # Present on neither side: nothing to compare.
            if not in_instance:
                continue

            value = instance[attr]
            counts = self.av_counts[attr]

            if isNumber(value):
                if cv_key not in counts:
                    return False
                cv = counts[cv_key]
                # Only the numeric entry may exist, and it must cover every
                # instance stored at this node.
                if len(counts) != 1 or cv.num != self.count:
                    return False
                if not cv.unbiased_std() == 0.0:
                    return False
                if not cv.unbiased_mean() == value:
                    return False
            else:
                if value not in counts:
                    return False
                if not counts[value] == self.count:
                    return False

        return True
Ejemplo n.º 9
0
    def is_exact_match(self, instance):
        """
        Return True when the concept matches the instance exactly.

        The comparison runs over the union of the instance's attributes and
        ``self.attrs()``; attributes whose first element is ``'_'`` are
        skipped. An attribute breaks the match when

        * only one of the instance and ``self.av_counts`` contains it;
        * its instance value is numeric (``isNumber``) and the concept either
          lacks a ``cv_key`` entry, holds other entries next to it, has a
          continuous value whose ``num`` is not ``self.count``, whose
          ``unbiased_std()`` is not 0.0, or whose ``unbiased_mean()`` is not
          the instance value;
        * its instance value is not numeric and the concept has not seen that
          value, or has not seen it exactly ``self.count`` times.

        :param instance: The instance currently being categorized
        :type instance: :ref:`Instance<instance-rep>`
        :return: whether the instance perfectly matches the concept
        :rtype: boolean

        .. seealso:: :meth:`CobwebNode.get_best_operation`
        """
        candidates = set(instance).union(set(self.attrs()))

        for attr in candidates:
            if attr[0] == '_':
                continue

            has_inst = attr in instance
            has_node = attr in self.av_counts

            if has_inst and not has_node:
                return False
            if has_node and not has_inst:
                return False
            if not (has_inst and has_node):
                # Known to neither side; nothing to check.
                continue

            target = instance[attr]
            table = self.av_counts[attr]

            if not isNumber(target):
                # Nominal value: must be the only value ever observed here.
                if target not in table:
                    return False
                if not table[target] == self.count:
                    return False
                continue

            if cv_key not in table:
                return False
            stats = table[cv_key]
            if len(table) != 1 or stats.num != self.count:
                return False
            # Zero spread and identical mean: all stored values equal target.
            if not stats.unbiased_std() == 0.0:
                return False
            if not stats.unbiased_mean() == target:
                return False

        return True
Ejemplo n.º 10
0
    def probability(self, attr, val):
        """
        Return the probability of an attribute taking a given value at this
        concept.

        Three kinds of query are handled:

        * ``val is None`` asks for the probability that the attribute is
          missing: the fraction of the node's ``count`` not accounted for by
          any recorded value of the attribute.
        * A numeric ``val`` (``isNumber``) for a known attribute returns a
          gaussian density scaled by the fraction of instances that carry a
          numeric value. The result is the integral of the product of two
          gaussians: one centered on ``val`` with
          :math:`\\sigma_{noise} = \\frac{1}{2 * \\sqrt{\\pi}}` (the noise
          term from :meth:`Cobweb3Node.expected_correct_guesses
          <concept_formation.cobweb3.Cobweb3Node.expected_correct_guesses>`),
          and one with the concept's mean and std plus the same independent
          noise. That integral is a gaussian with :math:`\\mu` equal to the
          concept attribute mean and :math:`\\sigma =
          \\sqrt{\\sigma_{attr}^2 + 2 * \\sigma_{noise}^2}`, i.e.
          :math:`\\sigma = \\sqrt{\\sigma_{attr}^2 + \\frac{1}{2 * \\pi}}`.
          If the node has a tree with scaling enabled, ``val`` and the mean
          are first shifted by the tree-wide mean of the attribute and
          divided by its tree-wide unbiased std over ``tree.scaling`` (a
          scale of 0 is replaced by 1).
        * Any other ``val`` returns its relative frequency at this node.

        Unknown attributes and unseen values yield 0.0.

        :param attr: an attribute of an instance
        :type attr: :ref:`Attribute<attributes>`
        :param val: a value for the given attribute
        :type val: :ref:`Value<values>`
        :return: The probability of attr having the value val in the current
            concept.
        :rtype: float
        """
        if val is None:
            # Everything not covered by a recorded value counts as missing.
            observed = 0.0
            if attr in self.av_counts:
                table = self.av_counts[attr]
                observed = sum(table[v].num if v == cv_key else table[v]
                               for v in table)
            return (self.count - observed) / self.count

        if attr in self.av_counts and isNumber(val):
            table = self.av_counts[attr]
            if cv_key not in table:
                return 0.0

            cv = table[cv_key]
            prob_attr = cv.num / self.count

            # Identity transform unless the tree asks for scaling.
            scale = 1.0
            shift = 0.0
            if self.tree is not None and self.tree.scaling:
                inner_attr = self.tree.get_inner_attr(attr)
                tree_stats = self.tree.attr_scales[inner_attr]
                scale = (1/self.tree.scaling) * tree_stats.unbiased_std()

                if scale == 0:
                    scale = 1
                shift = tree_stats.mean
                val = (val - shift) / scale

            mean = (cv.mean - shift) / scale
            ostd = cv.scaled_unbiased_std(scale)
            std = sqrt(ostd * ostd + (1 / (2 * pi)))
            diff = val - mean
            return (prob_attr *
                    (1/(sqrt(2*pi) * std)) *
                    exp(-(diff * diff) / (2.0 * std * std)))

        if attr in self.av_counts and val in self.av_counts[attr]:
            return self.av_counts[attr][val] / self.count

        return 0.0
Ejemplo n.º 11
0
    def probability(self, attr, val):
        """
        Compute how probable a particular attribute value is at the current
        concept.

        * When ``val`` is ``None`` the result is the probability that the
          attribute is missing, i.e. the share of ``self.count`` not covered
          by the counts recorded for the attribute.
        * When ``val`` is numeric (``isNumber``) and the attribute is known,
          the result is the share of instances with a numeric value times a
          gaussian density. The density is the integral of the product of
          two gaussians: the first has :math:`\\mu = val` and
          :math:`\\sigma = \\sigma_{noise} = \\frac{1}{2 * \\sqrt{\\pi}}`
          (:math:`\\sigma_{noise}` comes from
          :meth:`Cobweb3Node.expected_correct_guesses
          <concept_formation.cobweb3.Cobweb3Node.expected_correct_guesses>`);
          the second has the mean and std of the current concept with the
          same independent gaussian noise added. The integral is again a
          gaussian, with :math:`\\mu` equal to the concept attribute mean and
          :math:`\\sigma = \\sqrt{\\sigma_{attr}^2 + 2 * \\sigma_{noise}^2}
          = \\sqrt{\\sigma_{attr}^2 + \\frac{1}{2 * \\pi}}`. With tree
          scaling enabled, both ``val`` and the concept mean are normalized
          using the tree-wide statistics of the attribute beforehand (a
          scale of 0 is replaced by 1).
        * Otherwise the result is the relative frequency of ``val`` at this
          node.

        Attributes or values the node has never seen give 0.0.

        :param attr: an attribute of an instance
        :type attr: :ref:`Attribute<attributes>`
        :param val: a value for the given attribute
        :type val: :ref:`Value<values>`
        :return: The probability of attr having the value val in the current
            concept.
        :rtype: float
        """
        if val is None:
            seen = 0.0
            if attr in self.av_counts:
                # Numeric observations are counted through the continuous
                # value's ``num``; nominal ones through their plain counts.
                tallies = []
                for v in self.av_counts[attr]:
                    if v == cv_key:
                        tallies.append(self.av_counts[attr][v].num)
                    else:
                        tallies.append(self.av_counts[attr][v])
                seen = sum(tallies)
            return (self.count - seen) / self.count

        if attr in self.av_counts and isNumber(val):
            if cv_key not in self.av_counts[attr]:
                return 0.0

            numeric = self.av_counts[attr][cv_key]
            prob_attr = numeric.num / self.count

            use_scaling = self.tree is not None and self.tree.scaling
            if use_scaling:
                inner_attr = self.tree.get_inner_attr(attr)
                scale = ((1/self.tree.scaling) *
                         self.tree.attr_scales[inner_attr].unbiased_std())

                # A constant attribute has zero spread; avoid dividing by 0.
                if scale == 0:
                    scale = 1
                shift = self.tree.attr_scales[inner_attr].mean
                val = (val - shift) / scale
            else:
                scale = 1.0
                shift = 0.0

            mean = (numeric.mean - shift) / scale
            ostd = numeric.scaled_unbiased_std(scale)
            std = sqrt(ostd * ostd + (1 / (2 * pi)))
            normalizer = 1/(sqrt(2*pi) * std)
            exponent = -((val - mean) * (val - mean)) / (2.0 * std * std)
            return prob_attr * normalizer * exp(exponent)

        if attr in self.av_counts and val in self.av_counts[attr]:
            return self.av_counts[attr][val] / self.count

        return 0.0
Ejemplo n.º 12
0
    def probability(self, attr, val):
        """
        Returns the probability of a particular attribute value at the current
        concept.

        * ``val is None`` asks for the probability that the attribute is
          missing: the share of ``self.count`` not covered by the counts
          recorded for the attribute.
        * A numeric ``val`` (``isNumber``) for a known attribute returns the
          density of ``val`` under a gaussian defined by the mean and std of
          the values stored in the concept, multiplied by the share of
          instances that carry a numeric value. As in
          :meth:`Cobweb3Node.expected_correct_guesses
          <concept_formation.cobweb3.Cobweb3Node.expected_correct_guesses>`,
          independent normally distributed noise with
          :math:`\\sigma_{noise} = \\frac{1}{2 * \\sqrt{\\pi}}` is assumed;
          it is combined in quadrature, i.e. :math:`\\frac{1}{4 * \\pi}` is
          added to the variance. With tree scaling enabled, ``val`` and the
          concept mean are first normalized with the tree-wide statistics of
          the attribute (a scale of 0 is replaced by 1).
        * Any other ``val`` returns its relative frequency at this node.

        An attribute the node has never seen, or a value it has never seen,
        has probability 0.0. This includes numeric queries on unknown
        attributes, which previously raised ``KeyError``.

        :param attr: an attribute of an instance
        :type attr: :ref:`Attribute<attributes>`
        :param val: a value for the given attribute
        :type val: :ref:`Value<values>`
        :return: The probability of attr having the value val in the current
            concept.
        :rtype: float
        """
        if val is None:
            c = 0.0
            if attr in self.av_counts:
                c = sum([
                    self.av_counts[attr][v].num
                    if v == cv_key else self.av_counts[attr][v]
                    for v in self.av_counts[attr]
                ])
            return (self.count - c) / self.count

        # Check the attribute first: indexing av_counts with an unseen
        # attribute would raise KeyError instead of reporting probability 0.
        if attr in self.av_counts and isNumber(val):
            if cv_key not in self.av_counts[attr]:
                return 0.0

            cv = self.av_counts[attr][cv_key]
            prob_attr = cv.num / self.count
            if self.tree is not None and self.tree.scaling:
                inner_attr = self.tree.get_inner_attr(attr)
                scale = ((1 / self.tree.scaling) *
                         self.tree.attr_scales[inner_attr].unbiased_std())

                # A constant attribute has zero spread; avoid dividing by 0.
                if scale == 0:
                    scale = 1
                shift = self.tree.attr_scales[inner_attr].mean
                val = (val - shift) / scale
            else:
                scale = 1.0
                shift = 0.0

            mean = (cv.mean - shift) / scale
            # Compute the scaled std once; the noise variance (1 / (4 * pi))
            # is added to the squared std.
            ostd = cv.scaled_unbiased_std(scale)
            std = sqrt(ostd * ostd + (1 / (4 * pi)))
            p = (prob_attr * (1 / (sqrt(2 * pi) * std)) *
                 exp(-((val - mean) * (val - mean)) / (2.0 * std * std)))
            return p

        if attr in self.av_counts and val in self.av_counts[attr]:
            return self.av_counts[attr][val] / self.count

        return 0.0