def error(tree, instance, attr, val):
    """
    Score how far the tree's prediction for ``attr`` is from the target ``val``.

    One of the scoring functions for incremental_evaluation. When ``attr`` is
    present in ``instance`` a copy of the instance without that attribute is
    built, so the tree is queried without seeing the answer. The copy (or the
    untouched instance) is categorized and the prediction for ``attr`` is read
    from the resulting concept.

    * Numeric target (``isNumber(val)``): the signed difference
      ``val - prediction`` is returned.
    * Any other target: ``0`` if the prediction equals ``val``, otherwise ``1``.

    .. warning:: It is not clear how to score a missing numeric value (a 0-1
        error and an error on the scale of the numeric value cannot be
        averaged), so those cases raise an Exception. Component (``dict``)
        targets are not supported either and also raise.

    :param tree: A category tree to evaluate.
    :type tree: :class:`CobwebTree <concept_formation.cobweb.CobwebTree>`,
        :class:`Cobweb3Tree <concept_formation.cobweb3.Cobweb3Tree>`, or
        :class:`TrestleTree <concept_formation.trestle.TrestleTree>`
    :param instance: An instance to query the tree with.
    :type instance: {a1:v1, a2:v2, ...}
    :param attr: The target attribute to evaluate error on.
    :type attr: :ref:`Attribute<attributes>`
    :param val: The target value of ``attr``.
    :type val: A :ref:`Nominal<val-nom>` or :ref:`Numeric<val-num>` value.
    :returns: The error of the prediction for ``attr``.
    :rtype: float, or int in the nominal case.
    :raises Exception: if ``val`` is a dict, if ``val`` is numeric but the
        prediction is ``None``, or if ``val`` is ``None`` but the prediction
        is numeric.
    """
    # Hide the target attribute from the tree before querying it.
    if attr in instance:
        instance = {key: instance[key] for key in instance if key != attr}

    concept = tree.categorize(instance)

    if isinstance(val, dict):
        raise Exception(
            "Currently does not support prediction error of component attributes."
        )

    target_is_numeric = isNumber(val)
    predicted = concept.predict(attr)

    if target_is_numeric:
        if predicted is None:
            raise Exception(
                "Not sure how to handle continuous values that are predicted to be missing."
            )
        return val - predicted

    if val is None and isNumber(predicted):
        raise Exception("Not sure how to compare Continuous Values and None")

    return 0 if val == predicted else 1
def error(tree, instance, attr, val):
    """
    Compute the error between the tree's predicted value for ``attr`` and the
    actual value ``val``.

    One of the scoring functions for incremental_evaluation. If the instance
    carries ``attr``, a copy without it is categorized instead so the target
    is hidden from the tree. The prediction is taken from the concept the
    instance is categorized into.

    For a numeric ``val`` (``isNumber(val)``) the result is
    ``val - prediction``. For every other ``val`` the result is ``0`` on an
    exact match with the prediction and ``1`` otherwise.

    .. warning:: Missing numeric values cannot be scored meaningfully (a 0-1
        error cannot be averaged with an error on the numeric scale), so an
        Exception is raised when one is encountered. Component (``dict``)
        values are unsupported and raise as well.

    :param tree: A category tree to evaluate.
    :type tree: :class:`CobwebTree <concept_formation.cobweb.CobwebTree>`,
        :class:`Cobweb3Tree <concept_formation.cobweb3.Cobweb3Tree>`, or
        :class:`TrestleTree <concept_formation.trestle.TrestleTree>`
    :param instance: An instance to query the tree with.
    :type instance: {a1:v1, a2:v2, ...}
    :param attr: The target attribute to evaluate error on.
    :type attr: :ref:`Attribute<attributes>`
    :param val: The target value of ``attr``.
    :type val: A :ref:`Nominal<val-nom>` or :ref:`Numeric<val-num>` value.
    :returns: The error of the given attribute value in the given tree.
    :rtype: float, or int in the nominal case.
    :raises Exception: for dict targets, for numeric targets whose prediction
        is ``None``, and for ``None`` targets whose prediction is numeric.
    """
    query = instance
    if attr in instance:
        # Drop the target so the tree cannot simply read the answer back.
        query = {name: instance[name] for name in instance if name != attr}

    concept = tree.categorize(query)

    if isinstance(val, dict):
        raise Exception("Currently does not support prediction error of"
                        " component attributes.")

    numeric_target = isNumber(val)
    guess = concept.predict(attr)

    if numeric_target:
        if guess is None:
            raise Exception("Not sure how to handle continuous values that are"
                            " predicted to be missing.")
        return val - guess

    if val is None and isNumber(guess):
        raise Exception("Not sure how to compare Continuous Values and None")

    if val == guess:
        return 0
    return 1
def increment_counts(self, instance):
    """
    Increment the counts at the current node according to the specified
    instance.

    The node's ``count`` is bumped by one, then every attribute of the
    instance is folded into ``self.av_counts[attr]``:

    * a value for which ``isNumber`` is true is passed to the ``update``
      method of the :class:`ContinuousValue` stored under ``cv_key``
      (created on first use);
    * any other value has its own counter incremented by one.

    .. note:: Nothing in this method itself raises when the same attribute
        name is seen with both numeric and nominal values; both kinds of
        entries would simply share one ``av_counts[attr]`` table. See
        :class:`NumericToNominal
        <concept_formation.preprocessor.NumericToNominal>` for a way to keep
        attribute types consistent. NOTE(review): earlier documentation said
        this case raises an exception -- confirm where, if anywhere, that
        check lives.

    :param instance: A new instance to incorporate into the node.
    :type instance: :ref:`Instance<instance-rep>`
    """
    self.count += 1

    for attr in instance:
        value = instance[attr]
        bucket = self.av_counts.setdefault(attr, {})

        if not isNumber(value):
            bucket[value] = bucket.get(value, 0) + 1
            continue

        if cv_key not in bucket:
            bucket[cv_key] = ContinuousValue()
        bucket[cv_key].update(value)
def index_key(fact):
    """
    Build the index key for a fact.

    Tuples are rebuilt element by element (recursively); anything
    ``is_variable`` accepts becomes ``'?'``; anything ``isNumber`` accepts
    becomes ``'#NUM'``; every other value is returned unchanged. The whole
    structure is rebuilt, on the assumption that this does not blow up memory
    usage.

    >>> index_key('cell')
    'cell'
    >>> index_key(('cell',))
    ('cell',)
    >>> index_key(('cell', '5'))
    ('cell', '5')
    >>> index_key((('value', '?x'), '5'))
    (('value', '?'), '5')
    >>> index_key((('X', ('Position', 'Block1')), 10))
    (('X', ('Position', 'Block1')), '#NUM')
    >>> index_key((('value', ('Add', ('value', '?x'),
    ...                       ('value', '?y'))), '5'))
    (('value', ('Add', ('value', '?'), ('value', '?'))), '5')
    """
    if isinstance(fact, tuple):
        return tuple(map(index_key, fact))
    if is_variable(fact):
        return '?'
    if isNumber(fact):
        return '#NUM'
    return fact
def increment_counts(self, instance):
    """
    Fold one instance into this node's attribute-value statistics.

    ``self.count`` is increased by one. For each attribute of the instance
    the per-attribute table ``self.av_counts[attr]`` is created if needed and
    then updated:

    * numeric values (``isNumber`` is true) are accumulated in the
      :class:`ContinuousValue` kept under ``cv_key``, which is created the
      first time a numeric value is seen for that attribute;
    * all other values are tallied, one count per occurrence.

    .. note:: This method does not itself reject an attribute that is
        numeric in one instance and nominal in another; see
        :class:`NumericToNominal
        <concept_formation.preprocessor.NumericToNominal>` for a preprocessor
        that avoids the mix. NOTE(review): older documentation claimed an
        exception is raised here -- verify against the rest of the class.

    :param instance: A new instance to incorporate into the node.
    :type instance: :ref:`Instance<instance-rep>`
    """
    self.count += 1

    for name in instance:
        observed = instance[name]
        table = self.av_counts.setdefault(name, {})

        if isNumber(observed):
            if cv_key not in table:
                table[cv_key] = ContinuousValue()
            accumulator = table[cv_key]
            accumulator.update(observed)
        else:
            seen_so_far = table.get(observed, 0)
            table[observed] = seen_so_far + 1
def update_scales(self, instance):
    """
    Update the tree's per-attribute scale statistics from an instance.

    Every attribute whose value satisfies ``isNumber`` is mapped through
    ``self.get_inner_attr`` and its value is fed to the
    :class:`ContinuousValue` stored in ``self.attr_scales`` under that inner
    attribute, creating the entry on first use. Non-numeric values are
    ignored.

    :param instance: The instance whose numeric values should be recorded.
    :type instance: :ref:`Instance<instance-rep>`
    """
    for attr in instance:
        value = instance[attr]
        if not isNumber(value):
            continue

        key = self.get_inner_attr(attr)
        if key not in self.attr_scales:
            self.attr_scales[key] = ContinuousValue()
        self.attr_scales[key].update(value)
def update_scales(self, instance):
    """
    Read through all the attributes in an instance and update the tree's
    scales object so that numeric attributes can be properly scaled.

    Only values for which ``isNumber`` is true contribute. Each one is
    recorded in the :class:`ContinuousValue` held in ``self.attr_scales``
    under ``self.get_inner_attr(attr)``; a fresh one is created the first
    time an inner attribute is encountered.

    :param instance: The instance to read numeric values from.
    :type instance: :ref:`Instance<instance-rep>`
    """
    scales = self.attr_scales

    for name in instance:
        observed = instance[name]
        if isNumber(observed):
            inner = self.get_inner_attr(name)
            if inner not in scales:
                scales[inner] = ContinuousValue()
            scales[inner].update(observed)
def is_exact_match(self, instance):
    """
    Return True if the concept exactly matches the instance.

    Every attribute found in the instance or in ``self.attrs()`` is checked,
    except those whose first element is ``'_'``, which are skipped. The
    match fails as soon as one attribute:

    * is present in only one of the instance and ``self.av_counts``;
    * is numeric in the instance (``isNumber``) but the concept has no
      ``cv_key`` entry for it, has entries besides ``cv_key``, has a
      ``cv_key`` entry whose ``num`` differs from ``self.count``, has a
      non-zero ``unbiased_std()``, or has an ``unbiased_mean()`` different
      from the instance value;
    * is non-numeric in the instance and its value is either unknown to the
      concept or counted a number of times different from ``self.count``.

    :param instance: The instance currently being categorized
    :type instance: :ref:`Instance<instance-rep>`
    :return: whether the instance perfectly matches the concept
    :rtype: boolean

    .. seealso:: :meth:`CobwebNode.get_best_operation`
    """
    for name in set(instance) | set(self.attrs()):
        if name[0] == '_':
            continue

        in_instance = name in instance
        in_concept = name in self.av_counts

        # Present on exactly one side: cannot be an exact match.
        if in_instance != in_concept:
            return False
        # Present on neither side: nothing to compare.
        if not in_instance:
            continue

        observed = instance[name]
        table = self.av_counts[name]

        if isNumber(observed):
            if cv_key not in table:
                return False
            summary = table[cv_key]
            if len(table) != 1 or summary.num != self.count:
                return False
            if not summary.unbiased_std() == 0.0:
                return False
            if not summary.unbiased_mean() == observed:
                return False
        elif observed not in table:
            return False
        elif not table[observed] == self.count:
            return False

    return True
def is_exact_match(self, instance):
    """
    Report whether this concept matches the instance exactly.

    The attributes examined are the union of the instance's attributes and
    ``self.attrs()``; attributes whose first element is ``'_'`` are ignored.
    For the remaining attributes the concept and the instance must agree on
    presence, and for attributes present on both sides:

    * a numeric instance value (``isNumber``) requires that the concept's
      table holds only a ``cv_key`` entry, that the entry's ``num`` equals
      ``self.count``, that its ``unbiased_std()`` is ``0.0`` and that its
      ``unbiased_mean()`` equals the instance value;
    * any other instance value requires that the concept has counted that
      exact value ``self.count`` times.

    :param instance: The instance currently being categorized
    :type instance: :ref:`Instance<instance-rep>`
    :return: whether the instance perfectly matches the concept
    :rtype: boolean

    .. seealso:: :meth:`CobwebNode.get_best_operation`
    """
    candidates = set(instance).union(self.attrs())

    for attr in candidates:
        if attr[0] == '_':
            continue

        have_value = attr in instance
        have_counts = attr in self.av_counts

        if have_value and not have_counts:
            return False
        if have_counts and not have_value:
            return False
        if not (have_value and have_counts):
            continue

        value = instance[attr]
        counts = self.av_counts[attr]

        if not isNumber(value):
            if value not in counts:
                return False
            if not counts[value] == self.count:
                return False
            continue

        if cv_key not in counts:
            return False

        stats = counts[cv_key]
        if len(counts) != 1 or stats.num != self.count:
            return False
        if not stats.unbiased_std() == 0.0:
            return False
        if not stats.unbiased_mean() == value:
            return False

    return True
def probability(self, attr, val):
    """
    Return the probability of a particular attribute value at the current
    concept.

    Three cases are handled:

    * ``val is None`` -- the share of the concept's ``count`` not accounted
      for by any recorded value of ``attr`` (numeric observations are
      counted through the ``num`` of the ``cv_key`` entry).
    * numeric ``val`` (``isNumber``) for an attribute the concept knows --
      ``0.0`` when no ``cv_key`` entry exists; otherwise a gaussian density
      at ``val``, weighted by the fraction of instances that have a numeric
      value (``num / count``). The gaussian is centred on the concept mean
      and its standard deviation is ``sqrt(s**2 + 1 / (2 * pi))`` where
      ``s`` is ``scaled_unbiased_std(scale)``. This is the integral of the
      product of two gaussians each carrying noise
      ``sigma_noise = 1 / (2 * sqrt(pi))`` (see
      :meth:`Cobweb3Node.expected_correct_guesses
      <concept_formation.cobweb3.Cobweb3Node.expected_correct_guesses>`), so
      the added variance is ``2 * sigma_noise**2``. When the node has a tree
      with ``scaling`` enabled, ``val`` and the mean are first shifted and
      scaled using the tree's ``attr_scales`` entry for the inner attribute;
      a zero scale is replaced by 1.
    * any other ``val`` -- its count divided by ``self.count``, or ``0.0``
      when the attribute or the value is unknown to the concept.

    :param attr: an attribute of an instance
    :type attr: :ref:`Attribute<attributes>`
    :param val: a value for the given attribute
    :type val: :ref:`Value<values>`
    :return: The probability of attr having the value val in the current
        concept.
    :rtype: float
    """
    known = attr in self.av_counts

    if val is None:
        observed = 0.0
        if known:
            observed = sum(
                entry.num if key == cv_key else entry
                for key, entry in self.av_counts[attr].items()
            )
        return (self.count - observed) / self.count

    if known and isNumber(val):
        table = self.av_counts[attr]
        if cv_key not in table:
            return 0.0

        stats = table[cv_key]
        weight = stats.num / self.count

        if self.tree is not None and self.tree.scaling:
            inner = self.tree.get_inner_attr(attr)
            scale = ((1/self.tree.scaling) *
                     self.tree.attr_scales[inner].unbiased_std())
            if scale == 0:
                # Guard against dividing by a degenerate (zero) spread.
                scale = 1
            shift = self.tree.attr_scales[inner].mean
            val = (val - shift) / scale
        else:
            scale, shift = 1.0, 0.0

        mean = (stats.mean - shift) / scale
        spread = stats.scaled_unbiased_std(scale)
        std = sqrt(spread * spread + (1 / (2 * pi)))
        return (weight *
                (1/(sqrt(2*pi) * std)) *
                exp(-((val - mean) * (val - mean)) / (2.0 * std * std)))

    if known and val in self.av_counts[attr]:
        return self.av_counts[attr][val] / self.count

    return 0.0
def probability(self, attr, val):
    """
    Probability that ``attr`` takes the value ``val`` at this concept,
    allowing for the attribute being missing.

    * For ``val is None`` the result is the fraction of ``self.count`` not
      covered by any stored value of ``attr``; the ``cv_key`` entry
      contributes its ``num``, every other entry contributes its count.
    * For a numeric ``val`` (``isNumber``) on a known attribute the result
      is ``0.0`` if the concept has no ``cv_key`` entry. Otherwise it is
      ``num / count`` times a gaussian density evaluated at ``val``, with
      the concept's mean and a standard deviation of
      ``sqrt(s**2 + 1 / (2 * pi))``, ``s`` being
      ``scaled_unbiased_std(scale)``. The extra ``1 / (2 * pi)`` equals
      ``2 * sigma_noise**2`` with ``sigma_noise = 1 / (2 * sqrt(pi))``, the
      noise term also used by :meth:`Cobweb3Node.expected_correct_guesses
      <concept_formation.cobweb3.Cobweb3Node.expected_correct_guesses>`.
      If ``self.tree`` is set and has ``scaling`` enabled, both ``val`` and
      the mean are normalised with the tree-level ``attr_scales`` statistics
      of the inner attribute (a scale of 0 falls back to 1).
    * For any other ``val`` the result is the stored count over
      ``self.count``, or ``0.0`` for an unknown attribute or value.

    :param attr: an attribute of an instance
    :type attr: :ref:`Attribute<attributes>`
    :param val: a value for the given attribute
    :type val: :ref:`Value<values>`
    :return: The probability of attr having the value val in the current
        concept.
    :rtype: float
    """
    if val is None:
        total_seen = 0.0
        if attr in self.av_counts:
            per_value = self.av_counts[attr]
            total_seen = sum(
                [per_value[v].num if v == cv_key else per_value[v]
                 for v in per_value])
        return (self.count - total_seen) / self.count

    if attr not in self.av_counts:
        return 0.0

    per_value = self.av_counts[attr]

    if isNumber(val):
        if cv_key not in per_value:
            return 0.0

        cv = per_value[cv_key]
        prob_attr = cv.num / self.count

        scale = 1.0
        shift = 0.0
        if self.tree is not None and self.tree.scaling:
            inner_attr = self.tree.get_inner_attr(attr)
            scale = ((1/self.tree.scaling) *
                     self.tree.attr_scales[inner_attr].unbiased_std())
            if scale == 0:
                scale = 1
            shift = self.tree.attr_scales[inner_attr].mean
            val = (val - shift) / scale

        mean = (cv.mean - shift) / scale
        ostd = cv.scaled_unbiased_std(scale)
        std = sqrt(ostd * ostd + (1 / (2 * pi)))
        density = ((1/(sqrt(2*pi) * std)) *
                   exp(-((val - mean) * (val - mean)) / (2.0 * std * std)))
        return prob_attr * density

    if val in per_value:
        return per_value[val] / self.count

    return 0.0
def probability(self, attr, val):
    """
    Returns the probability of a particular attribute value at the current
    concept. This takes into account the possibilities that an attribute can
    take any of the values available at the root, or be missing.

    * ``val is None``: the fraction of ``self.count`` not covered by any
      stored value of ``attr`` (the ``cv_key`` entry contributes its
      ``num``; every other entry contributes its count).
    * numeric ``val`` (``isNumber``) for an attribute known to the concept:
      ``0.0`` when there is no ``cv_key`` entry; otherwise ``num / count``
      times the density at ``val`` of a gaussian with the concept's mean and
      a standard deviation of ``sqrt(s**2 + 1 / (4 * pi))``, where ``s`` is
      ``scaled_unbiased_std(scale)``. The added variance corresponds to
      independent, normally distributed noise with
      ``sigma = 1 / (2 * sqrt(pi))``, as in
      :meth:`Cobweb3Node.expected_correct_guesses
      <concept_formation.cobweb3.Cobweb3Node.expected_correct_guesses>`.
      When ``self.tree`` is set and has ``scaling`` enabled, ``val`` and the
      mean are normalised with the tree's ``attr_scales`` statistics for the
      inner attribute (a scale of 0 falls back to 1).
    * anything else: the stored count of ``val`` over ``self.count``, or
      ``0.0`` when the attribute or the value is unknown to the concept.

    An attribute that the concept has never seen yields ``0.0`` for every
    non-``None`` value, numeric or not.

    :param attr: an attribute of an instance
    :type attr: :ref:`Attribute<attributes>`
    :param val: a value for the given attribute
    :type val: :ref:`Value<values>`
    :return: The probability of attr having the value val in the current
        concept.
    :rtype: float
    """
    if val is None:
        c = 0.0
        if attr in self.av_counts:
            c = sum([
                self.av_counts[attr][v].num
                if v == cv_key else self.av_counts[attr][v]
                for v in self.av_counts[attr]
            ])
        return (self.count - c) / self.count

    # Guard on attribute presence first: a numeric query for an attribute
    # this concept has never seen must give 0.0 rather than a KeyError.
    if attr in self.av_counts and isNumber(val):
        if cv_key not in self.av_counts[attr]:
            return 0.0

        cv = self.av_counts[attr][cv_key]
        prob_attr = cv.num / self.count

        if self.tree is not None and self.tree.scaling:
            inner_attr = self.tree.get_inner_attr(attr)
            scale = ((1 / self.tree.scaling) *
                     self.tree.attr_scales[inner_attr].unbiased_std())
            if scale == 0:
                # Avoid dividing by a degenerate (zero) spread.
                scale = 1
            shift = self.tree.attr_scales[inner_attr].mean
            val = (val - shift) / scale
        else:
            scale = 1.0
            shift = 0.0

        mean = (cv.mean - shift) / scale
        # Computed once; the variance below needs it squared.
        ostd = cv.scaled_unbiased_std(scale)
        std = sqrt(ostd * ostd + (1 / (4 * pi)))
        p = (prob_attr * (1 / (sqrt(2 * pi) * std)) *
             exp(-((val - mean) * (val - mean)) / (2.0 * std * std)))
        return p

    if attr in self.av_counts and val in self.av_counts[attr]:
        return self.av_counts[attr][val] / self.count

    return 0.0