Beispiel #1
0
    def infer_missing(self, instance, choice_fn=weighted_choice):
        """
        Build a completed version of an instance by inferring the
        attribute-values it is missing from the concept it categorizes to.

        The instance is structure mapped against the tree root and
        categorized. Every attribute of the resulting concept that the mapped
        instance lacks is then considered: ``choice_fn`` first decides between
        leaving the attribute out (weighted by the concept's probability that
        the attribute is missing) and filling it in. A filled-in attribute
        gets the unbiased mean when the concept stores a ContinuousValue for
        it, otherwise a value picked by ``choice_fn`` from the concept's
        weighted values.

        :param instance: an instance to be completed.
        :type instance: {a1: v1, a2: v2, ...}
        :param choice_fn: A function for deciding which attribute/value to
            choose. The default is: concept_formation.utils.weighted_choice.
            The other option is: concept_formation.utils.most_likely_choice.
        :type choice_fn: a python function
        :return: A completed instance, with the structure mapping undone
        :rtype: instance
        """
        mapper = StructureMapper(self.root)
        completed = mapper.transform(instance)
        node = self._cobweb_categorize(completed)

        for attr in node.av_counts:
            # Only attributes absent from the mapped instance are inferred.
            if attr in completed:
                continue

            p_missing = node.get_probability_missing(attr)
            options = ((None, p_missing), (attr, 1 - p_missing))
            picked = choice_fn(options)
            if not picked == attr:
                # The "leave it missing" option won the draw.
                continue

            stored = node.av_counts[attr]
            if isinstance(stored, ContinuousValue):
                completed[attr] = stored.unbiased_mean()
            else:
                completed[attr] = choice_fn(node.get_weighted_values(attr))

        return mapper.undo_transform(completed)
Beispiel #2
0
def probability(tree, instance, attr, val):
    """
    Return the probability of a particular value of an attribute in the
    instance.

    The instance is expected not to contain the attribute; when it does, a
    shallow copy without that attribute is categorized instead, so the
    caller's instance is left untouched.

    For a component (dict) value the instance is structure mapped against the
    categorized concept and the result is the mean probability of every
    mapped sub-attribute whose name starts with the mapped name of ``attr``.
    """
    if attr in instance:
        instance = {key: value for key, value in instance.items()
                    if not key == attr}
    concept = tree.categorize(instance)

    # Plain values can be looked up directly on the concept.
    if not isinstance(val, dict):
        return concept.get_probability(attr, val)

    structure_mapper = StructureMapper(concept)
    mapped_instance = structure_mapper.transform(instance)
    mapping = structure_mapper.get_mapping()

    probs = []
    for sub_attr in mapped_instance:
        # The mapped attribute name is used as a regex anchored at the start.
        if search('^' + mapping[attr], sub_attr):
            probs.append(
                concept.get_probability(sub_attr, mapped_instance[sub_attr]))
    return mean(probs)
Beispiel #3
0
    def _trestle_categorize(self, instance):
        """
        Structure map the instance against the tree root, then categorize
        the mapped instance and return the concept it lands in.

        :param instance: an instance to be categorized into the tree.
        :type instance: {a1:v1, a2:v2, ...}
        :return: A concept describing the instance
        :rtype: Cobweb3Node
        """
        mapped = StructureMapper(self.root).transform(instance)
        return self._cobweb_categorize(mapped)
Beispiel #4
0
def probability(tree, instance, attr, val):
    """
    Return the probability of a particular value of an attribute in the
    instance. One of the scoring functions for incremental_evaluation.

    If the instance currently contains the target attribute, a shallow copy
    without that attribute is categorized instead so that the attribute can
    be predicted; the caller's instance is not modified.

    .. warning:: This is an older function in the library and it is not yet
        clear how to set it up for component values under the new
        representation, so for the time being it raises an Exception if it
        encounters a component (``dict``) value.

    :param tree: A category tree to evaluate.
    :type tree: :class:`CobwebTree <concept_formation.cobweb.CobwebTree>`,
        :class:`Cobweb3Tree <concept_formation.cobweb3.Cobweb3Tree>`, or
        :class:`TrestleTree <concept_formation.trestle.TrestleTree>`
    :param instance: An instance to query the tree with
    :type instance: {a1:v1, a2:v2, ...}
    :param attr: A target instance attribute to evaluate probability on
    :type attr: :ref:`Attribute<attributes>`
    :param val: The target value of the given attr
    :type val: A :ref:`Nominal<val-nom>` or :ref:`Numeric<val-num>` value.
    :returns: The probability of the given instance attribute value in the
        given tree
    :rtype: float
    :raises Exception: if ``val`` is a dict, i.e. a component value.
    """
    # Component values are unsupported, so reject them before spending any
    # work on categorizing the instance.
    if isinstance(val, dict):
        raise Exception(
            "Probability cannot be estimated on component attributes!")

    if attr in instance:
        # Hide the target attribute from the tree without touching the
        # caller's instance.
        instance = {a: instance[a] for a in instance if a != attr}

    concept = tree.categorize(instance)
    return concept.probability(attr, val)
def probability(tree, instance, attr, val):
    """
    Return the probability of a particular value of an attribute in the
    instance. One of the scoring functions for incremental_evaluation.

    If the instance currently contains the target attribute, a shallow copy
    without that attribute is categorized instead so that the attribute can
    be predicted; the caller's instance is not modified.

    .. warning:: This is an older function in the library and it is not yet
        clear how to set it up for component values under the new
        representation, so for the time being it raises an Exception if it
        encounters a component (``dict``) value.

    :param tree: A category tree to evaluate.
    :type tree: :class:`CobwebTree <concept_formation.cobweb.CobwebTree>`,
        :class:`Cobweb3Tree <concept_formation.cobweb3.Cobweb3Tree>`, or
        :class:`TrestleTree <concept_formation.trestle.TrestleTree>`
    :param instance: An instance to query the tree with
    :type instance: {a1:v1, a2:v2, ...}
    :param attr: A target instance attribute to evaluate probability on
    :type attr: :ref:`Attribute<attributes>`
    :param val: The target value of the given attr
    :type val: A :ref:`Nominal<val-nom>` or :ref:`Numeric<val-num>` value.
    :returns: The probability of the given instance attribute value in the
        given tree
    :rtype: float
    :raises Exception: if ``val`` is a dict, i.e. a component value.
    """
    # Component values are unsupported, so reject them up front rather than
    # categorizing the instance only to fail afterwards.
    if isinstance(val, dict):
        raise Exception(
            "Probability cannot be estimated on component attributes!")

    if attr in instance:
        # Categorize a copy that lacks the target attribute; the original
        # instance stays as the caller passed it.
        instance = {a: instance[a] for a in instance if a != attr}

    return tree.categorize(instance).probability(attr, val)
Beispiel #6
0
    def ifit(self, instance, do_mapping=False):
        """
        Just maintain a set of counts at the root and use these for
        prediction.

        The ``do_mapping`` parameter determines whether or not to structure
        map the instance against the root before counting. It is disabled by
        default to get a really naive model; in that case the instance is
        only run through a preprocessing pipeline (Tuplizer, ListProcessor,
        NameStandardizer, SubComponentProcessor, Flattener).

        :param instance: an instance to add to the root's counts.
        :param do_mapping: whether to structure map before counting.
        :return: the root node, after its counts have been incremented.
        """
        if not do_mapping:
            transformer = Pipeline(Tuplizer(), ListProcessor(),
                                   NameStandardizer(),
                                   SubComponentProcessor(), Flattener())
        else:
            transformer = StructureMapper(self.root)

        prepared = transformer.transform(instance)
        self.root.increment_counts(prepared)
        return self.root
Beispiel #7
0
    def trestle(self, instance):
        """
        The core trestle algorithm used in fitting and categorization.

        This works like :meth:`Cobweb.cobweb
        <concept_formation.cobweb.CobwebTree.cobweb>`, with one key
        difference: the instance is first structure mapped against the tree
        root (see: :meth:`structure_map
        <concept_formation.structure_mapper.structure_map>`) and only the
        mapped instance is handed to the normal cobweb algorithm.

        :param instance: an instance to be categorized into the tree.
        :type instance: {a1:v1, a2:v2, ...}
        :return: A concept describing the instance
        :rtype: CobwebNode
        """
        mapped = StructureMapper(self.root).transform(instance)
        return self.cobweb(mapped)
    def ifit(self, t, x, y):
        """
        Incrementally update the concept counts with one labelled example and
        rebuild ``self.operator`` from the accumulated counts.

        Steps performed, in order:

        1. Drop every attribute of ``x`` that is listed in
           ``self.remove_attrs`` (for tuple attributes the first element is
           checked).
        2. Rename each field of ``t`` to ``'foa<j>'`` (``j`` is its position
           in ``t``), standardize names and structure map the result against
           ``self.concept``.
        3. Increment the counts of ``self.concept`` and of either
           ``self.pos_concept`` (``y == 1``) or ``self.neg_concept``
           (any other ``y``).
        4. Derive positive conditions from attributes whose value counts sum
           to ``self.pos_concept.count``, and negated conditions from
           negative attribute-values never seen in the positive concept.
        5. Store the result in ``self.target_types`` and ``self.operator``.

        :param t: sized sequence of fields; each is renamed to a ``foa``
            placeholder (presumably the example's arguments -- TODO confirm
            against the caller).
        :param x: mapping of attribute -> value describing the example;
            attributes may be tuples.
        :param y: label of the example; ``1`` is treated as positive,
            anything else as negative.
        :return: nothing is returned in the visible code; results are stored
            on ``self``.
        """
        # print("IFIT T", t)
        # if y == 0:
        #     return

        # Keep only attributes that are not explicitly excluded; tuple
        # attributes are excluded by their first element.
        x = {
            a: x[a]
            for a in x
            if (isinstance(a, tuple) and a[0] not in self.remove_attrs) or (
                not isinstance(a, tuple) and a not in self.remove_attrs)
        }

        # x = {a: x[a] for a in x if self.is_structural_feature(a, x[a])}
        # x = {a: x[a] for a in x}

        # eles = set([field for field in t])
        # prior_count = 0
        # while len(eles) - prior_count > 0:
        #     prior_count = len(eles)
        #     for a in x:
        #         if isinstance(a, tuple) and a[0] == 'haselement':
        #             if a[2] in eles:
        #                 eles.add(a[1])
        #             # if self.matches(eles, a):
        #             #     names = get_attribute_components(a)
        #             #     eles.update(names)

        # x = {a: x[a] for a in x
        #      if self.matches(eles, a)}

        # Map each field of t to a positional placeholder. When a field
        # occurs more than once, the index of its first occurrence is kept.
        # foa_mapping = {field: 'foa%s' % j for j, field in enumerate(t)}
        foa_mapping = {}
        for j, field in enumerate(t):
            if field not in foa_mapping:
                foa_mapping[field] = 'foa%s' % j

        # for j,field in enumerate(t):
        #     x[('foa%s' % j, field)] = True
        x = rename_flat(x, foa_mapping)
        # pprint(x)

        # print("adding:")

        # Standardize names, then structure map against the overall concept
        # before updating any counts.
        ns = NameStandardizer()
        sm = StructureMapper(self.concept)
        x = sm.transform(ns.transform(x))
        # pprint(x)
        self.concept.increment_counts(x)

        # Only y == 1 counts as a positive example; every other label is
        # recorded as negative.
        if y == 1:
            self.pos_concept.increment_counts(x)
        else:
            self.neg_concept.increment_counts(x)

        # print()
        # print('POSITIVE')
        # pprint(self.pos_concept.av_counts)
        # print('NEGATIVE')
        # pprint(self.neg_concept.av_counts)

        # pprint(self.concept.av_counts)

        # Build the positive pattern from attributes whose value counts add
        # up to the number of positive examples.
        pos_instance = {}
        pos_args = set()
        for attr in self.pos_concept.av_counts:
            attr_count = 0
            for val in self.pos_concept.av_counts[attr]:
                attr_count += self.pos_concept.av_counts[attr][val]
            if attr_count == self.pos_concept.count:
                if len(self.pos_concept.av_counts[attr]) == 1:
                    args = get_vars(attr)
                    pos_args.update(args)
                    # `val` is the variable left over from the counting loop
                    # above; with exactly one value it is that single value.
                    pos_instance[attr] = val
                else:
                    # Several values were observed: use a fresh placeholder
                    # from value_gensym() instead of a concrete value.
                    args = get_vars(attr)
                    val_gensym = value_gensym()
                    # NOTE(review): `args` is not read again after this
                    # append in the visible code, and pos_args is not updated
                    # in this branch -- confirm whether that is intended.
                    args.append(val_gensym)
                    pos_instance[attr] = val_gensym

            # if len(self.pos_concept.av_counts[attr]) == 1:
            #     for val in self.pos_concept.av_counts[attr]:
            #         if ((self.pos_concept.av_counts[attr][val] ==
            #              self.pos_concept.count)):
            #             args = get_vars(attr)
            #             pos_args.update(args)
            #             pos_instance[attr] = val

        # print('POS ARGS', pos_args)

        # Build the negative pattern: only attributes whose variables all
        # appear in pos_args are considered.
        neg_instance = {}
        for attr in self.neg_concept.av_counts:
            # print("ATTR", attr)
            args = set(get_vars(attr))
            if not args.issubset(pos_args):
                continue

            # Keep a negative value that never occurs for this attribute in
            # the positive concept; if several qualify, the last one iterated
            # overwrites the earlier ones.
            for val in self.neg_concept.av_counts[attr]:
                # print("VAL", val)
                if ((attr not in self.pos_concept.av_counts
                     or val not in self.pos_concept.av_counts[attr])):
                    neg_instance[attr] = val

        # Rename the 'foa<j>' placeholders to their '?foa<j>' form in both
        # patterns.
        foa_mapping = {'foa%s' % j: '?foa%s' % j for j in range(len(t))}
        pos_instance = rename_flat(pos_instance, foa_mapping)
        neg_instance = rename_flat(neg_instance, foa_mapping)

        # Positive attribute-values become plain (attr, value) conditions;
        # negative ones are wrapped as ('not', (attr, value)).
        conditions = ([(a, pos_instance[a])
                       for a in pos_instance] + [('not', (a, neg_instance[a]))
                                                 for a in neg_instance])

        # print("========CONDITIONS======")
        # pprint(conditions)
        # print("========CONDITIONS======")

        # One '?foa<i>' target per element of t; the operator is named
        # 'Rule' and is given an empty list as its third argument.
        self.target_types = ['?foa%s' % i for i in range(len(t))]
        self.operator = Operator(tuple(['Rule'] + self.target_types),
                                 conditions, [])