Beispiel #1
0
    def infer_missing(self, instance, choice_fn=weighted_choice):
        """
        Given a tree and an instance, returns an instance with missing
        attribute values inferred using the given choice_fn.

        The instance is structure mapped against the root and categorized.
        For every attribute of the resulting concept that the mapped instance
        lacks, ``choice_fn`` first picks between leaving the attribute out
        (weighted by the concept's probability that it is missing) and
        filling it in. A filled-in :class:`ContinuousValue` attribute gets
        its unbiased mean; any other attribute gets a value picked by
        ``choice_fn`` from the concept's weighted values. The structure
        mapping is undone before the instance is returned.

        :param instance: an instance to be completed.
        :type instance: {a1: v1, a2: v2, ...}
        :param choice_fn: A function for deciding which attribute/value to
            choose. The default is: concept_formation.utils.weighted_choice.
            The other option is: concept_formation.utils.most_likely_choice.
        :type choice_fn: a python function
        :return: A completed instance
        :rtype: instance
        """
        mapper = StructureMapper(self.root)
        completed = mapper.transform(instance)
        concept = self._cobweb_categorize(completed)

        for attr in concept.av_counts:
            if attr in completed:
                continue

            # First decide whether the attribute should be present at all.
            p_missing = concept.get_probability_missing(attr)
            picked = choice_fn(((None, p_missing), (attr, 1 - p_missing)))
            if picked != attr:
                continue

            stored = concept.av_counts[attr]
            if isinstance(stored, ContinuousValue):
                completed[attr] = stored.unbiased_mean()
            else:
                completed[attr] = choice_fn(concept.get_weighted_values(attr))

        return mapper.undo_transform(completed)
Beispiel #2
0
def probability(tree, instance, attr, val):
    """
    Returns the probability of a particular value of an attribute in the
    instance.

    The instance should not contain the attribute; if it does, the tree is
    queried with a shallow copy that leaves the attribute out.

    For a plain value the result is the categorized concept's probability of
    ``attr`` having ``val``. For a component (``dict``) value the instance is
    structure mapped against the concept and the result is the mean of the
    concept's probabilities for every mapped sub-attribute that matches the
    regular expression ``'^' + mapping[attr]``.
    """
    if attr in instance:
        instance = {key: value for key, value in instance.items()
                    if key != attr}
    concept = tree.categorize(instance)

    # Plain values are answered directly by the concept.
    if not isinstance(val, dict):
        return concept.get_probability(attr, val)

    mapper = StructureMapper(concept)
    mapped_instance = mapper.transform(instance)
    mapping = mapper.get_mapping()

    # mapping[attr] is looked up per sub-attribute on purpose, so nothing is
    # looked up when the mapped instance is empty.
    matched = []
    for sub_attr in mapped_instance:
        if search('^' + mapping[attr], sub_attr):
            matched.append(
                concept.get_probability(sub_attr, mapped_instance[sub_attr]))
    return mean(matched)
Beispiel #3
0
    def _trestle_categorize(self, instance):
        """
        Structure maps the instance against the root of the tree, categorizes
        the mapped instance, and returns the resulting Cobweb3Node.

        :param instance: an instance to be categorized into the tree.
        :type instance: {a1:v1, a2:v2, ...}
        :return: A concept describing the instance
        :rtype: Cobweb3Node
        """
        mapped = StructureMapper(self.root).transform(instance)
        return self._cobweb_categorize(mapped)
Beispiel #4
0
def probability(tree, instance, attr, val):
    """
    Returns the probability of a particular value of an attribute in the
    instance. One of the scoring functions for incremental_evaluation.

    If the instance currently contains the target attribute a shallow copy is
    created to allow the attribute to be predicted.

    .. warning:: This is an older function in the library and we are not quite
        sure how to set it up for component values under the new
        representation and so for the time being it will raise an Exception if
        it encounters a component.

    :param tree: A category tree to evaluate.
    :type tree: :class:`CobwebTree <concept_formation.cobweb.CobwebTree>`,
        :class:`Cobweb3Tree <concept_formation.cobweb3.Cobweb3Tree>`, or
        :class:`TrestleTree <concept_formation.trestle.TrestleTree>`
    :param instance: An instance to query the tree with
    :type instance: {a1:v1, a2:v2, ...}
    :param attr: A target instance attribute to evaluate probability on
    :type attr: :ref:`Attribute<attributes>`
    :param val: The target value of the given attr
    :type val: A :ref:`Nominal<val-nom>` or :ref:`Numeric<val-num>` value.
    :returns: The probability of the given instance attribute value in the
        given tree
    :rtype: float
    :raises Exception: if ``val`` is a component (a ``dict``).
    """
    # Component values are not supported; reject them before spending any
    # work on categorizing the instance.
    if isinstance(val, dict):
        raise Exception(
            "Probability cannot be estimated on component attributes!")

    if attr in instance:
        # Query with a shallow copy so the caller's instance is left intact.
        instance = {a: v for a, v in instance.items() if a != attr}

    return tree.categorize(instance).probability(attr, val)
def probability(tree, instance, attr, val):
    """
    Returns the probability of a particular value of an attribute in the
    instance. One of the scoring functions for incremental_evaluation.

    If the instance currently contains the target attribute a shallow copy is
    created to allow the attribute to be predicted.

    .. warning:: This is an older function in the library and we are not quite
        sure how to set it up for component values under the new
        representation and so for the time being it will raise an Exception if
        it encounters a component.

    :param tree: A category tree to evaluate.
    :type tree: :class:`CobwebTree <concept_formation.cobweb.CobwebTree>`,
        :class:`Cobweb3Tree <concept_formation.cobweb3.Cobweb3Tree>`, or
        :class:`TrestleTree <concept_formation.trestle.TrestleTree>`
    :param instance: An instance to query the tree with
    :type instance: {a1:v1, a2:v2, ...}
    :param attr: A target instance attribute to evaluate probability on
    :type attr: :ref:`Attribute<attributes>`
    :param val: The target value of the given attr
    :type val: A :ref:`Nominal<val-nom>` or :ref:`Numeric<val-num>` value.
    :returns: The probability of the given instance attribute value in the
        given tree
    :rtype: float
    :raises Exception: if ``val`` is a component (a ``dict``).
    """
    # Components cannot be scored, so fail before the tree is consulted.
    if isinstance(val, dict):
        raise Exception(
            "Probability cannot be estimated on component attributes!")

    query = instance
    if attr in query:
        # Drop the target attribute from a shallow copy; the caller's
        # instance is not modified.
        query = {a: v for a, v in query.items() if a != attr}

    concept = tree.categorize(query)
    return concept.probability(attr, val)
Beispiel #6
0
    def ifit(self, instance, do_mapping=False):
        """
        Just maintain a set of counts at the root and use these for prediction.

        The do_mapping parameter determines whether or not to do structure
        mapping. This is disabled by default to get a really naive model.

        **This process modifies the tree's knowledge.** For a non-modifying
        version see: :meth:`DummyTree.categorize`.

        :param instance: an instance to be categorized into the tree.
        :type instance: :ref:`Instance<instance-rep>`
        :param do_mapping: a flag for whether or not to do structure mapping.
        :type do_mapping: bool
        :return: the root node of the tree containing everything ever added to
            it.
        :rtype: Cobweb3Node
        """
        # Both variants share the first two stages; structure mapping is only
        # appended on request.
        stages = [SubComponentProcessor(), Flattener()]
        if do_mapping:
            stages.append(StructureMapper(self.root, gensym=self.gensym))

        processed = Pipeline(*stages).transform(instance)
        self.root.increment_counts(processed)
        return self.root
Beispiel #7
0
    def ifit(self, instance, do_mapping=False):
        """
        Just maintain a set of counts at the root and use these for prediction.

        The do_mapping parameter determines whether or not to do structure
        mapping. This is disabled by default to get a really naive model.

        :param instance: the instance whose values are added to the root's
            counts.
        :param do_mapping: when true the instance is transformed by a
            StructureMapper built on the root; otherwise it goes through the
            preprocessing pipeline without structure mapping.
        :return: the root node, after its counts were incremented.
        """
        if do_mapping:
            transformer = StructureMapper(self.root)
        else:
            transformer = Pipeline(Tuplizer(), ListProcessor(),
                                   NameStandardizer(),
                                   SubComponentProcessor(), Flattener())

        processed = transformer.transform(instance)
        self.root.increment_counts(processed)
        return self.root
Beispiel #8
0
    def trestle(self, instance):
        """
        The core trestle algorithm used in fitting and categorization.

        This function is similar to :meth:`Cobweb.cobweb
        <concept_formation.cobweb.CobwebTree.cobweb>`. The key difference
        between trestle and cobweb is that trestle performs structure mapping
        (see: :meth:`structure_map
        <concept_formation.structure_mapper.structure_map>`) before proceeding
        through the normal cobweb algorithm.

        :param instance: an instance to be categorized into the tree.
        :type instance: {a1:v1, a2:v2, ...}
        :return: A concept describing the instance
        :rtype: CobwebNode
        """
        # Map the instance onto the root's structure, then run plain cobweb.
        mapped = StructureMapper(self.root).transform(instance)
        return self.cobweb(mapped)
    def _trestle_categorize(self, instance):
        """
        Preprocesses and structure maps the instance, categorizes the matched
        instance, and returns the resulting concept.

        The preprocessed instance is passed to ``_sanity_check_instance``
        before it is categorized.

        :param instance: an instance to be categorized into the tree.
        :type instance: {a1:v1, a2:v2, ...}
        :return: A concept describing the instance
        :rtype: concept
        """
        stages = (NameStandardizer(self.gensym), Flattener(),
                  SubComponentProcessor(), StructureMapper(self.root))
        mapped = Pipeline(*stages).transform(instance)

        self._sanity_check_instance(mapped)
        return self._cobweb_categorize(mapped)
    def trestle(self, instance):
        """
        The core trestle algorithm used in fitting and categorization.

        This function is similar to :meth:`Cobweb.cobweb
        <concept_formation.cobweb.CobwebTree.cobweb>`. The key difference
        between trestle and cobweb is that trestle performs structure mapping
        (see: :meth:`structure_map
        <concept_formation.structure_mapper.StructureMapper.transform>`) before
        proceeding through the normal cobweb algorithm.

        The preprocessed instance is passed to ``_sanity_check_instance``
        before it is handed to cobweb.

        :param instance: an instance to be categorized into the tree.
        :type instance: :ref:`Instance<instance-rep>`
        :return: A concept describing the instance
        :rtype: CobwebNode
        """
        stages = (NameStandardizer(self.gensym), Flattener(),
                  SubComponentProcessor(), StructureMapper(self.root))
        mapped = Pipeline(*stages).transform(instance)

        self._sanity_check_instance(mapped)
        return self.cobweb(mapped)
    def infer_missing(self,
                      instance,
                      choice_fn="most likely",
                      allow_none=True):
        """
        Given a tree and an instance, returns a new instance with attribute
        values picked using the specified choice function (either "most likely"
        or "sampled").

        The instance is preprocessed and categorized; every attribute of the
        resulting concept that the preprocessed instance lacks is predicted by
        the concept and stored unless the prediction is ``None``. The
        preprocessing is undone before the instance is returned.

        .. todo:: write some kind of test for this.

        :param instance: an instance to be completed.
        :type instance: :ref:`Instance<instance-rep>`
        :param choice_fn: a string specifying the choice function to use,
            either "most likely" or "sampled".
        :type choice_fn: a string
        :param allow_none: whether attributes not in the instance can be
            inferred to be missing. If False, then all attributes will be
            inferred with some value.
        :type allow_none: Boolean
        :return: A completed instance
        :rtype: instance
        """
        pipeline = Pipeline(NameStandardizer(self.gensym), Flattener(),
                            SubComponentProcessor(),
                            StructureMapper(self.root))

        completed = pipeline.transform(instance)
        concept = self._cobweb_categorize(completed)

        for attr in concept.attrs('all'):
            if attr not in completed:
                prediction = concept.predict(attr, choice_fn, allow_none)
                if prediction is not None:
                    completed[attr] = prediction

        return pipeline.undo_transform(completed)
    def ifit(self, t, x, y):
        """
        Incrementally fit one labelled example and rebuild ``self.operator``.

        Attributes of ``x`` whose name (or, for tuple attributes, whose first
        element) is in ``self.remove_attrs`` are dropped. Every distinct
        element of ``t`` is mapped to ``'foa<j>'``, with ``j`` the position of
        its first occurrence, and ``x`` is passed through ``rename_flat`` with
        that mapping, then name standardized and structure mapped against
        ``self.concept``. The result increments the counts of
        ``self.concept`` and of ``self.pos_concept`` (when ``y == 1``) or
        ``self.neg_concept`` (otherwise).

        The rule conditions are then regenerated from the stored counts:

        * one positive condition per attribute of the positive concept whose
          value counts sum to the positive concept's count, bound to the value
          when there is exactly one, otherwise to a fresh ``value_gensym()``;
        * one ``('not', ...)`` condition per attribute of the negative concept
          whose variables all occur in single-valued positive conditions,
          bound to a value that the positive concept never recorded for that
          attribute (the last such value wins).

        :param t: the elements to replace with ``foa`` placeholders
            (presumably the arguments of the example; confirm with callers)
        :param x: the example, a mapping from attributes to values
        :param y: the label; ``1`` is treated as positive, anything else as
            negative
        :return: None. ``self.target_types`` and ``self.operator`` are
            replaced.
        """
        x = {
            attr: x[attr]
            for attr in x
            if (attr[0] if isinstance(attr, tuple)
                else attr) not in self.remove_attrs
        }

        # A repeated element of t keeps the index of its first occurrence.
        first_index = {}
        for j, field in enumerate(t):
            first_index.setdefault(field, 'foa%s' % j)
        x = rename_flat(x, first_index)

        standardizer = NameStandardizer()
        mapper = StructureMapper(self.concept)
        x = mapper.transform(standardizer.transform(x))

        self.concept.increment_counts(x)
        labelled = self.pos_concept if y == 1 else self.neg_concept
        labelled.increment_counts(x)

        pos_counts = self.pos_concept.av_counts
        pos_instance = {}
        pos_args = set()
        for attr in pos_counts:
            val_counts = pos_counts[attr]
            total = sum(val_counts[v] for v in val_counts)
            if total != self.pos_concept.count:
                continue

            args = get_vars(attr)
            if len(val_counts) == 1:
                (only_val,) = val_counts
                pos_args.update(args)
                pos_instance[attr] = only_val
            else:
                placeholder = value_gensym()
                # NOTE(review): the extended list is not read afterwards and
                # these variables are not added to pos_args; kept as in the
                # original.
                args.append(placeholder)
                pos_instance[attr] = placeholder

        neg_counts = self.neg_concept.av_counts
        neg_instance = {}
        for attr in neg_counts:
            if not set(get_vars(attr)).issubset(pos_args):
                continue

            for val in neg_counts[attr]:
                if not (attr in pos_counts and val in pos_counts[attr]):
                    neg_instance[attr] = val

        arity = len(t)
        foa_vars = {'foa%s' % j: '?foa%s' % j for j in range(arity)}
        pos_instance = rename_flat(pos_instance, foa_vars)
        neg_instance = rename_flat(neg_instance, foa_vars)

        conditions = [(attr, pos_instance[attr]) for attr in pos_instance]
        conditions.extend(('not', (attr, neg_instance[attr]))
                          for attr in neg_instance)

        self.target_types = ['?foa%s' % i for i in range(arity)]
        self.operator = Operator(('Rule',) + tuple(self.target_types),
                                 conditions, [])