def infer_missing(self, instance, choice_fn=weighted_choice):
    """
    Build a completed version of ``instance`` by inferring the
    attribute-values it lacks.

    The instance is structure mapped against the tree's root and
    categorized. Every attribute of the resulting concept that the mapped
    instance does not have is then considered: ``choice_fn`` first decides
    between "missing" and "present" using the concept's probability that
    the attribute is missing, and, when "present" wins, a value is filled
    in. Continuous attributes receive their unbiased mean; all other
    attributes receive a value picked by ``choice_fn`` from the concept's
    weighted values.

    :param instance: an instance to be completed.
    :type instance: {a1: v1, a2: v2, ...}
    :param choice_fn: A function for deciding which attribute/value to
        choose. The default is: concept_formation.utils.weighted_choice.
        The other option is: concept_formation.utils.most_likely_choice.
    :type choice_fn: a python function
    :return: A completed instance
    :rtype: instance
    """
    mapper = StructureMapper(self.root)
    completed = mapper.transform(instance)
    concept = self._cobweb_categorize(completed)

    for attr in concept.av_counts:
        if attr not in completed:
            p_missing = concept.get_probability_missing(attr)
            presence_options = ((None, p_missing), (attr, 1 - p_missing))

            # choice_fn returns None when the attribute is judged missing.
            if choice_fn(presence_options) == attr:
                stored = concept.av_counts[attr]
                if isinstance(stored, ContinuousValue):
                    completed[attr] = concept.av_counts[attr].unbiased_mean()
                else:
                    weighted = concept.get_weighted_values(attr)
                    completed[attr] = choice_fn(weighted)

    # Translate the mapped names back into the caller's naming.
    return mapper.undo_transform(completed)
def probability(tree, instance, attr, val):
    """
    Return the probability of ``attr`` having value ``val`` according to
    the concept that ``instance`` is categorized into.

    The instance should not contain ``attr``; when it does, a shallow copy
    without that attribute is categorized instead, so the caller's instance
    is left untouched.

    When ``val`` is a dict (a component value) the instance is structure
    mapped against the concept and the result is the mean probability of
    every mapped attribute whose name matches the mapped name of ``attr``
    at its start.
    """
    if attr in instance:
        instance = {key: instance[key] for key in instance if key != attr}

    concept = tree.categorize(instance)

    # Simple (non-component) values are looked up directly.
    if not isinstance(val, dict):
        return concept.get_probability(attr, val)

    mapper = StructureMapper(concept)
    mapped_instance = mapper.transform(instance)
    mapping = mapper.get_mapping()

    sub_probs = []
    for sub_attr in mapped_instance:
        # The pattern is built per attribute, so an empty mapped instance
        # never looks up mapping[attr].
        if search('^' + mapping[attr], sub_attr):
            sub_probs.append(
                concept.get_probability(sub_attr, mapped_instance[sub_attr]))
    return mean(sub_probs)
def _trestle_categorize(self, instance):
    """
    Structure map ``instance`` against the tree's root, then categorize
    the mapped instance and return the concept it lands in.

    :param instance: an instance to be categorized into the tree.
    :type instance: {a1:v1, a2:v2, ...}
    :return: A concept describing the instance
    :rtype: Cobweb3Node
    """
    mapped_instance = StructureMapper(self.root).transform(instance)
    return self._cobweb_categorize(mapped_instance)
def probability(tree, instance, attr, val):
    """
    Returns the probability of a particular value of an attribute in the
    instance. One of the scoring functions for incremental_evaluation.

    If the instance currently contains the target attribute a shallow copy
    is created to allow the attribute to be predicted.

    .. warning:: This is an older function in the library and we are not
        quite sure how to set it up for component values under the new
        representation and so for the time being it will raise an Exception
        if it encounters a component.

    :param tree: A category tree to evaluate.
    :type tree: :class:`CobwebTree <concept_formation.cobweb.CobwebTree>`,
        :class:`Cobweb3Tree <concept_formation.cobweb3.Cobweb3Tree>`, or
        :class:`TrestleTree <concept_formation.trestle.TrestleTree>`
    :param instance: An instance to query the tree with
    :type instance: {a1:v1, a2:v2, ...}
    :param attr: A target instance attribute to evaluate probability on
    :type attr: :ref:`Attribute<attributes>`
    :param val: The target value of the given attr
    :type val: A :ref:`Nominal<val-nom>` or :ref:`Numeric<val-num>` value.
    :returns: The probability of the given instance attribute value in the
        given tree
    :rtype: float
    :raises Exception: if ``val`` is a dict (a component value).
    """
    if attr in instance:
        # Hide the target attribute so it cannot influence categorization;
        # the caller's instance is not modified.
        instance = {a: instance[a] for a in instance if a != attr}

    concept = tree.categorize(instance)

    if isinstance(val, dict):
        # Component values are not supported. The legacy structure-mapping
        # code that used to follow this raise could never execute and has
        # been removed.
        raise Exception(
            "Probability cannot be estimated on component attributes!")

    return concept.probability(attr, val)
def probability(tree, instance, attr, val):
    """
    Returns the probability of a particular value of an attribute in the
    instance. One of the scoring functions for incremental_evaluation.

    If the instance currently contains the target attribute a shallow copy
    is created to allow the attribute to be predicted.

    .. warning:: This is an older function in the library and we are not
        quite sure how to set it up for component values under the new
        representation and so for the time being it will raise an Exception
        if it encounters a component.

    :param tree: A category tree to evaluate.
    :type tree: :class:`CobwebTree <concept_formation.cobweb.CobwebTree>`,
        :class:`Cobweb3Tree <concept_formation.cobweb3.Cobweb3Tree>`, or
        :class:`TrestleTree <concept_formation.trestle.TrestleTree>`
    :param instance: An instance to query the tree with
    :type instance: {a1:v1, a2:v2, ...}
    :param attr: A target instance attribute to evaluate probability on
    :type attr: :ref:`Attribute<attributes>`
    :param val: The target value of the given attr
    :type val: A :ref:`Nominal<val-nom>` or :ref:`Numeric<val-num>` value.
    :returns: The probability of the given instance attribute value in the
        given tree
    :rtype: float
    :raises Exception: if ``val`` is a dict (a component value).
    """
    if attr in instance:
        # Categorize a shallow copy without the target attribute; the
        # caller's instance stays intact.
        instance = {a: instance[a] for a in instance if a != attr}

    concept = tree.categorize(instance)

    if isinstance(val, dict):
        # Component values are rejected. The statements that previously
        # followed this raise were unreachable and have been dropped.
        raise Exception(
            "Probability cannot be estimated on component attributes!")

    return concept.probability(attr, val)
def ifit(self, instance, do_mapping=False):
    """
    Fold ``instance`` into the counts kept at the root, which are what
    this tree uses for prediction.

    The ``do_mapping`` parameter determines whether or not structure
    mapping is performed. It is disabled by default to get a really naive
    model.

    **This process modifies the tree's knowledge.** For a non-modifying
    version see: :meth:`DummyTree.categorize`.

    :param instance: an instance to be categorized into the tree.
    :type instance: :ref:`Instance<instance-rep>`
    :param do_mapping: a flag for whether or not to do structure mapping.
    :type do_mapping: bool
    :return: the root node of the tree containing everything ever added to
        it.
    :rtype: Cobweb3Node
    """
    # Both variants share the first two stages; structure mapping is an
    # optional final stage.
    stages = [SubComponentProcessor(), Flattener()]
    if do_mapping:
        stages.append(StructureMapper(self.root, gensym=self.gensym))

    processed = Pipeline(*stages).transform(instance)
    self.root.increment_counts(processed)
    return self.root
def ifit(self, instance, do_mapping=False):
    """
    Add ``instance`` to the counts kept at the root and return the root;
    those counts are what is used for prediction.

    The ``do_mapping`` flag selects the preprocessing: when true the
    instance is structure mapped against the root, otherwise it only goes
    through the basic preprocessing pipeline. Mapping is off by default to
    get a really naive model.
    """
    if do_mapping:
        transformer = StructureMapper(self.root)
    else:
        transformer = Pipeline(
            Tuplizer(),
            ListProcessor(),
            NameStandardizer(),
            SubComponentProcessor(),
            Flattener(),
        )

    self.root.increment_counts(transformer.transform(instance))
    return self.root
def trestle(self, instance):
    """
    The core trestle algorithm used in fitting and categorization.

    This mirrors :meth:`Cobweb.cobweb
    <concept_formation.cobweb.CobwebTree.cobweb>`, except that the instance
    is first structure mapped against the tree's root (see:
    :meth:`structure_map
    <concept_formation.structure_mapper.structure_map>`) and the mapped
    instance is what gets handed to the normal cobweb algorithm.

    :param instance: an instance to be categorized into the tree.
    :type instance: {a1:v1, a2:v2, ...}
    :return: A concept describing the instance
    :rtype: CobwebNode
    """
    mapped_instance = StructureMapper(self.root).transform(instance)
    return self.cobweb(mapped_instance)
def _trestle_categorize(self, instance):
    """
    Preprocess and structure map ``instance``, sanity check the result,
    then categorize it and return the resulting concept.

    :param instance: an instance to be categorized into the tree.
    :type instance: {a1:v1, a2:v2, ...}
    :return: A concept describing the instance
    :rtype: concept
    """
    stages = (
        NameStandardizer(self.gensym),
        Flattener(),
        SubComponentProcessor(),
        StructureMapper(self.root),
    )
    matched = Pipeline(*stages).transform(instance)

    self._sanity_check_instance(matched)
    return self._cobweb_categorize(matched)
def trestle(self, instance):
    """
    The core trestle algorithm used in fitting and categorization.

    This mirrors :meth:`Cobweb.cobweb
    <concept_formation.cobweb.CobwebTree.cobweb>`. The difference is that
    trestle preprocesses and structure maps the instance (see:
    :meth:`structure_map
    <concept_formation.structure_mapper.StructureMapper.transform>`)
    before running the normal cobweb algorithm on the result.

    :param instance: an instance to be categorized into the tree.
    :type instance: :ref:`Instance<instance-rep>`
    :return: A concept describing the instance
    :rtype: CobwebNode
    """
    standardizer = NameStandardizer(self.gensym)
    flattener = Flattener()
    sub_components = SubComponentProcessor()
    mapper = StructureMapper(self.root)
    pipeline = Pipeline(standardizer, flattener, sub_components, mapper)

    prepared = pipeline.transform(instance)
    self._sanity_check_instance(prepared)
    return self.cobweb(prepared)
def infer_missing(self, instance, choice_fn="most likely", allow_none=True):
    """
    Return a completed copy of ``instance`` in which attributes it does
    not have are filled in with values predicted by the concept it is
    categorized into, using the specified choice function (either "most
    likely" or "sampled").

    .. todo:: write some kind of test for this.

    :param instance: an instance to be completed.
    :type instance: :ref:`Instance<instance-rep>`
    :param choice_fn: a string specifying the choice function to use,
        either "most likely" or "sampled".
    :type choice_fn: a string
    :param allow_none: whether attributes not in the instance can be
        inferred to be missing. If False, then all attributes will be
        inferred with some value.
    :type allow_none: Boolean
    :return: A completed instance
    :rtype: instance
    """
    pipeline = Pipeline(
        NameStandardizer(self.gensym),
        Flattener(),
        SubComponentProcessor(),
        StructureMapper(self.root),
    )
    completed = pipeline.transform(instance)
    concept = self._cobweb_categorize(completed)

    for attr in concept.attrs('all'):
        if attr not in completed:
            predicted = concept.predict(attr, choice_fn, allow_none)
            # A None prediction means the attribute stays absent.
            if predicted is not None:
                completed[attr] = predicted

    # Undo the preprocessing so the result uses the caller's names.
    return pipeline.undo_transform(completed)
def ifit(self, t, x, y):
    """
    Incrementally fit one labeled example and rebuild ``self.operator``.

    The example ``x`` is filtered against ``self.remove_attrs``, the fields
    in ``t`` are renamed to positional ``foa<j>`` names, and the result is
    name-standardized and structure mapped against ``self.concept``. Counts
    are then incremented on ``self.concept`` and on either
    ``self.pos_concept`` (when ``y == 1``) or ``self.neg_concept`` (for any
    other ``y``). Finally a list of conditions is derived from the positive
    and negative counts and stored, together with ``self.target_types``, in
    a freshly built ``Operator``.

    :param t: the fields of the example to treat as ``foa`` arguments; it
        is enumerated and its length is taken.
    :param x: the example, indexed by attribute (attributes may be tuples).
    :param y: the label; ``1`` counts the example as positive, anything
        else as negative.
    :return: None. Results are stored on ``self.target_types`` and
        ``self.operator``.
    """
    # print("IFIT T", t)
    # if y == 0:
    #     return

    # Drop removed attributes: tuple attributes are checked by their first
    # element, all other attributes by the attribute itself.
    x = {
        a: x[a]
        for a in x
        if (isinstance(a, tuple) and a[0] not in self.remove_attrs) or (
            not isinstance(a, tuple) and a not in self.remove_attrs)
    }

    # x = {a: x[a] for a in x if self.is_structural_feature(a, x[a])}
    # x = {a: x[a] for a in x}

    # eles = set([field for field in t])
    # prior_count = 0
    # while len(eles) - prior_count > 0:
    #     prior_count = len(eles)
    #     for a in x:
    #         if isinstance(a, tuple) and a[0] == 'haselement':
    #             if a[2] in eles:
    #                 eles.add(a[1])
    #             # if self.matches(eles, a):
    #             #     names = get_attribute_components(a)
    #             #     eles.update(names)

    # x = {a: x[a] for a in x
    #      if self.matches(eles, a)}

    # foa_mapping = {field: 'foa%s' % j for j, field in enumerate(t)}
    # Unlike the comprehension above, a field that occurs more than once in
    # t keeps the index of its first occurrence.
    foa_mapping = {}
    for j, field in enumerate(t):
        if field not in foa_mapping:
            foa_mapping[field] = 'foa%s' % j

    # for j,field in enumerate(t):
    #     x[('foa%s' % j, field)] = True
    x = rename_flat(x, foa_mapping)
    # pprint(x)

    # print("adding:")

    # Standardize names, then structure map against the overall concept
    # before counting.
    ns = NameStandardizer()
    sm = StructureMapper(self.concept)
    x = sm.transform(ns.transform(x))
    # pprint(x)
    self.concept.increment_counts(x)

    if y == 1:
        self.pos_concept.increment_counts(x)
    else:
        self.neg_concept.increment_counts(x)

    # print()
    # print('POSITIVE')
    # pprint(self.pos_concept.av_counts)
    # print('NEGATIVE')
    # pprint(self.neg_concept.av_counts)

    # pprint(self.concept.av_counts)

    # Positive conditions: keep only attributes whose value counts sum to
    # the positive concept's total count.
    pos_instance = {}
    pos_args = set()
    for attr in self.pos_concept.av_counts:
        attr_count = 0
        for val in self.pos_concept.av_counts[attr]:
            attr_count += self.pos_concept.av_counts[attr][val]
        if attr_count == self.pos_concept.count:
            if len(self.pos_concept.av_counts[attr]) == 1:
                # Exactly one value was seen; `val` still holds it from the
                # counting loop above.
                args = get_vars(attr)
                pos_args.update(args)
                pos_instance[attr] = val
            else:
                # Several values were seen: the condition's value becomes a
                # generated name instead of a concrete value.
                # NOTE(review): `args` is extended here but never added to
                # pos_args or read again in this branch -- confirm intent.
                args = get_vars(attr)
                val_gensym = value_gensym()
                args.append(val_gensym)
                pos_instance[attr] = val_gensym

        # if len(self.pos_concept.av_counts[attr]) == 1:
        #     for val in self.pos_concept.av_counts[attr]:
        #         if ((self.pos_concept.av_counts[attr][val] ==
        #              self.pos_concept.count)):
        #             args = get_vars(attr)
        #             pos_args.update(args)
        #             pos_instance[attr] = val

    # print('POS ARGS', pos_args)

    # Negative conditions: only attributes whose variables all appear in
    # pos_args are considered.
    neg_instance = {}
    for attr in self.neg_concept.av_counts:
        # print("ATTR", attr)
        args = set(get_vars(attr))
        if not args.issubset(pos_args):
            continue

        for val in self.neg_concept.av_counts[attr]:
            # print("VAL", val)
            # Keep values never seen for this attribute in the positive
            # counts; with several such values the last one iterated wins.
            if ((attr not in self.pos_concept.av_counts or val not in
                 self.pos_concept.av_counts[attr])):
                neg_instance[attr] = val

    # Rename the positional foa names to their '?'-prefixed forms.
    foa_mapping = {'foa%s' % j: '?foa%s' % j for j in range(len(t))}
    pos_instance = rename_flat(pos_instance, foa_mapping)
    neg_instance = rename_flat(neg_instance, foa_mapping)

    # Positive entries become (attr, value) pairs; negative entries are
    # wrapped as ('not', (attr, value)).
    conditions = ([(a, pos_instance[a]) for a in pos_instance] +
                  [('not', (a, neg_instance[a])) for a in neg_instance])

    # print("========CONDITIONS======")
    # pprint(conditions)
    # print("========CONDITIONS======")

    self.target_types = ['?foa%s' % i for i in range(len(t))]
    self.operator = Operator(tuple(['Rule'] + self.target_types),
                             conditions, [])