def infer_missing(self, instance, choice_fn=weighted_choice):
    """
    Build a completed version of ``instance`` in which attributes that the
    matching concept knows about, but the instance lacks, are filled in.

    The instance is structure mapped against the root, categorized, and then
    every attribute of the resulting concept that is absent from the mapped
    instance is considered. ``choice_fn`` first decides between leaving the
    attribute out (weighted by the concept's probability that it is missing)
    and including it; if it is included, a value is picked for it.

    :param instance: an instance to be completed.
    :type instance: {a1: v1, a2: v2, ...}
    :param choice_fn: A function for deciding which attribute/value to
        choose. The default is: concept_formation.utils.weighted_choice. The
        other option is: concept_formation.utils.most_likely_choice.
    :type choice_fn: a python function
    :return: A completed instance
    :rtype: instance
    """
    mapper = StructureMapper(self.root)
    completed = mapper.transform(instance)
    concept = self._cobweb_categorize(completed)

    for attr in concept.av_counts:
        # Attributes the instance already carries are left untouched.
        if attr in completed:
            continue

        p_missing = concept.get_probability_missing(attr)
        presence_options = ((None, p_missing), (attr, 1 - p_missing))

        # First draw: should the attribute be present at all?
        if choice_fn(presence_options) == attr:
            stored = concept.av_counts[attr]
            # Continuous attributes take the unbiased mean; anything else
            # gets a second draw over the concept's weighted values.
            completed[attr] = (
                stored.unbiased_mean()
                if isinstance(stored, ContinuousValue)
                else choice_fn(concept.get_weighted_values(attr))
            )

    # Translate the mapped names back to the caller's original naming.
    return mapper.undo_transform(completed)
def probability(tree, instance, attr, val):
    """
    Compute the probability that ``attr`` takes the value ``val`` according
    to the concept that ``instance`` is categorized into.

    The instance is expected not to contain ``attr``; when it does, a shallow
    copy without that attribute is used so the caller's dict is never
    modified.

    For a plain value the concept's probability of (attr, val) is returned.
    For a ``dict`` value the instance is structure mapped against the concept
    and the mean probability of every mapped attribute whose name starts with
    the mapped name of ``attr`` is returned.
    """
    # Hide the target attribute from categorization (on a copy).
    if attr in instance:
        instance = {key: instance[key] for key in instance if not key == attr}

    concept = tree.categorize(instance)

    # Plain values can be scored directly.
    if not isinstance(val, dict):
        return concept.get_probability(attr, val)

    # Component value: align the instance with the concept first.
    mapper = StructureMapper(concept)
    mapped_instance = mapper.transform(instance)
    renaming = mapper.get_mapping()

    scores = []
    for name in mapped_instance:
        # Keep only sub-attributes that begin with the mapped component name.
        if search('^' + renaming[attr], name):
            scores.append(concept.get_probability(name, mapped_instance[name]))
    return mean(scores)
def _trestle_categorize(self, instance):
    """
    Structure map ``instance`` against the root of the tree and categorize
    the mapped result with the underlying cobweb categorization routine.

    :param instance: an instance to be categorized into the tree.
    :type instance: {a1:v1, a2:v2, ...}
    :return: A concept describing the instance
    :rtype: Cobweb3Node
    """
    mapped_instance = StructureMapper(self.root).transform(instance)
    return self._cobweb_categorize(mapped_instance)
def probability(tree, instance, attr, val):
    """
    Returns the probability of a particular value of an attribute in the
    instance. One of the scoring functions for incremental_evaluation.

    If the instance currently contains the target attribute a shallow copy is
    created to allow the attribute to be predicted; the caller's instance is
    never modified.

    .. warning:: Component (``dict``) values are not supported under the
        current representation. Passing one raises an ``Exception`` before
        the tree is queried.

    :param tree: A category tree to evaluate.
    :type tree: :class:`CobwebTree <concept_formation.cobweb.CobwebTree>`,
        :class:`Cobweb3Tree <concept_formation.cobweb3.Cobweb3Tree>`, or
        :class:`TrestleTree <concept_formation.trestle.TrestleTree>`
    :param instance: An instance to use query the tree with
    :type instance: {a1:v1, a2:v2, ...}
    :param attr: A target instance attribute to evaluate probability on
    :type attr: :ref:`Attribute<attributes>`
    :param val: The target value of the given attr
    :type val: A :ref:`Nominal<val-nom>` or :ref:`Numeric<val-num>` value.
    :returns: The probability of the given instance attribute value in the
        given tree
    :rtype: float
    :raises Exception: if ``val`` is a ``dict`` (a component value).
    """
    # Reject unsupported component values up front so no categorization work
    # is done for a call that cannot succeed.
    if isinstance(val, dict):
        raise Exception(
            "Probability cannot be estimated on component attributes!")

    # Hide the target attribute from categorization, on a shallow copy.
    if attr in instance:
        instance = {a: instance[a] for a in instance if a != attr}

    concept = tree.categorize(instance)
    return concept.probability(attr, val)
def probability(tree, instance, attr, val):
    """
    Returns the probability of a particular value of an attribute in the
    instance. One of the scoring functions for incremental_evaluation.

    If the instance currently contains the target attribute a shallow copy is
    created to allow the attribute to be predicted; the caller's instance is
    never modified.

    .. warning:: Component (``dict``) values are not supported under the
        current representation. Passing one raises an ``Exception`` before
        the tree is queried.

    :param tree: A category tree to evaluate.
    :type tree: :class:`CobwebTree <concept_formation.cobweb.CobwebTree>`,
        :class:`Cobweb3Tree <concept_formation.cobweb3.Cobweb3Tree>`, or
        :class:`TrestleTree <concept_formation.trestle.TrestleTree>`
    :param instance: An instance to use query the tree with
    :type instance: {a1:v1, a2:v2, ...}
    :param attr: A target instance attribute to evaluate probability on
    :type attr: :ref:`Attribute<attributes>`
    :param val: The target value of the given attr
    :type val: A :ref:`Nominal<val-nom>` or :ref:`Numeric<val-num>` value.
    :returns: The probability of the given instance attribute value in the
        given tree
    :rtype: float
    :raises Exception: if ``val`` is a ``dict`` (a component value).
    """
    # Component values cannot be scored; fail before touching the tree.
    if isinstance(val, dict):
        raise Exception(
            "Probability cannot be estimated on component attributes!")

    # Query with a shallow copy that omits the attribute being predicted.
    if attr in instance:
        instance = {a: instance[a] for a in instance if a != attr}

    concept = tree.categorize(instance)
    return concept.probability(attr, val)
def ifit(self, instance, do_mapping=False):
    """
    Incrementally fit a naive model: the instance is transformed and its
    counts are added to the root, which is then used for prediction.

    The ``do_mapping`` parameter determines whether or not to do structure
    mapping. It is disabled by default to get a really naive model; in that
    case the instance is only run through the preprocessing pipeline.

    :param instance: the instance to add to the root's counts.
    :param do_mapping: when true, structure map the instance against the
        root instead of using the plain preprocessing pipeline.
    :return: the root node, after its counts were incremented.
    """
    transformer = (
        StructureMapper(self.root)
        if do_mapping
        else Pipeline(Tuplizer(), ListProcessor(), NameStandardizer(),
                      SubComponentProcessor(), Flattener())
    )
    prepared = transformer.transform(instance)

    self.root.increment_counts(prepared)
    return self.root
def trestle(self, instance):
    """
    The core trestle algorithm used in fitting and categorization.

    This function is similar to :meth:`Cobweb.cobweb
    <concept_formation.cobweb.CobwebTree.cobweb>`. The key difference
    between trestle and cobweb is that trestle performs structure mapping
    (see: :meth:`structure_map
    <concept_formation.structure_mapper.structure_map>`) on the instance
    before handing it to the normal cobweb algorithm.

    :param instance: an instance to be categorized into the tree.
    :type instance: {a1:v1, a2:v2, ...}
    :return: A concept describing the instance
    :rtype: CobwebNode
    """
    mapped_instance = StructureMapper(self.root).transform(instance)
    return self.cobweb(mapped_instance)
def ifit(self, t, x, y):
    """
    Incrementally update the learner with one labelled example and rebuild
    ``self.operator`` from the accumulated positive and negative counts.

    Steps, in order:

    1. Drop from ``x`` every attribute listed in ``self.remove_attrs`` (for
       tuple attributes the first element is checked).
    2. Rename each distinct field of ``t`` to ``'foa<j>'`` (``j`` is the
       index of its first occurrence), then standardize names and structure
       map the result against ``self.concept``.
    3. Add the mapped example to ``self.concept`` and to either
       ``self.pos_concept`` (``y == 1``) or ``self.neg_concept`` (otherwise).
    4. Derive positive conditions from attributes whose counts cover every
       positive example, and negated conditions from negative attribute
       values never seen in the positive concept.
    5. Store the result in ``self.target_types`` and ``self.operator``.

    :param t: sequence of fields; presumably the focus-of-attention
        arguments of the example -- confirm against callers.
    :param x: the example, a dict keyed by attribute (tuples or plain keys).
    :param y: label; ``1`` is treated as positive, anything else as negative.
    :return: None; the method works through side effects on ``self``.
    """
    # Disabled early exit for negative examples:
    # if y == 0:
    #     return

    # Keep only attributes that are not explicitly excluded. Tuple
    # attributes are matched on their first element, others on the key.
    x = {
        a: x[a]
        for a in x
        if (isinstance(a, tuple) and a[0] not in self.remove_attrs) or (
            not isinstance(a, tuple) and a not in self.remove_attrs)
    }

    # Disabled alternative filters:
    # x = {a: x[a] for a in x if self.is_structural_feature(a, x[a])}
    # x = {a: x[a] for a in x}

    # Disabled: restrict x to elements reachable from t via 'haselement'.
    # eles = set([field for field in t])
    # prior_count = 0
    # while len(eles) - prior_count > 0:
    #     prior_count = len(eles)
    #     for a in x:
    #         if isinstance(a, tuple) and a[0] == 'haselement':
    #             if a[2] in eles:
    #                 eles.add(a[1])
    #             # if self.matches(eles, a):
    #             #     names = get_attribute_components(a)
    #             #     eles.update(names)

    # x = {a: x[a] for a in x
    #      if self.matches(eles, a)}

    # Map each distinct field to 'foa<index of first occurrence>'. The
    # comprehension below was replaced because, with repeated fields, it
    # would keep the LAST index instead of the first.
    # foa_mapping = {field: 'foa%s' % j for j, field in enumerate(t)}
    foa_mapping = {}
    for j, field in enumerate(t):
        if field not in foa_mapping:
            foa_mapping[field] = 'foa%s' % j

    # for j,field in enumerate(t):
    #     x[('foa%s' % j, field)] = True
    x = rename_flat(x, foa_mapping)

    # Standardize names, then align the example with the overall concept
    # before counting it.
    ns = NameStandardizer()
    sm = StructureMapper(self.concept)
    x = sm.transform(ns.transform(x))
    self.concept.increment_counts(x)

    # Route the example to the positive or negative concept by label.
    if y == 1:
        self.pos_concept.increment_counts(x)
    else:
        self.neg_concept.increment_counts(x)

    # Build the positive pattern from attributes present in every positive
    # example (sum of value counts equals the concept's example count).
    pos_instance = {}
    pos_args = set()
    for attr in self.pos_concept.av_counts:
        attr_count = 0
        for val in self.pos_concept.av_counts[attr]:
            attr_count += self.pos_concept.av_counts[attr][val]
        if attr_count == self.pos_concept.count:
            if len(self.pos_concept.av_counts[attr]) == 1:
                # Exactly one value was ever seen: `val` still holds it,
                # left over from the counting loop above.
                args = get_vars(attr)
                pos_args.update(args)
                pos_instance[attr] = val
            else:
                # Several values were seen: require the attribute but leave
                # its value open by binding it to a fresh generated symbol.
                # NOTE(review): `args` is extended here but never read
                # afterwards, and pos_args is not updated in this branch --
                # confirm that is intended.
                args = get_vars(attr)
                val_gensym = value_gensym()
                args.append(val_gensym)
                pos_instance[attr] = val_gensym

        # Disabled earlier strategy (single-valued attributes only):
        # if len(self.pos_concept.av_counts[attr]) == 1:
        #     for val in self.pos_concept.av_counts[attr]:
        #         if ((self.pos_concept.av_counts[attr][val] ==
        #              self.pos_concept.count)):
        #             args = get_vars(attr)
        #             pos_args.update(args)
        #             pos_instance[attr] = val

    # Build the negative pattern: only attributes whose variables all occur
    # in the positive pattern are considered.
    neg_instance = {}
    for attr in self.neg_concept.av_counts:
        args = set(get_vars(attr))
        if not args.issubset(pos_args):
            continue

        for val in self.neg_concept.av_counts[attr]:
            # Keep values never observed for this attribute in a positive
            # example; if several qualify, the last one iterated wins.
            if ((attr not in self.pos_concept.av_counts or val not in
                 self.pos_concept.av_counts[attr])):
                neg_instance[attr] = val

    # Rename the 'foa<j>' placeholders to '?foa<j>' in both patterns.
    foa_mapping = {'foa%s' % j: '?foa%s' % j for j in range(len(t))}
    pos_instance = rename_flat(pos_instance, foa_mapping)
    neg_instance = rename_flat(neg_instance, foa_mapping)

    # Positive entries become (attr, value) conditions; negative entries are
    # wrapped as ('not', (attr, value)).
    conditions = ([(a, pos_instance[a]) for a in pos_instance] +
                  [('not', (a, neg_instance[a])) for a in neg_instance])

    # One '?foa<i>' target per position in t; the Operator is created with
    # an empty third argument.
    self.target_types = ['?foa%s' % i for i in range(len(t))]
    self.operator = Operator(tuple(['Rule'] + self.target_types),
                             conditions, [])