def ifit(self, instance, do_mapping=False):
    """
    Fold a single instance into the counts kept at the root node.

    No tree structure is grown here: the instance is preprocessed and the
    root's counts are incremented with it, so the root alone is what gets
    used for prediction. The ``do_mapping`` parameter determines whether
    or not structure mapping is added to the preprocessing. It is disabled
    by default to get a really naive model.

    **This process modifies the tree's knowledge.** For a non-modifying
    version see: :meth:`DummyTree.categorize`.

    :param instance: an instance to be categorized into the tree.
    :type instance: :ref:`Instance<instance-rep>`
    :param do_mapping: a flag for whether or not to do structure mapping.
    :type do_mapping: bool
    :return: the root node of the tree containing everything ever added
        to it.
    :rtype: Cobweb3Node
    """
    # Stages shared by both configurations.
    stages = [SubComponentProcessor(), Flattener()]
    if do_mapping:
        # NOTE(review): the StructureMapper constructor visible elsewhere
        # in this file takes (concept, pipeline=None) and no ``gensym``
        # keyword -- confirm this call targets a version that accepts it.
        stages.append(StructureMapper(self.root, gensym=self.gensym))

    prepared = Pipeline(*stages).transform(instance)
    self.root.increment_counts(prepared)
    return self.root
def _trestle_categorize(self, instance):
    """
    Structure map an instance and categorize the mapped result.

    The instance is run through name standardization, flattening,
    sub-component processing and structure mapping against the root. The
    transformed instance is then sanity checked and handed to
    :meth:`_cobweb_categorize`, whose result is returned.

    :param instance: an instance to be categorized into the tree.
    :type instance: {a1:v1, a2:v2, ...}
    :return: A concept describing the instance
    :rtype: concept
    """
    stages = (
        NameStandardizer(self.gensym),
        Flattener(),
        SubComponentProcessor(),
        StructureMapper(self.root),
    )
    mapped = Pipeline(*stages).transform(instance)

    self._sanity_check_instance(mapped)
    return self._cobweb_categorize(mapped)
def __init__(self, concept, pipeline=None):
    """
    Store the target concept and the preprocessing pipeline to use.

    :param concept: the concept kept on ``self.concept``.
    :param pipeline: an optional preprocessing pipeline. When omitted
        (``None``), a default pipeline of ``Tuplizer``,
        ``NameStandardizer``, ``SubComponentProcessor`` and ``Flattener``
        is built.
    """
    self.concept = concept
    # Starts as None; nothing in this constructor populates it.
    self.reverse_mapping = None

    if pipeline is not None:
        self.pipeline = pipeline
    else:
        self.pipeline = Pipeline(
            Tuplizer(),
            NameStandardizer(),
            SubComponentProcessor(),
            Flattener(),
        )
def trestle(self, instance):
    """
    The core trestle algorithm used in fitting and categorization.

    This function is similar to :meth:`Cobweb.cobweb
    <concept_formation.cobweb.CobwebTree.cobweb>`. The key difference is
    that trestle first preprocesses the instance, ending with structure
    mapping against the root (see: :meth:`structure_map
    <concept_formation.structure_mapper.StructureMapper.transform>`),
    sanity checks the result, and only then proceeds through the normal
    cobweb algorithm.

    :param instance: an instance to be categorized into the tree.
    :type instance: :ref:`Instance<instance-rep>`
    :return: A concept describing the instance
    :rtype: CobwebNode
    """
    name_standardizer = NameStandardizer(self.gensym)
    flattener = Flattener()
    sub_components = SubComponentProcessor()
    structure_mapper = StructureMapper(self.root)

    preprocessing = Pipeline(name_standardizer, flattener, sub_components,
                             structure_mapper)
    mapped_instance = preprocessing.transform(instance)

    self._sanity_check_instance(mapped_instance)
    return self.cobweb(mapped_instance)
def infer_missing(self, instance, choice_fn="most likely", allow_none=True):
    """
    Given a tree and an instance, return a new instance whose missing
    attributes are filled in with values picked by the specified choice
    function (either "most likely" or "sampled").

    The instance is preprocessed and categorized; every attribute of the
    resulting concept that the preprocessed instance lacks is predicted
    from that concept. Attributes already present are left untouched, and
    a prediction of ``None`` is not written. The preprocessing is undone
    before the instance is returned.

    .. todo:: write some kind of test for this.

    :param instance: an instance to be completed.
    :type instance: :ref:`Instance<instance-rep>`
    :param choice_fn: a string specifying the choice function to use,
        either "most likely" or "sampled".
    :type choice_fn: a string
    :param allow_none: whether attributes not in the instance can be
        inferred to be missing. If False, then all attributes will be
        inferred with some value.
    :type allow_none: Boolean
    :return: A completed instance
    :rtype: instance
    """
    steps = [
        NameStandardizer(self.gensym),
        Flattener(),
        SubComponentProcessor(),
        StructureMapper(self.root),
    ]
    preprocessing = Pipeline(*steps)

    completed = preprocessing.transform(instance)
    concept = self._cobweb_categorize(completed)

    for attr in concept.attrs('all'):
        if attr not in completed:
            predicted = concept.predict(attr, choice_fn, allow_none)
            if predicted is not None:
                completed[attr] = predicted

    # Undo the preprocessing before handing the instance back.
    return preprocessing.undo_transform(completed)
class StructureMapper(Preprocessor):
    """
    Flatten the instance, perform structure mapping to the concept, rename
    the instance based on this structure mapping, and return the renamed
    instance.

    The concept (and optionally a preprocessing pipeline) is supplied at
    construction time; the instance is supplied to :meth:`transform`.

    :param concept: A concept to structure map the instance to
    :type concept: TrestleNode
    :param pipeline: An optional preprocessing pipeline applied before
        matching; a default one is built when this is ``None``.
    :param instance: An instance to map to the concept (argument of
        :meth:`transform`)
    :type instance: :ref:`raw instance <raw-instance>`
    :return: A fully mapped and flattend copy of the instance (result of
        :meth:`transform`)
    :rtype: :ref:`mapped instance <fully-mapped>`
    """

    def __init__(self, concept, pipeline=None):
        self.concept = concept
        # Populated by transform(); None means no transform has run yet.
        self.reverse_mapping = None
        self.pipeline = (
            pipeline if pipeline is not None
            else Pipeline(Tuplizer(), NameStandardizer(),
                          SubComponentProcessor(), Flattener())
        )

    def get_mapping(self):
        """
        Return the inverse of the stored reverse mapping, i.e. a dict
        keyed by the reverse mapping's values.
        """
        forward = {}
        for key in self.reverse_mapping:
            forward[self.reverse_mapping[key]] = key
        return forward

    def transform(self, instance):
        """
        Preprocess the instance, match it against the concept, remember
        the inverse of the resulting mapping, and return the renamed
        instance.
        """
        prepared = self.pipeline.transform(instance)
        mapping = flat_match(self.concept, prepared)

        # Keep the inverted mapping so undo_transform can rename back.
        inverted = {}
        for source in mapping:
            inverted[mapping[source]] = source
        self.reverse_mapping = inverted

        return rename_flat(prepared, mapping)

    def undo_transform(self, instance):
        """
        Rename the instance back with the stored reverse mapping and undo
        the preprocessing pipeline.

        :raises Exception: if :meth:`transform` has not stored a reverse
            mapping yet.
        """
        if self.reverse_mapping is None:
            raise Exception("Must transform before undoing transform")
        restored = rename_flat(instance, self.reverse_mapping)
        return self.pipeline.undo_transform(restored)
# setup += "from __main__ import random_instance\n" # setup += "from __main__ import test\n" # setup += "c = random_concept(1, %i)\n" % i # setup += "i = random_instance(%i)\n" % i # # for j in range(10): # print("%i\t%0.3f" % (i, timeit.timeit("test(c,i)", setup=setup, # number=10))) num_c_inst = 1 num_objs = 20 concept = random_concept(num_instances=num_c_inst, num_objects=num_objs) instance = random_instance(num_objects=num_objs) pl = Pipeline(Tuplizer(), SubComponentProcessor(), Flattener()) #i = sm.transform(pl.transform(subconcept.av_counts)) #print("STRUCTURE MAPPED INSTANCE") #print(i) pipeline = Pipeline(Tuplizer(), NameStandardizer(concept.tree.gensym), SubComponentProcessor(), Flattener()) #ns = NameStandardizer(concept.tree.gensym) #pprint(subconcept.av_counts) #instance = ns.transform(subconcept.av_counts) instance = pipeline.transform(random_instance(num_objects=num_objs)) inames = frozenset(get_component_names(instance))
# print("%i\t%0.3f" % (i, timeit.timeit("test(c,i)", setup=setup, # number=10))) num_c_inst = 1 num_objs = 20 concept = random_concept(num_instances=num_c_inst, num_objects=num_objs) instance = random_instance(num_objects=num_objs) pl = Pipeline(Tuplizer(), SubComponentProcessor(), Flattener()) # i = sm.transform(pl.transform(subconcept.av_counts)) # print("STRUCTURE MAPPED INSTANCE") # print(i) pipeline = Pipeline(Tuplizer(), NameStandardizer(concept.tree.gensym), SubComponentProcessor(), Flattener()) # ns = NameStandardizer(concept.tree.gensym) # pprint(subconcept.av_counts) # instance = ns.transform(subconcept.av_counts) instance = pipeline.transform(random_instance(num_objects=num_objs)) inames = frozenset(get_component_names(instance)) cnames = frozenset(get_component_names(concept.av_counts, True)) print("INAMES:") print(inames) print("CNAMES:") print(cnames)