def like_parent_like_child(self, classifier, node):
    """
    Choose the classification a node should inherit from its nearest decisive ancestor.

    Starting at ``node`` and following ``.parent`` links upwards, the first node whose
    ``data_set.entropy(classifier=...)[0]`` is not exactly 1 supplies the answer: the
    second element of that entropy result is wrapped in an ``'end'`` attribute.

    :param classifier: the attribute to classify by; passed to ``data_set.entropy`` and,
        in the fallback case, its ``values`` are consulted
    :param node: the node to start from; ``None`` goes straight to the fallback
    :return: Attribute: return the attribute that this child should model based on their
        parent
    """
    current = node
    while current is not None:
        parent_entropy = current.data_set.entropy(classifier=classifier)
        if parent_entropy[0] != 1:
            # there is an unequal amount of positive and negative values:
            # choose the most dominant value for the attribute
            return Attribute(parent_entropy[1], 'end')
        # entropy of exactly 1 means an even split, so this node cannot decide;
        # move to the next parent
        current = current.parent

    # Walked past the top of the tree without finding a dominant value.
    print(
        'error: finished the loop and there is no parent with a dominant value'
    )
    # list.sort() returns None, so indexing its result always raised TypeError;
    # sorted() gives the smallest value without mutating classifier.values.
    return Attribute(sorted(classifier.values)[0], 'end')
def like_parent_like_child(classifier, node):
    """
    Use on a node that must rely on its parent for a classification.

    Keeps looking up the chain of parents (starting with ``node`` itself) until it finds
    one whose entropy is not one, then uses the dominant value reported by that node's
    entropy as the classification.

    :param classifier: (Attribute) the attribute for which to classify one's examples
    :param node: (Node) the node child that one wants to decide its classification
    :return: Attribute: return the attribute that this child should model based on their
        parent
    """
    parent = node
    while parent is not None:
        parent_entropy = parent.data_set.entropy(classifier=classifier)
        if parent_entropy[0] != 1:
            # there is an unequal amount of positive and negative values:
            # choose the most dominant value for the attribute
            return Attribute(parent_entropy[1], 'end')
        # the data set is an even split of positive and negative classifications;
        # move to the next parent
        parent = parent.parent

    # Should not reach this area unless there is a perfect split in the examples
    # all the way up, which should not happen if you have good data.
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('error: finished the loop and there is no parent with a dominant value')
    # list.sort() returns None, so indexing its result always raised TypeError;
    # sorted() gives the smallest value without mutating classifier.values.
    return Attribute(sorted(classifier.values)[0], 'end')
def test_get_attr_by_inherit():
    """Check ``Attribute.get_attr`` on ``SomeClassSub`` with and without ``inherit``."""
    # UnUsed is not found either way.
    assert Attribute.get_attr(SomeClassSub, UnUsed) is None
    assert Attribute.get_attr(SomeClassSub, UnUsed, inherit=True) is None

    # Data is only found once inherit=True is passed.
    assert Attribute.get_attr(SomeClassSub, Data) is None
    assert Attribute.get_attr(SomeClassSub, Data, inherit=True) is not None
def test_method_get_attrs():
    """Check ``Attribute.get_attrs`` on ``method`` of ``SomeClass`` and ``SomeClassSub``."""
    base_methods = (SomeClass.method, SomeClass().method)
    for candidate in base_methods:
        found = Attribute.get_attrs(candidate)
        # Unpacking doubles as a check that exactly one attribute came back.
        (only,) = found
        assert only.args == ('method', )

    sub_methods = (SomeClassSub.method, SomeClassSub().method)
    for candidate in sub_methods:
        found = Attribute.get_attrs(candidate)
        assert not found
def test_methods_not_inherit():
    """Check that no method of ``SomeClassSub`` reports attributes, even with ``inherit``."""
    candidates = (
        SomeClassSub.method,
        SomeClassSub().method,
        SomeClassSub.cmethod,
        SomeClassSub().cmethod,
        SomeClassSub.smethod,
        SomeClassSub().smethod,
    )
    for candidate in candidates:
        # Accessed through the class and through an instance alike.
        assert not Attribute.get_attrs(candidate)
        assert not Attribute.get_attrs(candidate, inherit=True)
def on_enter_attributes(self, text, content):
    """
    Switch to the attributes parser and hand ``content`` to the most recent attribute.

    ``content`` is handled in two shapes:

    * an ``int`` is used as an index into ``self._paragraphs``. When ``text`` is non-empty,
      a new attribute may be created from that paragraph and added, and then the last
      attribute in ``self.attributes`` parses the paragraph.
    * a ``Table`` is passed straight to the last attribute with no paragraph.

    Any other ``content`` only switches the parser.

    :param text: text of the current element; ``None`` or empty skips int handling
    :param content: paragraph index (int) or a ``Table``
    :raises KeyError: when there is no last attribute to receive the content
    """
    self.parser = attributes_parser

    if isinstance(content, int):
        if text is None or not len(text):
            return

        created = Attribute.create_from_paragraph(
            content, self._paragraphs[content])
        if created is not None:
            self.attributes.add(created)

        if len(self.attributes):
            current = self.attributes[-1]
        else:
            current = None
        if current is None:
            raise KeyError(
                'Unable to decode initial attribute: class_id: {}'.format(self))

        current.parse_attribute_settings_from_text(
            content, self._paragraphs[content])
        return

    if isinstance(content, Table):
        if len(self.attributes):
            current = self.attributes[-1]
        else:
            current = None
        if current is None:
            raise KeyError(
                'Unable to decode initial attribute: class_id: {}'.format(self))

        current.parse_attribute_settings_from_text(content, None)
def plot_histogram(attribute: Attribute) -> None:
    """
    Show a histogram of the attribute's filtered values, labelled with its name.

    :param attribute: source of the values (``get_filtered_values()``) and of the
        x-axis label (``name``)
    """
    samples = attribute.get_filtered_values()
    sample_count = len(samples)

    # The bin count grows with the logarithm of the sample size.
    # NOTE(review): resembles Sturges' rule (1 + 3.322 * log10(n)); confirm that the
    # constant and the base of the `log` in scope are the intended ones.
    bin_count = int(1 + 3.22 * log(sample_count))

    pyplot.xlabel(attribute.name)
    pyplot.hist(samples, bins=bin_count)
    pyplot.show()
def test_get_attrs_order():
    """Check that stacked ``Data`` decorators are reported top-down by ``get_attrs``."""
    @Data(2)
    @Data(1)
    class Kls:
        pass

    collected = Attribute.get_attrs(Kls)
    first_args = tuple(entry.args[0] for entry in collected)
    assert first_args == (2, 1)
def parse(self, reader, pool):
    """
    Populate this member from ``reader``, resolving indices through ``pool``.

    Reads, in order: access flags, name index, descriptor index (each via
    ``read_short``), then every attribute yielded by ``Attribute.parse_attributes``.
    The name and descriptor indices are resolved with ``pool.get_value`` as soon as
    they are read.

    :param reader: source providing ``read_short()``
    :param pool: constant pool providing ``get_value(index)``
    """
    self.access_flags = reader.read_short()

    # Name: index first, then the resolved value.
    name_idx = reader.read_short()
    self.name_index = name_idx
    self.name = pool.get_value(name_idx)

    # Descriptor: same two-step pattern.
    descriptor_idx = reader.read_short()
    self.descriptor_index = descriptor_idx
    self.descriptor = pool.get_value(descriptor_idx)

    parsed = Attribute.parse_attributes(reader, pool)
    for entry in parsed:
        self.attributes.append(entry)
def test_get_inherited_attr():
    """
    Check ``get_attr`` lookups against an attribute class hierarchy.

    A class decorated with ``Current()`` is found when querying for ``Current`` or its
    base ``Parent``, but not when querying for the subclass ``Sub``.
    """
    class Parent(Attribute):
        pass

    class Current(Parent):
        pass

    class Sub(Current):
        pass

    @Current()
    class SomeClassScoped:
        pass

    assert Attribute.get_attr(SomeClassScoped, Parent) is not None
    assert Attribute.get_attr(SomeClassScoped, Current) is not None
    assert Attribute.get_attr(SomeClassScoped, Sub) is None
def test_get_attrs_order_by_inherit():
    """Check that ``get_attrs(..., inherit=True)`` lists the most-derived class first."""
    @Data(3)
    class C1:
        pass

    @Data(2)
    class C2(C1):
        pass

    @Data(1)
    class C3(C2):
        pass

    collected = Attribute.get_attrs(C3, inherit=True)
    first_args = tuple(entry.args[0] for entry in collected)
    assert first_args == (1, 2, 3)
def __init__(self, fullName, type, comment = "", range="", namespace="", label = "", categoryPath = "", parentFullName = "" ):
    """
    Initialise the attribute from its full name and descriptive metadata.

    :param fullName: identifier, optionally containing ``#``; the text after the last
        ``#`` becomes ``Name`` and the text before the first ``#`` (plus ``#``) becomes
        the default ``Namespace``
    :param type: stored as ``DataType``
    :param comment: stored as ``Description``
    :param range: stored as ``Range``
    :param namespace: namespace to use when ``fullName`` contains no ``#``
    :param label: stored as ``PrintValue``; when empty, the name with underscores
        replaced by spaces is used instead
    :param categoryPath: a list, or a single value that gets wrapped in a list; when
        empty, a one-element path is derived from the namespace
    :param parentFullName: stored as ``ParentFullName``
    """
    #begin
    Attribute.__init__(self)
    # Split on '#': last piece is the short name, first piece is the namespace.
    temp = fullName.split("#")
    if len(temp)>0:
        tName = temp[-1:][0]
        if len(temp)>1:
            self.Namespace = temp[0:-1][0] + "#"
        else:
            self.Namespace = namespace
    else:
        # str.split always returns at least one element, so this branch is not
        # expected to run.
        tName = fullName
        self.Namespace = namespace
    # NOTE(review): the original layout of this file was flattened; this check is
    # assumed to sit at the top level, where a non-empty `namespace` overrides the
    # one taken from `fullName`. Confirm against the original indentation.
    if len(namespace)>0:
        self.Namespace = namespace
    self.FullName = fullName
    self.Name = tName
    self.DataType = type
    self.Description = comment
    # Explicit label wins; otherwise prettify the short name; otherwise empty.
    self.PrintValue = label if len(label)>0 else tName.replace("_", " ") if tName else u"";
    self.Range = range
    self.ParentFullName = parentFullName
    #override namespace by application configs
    # An explicit categoryPath is used as-is (wrapped in a list if needed). Otherwise
    # the namespace is mapped through configs.application_configs["owlNamespaceCategories"]
    # when present there, falling back to the raw namespace, or to [] when it is empty.
    self.CategoryPath = (categoryPath if isinstance(categoryPath, (list)) else [categoryPath]) if len(categoryPath)>0 else ([configs.application_configs["owlNamespaceCategories"][self.Namespace] if self.Namespace in configs.application_configs["owlNamespaceCategories"] else self.Namespace] if len(self.Namespace)>0 else [])
def parse_class(self):
    """
    Parse ``self.file`` as a Java class file and return the populated ``JavaClass``.

    Items are consumed from ``self.reader`` in class-file order: magic number,
    minor and major version, constant pool, access flags, this-class and super-class
    references, interfaces, fields, methods and finally the class-level attributes.

    :return: the ``JavaClass`` instance filled in from the file
    :raises Exception: if the file does not start with the magic number 0xCAFEBABE
    """
    clazz = JavaClass()
    self.reader.load_class(self.file)

    if self.reader.read_int() != 0xCAFEBABE:
        raise Exception('Not a valid Java class file')

    clazz.version['minor'] = self.reader.read_short()
    clazz.version['major'] = self.reader.read_short()

    pool = self.parse_constant_pool(clazz)
    clazz.pool = pool

    # access_flags is a u2 bitmask, not a constant-pool index, so it must be stored
    # as read. Resolving it through the pool returned an unrelated constant (or
    # failed on an out-of-range index).
    clazz.access_flags = self.reader.read_short()

    # this_class / super_class are constant-pool indices and are resolved via the pool.
    clazz.class_name = pool.get_value(self.reader.read_short())
    clazz.superclass_name = pool.get_value(self.reader.read_short())

    clazz.interfaces = self.parse_interface_table(clazz)
    clazz.fields = self.parse_fields(clazz, pool)
    clazz.methods = self.parse_methods(clazz, pool)
    clazz.attributes = Attribute.parse_attributes(self.reader, pool)
    return clazz
def test_static_method_get_attrs():
    """Check that ``SomeClass.smethod`` reports one attribute with args ``('smethod',)``."""
    candidates = (SomeClass.smethod, SomeClass().smethod)
    for candidate in candidates:
        found = Attribute.get_attrs(candidate)
        # Unpacking doubles as a check that exactly one attribute came back.
        (only,) = found
        assert only.args == ('smethod', )
def test_class_method_get_attrs():
    """Check that ``SomeClass.cmethod`` reports one attribute with args ``('cmethod',)``."""
    candidates = (SomeClass.cmethod, SomeClass().cmethod)
    for candidate in candidates:
        found = Attribute.get_attrs(candidate)
        # Unpacking doubles as a check that exactly one attribute came back.
        (only,) = found
        assert only.args == ('cmethod', )
import numpy as np

# Directory holding the aligned training images, and its entries.
p = '/vagrant/imgs/training_data/training_data/aligned'
d = os.listdir(p)


def _parse_function(filename):
    """Read the PNG at ``filename`` and return it as a 3-channel image resized to 95x95."""
    image_string = tf.read_file(filename)
    image_decoded = tf.image.decode_png(image_string, channels=3)
    image_resized = tf.image.resize_images(image_decoded, [95, 95])
    return image_resized


EPOCHS = 10
BATCH_SIZE = 16

# Only the first five directory entries are used.
filenames = [os.path.join(p, img_path) for img_path in d[:5]]
a = Attribute()
# Labels are looked up by bare file name.
labels = np.array([a.get_attributes_list(img_path) for img_path in d[:5]])
# labels = labels.reshape(labels[0], labels[1], -1, -1)
print(labels.shape)
# labels = tf.constant(l)
# Images must be read through their full paths; the bare names in `d` only resolve
# when the working directory happens to be `p`.
features = [_parse_function(img_path) for img_path in filenames]
print([feature.shape for feature in features])
dataset = tf.data.Dataset.from_tensor_slices((features, labels)).repeat().batch(BATCH_SIZE)
# dataset = dataset.map(_parse_function)
iterator = dataset.make_one_shot_iterator()
x, y = iterator.get_next()
# pass the first value from iter.get_next() as input
net = tf.layers.dense(x, 8, activation=tf.tanh)
net = tf.layers.dense(net, 8, activation=tf.tanh)
def test_has_attr():
    """Check that ``has_attr`` reports ``Data`` but not ``UnUsed`` on ``SomeClass``."""
    unused_present = Attribute.has_attr(SomeClass, UnUsed)
    assert not unused_present

    data_present = Attribute.has_attr(SomeClass, Data)
    assert data_present
def test_get_attr():
    """Check ``get_attr`` on ``SomeClass``: ``UnUsed`` is absent, ``Data`` carries (1, 2)."""
    missing = Attribute.get_attr(SomeClass, UnUsed)
    assert missing is None

    present = Attribute.get_attr(SomeClass, Data)
    assert present.args == (1, 2)
def test_get_attrs_by_inherit():
    """Check that ``SomeClassSub`` has no attributes of its own and one via ``inherit``."""
    own = Attribute.get_attrs(SomeClassSub)
    assert len(own) == 0

    with_inherited = Attribute.get_attrs(SomeClassSub, inherit=True)
    assert len(with_inherited) == 1
def test_get_attrs():
    """Check that ``SomeClass`` reports exactly one attribute, with args ``(1, 2)``."""
    found = Attribute.get_attrs(SomeClass)
    # Unpacking doubles as a check that exactly one attribute came back.
    (only,) = found
    assert only.args == (1, 2)
def test_attr_target():
    """Check that an attribute found via ``inherit`` has the queried class as ``target``."""
    inherited = Attribute.get_attr(SomeClassSub, Data, inherit=True)
    assert inherited.target is SomeClassSub
def id3(self, root, target_attribute, attrs, debug=False):
    """
    Recursively grow a decision tree below ``root`` from its training examples.

    The node is handled in one of three ways:

    * entropy of zero: the node becomes an ``'end'`` node carrying the target value of
      its first example;
    * no attributes left: the node becomes an ``'end'`` node carrying the dominant value,
      or, on an exact tie, whatever ``like_parent_like_child`` picks;
    * otherwise: the attribute with the highest gain is chosen (ties go to the name that
      sorts first), and one child per attribute value is created and built recursively.
      A value with no matching examples gets its classification from
      ``like_parent_like_child``.

    :param root: (Node) the current node that the algorithm is classifying
    :param target_attribute: (Attribute) the trait of the data that we would like to
        classify by
    :param attrs: (Attributes) the attributes still available to split on at this node,
        excluding any that have been used higher up the hierarchy
    :param debug: (boolean) enables debugging output (and an interactive pause) only
        when it is exactly ``True``
    :return: void
    """
    verbose = debug is True

    # Pure node: every example agrees, so take the value from the first one.
    if root.data_set.entropy(classifier=target_attribute)[0] == 0:
        first_example = root.data_set.all_examples[0]
        value = first_example.get_value(target_attribute)
        root.attribute = Attribute(value, 'end')
        return

    # Out of features: settle on the dominant value, or defer to the ancestors on a tie.
    if len(attrs) == 0:
        if verbose:
            print('warning: out of features')
        num_pos = root.data_set.partial_count(target_attribute)
        num_neg = len(root.data_set) - num_pos
        if num_pos == num_neg:
            root.attribute = self.like_parent_like_child(
                classifier=target_attribute, node=root)
        else:
            dominant_value = root.data_set.entropy(
                classifier=target_attribute)[1]
            root.attribute = Attribute(dominant_value, 'end')
        return

    # Collect every attribute that shares the highest gain seen so far.
    leaders = []
    for candidate in attrs:
        gain = root.data_set.gain(target_attribute, candidate, debug)
        if not leaders or leaders[0][1] == gain:
            leaders.append((candidate, gain))
        elif leaders[0][1] < gain:
            leaders = [(candidate, gain)]

    # Ties are broken by attribute name, using Python's native string comparison.
    leaders.sort(key=lambda pair: pair[0].name)

    if verbose:
        print()
        print('best attributes: ')
        for pair in leaders:
            print(pair[0].name, " ", end=' ')
        print()

    # The winner labels this node; its values are sorted in place so children are
    # created in alphabetical order.
    root.attribute = leaders[0][0]
    root.attribute.values.sort()

    if verbose:
        print("best attribute: ", root.attribute.name)
        input('...')

    for value in root.attribute.values:
        matching = [
            example for example in root.data_set.all_examples
            if example.get_value(root.attribute) == value
        ]

        child = Node(data=dataset.DataSet(),
                     parent=root,
                     children=list(),
                     attribute=None)

        # Each child gets its own copy of the attributes, minus the one just used.
        remaining = copy.copy(attrs)
        remaining.remove(root.attribute)

        if len(matching) == 0:
            # Out of examples: borrow the classification from this node or above.
            if verbose:
                print('warning: out of examples')
            child.attribute = self.like_parent_like_child(
                classifier=target_attribute, node=root)
        else:
            child.data_set.all_examples = matching
            self.id3(root=child,
                     target_attribute=target_attribute,
                     attrs=remaining,
                     debug=debug)

        root.children.append((value, child))