Example #1
0
    def like_parent_like_child(self, classifier, node):
        """
        Pick a leaf classification for a node by borrowing it from an ancestor.

        Starting at ``node`` and following ``.parent`` links upwards, the first
        data set whose entropy for ``classifier`` is not exactly 1 supplies the
        dominant value for the result.

        :param classifier: attribute being predicted; handed to
            ``data_set.entropy`` and, in the fallback, read through ``.values``
        :param node: node to start the walk from; may be ``None``
        :return:

        Attribute: an ``'end'`` attribute carrying the chosen classification
        """
        current = node
        while current is not None:
            # entropy() is indexed as [0] = entropy value, [1] = dominant value
            ancestor_entropy = current.data_set.entropy(classifier=classifier)
            if ancestor_entropy[0] != 1:
                # there is an unequal amount of positive and negative values:
                # take the dominant one
                return Attribute(ancestor_entropy[1], 'end')

            # entropy of exactly 1 means an even split, so this level cannot
            # break the tie; move on to the next parent
            current = current.parent

        # Walked past the root without finding a dominant value.
        print(
            'error: finished the loop and there is no parent with a dominant value'
        )
        # list.sort() returns None, so indexing its result always raised a
        # TypeError; min() yields the element the sort would have put first
        # and leaves classifier.values untouched.
        return Attribute(min(classifier.values), 'end')
Example #2
0
    def like_parent_like_child(classifier, node):
        """
        Use on a node that must rely on its parent for a classification.

        Keeps looking at successive parents (starting with ``node`` itself)
        until one is found whose data-set entropy for ``classifier`` is not
        one, then uses that ancestor's dominant value as the classification.

        :param classifier: (Attribute) the attribute for which to classify one's examples
        :param node: (Node) the node child that one wants to decide its classification
        :return:
        Attribute: return the attribute that this child should model based on their parent
        """
        ancestor = node
        while ancestor is not None:
            # entropy() is indexed as [0] = entropy value, [1] = dominant value
            ancestor_entropy = ancestor.data_set.entropy(classifier=classifier)
            if ancestor_entropy[0] != 1:
                # there is an unequal amount of positive and negative values:
                # choose the most dominant value for the attribute
                return Attribute(ancestor_entropy[1], 'end')

            # the data set is evenly split between classifications, so it
            # cannot break the tie; move to the next parent
            ancestor = ancestor.parent

        # Ran out of ancestors. Should not be reached unless there is a perfect
        # split in the examples all the way up to the root.
        # Parenthesised so the line parses on Python 3 as well as Python 2.
        print('error: finished the loop and there is no parent with a dominant value')
        # list.sort() returns None, so indexing its result always raised a
        # TypeError; min() yields the element the sort would have put first
        # and leaves classifier.values untouched.
        return Attribute(min(classifier.values), 'end')
def test_get_attr_by_inherit():
    """``get_attr`` on ``SomeClassSub`` finds ``Data`` only when ``inherit=True``."""
    # UnUsed is never found, with or without the inherited lookup.
    assert Attribute.get_attr(SomeClassSub, UnUsed) is None
    assert Attribute.get_attr(SomeClassSub, UnUsed, inherit=True) is None

    # Data is invisible on a direct lookup and visible on an inherited one.
    assert Attribute.get_attr(SomeClassSub, Data) is None
    assert Attribute.get_attr(SomeClassSub, Data, inherit=True) is not None
Example #4
0
def test_method_get_attrs():
    """``SomeClass.method`` carries exactly one attribute; the subclass's carries none."""
    # Accessed through the class and through an instance alike.
    for candidate in (SomeClass.method, SomeClass().method):
        found = Attribute.get_attrs(candidate)
        # single-element unpacking doubles as a "exactly one" check
        (only,) = found
        assert only.args == ('method', )

    for candidate in (SomeClassSub.method, SomeClassSub().method):
        found = Attribute.get_attrs(candidate)
        assert not found
Example #5
0
def test_methods_not_inherit():
    """No attributes are reported for ``SomeClassSub`` methods, even with ``inherit=True``."""
    # Plain, class and static methods, each reached via the class and via an instance.
    subclass_methods = (
        SomeClassSub.method,
        SomeClassSub().method,
        SomeClassSub.cmethod,
        SomeClassSub().cmethod,
        SomeClassSub.smethod,
        SomeClassSub().smethod,
    )
    for candidate in subclass_methods:
        direct = Attribute.get_attrs(candidate)
        assert not direct
        inherited = Attribute.get_attrs(candidate, inherit=True)
        assert not inherited
Example #6
0
    def on_enter_attributes(self, text, content):
        """
        Handle one piece of content while in the attributes section.

        Switches ``self.parser`` to ``attributes_parser``. For an ``int``
        ``content`` with non-empty ``text``, the paragraph at that index may
        produce a new attribute, after which the most recent attribute parses
        its settings from the paragraph. For a ``Table`` ``content``, the most
        recent attribute parses its settings from the table.

        :param text: text of the current item; ``None`` or empty skips int content
        :param content: paragraph index (``int``) or a ``Table``
        :raises KeyError: when settings arrive before any attribute exists
        """
        self.parser = attributes_parser

        def newest_attribute():
            # Most recently added attribute; settings always attach to it.
            newest = self.attributes[-1] if len(self.attributes) else None
            if newest is None:
                raise KeyError(
                    'Unable to decode initial attribute: class_id: {}'.format(
                        self))
            return newest

        if isinstance(content, int):
            if text is None or not len(text):
                return

            created = Attribute.create_from_paragraph(
                content, self._paragraphs[content])
            if created is not None:
                self.attributes.add(created)

            target = newest_attribute()
            target.parse_attribute_settings_from_text(
                content, self._paragraphs[content])
        elif isinstance(content, Table):
            target = newest_attribute()
            target.parse_attribute_settings_from_text(content, None)
def plot_histogram(attribute: Attribute) -> None:
    """Show a histogram of the attribute's filtered values, labelled with its name."""
    samples = attribute.get_filtered_values()
    sample_count = len(samples)

    # Bin count grows logarithmically with the number of samples.
    # NOTE(review): resembles Sturges' rule (1 + 3.322 * log10(n)); confirm
    # which `log` is imported and whether 3.22 is the intended constant.
    bin_count = int(1 + 3.22 * log(sample_count))

    pyplot.xlabel(attribute.name)
    pyplot.hist(samples, bins=bin_count)
    pyplot.show()
def test_get_attrs_order():
    """Stacked attributes on one class are reported outermost decorator first."""
    @Data(2)
    @Data(1)
    class Kls:
        pass

    first_args = tuple(found.args[0] for found in Attribute.get_attrs(Kls))
    assert first_args == (2, 1)
Example #9
0
    def parse(self, reader, pool):
        """
        Fill in this member from ``reader``, resolving names through ``pool``.

        Reads, in order, three shorts (access flags, name index, descriptor
        index), looks the two indices up in ``pool``, then appends every
        attribute yielded by ``Attribute.parse_attributes``.

        :param reader: source providing ``read_short()``
        :param pool: constant pool providing ``get_value(index)``
        """
        self.access_flags = reader.read_short()

        name_index = reader.read_short()
        self.name_index = name_index
        self.name = pool.get_value(name_index)

        descriptor_index = reader.read_short()
        self.descriptor_index = descriptor_index
        self.descriptor = pool.get_value(descriptor_index)

        for parsed_attribute in Attribute.parse_attributes(reader, pool):
            self.attributes.append(parsed_attribute)
def test_get_inherited_attr():
    """``get_attr`` matches the applied attribute's own type and its base, not a subtype."""
    class Parent(Attribute):
        pass

    class Current(Parent):
        pass

    class Sub(Current):
        pass

    @Current()
    class SomeClassScoped:
        pass

    # The applied attribute is a Current, which is also a Parent.
    assert Attribute.get_attr(SomeClassScoped, Parent) is not None
    assert Attribute.get_attr(SomeClassScoped, Current) is not None
    # Asking for the more derived type finds nothing.
    assert Attribute.get_attr(SomeClassScoped, Sub) is None
def test_get_attrs_order_by_inherit():
    """With ``inherit=True`` attributes come back most-derived class first."""
    @Data(3)
    class C1:
        pass

    @Data(2)
    class C2(C1):
        pass

    @Data(1)
    class C3(C2):
        pass

    inherited = Attribute.get_attrs(C3, inherit=True)
    first_args = tuple(found.args[0] for found in inherited)
    assert first_args == (1, 2, 3)
Example #12
0
    def __init__(self, fullName, type, comment="", range="", namespace="",
                 label="", categoryPath="", parentFullName=""):
        """
        Build the attribute from its full name plus optional metadata.

        :param fullName: full identifier; the text after the last ``#`` becomes
            ``Name`` and, when no ``namespace`` is given, the text before the
            first ``#`` (with ``#`` re-appended) becomes ``Namespace``
        :param type: stored as ``DataType``
        :param comment: stored as ``Description``
        :param range: stored as ``Range``
        :param namespace: explicit namespace; when non-empty it takes precedence
            over whatever ``fullName`` contains
        :param label: stored as ``PrintValue``; when empty the name with
            underscores turned into spaces is used instead
        :param categoryPath: a list, or a single value that is wrapped in a
            list; when empty the namespace (mapped through the application
            configs where an entry exists) is used
        :param parentFullName: stored as ``ParentFullName``
        """
        # Body re-indented with spaces: the original tab-indented body under a
        # space-indented ``def`` is rejected by Python 3 (TabError).
        Attribute.__init__(self)

        # str.split always returns at least one element, so the last piece is
        # always available as the short name.
        name_parts = fullName.split("#")
        tName = name_parts[-1]

        if len(namespace) > 0:
            self.Namespace = namespace
        elif len(name_parts) > 1:
            self.Namespace = name_parts[0] + "#"
        else:
            self.Namespace = namespace

        self.FullName = fullName
        self.Name = tName
        self.DataType = type
        self.Description = comment

        if len(label) > 0:
            self.PrintValue = label
        elif tName:
            self.PrintValue = tName.replace("_", " ")
        else:
            self.PrintValue = u""

        self.Range = range
        self.ParentFullName = parentFullName

        if len(categoryPath) > 0:
            if isinstance(categoryPath, list):
                self.CategoryPath = categoryPath
            else:
                self.CategoryPath = [categoryPath]
        elif len(self.Namespace) > 0:
            # override namespace by application configs
            namespace_categories = configs.application_configs["owlNamespaceCategories"]
            if self.Namespace in namespace_categories:
                self.CategoryPath = [namespace_categories[self.Namespace]]
            else:
                self.CategoryPath = [self.Namespace]
        else:
            self.CategoryPath = []
Example #13
0
    def parse_class(self):
        """
        Parse ``self.file`` as a Java class file and return it as a ``JavaClass``.

        Reads the header in stream order: magic number, minor and major
        version, constant pool, access flags, this-class and super-class names,
        then interfaces, fields, methods and class-level attributes.

        :return: the populated ``JavaClass``
        :raises Exception: when the stream does not start with ``0xCAFEBABE``
        """
        clazz = JavaClass()
        self.reader.load_class(self.file)

        if self.reader.read_int() != 0xCAFEBABE:
            raise Exception('Not a valid Java class file')

        clazz.version['minor'] = self.reader.read_short()
        clazz.version['major'] = self.reader.read_short()

        pool = self.parse_constant_pool(clazz)
        clazz.pool = pool
        # access_flags is a raw u2 bit mask in the class-file format, not a
        # constant-pool index, so it is stored as read (the member-level
        # ``parse`` does the same) instead of being looked up in the pool.
        clazz.access_flags = self.reader.read_short()
        # this_class / super_class are resolved through the constant pool.
        clazz.class_name = pool.get_value(self.reader.read_short())
        clazz.superclass_name = pool.get_value(self.reader.read_short())
        clazz.interfaces = self.parse_interface_table(clazz)
        clazz.fields = self.parse_fields(clazz, pool)
        clazz.methods = self.parse_methods(clazz, pool)
        clazz.attributes = Attribute.parse_attributes(self.reader, pool)

        return clazz
Example #14
0
def test_static_method_get_attrs():
    """``SomeClass.smethod`` carries exactly one attribute, via class or instance."""
    for candidate in (SomeClass.smethod, SomeClass().smethod):
        found = Attribute.get_attrs(candidate)
        # single-element unpacking doubles as a "exactly one" check
        (only,) = found
        assert only.args == ('smethod', )
Example #15
0
def test_class_method_get_attrs():
    """``SomeClass.cmethod`` carries exactly one attribute, via class or instance."""
    for candidate in (SomeClass.cmethod, SomeClass().cmethod):
        found = Attribute.get_attrs(candidate)
        # single-element unpacking doubles as a "exactly one" check
        (only,) = found
        assert only.args == ('cmethod', )
Example #16
0
import numpy as np

# Hard-coded absolute path of the directory holding the aligned training images.
p = '/vagrant/imgs/training_data/training_data/aligned'
# Entry names (not full paths) inside that directory; os.listdir returns them
# in arbitrary order.
d = os.listdir(p)

def _parse_function(filename):
    """Read ``filename``, decode it as a 3-channel PNG and resize it to 95x95."""
    raw_bytes = tf.read_file(filename)
    rgb_image = tf.image.decode_png(raw_bytes, channels=3)
    return tf.image.resize_images(rgb_image, [95, 95])

EPOCHS = 10
BATCH_SIZE = 16

# Only the first five directory entries are used.
filenames = [os.path.join(p, img_path) for img_path in d[:5]]
a = Attribute()
# Labels are looked up by bare entry name, as before.
labels = np.array([a.get_attributes_list(img_path) for img_path in d[:5]])
# labels = labels.reshape(labels[0], labels[1], -1, -1)
print(labels.shape)
# labels = tf.constant(l)

# Load images through the joined paths. The bare names from os.listdir would be
# resolved against the current working directory rather than `p`, and left
# `filenames` unused.
features = [_parse_function(img_path) for img_path in filenames]
print([feature.shape for feature in features])
dataset = tf.data.Dataset.from_tensor_slices((features, labels)).repeat().batch(BATCH_SIZE)

# dataset = dataset.map(_parse_function)
iterator = dataset.make_one_shot_iterator()
x, y = iterator.get_next()

net = tf.layers.dense(x, 8, activation=tf.tanh) # pass the first value from iter.get_next() as input
net = tf.layers.dense(net, 8, activation=tf.tanh)
def test_has_attr():
    """``has_attr`` is falsy for ``UnUsed`` and truthy for ``Data`` on ``SomeClass``."""
    unused_found = Attribute.has_attr(SomeClass, UnUsed)
    assert not unused_found

    data_found = Attribute.has_attr(SomeClass, Data)
    assert data_found
def test_get_attr():
    """``get_attr`` gives ``None`` for ``UnUsed`` and the ``Data`` attribute with args ``(1, 2)``."""
    assert Attribute.get_attr(SomeClass, UnUsed) is None

    data_attr = Attribute.get_attr(SomeClass, Data)
    assert data_attr.args == (1, 2)
def test_get_attrs_by_inherit():
    """``SomeClassSub`` has no direct attributes and exactly one when inherited ones count."""
    direct = Attribute.get_attrs(SomeClassSub)
    assert len(direct) == 0

    with_inherited = Attribute.get_attrs(SomeClassSub, inherit=True)
    assert len(with_inherited) == 1
def test_get_attrs():
    """``SomeClass`` carries exactly one attribute, whose args are ``(1, 2)``."""
    found = Attribute.get_attrs(SomeClass)
    # single-element unpacking doubles as a "exactly one" check
    (only,) = found
    assert only.args == (1, 2)
def test_attr_target():
    """An attribute found via ``inherit=True`` reports the queried class as its target."""
    inherited = Attribute.get_attr(SomeClassSub, Data, inherit=True)
    reported_target = inherited.target
    assert reported_target is SomeClassSub
Example #22
0
    def id3(self, root, target_attribute, attrs, debug=False):
        """
        Recursively build a decision tree that learns how to classify a given type of data
        with a training set of data.

        Three cases are handled for ``root``:

        * its data set has zero entropy: it becomes a leaf with the value of
          its first example;
        * no attributes are left: it becomes a leaf with the dominant value,
          or, on an exact tie, a value borrowed via ``like_parent_like_child``;
        * otherwise it is split on the attribute with the highest gain (ties
          broken by attribute name) and one child is built per attribute value.

        :param root: (Node) the current node that the algorithm is classifying
        :param target_attribute: (Attribute) the trait of the data that we would like to classify by
        :param attrs: (Attributes) The Attributes that are related to this node's classification, excluding any
                                    Attributes that have been used higher up the hierarchy
        :param debug: (boolean) Enables or disables debugging output; when exactly ``True`` the method
                                also pauses on ``input`` after choosing an attribute
        :return: void (the tree is built in place on ``root``)
        """
        # Every example agrees on the target: this node is a leaf.
        if root.data_set.entropy(classifier=target_attribute)[0] == 0:
            value = root.data_set.all_examples[0].get_value(target_attribute)
            root.attribute = Attribute(value, 'end')
            return

        # RUN OUT OF FEATURES: nothing left to split on
        if len(attrs) == 0:
            if debug is True:
                print('warning: out of features')

            # NOTE(review): partial_count presumably counts the positive
            # examples for the target - confirm against DataSet.
            num_pos = root.data_set.partial_count(target_attribute)
            num_neg = len(root.data_set) - num_pos

            if num_pos == num_neg:
                # tie: defer to the closest ancestor with a dominant value
                root.attribute = self.like_parent_like_child(
                    classifier=target_attribute, node=root)
            else:
                dominant_value = root.data_set.entropy(
                    classifier=target_attribute)[1]
                root.attribute = Attribute(dominant_value, 'end')
            return

        # START: BEST ATTRIBUTE
        # Collect every attribute that shares the highest gain seen so far.
        best_gain = None
        candidates = list()
        for attr in attrs:
            gain = root.data_set.gain(target_attribute, attr, debug)

            if len(candidates) == 0:
                candidates = [attr]
                best_gain = gain
            elif best_gain == gain:
                candidates.append(attr)
            elif best_gain < gain:
                candidates = [attr]
                best_gain = gain

        # "Also, if there is a tie in entropy reduction between multiple attributes, you should choose the
        # attribute whose name is earlier in the alphabet (using Python's native string comparison)"
        candidates.sort(key=lambda candidate: candidate.name)
        if debug is True:
            print()
            print('best attributes: ')
            for attr in candidates:
                print(attr.name, " ", end=' ')
            print()

        # create the attribute for this node
        root.attribute = candidates[0]
        root.attribute.values.sort()  # alphabetically sort values
        # END: BEST ATTRIBUTE

        if debug is True:
            print("best attribute: ", root.attribute.name)
            input('...')

        # The attributes still available to the children are the same for
        # every value, so the copy is made once instead of once per value.
        remaining_attrs = copy.copy(attrs)
        remaining_attrs.remove(root.attribute)

        # ADD CHILDREN
        for value in root.attribute.values:
            example_set = [
                x for x in root.data_set.all_examples
                if x.get_value(root.attribute) == value
            ]

            # make new node to pass down
            next_node = Node(data=dataset.DataSet(),
                             parent=root,
                             children=list(),
                             attribute=None)

            # CASE: RUN OUT OF EXAMPLES
            if len(example_set) == 0:
                if debug is True:
                    print('warning: out of examples')
                # choose the most prevalent example from the population that falls into the parent's domain
                next_node.attribute = self.like_parent_like_child(
                    classifier=target_attribute, node=root)

                # no need to delve any more into next node
                root.children.append((value, next_node))
                continue

            # make a dataset with all the value-specific information and store in next node
            next_node.data_set.all_examples = example_set
            # update the children of the node by recursing through
            self.id3(root=next_node,
                     target_attribute=target_attribute,
                     attrs=remaining_attrs,
                     debug=debug)
            root.children.append((value, next_node))