Beispiel #1
0
def decision_tree_learning(data, attributes, parent_examples, i):
    """Recursively build a decision tree for ``data``.

    Args:
        data: the examples to split at this level.
        attributes: the random variables still available for splitting.
        parent_examples: the examples of the calling level, used as the
            fallback when ``data`` is empty.
        i: depth assigned to the node created at this level.

    Returns:
        Whatever ``plurality_value`` / ``same_classification_check`` yield
        for a terminal case, otherwise the ``Tree`` rooted at the chosen
        variable with one child per value of that variable.
    """
    # Terminal case 1: nothing to split, defer to the parent's examples.
    if len(data) == 0:
        return plurality_value(parent_examples)

    # Terminal case 2: all examples already agree.
    all_same, label = same_classification_check(data)
    if all_same:
        return label

    # Terminal case 3: no variables left to split on.
    if not list(attributes):
        return plurality_value(data)

    # First entry of the ranking returned by importance(); per the original
    # note this is the variable with the highest gain.
    best = importance(data, attributes)[0][0]
    root = Node(best)
    root.set_depth(i)  # depth is used when printing the tree
    tree = Tree(root)
    child_depth = i + 1

    column = index_of_var(best, data)
    branch_values = get_attributes(column, data)
    if branch_values is None:
        # Nothing left to branch on: fall back to the plurality value.
        return plurality_value(data)

    # Variables handed to every child: the current ones minus ``best``.
    remaining = list(attributes)
    try:
        remaining.remove(best)
    except Exception:
        pass
    remaining = tuple(remaining)

    for value in branch_values:
        subset = get_examples_attribute(column, value, data)
        subtree = decision_tree_learning(subset, remaining, data, child_depth)
        # Attach the child's root under the branch labelled ``value``.
        tree.get_root().add_child(subtree.get_root(), value)
    return tree
Beispiel #2
0
 def test_search_node(self):
     """search_node finds both added children as well as the root value."""
     tournament_tree = Tree(self.g1, 2)
     for child in (self.g2, self.g3):
         tournament_tree.get_root().add_child(child)
     # Same lookup order as before: the two children first, then the root.
     for expected in (self.g2, self.g3, self.g1):
         found = tournament_tree.search_node(expected)
         self.assertEqual(found.get_value(), expected)
Beispiel #3
0
def main():
    """Decompress the Huffman-encoded file(s) named on the command line.

    Each path in ``sys.argv[1:]`` is read as raw bytes and expanded into a
    string of '0'/'1' characters, eight per byte; the strings of all files
    are concatenated. ``decode`` parses the header at the start of that bit
    string and returns the tree root together with the index where it
    stopped. ``decode_char`` is then called until it yields the "EOF"
    marker, and the collected text is handed to
    ``salvar_arquivo_descompactado``.

    If an input file cannot be opened or read, a message naming it is
    printed and the function returns without decoding. Any other error
    propagates to the caller instead of being reported as a missing file.
    """
    arq = None
    pedacos = []
    try:
        for arq in sys.argv[1:]:
            with open(arq, 'rb') as f:
                # One read per file; every byte becomes its 8-bit string.
                pedacos.extend('{:08b}'.format(byte) for byte in f.read())
    except OSError:
        print("Não existe arquivo chamado:", arq)
        return
    cadeia_binaria = "".join(pedacos)

    # Decode the header and get the index where the text starts.
    root_dec, pos = decode(cadeia_binaria)

    # Decode the text using the tree rebuilt from the header.
    tree_dec = Tree(root_dec)
    # One-element list; presumably lets decode_char advance the position
    # in place between calls (verify against decode_char).
    pos = [pos - 1]
    caracteres = []
    while True:
        root_dec = copy.copy(tree_dec.get_root())
        char_atual = decode_char(root_dec, pos, cadeia_binaria)
        if char_atual == "EOF":
            # The end-of-file marker is not part of the text.
            break
        caracteres.append(char_atual)
    texto_completo = "".join(caracteres)

    # Save the complete text to a file.
    salvar_arquivo_descompactado(texto_completo)
    print("Arquivo descompactado com sucesso(descompact.txt)!!!!")
Beispiel #4
0
def decode(string_cod):
    """Rebuild the Huffman tree from the header at the start of a bit string.

    The header is read one character at a time starting at index 1 (index 0
    is skipped; presumably it stands for the root node created up front):

    * '0' creates an internal node ('n-folha') whose char is the int 0;
    * '1' creates a leaf; the following 7 bits are either '1111111'
      (the 'EOF' leaf) or the code point of the leaf's character.

    Each new node becomes the left child of the node on top of the stack if
    that slot is free, otherwise its right child (which completes that
    parent, so it is popped). Internal nodes are pushed so that their own
    children can be attached next. Parsing stops when the stack is empty or
    the string is exhausted.

    Args:
        string_cod: string made of '0' and '1' characters.

    Returns:
        tuple: ``(root, index)`` where ``root`` is the root of the rebuilt
        tree and ``index`` is the position right after the last header
        character consumed.

    Raises:
        ValueError: if ``string_cod`` is empty, contains a character other
            than '0'/'1' at a node position, or a leaf payload is not a
            valid binary number.
    """
    if string_cod == "":
        # Previously this fell through to the return with unbound locals.
        raise ValueError("decode() requires a non-empty bit string")

    node = Node(None, None, 'raiz', 0)
    stack = [node]
    tree = Tree(node)

    index = 1
    while stack and index < len(string_cod):
        bit = string_cod[index]
        if bit == '0':
            node = Node(None, None, 'n-folha', 0)
        elif bit == '1':
            aux = string_cod[index + 1:index + 8]
            if aux == '1111111':
                node = Node(None, None, 'folha', 'EOF')
            else:
                # 7-bit code point; chr() also handles 0, which the former
                # bytes round-trip turned into an empty string.
                node = Node(None, None, 'folha', chr(int(aux, 2)))
            index += 7
        else:
            raise ValueError(
                "unexpected character %r at position %d" % (bit, index))

        if stack[-1].get_e() is None:
            stack[-1].set_e(node)
        else:
            stack[-1].set_d(node)
            stack.pop()

        # Internal nodes carry an int as char and still need children.
        if isinstance(node.get_char(), int):
            stack.append(node)
        index += 1
    return (tree.get_root(), index)
from node import Node
from queue import Queue
from tree import Tree

# Sample tree used to exercise bfs(); the root is created from "apple".
tree = Tree("apple")

# Second level: "banana" as the left child, "cherry" as the right child.
tree.get_root().set_left_child(Node("banana"))
tree.get_root().set_right_child(Node("cherry"))

# Third level: only the left child of "banana" is attached; the remaining
# grandchildren below are commented out.
tree.get_root().get_left_child().set_left_child(Node("dates"))
# tree.get_root().get_left_child().set_right_child(Node("elderberry"))

# tree.get_root().get_right_child().set_left_child(Node("fig"))
# tree.get_root().get_right_child().set_right_child(Node("grape"))


def bfs(tree):
    """Print the node values of ``tree`` in breadth-first order.

    Nodes are taken from a ``Queue`` one at a time; for each one its value
    is recorded and its existing children are enqueued, left before right.
    The loop relies on the truthiness of the ``Queue`` instance to stop.
    """
    pending = Queue()
    pending.enq(tree.get_root())
    visited = []

    while pending:
        current = pending.deq()
        visited.append(current.value)
        # Left child is enqueued before the right one.
        for has_child, get_child in (
            (current.has_left_child, current.get_left_child),
            (current.has_right_child, current.get_right_child),
        ):
            if has_child():
                pending.enq(get_child())

    print(visited)
Beispiel #6
0
def build_aggregate_tree(options,args,fields,dim,types,dict_dim, otfa = lambda x,y: x.get_root(),randomize=False,no_final_aggregation=False,from_gui=True):
    """Fold the records of options.input into one Tree per time window.

    Python 2 code (print statement, ``except Exception, e``).

    Every line of the input file is matched against options.reg_exp; the
    i-th capture group is stored under fields[i]. The "timestamp" field is
    converted to seconds and decides which window the record belongs to;
    the record is then added to the current window's tree with update_tree
    and its VALUE field is added to the window total.

    Finished windows are appended to list_res as (tree, total_count) and
    their start times to list_window; windows without any record get a
    (None, 0.0) placeholder.

    The visible body does not return list_res / list_window, and the
    parameters args, types, randomize, no_final_aggregation and from_gui are
    not read in it; otfa is only referenced inside the disabled section.
    NOTE(review): the function looks truncated after the loop -- confirm
    against the complete source.
    """
    
    # Not used in the visible part of the function.
    tree_src = None
    tree_dst = None
   
    list_res = []      # (tree, total_count) per finished window
    list_window = []   # start time of each window
    tree = None        # tree of the window currently being filled
    # The whole input is read into memory before processing.
    f = open(options.input) 
    lines = [ l for l in f]
    f.close()
    
    
    total_count = 0.0
    current_window = 0
    lineno = 0
    for line in lines:
        # Progress output: index of the line being processed.
        print lineno
        lineno+=1
        find = re.search(options.reg_exp,line)

        if find:

            dict_fields = {}
            
            # Capture group i+1 holds the value of fields[i].
            for i in range(len(fields)):
                dict_fields[fields[i]] = find.group(i+1)
            # Timestamps are either "YYYY-mm-dd HH:MM:SS" or plain integer
            # seconds; the integer form is the fallback.
            try:
                sec = time.mktime(time.strptime(dict_fields["timestamp"],"%Y-%m-%d %H:%M:%S"))
            except:
                sec = int(dict_fields["timestamp"])

            # The record falls outside the current window.
            # NOTE(review): current_window starts at 0, so a first record
            # with sec < options.window skips this branch and reaches
            # update_tree with tree still None -- confirm inputs rule this out.
            if sec >= current_window + options.window: 
                if tree != None:
                    # Close the current window and open the next one.
                    list_res.append((tree,total_count))
                    tree = Tree(dim)
                    current_window = current_window + options.window
                    
                    # Emit a placeholder for every window that had no record.
                    while sec>= current_window + options.window:
                        list_res.append((None,0.0))
                        list_window.append(current_window)
                        current_window = current_window + options.window
                    
                else:
                    # First matching record: the first window starts at its
                    # timestamp.
                    tree = Tree(dim)
                    current_window = sec
                total_count = 0.0
                
                list_window.append(current_window)
            
            # Add the record to the current window and to its running total.
            update_tree(tree,dict_fields,dict_dim,options.type_aggr)
            total_count+= float(dict_fields[VALUE])
            
            # Disabled section: the condition below is always false, so
            # nothing in this block ever runs.
            if 1 == 0:
                try:    
                    update_tree(tree,dict_fields,dict_dim,options.type_aggr)
                    #print tree,dict_fields,dict_dim,options.type_aggr
                except Exception, e:
                    print e
                    raise
                    # The two statements below follow the raise and are
                    # therefore unreachable.
                    tree.set_root(tree.get_root().post_aggregate())
                    update_tree(tree,dict_fields,dict_dim,options.type_aggr)
                
                total_nodes = len(tree.get_root().preorder())
                if total_nodes > options.max_nodes:
                    
                    tree.set_root(otfa(tree,total_count))
                    tree.set_root(tree.get_root().post_aggregate())
                    
                    #aggregate_LRU(tree,options.max_nodes,options.aggregate,total_count)
                
                #print tree.get_root().preorder()
                #tree.increase_age_tree()
                
                total_count+= float(dict_fields[VALUE])
                
                map(lambda x: x.increase_age(),tree.get_root().preorder())
Beispiel #7
0
def benchmark_aggregation(options, args, fields, dim, types, dict_dim,
                          otfa=lambda x, y: x.get_root()):
    """Build a single tree from options.input and report its size around aggregation.

    Every line of the input file is matched against options.reg_exp; the
    i-th capture group is stored under fields[i] and the record is added to
    the tree with update_tree. Whenever the tree holds more than
    options.max_nodes nodes, ``otfa(tree, total_count)`` supplies a new
    root, which is then post-aggregated. After the loop the tree is
    aggregated with options.aggregate and the node counts before and after
    are printed.

    Args:
        options: object providing input, reg_exp, type_aggr, max_nodes,
            max_lines and aggregate.
        args: not read by this function.
        fields: field names, in capture-group order.
        dim: passed to the Tree constructor.
        types: not read by this function.
        dict_dim: passed through to update_tree.
        otfa: callable ``(tree, total_count) -> root`` applied when the
            tree grows past options.max_nodes. The default keeps the
            current root. It now accepts both arguments it is called with;
            the previous one-argument default raised TypeError.
    """
    list_res = []
    list_window = []
    total_count = 0.0
    current_window = 0
    tree = Tree(dim)
    k = 0

    # The context manager closes the input file even if a record fails.
    with open(options.input) as f:
        for line in f:
            find = re.search(options.reg_exp, line)
            if find:
                dict_fields = {}
                for i, field in enumerate(fields):
                    dict_fields[field] = find.group(i + 1)

                # The parsed value is not used here; the conversion is kept
                # so that a malformed timestamp still aborts the run.
                try:
                    sec = time.mktime(time.strptime(
                        dict_fields["timestamp"], "%Y-%m-%d %H:%M:%S"))
                except ValueError:
                    sec = int(dict_fields["timestamp"])

                list_window.append(current_window)

                try:
                    update_tree(tree, dict_fields, dict_dim, options.type_aggr)
                except Exception:
                    # Retry once after post-aggregating the tree.
                    tree.set_root(tree.get_root().post_aggregate())
                    update_tree(tree, dict_fields, dict_dim, options.type_aggr)

                total_nodes = len(tree.get_root().preorder())
                if total_nodes > options.max_nodes:
                    tree.set_root(otfa(tree, total_count))
                    tree.set_root(tree.get_root().post_aggregate())

                total_count += float(dict_fields[VALUE])

                # Explicit loop: a bare map() is lazy on Python 3 and would
                # never call increase_age().
                for node in tree.get_root().preorder():
                    node.increase_age()

            # k counts every line read, matching or not.
            k = k + 1
            if options.max_lines > 0 and options.max_lines < k:
                break

    if tree.get_root() is not None:
        list_res.append((tree, total_count))

    pretotal_nodes_before_aggregation = len(tree.get_root().preorder())
    tree.aggregate(options.aggregate, total_count)

    # Single parenthesised argument: same output on Python 2 and 3.
    print("Total nodes before pre order aggregation %s" % pretotal_nodes_before_aggregation)
    print("Total nodes after aggregation %s" % len(tree.get_root().preorder()))
Beispiel #8
0
def build_stability_aggregation_trees(options, args, fields, dim, types, dict_dim,
                                      otfa=lambda x, y: x.get_root(),
                                      randomize=False, no_final_aggregation=False):
    """Fold the records of options.input into one Tree per time window.

    Every line of the input file is matched against options.reg_exp; the
    i-th capture group is stored under fields[i]. The "timestamp" field is
    converted to seconds and decides which window the record belongs to.
    The record is added to the current window's tree with update_tree;
    when the tree holds more than options.max_nodes nodes,
    ``otfa(tree, total_count)`` supplies a new root which is then
    post-aggregated. Finally the record's VALUE is added to the window
    total and every node of the tree is aged.

    Finished windows are appended to ``list_res`` as (tree, total_count)
    and their start times to ``list_window``; windows without any record
    get a (None, 0.0) placeholder.

    Args:
        options: object providing aggregate, input, reg_exp, window,
            type_aggr and max_nodes.
        args: not read in the visible body.
        fields: field names, in capture-group order.
        dim: passed to the Tree constructor.
        types: not read in the visible body.
        dict_dim: passed through to update_tree.
        otfa: callable ``(tree, total_count) -> root``.
        randomize: shuffle the input lines before processing.
        no_final_aggregation: forced to True when options.aggregate is not
            greater than 0.

    NOTE(review): the visible body ends after the per-line loop without
    returning ``list_res`` / ``list_window`` -- confirm against the complete
    source.
    """
    #TODO: remove nof_final aggregtion parameter and replace it by the test below
    if not options.aggregate > 0:
        no_final_aggregation = True

    list_res = []      # (tree, total_count) per finished window
    list_window = []   # start time of each window
    tree = None        # tree of the window currently being filled

    # The whole input is read up front so it can be shuffled.
    with open(options.input) as f:
        lines = f.readlines()

    if randomize:
        random.shuffle(lines)

    total_count = 0.0
    current_window = 0

    for line in lines:
        find = re.search(options.reg_exp, line)
        if not find:
            continue

        dict_fields = {}
        for i, field in enumerate(fields):
            dict_fields[field] = find.group(i + 1)

        # Timestamps are either "YYYY-mm-dd HH:MM:SS" or integer seconds.
        try:
            sec = time.mktime(time.strptime(
                dict_fields["timestamp"], "%Y-%m-%d %H:%M:%S"))
        except ValueError:
            sec = int(dict_fields["timestamp"])

        if sec >= current_window + options.window:
            if tree is not None:
                # Close the current window and open the next one.
                list_res.append((tree, total_count))
                tree = Tree(dim)
                current_window = current_window + options.window

                # Emit a placeholder for every window that had no record.
                while sec >= current_window + options.window:
                    list_res.append((None, 0.0))
                    list_window.append(current_window)
                    current_window = current_window + options.window
            else:
                # First matching record: the first window starts at its
                # timestamp.
                tree = Tree(dim)
                current_window = sec
            total_count = 0.0
            list_window.append(current_window)

        try:
            update_tree(tree, dict_fields, dict_dim, options.type_aggr)
        except Exception as e:
            # Report and propagate. The retry that used to follow the
            # ``raise`` could never execute and has been dropped.
            print(e)
            raise

        total_nodes = len(tree.get_root().preorder())
        if total_nodes > options.max_nodes:
            tree.set_root(otfa(tree, total_count))
            tree.set_root(tree.get_root().post_aggregate())

        total_count += float(dict_fields[VALUE])

        # Explicit loop: a bare map() is lazy on Python 3 and would never
        # call increase_age().
        for node in tree.get_root().preorder():
            node.increase_age()
def main():
    """Compress the text file(s) named on the command line with Huffman coding.

    The contents of every path in ``sys.argv[1:]`` are concatenated. A
    frequency table is built over the characters (plus an "EOF" entry with
    count 1), one leaf ``Node`` is created per entry, and the two nodes
    returned by ``menores_nodes`` are merged repeatedly until a single root
    remains. ``tabela_pre`` fills the code table from that root, the text
    plus the EOF code is encoded with it, ``cab`` produces the header, and
    header + encoded text are handed to ``salvar_arquivo``.

    If an input file cannot be opened or read, a message is printed and the
    function returns without compressing. Any other error propagates to the
    caller instead of being reported as a missing file.
    """
    conteudo = ""
    try:
        for arg in sys.argv[1:]:
            print(arg)
            with open(arg, 'r') as entrada:
                conteudo += entrada.read()
    except OSError:
        print("Não existe arquivo chamado")
        return

    # Single pass over the text; keys keep first-seen order, exactly as the
    # former per-character count() did.
    tabela_inicial = {}
    for caractere in conteudo:
        tabela_inicial[caractere] = tabela_inicial.get(caractere, 0) + 1
    tabela_inicial["EOF"] = 1

    # One single-node tree per table entry.
    nodes = [Node(None, None, frequencia, chave)
             for chave, frequencia in tabela_inicial.items()]

    # Huffman algorithm: merge the two nodes picked by menores_nodes until
    # only the root is left.
    while len(nodes) > 1:
        menores = menores_nodes(nodes)
        soma = menores[0].get_apar() + menores[1].get_apar()
        # The parent stores the combined count in both of its last fields.
        node_pai = Node(menores[0], menores[1], soma, soma)
        nodes.remove(menores[0])
        nodes.remove(menores[1])
        nodes.append(node_pai)

    tree = Tree(nodes[0])
    root = tree.get_root()

    # Build the encoding table.
    tabela_final = {}
    tabela_pre(root, [], tabela_final)
    print("Tabela de codificação: ", tabela_final)

    # Encode the text, then append the end-of-file marker.
    cod = "".join(tabela_final[caractere] for caractere in conteudo)
    cod += tabela_final['EOF']

    # Build the file header; cab() writes its result into the list.
    cab_e_texto = [""]
    cab(root, cab_e_texto, tabela_final)

    # Header followed by the encoded text, both as bit strings.
    arquivo = cab_e_texto[0] + cod

    salvar_arquivo(arquivo)
    print("Arquivo compactado criado com sucesso (compact.txt)!!!!!")
Beispiel #10
0
 def test_init_node_has_id_of_root(self):
     """A tree built from a node whose id is 'root' reports 'root' as its root id."""
     tree = Tree(Node(title='foo', id='root'))
     root_id = tree.get_root().get_id()
     self.assertEqual(root_id, 'root')
Beispiel #11
0
 def test_init_node_not_have_id_root(self):
     """A tree built from a node whose id is not 'root' still reports 'root'."""
     tree = Tree(Node(title='foo', id=0))
     root_id = tree.get_root().get_id()
     self.assertEqual(root_id, 'root')
Beispiel #12
0
 def test_get_root(self):
     """The root of a freshly built tree carries the value it was built with."""
     tournament_tree = Tree(self.g1, 2)
     root_value = tournament_tree.get_root().get_value()
     self.assertEqual(root_value, self.g1)
                # Get the trees now
                for i in range(num_trees):
                    # Get train data since train data varies for each tree
                    num_examples_to_sample = np.round(
                        0.1 * train_data.shape[0]).astype(int)
                    ind = np.random.randint(0, train_data.shape[0],
                                            num_examples_to_sample)

                    label_data_tr = np.hstack((train_label[ind].reshape(
                        (-1, 1)), train_data[ind])).astype('str')
                    label_data_tr = np.vstack((header, label_data_tr))
                    train_data_obj = Data(data=label_data_tr)

                    myTree = Tree()
                    tree_util.ID3(train_data_obj, train_data_obj.attributes,
                                  myTree.get_root(), myTree, True, depth)

                    # Prediction from each tree on all of train data and test data
                    out_tr = tree_util.prediction_label(
                        label_data_tr_full_obj, myTree)
                    out_te = tree_util.prediction_label(
                        label_data_te_obj, myTree)

                    data_tr_svm[:, i] = out_tr
                    data_te_svm[:, i] = out_te

                # Train on the best hyperparameter
                classifier = SVM(num_trees, C)

                # SVM part start training
                for e in range(epoch_cv):
Beispiel #14
0
# Test data (DATA_DIR must already be defined before this point).
data2 = np.loadtxt(DATA_DIR + 'test.csv', delimiter=',', dtype=str)
data_obj2 = Data(data=data2)

# Train data
data = np.loadtxt(DATA_DIR + 'train.csv', delimiter=',', dtype=str)
data_obj = Data(data=data)

print("Filling Missing Entries\n...")
# Fill the missing entries in the data. The majority values are computed
# from the training set only and applied to both the train and test sets.
majority = util.get_majority_column_data(data_obj)
data_obj = util.fill_data(data_obj, majority, data)
data_obj2 = util.fill_data(data_obj2, majority, data2)
print("...\n...\n...\nFilled Missing Entries\n")

# Build the tree on the training data.
# NOTE(review): the trailing arguments (False, 2) look like a depth-limit
# flag and a depth value -- confirm against util.ID3.
myTree = Tree()
util.ID3(data_obj, data_obj.attributes, myTree.get_root(), myTree, False, 2)

# print("--------------- Printing Tree --------------------")
myTree.print_tree(myTree.get_root(), 0)

# Accuracy Prediction: first on the training set, then on the test set.
# NOTE(review): the first result is overwritten by the second; presumably
# prediction_accuracy reports its result itself -- confirm.
acc = util.prediction_accuracy(data_obj, myTree)
acc = util.prediction_accuracy(data_obj2, myTree)

# Depth Prediction
print("\nDepth of Tree = " + str(myTree.get_depth(myTree.get_root())))

################################################################################
# Q3 (2a)
################################################################################
# From here on DATA_DIR points at the cross-validation folds.
DATA_DIR = 'data/CVfolds_new/'
Beispiel #15
0
class TestTreeMethods(unittest.TestCase):
    """Unit tests for Tree construction, Tree.add() and Tree.search().

    The fixtures are a five-node tree (``t1``) nested under the implicit
    'root' node and a tree with a single node besides the root
    (``one_node_tree``).
    """

    def setUp(self):
        self.n1 = Node(title='node1', id='1', parent_id='root')
        self.n2 = Node(title='node2', id='2', parent_id='1')
        self.n3 = Node(title='node3', id='3', parent_id='1')
        self.n4 = Node(title='node4', id='4', parent_id='2')
        self.n5 = Node(title='node5', id='5', parent_id='4')

        # set up tree with multiple nodes
        self.t1 = Tree()
        self.t1.add(self.n1)  # node1 has many children
        self.t1.add(self.n2)
        self.t1.add(self.n3)
        self.t1.add(self.n4)
        self.t1.add(self.n5)

        # set up tree with only one node besides root
        self.n6 = Node('node6', '6', parent_id='root')
        self.one_node_tree = Tree()
        self.one_node_tree.add(self.n6)

    def tearDown(self):
        # Drop every fixture created in setUp.
        self.n1 = None
        self.n2 = None
        self.n3 = None
        self.n4 = None
        self.n5 = None
        self.n6 = None
        self.t1 = None
        self.one_node_tree = None

    def test_get_root(self):
        self.assertEqual(self.t1.get_root().get_id(), 'root')

    def test_init_node_not_have_id_root(self):
        """Init with a node whose id is not 'root' still yields a 'root' root."""
        n = Node(title='foo', id=0)
        t = Tree(n)
        self.assertEqual(t.get_root().get_id(), 'root')

    def test_init_node_has_id_of_root(self):
        n = Node(title='foo', id='root')
        t = Tree(n)
        self.assertEqual(t.get_root().get_id(), 'root')

    def test_string_empty_tree(self):
        t2 = Tree(None)
        self.assertEqual(t2.__str__(), '|---Google_Drive\n')

    def test_string_non_empty_tree(self):
        print("You can't really test this...automatically")
        print(self.t1)

    def test_search_for_root(self):
        result = self.t1.search('root')
        self.assertEqual(result.get_id(), 'root')

    def test_search_for_first_node_added(self):
        result = self.t1.search('1')
        self.assertEqual(result.get_id(), '1')

    def test_search_for_nonexisting_node_in_one_node_tree(self):
        result = self.one_node_tree.search(self.n2.get_id())
        self.assertIsNone(result)

    def test_new_tree_add_2_nodes_and_print_it(self):
        t = Tree()
        n = Node(title='test', id='1', parent_id='root')
        t.add(n)
        n = Node(title='test2', id='2', parent_id='1')
        t.add(n)
        print(t)

    def test_new_tree_add_2_nodes_and_search_it(self):
        t = Tree()
        n = Node(title='test', id='1', parent_id='root')
        t.add(n)
        n = Node(title='test2', id='2', parent_id='1')
        t.add(n)
        result = t.search('2')
        self.assertEqual(result.get_id(), '2')

    # From here down, tests are failing
    def test_search_for_nested_leaf_node(self):
        result = self.t1.search(self.n5.get_id())
        self.assertEqual(result.get_id(), '5')

    # The five tests below used assertTrue(value, expected), which treats
    # ``expected`` as the failure message and passes for any truthy id.
    # They now compare the ids for real.
    def test_search_for_node1(self):
        result = self.t1.search(self.n1.get_id())
        self.assertEqual(result.get_id(), '1')

    def test_search_for_node2(self):
        result = self.t1.search(self.n2.get_id())
        self.assertEqual(result.get_id(), '2')

    def test_search_for_node3(self):
        result = self.t1.search(self.n3.get_id())
        self.assertEqual(result.get_id(), '3')

    def test_search_for_node4(self):
        result = self.t1.search(self.n4.get_id())
        self.assertEqual(result.get_id(), '4')

    def test_search_for_node5(self):
        result = self.t1.search(self.n5.get_id())
        self.assertEqual(result.get_id(), '5')

    def test_search_empty_tree(self):
        root = None
        empty_tree = Tree(root)
        result = empty_tree.search(self.n1.get_id())
        self.assertIsNone(result)

    def test_check_that_node_was_added(self):
        n = Node('test_node', id='7', parent_id='4')
        was_added = self.t1.add(n)
        self.assertEqual(was_added, 1)

    def test_add_node_whose_parent_is_in_tree(self):
        """Adding a node whose parent is node4 is expected to return 1."""
        n = Node('test_node2', id='8', parent_id='4')
        was_added = self.t1.add(n)  # should be 1
        self.assertEqual(was_added, 1)

    def test_add_node_whose_parent_is_not_in_tree(self):
        n = Node('test_node3', id='9', parent_id='0')
        was_added = self.t1.add(n)  # should be -1
        self.assertEqual(was_added, -1)

    def test_add_node_whose_parent_is_none(self):
        n = Node('test_node', id='8')
        was_added = self.t1.add(n)  # should be 0
        self.assertEqual(was_added, 0)
from tree import Tree
from tree import Node

# Manual smoke test: grow a small tree and print it after each stage.
myTree = Tree()
#print(myTree.get_root())

# Sample nodes; the two arguments are later read back as .feature and
# .value when searching for a node.
n = Node('taste',5)
p = Node('var',6)
q = Node('var',7)
r = Node('var',8)
s = Node('name',9)

# n goes directly under the root.
myTree.add_node(n,myTree.get_root())
print("Traversing the tree after adding 1 node")
myTree.print_tree(myTree.get_root(),0)

# p is attached under n, which is located via search_node by its
# feature/value pair.
myTree.add_node(p,myTree.search_node(myTree.get_root(),n.feature,n.value))
print("Traversing the tree after adding 2 nodes")
myTree.print_tree(myTree.get_root(),0)
# q also goes under n; r goes under q.
myTree.add_node(q,myTree.search_node(myTree.get_root(),n.feature,n.value))
myTree.add_node(r,myTree.search_node(myTree.get_root(),q.feature,q.value))

print("Traversing the tree after adding 4 nodes")
myTree.print_tree(myTree.get_root(),0)
# s goes under r; the tree is not printed again after this last insertion.
myTree.add_node(s,myTree.search_node(myTree.get_root(),r.feature,r.value))

"""
n.add_child(p)
n.add_child(q)
n.add_child(r)
r.add_child(s)
Beispiel #17
0
# Manual smoke test: nodes are created with a feature name only and values
# are attached afterwards with add_value().
n = Node('taste')
n.add_value('o')

p = Node('var')
# NOTE(review): the value is added to n, not to the freshly created p --
# confirm whether p.add_value('a') was intended.
n.add_value('a')

q = Node('var')
# NOTE(review): same pattern as above -- the value goes to n rather than q.
n.add_value('b')

r = Node('var')
r.add_value('c')

s = Node('name')

# n goes directly under the root.
myTree.add_node(n, myTree.get_root())
print("Traversing the tree after adding 1 node")
myTree.print_tree(myTree.get_root(), 0)

# Parents are passed directly here instead of being looked up with
# search_node.
myTree.add_node(p, n)
#myTree.add_node(p,myTree.search_node(myTree.get_root(),n.feature,n.value))
print("Traversing the tree after adding 2 nodes")
myTree.print_tree(myTree.get_root(), 0)
myTree.add_node(q, n)
myTree.add_node(r, n)

print("Traversing the tree after adding 4 nodes")
myTree.print_tree(myTree.get_root(), 0)
# s goes under r; the tree is not printed again after this last insertion.
myTree.add_node(s, r)
"""
n.add_child(p)