Example #1
0
    def train(self):
        """
        Train the decision-tree model on the entries in ``self.train_file``
        and persist the fitted model to ``self.out_file``.

        :return: None
        """
        entries = self.train_file
        # Feature names come from the first entry; assumes every entry
        # shares the same feature set -- TODO confirm with caller.
        features = set(entries[0].features.keys())

        self.tree = tree.make_tree(entries, features, [], MAX_DEPTH)

        # Store the model to a binary file.  ``with`` guarantees the handle
        # is closed even if pickling raises (the original leaked it and
        # shadowed the ``file`` builtin).
        with open(self.out_file, "wb") as model_file:
            pickle.dump(self, model_file)
Example #2
0
def search(query):
    print query[:-1]
    Tree = tree.make_tree(query[:-1])

    t_goto = 0
    t_ev = 0
    res = []
    last_id = -1
    Tree.goto(last_id)
    last_id = Tree.evaluate()
    while last_id != -1:
        res.append(last_id)
        last_id += 1
        Tree.goto(last_id)
        last_id = Tree.evaluate()
    print len(res)

    with open('urls.txt', 'r') as f:
        lines = f.readlines()
        for r in res:
            print lines[r][:-1]
    def train(self, ensemble_size=SIZE):
        """
        Train the model with the AdaBoost algorithm.

        :param ensemble_size: number of decision stumps in the ensemble
        :return: None
        """
        entries = self.train_file
        features = set(entries[0].features.keys())
        weights = Weights(entries)
        self.ensemble = []

        # Create and store each weighted stump.
        for _ in range(ensemble_size):
            stump = tree.make_tree(entries, features, [], 1)

            # Weighted training error of this stump.
            error = 0
            for entry in entries:
                decision = stump.decide_classification(entry)
                if decision != entry.target:
                    error += entry.weight

            # Down-weight the entries the stump classified correctly.
            for j, entry in enumerate(entries):
                decision = stump.decide_classification(entry)
                if decision == entry.target:
                    new_weight = entry.weight * error / (weights.total - error)
                    weights.update_weight(j, new_weight)

            weights.normalization()
            # AdaBoost hypothesis weight: log((1 - err) / err).  The original
            # computed ``math.log(weights.total - error) / error`` -- an
            # operator-precedence bug that divides the logarithm by the error
            # instead of taking the log of the ratio.
            # NOTE(review): a stump with zero error still divides by zero
            # here, as in the original -- confirm that cannot happen.
            stump.weight = math.log((weights.total - error) / error)
            self.ensemble.append(stump)

        # Persist the trained model; ``with`` closes the handle on error too.
        with open(self.out_file, "wb") as model_file:
            pickle.dump(self, model_file)
Example #4
0
def index():
    """Render the landing page, passing a freshly built tree to the template."""
    data = make_tree(level=1000)
    return render_template('index.html', treeData=data)
from monitor import FolderMonitor
from tree import make_tree

# Watch a local folder and then print its directory tree.
watch_path = 'D:/temp/dev'
monitor = FolderMonitor(watch_path)
monitor.run()
# Interactive start/stop loop, kept disabled for reference:
# while True:
#     intakes = input()
#     if intakes == 'go':
#         monitor.start()
#     if intakes == 'stop':
#         monitor.stop()

make_tree(watch_path)
Example #6
0
# Load the three dataset splits (file-path variables are defined elsewhere
# in this file; note ``vaildation_data_file_path`` keeps its original
# misspelled name so the reference still resolves).
training_data = pandas.read_csv(training_data_file_path)
validation_data = pandas.read_csv(vaildation_data_file_path)
testing_data = pandas.read_csv(testing_data_file_path)

# Convention: the last column is the class label, the rest are attributes.
column_list = training_data.columns.values
attr = column_list[:-1]
classname = column_list[-1]

instance_classes = utilities.getInstanceClasses(training_data, classname)
instances = utilities.getInstances(training_data)

validation_instances = utilities.getInstances(validation_data)
validation_column_list = validation_data.columns.values
# Bug fix: attributes must come from the validation columns -- the original
# sliced the training ``column_list`` here.
validation_attr = validation_column_list[:-1]

testing_instances = utilities.getInstances(testing_data)
testing_column_list = testing_data.columns.values
# Bug fix: likewise, use the testing columns for the testing attributes.
testing_attr = testing_column_list[:-1]

node_label = 1
parent = tree.make_tree(instances, instance_classes, attr, training_data,
                        node_label)
print('Decision Tree : ')
tree.printTree(parent, 0)

# Pre-prune accuracy report ("Number if" typos in the original messages
# corrected to "Number of").
print('-------------------')
print('Pre-Pruned Accuracy')
print('-------------------')
print('Number of Training instances = ', len(instances))
print('Number of Training attributes = ', len(attr))
print('Total number of nodes in the tree = ', tree.countNodes(parent))
print('Number of leaf nodes in the tree = ', tree.countPureNodes(parent))
print('Accuracy of the model on the training dataset : ',
      round(utilities.getAccuracy(parent, training_data) * 100, 2), '%')
print('')
print('Number of Validation instances = ', len(validation_instances))
print('Number of Validation attributes = ', len(validation_attr))
Example #7
0
    def __init__(self, nleaves, nbreakpoints, G, mappings=None):
        """
        Precompute flattened component and path tables for the SCC graph *G*.

        :param nleaves: number of leaves (stored as-is on the instance).
        :param nbreakpoints: per-epoch breakpoint counts; indexed in lockstep
            with ``G.getEpochSizes()``.
        :param G: SCC graph exposing ``getEpochSizes``, ``all_paths``,
            ``all_states`` and the per-epoch graphs ``G.G[e]``.
        :param mappings: optional list of mappings; defaults to a fresh empty
            list.  (Bug fix: the original used ``mappings=[]``, a mutable
            default shared across every instance created without the
            argument.)
        """
        if mappings is None:
            mappings = []
        self.nleaves = nleaves
        self.nbreakpoints = nbreakpoints
        self.G = G
        # Each epoch's state-space size is repeated once per breakpoint.
        epoch_sizes = self.G.getEpochSizes()
        self.all_sizes = []
        for e in xrange(len(nbreakpoints)):
            self.all_sizes.extend([epoch_sizes[e]] * nbreakpoints[e])
        # Build a list of all paths through the SCC graph
        paths = []
        for S in G.all_paths():
            paths.append(S[:])

        # Number every SCC component; index 0 is a sentinel component (0,).
        epoch_sizes = G.getEpochSizes()
        component_index = dict()
        components = [(0,)]
        for e,esize in enumerate(epoch_sizes):
            for c in xrange(len(G.G[e].V)):
                component = array(G.all_states(e,c))
                component_idx = len(components)
                components.append(component)
                component_index[(e,c)] = component_idx

        # Flatten all components into one int32 array and remember each
        # component's [start, end) offsets within it.
        self.components_flat = zeros(sum(self.all_sizes)+1, dtype=int32)
        component_starts = []
        component_ends = []
        offset = 0
        for c in components:
            a = offset
            b = offset + len(c)
            offset = b
            assert self.components_flat[a:b].sum() == 0
            assert 0 < b <= len(self.components_flat)
            self.components_flat[a:b] = array(c, dtype=int32)
            component_starts.append(a)
            component_ends.append(b)
        # Components must tile the flat array contiguously.
        assert all(component_ends[i] == component_starts[i+1] for i in range(len(component_ends)-1))

        # Build all distributions of the paths over our intervals
        paths_final = []
        tree_map = {}
        paths_indices = []
        npaths = 0
        for s in enumerate_all_transitions(paths, nbreakpoints):
            # FIXME: instead of removing the first component in the path,
            # we shouldn't have it there to begin with...
            s = s[1:]
            # Encode the path as component offsets into components_flat.
            cpath = (0,)+tuple(component_index[(e,p)] for e,p in s)
            path_as_offsets = []
            for ci in cpath:
                path_as_offsets.append(component_starts[ci])
                path_as_offsets.append(component_ends[ci])
            path_as_offsets = array(path_as_offsets, dtype=int32)
            paths_final.extend(path_as_offsets)
            npaths += 1

            # Deduplicate the two trees per path via tree_map, recording
            # each tree's (possibly shared) index.
            ta = make_tree(G, s, 0)
            tb = make_tree(G, s, 1)
            a = tree_map.setdefault(ta, len(tree_map))
            b = tree_map.setdefault(tb, len(tree_map))
            paths_indices.append(a)
            paths_indices.append(b)

        self.tree_map = tree_map
        self.ntrees = len(tree_map)
        self.paths_final_indices = array(paths_indices, dtype=int32)
        self.paths_final = array(paths_final, dtype=int32)
        self.npaths = npaths
        self.mappings = mappings
Example #8
0
    def __init__(self, nleaves, nbreakpoints, G, mappings=None):
        """
        Precompute flattened component and path tables for the SCC graph *G*.

        :param nleaves: number of leaves (stored as-is on the instance).
        :param nbreakpoints: per-epoch breakpoint counts; indexed in lockstep
            with ``G.getEpochSizes()``.
        :param G: SCC graph exposing ``getEpochSizes``, ``all_paths``,
            ``all_states`` and the per-epoch graphs ``G.G[e]``.
        :param mappings: optional list of mappings; defaults to a fresh empty
            list.  (Bug fix: the original used ``mappings=[]``, a mutable
            default shared across every instance created without the
            argument.)
        """
        if mappings is None:
            mappings = []
        self.nleaves = nleaves
        self.nbreakpoints = nbreakpoints
        self.G = G
        # Each epoch's state-space size is repeated once per breakpoint.
        epoch_sizes = self.G.getEpochSizes()
        self.all_sizes = []
        for e in xrange(len(nbreakpoints)):
            self.all_sizes.extend([epoch_sizes[e]] * nbreakpoints[e])
        # Build a list of all paths through the SCC graph
        paths = []
        for S in G.all_paths():
            paths.append(S[:])

        # Number every SCC component; index 0 is a sentinel component (0,).
        epoch_sizes = G.getEpochSizes()
        component_index = dict()
        components = [(0, )]
        for e, esize in enumerate(epoch_sizes):
            for c in xrange(len(G.G[e].V)):
                component = array(G.all_states(e, c))
                component_idx = len(components)
                components.append(component)
                component_index[(e, c)] = component_idx

        # Flatten all components into one int32 array and remember each
        # component's [start, end) offsets within it.
        self.components_flat = zeros(sum(self.all_sizes) + 1, dtype=int32)
        component_starts = []
        component_ends = []
        offset = 0
        for c in components:
            a = offset
            b = offset + len(c)
            offset = b
            assert self.components_flat[a:b].sum() == 0
            assert 0 < b <= len(self.components_flat)
            self.components_flat[a:b] = array(c, dtype=int32)
            component_starts.append(a)
            component_ends.append(b)
        # Components must tile the flat array contiguously.
        assert all(component_ends[i] == component_starts[i + 1]
                   for i in range(len(component_ends) - 1))

        # Build all distributions of the paths over our intervals
        paths_final = []
        tree_map = {}
        paths_indices = []
        npaths = 0
        for s in enumerate_all_transitions(paths, nbreakpoints):
            # FIXME: instead of removing the first component in the path,
            # we shouldn't have it there to begin with...
            s = s[1:]
            # Encode the path as component offsets into components_flat.
            cpath = (0, ) + tuple(component_index[(e, p)] for e, p in s)
            path_as_offsets = []
            for ci in cpath:
                path_as_offsets.append(component_starts[ci])
                path_as_offsets.append(component_ends[ci])
            path_as_offsets = array(path_as_offsets, dtype=int32)
            paths_final.extend(path_as_offsets)
            npaths += 1

            # Deduplicate the two trees per path via tree_map, recording
            # each tree's (possibly shared) index.
            ta = make_tree(G, s, 0)
            tb = make_tree(G, s, 1)
            a = tree_map.setdefault(ta, len(tree_map))
            b = tree_map.setdefault(tb, len(tree_map))
            paths_indices.append(a)
            paths_indices.append(b)

        self.tree_map = tree_map
        self.ntrees = len(tree_map)
        self.paths_final_indices = array(paths_indices, dtype=int32)
        self.paths_final = array(paths_final, dtype=int32)
        self.npaths = npaths
        self.mappings = mappings