def train(self, data, labels):
    """Train ``self.num`` trees, each on its own consecutive slice of rows.

    The rows are cut into ``self.num`` equally sized, consecutive partitions
    of ``floor(len(labels) / self.num)`` rows each; rows past the last full
    partition are not used. Every fitted tree is appended to ``self.trees``.

    Args:
        data: Row-indexable 2-D training samples; ``len(data[0])`` is taken
            as the number of features.
        labels: Labels, sliced with the same row bounds as ``data``.
    """
    feature_count = len(data[0])
    rows_per_tree = math.floor(len(labels) / float(self.num))
    kept_features = math.ceil(np.sqrt(feature_count))

    for tree_no in range(self.num):
        start = int(tree_no * rows_per_tree)
        stop = int((tree_no + 1) * rows_per_tree)

        # NOTE(review): this random feature subset is drawn but never applied
        # (the np.delete call below receives an empty index list). It looks
        # like feature bagging was intended -- confirm before wiring it in,
        # since prediction would then need the same column selection.
        unused_drop = np.random.permutation(feature_count)[kept_features:]

        tree = DTree(self.depth, self.impurity, self.segmentor)
        # Deleting no columns still hands the tree a fresh copy of the slice.
        partition = np.delete(data[start:stop], [], 1)
        tree.train(partition, labels[start:stop])

        # NOTE(review): this branch has no effect; presumably a placeholder
        # for handling trees that did not collapse to a single leaf.
        if not isinstance(tree.root, LeafNode):
            pass
        self.trees.append(tree)
Example #2
0
    def train(self, data, labels):
        """Grow ``self.nTrees`` trees, each on a random subsample of the rows.

        Every tree is built from ``self.impurity``, ``self.segmentor``,
        ``self.depth`` and ``self.randomness``, named after its index,
        trained on its own subsample, and appended to ``self.forest``.

        Args:
            data: Training samples; must expose ``shape[0]`` and accept an
                index array (assumes a NumPy array -- TODO confirm).
            labels: Labels indexable with the same index array as ``data``.
        """
        def sample(data, labels):
            """Return a without-replacement subsample of (data, labels)."""
            n = data.shape[0]
            # Floor division keeps the default sample size an integer under
            # Python 3 as well; true division would yield a float, which
            # np.random.choice does not accept as a size.
            samples = self.samples if self.samples else n // 2
            i = np.random.choice(n, samples, replace=False)
            return data[i], labels[i]

        for index in range(self.nTrees):
            tree = DTree(self.impurity,
                         self.segmentor,
                         self.depth,
                         self.randomness,
                         name=index)
            sData, sLabel = sample(data, labels)
            tree.train(sData, sLabel)
            self.forest.append(tree)
Example #3
0
def create_trees(bags, data, label):
    """Build one tree per bag with ``DTree.get_tree`` and return them as a list.

    Args:
        bags: Sized collection indexable by ``0 .. len(bags) - 1``; each
            entry is passed to ``DTree.get_tree``.
        data: Not used by this function.
        label: Forwarded unchanged to ``DTree.get_tree`` for every bag.

    Returns:
        List of the objects returned by ``DTree.get_tree``, in bag order.
    """
    # Positional indexing (rather than iterating ``bags`` directly) is kept so
    # that containers whose iteration order or items differ from their
    # integer-indexed entries behave as before.
    return [DTree.get_tree(bags[idx], label) for idx in range(len(bags))]
Example #4
0
    print('\t1. Drvo odlucivanja')
    print('\t2. K najblizih suseda')
    print('\t3. Gausova raspodela')
    print('\t4. Neuronske mreze')
    while True:
        metoda = int(input(''))
        if metoda in (1, 2, 3, 4):
            break
        else:
            print('Neispravna opcija')
            print('Odabrali ste metodu pod rednim brojem ' + str(metoda))
            print('--------------------------------------------------------')

    if metoda == 1:
        print('Odabrali ste analizu metodom drveta odlučivanja')
        DTree.DTree(data)
    elif metoda == 2:
        print('Odabrali ste analizu metodom k najbližih suseda')
        KNeighbors.KNeighboors(data)
    elif metoda == 3:
        print('Odabrali ste analizu Gausovom metodom')
        Gaussian.Gaussian(data)
    elif metoda == 4:
        print('Odabrali ste analizu metodom neuronskih mreža')
        MLP.MLP(data)
    else:
        print('Neispravna opcija')

    metoda = input('Zelite li da isprobate drugu metodu? (y/n)')
    if metoda == 'n':
        break
Example #5
0
X = data['training_data']
Y = data['training_labels'].T.ravel()
# Drawing TRAIN_SIZE indices out of TRAIN_SIZE without replacement yields a
# random permutation of 0 .. TRAIN_SIZE - 1.
# NOTE(review): assumes TRAIN_SIZE equals the number of rows in X -- confirm.
randomIndex = np.random.choice(TRAIN_SIZE, TRAIN_SIZE, replace=False)

# Split Data: the last VALIDATION_SIZE shuffled indices are held out for
# validation, everything before them is used for training.
# NOTE(review): VALIDATION_SIZE == 0 would make the training slice empty
# (``[:-0]`` is ``[:0]``) -- presumably it is always positive.
xTrain = X[randomIndex[:-VALIDATION_SIZE]]
yTrain = Y[randomIndex[:-VALIDATION_SIZE]]
xValidate = X[randomIndex[-VALIDATION_SIZE:]]
yValidate = Y[randomIndex[-VALIDATION_SIZE:]]
xTest = data['test_data']

segmentor = Segmentor()

# Single-argument print(...) produces the same output on Python 2 and 3.
print("============= Decision Tree ==========")
tree = DTree(Impurity.impurity, segmentor, depth=20)
tree.train(xTrain, yTrain)
labels = tree.predict(xValidate)

# Error rate = 1 - fraction of correct predictions. Taking the mean of the
# boolean match array also works when no prediction is correct; indexing
# the result of np.bincount with True fails in that case because the count
# array then has a single entry.
error = 1.0 - np.mean(tree.predict(xTrain) == yTrain)
print("Training Error: %f" % (error))

error = 1.0 - np.mean(labels == yValidate)
print("Validation Error: %f" % (error))

print("========== Random Forest ==========")
forest = RForest(Impurity.impurity, segmentor, nTrees=30, randomness=10)