예제 #1
0
 def fit(self, X, y):
     """Fit ``self.num_trees`` RandomTree models on ``(X, y)``.

     Every tree is created with ``max_depth=self.max_depth``, fitted on the
     same ``X`` and ``y`` that were passed in, and appended to
     ``self.trees`` (which is reset to an empty list first). A progress
     line is printed before each tree is fitted.
     """
     self.trees = []
     for tree_number in range(1, self.num_trees + 1):
         # 1-based counter so the message reads "01/N" .. "N/N".
         print("Fitting tree %02d/%d..." % (tree_number, self.num_trees))
         member = RandomTree(max_depth=self.max_depth)
         member.fit(X, y)
         self.trees.append(member)
예제 #2
0
 def fit(self, X, y):
     """Fit ``self.num_trees`` RandomTree models and store them in ``self.treeList``.

     Each tree is constructed as ``RandomTree(self.max_depth)`` and fitted on
     the ``X`` and ``y`` passed in. ``self.treeList`` is only assigned once
     every tree has been fitted.
     """
     def _fitted_tree():
         # Build one tree and fit it before handing it back.
         member = RandomTree(self.max_depth)
         member.fit(X, y)
         return member

     self.treeList = [_fitted_tree() for _ in range(self.num_trees)]
예제 #3
0
    def fit(self, X, y):
        """Fit ``self.num_trees`` RandomTree models into ``self.random_trees``.

        ``self.random_trees`` is reset to an empty list, then one
        ``RandomTree(max_depth=self.max_depth)`` per iteration is fitted on
        ``(X, y)`` and appended to it.
        """
        forest = []
        # Published up front: the attribute and the local name share one list,
        # so it grows as each tree finishes.
        self.random_trees = forest
        for _ in range(self.num_trees):
            member = RandomTree(max_depth=self.max_depth)
            member.fit(X, y)
            forest.append(member)
예제 #4
0
 def fit(self, X, y):
     """Fit ``self.num_trees`` RandomTree models and store them in ``self.trees``.

     ``X.shape`` is unpacked into two values, so an ``X`` whose shape does
     not have exactly two entries raises before any tree is built. Each tree
     is created with ``max_depth=self.max_depth`` and fitted on ``(X, y)``;
     ``self.trees`` is assigned only after all trees are fitted.
     """
     # The sizes themselves are unused; the unpack is kept for its shape check.
     _n_rows, _n_cols = X.shape
     forest = []
     for _ in range(self.num_trees):
         member = RandomTree(max_depth=self.max_depth)
         member.fit(X, y)
         forest.append(member)
     self.trees = forest
예제 #5
0
def main():
    """Create a new tree with all of its files and directories.

    Builds ``RandomTree(3, 3)``, feeds ``tree.get()`` through
    ``recognize_type`` and calls ``create`` with each resulting item
    unpacked as positional arguments, then prints the tree's ``repr``.
    """
    layout = RandomTree(3, 3)
    entries = recognize_type(layout.get())
    for entry in entries:
        create(*entry)
    print(repr(layout))
예제 #6
0
 def fit(self, X, y):
     """Train ``self.num_trees`` RandomTree models into ``self.random_forest``.

     The forest list is reset first; each iteration then builds a
     ``RandomTree(max_depth=self.max_depth)``, fits it on ``(X, y)`` and
     appends it to the forest.
     """
     self.random_forest = []
     add_tree = self.random_forest.append
     for _ in range(self.num_trees):
         # Any bootstrapping happens inside RandomTree.fit, not here.
         candidate = RandomTree(max_depth=self.max_depth)
         candidate.fit(X, y)
         add_tree(candidate)
    def fit(self, X, y):
        """Fit ``self.num_trees`` RandomTree models into ``self.list_of_trees``.

        Each tree is created with ``max_depth=self.max_depth`` and fitted on
        ``(X, y)``. The attribute is assigned once all trees are fitted.
        """
        fitted = []
        for _ in range(self.num_trees):
            # Append first, then fit the newest entry in place.
            fitted.append(RandomTree(max_depth=self.max_depth))
            fitted[-1].fit(X, y)
        self.list_of_trees = fitted
예제 #8
0
    def fit(self, X, y):
        """Fit ``self.num_trees`` unbounded-depth RandomTree models.

        Every tree is created with ``max_depth=np.inf`` (no instance-level
        depth setting is consulted here) and fitted on ``(X, y)``. The fitted
        trees are stored in ``self.LOM`` and the first dimension of ``y`` is
        recorded as ``self.y_length``.
        """
        def _grow():
            member = RandomTree(max_depth=np.inf)
            member.fit(X, y)
            return member

        self.LOM = [_grow() for _ in range(self.num_trees)]
        self.y_length = y.shape[0]
예제 #9
0
    def fit(self, X, y):
        """Fit ``self.num_trees`` RandomTree models into ``self.trees``.

        ``self.trees`` is reset to an empty list. The tree count and depth
        limit are read from the instance once, before the loop; each tree is
        then created with that depth limit, fitted on ``(X, y)`` and appended.
        """
        self.trees = []
        count, depth_limit = self.num_trees, self.max_depth
        grow = self.trees.append

        for _ in range(count):
            member = RandomTree(max_depth=depth_limit)
            member.fit(X, y)
            grow(member)
예제 #10
0
 def load(self, filename):
     """Rebuild ``self.trees`` from a JSON model file.

     The file is parsed as JSON and its ``"model"`` entry is iterated; for
     each element a ``RandomTree(max_depth=self.max_depth)`` is created and
     its ``load`` method is called with that element. ``self.trees`` is
     reset before the trees are rebuilt.

     Args:
         filename: Path of the JSON file to read.

     Raises:
         OSError: If the file cannot be opened.
         json.JSONDecodeError: If the file does not contain valid JSON.
         KeyError: If the top-level object has no ``"model"`` key.
     """
     # The context manager closes the handle even when reading or parsing
     # raises; the file is no longer held open while trees are rebuilt.
     with open(filename, "r") as model_file:
         model = json.load(model_file)["model"]

     self.trees = []
     for tree_state in model:
         tree = RandomTree(max_depth=self.max_depth)
         tree.load(tree_state)
         self.trees.append(tree)
예제 #11
0
    def fit(self, X, y):
        """Fit ``self.num_trees`` RandomTree models and store them in ``self.forest``.

        Trees are built one at a time with ``max_depth=self.max_depth`` and
        fitted on ``(X, y)``; the attribute is assigned after the last tree
        has been fitted.
        """
        def _grown_trees():
            # Lazily yields each tree right after it has been fitted.
            for _ in range(self.num_trees):
                member = RandomTree(max_depth=self.max_depth)
                member.fit(X, y)
                yield member

        self.forest = list(_grown_trees())
예제 #12
0
 def fit(self, X, y):
     """Store the training data and fit ``self.num_trees`` RandomTree models.

     ``X`` and ``y`` are kept on the instance as ``self.X`` / ``self.y``.
     The trees are accumulated in ``self.stats``; each is constructed as
     ``RandomTree(self.max_depth)`` and fitted on ``(X, y)``.
     """
     self.X = X
     self.y = y
     self.stats = []
     for i in range(self.num_trees):
         # np.append returns a new array rather than growing in place, so
         # self.stats is rebound on every pass (and stays a plain empty list
         # when num_trees is 0).
         # NOTE(review): presumably this ends up as an object-dtype ndarray of
         # trees; a list with .append() would avoid the repeated copying.
         # Confirm nothing downstream relies on the ndarray type first.
         self.stats = np.append(self.stats, RandomTree(self.max_depth))
         # Fit the tree that was just appended, via its slot in self.stats.
         self.stats[i].fit(X, y)
예제 #13
0
 def fit(self, X, y):
     """Call ``RandomTree.fit`` on this object ``self.num_trees`` times.

     The return value of each call is collected, and the resulting list is
     stored in ``self.rt``.

     NOTE(review): ``RandomTree.fit`` is invoked through the class with
     *this* object as ``self``, so no separate RandomTree instances are
     created here and ``self.rt`` holds whatever ``fit`` returns. Confirm
     this is intended; building one ``RandomTree(self.max_depth)`` per
     iteration may have been meant instead.
     """
     self.rt = [RandomTree.fit(self, X, y) for _ in range(self.num_trees)]
예제 #14
0
    def __init__(self, num_trees, max_depth):
        """Store the forest settings and pre-create the (unfitted) trees.

        Args:
            num_trees: Number of RandomTree models to create.
            max_depth: Depth limit passed to every RandomTree.
        """
        self.num_trees = num_trees
        self.max_depth = max_depth

        # One RandomTree per slot; none of them is fitted here.
        self.rand_trees = [
            RandomTree(max_depth=self.max_depth)
            for _ in range(self.num_trees)
        ]
예제 #15
0
    def fit(self, X, Y):
        """Fit ``self.n_trees`` RandomTree models, each on a random row subsample.

        For every tree the row indices are randomly permuted and the first
        ``int(self.ratio_per_tree * n)`` of them select the training rows, so
        each tree sees a subsample drawn without replacement. Trees are
        created with ``max_depth=self.max_depth`` and
        ``ratio_features=self.ratio_features`` and collected in
        ``self.trees``.

        Args:
            X: Two-dimensional array; rows are selected with an integer
                index array.
            Y: Array indexable by the same integer index array as ``X``.
        """
        n, _ = X.shape

        # Builtin int: the ``np.int`` alias was removed in NumPy 1.24.
        # The subsample size does not change between trees, so compute it once.
        n_train = int(self.ratio_per_tree * n)

        # Seed once per call. Reseeding with the same value inside the loop
        # restarted the global random stream before every tree, so all trees
        # drew from identical random sequences.
        np.random.seed(int(time() / 150))

        self.trees = []
        for _ in range(self.n_trees):
            # Fresh permutation per tree; X and Y themselves are left untouched
            # instead of being reshuffled cumulatively.
            rows = np.random.permutation(n)[:n_train]

            clf = RandomTree(max_depth=self.max_depth,
                             ratio_features=self.ratio_features)
            clf.fit(X[rows], Y[rows])
            self.trees.append(clf)
예제 #16
0
def gen_random_forest(dataset,
                      attributes,
                      ntrees,
                      nattributes,
                      depth_limit=None):
    """
    Build a list of ``ntrees`` RandomTree objects (algorithm in slide 32,
    class 16).

    For each tree, ``bootstrap(dataset)`` is passed together with
    ``attributes`` and ``nattributes`` to ``select_attributes``; the result is
    unpacked as the positional arguments of ``RandomTree``.

    :param dataset: the dataset dataframe
    :param attributes: the attributes dict
    :param ntrees: int, number of trees
    :param nattributes: int, number of attributes of each tree (aka. "m")
    :param depth_limit: forwarded to every RandomTree as ``depth_limit``
    :return: list of the constructed RandomTree objects
    """
    forest = []
    for _ in range(ntrees):
        sample = bootstrap(dataset)
        tree_args = select_attributes(sample, attributes, nattributes)
        forest.append(RandomTree(*tree_args, depth_limit=depth_limit))
    return forest
예제 #17
0
    if question == '5':
        # Load the 'vowel' dataset and pull out its train / test splits.
        dataset = utils.load_dataset('vowel')
        X = dataset['X']
        y = dataset['y']
        X_test = dataset['Xtest']
        y_test = dataset['ytest']
        print("n = %d, d = %d" % X.shape)

        def evaluate_model(model):
            """Fit ``model`` on the training split and print its error rates.

            Prints the training error, the testing error (both the mean of
            mismatched predictions) and an elapsed time.
            """
            model.fit(X, y)

            y_pred = model.predict(X)
            tr_error = np.mean(y_pred != y)

            # The timer starts here, so "Time taken" excludes fitting and the
            # training-set prediction above.
            t = time.time()
            y_pred = model.predict(X_test)
            te_error = np.mean(y_pred != y_test)
            print("Training error: %.3f" % tr_error)
            print("Testing error: %.3f" % te_error)
            print("Time taken: %f" % (time.time() - t))

        # Evaluate each model in turn on the same splits.
        evaluate_model(
            DecisionTree(max_depth=50, stump_class=DecisionStumpInfoGain))
        evaluate_model(RandomTree(max_depth=50))
        evaluate_model(
            RandomForestClassifier(n_estimators=50,
                                   max_depth=50,
                                   criterion='entropy',
                                   random_state=1))
        # Unlike the models above, this forest is built with no depth limit.
        evaluate_model(RandomForest(num_trees=50, max_depth=np.inf))
 def fit(self, X, y):
     """Fit ``self.num_trees`` RandomTree models into ``self.Random_Forest``.

     The forest list is reset, then each iteration constructs
     ``RandomTree(self.max_depth)``, fits it on ``(X, y)`` and appends it.
     """
     grown = []
     # Same list object under both names, so the attribute fills up as we go.
     self.Random_Forest = grown
     for _ in range(self.num_trees):
         member = RandomTree(self.max_depth)
         member.fit(X, y)
         grown.append(member)
 def __init__(self, max_depth, num_trees):
     """Initialise the RandomTree part of this object and record the tree count.

     Args:
         max_depth: Forwarded to ``RandomTree.__init__``.
         num_trees: Stored as ``self.num_trees``.
     """
     # Explicit call to RandomTree's initialiser with this object as self
     # (presumably the base class; the class header is not visible here).
     RandomTree.__init__(self, max_depth)
     self.num_trees = num_trees
예제 #20
0
            model.fit(X,y)

            y_pred = model.predict(X)
            tr_error = np.mean(y_pred != y)

            y_pred = model.predict(X_test)
            te_error = np.mean(y_pred != y_test)
            print("    Training error: %.3f" % tr_error)
            print("    Testing error: %.3f" % te_error)


        print("Our implementations:")
        print("  Decision tree info gain")
        evaluate_model(DecisionTree(max_depth=np.inf, stump_class=DecisionStumpInfoGain))
        print("  Random tree info gain")
        evaluate_model(RandomTree(max_depth=np.inf))
        print("  Random forest info gain")
        evaluate_model(RandomForest(max_depth=np.inf, num_trees=50))

        print("sklearn implementations")
        print("  Decision tree info gain")
        evaluate_model(DecisionTreeClassifier(criterion="entropy"))
        print("  Random forest info gain")
        evaluate_model(RandomForestClassifier(criterion="entropy"))
        print("  Random forest info gain, more trees")
        evaluate_model(RandomForestClassifier(criterion="entropy", n_estimators=50))


    elif question == '3':
        X = load_dataset('clusterData.pkl')['X']
 def fit(self, X, y):
     """Fit ``self.num_trees`` RandomTree models and store them in ``self.models``.

     Each tree is created with ``max_depth=self.max_depth`` and fitted on
     the ``X`` and ``y`` passed in.

     Args:
         X: Training features, forwarded unchanged to each tree's ``fit``.
         y: Training targets, forwarded unchanged to each tree's ``fit``.
     """
     # Start from an empty ensemble so a second fit() replaces the previous
     # trees instead of stacking another forest on top of them. This also
     # removes the dependence on self.models having been created elsewhere.
     self.models = []
     for _ in range(self.num_trees):
         model = RandomTree(max_depth=self.max_depth)
         model.fit(X, y)
         self.models.append(model)
예제 #22
0
    # Parse the run configuration; its `mode` attribute selects the branch below.
    mode_parser = parser()

    if str(mode_parser.mode) == 'verify':
        # Benchmark run: build one tree from the benchmark CSV, print it, exit.
        m = []
        with open('datasets/benchmark/benchmark.csv') as bfile:
            breader = csv.reader(bfile, delimiter=';')
            for row in breader:
                m.append(row)
        # The first row is the header; every column except the last is
        # registered as a 'categorical' attribute.
        attributes = {x: ['categorical'] for x in m[0][:-1]}
        attributes_names = m[0][:-1]
        # Drop the header row so only data rows go into the frame.
        del m[0]
        # The last column is named 'y' regardless of its header text.
        dataset = pd.DataFrame(m, columns=attributes_names + ['y'])
        # Record each attribute's distinct values after its type tag.
        for x in attributes.keys():
            attributes[x].append(dataset[x].unique())
        RT = RandomTree(dataset, attributes)
        RT.print_tree()
        # Verification mode stops here; nothing after this branch runs.
        sys.exit()

    # The remaining modes only load the named dataset and its attributes.
    elif str(mode_parser.mode) == 'wine':
        dataset, attributes = read_dataset('wine')

    elif str(mode_parser.mode) == 'survival':
        dataset, attributes = read_dataset('survival')

    elif str(mode_parser.mode) == 'cancer':
        dataset, attributes = read_dataset('cancer')

    elif str(mode_parser.mode) == 'contraceptive':
        dataset, attributes = read_dataset('contraceptive')
    """
예제 #23
0
 def __init__(self, max_depth, num_trees):
     """Record the tree count and run both tree initialisers on this object.

     Args:
         max_depth: Forwarded to ``RandomTree.__init__`` and
             ``DecisionTree.__init__``.
         num_trees: Stored as ``self.num_trees``.
     """
     self.num_trees = num_trees
     # NOTE(review): both initialisers are called explicitly on the same
     # object. Whether RandomTree.__init__ already performs the DecisionTree
     # setup is not visible here — confirm the second call is needed.
     RandomTree.__init__(self, max_depth=max_depth)
     DecisionTree.__init__(self,
                           max_depth=max_depth,
                           stump_class=RandomStumpInfoGain)