Example #1
    def __init__(self,
                 n_estimators=100,
                 max_features=None,
                 min_samples_split=2,
                 min_gain=0,
                 max_depth=float("inf")):
        '''
        :param n_estimators: number of trees in the forest
        :param max_features: maximum number of features considered when building each tree
        :param min_samples_split: minimum number of samples required to split a node (passed to each decision tree)
        :param min_gain: minimum information gain required to continue splitting (passed to each decision tree)
        :param max_depth: maximum depth of each tree (passed to each decision tree)
        '''
        self.n_estimators = n_estimators
        self.max_features = max_features
        self.min_samples_split = min_samples_split
        self.min_gain = min_gain
        self.max_depth = max_depth
        self.progressbar = progressbar.ProgressBar(widgets=bar_widgets)
        self.trees_feature_idx = []

        self.trees = []  # initialize the list of trees
        for _ in range(self.n_estimators):
            self.trees.append(
                ClassificationTree(min_samples_split=self.min_samples_split,
                                   min_impurity=min_gain,
                                   max_depth=self.max_depth))
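The snippet stops at the constructor; fit is not shown. Below is a minimal sketch of a matching fit, assuming numpy is imported as np, that ClassificationTree exposes fit(X, y), and that each tree is trained on a bootstrap sample over a random feature subset. The sqrt(n_features) default and the sampling scheme are assumptions, not part of the original.

    def fit(self, X, y):
        # Sketch, not the original implementation: train each tree on a
        # bootstrap sample of the rows over a random subset of the columns.
        n_features = np.shape(X)[1]
        if self.max_features is None:
            # Common heuristic (assumption): sqrt of the feature count
            self.max_features = int(np.sqrt(n_features))

        for i in self.progressbar(range(self.n_estimators)):
            # Rows: sample with replacement (bootstrap)
            row_idx = np.random.choice(len(X), size=len(X), replace=True)
            # Columns: random feature subset, remembered for prediction
            feature_idx = np.random.choice(n_features, size=self.max_features, replace=False)
            self.trees_feature_idx.append(feature_idx)
            self.trees[i].fit(X[row_idx][:, feature_idx], y[row_idx])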
def main():
    print('-- Classification Tree')

    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = ClassificationTree()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print('Accuracy:', accuracy)

    dt = DecisionTreeClassifier()
    dt.fit(X_train, y_train)
    y_val = dt.predict(X_test)

    acc = accuracy_score(y_test, y_val)

    print('sklearn score:', acc)
Example #3
    def __init__(self, n_estimators=50, max_features=None, min_samples_split=2,
                 min_gain=1e-7, max_depth=float("inf"), debug=False):
        self.n_estimators = n_estimators    # Number of trees
        self.max_features = max_features    # Maximum number of features per tree
        self.feature_indices = []           # The indices of the features used for each tree
        self.min_samples_split = min_samples_split
        self.min_gain = min_gain            # Minimum information gain req. to continue
        self.max_depth = max_depth          # Maximum depth for tree
        self.debug = debug

        # Initialize decision trees
        self.trees = []
        for _ in range(n_estimators):
            self.trees.append(
                ClassificationTree(
                    min_samples_split=self.min_samples_split,
                    min_impurity=min_gain,
                    max_depth=self.max_depth))
    def __init__(self,
                 n_estimators=100,
                 min_samples_split=2,
                 min_gain=0,
                 max_depth=float("inf"),
                 max_features=None):

        self.n_estimators = n_estimators
        self.min_samples_split = min_samples_split
        self.min_gain = min_gain
        self.max_depth = max_depth
        self.max_features = max_features

        self.trees = []
        # build the forest
        for _ in range(self.n_estimators):
            tree = ClassificationTree(min_samples_split=self.min_samples_split,
                                      min_impurity=self.min_gain,
                                      max_depth=self.max_depth,
                                      max_features=max_features)
            self.trees.append(tree)
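predict is not shown for this variant either. Here is a minimal majority-vote sketch, assuming the trees have been fitted, that each ClassificationTree exposes predict(X), and that class labels are non-negative integers (as with load_iris below):

    def predict(self, X):
        # Sketch only: collect one prediction per tree, then take the
        # most frequent label per sample (majority vote).
        all_preds = np.array([tree.predict(X) for tree in self.trees])
        y_pred = []
        for sample_preds in all_preds.T:  # one row of votes per sample
            y_pred.append(np.bincount(sample_preds.astype(int)).argmax())
        return np.array(y_pred)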
def main():

    print("-- Classification Tree --")

    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = ClassificationTree(max_features=2)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Our DTClassifier Accuracy:", accuracy)

    clf = tree.DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("sklearn DTClassifier Accuracy:", accuracy)
Example #6
            column = np.hstack((column, float(row['director_facebook_likes'])))
            column = np.hstack((column, float(row['actor_1_facebook_likes'])))
            column = np.hstack((column, float(row['actor_2_facebook_likes'])))
            column = np.hstack((column, float(row['actor_3_facebook_likes'])))
            column = np.hstack((column, float(row['budget'])))
            column = np.hstack((column, float(row['facenumber_in_poster'])))
            target = float(row['imdb_score'])
            target = int(target)
            if 4 <= target <= 8:
                y = np.vstack((y, [target]))  # scores
                X = np.vstack((X, column))
            # dataset.append(Instance( [ncfr, dur, dfl, a3fl, a1fl, gr], [target] ))
        except Exception:
            # skip rows with missing or malformed values
            pass

model = ClassificationTree()
X = standardize(X)
# X = RobustScaler(quantile_range=(25, 75)).fit_transform(X)
# X = MinMaxScaler().fit_transform(X)
datasets = k_fold_cross_validation_sets(X, y, 3)

for data in datasets:
    X_train, X_test, y_train, y_test = data
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    print(accuracy_score(y_test, y_pred))
    mse = mean_squared_error(y_test, y_pred)
    print("Mean Squared Error:", mse)
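The helper k_fold_cross_validation_sets is not shown in this example. A minimal sketch of what it could look like, assuming it shuffles the indices and returns one (X_train, X_test, y_train, y_test) tuple per fold, which is the shape the loop above unpacks:

def k_fold_cross_validation_sets(X, y, k, shuffle=True):
    # Sketch of the unshown helper: split (X, y) into k folds and return
    # one (X_train, X_test, y_train, y_test) tuple per fold.
    idx = np.arange(len(y))
    if shuffle:
        np.random.shuffle(idx)
    folds = np.array_split(idx, k)
    sets = []
    for i in range(k):
        test_idx = folds[i]
        train_idx = np.concatenate([folds[j] for j in range(k) if j != i])
        sets.append((X[train_idx], X[test_idx], y[train_idx], y[test_idx]))
    return sets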
Example #7
# ..........................
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)
# Rescaled labels {-1, 1}
rescaled_y_train = 2 * y_train - np.ones(np.shape(y_train))
rescaled_y_test = 2 * y_test - np.ones(np.shape(y_test))

# .......
#  SETUP
# .......
adaboost = Adaboost(n_clf=8)
naive_bayes = NaiveBayes()
knn = KNN(k=4)
logistic_regression = LogisticRegression()
mlp = MultilayerPerceptron(n_hidden=20)
perceptron = Perceptron()
decision_tree = ClassificationTree()
random_forest = RandomForest(n_estimators=150)
support_vector_machine = SupportVectorMachine(C=1, kernel=rbf_kernel)
lda = LDA()

# ........
#  TRAIN
# ........
print("Training:")
print("\tAdaboost")
adaboost.fit(X_train, rescaled_y_train)
print("\tNaive Bayes")
naive_bayes.fit(X_train, y_train)
print("\tLogistic Regression")
logistic_regression.fit(X_train, y_train)
print("\tLDA")
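Adaboost (like the perceptron and SVM set up here) works with labels in {-1, 1}, which is why y_train is remapped with 2*y - 1 before fitting. A quick standalone illustration of that mapping on dummy binary labels:

import numpy as np

# 2*y - 1 sends 0 -> -1 and leaves 1 at 1
y = np.array([0, 1, 1, 0])
print(2 * y - np.ones(np.shape(y)))  # [-1.  1.  1. -1.]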
# ..........................
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)
# Rescaled labels {-1, 1}
rescaled_y_train = 2*y_train - np.ones(np.shape(y_train))
rescaled_y_test = 2*y_test - np.ones(np.shape(y_test))

# .......
#  SETUP
# .......
adaboost = Adaboost(n_clf=8)
naive_bayes = NaiveBayes()
knn = KNN(k=4)
logistic_regression = LogisticRegression()
mlp = MultilayerPerceptron(n_hidden=20, n_iterations=20000, learning_rate=0.1)
perceptron = Perceptron()
decision_tree = ClassificationTree()
random_forest = RandomForest(n_estimators=50)
support_vector_machine = SupportVectorMachine()
lda = LDA()
gbc = GradientBoostingClassifier(n_estimators=50, learning_rate=0.9, max_depth=2)
xgboost = XGBoost(n_estimators=50, learning_rate=0.5)

# ........
#  TRAIN
# ........
print("Training:")
print("\tAdaboost")
adaboost.fit(X_train, rescaled_y_train)
print("\tDecision Tree")
decision_tree.fit(X_train, y_train)
print("\tGradient Boosting")