def __init__(self, n_estimators, learning_rate, min_samples_split,
                 min_impurity, max_depth, regression, debug):
        """Store hyperparameters, choose the loss, and pre-build base trees."""
        self.n_estimators = n_estimators          # boosting rounds
        self.learning_rate = learning_rate        # shrinkage per round
        self.min_samples_split = min_samples_split
        self.min_impurity = min_impurity
        self.max_depth = max_depth
        self.init_estimate = None                 # initial prediction, set later by fit()
        self.regression = regression
        self.debug = debug
        self.multipliers = []

        # Squared error for regression; logistic loss for classification.
        if self.regression:
            self.loss = SquareLoss()
        else:
            self.loss = LogisticLoss()

        # One base regression tree per boosting round.
        self.trees = [RegressionTree(min_samples_split=self.min_samples_split,
                                     min_impurity=min_impurity,
                                     max_depth=self.max_depth)
                      for _ in range(n_estimators)]
    def __init__(self, n_estimators, learning_rate, min_samples_split,
                 min_impurity, max_depth, regression, debug):
        self.n_estimators = n_estimators  # Number of trees
        self.learning_rate = learning_rate
        self.min_samples_split = min_samples_split  # The minimum n of sampels to justify split
        self.min_impurity = min_impurity  # Minimum variance reduction to continue
        self.max_depth = max_depth  # Maximum depth for tree
        self.init_estimate = None  # The initial prediction of y
        self.regression = regression
        self.debug = debug

        # Square loss for regression
        # Log loss for classification
        self.loss = SquareLoss()
        if not self.regression:
            self.loss = LogisticLoss()

        # Initialize regression trees
        self.trees = []
        for _ in range(n_estimators):
            tree = RegressionTree(min_samples_split=self.min_samples_split,
                                  min_impurity=min_impurity,
                                  max_depth=self.max_depth)

            self.trees.append(tree)
# Example #3
    def __init__(self,
                 n_estimators,  # int, number of trees
                 learning_rate,  # learning rate for gradient descent
                 min_samples_split,  # minimum number of samples in a leaf to split
                 min_impurity,  # minimum impurity for each subtree
                 max_depth,  # maximum depth of each subtree
                 regression=True  # bool, whether this is a regression problem
                 ):
        """Configure the booster and build its base regression trees.

        Selects squared loss for regression or softmax loss for
        classification, and pre-instantiates ``n_estimators`` trees.
        """
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.min_samples_split = min_samples_split
        self.min_impurity = min_impurity
        self.max_depth = max_depth
        self.regression = regression
        # Progress bar shown during training.
        self.bar = progressbar.ProgressBar(widgets=bar_widgets)

        if regression:
            self.loss = SquareLoss()
        else:
            self.loss = SoftmaxLoss()

        self.trees = []
        for _ in range(self.n_estimators):
            tree = RegressionTree(min_samples_split=self.min_samples_split,
                                  min_impurity=self.min_impurity,
                                  max_depth=self.max_depth)
            # BUG FIX: store each tree; the original loop created the trees
            # and immediately discarded them, leaving self.trees empty.
            self.trees.append(tree)
# Example #4
    def __init__(self, n_estimators, learning_rate, max_depth=2,
            min_split_samples=2, min_impurity=1e-7, regression=False):
        """Configure the booster and pre-build its weak learners.

        Squared loss is used for regression targets, cross-entropy
        otherwise.
        """
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.regression = regression

        # Pick the loss that matches the task type.
        self.loss = SquareLoss() if self.regression else CrossEntropy()

        # One regression tree per boosting stage, all sharing the same limits.
        self.trees = [RegressionTree(max_depth=max_depth,
                                     min_split_samples=min_split_samples,
                                     min_impurity=min_impurity)
                      for _ in range(n_estimators)]
    def __init__(self,
                 n_estimators=20,
                 learning_rate=0.5,
                 min_samples_split=20,
                 min_var_red=1e-4,
                 max_depth=4):
        """Gradient-boosting setup with default hyperparameters.

        ``min_var_red`` is the minimum variance reduction required to keep
        splitting a tree node.
        """
        self.n_estimators = n_estimators        # number of boosting stages
        self.learning_rate = learning_rate      # shrinkage applied per stage
        self.min_samples_split = min_samples_split
        self.min_var_red = min_var_red
        self.max_depth = max_depth              # depth cap for every tree
        self.init_estimate = None               # filled in later by fit()

        # Build the pool of base regression trees up front.
        self.trees = []
        for _ in range(n_estimators):
            base_tree = RegressionTree(min_samples_split=self.min_samples_split,
                                       min_impurity=min_var_red,
                                       max_depth=self.max_depth)
            self.trees.append(base_tree)
# Example #6
    def __init__(self, n_estimators, learning_rate, min_samples_split,
                 min_impurity, max_depth, regression):
        """Initialise hyperparameters, loss function, progress bar and trees."""
        self.n_estimators = n_estimators            # number of boosting rounds
        self.learning_rate = learning_rate
        self.min_samples_split = min_samples_split
        self.min_impurity = min_impurity
        self.max_depth = max_depth
        self.regression = regression
        # Progress reporting during training.
        self.bar = progressbar.ProgressBar(widgets=bar_widgets)

        # Squared loss for regression, cross-entropy for classification.
        if self.regression:
            self.loss = SquareLoss()
        else:
            self.loss = CrossEntropy()

        # Pre-build one base regression tree per boosting round.
        self.trees = [RegressionTree(min_samples_split=self.min_samples_split,
                                     min_impurity=min_impurity,
                                     max_depth=self.max_depth)
                      for _ in range(n_estimators)]
def main():
    """Train a RegressionTree on the 2016 Linköping temperature data,
    report the test-set MSE, and plot predictions against the raw data.
    """
    print("-- Regression Tree --")

    # Load temperature data (tab-separated: 'time' fraction of year, 'temp' in °C).
    data = pd.read_csv('../datasets/TempLinkoping2016.txt', sep="\t")

    # BUG FIX: Series.as_matrix() was deprecated in pandas 0.23 and removed
    # in pandas 1.0 — to_numpy() is the supported replacement.
    time = np.atleast_2d(data["time"].to_numpy()).T
    temp = np.atleast_2d(data["temp"].to_numpy()).T

    X = standardize(time)  # Time. Fraction of the year [0, 1]
    y = temp[:, 0]  # Temperature. Reduce to one-dim

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    model = RegressionTree()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Color map
    cmap = plt.get_cmap('viridis')

    mse = mean_squared_error(y_test, y_pred)

    print("Mean Squared Error:", mse)

    # Plot the results (x-axis rescaled from year-fraction to day of year).
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    m3 = plt.scatter(366 * X_test, y_pred, color='black', s=10)
    plt.suptitle("Regression Tree")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celcius')
    plt.legend((m1, m2, m3), ("Training data", "Test data", "Prediction"),
               loc='lower right')
    plt.show()
# Example #8
# Script: train a RegressionTree on a CSV dataset, pickle the model,
# reload it, and run a sample prediction from CLI arguments.
from decision_tree import RegressionTree

# Get data from train dataset
# dtype=None lets genfromtxt infer per-column types (dataset mixes strings/numbers).
my_data = np.genfromtxt('./datasets/Train_Datasets.csv', delimiter=',', skip_header=1, dtype=None, encoding=None)
X = np.zeros(shape=(1, 11))
y = np.zeros(shape=(1, 1))
# NOTE(review): the X and y built by this loop are never used — both are
# unconditionally overwritten by the slicing assignment below. Looks like
# dead code left over from an earlier approach; confirm before removing.
for data in my_data:
    raw = list(data)
    data = np.array([raw[:-1]]) 
    target = np.array([raw[-1]])
    X = np.vstack((data, X))
    y = np.vstack((target, y))

# Features = every column but the last; target = the last column.
# NOTE(review): with dtype=None genfromtxt typically returns a 1-D structured
# array, on which 2-D slicing like my_data[:, :-1] would raise — verify the
# CSV actually yields a plain 2-D array here.
X, y = my_data[:, :-1], my_data[:, -1]
models = RegressionTree()
# X = X[:-1]
# y = y[:-1]
models.fit(X, y)

# Persist the trained model to disk with pickle.
filename = 'models_version_one.sav'
pickle.dump(models, open(filename, 'wb'))

# Reload to verify the round-trip works.
# NOTE: pickle.load is unsafe on untrusted files; acceptable here only
# because the file was just written by this same script.
loaded_model = pickle.load(open(filename, 'rb'))

# Test predict for some input
# Fall back to a hard-coded sample row when fewer than 4 CLI args are given.
args = sys.argv[1:]
if len(args) < 4:
    args = ['FDT07',5.82,'reg',0,'Fruits and Vegetables',256.633,'OUT049',1999,'Medium','Tier 1','Supermarket Type1']
# 2050.664 is presumably the expected target for the sample row — TODO confirm.
print(loaded_model.predict(np.array([args])),
      2050.664)