Esempio n. 1
0
    def __init__(self,
                 base_estimator=None,
                 n_estimators=10,
                 learning_rate=1.0):
        """
        :param base_estimator: base learner; heterogeneous learners may be
                               passed as a list, e.g. [est1, est2, ..., est10],
                               in which case n_estimators is ignored; a single
                               estimator is deep-copied n_estimators times
        :param n_estimators: number of boosting iterations
        :param learning_rate: shrinks the weight of later learners to reduce
                              overfitting
        """
        self.base_estimator = base_estimator
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        if self.base_estimator is None:
            # default to a decision stump
            self.base_estimator = CARTRegressor(max_depth=2)
        # homogeneous learners: replicate the single estimator
        if not isinstance(self.base_estimator, list):
            estimator = self.base_estimator
            self.base_estimator = [
                copy.deepcopy(estimator) for _ in range(self.n_estimators)
            ]
        # heterogeneous learners: the list length overrides n_estimators
        else:
            self.n_estimators = len(self.base_estimator)

        # per-estimator weights collected during fitting
        self.estimator_weights = []

        # index of the weak learner selected as the weighted median
        self.median_index = None
 def __init__(self,
              base_estimator=None,
              n_estimators=10,
              learning_rate=1.0,
              loss='ls',
              huber_threshold=1e-1,
              quantile_threshold=0.5):
     """
     :param base_estimator: base learner; heterogeneous learners may be
                            passed as a list, e.g. [est1, est2, ..., est10],
                            in which case n_estimators is ignored; a single
                            estimator is deep-copied n_estimators times
     :param n_estimators: number of boosting iterations
     :param learning_rate: shrinks the weight of later learners to reduce
                           overfitting
     :param loss: loss function: 'ls' squared error, 'lae' absolute error,
                  'huber' Huber loss, 'quantile' quantile loss
     :param huber_threshold: Huber loss threshold, effective only when
                             loss='huber'
     :param quantile_threshold: quantile level, effective only when
                                loss='quantile'
     """
     self.base_estimator = base_estimator
     self.n_estimators = n_estimators
     self.learning_rate = learning_rate
     if self.base_estimator is None:
         # default to a decision stump
         self.base_estimator = CARTRegressor(max_depth=2)
     # homogeneous learners: replicate the single estimator
     if not isinstance(self.base_estimator, list):
         estimator = self.base_estimator
         self.base_estimator = [
             copy.deepcopy(estimator) for _ in range(self.n_estimators)
         ]
     # heterogeneous learners: the list length overrides n_estimators
     else:
         self.n_estimators = len(self.base_estimator)
     self.loss = loss
     self.huber_threshold = huber_threshold
     self.quantile_threshold = quantile_threshold
Esempio n. 3
0
 def __init__(self,
              base_estimator=None,
              n_estimators=10,
              feature_sample=0.66):
     """
     :param base_estimator: base learner; heterogeneous learners may be
                            passed as a list, e.g. [est1, est2, ..., est10],
                            in which case n_estimators is ignored; a single
                            estimator is deep-copied n_estimators times
     :param n_estimators: number of base learners
     :param feature_sample: fraction of features sampled per learner
     """
     self.base_estimator = base_estimator
     self.n_estimators = n_estimators
     if self.base_estimator is None:
         # default to a full CART regression tree
         self.base_estimator = CARTRegressor()
     # homogeneous learners: replicate the single estimator
     if not isinstance(self.base_estimator, list):
         estimator = self.base_estimator
         self.base_estimator = [
             copy.deepcopy(estimator) for _ in range(self.n_estimators)
         ]
     # heterogeneous learners: the list length overrides n_estimators
     else:
         self.n_estimators = len(self.base_estimator)
     self.feature_sample = feature_sample
     # feature indices chosen by each base learner during fitting
     self.feature_indices = []
Esempio n. 4
0
    def __init__(self, base_estimator=None, n_estimators=10, dropout=0.5):
        """
        :param base_estimator: base learner; heterogeneous learners may be
                               passed as a list, e.g. [est1, est2, ..., est10],
                               in which case n_estimators is ignored; a single
                               estimator is deep-copied n_estimators times
        :param n_estimators: number of boosting iterations
        :param dropout: probability of dropping a fitted learner each round
        """
        self.base_estimator = base_estimator
        self.n_estimators = n_estimators
        self.dropout = dropout
        if self.base_estimator is None:
            # default to a decision stump
            self.base_estimator = CARTRegressor(max_depth=2)
        # homogeneous learners: replicate the single estimator
        if not isinstance(self.base_estimator, list):
            estimator = self.base_estimator
            self.base_estimator = [
                copy.deepcopy(estimator) for _ in range(self.n_estimators)
            ]
        # heterogeneous learners: the list length overrides n_estimators
        else:
            self.n_estimators = len(self.base_estimator)

        # one expanded group of estimators per class, filled during fitting
        self.expand_base_estimators = []

        # per-estimator weights, filled during fitting
        self.weights = None
Esempio n. 5
0
 def __init__(self, base_estimator=None, n_estimators=10):
     """
     :param base_estimator: base learner; heterogeneous learners may be
                            passed as a list, e.g. [est1, est2, ..., est10],
                            in which case n_estimators is ignored; a single
                            estimator is deep-copied n_estimators times
     :param n_estimators: number of base learners
     """
     self.base_estimator = base_estimator
     self.n_estimators = n_estimators
     if self.base_estimator is None:
         # default to a full CART regression tree
         self.base_estimator = CARTRegressor()
     # homogeneous learners: replicate the single estimator
     if not isinstance(self.base_estimator, list):
         estimator = self.base_estimator
         self.base_estimator = [
             copy.deepcopy(estimator) for _ in range(self.n_estimators)
         ]
     # heterogeneous learners: the list length overrides n_estimators
     else:
         self.n_estimators = len(self.base_estimator)
Esempio n. 6
0
import numpy as np
import os

os.chdir('../')
import matplotlib.pyplot as plt
from ml_models.tree import CARTRegressor
from ml_models.linear_model import LinearRegression

# Noisy sine-wave regression problem on [1, 10].
data = np.linspace(1, 10, 100)
target = np.sin(data) + np.random.random(100)  # additive noise
data = data[:, None]

from ml_models.ensemble import DARTRegressor

# Fit a DART ensemble built on CART regression trees.
model = DARTRegressor(base_estimator=CARTRegressor())
model.fit(data, target)

# Plot the samples against the fitted curve.
plt.scatter(data, target)
plt.plot(data, model.predict(data), color='r')
plt.show()
import numpy as np
import os

os.chdir('../')
import matplotlib.pyplot as plt
from ml_models.tree import CARTRegressor
from ml_models.linear_model import LinearRegression

# Noisy sine-wave regression problem on [1, 10].
data = np.linspace(1, 10, 100)
target = np.sin(data) + np.random.random(100)  # additive noise
data = data[:, None]

from ml_models.ensemble import RandomForestRegressor

# Heterogeneous forest: a linear model alongside a CART tree
# (n_estimators is superseded by the list length).
model = RandomForestRegressor(
    base_estimator=[LinearRegression(), CARTRegressor()], n_estimators=10)
model.fit(data, target)

# Plot the samples against the fitted curve.
plt.scatter(data, target)
plt.plot(data, model.predict(data), color='r')
plt.show()
Esempio n. 8
0
import matplotlib.pyplot as plt
import numpy as np
import os

os.chdir('../')

from sklearn.datasets import make_regression

# Noisy sine-wave regression problem on [1, 10].
data = np.linspace(1, 10, 100)
target = np.sin(data) + np.random.random(100)
data = data[:, None]

from ml_models.tree import CARTRegressor

# Fit a binned CART tree, then prune it with a large complexity penalty.
tree = CARTRegressor(max_bins=50)
tree.fit(data, target)
tree.prune(10000)

# Plot the samples against the pruned tree's predictions.
plt.scatter(data, target)
plt.plot(data, tree.predict(data), color='r')
plt.show()
Esempio n. 9
0
import numpy as np
import os

os.chdir('../')
import matplotlib.pyplot as plt
from ml_models.tree import CARTRegressor
from ml_models.linear_model import LinearRegression

# Noisy sine-wave regression problem on [1, 10].
data = np.linspace(1, 10, 100)
target = np.sin(data) + np.random.random(100)  # additive noise
data = data[:, None]

from ml_models.ensemble import AdaBoostRegressor

# Heterogeneous boosting: a linear model alongside a CART tree.
model = AdaBoostRegressor(base_estimator=[LinearRegression(), CARTRegressor()], n_estimators=10)
model.fit(data, target)

# Plot the samples against the fitted curve.
plt.scatter(data, target)
plt.plot(data, model.predict(data), color='r')
plt.show()
import numpy as np
import os

os.chdir('../')
import matplotlib.pyplot as plt
from ml_models.tree import CARTRegressor
from ml_models.linear_model import LinearRegression

# Noisy sine-wave regression problem on [1, 10].
data = np.linspace(1, 10, 100)
target = np.sin(data) + np.random.random(100)  # additive noise
data = data[:, None]

from ml_models.ensemble import GradientBoostingRegressor

# Boost CART trees under the quantile loss at the 0.6 quantile.
model = GradientBoostingRegressor(base_estimator=CARTRegressor(), loss='quantile', quantile_threshold=0.6, learning_rate=1.0)
model.fit(data, target)

# Plot the samples against the fitted curve.
plt.scatter(data, target)
plt.plot(data, model.predict(data), color='r')
plt.show()
Esempio n. 11
0
# NOTE: `numpy` and `sklearn.model_selection` are used below but were never
# imported, so the original script raised NameError; import them here.
import numpy as np
from sklearn import model_selection
from sklearn.datasets import make_classification
from ml_models import utils
from ml_models.linear_model import LogisticRegression
from ml_models.tree import CARTRegressor
from ml_models.svm import SVC

# Two-class problem with a single informative feature.
data, target = make_classification(n_samples=100,
                                   n_features=2,
                                   n_classes=2,
                                   n_informative=1,
                                   n_redundant=0,
                                   n_repeated=0,
                                   n_clusters_per_class=1,
                                   class_sep=0.5)
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    data, target, test_size=0.1)

from ml_models.ensemble import GradientBoostingClassifier

classifier = GradientBoostingClassifier(base_estimator=CARTRegressor(),
                                        n_estimators=10,
                                        learning_rate=0.5)
classifier.fit(X_train, y_train)
# Evaluate with F1 and the raw misclassification count.
from sklearn.metrics import f1_score
print(f1_score(y_test, classifier.predict(X_test)))
print(np.sum(np.abs(y_test - classifier.predict(X_test))))
# Visualize the learned decision boundary.
utils.plot_decision_function(X_train, y_train, classifier)
utils.plt.show()
Esempio n. 12
0
# NOTE: `numpy` and `sklearn.model_selection` are used below but were never
# imported, so the original script raised NameError; import them here.
import numpy as np
from sklearn import model_selection
from sklearn.datasets import make_classification
from ml_models import utils
from ml_models.linear_model import LogisticRegression
from ml_models.tree import CARTRegressor
from ml_models.svm import SVC

# Two-class problem with a single informative feature.
data, target = make_classification(n_samples=100,
                                   n_features=2,
                                   n_classes=2,
                                   n_informative=1,
                                   n_redundant=0,
                                   n_repeated=0,
                                   n_clusters_per_class=1,
                                   class_sep=0.5)
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    data, target, test_size=0.1)

from ml_models.ensemble import DARTClassifier

classifier = DARTClassifier(base_estimator=CARTRegressor(), n_estimators=10)
classifier.fit(X_train, y_train)
# Evaluate with F1 and the raw misclassification count.
from sklearn.metrics import f1_score

print(f1_score(y_test, classifier.predict(X_test)))
print(np.sum(np.abs(y_test - classifier.predict(X_test))))
# Visualize the learned decision boundary.
utils.plot_decision_function(X_train, y_train, classifier)
utils.plt.show()
Esempio n. 13
0
import numpy as np
import os

os.chdir('../')
import matplotlib.pyplot as plt
from ml_models.tree import CARTRegressor
from ml_models.linear_model import LinearRegression

# Noisy sine-wave regression problem on [1, 10].
data = np.linspace(1, 10, 100)
target = np.sin(data) + np.random.random(100)  # additive noise
data = data[:, None]

from ml_models.ensemble import AdaBoostRegressor

# Heterogeneous boosting: a linear model alongside a CART tree
# (n_estimators is superseded by the list length).
model = AdaBoostRegressor(base_estimator=[LinearRegression(),
                                          CARTRegressor()],
                          n_estimators=10)
model.fit(data, target)

# Plot the samples against the fitted curve.
plt.scatter(data, target)
plt.plot(data, model.predict(data), color='r')
plt.show()
import numpy as np
import os

os.chdir('../')
import matplotlib.pyplot as plt
from ml_models.tree import CARTRegressor
from ml_models.linear_model import LinearRegression

# Noisy sine-wave regression problem on [1, 10].
data = np.linspace(1, 10, 100)
target = np.sin(data) + np.random.random(100)  # additive noise
data = data[:, None]

from ml_models.ensemble import GradientBoostingRegressor

# Boost CART trees under the quantile loss at the 0.6 quantile.
model = GradientBoostingRegressor(base_estimator=CARTRegressor(),
                                  loss='quantile',
                                  quantile_threshold=0.6,
                                  learning_rate=1.0)
model.fit(data, target)

# Plot the samples against the fitted curve.
plt.scatter(data, target)
plt.plot(data, model.predict(data), color='r')
plt.show()