def __init__(self, base_estimator=None, n_estimators=10, learning_rate=1.0):
    """
    AdaBoost-style regressor ensemble initializer.

    :param base_estimator: base learner; heterogeneous learners may be passed as a list,
                           e.g. [estimator1, estimator2, ..., estimator10], in which case
                           n_estimators is ignored; a single homogeneous estimator is
                           deep-copied n_estimators times
    :param n_estimators: number of base-learner boosting iterations
    :param learning_rate: shrinks the weight of each subsequent base learner to
                          mitigate overfitting
    """
    self.base_estimator = base_estimator
    self.n_estimators = n_estimators
    self.learning_rate = learning_rate
    if self.base_estimator is None:
        # default base learner: a decision stump
        self.base_estimator = CARTRegressor(max_depth=2)
    if isinstance(self.base_estimator, list):
        # heterogeneous learners: the list length overrides n_estimators
        self.n_estimators = len(self.base_estimator)
    else:
        # homogeneous learner: replicate independent copies, one per iteration
        estimator = self.base_estimator
        self.base_estimator = [copy.deepcopy(estimator) for _ in range(self.n_estimators)]
    # weight assigned to each fitted base learner
    self.estimator_weights = []
    # index of the weak learner giving the (weighted) median prediction
    self.median_index = None
def __init__(self, base_estimator=None, n_estimators=10, learning_rate=1.0, loss='ls',
             huber_threshold=1e-1, quantile_threshold=0.5):
    """
    Gradient-boosting regressor initializer.

    :param base_estimator: base learner; heterogeneous learners may be passed as a list,
                           e.g. [estimator1, estimator2, ..., estimator10], in which case
                           n_estimators is ignored; a single homogeneous estimator is
                           deep-copied n_estimators times
    :param n_estimators: number of base-learner boosting iterations
    :param learning_rate: shrinks the weight of each subsequent base learner to
                          mitigate overfitting
    :param loss: loss function — 'ls' squared error, 'lae' absolute error,
                 'huber' Huber loss, 'quantile' quantile loss
    :param huber_threshold: Huber loss threshold; only used when loss='huber'
    :param quantile_threshold: quantile level; only used when loss='quantile'
    """
    self.base_estimator = base_estimator
    self.n_estimators = n_estimators
    self.learning_rate = learning_rate
    if self.base_estimator is None:
        # default base learner: a decision stump
        self.base_estimator = CARTRegressor(max_depth=2)
    if isinstance(self.base_estimator, list):
        # heterogeneous learners: the list length overrides n_estimators
        self.n_estimators = len(self.base_estimator)
    else:
        # homogeneous learner: replicate independent copies, one per iteration
        estimator = self.base_estimator
        self.base_estimator = [copy.deepcopy(estimator) for _ in range(self.n_estimators)]
    self.loss = loss
    self.huber_threshold = huber_threshold
    self.quantile_threshold = quantile_threshold
def __init__(self, base_estimator=None, n_estimators=10, feature_sample=0.66):
    """
    Random-forest-style regressor initializer.

    :param base_estimator: base learner; heterogeneous learners may be passed as a list,
                           e.g. [estimator1, estimator2, ..., estimator10], in which case
                           n_estimators is ignored; a single homogeneous estimator is
                           deep-copied n_estimators times
    :param n_estimators: number of base learners
    :param feature_sample: fraction of features sampled for each base learner
    """
    self.base_estimator = base_estimator
    self.n_estimators = n_estimators
    if self.base_estimator is None:
        # default base learner: a full decision tree
        self.base_estimator = CARTRegressor()
    if isinstance(self.base_estimator, list):
        # heterogeneous learners: the list length overrides n_estimators
        self.n_estimators = len(self.base_estimator)
    else:
        # homogeneous learner: replicate independent copies, one per estimator
        estimator = self.base_estimator
        self.base_estimator = [copy.deepcopy(estimator) for _ in range(self.n_estimators)]
    self.feature_sample = feature_sample
    # per-estimator record of which feature indices were sampled
    self.feature_indices = []
def __init__(self, base_estimator=None, n_estimators=10, dropout=0.5):
    """
    DART-style ensemble initializer.

    :param base_estimator: base learner; heterogeneous learners may be passed as a list,
                           e.g. [estimator1, estimator2, ..., estimator10], in which case
                           n_estimators is ignored; a single homogeneous estimator is
                           deep-copied n_estimators times
    :param n_estimators: number of base-learner boosting iterations
    :param dropout: probability of dropping a previously fitted learner each round
    """
    self.base_estimator = base_estimator
    self.n_estimators = n_estimators
    self.dropout = dropout
    if self.base_estimator is None:
        # default base learner: a decision stump
        self.base_estimator = CARTRegressor(max_depth=2)
    if isinstance(self.base_estimator, list):
        # heterogeneous learners: the list length overrides n_estimators
        self.n_estimators = len(self.base_estimator)
    else:
        # homogeneous learner: replicate independent copies, one per iteration
        estimator = self.base_estimator
        self.base_estimator = [copy.deepcopy(estimator) for _ in range(self.n_estimators)]
    # one expanded group of estimators per class (filled during fit)
    self.expand_base_estimators = []
    # per-estimator weights (filled during fit)
    self.weights = None
def __init__(self, base_estimator=None, n_estimators=10):
    """
    Ensemble regressor initializer.

    :param base_estimator: base learner; heterogeneous learners may be passed as a list,
                           e.g. [estimator1, estimator2, ..., estimator10], in which case
                           n_estimators is ignored; a single homogeneous estimator is
                           deep-copied n_estimators times
    :param n_estimators: number of base learners
    """
    self.base_estimator = base_estimator
    self.n_estimators = n_estimators
    if self.base_estimator is None:
        # default base learner: a full decision tree
        self.base_estimator = CARTRegressor()
    if isinstance(self.base_estimator, list):
        # heterogeneous learners: the list length overrides n_estimators
        self.n_estimators = len(self.base_estimator)
    else:
        # homogeneous learner: replicate independent copies, one per estimator
        estimator = self.base_estimator
        self.base_estimator = [copy.deepcopy(estimator) for _ in range(self.n_estimators)]
import numpy as np
import os

os.chdir('../')
import matplotlib.pyplot as plt

from ml_models.tree import CARTRegressor
from ml_models.linear_model import LinearRegression

# Noisy sine curve as a 1-D regression problem.
xs = np.linspace(1, 10, num=100)
ys = np.sin(xs) + np.random.random(size=100)
xs = xs.reshape((-1, 1))

from ml_models.ensemble import DARTRegressor

# Fit a DART ensemble of CART trees and visualize the fitted curve.
reg = DARTRegressor(base_estimator=CARTRegressor())
reg.fit(xs, ys)

plt.scatter(xs, ys)
plt.plot(xs, reg.predict(xs), color='r')
plt.show()
import numpy as np
import os

os.chdir('../')
import matplotlib.pyplot as plt

from ml_models.tree import CARTRegressor
from ml_models.linear_model import LinearRegression

# Noisy sine curve as a 1-D regression problem.
xs = np.linspace(1, 10, num=100)
ys = np.sin(xs) + np.random.random(size=100)
xs = xs.reshape((-1, 1))

from ml_models.ensemble import RandomForestRegressor

# Heterogeneous forest: mix a linear model with a CART tree.
forest = RandomForestRegressor(
    base_estimator=[LinearRegression(), CARTRegressor()],
    n_estimators=10)
forest.fit(xs, ys)

plt.scatter(xs, ys)
plt.plot(xs, forest.predict(xs), color='r')
plt.show()
import matplotlib.pyplot as plt
import numpy as np
import os

os.chdir('../')
from sklearn.datasets import make_regression

# Noisy sine curve as a 1-D regression problem.
xs = np.linspace(1, 10, num=100)
ys = np.sin(xs) + np.random.random(size=100)
xs = xs.reshape((-1, 1))

from ml_models.tree import CARTRegressor

# Fit a binned CART tree, then prune it, and plot the resulting fit.
tree = CARTRegressor(max_bins=50)
tree.fit(xs, ys)
tree.prune(10000)

plt.scatter(xs, ys)
plt.plot(xs, tree.predict(xs), color='r')
plt.show()
import numpy as np
import os

os.chdir('../')
import matplotlib.pyplot as plt

from ml_models.tree import CARTRegressor
from ml_models.linear_model import LinearRegression

# Noisy sine curve as a 1-D regression problem.
xs = np.linspace(1, 10, num=100)
ys = np.sin(xs) + np.random.random(size=100)
xs = xs.reshape((-1, 1))

from ml_models.ensemble import AdaBoostRegressor

# Heterogeneous boosting: mix a linear model with a CART tree.
booster = AdaBoostRegressor(
    base_estimator=[LinearRegression(), CARTRegressor()],
    n_estimators=10)
booster.fit(xs, ys)

plt.scatter(xs, ys)
plt.plot(xs, booster.predict(xs), color='r')
plt.show()
import numpy as np
import os

os.chdir('../')
import matplotlib.pyplot as plt

from ml_models.tree import CARTRegressor
from ml_models.linear_model import LinearRegression

# Noisy sine curve as a 1-D regression problem.
xs = np.linspace(1, 10, num=100)
ys = np.sin(xs) + np.random.random(size=100)
xs = xs.reshape((-1, 1))

from ml_models.ensemble import GradientBoostingRegressor

# Quantile-loss boosting with CART trees (0.6 quantile).
booster = GradientBoostingRegressor(
    base_estimator=CARTRegressor(),
    loss='quantile',
    quantile_threshold=0.6,
    learning_rate=1.0)
booster.fit(xs, ys)

plt.scatter(xs, ys)
plt.plot(xs, booster.predict(xs), color='r')
plt.show()
# Fix: the script used `model_selection.train_test_split` and `np` without
# importing them, raising NameError at runtime.
import numpy as np
from sklearn import model_selection
from sklearn.datasets import make_classification

from ml_models import utils
from ml_models.linear_model import LogisticRegression
from ml_models.tree import CARTRegressor
from ml_models.svm import SVC

# Synthetic 2-D binary classification problem.
data, target = make_classification(n_samples=100, n_features=2, n_classes=2,
                                   n_informative=1, n_redundant=0, n_repeated=0,
                                   n_clusters_per_class=1, class_sep=0.5)
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    data, target, test_size=0.1)

from ml_models.ensemble import GradientBoostingClassifier

classifier = GradientBoostingClassifier(base_estimator=CARTRegressor(),
                                        n_estimators=10, learning_rate=0.5)
classifier.fit(X_train, y_train)

# Evaluate F1 and the absolute error count on the held-out split.
from sklearn.metrics import f1_score

print(f1_score(y_test, classifier.predict(X_test)))
print(np.sum(np.abs(y_test - classifier.predict(X_test))))
# utils.plot_decision_function(X_train, y_train, classifier)
utils.plt.show()
# Fix: the script used `model_selection.train_test_split` and `np` without
# importing them, raising NameError at runtime.
import numpy as np
from sklearn import model_selection
from sklearn.datasets import make_classification

from ml_models import utils
from ml_models.linear_model import LogisticRegression
from ml_models.tree import CARTRegressor
from ml_models.svm import SVC

# Synthetic 2-D binary classification problem.
data, target = make_classification(n_samples=100, n_features=2, n_classes=2,
                                   n_informative=1, n_redundant=0, n_repeated=0,
                                   n_clusters_per_class=1, class_sep=0.5)
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    data, target, test_size=0.1)

from ml_models.ensemble import DARTClassifier

classifier = DARTClassifier(base_estimator=CARTRegressor(), n_estimators=10)
classifier.fit(X_train, y_train)

# Evaluate F1 and the absolute error count on the held-out split.
from sklearn.metrics import f1_score

print(f1_score(y_test, classifier.predict(X_test)))
print(np.sum(np.abs(y_test - classifier.predict(X_test))))
# utils.plot_decision_function(X_train, y_train, classifier)
utils.plt.show()