def train(array, embedDim, interval):
    """Tune one GradientBoostingRegressor per target column, then forecast.

    Hyper-parameters are searched once, on the training set built with a
    prediction distance of 1.  The tuned estimators are then refitted for
    distances 1..11; after every round their predictions are appended to
    ``array`` as new columns, so the next round treats them as known data.

    Args:
        array: 2-D array (it is indexed as ``array[0, a:b]`` and extended
            with ``np.hstack``).  NOTE(review): presumably one row per
            series and one column per day -- confirm against ``pp``.
        embedDim: embedding dimension forwarded to ``pp.makeTrainset`` and
            ``pp.makeXPredict``.
        interval: forwarded unchanged to the same two helpers.

    Returns:
        ``array[0, -65:-5]`` of the extended array.  Per the original note,
        66 days can be predicted in total and this slice is the part kept.
    """
    # --- Stage 1: hyper-parameter search on the distance-1 training set ---
    XTrain, yTrain = pp.makeTrainset(array, embedDim, interval, 1)
    folds = cross_validation.KFold(len(XTrain), n_folds=5, shuffle=False)
    featureCount = len(XTrain[0])
    # NOTE(review): assuming scipy.stats distributions, randint excludes its
    # upper bound and uniform(loc, scale) spans [loc, loc + scale].
    searchSpace = {
        'n_estimators': randint(20, 200),
        'loss': ['ls', 'lad', 'huber'],
        'learning_rate': uniform(0.01, 0.19),
        'subsample': uniform(0.5, 0.5),
        'max_depth': randint(1, 5),
        'min_samples_split': randint(1, 3),
        'min_samples_leaf': randint(1, 3),
        'max_features': randint(1, featureCount)
    }

    bestModels = []
    targetCount = len(yTrain[0])
    for col in range(targetCount):
        search = grid_search.RandomizedSearchCV(
            GradientBoostingRegressor(),
            param_distributions=searchSpace,
            n_iter=20,
            scoring='mean_squared_error',
            cv=folds,
            n_jobs=-1)
        search.fit(XTrain, yTrain[:, col])
        bestModels.append(search.best_estimator_)

    # --- Stage 2: refit and predict with an increasing prediction distance ---
    for horizon in range(1, 12):
        # The number of days the models predict grows each round.
        XTrain, yTrain = pp.makeTrainset(array, embedDim, interval, horizon)
        # The input to predict from grows accordingly.
        XPredict = pp.makeXPredict(array, embedDim, interval, horizon)

        forecasts = []
        for col in range(len(yTrain[0])):
            model = bestModels[col]
            model.fit(XTrain, yTrain[:, col])
            forecasts.append(model.predict(XPredict))

        # Treat this round's predictions as known data for the next round.
        array = np.hstack((array, np.array(forecasts)))

    # 66 days can be predicted in total; keep the window that is needed.
    return array[0, -65:-5]
def train(array, embedDim, interval):
    """Tune one SVR per target column, then forecast with growing distance.

    The randomized search runs once on the training set built with a
    prediction distance of 1.  Each selected estimator is then refitted for
    distances 1..11, and every round's predictions are stacked onto
    ``array`` as extra columns before the next round starts.

    Args:
        array: 2-D array (read as ``array[0, a:b]`` and grown with
            ``np.hstack``).  NOTE(review): layout presumably
            (series, days) -- confirm against ``pp``.
        embedDim: embedding dimension passed through to the ``pp`` helpers.
        interval: passed through to the ``pp`` helpers unchanged.

    Returns:
        ``array[0, -65:-5]`` of the extended array.  The original note says
        66 days can be predicted in total and this is the part to keep.
    """
    XTrain, yTrain = pp.makeTrainset(array, embedDim, interval, 1)
    splitter = cross_validation.KFold(len(XTrain), n_folds=5, shuffle=False)
    # NOTE(review): with scipy.stats, uniform(loc, scale) covers
    # [loc, loc + scale], i.e. C in [1, 100] and gamma in [0.01, 0.30].
    svrSpace = {
        'C': uniform(1, 99),
        'gamma': uniform(0.01, 0.29),
        'kernel': ['rbf', 'poly']
    }

    # One tuned estimator per column of yTrain.
    bestModels = []
    for target in range(len(yTrain[0])):
        tuner = grid_search.RandomizedSearchCV(
            svm.SVR(),
            param_distributions=svrSpace,
            n_iter=30,
            cv=splitter,
            scoring='mean_squared_error',
            n_jobs=1,
            verbose=0)
        tuner.fit(XTrain, yTrain[:, target])
        bestModels.append(tuner.best_estimator_)

    for step in range(1, 12):
        # The number of days the models predict increases each round.
        XTrain, yTrain = pp.makeTrainset(array, embedDim, interval, step)
        # So does the input that is to be predicted from.
        XPredict = pp.makeXPredict(array, embedDim, interval, step)

        roundOutput = []
        for target in range(len(yTrain[0])):
            estimator = bestModels[target]
            estimator.fit(XTrain, yTrain[:, target])
            roundOutput.append(estimator.predict(XPredict))

        # Predictions become known data for training the next round.
        stacked = np.array(roundOutput)
        array = np.hstack((array, stacked))

    # 66 days can be predicted in total; keep the matching window.
    yPredict = array[0, -65:-5]
    return yPredict
def train(array, embedDim, interval):
    """Tune one ExtraTreesRegressor per target column, then forecast 60 steps.

    Hyper-parameters are searched once with the initial ``embedDim`` and a
    prediction distance of 1.  The tuned estimators are then refitted 60
    times; each round appends its predictions to ``array`` as new columns
    and raises the embedding dimension by one.

    Args:
        array: 2-D array (read as ``array[0, -60:]`` and grown with
            ``np.hstack``).  NOTE(review): presumably (series, days) --
            confirm against ``pp``.
        embedDim: starting embedding dimension for the ``pp`` helpers.
        interval: forwarded unchanged to the ``pp`` helpers.

    Returns:
        ``array[0, -60:]`` of the extended array.  Per the original note,
        60 days can be predicted in total.
    """
    XTrain, yTrain = pp.makeTrainset(array, embedDim, interval, 1)
    folds = cross_validation.KFold(len(XTrain), n_folds=4, shuffle=False)
    # NOTE(review): assuming scipy.stats.randint, the upper bound is exclusive.
    forestSpace = {
        "n_estimators": randint(5, 100),
        "max_depth": [1, 2, 3, 5, 8, 10, None],
        "max_features": randint(1, len(XTrain[0])),
        "min_samples_split": randint(1, 3),
        "min_samples_leaf": randint(1, 3)
    }

    bestModels = []
    for col in range(len(yTrain[0])):
        search = grid_search.RandomizedSearchCV(
            ExtraTreesRegressor(),
            param_distributions=forestSpace,
            n_iter=10,
            scoring='mean_squared_error',
            cv=folds,
            n_jobs=-1)
        search.fit(XTrain, yTrain[:, col])
        bestModels.append(search.best_estimator_)

    currentDim = embedDim
    for _ in range(60):
        # The embedding dimension of the model grows by one every round ...
        XTrain, yTrain = pp.makeTrainset(array, currentDim, interval, 1)
        # ... and so does that of the input to predict from.
        XPredict = pp.makeXPredict(array, currentDim, interval, 1)

        forecasts = []
        for col in range(len(yTrain[0])):
            model = bestModels[col]
            model.fit(XTrain, yTrain[:, col])
            forecasts.append(model.predict(XPredict))

        # Use this round's predictions as known data for the next round.
        array = np.hstack((array, np.array(forecasts)))
        currentDim += 1

    # 60 days can be predicted in total; return exactly those.
    return array[0, -60:]
def train(array, embedDim, interval):
    """Tune one GradientBoostingRegressor per column; forecast in 7-day rounds.

    The randomized search runs once with a prediction distance of 7.  The
    selected estimators are then refitted for 9 rounds; each round appends
    its predictions to ``array`` as new columns and raises the embedding
    dimension by the same distance.

    Args:
        array: 2-D array (read as ``array[0, a:b]`` and grown with
            ``np.hstack``).  NOTE(review): presumably (series, days) --
            confirm against ``pp``.
        embedDim: starting embedding dimension for the ``pp`` helpers.
        interval: forwarded unchanged to the ``pp`` helpers.

    Returns:
        ``array[0, -62:-2]`` of the extended array.  Per the original note,
        63 days can be predicted in total and this slice is the part kept.
    """
    distance = 7
    rounds = 9

    XTrain, yTrain = pp.makeTrainset(array, embedDim, interval, distance)
    folds = cross_validation.KFold(len(XTrain), n_folds=5, shuffle=False)
    # NOTE(review): assuming scipy.stats distributions, randint excludes its
    # upper bound and uniform(loc, scale) spans [loc, loc + scale].
    boostingSpace = {
        'n_estimators': randint(20, 200),
        'loss': ['ls', 'lad', 'huber'],
        'learning_rate': uniform(0.01, 0.19),
        'subsample': uniform(0.5, 0.5),
        'max_depth': randint(1, 5),
        'min_samples_split': randint(1, 3),
        'min_samples_leaf': randint(1, 3),
        'max_features': randint(1, len(XTrain[0]))
    }

    bestModels = []
    for col in range(len(yTrain[0])):
        tuner = grid_search.RandomizedSearchCV(
            GradientBoostingRegressor(),
            param_distributions=boostingSpace,
            n_iter=30,
            scoring='mean_squared_error',
            cv=folds,
            n_jobs=-1)
        tuner.fit(XTrain, yTrain[:, col])
        bestModels.append(tuner.best_estimator_)

    for _ in range(rounds):
        # The embedding dimension of the model grows every round ...
        XTrain, yTrain = pp.makeTrainset(array, embedDim, interval, distance)
        # ... and so does that of the input to predict from.
        XPredict = pp.makeXPredict(array, embedDim, interval, distance)

        forecasts = []
        for col in range(len(yTrain[0])):
            booster = bestModels[col]
            booster.fit(XTrain, yTrain[:, col])
            forecasts.append(booster.predict(XPredict))

        # Use this round's predictions as known data for the next round.
        array = np.hstack((array, np.array(forecasts)))
        embedDim = embedDim + distance

    # 63 days can be predicted in total; keep the window that is needed.
    return array[0, -62:-2]
def train(array, embedDim, interval):
    """Select ExtraTrees models per target column and roll out 60 predictions.

    A randomized search (4 unshuffled folds, 10 candidates) picks one
    estimator per column of the distance-1 training set.  Those estimators
    are then refitted in 60 rounds; every round stacks its predictions onto
    ``array`` as new columns and increments the embedding dimension.

    Args:
        array: 2-D array (sliced as ``array[0, -60:]`` and extended via
            ``np.hstack``).  NOTE(review): layout presumably
            (series, days) -- confirm against ``pp``.
        embedDim: initial embedding dimension handed to the ``pp`` helpers.
        interval: handed to the ``pp`` helpers unchanged.

    Returns:
        ``array[0, -60:]`` of the extended array.  The original note says
        60 days can be predicted in total.
    """
    stepAhead = 1

    XTrain, yTrain = pp.makeTrainset(array, embedDim, interval, stepAhead)
    splitter = cross_validation.KFold(len(XTrain), n_folds=4, shuffle=False)
    nFeatures = len(XTrain[0])
    # NOTE(review): assuming scipy.stats.randint, the upper bound is exclusive.
    params = {
        "n_estimators": randint(5, 100),
        "max_depth": [1, 2, 3, 5, 8, 10, None],
        "max_features": randint(1, nFeatures),
        "min_samples_split": randint(1, 3),
        "min_samples_leaf": randint(1, 3)
    }

    # Stage 1: one tuned estimator per target column.
    bestModels = []
    nTargets = len(yTrain[0])
    for target in range(nTargets):
        tuner = grid_search.RandomizedSearchCV(
            ExtraTreesRegressor(),
            param_distributions=params,
            n_iter=10,
            scoring='mean_squared_error',
            cv=splitter,
            n_jobs=-1)
        tuner.fit(XTrain, yTrain[:, target])
        bestModels.append(tuner.best_estimator_)

    # Stage 2: 60 one-step rounds, each feeding its output into the next.
    remaining = 60
    while remaining > 0:
        # The embedding dimension of the model increases every round ...
        XTrain, yTrain = pp.makeTrainset(array, embedDim, interval, stepAhead)
        # ... as does that of the input to predict from.
        XPredict = pp.makeXPredict(array, embedDim, interval, stepAhead)

        roundOutput = []
        for target in range(len(yTrain[0])):
            estimator = bestModels[target]
            estimator.fit(XTrain, yTrain[:, target])
            roundOutput.append(estimator.predict(XPredict))

        # Predictions become known data for training the next round.
        array = np.hstack((array, np.array(roundOutput)))
        embedDim += 1
        remaining -= 1

    # 60 days can be predicted in total; return exactly those.
    yPredict = array[0, -60:]
    return yPredict
def train(array, embedDim, interval):
    """Forecast in 9 rounds of distance 7, re-tuning an SVR every round.

    Unlike a tune-once approach, each round runs a fresh randomized search
    per target column on that round's training set and predicts with the
    fitted search object.  The predictions are appended to ``array`` as new
    columns and the embedding dimension is raised by the distance.

    Args:
        array: 2-D array (read as ``array[0, a:b]`` and grown with
            ``np.hstack``).  NOTE(review): presumably (series, days) --
            confirm against ``pp``.
        embedDim: starting embedding dimension for the ``pp`` helpers.
        interval: forwarded unchanged to the ``pp`` helpers.

    Returns:
        ``array[0, -62:-2]`` of the extended array.  Per the original note,
        63 days can be predicted in total and this slice is the part kept.
    """
    distance = 7
    # The search space does not depend on the round, so build it once.
    # NOTE(review): with scipy.stats, uniform(loc, scale) covers
    # [loc, loc + scale].
    svrSpace = {
        'C': uniform(1, 99),
        'gamma': uniform(0.01, 0.29),
        'kernel': ['rbf', 'poly']
    }

    for _ in range(9):
        # Number of days the models predict in this round.
        XTrain, yTrain = pp.makeTrainset(array, embedDim, interval, distance)
        XPredict = pp.makeXPredict(array, embedDim, interval, distance)
        # The fold layout follows the current training-set size.
        folds = cross_validation.KFold(len(XTrain), n_folds=5, shuffle=False)

        forecasts = []
        for col in range(len(yTrain[0])):
            search = grid_search.RandomizedSearchCV(
                svm.SVR(),
                param_distributions=svrSpace,
                n_iter=10,
                cv=folds,
                scoring='mean_squared_error',
                n_jobs=1,
                verbose=0)
            search.fit(XTrain, yTrain[:, col])
            forecasts.append(search.predict(XPredict))

        # Use this round's predictions as known data for the next round.
        array = np.hstack((array, np.array(forecasts)))
        embedDim += distance

    # 63 days can be predicted in total; keep the window that is needed.
    return array[0, -62:-2]
def train(array, embedDim, interval):
    """Run 9 forecasting rounds of distance 7 with a per-round SVR search.

    Every round rebuilds the training set, runs one randomized search per
    target column, predicts with the fitted search object, stacks the
    predictions onto ``array`` as new columns and increases the embedding
    dimension by the distance.

    Args:
        array: 2-D array (sliced as ``array[0, a:b]`` and extended via
            ``np.hstack``).  NOTE(review): layout presumably
            (series, days) -- confirm against ``pp``.
        embedDim: initial embedding dimension handed to the ``pp`` helpers.
        interval: handed to the ``pp`` helpers unchanged.

    Returns:
        ``array[0, -62:-2]`` of the extended array.  The original note says
        63 days can be predicted in total and this is the part to keep.
    """
    distance = 7
    totalRounds = 9
    # Loop-invariant search space.
    # NOTE(review): with scipy.stats, uniform(loc, scale) covers
    # [loc, loc + scale].
    params = {
        'C': uniform(1, 99),
        'gamma': uniform(0.01, 0.29),
        'kernel': ['rbf', 'poly']
    }

    completed = 0
    while completed < totalRounds:
        # Number of days the models predict in this round.
        XTrain, yTrain = pp.makeTrainset(array, embedDim, interval, distance)
        XPredict = pp.makeXPredict(array, embedDim, interval, distance)
        splitter = cross_validation.KFold(
            len(XTrain), n_folds=5, shuffle=False)

        roundOutput = []
        nTargets = len(yTrain[0])
        for target in range(nTargets):
            tuner = grid_search.RandomizedSearchCV(
                svm.SVR(),
                param_distributions=params,
                n_iter=10,
                cv=splitter,
                scoring='mean_squared_error',
                n_jobs=1,
                verbose=0)
            tuner.fit(XTrain, yTrain[:, target])
            roundOutput.append(tuner.predict(XPredict))

        # Predictions become known data for training the next round.
        stacked = np.array(roundOutput)
        array = np.hstack((array, stacked))
        embedDim = embedDim + distance
        completed += 1

    # 63 days can be predicted in total; keep the matching window.
    yPredict = array[0, -62:-2]
    return yPredict
def foldPredict(tracelist, embedDim, interval, distance):
    """Build the prediction input for every array in ``tracelist``.

    Args:
        tracelist: iterable of arrays, each passed to ``pp.makeXPredict``.
        embedDim: forwarded to ``pp.makeXPredict``.
        interval: forwarded to ``pp.makeXPredict``.
        distance: forwarded to ``pp.makeXPredict``.

    Returns:
        A list holding, in input order, a copy of each
        ``pp.makeXPredict`` result.
    """
    return [
        copy(pp.makeXPredict(trace, embedDim, interval, distance))
        for trace in tracelist
    ]