def mlp_bench(x_train, y_train, x_test, fh):
    """
    Forecasts using a simple MLP with 6 nodes in the hidden layer.

    :param x_train: train input data
    :param y_train: target values for training
    :param x_test: test data
    :param fh: forecasting horizon
    :return: forecasts for the next fh periods
    """
    y_hat_test = []
    model = MLPRegressor(hidden_layer_sizes=6, activation='identity', solver='adam',
                         max_iter=100, learning_rate='adaptive', learning_rate_init=0.001,
                         random_state=42)
    model.fit(x_train, y_train)
    last_prediction = model.predict(x_test)[0]
    for i in range(0, fh):
        y_hat_test.append(last_prediction)
        # shift the input window one step and append the latest forecast
        x_test[0] = np.roll(x_test[0], -1)
        x_test[0, (len(x_test[0]) - 1)] = last_prediction
        last_prediction = model.predict(x_test)[0]
    return np.asarray(y_hat_test)
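# Usage sketch for mlp_bench above: it forecasts recursively, pushing each new
# prediction into the input window before predicting the next step. The toy
# series, the window length and the make_windows helper below are illustrative
# assumptions, not part of the original code.
import numpy as np
from sklearn.neural_network import MLPRegressor


def make_windows(series, window=12):
    # hypothetical helper: turn a 1-D series into (lag-window, next-value) pairs
    x = np.array([series[i:i + window] for i in range(len(series) - window)])
    y = series[window:]
    return x, y


series = np.sin(np.linspace(0, 20, 200))  # assumed toy series
x_train, y_train = make_windows(series[:-12])
x_test = series[-12:].reshape(1, -1)      # last observed window
print(mlp_bench(x_train, y_train, x_test, fh=6))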
def test_multioutput_regression():
    # Test that multi-output regression works as expected
    X, y = make_regression(n_samples=200, n_targets=5)
    mlp = MLPRegressor(solver='lbfgs', hidden_layer_sizes=50, max_iter=200,
                       random_state=1)
    mlp.fit(X, y)
    assert_greater(mlp.score(X, y), 0.9)
def _create_first_population(self):
    self._current_population = []
    for _ in range(self._n_individuals):
        mlp = MLPRegressor(hidden_layer_sizes=self._nn_architecture,
                           alpha=10**-10, max_iter=1)
        mlp.fit([np.random.randn(self._n_features)],
                [np.random.randn(self._n_actions)])
        mlp.out_activation_ = 'softmax'
        self._current_population.append([mlp, 0])
def _create_new_nn(self, weights, biases):
    mlp = MLPRegressor(hidden_layer_sizes=self._nn_architecture,
                       alpha=10**-10, max_iter=1)
    mlp.fit([np.random.randn(self._n_features)],
            [np.random.randn(self._n_actions)])
    mlp.coefs_ = weights
    mlp.intercepts_ = biases
    mlp.out_activation_ = 'softmax'
    return mlp
def construct_train(train_length, **kwargs):
    """Train and test model with given input window and number of neurons in layer."""
    start_cur_position = 0
    steps, steplen = observations.size // (2 * train_length), train_length
    if 'hidden_layer' in kwargs:
        network = MLPRegressor(hidden_layer_sizes=kwargs['hidden_layer'])
    else:
        network = MLPRegressor()
    quality = []

    # fit model - configure parameters
    network.fit(observations[start_cur_position:train_length][:, 1].reshape(1, train_length),
                observations[:, 1][start_cur_position:train_length].reshape(1, train_length))
    parts = []

    # calculate predicted values
    # for each step add all predicted values to a list
    # TODO: add some parallelism here
    for i in range(0, steps):
        parts.append(network.predict(observations[start_cur_position:train_length][:, 1]))
        start_cur_position += steplen
        train_length += steplen

    # estimate model quality using mean squared error
    result = np.array(parts).flatten().tolist()
    for valnum, value in enumerate(result):
        quality.append((value - observations[valnum][1]) ** 2)

    return sum(quality) / len(quality)
def test_lbfgs_regression():
    # Test lbfgs on the boston dataset, a regression problem.
    X = Xboston
    y = yboston
    for activation in ACTIVATION_TYPES:
        mlp = MLPRegressor(solver='lbfgs', hidden_layer_sizes=50,
                           max_iter=150, shuffle=True, random_state=1,
                           activation=activation)
        mlp.fit(X, y)
        assert_greater(mlp.score(X, y), 0.95)
def GetOptimalCLF2(train_x, train_y, rand_starts=8):
    '''
    Gets the optimal CLF function based on fixed settings

    Parameters
    ------------------------
    train_x - np.array
        Training feature vectors
    train_y - np.array
        Training label vectors
    rand_starts - int
        Number of random starts to do
        Default - 8 for 95% confidence and best 30%

    Returns
    ------------------------
    max_clf - sklearn function
        Optimal trained artificial neural network
    '''
    #### Get number of feature inputs of training vector
    n_input = train_x.shape[1]

    #### Set initial loss value
    min_loss = 1e10

    #### Perform number of trainings according to random start set
    for i in range(rand_starts):
        #### Print current status
        print("Iteration number {}".format(i + 1))

        #### Initialize ANN network
        clf = MLPRegressor(hidden_layer_sizes=(int(round(2 * np.sqrt(n_input), 0)), 1),
                           activation='logistic', solver='sgd', learning_rate='adaptive',
                           max_iter=100000000, tol=1e-10, early_stopping=True,
                           validation_fraction=1 / 3.)

        #### Fit data
        clf.fit(train_x, train_y)

        #### Get current loss
        cur_loss = clf.loss_

        #### Save current clf if loss is minimum
        if cur_loss < min_loss:
            #### Set min_loss to a new value
            min_loss = cur_loss
            #### Set max_clf to new value
            max_clf = clf

    return max_clf
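# Usage sketch for GetOptimalCLF2 above: SGD training is sensitive to the random
# weight initialization, so the function keeps the network with the lowest final
# loss over several restarts. The synthetic data below is an assumption purely
# for illustration.
import numpy as np

rng = np.random.RandomState(0)
train_x = rng.rand(200, 5)                                 # assumed feature matrix
train_y = train_x.dot(rng.rand(5)) + 0.1 * rng.randn(200)  # assumed noisy target

best_clf = GetOptimalCLF2(train_x, train_y, rand_starts=3)
print(best_clf.loss_)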
def MLP_Regressor(train_x, train_y):
    clf = MLPRegressor(alpha=1e-05, batch_size='auto', beta_1=0.9, beta_2=0.999,
                       early_stopping=False, epsilon=1e-08, hidden_layer_sizes=([8, 8]),
                       learning_rate='constant', learning_rate_init=0.01, max_iter=500,
                       momentum=0.9, nesterovs_momentum=True, power_t=0.5, random_state=1,
                       shuffle=True, tol=0.0001, validation_fraction=0.1, verbose=False,
                       warm_start=False)
    clf.fit(train_x, train_y)
    # score = metrics.accuracy_score(clf.predict((train_x)), (train_y))
    # print(score)
    return clf
def test_shuffle():
    # Test that the shuffle parameter affects the training process (it should)
    X, y = make_regression(n_samples=50, n_features=5, n_targets=1, random_state=0)

    # The coefficients will be identical if both do or do not shuffle
    for shuffle in [True, False]:
        mlp1 = MLPRegressor(hidden_layer_sizes=1, max_iter=1, batch_size=1,
                            random_state=0, shuffle=shuffle)
        mlp2 = MLPRegressor(hidden_layer_sizes=1, max_iter=1, batch_size=1,
                            random_state=0, shuffle=shuffle)
        mlp1.fit(X, y)
        mlp2.fit(X, y)
        assert np.array_equal(mlp1.coefs_[0], mlp2.coefs_[0])

    # The coefficients will be slightly different if shuffle=True
    mlp1 = MLPRegressor(hidden_layer_sizes=1, max_iter=1, batch_size=1,
                        random_state=0, shuffle=True)
    mlp2 = MLPRegressor(hidden_layer_sizes=1, max_iter=1, batch_size=1,
                        random_state=0, shuffle=False)
    mlp1.fit(X, y)
    mlp2.fit(X, y)
    assert not np.array_equal(mlp1.coefs_[0], mlp2.coefs_[0])
def test_lbfgs_regression():
    # Test lbfgs on the boston dataset, a regression problem.
    X = Xboston
    y = yboston
    for activation in ACTIVATION_TYPES:
        mlp = MLPRegressor(solver='lbfgs', hidden_layer_sizes=50,
                           max_iter=150, shuffle=True, random_state=1,
                           activation=activation)
        mlp.fit(X, y)
        if activation == 'identity':
            assert_greater(mlp.score(X, y), 0.84)
        else:
            # Non linear models perform much better than linear bottleneck:
            assert_greater(mlp.score(X, y), 0.95)
def train_model(x_train, y_train, alpha=1e-3, hid_layers=[512], max_iter=100):
    """
    Train model on training data.

    :param x_train: training examples
    :param y_train: target variables
    :param alpha: L2 regularization coefficient
    :param hid_layers: hidden layer sizes
    :param max_iter: maximum number of iterations in L-BFGS optimization
    :return: a model trained as a neural network
    """
    nn_model = MLPRegressor(solver='lbfgs', hidden_layer_sizes=hid_layers,
                            alpha=alpha, max_iter=max_iter,
                            activation="relu", random_state=1)
    nn_model.fit(x_train, y_train)
    return nn_model
def test_partial_fit_regression():
    # Test partial_fit on regression.
    # `partial_fit` should yield the same results as 'fit' for regression.
    X = Xboston
    y = yboston

    for momentum in [0, .9]:
        mlp = MLPRegressor(solver='sgd', max_iter=100, activation='relu',
                           random_state=1, learning_rate_init=0.01,
                           batch_size=X.shape[0], momentum=momentum)
        with warnings.catch_warnings(record=True):
            # catch convergence warning
            mlp.fit(X, y)
        pred1 = mlp.predict(X)
        mlp = MLPRegressor(solver='sgd', activation='relu',
                           learning_rate_init=0.01, random_state=1,
                           batch_size=X.shape[0], momentum=momentum)
        for i in range(100):
            mlp.partial_fit(X, y)

        pred2 = mlp.predict(X)
        assert_almost_equal(pred1, pred2, decimal=2)
        score = mlp.score(X, y)
        assert_greater(score, 0.75)
total_Y_train = ff_y_train + gblur_y_train + wn_y_train + jpeg_y_train + jp2k_y_train

# nn = MLPRegressor(hidden_layer_sizes=(981, 1), max_iter=1000)
# nn.fit(total_X_train, total_Y_train)
# logreg = LogisticRegression(C=1000, random_state=0, solver='lbfgs',
#                             multi_class='multinomial', max_iter=1000)
# lab = preprocessing.LabelEncoder()
# logreg.fit(total_X_train, lab.fit_transform(total_Y_train))
# print("fit is done")
# answ = lab.inverse_transform(logreg.predict(total_X_test))

nn = MLPRegressor(hidden_layer_sizes=(1000, 1), activation='logistic', max_iter=1000, solver='lbfgs')
nn.fit(total_X_train, total_Y_train)
answ = nn.predict(total_X_test)

# cc = svm.SVR()
# sw = cc.fit(finalX_train, FinalY_train)
# answ = nn.predict(total_X_test)

print(answ)
print(total_Y_test)
print("pearson")
# print(pearsonr(answ, total_Y_test))
vb = nmp.corrcoef(answ, total_Y_test)
# print(vb)
# nn = MLPRegressor(hidden_layer_sizes=(227, 1), max_iter=500)
# nn.fit(finalX_train, FinalY_train)
load_q = profiles[("load", "q_mvar")] import pandas as pd # X = pd.concat([sgen_p, load_p, load_q], axis=1) X = pd.read_json("./res_bus/vm_pu.json") y = pd.read_json("./res_line/loading_percent.json") from sklearn.model_selection import train_test_split X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.1) from sklearn.preprocessing import StandardScaler scaler = StandardScaler() X_train = scaler.fit_transform(X_train) X_test = scaler.transform(X_test) y_train = scaler.fit_transform(y_train) y_test = scaler.transform(y_test) from sklearn.neural_network import MLPRegressor ann = MLPRegressor(verbose=1) ann.fit(X_train, y_train) y_predict = ann.predict(X_test) # y_predict = scaler.inverse_transform(y_predict) import matplotlib.pyplot as plt plt.plot(y_test[3000:3100, 99], linestyle="--", label="cl") plt.plot(y_predict[3000:3100, 99], linestyle="-", label="pl") plt.legend() plt.show() print('testing')
'''
base = pd.read_csv('C:\\Users\\André Viniciu\\Documents\\Python_ML\\Curso\\Secao 15 - Outros tipos de regressao\\plano_saude2.csv')
x = base.iloc[:, 0:1].values
y = base.iloc[:, 1].values

# Scaling
scaler_x = StandardScaler()
x = scaler_x.fit_transform(x)
scaler_y = StandardScaler()
y = scaler_y.fit_transform(y.reshape(-1, 1))

# Regressor
regressor = MLPRegressor()
regressor.fit(x, y)
regressor.score(x, y)

# Plot
plt.scatter(x, y)
plt.plot(x, regressor.predict(x), color='r')
'''

''' Housing Prices '''
base = pd.read_csv('C:\\Users\\André Viniciu\\Documents\\Python_ML\\Curso\\Secao 14 - Regressao Linear\\house_prices.csv')

# Values
x = base.iloc[:, 3:19].values
y = base.iloc[:, 3].values
def get_stacking_model():
    model = MLPRegressor(hidden_layer_sizes=(20, 20))
    X_train, y_train, _, _ = get_data()
    model.fit(X_train, y_train)
    return model
def forecast_all(): training_end = datetime.datetime.strptime('2016-06-25', '%Y-%m-%d') training_start = datetime.datetime.strptime('2016-02-29', '%Y-%m-%d') test_end = datetime.datetime.strptime('2016-07-25', '%Y-%m-%d') start = datetime.datetime.strptime('2016-02-01', '%Y-%m-%d') all_data = [] training_x = [] training_y = [] test_x = [] test_y = [] # 首先合并特征工程 for i, row in feature_i.iterrows(): row['sldatime'] = ( datetime.datetime.strptime(row['sldatime'], '%Y-%m-%d') - start).days all_data.append([]) j = 0 while j < len(row): all_data[i].append(row[j]) j += 1 for i, row in feature_iv.iterrows(): j = 1 while j < len(row): all_data[i].append(row[j]) j += 1 for i, row in feature_ii.iterrows(): j = 1 while j < len(row): all_data[i].append(row[j]) j += 1 for i, row in feature_v.iterrows(): j = 1 while j < len(row): all_data[i].append(row[j]) j += 1 for i, row in feature_iii.iterrows(): j = 1 while j < len(row): all_data[i].append(row[j]) j += 1 for i, row in feature_vi.iterrows(): j = 1 while j < len(row): all_data[i].append(row[j]) j += 1 # 首先划分训练集和测试集 for row in all_data: time = start + datetime.timedelta(days=row[6]) if training_start < time < training_end: training_x.append(row) training_y.append(row[8]) elif training_end < time < test_end: test_x.append(row) test_y.append(row[8]) # 重复预测7次 result = [] id = 0 # 初始化result while id < len(test_x): result.append([]) result[id].append(test_x[id][0]) result[id].append(test_x[id][6]) result[id].append(test_x[id][8] / 1000) id += 1 history_dict = get_pluno_dict() bndno_dict = get_bndno_dict() pluno_level_dict = [ get_pluno_level_dict(1000), get_pluno_level_dict(10000), get_pluno_level_dict(100000), get_pluno_level_dict(1000000) ] day = 0 while day < 7: # 预测 # clf = RandomForestClassifier() # linear,poly,rbf # clf = SVR(kernel="poly") # clf = tree.DecisionTreeClassifier(criterion='entropy') clf = MLPRegressor() clf.fit(training_x, training_y) predict_y = clf.predict(test_x) print("predict ok") i = 0 while i < len(test_x): # 保存到结果字典result中 pluno = test_x[i][0] bndno = int(pluno / 1000) pluno_level = [ int(pluno / 1000), int(pluno / 10000), int(pluno / 100000), int(pluno / 1000000) ] date_str = datetime.datetime.strftime( start + datetime.timedelta(days=test_x[i][6]), '%Y-%m-%d') test_x[i][8] = predict_y[i] result[i].append(predict_y[i] / 1000) if day == 0: training_x.append(test_x[i]) training_y.append(test_x[i][8]) # 从未来第一天开始更新 if day > 0: # 更新时间序列字典 history_dict[pluno][date_str] += predict_y[i] bndno_dict[bndno][date_str] += predict_y[i] lev = 0 while lev < 4: id = pluno_level[lev] pluno_level_dict[lev][id][date_str] += predict_y[i] lev += 1 rec = test_x[i] # 更新d-1/d-7 j = 0 lastweek = last_week_list(date_str) for date in lastweek: min_date = datetime.datetime.strptime( '2016-02-01', '%Y-%m-%d') this_date = datetime.datetime.strptime(date, '%Y-%m-%d') if this_date > min_date: rec[9 + j] = history_dict[pluno][date] rec[25 + j] = bndno_dict[bndno][date] rec[41 + j] = pluno_level_dict[0][pluno_level[0]][date] rec[48 + j] = pluno_level_dict[1][pluno_level[1]][date] rec[55 + j] = pluno_level_dict[2][pluno_level[2]][date] rec[62 + j] = pluno_level_dict[3][pluno_level[3]][date] else: rec[9 + j] = 0.0 rec[25 + j] = 0.0 rec[41 + j] = 0.0 rec[48 + j] = 0.0 rec[55 + j] = 0.0 rec[62 + j] = 0.0 j += 1 # 更新avg、max、min week_list = past_week_list(date_str) avg_pluno = avg_bndno = 0.0 max_pluno = max_bndno = 0.0 min_pluno = min_bndno = float('inf') week_index = 0 for week in week_list: for date in week: min_date = datetime.datetime.strptime( '2016-02-01', 
'%Y-%m-%d') this_date = datetime.datetime.strptime( date, '%Y-%m-%d') if this_date > min_date: avg_pluno += history_dict[pluno][date] avg_bndno += bndno_dict[bndno][date] if history_dict[pluno][date] > max_pluno: max_pluno = history_dict[pluno][date] if bndno_dict[bndno][date] > max_bndno: max_bndno = bndno_dict[bndno][date] if history_dict[pluno][date] < min_pluno: min_pluno = history_dict[pluno][date] if bndno_dict[bndno][date] < min_bndno: min_bndno = bndno_dict[bndno][date] else: min_pluno = 0.0 min_bndno = 0.0 avg_pluno = avg_pluno / 7 avg_bndno = avg_bndno / 7 rec[16 + 3 * week_index] = avg_pluno rec[17 + 3 * week_index] = max_pluno rec[18 + 3 * week_index] = min_pluno rec[32 + 3 * week_index] = avg_bndno rec[33 + 3 * week_index] = max_bndno rec[34 + 3 * week_index] = min_bndno avg_pluno = avg_bndno = 0.0 max_pluno = max_bndno = 0.0 min_pluno = min_bndno = float('inf') week_index += 1 avg = 0.0 max = 0.0 min = float('inf') start_index = 69 lev = 0 while lev < 4: pluno_dict = pluno_level_dict[lev] pluno_index = pluno_level[lev] week_index = 0 for week in week_list: for date in week: min_date = datetime.datetime.strptime( '2016-02-01', '%Y-%m-%d') this_date = datetime.datetime.strptime( date, '%Y-%m-%d') if this_date > min_date: avg += pluno_dict[pluno_index][date] if pluno_dict[pluno_index][date] > max: max = pluno_dict[pluno_index][date] if pluno_dict[pluno_index][date] < min: min = pluno_dict[pluno_index][date] else: min = 0.0 avg = avg / 7 rec[start_index + 3 * week_index] = avg rec[start_index + 1 + 3 * week_index] = max rec[start_index + 2 + 3 * week_index] = min avg = 0.0 max = 0.0 min = float('inf') week_index += 1 start_index += 9 lev += 1 training_x.append(rec) training_y.append(rec[8]) i += 1 # 更新日期 for row in test_x: row[6] += 1 date = datetime.datetime.strftime( start + datetime.timedelta(days=row[6]), '%Y-%m-%d') if is_weekday(date): row[7] = 1 else: row[7] = 0 print(day) day += 1 # 将预测结果写入csv head = [ 'pluno', 'time', 'qty', 'd', 'd+1', 'd+2', 'd+3', 'd+4', 'd+5', 'd+6' ] # 创建文件对象 path = "MLP/forecast_all.csv" f = open(path, 'w', encoding='utf-8', newline='' "") # 基于文件对象构建 csv写入对象 csv_writer = csv.writer(f) # 构建列表头 csv_writer.writerow(head) # 创建每一行数据 for row in result: csv_writer.writerow(row) # 关闭文件 f.close()
def play(self, wave):
    if wave.shape[0] > self.n_output:
        wave = wave[:self.n_output]
    if wave.shape[0] < self.n_output:
        # np.pad returns a new array, so the result has to be assigned back
        wave = np.pad(wave, (0, self.n_output - wave.shape[0]), 'constant')
    sd.play(self.model.predict(np.array([wave]))[0])

def process(self, input_file, output_file):
    wave = cnvlute.utils.loadfile(input_file)
    if wave.shape[0] > self.n_output:
        wave = wave[:self.n_output]
    if wave.shape[0] < self.n_output:
        wave = np.pad(wave, (0, self.n_output - wave.shape[0]), 'constant')
    wav.write(output_file, self.model.predict(np.array([wave]))[0], srate)

def store(self, filename):
    with open(filename, 'wb') as fd:
        pickle.dump(self, fd, protocol=pickle.HIGHEST_PROTOCOL)

if __name__ == '__main__':
    files = sys.argv[1]
    max_len = int(sys.argv[2])
    n_hidden = int(sys.argv[3])
    model_file = sys.argv[4]
    model = MLPRegressor(hidden_layer_sizes=(n_hidden, max_len))
    X = data
    y = np.array(data)
    model = model.fit(X, y)
    prediction = model.predict(data)
from sklearn.neural_network import MLPRegressor
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error

data = pd.read_csv('network_backup_dataset.csv')
train = data.loc[:, ['WeekNumber', 'DayofWeek', 'BackupStartTime', 'WorkFlowID', 'FileName', 'BackupTime']]
target = data.loc[:, ['SizeofBackup']]

mlp = MLPRegressor(solver='sgd', hidden_layer_sizes=150, max_iter=200, shuffle=False, random_state=1)
mlp.fit(train, target)
prediction = mlp.predict(train)

plt.plot(prediction, label='Prediction', color='red')
plt.plot(target, label='Real Data', color='blue')
plt.title('Copy Size versus Time based on Neural Network Regression')
plt.xlabel('Time')
plt.ylabel('Copy Size')
plt.legend()
plt.show()

rmse = mean_squared_error(target.SizeofBackup, prediction) ** 0.5
print(rmse)
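# Hedged variant of the snippet above: an MLPRegressor trained with SGD usually
# benefits from standardized inputs, so the same estimator can be wrapped in a
# Pipeline with StandardScaler. This is a suggestion, not the original author's
# code; `train` and `target` are the DataFrames built above.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

pipeline = make_pipeline(
    StandardScaler(),
    MLPRegressor(solver='sgd', hidden_layer_sizes=150, max_iter=200,
                 shuffle=False, random_state=1),
)
pipeline.fit(train, target.SizeofBackup)
print(pipeline.score(train, target.SizeofBackup))  # R^2 on the training data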
def execute(trial=False): '''Retrieve some data sets (not using the API here for the sake of simplicity).''' startTime = datetime.datetime.now() # Set up the database connection. client = dml.pymongo.MongoClient() repo = client.repo repo.authenticate('angelay_maulikjs', 'angelay_maulikjs') repo.dropPermanent('angelay_maulikjs') repo.createPermanent('angelay_maulikjs') data = repo.angelay_maulikjs.clean2012.find() y = [] x = [] for document in data: d = dict(document) tmp = [] tmp.append(d['CarbonIntensity']) tmp.append(d['EnergyIntensity']) tmp.append(d['EnergyUse']) tmp.append(d['GDPperCapita']) tmp.append(d['HDI']) tmp.append(d['Population']) y.append(d['CO2Emissions']) x.append(tmp) data2013 = repo.angelay_maulikjs.clean2013.find() y13 = [] x13 = [] for document in data2013: d = dict(document) tmp = [] tmp.append(d['CarbonIntensity']) tmp.append(d['EnergyIntensity']) tmp.append(d['EnergyUse']) tmp.append(d['GDPperCapita']) tmp.append(d['HDI']) tmp.append(d['Population']) y13.append(d['CO2Emissions']) x13.append(tmp) scaler = StandardScaler() scaler.fit(x) X_train = scaler.transform(x) # scaler13 = StandardScaler() # scaler13.fit(x13) X_train13 = scaler.transform(x13) # df.to_csv('test.csv', index=False, header=False) # clf = 0 clf = MLPRegressor(hidden_layer_sizes=(7776, ), max_iter=1000, learning_rate_init=0.001, momentum=0.4, alpha=0.01) neural_model = clf.fit(X_train, y) validation_data_predictions = clf.predict(X_train13) r2_error = r2_score(y_true=y13, y_pred=validation_data_predictions) fig, ax = plt.subplots() x1 = range(len(validation_data_predictions)) ax.plot(x1, y13, 'o', label="Actual Data (2013)") ax.plot(x1, validation_data_predictions, 'r', label="Multilayer Perceptron Predicted Data (2013)") ax.legend(loc="best") plt.savefig('angelay_maulikjs/MLP.png', bbox_inches='tight') repo.logout() endTime = datetime.datetime.now() return {"start": startTime, "end": endTime}
class ANNSurrogate(SurrogateModel): """Multi-layer Perceptron regressor. This algorithm optimizes the squared-loss using l-bfgs or gradient descent. :param hidden_layer_sizes: The ith element represents the number of neurons in the ith hidden layer. :type hidden_layer_sizes: tuple, length = n_layers - 2, default (100,) :param activation: Activation function for the hidden layer. - 'logistic', the logistic sigmoid function, returns f(x) = 1 / (1 + exp(-x)). - 'tanh', the hyperbolic tan function, returns f(x) = tanh(x). - 'relu', the rectified linear unit function, returns f(x) = max(0, x) :type activation: {'logistic', 'tanh', 'relu'}, default 'relu' :param algorithm: The algorithm for weight optimization. - 'l-bfgs' is an optimization algorithm in the family of quasi-Newton methods. - 'sgd' refers to stochastic gradient descent. - 'adam' refers to a stochastic gradient-based optimization algorithm proposed by Kingma, Diederik, and Jimmy Ba .. note:: The default algorithm 'adam' works pretty well on relatively large datasets (with thousands of training samples or more) in terms of both training time and validation score. For small datasets, however, 'l-bfgs' can converge faster and perform better. :type algorithm: {'logistic', 'tanh', 'relu'}, default 'relu' :param alpha: L2 penalty (regularization term) parameter. :type alpha: float, optional, default 0.0001 :param batch_size: Size of minibatches for stochastic optimizers. If the algorithm is 'l-bfgs', the classifier will not use minibatch. :type batch_size: int, optional, default 200 :param learning_rate: Learning rate schedule for weight updates. - 'constant', is a constant learning rate given by 'learning_rate_init'. - 'invscaling' gradually decreases the learning rate ``learning_rate_`` at each time step 't' using an inverse scaling exponent of 'power_t'. effective_learning_rate = learning_rate_init / pow(t, power_t) - 'adaptive', keeps the learning rate constant to 'learning_rate_init' as long as training loss keeps decreasing. Each time two consecutive epochs fail to decrease training loss by at least tol, or fail to increase validation score by at least tol if 'early_stopping' is on, the current learning rate is divided by 5. Only used when algorithm='sgd'. :type learning_rate: {'constant', 'invscaling', 'adaptive'}, default 'constant' :param max_iter: Maximum number of iterations. The algorithm iterates until convergence (determined by 'tol') or this number of iterations. :type max_iter: int, optional, default 200 :param random_state: State or seed for random number generator. :type random_state: int or RandomState, optional, default None :param shuffle: Whether to shuffle samples in each iteration. Only used when algorithm='sgd' or 'adam'. :type shuffle: bool, optional, default True :param tol: Tolerance for the optimization. When the loss or score is not improving by at least tol for two consecutive iterations, unless `learning_rate` is set to 'adaptive', convergence is considered to be reached and training stops. :type tol: float, optional, default 1e-4 :param learning_rate_init: The initial learning rate used. It controls the step-size in updating the weights. Only used when algorithm='sgd' or 'adam'. :type learning_rate_init: double, optional, default 0.001 :param power_t: The exponent for inverse scaling learning rate. It is used in updating effective learning rate when the learning_rate is set to 'invscaling'. Only used when algorithm='sgd'. 
:type power_t: double, optional, default 0.5 :param verbose: Whether to print progress messages to stdout. :type verbose: bool, optional, default False :param warm_start: When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. :type warm_start: bool, optional, default False :param momentum: Momentum for gradient descent update. Should be between 0 and 1. Only used when algorithm='sgd'. :type momentum: float, default 0.9 :param nesterovs_momentum: Whether to use Nesterov's momentum. Only used when algorithm='sgd' and momentum > 0. :type nesterovs_momentum: boolean, default True :param early_stopping: Whether to use early stopping to terminate training when validation score is not improving. If set to true, it will automatically set aside 10% of training data as validation and terminate training when validation score is not improving by at least tol for two consecutive epochs. Only effective when algorithm='sgd' or 'adam' :type early_stopping: bool, default False :param validation_fraction: The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if early_stopping is True :type validation_fraction: float, optional, default 0.1 :param beta_1: Exponential decay rate for estimates of first moment vector in adam, should be in [0, 1). Only used when algorithm='adam' :type beta_1: float, optional, default 0.9 :param beta_2: Exponential decay rate for estimates of second moment vector in adam, should be in [0, 1). Only used when algorithm='adam' :type beta_2: float, optional, default 0.999 :param epsilon: Value for numerical stability in adam. Only used when algorithm='adam' :type epsilon: float, optional, default 1e-8 .. note:: MLPRegressor trains iteratively since at each time step the partial derivatives of the loss function with respect to the model parameters are computed to update the parameters. It can also have a regularization term added to the loss function that shrinks model parameters to prevent overfitting. This implementation works with data represented as dense and sparse numpy arrays of floating point values. .. [Hinton1989] Hinton, Geoffrey E., "Connectionist learning procedures." Artificial intelligence 40.1 (1989): 185-234. .. [Glorot2010] Glorot, Xavier, and Yoshua Bengio., "Understanding the difficulty of training deep feedforward neural networks." International Conference on Artificial Intelligence and Statistics. 2010. .. [He2015] He, Kaiming, et al., "Delving deep into rectifiers: Surpassing human-level performance on imagenet classification." arXiv preprint arXiv:1502.01852 (2015). .. [Kingma2014] Kingma, Diederik, and Jimmy Ba., "Adam: A method for stochastic optimization." arXiv preprint arXiv:1412.6980 (2014). 
""" def __init__(self, hidden_layer_sizes=(100, ), activation="relu", algorithm='adam', alpha=0.0001, batch_size=200, learning_rate="constant", learning_rate_init=0.001, power_t=0.5, max_iter=200, shuffle=True, random_state=None, tol=1e-4, verbose=False, warm_start=False, momentum=0.9, nesterovs_momentum=True, early_stopping=False, validation_fraction=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-8): super(ANNSurrogate, self).__init__() # self.hidden_layer_sizes=(100,) # self.activation="relu" # self.algorithm='adam' # self.alpha=0.0001 # self.batch_size=200 # self.learning_rate="constant" # self.learning_rate_init=0.001 # self.power_t=0.5 # self.max_iter=200 # self.shuffle=True # self.random_state=None # self.tol=1e-4 # self.verbose=False # self.warm_start=False # self.momentum=0.9 # self.nesterovs_momentum=True # self.early_stopping=False # self.validation_fraction=0.1 # self.beta_1=0.9 # self.beta_2=0.999 # self.epsilon=1e-8 # self.x = None # self.y = None # algorithm => solver self.__model = MLPRegressor(hidden_layer_sizes=hidden_layer_sizes, activation=activation, solver=algorithm, alpha=alpha, batch_size=batch_size, learning_rate=learning_rate, learning_rate_init=learning_rate_init, power_t=power_t, max_iter=max_iter, shuffle=shuffle, random_state=random_state, tol=tol, verbose=verbose, warm_start=warm_start, momentum=momentum, nesterovs_momentum=nesterovs_momentum, early_stopping=early_stopping, validation_fraction=validation_fraction, beta_1=beta_1, beta_2=beta_2, epsilon=epsilon) def fit(self, x, y): super(ANNSurrogate, self).fit(x, y) # print 'fit' self.__model.fit(x, y) def predict(self, x): super(ANNSurrogate, self).predict(x) # print 'predict' y = self.__model.predict(x) return y def predict_proba(self, x): pass
X = np.array([Jref1, Jdet1, Width1]).reshape(-1, 3)  # the x array (Jref, Jdet, Width) is now ready

# dimensionality reduction
# pca = decomposition.PCA(n_components=2)
# X = pca.fit_transform(X)

# 70/30 data split
X_train, X_test, y_train, y_test = train_test_split(X, Yerror, test_size=0.30, random_state=40)

# Now the network
lichi = MLPRegressor(solver='adam', alpha=1e-4, hidden_layer_sizes=(4, 3),
                     max_iter=200, tol=1e-4, random_state=1)
lichi.fit(X_train, y_train)  # train it

predict_train = lichi.predict(X_train)
predict_test = lichi.predict(X_test)

# check R^2
print(r2_score(y_test, predict_test))   # R^2 on the test data, to see how it does
print(lichi.score(X_train, y_train))    # R^2 of the regression on the training data
def run(self): processed_train = pd.read_csv('preprocessed_train.csv') X = processed_train.drop('count', axis=1) Y = processed_train['count'] #Since data is timeseries hence we MUST keep the shuffle parameter as FALSE !! X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.30, shuffle=False) global error_metric1, error_metric2 error_metric1 = pd.DataFrame({ 'Training RMSLE': [], 'Training R^2': [], 'Testing RMSLE': [], 'Testing R^2': [] }) error_metric2 = pd.DataFrame({ 'Training RMSLE': [], 'Training R^2': [], 'Testing RMSLE': [], 'Testing R^2': [] }) def scaled_model_stats(model, model_name, X_train, Y_train, X_test, Y_test): global error_metric1 ytr = np.array(Y_train).reshape(len(Y_train), 1) yte = np.array(Y_test).reshape(len(Y_test), 1) Y_train_scaled = scaler.fit_transform(ytr).ravel() Y_test_scaled = scaler.fit_transform(yte).ravel() X_train_scaled = scaler.fit_transform(X_train) X_test_scaled = scaler.fit_transform(X_test) train_data_predictions = model.predict(X_train_scaled) test_data_predictions = model.predict(X_test_scaled) trdp = np.array(train_data_predictions).reshape( len(train_data_predictions), 1) trdp = scaler.fit_transform(trdp).ravel() tedp = np.array(test_data_predictions).reshape( len(test_data_predictions), 1) tedp = scaler.fit_transform(tedp).ravel() # RMSLE model_rmsle_train = np.sqrt( mean_squared_log_error(Y_train_scaled, trdp)) model_rmsle_test = np.sqrt( mean_squared_log_error(Y_test_scaled, tedp)) # R-Squared model_r2_train = r2_score(Y_train_scaled, trdp) model_r2_test = r2_score(Y_test_scaled, tedp) df_local = pd.DataFrame({ 'Model': [model_name], 'Training RMSLE': [model_rmsle_train], 'Training R^2': [model_r2_train], 'Testing RMSLE': [model_rmsle_test], 'Testing R^2': [model_r2_test] }) error_metric1 = pd.concat([error_metric1, df_local], sort=True) def model_stats(model, model_name, X_train, Y_train, X_test, Y_test): global error_metric2 train_data_predictions = model.predict(X_train) test_data_predictions = model.predict(X_test) # RMSLE model_rmsle_train = np.sqrt( mean_squared_log_error(Y_train, train_data_predictions)) model_rmsle_test = np.sqrt( mean_squared_log_error(Y_test, test_data_predictions)) # R-Squared model_r2_train = r2_score(Y_train, train_data_predictions) model_r2_test = r2_score(Y_test, test_data_predictions) df_local = pd.DataFrame({ 'Model': [model_name], 'Training RMSLE': [model_rmsle_train], 'Training R^2': [model_r2_train], 'Testing RMSLE': [model_rmsle_test], 'Testing R^2': [model_r2_test] }) error_metric2 = pd.concat([error_metric2, df_local], sort=True) print('Trying out MLP Regression...') # Neural Network Model mlpr = MLPRegressor() mlpr.fit(X_train, Y_train) scaled_model_stats(mlpr, 'Neural Network', X_train, Y_train, X_test, Y_test) print('MLP Regression...Done') print('\n') print('Trying out Linear Regression...') # Linear Regression Model lr = LinearRegression() lr.fit(X_train, Y_train) scaled_model_stats(lr, 'Linear Regression', X_train, Y_train, X_test, Y_test) print('Linear Regression...Done') print('\n') print('Trying out Lasso Regression...') # Lasso Regressor Model lass = Lasso() lass.fit(X_train, Y_train) scaled_model_stats(lass, 'Lasso Regression', X_train, Y_train, X_test, Y_test) print('Lasso Regression...Done') print('\n') print('Trying out Ridge Regression...') # Ridge Regression Model ridge = Ridge() ridge.fit(X_train, Y_train) scaled_model_stats(ridge, 'Ridge Regression', X_train, Y_train, X_test, Y_test) print('Ridge Regression...Done') print('\n') print('Trying out Gradient Boosting 
Regression...') # Gradient Boosting Regressor Model gb = GradientBoostingRegressor() gb.fit(X_train, Y_train) scaled_model_stats(gb, 'Gradient Boosting Regressor', X_train, Y_train, X_test, Y_test) print('Gradient Boosting Regression...Done') print('\n') print('Trying out Support Vector Regression...') # Support Vector Regressor Model svr = SVR() svr.fit(X_train, Y_train) model_stats(svr, 'Support Vector Regressor', X_train, Y_train, X_test, Y_test) print('Support Vector Regression...Done') print('\n') print('Trying out Random Forrest Regression...') # Random Forrest Model rf = RandomForestRegressor() rf.fit(X_train, Y_train) model_stats(rf, 'Random Forrest Regressor', X_train, Y_train, X_test, Y_test) print('Random Forrest Regression...Done') print('\n') final_df = pd.concat([error_metric1, error_metric2], sort=True) final_df.reset_index().drop('index', axis=1).to_csv(self.output().path)
class MLP_Regressor(Modelo): """ """ def cargar_datos(self): """ """ excluir = { "idzona", "precio_metro_cubierto", "precio_metro_total", "gps", "lat", "lng" } features = FEATURES_DISPONIBLES - excluir super().cargar_datos(features) self.train_data = self.preparar_datos(self.train_data) self.test_data = self.preparar_datos(self.test_data) self.submit_data = self.preparar_datos(self.submit_data) self.agregar_columnas_faltantes() return True def preparar_datos(self, df): """ """ df = df.drop(columns=["fecha", "titulo", "descripcion"]) categoricas = {"tipodepropiedad", 'ciudad', 'provincia'} return self.one_hot_encode(df, categoricas) def _split_data_label(self, df, label=None): if not label: label = self.feature data = df.loc[:, df.columns != label] label = df[label].values if label in df.columns else None return data, label def llenar_nans(self, df): return df.fillna(df.mean(skipna=True, numeric_only=True)) @Modelo.cronometrar() def entrenar(self, params=None): """ """ data_train, label_train = self._split_data_label(self.train_data) self.scaler = StandardScaler() self.scaler.fit(data_train) X_train = self.scaler.transform(data_train) hiperparametros = { 'learning_rate_init': 0.1, 'activation': 'relu', 'alpha': 0.001, 'max_iter': 600, 'shuffle': False } if params: hiperparametros.update(params) self.model = MLPRegressor(**hiperparametros) self.model.fit(X_train, label_train) super().entrenar() return True @Modelo.cronometrar() def predecir(self, df): """ """ data = df.copy() data_test, label_test = self._split_data_label(data) X_data = self.scaler.transform(data_test) predictions = self.model.predict(X_data) data["target"] = predictions return data
                   max_iter=1000000, learning_rate_init=0.0001, learning_rate='constant',
                   tol=1e-4, power_t=0.5, shuffle=True, verbose=False, warm_start=False,
                   momentum=0.9, nesterovs_momentum=True, early_stopping=False,
                   beta_1=0.9, beta_2=0.999, epsilon=1e-08)
MLR.fit(x_train, y_train)

plt.figure('regressor', figsize=(4.8, 5.4), dpi=200)
plt.subplot(2, 1, 1)
plt.title('MLPRegressor')
plt.scatter(x_test[:, 0], y_test, label='origin', c='b', s=10)
plt.scatter(x_test[:, 0], MLR.predict(x_test), label='predict', c='r', s=3)
plt.text(0.305, 1.5, 'score=' + str(round(r2_score(y_test, MLR.predict(x_test)), 3)), fontsize=6)
plt.xticks([]), plt.yticks(fontsize=6)
plt.legend(fontsize=6)

forest.fit(x_train, y_train)
plt.subplot(2, 1, 2)
plt.title('RandomForestRegressor')
y_data = [y1, y2, y3]

# Print Pearson coefficient to evaluate the trained ML model
print("\tLearning Sheet Number \t" + "Pearson Coefficient")

# repeat stuff for all data
i = 0
j = 1

# Train the Artificial Neural Network and visualise actual (label) vs predicted
while i < 3:
    fig = plt.figure(j, figsize=(8, 6))
    clf = MLPRegressor(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2),
                       random_state=1, max_iter=3000)
    clf.fit(x_data[i], y_data[i].ravel())
    prediction = clf.predict(x_data[i])
    time = range(0, y_data[i].size)
    print("\t" + str(i + 1) + " \t \t \t " +
          str(np.corrcoef(prediction, y_data[i].ravel())[0, 1]))
    plt.scatter(time, prediction, color='red',
                label='New Index from Datasheet ' + str(i + 1))
    plt.scatter(time, y_data[i], color='blue', label="BIS")
    plt.legend()
    plt.xlabel('Time in Seconds')
    plt.ylabel('Training Datasheet ' + str(i + 1))
def main(): cal_housing = fetch_california_housing() X, y = cal_housing.data, cal_housing.target names = cal_housing.feature_names # Center target to avoid gradient boosting init bias: gradient boosting # with the 'recursion' method does not account for the initial estimator # (here the average target, by default) y -= y.mean() print("Training MLPRegressor...") est = MLPRegressor(activation='logistic') est.fit(X, y) print('Computing partial dependence plots...') # We don't compute the 2-way PDP (5, 1) here, because it is a lot slower # with the brute method. features = [0, 5, 1, 2] plot_partial_dependence(est, X, features, feature_names=names, n_jobs=3, grid_resolution=50) fig = plt.gcf() fig.suptitle('Partial dependence of house value on non-location features\n' 'for the California housing dataset, with MLPRegressor') plt.subplots_adjust(top=0.9) # tight_layout causes overlap with suptitle print("Training GradientBoostingRegressor...") est = GradientBoostingRegressor(n_estimators=100, max_depth=4, learning_rate=0.1, loss='huber', random_state=1) est.fit(X, y) print('Computing partial dependence plots...') features = [0, 5, 1, 2, (5, 1)] plot_partial_dependence(est, X, features, feature_names=names, n_jobs=3, grid_resolution=50) fig = plt.gcf() fig.suptitle('Partial dependence of house value on non-location features\n' 'for the California housing dataset, with Gradient Boosting') plt.subplots_adjust(top=0.9) print('Custom 3d plot via ``partial_dependence``') fig = plt.figure() target_feature = (1, 5) pdp, axes = partial_dependence(est, X, target_feature, grid_resolution=50) XX, YY = np.meshgrid(axes[0], axes[1]) Z = pdp[0].T ax = Axes3D(fig) surf = ax.plot_surface(XX, YY, Z, rstride=1, cstride=1, cmap=plt.cm.BuPu, edgecolor='k') ax.set_xlabel(names[target_feature[0]]) ax.set_ylabel(names[target_feature[1]]) ax.set_zlabel('Partial dependence') # pretty init view ax.view_init(elev=22, azim=122) plt.colorbar(surf) plt.suptitle('Partial dependence of house value on median\n' 'age and average occupancy, with Gradient Boosting') plt.subplots_adjust(top=0.9) plt.show()
class nnQLearningAlgorithm(QLearningAlgorithm): def __init__(self, actions, discount, featureExtractor, explorationProb=0.2, init_weights="simple.p", filename=None): self.actions = actions self.discount = discount # self.featureExtractor = featureExtractor.arrayExtractor self.featureExtractor = featureExtractor self.explorationProb = explorationProb self.sparse = False self.print_time = False self.cache_size = 30 self._reset_cache() self.time_feat = [] self.time_pred = [] self.time_fit = [] # TODO if filename: with open("data/" + filename, "rb") as fin: self.mlp = pickle.load(fin) self.numIters = 101 # skip init else: self.numIters = 0 self.alg_init = QLearningAlgorithm(actions, discount, featureExtractor, explorationProb, init_weights) self.mlp = MLPRegressor( hidden_layer_sizes=(20, ), activation="relu", solver="adam", max_iter=700, # TODO # warm_start TODO early_stopping=False, verbose=False) def _reset_cache(self): self.cache = 0 self.x_cache = [] self.y_cache = [] def _x_cache(self): if self.sparse: return self.featureExtractor.sparseMatrixExtractor(self.x_cache) else: return self.x_cache def evalQ(self, state, action): """ Evaluate Q-function for a given (`state`, `action`) """ if self.numIters < 101: return self.alg_init.evalQ(state, action) if self.sparse: return self.mlp.predict( self.featureExtractor.sparseExtractor( self.featureExtractor.dictExtractor(state, action)))[0] else: return self.mlp.predict( [self.featureExtractor.arrayExtractor(state, action)])[0] def getAction(self, state): """ The strategy implemented by this algorithm. With probability `explorationProb` take a random action. """ self.numIters += 1 if len(self.actions(state)) == 0: return None if random.random() < self.explorationProb or self.numIters < 102: return random.choice(self.actions(state)) else: return max((self.evalQ(state, action), action) for action in self.actions(state))[1] def incorporateFeedback(self, state, action, reward, newState): if newState is None: return t0 = time.time() if self.sparse: phi = self.featureExtractor.dictExtractor(state, action) else: phi = self.featureExtractor.arrayExtractor(state, action) t1 = time.time() self.time_feat.append(t1 - t0) if self.numIters < 101: pred = self.evalQ(state, action) else: if self.sparse: pred = self.mlp.predict( self.featureExtractor.sparseExtractor(phi))[0] else: pred = self.mlp.predict([phi])[0] t2 = time.time() self.time_pred.append(t2 - t1) try: v_opt = max( self.evalQ(newState, new_a) for new_a in self.actions(newState)) except: v_opt = 0. target = reward + self.discount * v_opt self.x_cache.append(phi) self.y_cache.append(target) self.cache += 1 if self.numIters == 100: self.mlp.fit(self._x_cache(), self.y_cache) self._reset_cache() elif self.numIters > 100 and self.cache == self.cache_size: t3 = time.time() self.mlp.partial_fit(self._x_cache(), self.y_cache) t4 = time.time() self.time_fit.append(t4 - t3) self._reset_cache() if self.numIters % 3000 == 0 and self.print_time: print "{:.2f}\t{:.2f}\t{:.2f}".format( 1000. * np.mean(self.time_feat), 1000. * np.mean(self.time_pred), 1000. * np.mean(self.time_fit))
class QN(object): def __init__(self, num_inputs, num_outputs): self.nx = num_inputs self.ny = num_outputs self.net = MLPRegressor(hidden_layer_sizes=(50, 10), max_iter=1, algorithm='sgd', learning_rate='constant', learning_rate_init=0.001, warm_start=True, momentum=0.9, nesterovs_momentum=True ) self.initialize_network() # set experience replay self.mbsize = 128 # mini-batch size self.er_s = [] self.er_a = [] self.er_r = [] self.er_done = [] self.er_sp = [] self.er_size = 2000 # total size of mb, impliment as queue self.whead = 0 # write head def initialize_network(self): # function to initialize network weights xtrain = np.random.rand(256, self.nx) ytrain = 10 + np.random.rand(256, self.ny) self.net.fit(xtrain, ytrain) def update_network(self): # function updates network by sampling a mini-batch from the ER # Prepare train data chosen = list(np.random.randint(len(self.er_s), size=min(len(self.er_s), self.mbsize))) Xtrain = np.asarray([self.er_s[i] for i in chosen]) # calculate target target = np.random.rand(len(chosen), self.ny) for j, i in enumerate(chosen): # do a forward pass through s and sp Q_s = self.net.predict(self.er_s[i].reshape(1, -1)) Q_sp = self.net.predict(self.er_sp[i].reshape(1, -1)) target[j, :] = Q_s # target initialized to current prediction if (self.er_done[i] == True): target[j, self.er_a[i]] = self.er_r[i] # if end of episode, target is terminal reward else: target[j, self.er_a[i]] = self.er_r[i] + 0.9 * max(max(Q_sp)) # Q_sp is list of list (why?) # fit the network self.net.fit(Xtrain, target) # single step of SGD def append_memory(self, s, a, r, sp, done): if (len(self.er_s) < self.er_size): self.er_s.append(s) self.er_a.append(a) self.er_r.append(r) self.er_sp.append(sp) self.er_done.append(done) self.whead = (self.whead + 1) % self.er_size else: self.er_s[self.whead] = s self.er_a[self.whead] = a self.er_r[self.whead] = r self.er_sp[self.whead] = sp self.er_done[self.whead] = done self.whead = (self.whead+1) % self.er_size
train = train.drop('Genre', axis=1)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(train, feature, test_size=0.30)

from sklearn.neural_network import MLPRegressor
clf = MLPRegressor(hidden_layer_sizes=(5, ), activation='relu', solver='adam',
                   learning_rate='adaptive', max_iter=1000,
                   learning_rate_init=0.01, alpha=0.01)
clf.fit(X_train, y_train)
res = clf.predict(X_test)

# threshold the regression output at 0.5 to get binary labels
lsd = []
for item in res:
    if item >= 0.5:
        lsd.append(1)
    else:
        lsd.append(0)

from sklearn.metrics import accuracy_score
print(accuracy_score(lsd, y_test))
def forecast_i(): training_start = datetime.datetime.strptime('2016-02-08', '%Y-%m-%d') training_end = datetime.datetime.strptime('2016-06-25', '%Y-%m-%d') test_end = datetime.datetime.strptime('2016-07-25', '%Y-%m-%d') mindate = datetime.datetime.strptime('2016-02-01', '%Y-%m-%d') # 设置训练集和测试集 training_x = [] training_y = [] test_x = [] test_y = [] result = [] for index, row in feature_i.iterrows(): date_str = row['sldatime'] date_time = datetime.datetime.strptime(date_str, '%Y-%m-%d') row['sldatime'] = (date_time - mindate).days if training_start < date_time < training_end: i = 0 training_list = [] while i < len(row): training_list.append(row[i]) i += 1 training_x.append(training_list) training_y.append(row['qty']) else: # 最后6天的测试数据不要 if date_time < test_end: i = 0 test_list = [] while i < len(row): test_list.append(row[i]) i += 1 test_x.append(test_list) test_y.append(row['qty']) # 初始化result le = 0 while le < len(test_x): result.append([]) result[le].append(test_x[le][0]) result[le].append(test_x[le][6]) result[le].append(test_x[le][8] / 1000) le += 1 # 预测7天的销量 history_dict = get_pluno_dict() day = 0 while day < 7: # 预测 # clf = RandomForestClassifier() # linear,poly,rbf # clf = SVR(kernel="poly") # clf = tree.DecisionTreeClassifier(criterion='entropy') clf = MLPRegressor() clf.fit(training_x, training_y) predict_y = clf.predict(test_x) index = 0 while index < len(test_x): # 将预测结果保存到结果数组中 result[index].append(predict_y[index] / 1000) pluno = test_x[index][0] date_str = datetime.datetime.strftime( mindate + datetime.timedelta(days=test_x[index][6]), '%Y-%m-%d') test_x[index][8] = predict_y[index] # 将预测结果添加到训练集中 # 预测d的销量其他特征不需要更新 if day == 0: training_x.append(test_x[index]) training_y.append(int(test_x[index][8])) if day > 0: # 更新时间序列字典 history_dict[pluno][date_str] += predict_y[index] rec = test_x[index] # 更新特征d-1/d-7 j = 0 lastweek = last_week_list(date_str) for date in lastweek: min_date = datetime.datetime.strptime( '2016-02-01', '%Y-%m-%d') this_date = datetime.datetime.strptime(date, '%Y-%m-%d') if this_date > min_date: rec[9 + j] = history_dict[pluno][date] else: rec[9 + j] = 0.0 j += 1 training_x.append(rec) training_y.append(int(rec[8])) index += 1 # 更新测试集日期进行下一次预测 i = 0 while i < len(test_x): interval = test_x[i][6] test_x[i][6] += 1 str = datetime.datetime.strftime( mindate + datetime.timedelta(days=interval), '%Y-%m-%d') if is_weekday(str): test_x[i][7] = 1 else: test_x[i][7] = 0 i += 1 day += 1 # 将预测结果写入csv head = [ 'pluno', 'time', 'qty', 'd', 'd+1', 'd+2', 'd+3', 'd+4', 'd+5', 'd+6' ] # 创建文件对象 path = "MLP/forecast_i.csv" f = open(path, 'w', encoding='utf-8', newline='' "") # 基于文件对象构建 csv写入对象 csv_writer = csv.writer(f) # 构建列表头 csv_writer.writerow(head) # 创建每一行数据 for row in result: csv_writer.writerow(row) # 关闭文件 f.close()
x1_train, x1_test, y1_train, y1_test = train_test_split(pca_data, Y1, test_size=0.20, random_state=0)
x2_train, x2_test, y2_train, y2_test = train_test_split(pca_data, Y2, test_size=0.20, random_state=0)

print("\n<----------------------------------MLP REGRESSOR------------------------------->\n")
reg = MLPRegressor()
m = reg.fit(x_train, y_train)
pred1 = m.predict(x_train)
pred = m.predict(x_test)
trainr2 = r2_score(y_train, pred1)
testr2 = r2_score(y_test, pred)
print('train_r2=', trainr2)
print('test_r2=', testr2)

from sklearn.metrics import mean_squared_error
print("TRAIN mean_squared_error= ", mean_squared_error(y_train, pred1))
print("TEST mean_squared_error= ", mean_squared_error(y_test, pred))

print("\n<-----------------------------MULTIPLE LINEAR REGRESSION------------------------->\n")
pheno = np.load('phenodata.npy')

X_tr = geno[:1000, 1:]   # slicing geno
# X_va = geno[201:250, :]
X_te = geno[1001:, 1:]
Y_tr = pheno[:1000, 1:]  # slicing pheno
# Y_va = pheno[201:250, :]
Y_te = pheno[1001:, 1:]

diabetes_X_train = X_tr
diabetes_X_test = X_te
diabetes_y_train = Y_tr
diabetes_y_test = Y_te

reg = MLPRegressor(hidden_layer_sizes=(1, ), solver='lbfgs')
reg.fit(X_tr, Y_tr)
scores = cross_val_score(reg, geno[:, 1:], pheno[:, 1:], cv=10)

# Result_Y = np.zeros((249,1), dtype='float64')
Result_Y = reg.predict(X_te)
# Yte = np.array(Y_te, dtype=np.float64)
r_row, p_score = pearsonr(Result_Y, Y_te)

# The mean squared error
print("Residual sum of squares: %.2f"
      % np.mean((reg.predict(diabetes_X_test) - diabetes_y_test) ** 2))
# Explained variance score: 1 is perfect prediction
print('Variance score: %.2f' % reg.score(diabetes_X_test, diabetes_y_test))
print(Result_Y)
print(scores)
# for i in f:
m = 1
n = 1
o = 1
for m in range(1, 16):
    for n in range(1, 16):
        for o in range(1, 16):
            regr = MLPRegressor(activation='tanh', hidden_layer_sizes=(n, m, o),
                                max_iter=1000, solver='lbfgs', tol=1e-5)

            # Train the model using the training sets
            regr.fit(miv_X_train, miv_y_train.values.ravel())

            # Make predictions using the testing set
            miv_y_pred = regr.predict(miv_X_test)
            miv_y_pred_out = regr.predict(miv_X_test)
            miv_y_pred_in = regr.predict(miv_X_train)

            # regr.coefs_
            # The coefficients
            # print('Coefficients: \n', regr.coefs_)

            # The mean squared error
            print("Mean squared error: %.2f" % mean_squared_error(miv_y_test, miv_y_pred))
            # Explained variance score: 1 is perfect prediction
            print('Variance score: %.2f' % r2_score(miv_y_test, miv_y_pred))
            print('MAE score: %.2f' % mean_absolute_error(miv_y_test, miv_y_pred))
class NeuralNetwork: ################# Fields ####################### # dataset_filename: string - path to dataset # header: list - header of the dataset # enumerable_columns: list - the enumerable columns # df: matrix - data set # training_set: matrix - training set # test_set: matrix - test set # TSnew_X: matrix - training set of TSnew (see documentation) # TSnew_Y: matrix - training set of TSnew (see documentation) # dim_random_subset: int - number of features to set to 0 (see documentation) # repeatSometimes: int - number of for cicles (see documentation) def __init__(self, repeatSometimes = 2, dim_random_subset = 2): # variables initialization self.enumerable_columns = [] self.dataset_filename = "" self.header = [] self.df = pandas.DataFrame() self.trainSet = pandas.DataFrame() self.testSet = pandas.DataFrame() self.TSnew_X = pandas.DataFrame() self.TSnew_Y = pandas.DataFrame() self.repeatSometimes = repeatSometimes self.dim_random_subset = dim_random_subset # This code really needs much time and therefore I save some computations if not os.path.isfile('trainSet{}-{}.csv'.format(repeatSometimes, dim_random_subset)): self.readDataset() self.discretization() self.preprocess() # creating TSnew self.createTrainingAndTestSet() self.createTSnew() # backup encoded sets self.writeCSV() else: self.readCSV() # training and test self.train() self.predict() def readDataset(self): print("DEB Read dataset") with open('header.txt') as f: self.header = f.read().split(',') print(self.header) with open('dataset.txt') as f: self.dataset_filename = f.read() print(self.dataset_filename) self.df = pandas.read_csv(self.dataset_filename, names=self.header) print('Dataset with {} entries'.format(self.df.__len__())) ############# Preprocessing ########################## # helper function (should not be called from other functions) def discretize(self, column): print("DEB Discretize column " + column) sorted_col = sorted(column) l = len(column) n = int(numpy.floor(l / 2)) if l % 2 == 0: median_1 = numpy.median(sorted_col[0:n]) median_2 = numpy.median(sorted_col[n:]) else: median_1 = numpy.median(sorted_col[0:(n + 1)]) median_2 = numpy.median(sorted_col[(n + 1):]) iqr = median_2 - median_1 h = 2 * iqr * (1 / numpy.cbrt(l)) if h > 0: bins_number = numpy.ceil((column.max() - column.min()) / h) new_col, bins = pandas.cut(column, bins_number, labels=False, retbins=True, include_lowest=False) else: new_col = column bins = [] return new_col, bins # helper function (should not be called from other functions) def normalize(column): print("DEB Normalize") h = abs(column.min()) new_col = column + h return new_col def discretization(self): print("DEB Discretization") replacements = {} bins = {} for i in range(0, self.df.shape[1]): # for each feature bins[i] = [] col = self.df.as_matrix()[:, i] flag_str = False flag_float = False flag_negative = False for j in col: if type(j) is str: flag_str = True elif type(j) is float: flag_float = True elif type(j) is int and j < 0: flag_negative = True if flag_str: continue elif flag_negative: new_col = self.normalize(col) replacements[i] = new_col bins[i] = [] elif flag_float: new_col, new_bins = self.discretize(col) replacements[i] = new_col bins[i] = new_bins for k, v in replacements.items(): self.df.iloc[:, k] = v def preprocess(self, removeColumnsWithMissingValues = False): print("DEB Preprocessing") m = self.df.as_matrix() # it is possible to encode enumerable features and to remove missing values with open('enumerable_columns.txt') as f: # e.g., self.enumerable_columns = [0, 5, 
8] self.enumerable_columns = f.read() if self.enumerable_columns.__contains__(','): self.enumerable_columns = list(map(int, self.enumerable_columns.split(','))) else: self.enumerable_columns = [int(self.enumerable_columns)] print("enumerable columns are: " + str(self.enumerable_columns)) le = preprocessing.LabelEncoder() for col in self.enumerable_columns: # if the column is enumerable self.df[self.header[col]] = le.fit_transform(self.df[self.header[col]]) # A -> 0, B -> 1, ... # remove cols with missing values (NaN), even though you risk to reduce too much the dataset if removeColumnsWithMissingValues: for i in range(0, m.shape[1]): if True in m[:, i]: self.df = numpy.delete(self.df, 0, i) # delete column ############## MPL architecture ####################### def createTrainingAndTestSet(self): print("DEB Create Training set. Using formula 80-20%") self.trainSet, self.testSet = train_test_split(self.df, test_size=0.20) # hearth of the algorithm! def createTSnew(self): print("DEB Create TS new") for i in range(0, self.trainSet.shape[0]): for j in range(0, self.repeatSometimes): # choose small random subset of features X_hat X_hat = [int(self.trainSet.shape[1] * random.random()) for i in range(0, self.dim_random_subset)] # insert into TSnew the sample: (x1...X_hat = 0 ... xk ; x1...xk) row = numpy.copy(self.trainSet.as_matrix()[i, :]) for feature in X_hat: # here you set the random features to 0. X_hat represents the indices of such features row[feature] = 0 self.TSnew_X = self.TSnew_X.append(pandas.DataFrame(row.reshape(-1, len(row)))) # append row to TSnew_X copy = numpy.copy(self.trainSet.as_matrix()[i, :]) self.TSnew_Y = self.TSnew_Y.append(pandas.DataFrame(copy.reshape(-1, len(copy)))) # Y = x1...xk ############## Train & Predict ######################## def train(self): print("DEB Training with TSnew") self.MLP = MLPRegressor(activation='relu', alpha=1e-05, batch_size='auto', beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08, hidden_layer_sizes=len(self.TSnew_Y.columns), learning_rate='constant', learning_rate_init=0.001, max_iter=200, momentum=0.9, nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True, solver='lbfgs', tol=0.0001, validation_fraction=0.1, verbose=False, warm_start=False) self.MLP.fit(self.TSnew_X, self.TSnew_Y) def predict(self): print("DEB Test") testSetNew_X = pandas.DataFrame() testSetNew_Y = pandas.DataFrame() # preparing the test set - here you do the same as in function createTSnew: if not os.path.isfile('testSetNew_X{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset)): for i in range(0, self.testSet.shape[0]): # choose small random subset of features X_hat X_hat = [int(self.testSet.shape[1] * random.random()) for i in range(0, self.dim_random_subset)] # insert into TSnew the sample: (x1...X_hat = 0 ... xk ; x1...xk) row = numpy.copy(self.testSet.as_matrix()[i, :]) for feature in X_hat: # here you set the random features to 0. 
X_hat represents the indices of such features row[feature] = 0 testSetNew_X = testSetNew_X.append(pandas.DataFrame(row.reshape(-1, len(row)))) copy = numpy.copy(self.testSet.as_matrix()[i, :]) testSetNew_Y = testSetNew_Y.append(pandas.DataFrame(copy.reshape(-1, len(copy)))) # Y = x1...xk testSetNew_Y.to_csv('testSetNew_X{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset)) testSetNew_Y.to_csv('testSetNew_Y{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset)) else: # if the needed DataFrames have already been calculated, simply load them from disk self.trainSet = self.trainSet.from_csv('testSetNew_X{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset)) self.trainSet = self.trainSet.from_csv('testSetNew_Y{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset)) # predictions self.MLP.predict(testSetNew_X) print("Score of method (repetitions={}, subset={}): {}%".format(self.repeatSometimes, self.dim_random_subset, self.MLP.score(testSetNew_X, testSetNew_Y) * 100)) ########################## Helper functions #################### def writeCSV(self): print("DEB WriteCSV") self.trainSet.to_csv('trainSet{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset)) self.testSet.to_csv('testSet{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset)) self.TSnew_X.to_csv('TSnew_X{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset)) self.TSnew_Y.to_csv('TSnew_Y{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset)) def readCSV(self): print("DEB ReadCSV") self.trainSet = self.trainSet.from_csv('trainSet{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset)) self.testSet = self.testSet.from_csv('testSet{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset)) self.TSnew_X = self.TSnew_X.from_csv('TSnew_X{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset)) self.TSnew_Y = self.TSnew_Y.from_csv('TSnew_Y{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset))
                                learning_rate_init=0.0001, hidden_layer_sizes=(sz_1),
                                learning_rate='constant', max_iter=100000, momentum=0.9,
                                n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
                                random_state=5, shuffle=False, solver=slv_1, tol=0.0001,
                                validation_fraction=0.1, verbose=False, warm_start=False)
mlp_gloabal_Conf.fit(X_train, Y_train.ravel())
predict_global_test = mlp_gloabal_Conf.predict(X_test)
predict_gloabal_train = mlp_gloabal_Conf.predict(X_train)
print("R2 Score Test =", r2_score(Y_test, predict_global_test))
print("MSLE Score Test =", mean_squared_log_error(Y_test, predict_global_test))
print("MSE Train =", mean_squared_error(Y_train, predict_gloabal_train))
print("RMSE Train =", rmse(Y_train, predict_gloabal_train))
print("MSE Test =", mean_squared_error(Y_test, predict_global_test))
print("RMSE Test =", rmse(Y_test, predict_global_test))
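The snippet above calls an rmse helper that is not defined in this excerpt. A minimal sketch of what such a helper presumably looks like (an assumption about the missing code, not the author's implementation):

import numpy as np
from sklearn.metrics import mean_squared_error

def rmse(y_true, y_pred):
    # Root mean squared error; assumed equivalent of the undefined helper used above.
    return np.sqrt(mean_squared_error(y_true, y_pred))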
def RodarMLP(X_train, y_train, X_test, y_test, scaler, lags, X_tempo, y_tempo, X_anos, y_anos, X_meses, y_meses):
    print("Running model")
    mlp = MLPRegressor()
    model = MLPRegressor(activation='relu', alpha=0.001, batch_size=50,
                         hidden_layer_sizes=(8, 9, 2), max_iter=1000, solver='adam')
    scorer = make_scorer(mean_squared_error, greater_is_better=False)
    # model = KerasRegressor(build_fn=CriarMLP(lags), epochs=1000, batch_size=10, verbose=1)
    # hidden_layer = GerarHiddenLayers()
    # parameter_space = {
    #     'hidden_layer_sizes': hidden_layer,
    #     'activation': ['tanh', 'relu', 'softplus'],
    #     'solver': ['sgd', 'adam'],
    #     'alpha': [0.0001, 0.05, 0.1, 0.01],
    #     'batch_size': [50],
    #     'max_iter': [1000]
    # }
    # model = GridSearchCV(mlp, parameter_space, n_jobs=6, cv=3, verbose=1, scoring=scorer)
    print("Fitting model")
    model.fit(X_train, y_train)
    print("Predicting on test data")
    y_predict = model.predict(X_test)
    y_test = scaler.inverse_transform(y_test)
    y_predict = scaler.inverse_transform(y_predict)
    X_test = scaler.inverse_transform(X_test)
    # calculate Pearson's correlation
    print("Computing Pearson correlation")
    pearson = pearsonr(y_test, y_predict)
    print("pearson:" + str(pearson))
    r2 = r2_score(y_test, y_predict)
    print("r2:" + str(r2))
    mse = mean_squared_error(y_test, y_predict)
    mae = mean_absolute_error(y_test, y_predict)
    # Best parameter set
    # print('Best parameters found:\n', model.best_params_)
    # GravaremTXT("Melhores Parâmetros: " + str(model.best_params_))
    # All results
    # means = model.cv_results_['mean_test_score']
    # stds = model.cv_results_['std_test_score']
    # for mean, std, params in zip(means, stds, model.cv_results_['params']):
    #     print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))
    #     GravaremTXT("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))
    # reshape the predictions and attach the dates so the plots can be built
    y_predict = y_predict.reshape(-1, 1)
    print("Generating plots")
    X_testmapa = X_test[:, 0]
    y_predictmapa = y_predict[:, 0]
    plt.rcParams["figure.figsize"] = (10, 10)
    plt.plot(X_testmapa, X_testmapa, color='black')
    plt.scatter(X_testmapa, y_predictmapa, color='orange')
    plt.legend(loc='best')
    plt.title('MLP results, lag ' + str(lags))
    plt.ylabel('Predicted')
    plt.xlabel('Observed')
    plt.savefig('gráfico_mlp_scatter_lag' + str(lags) + '.png')
    plt.show()
    df_y_predict = pd.DataFrame(y_predict)
    y_tempo = y_tempo.reshape(-1, 1)
    df_y_tempo = pd.DataFrame(y_tempo, dtype='datetime64[D]')
    df_y_tempo.columns = ['Datas']
    y_predict = df_y_predict.set_index(df_y_tempo.iloc[:, 0])
    y_predict.columns = ['Vazão']
    y_test = y_test.reshape(-1, 1)
    df_y_test = pd.DataFrame(y_test)
    # y_tempo = y_tempo.reshape(-1, 1)
    df_y_tempo = pd.DataFrame(y_tempo, dtype='datetime64[D]')
    df_y_tempo.columns = ['Datas']
    df_y_tempo = df_y_tempo.sort_values(by="Datas")
    y_test = df_y_test.set_index(df_y_tempo.iloc[:, 0])
    y_test.columns = ['Vazão']
    X_test = X_test.reshape(-1, 1)
    df_x_test = pd.DataFrame(X_test)
    X_tempo = X_tempo.reshape(-1, 1)
    df_x_tempo = pd.DataFrame(X_tempo, dtype='datetime64[D]')
    Xtesttest = pd.DataFrame(X_test[:, 0:1])
    X_test = df_x_test.set_index(df_y_tempo.iloc[:, 0])  # set_index must be called on the DataFrame, not the ndarray
    X_test.columns = ['Vazão']
    plt.rcParams["figure.figsize"] = (30, 10)
    plt.plot(X_test.iloc[:, 0], label='Observed, lag ' + str(lags), color='orange')
    plt.plot(y_predict, label='Predicted', color='black')
    plt.legend(loc='best')
    plt.title('MLP results')
    plt.xlabel('Date', fontsize=14)
    plt.ylabel('Vazão', fontsize=14)
    plt.savefig('gráfico_mlp_lag' + str(lags) + '.png')
    plt.show()
    return r2, mse, mae
def merlin(dataset, x_tr, x_tst, y_tr, y_tst, act_fun, sov_fun, model_number, country):
    # creating a score dataset
    cnames = ['Accuracy', 'Size', 'alpha', 'Activation_Function', 'Solver']
    acc_matrix = pd.DataFrame(columns=cnames)
    print(acc_matrix.head())
    acc_lst = []
    i_lst = []
    nr_list = []
    fun1 = []
    fun2 = []
    mdl_lst = []
    bst_scr = []
    dat_lst = []
    iterate_list = [
        0.0000000001, 0.0000000002, 0.0000000003, 0.0000000004, 0.0000000005,
        0.0000000006, 0.0000000007,
        # 0.0000000008, 0.0000000009,
        # 0.000000001, 0.000000002, 0.000000003, 0.000000004, 0.000000005, 0.000000006, 0.000000007,
        # 0.000000008, 0.000000009,
        # 0.00000001, 0.00000002, 0.00000003, 0.00000004, 0.00000005, 0.00000006, 0.00000007, 0.00000008,
        # 0.00000009,
        # 0.0000001, 0.0000002, 0.0000003, 0.0000004, 0.0000005, 0.0000006, 0.0000007, 0.0000008, 0.0000009,
        # 0.000001, 0.000002, 0.000003, 0.000004, 0.000005, 0.000006, 0.000007, 0.000008, 0.000009,
        # 0.00001, 0.00002, 0.00003, 0.00004, 0.00005, 0.00006, 0.00007, 0.00008, 0.00009,
        # 0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.0006, 0.0007, 0.0008, 0.0009,
        # 0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009,
        # 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09
    ]
    # importing the neural net
    from sklearn.neural_network import MLPRegressor
    from sklearn.metrics import r2_score
    # model testing module
    for nr in range(110, 120, 20):
        print("Neural Size = ", nr)
        for i in iterate_list:
            mlp = MLPRegressor(activation=act_fun, alpha=i, batch_size='auto',
                               beta_1=0.9, beta_2=0.999, early_stopping=False,
                               epsilon=1e-08, hidden_layer_sizes=(nr),
                               learning_rate='constant', max_iter=90000000,
                               momentum=0.9, n_iter_no_change=10,
                               nesterovs_momentum=True, power_t=0.5,
                               random_state=5, shuffle=False, solver=sov_fun,
                               tol=0.0001, validation_fraction=0.1,
                               verbose=False, warm_start=True)
            mlp.fit(x_tr, y_tr.ravel())
            predict_test = mlp.predict(x_tst)
            scr = r2_score(y_tst, predict_test)
            acc_lst.append(scr)
            i_lst.append(i)
            nr_list.append(nr)
            fun1.append(act_fun)
            fun2.append(sov_fun)
            print(" i = ", i, "Score = ", scr)
    print("Training Complete")
    print()
    acc_matrix['Accuracy'] = acc_lst
    acc_matrix['Size'] = nr_list
    acc_matrix['alpha'] = i_lst
    acc_matrix['Activation_Function'] = fun1
    acc_matrix['Solver'] = fun2
    run_date = date.today()
    acc_matrix.reset_index()
    print(acc_matrix.head())
    for i in acc_matrix.index:
        if acc_matrix['Accuracy'][i] == max(acc_matrix['Accuracy']):
            print("Best Parameters For The Model Are\n")
            print("Accuracy ", acc_matrix["Accuracy"][i])
            print("Neural Size ", acc_matrix['Size'][i])
            print("alpha =", acc_matrix['alpha'][i])
            print("Activation Function =", acc_matrix['Activation_Function'][i])
            print("Solver =", acc_matrix['Solver'][i])
            bst = acc_matrix["Accuracy"][i]
            mdl_lst.append(model_number)
            bst_scr.append(bst)
            dat_lst.append(run_date)
            Tracker_matrix['Accuracy'] = bst_scr
            Tracker_matrix['Model_Number'] = mdl_lst
            Tracker_matrix['Run_Date'] = dat_lst
            Tracker_matrix['Country'] = country
            Track_MERLIN(Tracker_matrix)
            return acc_matrix['Size'][i], acc_matrix['alpha'][i], acc_matrix['Activation_Function'][i], acc_matrix['Solver'][i]
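The loop above sweeps alpha by hand and bookkeeps the scores in lists. A hedged sketch of the same search using scikit-learn's GridSearchCV is shown below; note that it scores by cross-validation on x_tr rather than on the held-out x_tst used above, and the single hidden-layer size (110,) mirrors the one value the range() above actually produces.

from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPRegressor

param_grid = {'alpha': iterate_list, 'hidden_layer_sizes': [(110,)]}
search = GridSearchCV(MLPRegressor(activation=act_fun, solver=sov_fun,
                                   random_state=5, max_iter=10000),
                      param_grid, scoring='r2', cv=3)
search.fit(x_tr, y_tr.ravel())
print(search.best_params_, search.best_score_)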
from datetime import datetime
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPRegressor

startTime = datetime.now()

fileTrain = open("fingerDataTrain.dat", 'r')
fileVal = open("fingerDataVal.dat", 'r')
trainingSet = np.loadtxt(fileTrain)
valSet = np.loadtxt(fileVal)
fileTrain.close()
fileVal.close()

trainX = trainingSet[:, :13]
trainY = trainingSet[:, 14:]
valX = valSet[:, :13]
valY = valSet[:, 14:]

# standardize each feature using the training mean and std
for i in range(trainX.shape[1]):
    m = trainX[:, i].mean()
    s = trainX[:, i].std()
    trainX[:, i] = (trainX[:, i] - m) / s
    valX[:, i] = (valX[:, i] - m) / s

ann = MLPRegressor()
ann.fit(trainX, trainY)
sqError = ((ann.predict(valX) - valY) ** 2).mean()

plt.scatter(valX[:, 1], valY[:, 3], color='black')
plt.plot(valX[:, 1], ann.predict(valX)[:, 3], color='blue', linewidth=3)

print(datetime.now() - startTime)
from sklearn.neighbors import KNeighborsRegressor

KNN = KNeighborsRegressor()
knn_param_grid = {'n_neighbors': [3, 10]}
knn_grid = model_selection.GridSearchCV(KNN, knn_param_grid, cv=10, n_jobs=25,
                                        verbose=1, scoring='neg_mean_squared_error')
knn_grid.fit(X_train, y_train)
print(' Best Params:' + str(knn_grid.best_params_))
KNN = KNeighborsRegressor(n_neighbors=10)
KNN.fit(X_train, y_train)
y_predict_knn = KNN.predict(X_test)
mae_knn = (np.abs(y_predict_knn - y_test)).sum() / 9467  # 9467 = number of test samples
joblib.dump(KNN, 'KNN.model')
print(mae_knn)

# mlp
from sklearn.neural_network import MLPRegressor
MLP = MLPRegressor(hidden_layer_sizes=(300, 200, 200), max_iter=100, activation='relu')
MLP.fit(X_train, y_train)
y_predict_MLP = MLP.predict(X_test)
mae_MLP = (np.abs(y_predict_MLP - y_test)).sum() / 9467
joblib.dump(MLP, 'MLP.model')
print(mae_MLP)

# xgb
import xgboost as xgb
x_regress = xgb.XGBRegressor(max_depth=20, n_estimators=5000)
x_regress_param_grid = {'max_depth': [5, 20]}
x_regress_grid = model_selection.GridSearchCV(x_regress, x_regress_param_grid, cv=10, n_jobs=25,
                                              verbose=1, scoring='neg_mean_squared_error')
x_regress.fit(X_train, y_train)  # note: the grid search object above is built but never fitted
joblib.dump(x_regress, 'x_regress_grid.model')
y_predict_xgb = x_regress.predict(X_test)
mae_xgb = (np.abs(y_predict_xgb - y_test)).sum() / 9467

# model ensembling
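The MAE values above are computed by summing absolute errors and dividing by a hard-coded test-set size. Assuming 9467 is indeed len(y_test) and y_test is one-dimensional, the same numbers can be obtained with scikit-learn's built-in metric:

from sklearn.metrics import mean_absolute_error

mae_knn = mean_absolute_error(y_test, y_predict_knn)
mae_MLP = mean_absolute_error(y_test, y_predict_MLP)
mae_xgb = mean_absolute_error(y_test, y_predict_xgb)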
# Data processing function
def processData():
    rawxdata = np.array([[1, 1], [10, 2], [1, 3], [1, 4], [1, 5], [1, 6],
                         [1, 7], [1, 8], [1, 9], [1, 10]]).reshape(-1, 2)
    rawydata = np.array([2, 12, 4, 5, 6, 7, 8, 9, 10, 11])
    # train_test_split automatically shuffles the data
    X_train, X_test, y_train, y_test = train_test_split(rawxdata, rawydata, random_state=0)
    return (X_train, y_train, X_test, y_test)

X_train, y_train, X_test, y_test = processData()

# Fit the regressor
regr.fit(X_train, y_train)
print(X_test, ":\n", regr.predict(X_test))

# Accuracy
print(explained_variance_score(y_test, regr.predict(X_test)))
print(max_error(y_test, regr.predict(X_test)))
print(max_error(y_train, regr.predict(X_train)))

# Anatomy
print(regr.n_layers_)
print(regr.hidden_layer_sizes)
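The block above uses a regressor named regr, plus train_test_split and the metric functions, all of which are created outside this excerpt. A minimal sketch of the missing setup; the hyperparameters here are hypothetical, not the author's:

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import explained_variance_score, max_error
from sklearn.neural_network import MLPRegressor

# Hypothetical configuration; the original regr is defined elsewhere in the script.
regr = MLPRegressor(hidden_layer_sizes=(10,), max_iter=2000, random_state=0)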
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(X_train)
# Now apply the transformations to the data:
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# model building using MLPRegressor
from sklearn.neural_network import MLPRegressor
mlp = MLPRegressor(hidden_layer_sizes=(8, 8, 8), max_iter=300, solver='lbfgs',
                   alpha=1e-5, activation='relu')
m1 = mlp.fit(X_train, y_train)
prediction_train = m1.predict(X_train)
prediction_test = m1.predict(X_test)
print(prediction_test)
input()

# RMSE value
rmse = prediction_train - y_train
print(np.sqrt(np.mean(rmse * rmse)))
# >> RMSE = 5.10252291327967
input()

# Correlation
corr = np.corrcoef(prediction_train, y_train)
# >> Correlation = 0.95278931
print(corr)
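Scaling and model fitting above are done as two separate steps that overwrite X_train and X_test in place. A sketch of the same workflow bundled into a single Pipeline (assuming X_train and X_test are the raw, unscaled splits); the pipeline guarantees the scaler is always fit on the training split only:

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor

pipe = make_pipeline(StandardScaler(),
                     MLPRegressor(hidden_layer_sizes=(8, 8, 8), max_iter=300,
                                  solver='lbfgs', alpha=1e-5, activation='relu'))
pipe.fit(X_train, y_train)   # scaler and MLP are fit together on the training split
print(pipe.predict(X_test))  # X_test is scaled internally with the training statistics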
'''
model.fit(X4, y4)
y4_p = model.predict(X_plot)
model.fit(X5, y5)
y5_p = model.predict(X_plot)
'''

# shifted copies of y1_p used as additional lagged features
y6_p = shift(y1_p, -30, cval=0)
y7_p = shift(y1_p, -15, cval=0)
X = np.column_stack([X_plot, y1_p, y2_p, y3_p, y6_p, y7_p])
y = shift(y1_p, 30, cval=0)

# poly = make_pipeline(PolynomialFeatures(3), Ridge())
mpl = MLPRegressor(beta_1=0.99)

'''
y_t = y[-1000:-2]
y = y[0:-1000]
X_t = X[-1000:-2]
X = X[0:-1000]
mpl.fit(X, y)
poly.fit(X, y)
mpl_pred = mpl.predict(X_t)
poly_pred = poly.predict(X_t)
'''

mpl_pred = cross_val_predict(mpl, X, y, cv=10)
# poly_pred = cross_val_predict(poly, X, y, cv=10)
# nn_pred = cross_val_predict(model, X, y, cv=10)
print(mpl.get_params())


def plot_cross():
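The cross-validated predictions mpl_pred above are computed but not scored in this excerpt. A short sketch of how they could be evaluated against the target y used in cross_val_predict:

from sklearn.metrics import r2_score, mean_squared_error

print("CV R^2:", r2_score(y, mpl_pred))
print("CV MSE:", mean_squared_error(y, mpl_pred))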
print("Support Vector Machine Regressor:") model_svr = SVR(kernel='rbf') model_svr.fit(X_train, y_train) print(model_svr.score(X_train, y_train)) print(model_svr.score(X_valid, y_valid)) y_pred = model_kneighbors.predict(X_valid) print(r2_score(y_valid, y_pred)) print("") print("Neural Network Regressor:") model_neural = MLPRegressor(hidden_layer_sizes=(5, 4), activation='logistic', solver='lbfgs', max_iter=36000) model_neural.fit(X_train, y_train) print(model_neural.score(X_train, y_train)) print(model_neural.score(X_valid, y_valid)) y_pred = model_kneighbors.predict(X_valid) print(r2_score(y_valid, y_pred)) print("") print("Voting Regressor:") model_voting = VotingRegressor([('neighbors', KNeighborsRegressor(50)), ('forest', RandomForestRegressor(n_estimators=100, min_samples_leaf=20)), ('svr', SVR(kernel='rbf')), ('neural', MLPRegressor(hidden_layer_sizes=(4, 5), activation='logistic',
def mlp(x_train, y_train, x_test):
    model = MLPRegressor()
    # (comment left over from a LassoLarsCV example: LassoLarsCV can tune alpha automatically to pick the best value)
    model.fit(x_train, y_train)  # fit the regression model
    predicted = model.predict(x_test)
    return predicted
X_train3, X_test3, y_train3, y_test3 = train_test_split(X, y3, test_size=0.2, random_state=42)
X_train6, X_test6, y_train6, y_test6 = train_test_split(X, y6, test_size=0.2, random_state=42)
X_train9, X_test9, y_train9, y_test9 = train_test_split(X, y9, test_size=0.2, random_state=42)

print("###########by using default regressor##############")
model = MLPRegressor()
model.fit(X_train3, y_train3)
print("score for 3 month in sample: ", model.score(X_train3, y_train3))
print("score for 3 month out sample: ", model.score(X_test3, y_test3))
model.fit(X_train6, y_train6)
print("score for 6 month in sample: ", model.score(X_train6, y_train6))  # was scoring the 3-month split
print("score for 6 month out sample: ", model.score(X_test6, y_test6))
model.fit(X_train9, y_train9)
print("score for 9 month in sample: ", model.score(X_train9, y_train9))  # was scoring the 3-month split
print("score for 9 month out sample: ", model.score(X_test9, y_test9))
print("####################################################")

data = np.delete(data.values, 0, axis=1)
data = np.delete(data, 6, axis=1)
data = np.delete(data, 6, axis=1)
data = np.delete(data, 6, axis=1)
        'weight': 'normal',
        'size': 9}

plt.rc('font', **font)
fig, axes = plt.subplots(nrows=1, ncols=1)
axes.set_title("Data: " + file)
axes.set_ylabel('Normalized distant count')
axes.set_xlabel('Distance ($\AA$)')
axes.hist(y_train, 150, color='blue', normed=True, label='plot', linewidth=2, alpha=1.0)
plt.show()
"""

# Fit model
clf.fit(X_train, y_train)

# Compute and print r^2 score
print(clf.score(X_test, y_test))

# Store predicted energies
Ecmp = clf.predict(X_test)
Ecmp = gt.hatokcal * (Ecmp)
Eact = gt.hatokcal * (y_test)

# Compute RMSE in kcal/mol
rmse = gt.calculaterootmeansqrerror(Ecmp, Eact)

# End timer
_t1e = tm.time()
from __future__ import print_function, division
from future.utils import iteritems
from builtins import range, input
# Note: you may need to update your version of future
# sudo pip install -U future

import numpy as np
from sklearn.neural_network import MLPRegressor
from util import getKaggleMNIST

# get data
X, _, Xt, _ = getKaggleMNIST()

# create the model and train it
model = MLPRegressor()
model.fit(X, X)

# test the model
print("Train R^2:", model.score(X, X))
print("Test R^2:", model.score(Xt, Xt))

Xhat = model.predict(X)
mse = ((Xhat - X)**2).mean()
print("Train MSE:", mse)

Xhat = model.predict(Xt)
mse = ((Xhat - Xt)**2).mean()
print("Test MSE:", mse)
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPRegressor
import pandas as pd
import numpy as np

# get the test data
raw_data = pd.read_csv('final.csv', header=None)
mask = np.random.rand(len(raw_data)) < 0.8
train_data = raw_data[mask]
test_data = raw_data[~mask]

x_test = test_data.iloc[:, :9]
y_test = test_data.iloc[:, 9:10]

# split off the features
x_train = train_data.iloc[:, :9]
# split off the targets
y_train = train_data.iloc[:, 9:10]

clf = MLPRegressor()
clf.fit(x_train, y_train)
pred = clf.predict(x_test)
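The snippet stops after predicting on the held-out rows. A minimal evaluation sketch, assuming the pred and y_test defined above:

from sklearn.metrics import r2_score, mean_squared_error

print("R^2:", r2_score(y_test, pred))
print("MSE:", mean_squared_error(y_test, pred))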
def forecast_i_iv():
    # time boundaries used to split the training and test sets
    training_end = datetime.datetime.strptime('2016-06-25', '%Y-%m-%d')
    training_start = datetime.datetime.strptime('2016-02-29', '%Y-%m-%d')
    test_end = datetime.datetime.strptime('2016-07-25', '%Y-%m-%d')
    start = datetime.datetime.strptime('2016-02-01', '%Y-%m-%d')
    all_data = []  # stores the merged feature-engineering tables
    training_x = []
    training_y = []
    test_x = []
    test_y = []
    # first, merge the feature tables
    for i, row in feature_i.iterrows():
        row['sldatime'] = (datetime.datetime.strptime(row['sldatime'], '%Y-%m-%d') - start).days
        all_data.append([])
        j = 0
        while j < len(row):
            all_data[i].append(row[j])
            j += 1
    for i, row in feature_iv.iterrows():
        j = 0
        while j < len(row):
            all_data[i].append(row[j])
            j += 1
    # split into training and test sets
    for row in all_data:
        time = start + datetime.timedelta(days=row[6])
        if training_start < time < training_end:
            training_x.append(row)
            training_y.append(row[8])
        elif training_end < time < test_end:
            test_x.append(row)
            test_y.append(row[8])
    # initialize result
    result = []
    id = 0
    while id < len(test_x):
        result.append([])
        result[id].append(test_x[id][0])
        result[id].append(test_x[id][6])
        result[id].append(test_x[id][8] / 1000)
        id += 1
    history_dict = get_pluno_dict()
    # repeat the prediction 7 times, one day ahead each time
    day = 0
    while day < 7:
        # predict
        # clf = RandomForestClassifier()
        # linear, poly, rbf
        # clf = SVR(kernel="poly")
        # clf = tree.DecisionTreeClassifier(criterion='entropy')
        clf = MLPRegressor()
        clf.fit(training_x, training_y)
        predict_y = clf.predict(test_x)
        i = 0
        while i < len(test_x):
            # save into the result list
            pluno = test_x[i][0]
            date_str = datetime.datetime.strftime(start + datetime.timedelta(days=test_x[i][6]), '%Y-%m-%d')
            test_x[i][8] = predict_y[i]
            result[i].append(predict_y[i] / 1000)
            # when predicting the current day, the other features need no update,
            # so the sample can be appended to the training set directly
            if day == 0:
                training_x.append(test_x[i])
                training_y.append(test_x[i][8])
            if day > 0:
                # update the time-series dictionary
                history_dict[pluno][date_str] += predict_y[i]
                rec = test_x[i]
                # update the d-1 ... d-7 features
                j = 0
                lastweek = last_week_list(date_str)
                for date in lastweek:
                    min_date = datetime.datetime.strptime('2016-02-01', '%Y-%m-%d')
                    this_date = datetime.datetime.strptime(date, '%Y-%m-%d')
                    if this_date > min_date:
                        rec[9 + j] = history_dict[pluno][date]
                    else:
                        rec[9 + j] = 0.0
                    j += 1
                # update avg, max, min
                week_list = past_week_list(date_str)
                avg = 0.0
                max = 0.0
                min = float('inf')
                week_index = 0
                # iterate over the 2nd, 3rd and 4th previous weeks
                for week in week_list:
                    # iterate over each day of the week
                    for date in week:
                        min_date = datetime.datetime.strptime('2016-02-01', '%Y-%m-%d')
                        this_date = datetime.datetime.strptime(date, '%Y-%m-%d')
                        if this_date > min_date:
                            avg += history_dict[pluno][date]
                            if history_dict[pluno][date] > max:
                                max = history_dict[pluno][date]
                            if history_dict[pluno][date] < min:
                                min = history_dict[pluno][date]
                        else:
                            min = 0.0
                    avg = avg / 7
                    rec[16 + 3 * week_index] = avg
                    rec[17 + 3 * week_index] = max
                    rec[18 + 3 * week_index] = min
                    avg = 0.0
                    max = 0.0
                    min = float('inf')
                    week_index += 1
                # append the record with all updated features to the training set
                training_x.append(rec)
                training_y.append(rec[8])
            i += 1
        # advance the date for the next prediction
        for row in test_x:
            row[6] += 1
            date = datetime.datetime.strftime(start + datetime.timedelta(days=row[6]), '%Y-%m-%d')
            if is_weekday(date):
                row[7] = 1
            else:
                row[7] = 0
        day += 1
    # write the predictions to csv
    head = ['pluno', 'time', 'qty', 'd', 'd+1', 'd+2', 'd+3', 'd+4', 'd+5', 'd+6']
    # create the file object
    path = "MLP/forecast_i_iv.csv"
    f = open(path, 'w', encoding='utf-8', newline='')
    # build a csv writer on top of the file object
    csv_writer = csv.writer(f)
    # write the header row
    csv_writer.writerow(head)
    # write each data row
    for row in result:
        csv_writer.writerow(row)
    # close the file
    f.close()
# Example with a Regressor using the scikit-learn library
# example for the XOr gate
from sklearn.neural_network import MLPRegressor

X = [[0., 0.], [0., 1.], [1., 0.], [1., 1.]]  # each one of the entries 00 01 10 11
y = [0, 1, 1, 0]  # outputs for each one of the entries

# check http://scikit-learn.org/dev/modules/generated/sklearn.neural_network.MLPRegressor.html#sklearn.neural_network.MLPRegressor
# for more details
# note: the old `algorithm` parameter is called `solver` in current scikit-learn
reg = MLPRegressor(hidden_layer_sizes=(5,), activation='tanh', solver='sgd',
                   alpha=0.001, learning_rate='constant', max_iter=10000,
                   random_state=None, verbose=False, warm_start=False,
                   momentum=0.8, tol=10e-8, shuffle=False)
reg.fit(X, y)

outp = reg.predict([[0., 0.], [0., 1.], [1., 0.], [1., 1.]])
print('Results:')
print('0 0 0:', outp[0])
print('0 1 1:', outp[1])
print('1 0 1:', outp[2])
print('1 1 0:', outp[3])
print('Score:', reg.score(X, y))
# train/test split, LOSO
X_train = feat[:len(feat_train)]
X_test = feat[len(feat_train):]
y_train = vad[:len(vad_train)]
y_test = vad[len(vad_train):]

# batch_size=min(200, n_samples)
# layers (256, 128, 64, 32, 16)
nn = MLPRegressor(
    hidden_layer_sizes=(256, 256, 128, 64, 32, 16),
    activation='logistic', solver='adam', alpha=0.0001,
    learning_rate='constant', learning_rate_init=0.001, power_t=0.5,
    max_iter=180, shuffle=True, random_state=1, verbose=1,
    warm_start=True, momentum=0.9, nesterovs_momentum=True,
    early_stopping=True, validation_fraction=0.2,
    beta_1=0.9, beta_2=0.999, epsilon=1e-08, n_iter_no_change=10)

nn = nn.fit(X_train, y_train)
y_predict = nn.predict(X_test)

ccc = []
for i in range(0, 3):
    ccc_, _, _ = calc_scores(y_predict[:, i], y_test[:, i])
    ccc.append(ccc_)
    # print("# ", ccc)
print(ccc)
print(np.mean(ccc))

# Results speaker-dependent:
# 0.4874353476028858
# 0.6822788332623598
# 0.5516393803700689
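calc_scores is not shown in this excerpt; judging from the variable names, its first return value is a concordance correlation coefficient (CCC) per output dimension. Under that assumption, a standalone sketch of the metric would be:

import numpy as np

def ccc_score(y_pred, y_true):
    # Concordance correlation coefficient; assumed to match calc_scores' first return value.
    mean_p, mean_t = np.mean(y_pred), np.mean(y_true)
    var_p, var_t = np.var(y_pred), np.var(y_true)
    cov = np.mean((y_pred - mean_p) * (y_true - mean_t))
    return 2 * cov / (var_p + var_t + (mean_p - mean_t) ** 2)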
# of the screen (it's hard to keep your hand in the same
# spot when training).
N_FEATURES = 10
N_LABELS = 4

# Prepare the data
data = pd.read_csv('data/walk00_raw_points.csv').values
X, y = dutils.prepare_data_imputed_norm(data[:, :N_FEATURES], data[:, N_FEATURES:])

# Split train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

# Build the model
model = MLPRegressor(
    hidden_layer_sizes=(16, 42),
    activation='relu',
    solver='adam',
)

# Fit the model
model.fit(X_train, y_train)

# Evaluate the model
print(f'R^2={dutils.score_avg_rquared(model, X_test, y_test)}')
print(f'MSE={dutils.score_avg_mse(model, X_test, y_test)}')
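dutils.score_avg_rquared and dutils.score_avg_mse are project-specific helpers not shown here. One plausible reading, purely an assumption about what they compute, is that they average per-output scores across the label columns; a standalone sketch under that assumption:

import numpy as np
from sklearn.metrics import r2_score, mean_squared_error

def score_avg_rsquared(model, X, y):
    # Hypothetical: mean R^2 over the output columns (assumes y is a 2-D array).
    y_pred = model.predict(X)
    return np.mean([r2_score(y[:, i], y_pred[:, i]) for i in range(y.shape[1])])

def score_avg_mse(model, X, y):
    # Hypothetical: mean MSE over the output columns.
    y_pred = model.predict(X)
    return np.mean([mean_squared_error(y[:, i], y_pred[:, i]) for i in range(y.shape[1])])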
def regression(N, P):
    assert len(N) == len(P)
    # note: in current scikit-learn the parameter is `solver`, not `algorithm`
    clf = MLPRegressor(hidden_layer_sizes=(15,), activation='relu',
                       solver='adam', alpha=0.0001)
    clf.fit(N, P)
    return clf