def test_grnn_exceptions(self):
    with self.assertRaises(ValueError):
        # Size of the target data doesn't match the size
        # of the input data
        grnet = algorithms.GRNN(verbose=False)
        grnet.train(np.array([[0], [0]]), np.array([0]))

    with self.assertRaises(ValueError):
        # Two features in the target data
        grnet = algorithms.GRNN(verbose=False)
        grnet.train(np.array([[0], [0]]), np.array([[0, 0]]))

    with self.assertRaises(ValueError):
        # Invalid feature size for the prediction data
        grnet = algorithms.GRNN(verbose=False)
        grnet.train(np.array([[0], [0]]), np.array([0]))
        grnet.predict(np.array([[0]]))

    with self.assertRaises(NotTrained):
        # Prediction without training
        grnet = algorithms.GRNN(verbose=False)
        grnet.predict(np.array([[0]]))

    with self.assertRaises(ValueError):
        # Different number of features in the
        # training and test data
        grnet = algorithms.GRNN(verbose=False)
        grnet.train(np.array([[0]]), np.array([0]))
        grnet.predict(np.array([[0, 0]]))
def test_grid_search(self):
    def scorer(network, X, y):
        y = asfloat(y)
        result = asfloat(network.predict(X))
        return self.eval(errors.rmsle(result[:, 0], y))

    dataset = datasets.load_diabetes()
    x_train, x_test, y_train, y_test = train_test_split(
        dataset.data, dataset.target, test_size=0.3
    )

    grnnet = algorithms.GRNN(std=0.5, verbose=False)
    grnnet.train(x_train, y_train)
    error = scorer(grnnet, x_test, y_test)
    self.assertAlmostEqual(0.513, error, places=3)

    random_search = model_selection.RandomizedSearchCV(
        grnnet,
        param_distributions={'std': np.arange(1e-2, 0.1, 1e-4)},
        n_iter=10,
        scoring=scorer,
        random_state=self.random_seed,
        cv=3,
    )
    random_search.fit(dataset.data, dataset.target)
    scores = random_search.cv_results_

    best_score = min(scores['mean_test_score'])
    self.assertAlmostEqual(0.4266, best_score, places=3)
def test_simple_grnn(self):
    dataset = datasets.load_diabetes()
    x_train, x_test, y_train, y_test = train_test_split(
        dataset.data, dataset.target, train_size=0.7
    )

    x_train_before = x_train.copy()
    x_test_before = x_test.copy()
    y_train_before = y_train.copy()

    grnnet = algorithms.GRNN(std=0.1, verbose=False)
    grnnet.train(x_train, y_train)

    result = grnnet.predict(x_test)
    error = rmsle(result, y_test)
    old_result = result.copy()
    self.assertAlmostEqual(error, 0.4245, places=4)

    # Make sure training didn't modify the input arrays in place
    np.testing.assert_array_equal(x_train, x_train_before)
    np.testing.assert_array_equal(x_test, x_test_before)
    np.testing.assert_array_equal(y_train, y_train_before)

    # The network must keep its own copy of the training data,
    # so mutating x_train shouldn't change the predictions
    x_train[:, :] = 0
    result = grnnet.predict(x_test)
    np.testing.assert_array_almost_equal(result, old_result)
def test_grid_search(self):
    def scorer(network, X, y):
        result = network.predict(X)
        return rmsle(result[:, 0], y)

    dataset = datasets.load_diabetes()
    x_train, x_test, y_train, y_test = train_test_split(
        dataset.data, dataset.target, train_size=0.7
    )

    grnnet = algorithms.GRNN(std=0.5, verbose=False)
    grnnet.train(x_train, y_train)
    error = scorer(grnnet, x_test, y_test)
    self.assertAlmostEqual(0.513, error, places=3)

    random_search = grid_search.RandomizedSearchCV(
        grnnet,
        param_distributions={'std': np.arange(1e-2, 0.1, 1e-4)},
        n_iter=10,
        scoring=scorer,
    )
    random_search.fit(dataset.data, dataset.target)
    scores = random_search.grid_scores_

    best_score = min(scores, key=itemgetter(1))
    self.assertAlmostEqual(0.4303, best_score[1], places=3)
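# Note: the two test_grid_search variants above reflect different scikit-learn
# generations. The sklearn.grid_search module and the grid_scores_ attribute belong
# to older releases; in current scikit-learn the search classes live in
# sklearn.model_selection and expose cv_results_ instead (as the first variant shows
# with random_search.cv_results_['mean_test_score']).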
def test_simple_grnn(self):
    dataset = datasets.load_diabetes()
    x_train, x_test, y_train, y_test = train_test_split(
        dataset.data, dataset.target, train_size=0.7
    )

    x_train_before = x_train.copy()
    x_test_before = x_test.copy()
    y_train_before = y_train.copy()

    grnnet = algorithms.GRNN(std=0.1, verbose=False)
    grnnet.train(x_train, y_train)

    result = grnnet.predict(x_test)
    error = metrics.mean_absolute_error(result, y_test)
    old_result = result.copy()
    self.assertAlmostEqual(error, 46.3358, places=4)

    # Make sure training didn't modify the input arrays in place
    np.testing.assert_array_equal(x_train, x_train_before)
    np.testing.assert_array_equal(x_test, x_test_before)
    np.testing.assert_array_equal(y_train, y_train_before)

    # The network must keep its own copy of the training data,
    # so mutating x_train shouldn't change the predictions
    x_train[:, :] = 0
    result = grnnet.predict(x_test)
    np.testing.assert_array_almost_equal(result, old_result)
def test_predict_different_inputs(self):
    grnnet = algorithms.GRNN(verbose=False)

    data = np.array([[1, 2, 3]]).T
    target = np.array([[1, 2, 3]]).T

    grnnet.train(data, target)
    self.assertInvalidVectorPred(grnnet, data.ravel(), target, decimal=2)
def test_grnn(g):
    """Prediction model."""
    train_data, test_data = feature_engineering()

    gn = algorithms.GRNN(std=g)
    x = train_data.iloc[:, :-1]
    y = train_data.iloc[:, -1]

    gn.train(x, y)
    y_predicted = gn.predict(test_data)
    return y_predicted
def test_transform_method(self):
    dataset = datasets.load_diabetes()

    grnnet = algorithms.GRNN(std=0.5, verbose=False)
    grnnet.train(dataset.data, dataset.target)

    y_predicted = grnnet.predict(dataset.data)
    y_transformed = grnnet.transform(dataset.data)

    np.testing.assert_array_almost_equal(y_predicted, y_transformed)
def GRNN_train(family, prop):
    """Predict properties using a General Regression Neural Network model."""
    train, test = df_prediction(family, prop)       # create data for train and test
    x_train = train[train.columns[4:]]               # select functional-group features
    y_train = train[prop]                            # select the target property

    scaler = MinMaxScaler(feature_range=(0, 1))      # rescale features to [0, 1]
    rescaledX = scaler.fit_transform(x_train)
    np.set_printoptions(precision=4)                 # limit printed decimal places

    grnn = algorithms.GRNN(std=0.4, verbose=False)   # set up the model
    grnn.train(rescaledX, y_train)                   # train on the rescaled features
    return grnn
def test_handle_errors(self):
    with self.assertRaises(ValueError):
        # Wrong: size of the target data doesn't match the size
        # of the input data
        algorithms.GRNN(verbose=False).train(
            np.array([[0], [0]]), np.array([0]))

    with self.assertRaises(ValueError):
        # Wrong: 2-D target vector (must be 1-D)
        algorithms.GRNN(verbose=False).train(
            np.array([[0], [0]]), np.array([[0]]))

    with self.assertRaises(AttributeError):
        # Wrong: can't use an iterative learning process for this
        # algorithm
        algorithms.GRNN(verbose=False).train_epoch()

    with self.assertRaises(ValueError):
        # Wrong: invalid feature size for the prediction data
        grnet = algorithms.GRNN(verbose=False)
        grnet.train(np.array([[0], [0]]), np.array([0]))
        grnet.predict(np.array([[0]]))
def train_grnn(g, k=4):
    """Train the model with k-fold cross-validation."""
    train_data, test_data = feature_engineering()
    x = train_data.iloc[:, :-1]
    y = train_data.iloc[:, -1]

    rmse_list = []
    for i in range(k):
        x_train, y_train, x_test, y_test = get_k_fold_data(k, i, x, y)
        gn = algorithms.GRNN(std=g)
        gn.train(x_train, y_train)
        y_predicted = gn.predict(x_test)
        rmse = math.sqrt(np.mean((y_predicted - np.array(y_test)) ** 2))
        rmse_list.append(rmse)

    # Return the mean RMSE across the k folds
    return sum(rmse_list) / len(rmse_list)
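# The snippet above relies on a project-local helper, get_k_fold_data(k, i, x, y),
# that isn't shown here. Below is a minimal sketch of what such a helper might look
# like; the pandas-based slicing and the return order are assumptions inferred from
# the call site, not the original code.
import pandas as pd


def get_k_fold_data(k, i, x, y):
    """Use the i-th fold as the validation set and the remaining folds for training."""
    assert k > 1
    fold_size = len(x) // k
    start, stop = i * fold_size, (i + 1) * fold_size

    x_valid, y_valid = x.iloc[start:stop], y.iloc[start:stop]
    x_train = pd.concat([x.iloc[:start], x.iloc[stop:]])
    y_train = pd.concat([y.iloc[:start], y.iloc[stop:]])
    return x_train, y_train, x_valid, y_valid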
def GRNN_Predict_Model(X, Y, std=0.02):
    scaler = preprocessing.MinMaxScaler()
    arima_values = scaler.fit_transform(X)
    # Refitting the same scaler on Y means the inverse_transform calls below
    # map values back to the original target scale
    origin_values = scaler.fit_transform(Y.reshape((-1, 1)))

    x_train, x_test, y_train, y_test = train_test_split(
        arima_values, origin_values, train_size=0.7, random_state=0)

    nw = algorithms.GRNN(std=std, verbose=False)
    nw.train(x_train, y_train)
    # y_Predict = nw.predict(x_test)
    GRNN_Predict = nw.predict(arima_values)

    origin_values_inverse = scaler.inverse_transform(origin_values)
    GRNN_Predict_inverse = scaler.inverse_transform(GRNN_Predict)
    return GRNN_Predict_inverse
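# The function above reuses one MinMaxScaler for both the inputs and the target, so
# only the last fit (on Y) is retained; that happens to be exactly what the
# inverse_transform calls need. A sketch using two separate scalers avoids relying on
# that ordering. The function and variable names below are illustrative, not part of
# the original code.
import numpy as np
from sklearn import preprocessing
from neupy import algorithms


def grnn_predict_two_scalers(X, Y, std=0.02):
    scaler_x = preprocessing.MinMaxScaler()
    scaler_y = preprocessing.MinMaxScaler()

    x_scaled = scaler_x.fit_transform(X)
    y_scaled = scaler_y.fit_transform(Y.reshape(-1, 1))

    nw = algorithms.GRNN(std=std, verbose=False)
    nw.train(x_scaled, y_scaled)

    predictions = nw.predict(x_scaled)
    # Map the predictions back to the original target scale
    return scaler_y.inverse_transform(predictions)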
def train_model(g):
    """Train the model.

    Parameters:
    ----------
    g: smoothing factor to be optimized

    return: the predicted values and the true values
    """
    environment.reproducible()
    df, norm_eigen, norm_target = read_csv()

    x_train = norm_eigen[:10]
    y_train = norm_target[:10]
    x_test = norm_eigen[10:]
    y_test = norm_target[10:]

    gn = algorithms.GRNN(std=g)
    gn.train(x_train, y_train)
    y_predicted = gn.predict(x_train)
    return y_predicted, y_train
def test_grnn(g, file_name):
    '''Evaluate the trained PSO-GRNN model.

    Parameters:
    ----------
    g: optimized smoothing factor
    file_name: name of the file to read

    return: the predicted values and the true values
    '''
    norm_eigen, norm_target = read_csv(file_name)

    x_train = norm_eigen[:18]
    y_train = norm_target[:18]
    x_test = norm_eigen[24:]
    y_test = norm_target[24:]

    gn = algorithms.GRNN(std=g, verbose=False)
    gn.train(x_train, y_train)
    y_predicted = gn.predict(x_test)
    return y_predicted, y_test
def train_grnn(g, file_name):
    '''Split the sample set into training, validation and test sets.

    Parameters:
    ----------
    g: smoothing factor to be optimized
    file_name: name of the file to read

    return: the predicted values and the true values
    '''
    norm_eigen, norm_target = read_csv(file_name)

    x_train = norm_eigen[:18]
    y_train = norm_target[:18]
    x_test = norm_eigen[18:24]
    y_test = norm_target[18:24]

    gn = algorithms.GRNN(std=g)
    gn.train(x_train, y_train)
    y_predicted = gn.predict(x_test)
    return y_predicted, y_test
def test_model(g):
    '''Evaluate the trained PSO-GRNN model.

    Parameters:
    ----------
    g: optimized smoothing factor

    return: the predicted values, rescaled back to the original range
    '''
    df, norm_eigen, norm_target = read_csv()

    x_train = norm_eigen[:10]
    y_train = norm_target[:10]
    x_test = norm_eigen[10:]
    y_test = norm_target[10:]

    gn = algorithms.GRNN(std=g, verbose=False)
    gn.train(x_train, y_train)
    y_predicted = gn.predict(x_test)

    # Undo the min-max normalisation of the target column
    normalize_value = y_predicted * (
        df.iloc[:, 10].max() - df.iloc[:, 10].min()) + df.iloc[:, 10].min()
    return normalize_value
def fromPN(cls, pos_train, neg_train, z_trains, y_train, models=None):
    '''Build the classifier; a neural network is trained with z_trains and y_train.

    Arguments:
        pos_train, neg_train -- same as in the super class
        z_trains {List[DataFrame]}
        y_train {Array}

    Returns:
        ZeroOneHemiNaiveBayesClassifier
    '''
    sbc = super(ZeroOneHemiNaiveBayesClassifier, cls).fromPN(pos_train, neg_train)

    if models is None or models == 'grnn':
        sbc.models = [
            algorithms.GRNN(
                std=np.std([
                    a for a in z_train.values.ravel()
                    if str(a) != 'nan' and a != 0
                ]),
                verbose=False)
            for z_train in z_trains
        ]
    elif models == 'pnn':
        sbc.models = [
            algorithms.PNN(
                std=np.std([
                    a for a in z_train.values.ravel()
                    if str(a) != 'nan' and a != 0
                ]),
                verbose=False)
            for z_train in z_trains
        ]
    elif models == 'svm':
        sbc.models = [svm.SVC(kernel='rbf') for z_train in z_trains]
    elif models == 'lasso':
        sbc.models = [LassoLars() for z_train in z_trains]
    else:
        sbc.models = [
            copy.deepcopy(model_dict[model]) if isinstance(model, str)
            else copy.deepcopy(model)
            for model in models
        ]

    sbc.features2 = [z_train.columns for z_train in z_trains]
    sbc.fit(z_trains, y_train)
    return sbc
def fromPN(cls, pos_train1, neg_train1, z_train, y_train):
    '''Build the classifier; a neural network is trained with z_train and y_train.

    Arguments:
        pos_train1, neg_train1 -- same as in the super class
        z_train {DataFrame}
        y_train {Array}

    Returns:
        ZeroOneSemiNaiveBayesClassifier
    '''
    sbc = super(ZeroOneSemiNaiveBayesClassifier, cls).fromPN(pos_train1, neg_train1)

    nn = algorithms.GRNN(std=np.std(z_train.values), verbose=False)
    nn.train(z_train, y_train)

    sbc.model = nn
    sbc.features2 = z_train.columns
    return sbc
def test_mixture_of_experts_init_networks_exceptions(self):
    networks = self.networks

    with self.assertRaises(ValueError):
        # Invalid network (not GradientDescent)
        algorithms.MixtureOfExperts(
            networks=networks + [algorithms.GRNN(verbose=False)],
            gating_network=algorithms.GradientDescent(
                layers.Input(1) > layers.Sigmoid(3),
                verbose=False,
            ))

    with self.assertRaises(ValueError):
        # Invalid number of outputs in the third network
        algorithms.MixtureOfExperts(
            networks=networks + [
                algorithms.GradientDescent(
                    (1, 20, 2), step=0.2, verbose=False)
            ],
            gating_network=algorithms.GradientDescent(
                layers.Input(1) > layers.Sigmoid(3),
                verbose=False,
            ))

    with self.assertRaises(ValueError):
        # Invalid network error function
        algorithms.MixtureOfExperts(
            networks=networks + [
                algorithms.GradientDescent(
                    (1, 20, 1),
                    step=0.2,
                    error='rmsle',
                    verbose=False,
                )
            ],
            gating_network=algorithms.GradientDescent(
                layers.Input(1) > layers.Sigmoid(3),
                verbose=False,
            ),
        )
def GRNN(data_model):
    x_train, x_test, y_train, y_test = create_dataset(data_model['2017'])

    grnnet = algorithms.GRNN(std=0.5, verbose=True)
    grnnet.train(x_train, y_train)
    error = scorer(grnnet, x_test, y_test)
    print("GRNN RMSLE = {:.3f}\n".format(error))

    part_to_predict = data_model['2018'].copy()
    df_test = part_to_predict.copy()
    index_predict = df_test.index
    df_test.reset_index(inplace=True)
    df_test.drop(["Date"], axis=1, inplace=True)

    # Fix the random seed for reproducibility
    pd.np.random.seed(7)

    X = df_test.drop([pr.PowerPV], axis=1)
    y = df_test.drop([x for x in df_test.columns if x not in [pr.PowerPV]], axis=1)

    pred = grnnet.predict(X)
    prediction_to_plot = pd.DataFrame(
        index=index_predict,
        data={
            'observed': pd.np.array(y[pr.PowerPV]),
            'predicted': pred.reshape(pred.shape[0], )
        })
    pr.plot_data(prediction_to_plot['2018-04-01':'2018-04-05'],
                 prediction_to_plot.columns, 1)

    print("Run Random Search CV")
    grnnet.verbose = False
    random_search = grid_search.RandomizedSearchCV(
        grnnet,
        param_distributions={'std': np.arange(1e-2, 1, 1e-4)},
        n_iter=400,
        scoring=scorer,
    )
    random_search.fit(
        data_model[[x for x in df_test.columns if x not in [pr.PowerPV]]],
        data_model[pr.PowerPV])
    report(random_search.grid_scores_)
def Find_best_smoothing_factor(x_train, x_test, y_train, y_test):
    results = []
    for x in np.linspace(0.01, 0.2, 40):
        nn = algorithms.GRNN(std=x, verbose=False)
        nn.train(x_train, y_train)
        y_pred = nn.predict(x_test)
        rmse = np.sqrt(mean_squared_error(y_pred, y_test))
        results.append({"smoothing_factor": x, "rmse": rmse})

    SmoRMSE = pd.DataFrame(results)

    # Plot RMSE against the smoothing factor
    plt.plot(SmoRMSE["smoothing_factor"], SmoRMSE["rmse"])
    plt.title("Find best smoothing factor for GRNN")
    plt.xlabel('smoothing factor')
    plt.ylabel('RMSE')
    plt.show()

    # Pick the smoothing factor with the smallest RMSE
    best_row = SmoRMSE.loc[SmoRMSE["rmse"].idxmin()]
    print(best_row)
    best_smoothing_factor = best_row["smoothing_factor"]
    return best_smoothing_factor
############ auto-encoder ###############
encoded_X_combine = train_encoder(X_combine, wn)
print(encoded_X_combine.shape)

X1 = encoded_X_combine[0:10778, :]
X2 = encoded_X_combine[10778:, :]
print(X1.shape)
print(X2.shape)

# rbf = RBFNet(k=40)
# rbf.fit(X1, y)
# prediction = rbf.predict(X2)

################## GRNN #######################
nw = algorithms.GRNN(std=0.015, verbose=False)
nw.train(X1, y)
error = scorer(nw, X2, y_valid)
print("GRNN RMSLE = {:.3f}\n".format(error))

prediction = nw.predict(X2)
real = y_valid.flatten()
pred = prediction.flatten()

for i in range(len(y_valid)):
    print('real      :', real[i])
    print('prediction:', pred[i])
    print('*' * 30)

diff = abs(real - pred)
def scorer(network, X, y):
    result = network.predict(X)
    return rmsle(result, y)


def report(results, n_top=3):
    ranks = heapq.nlargest(n_top, results['rank_test_score'])
    for i in ranks:
        candidates = np.flatnonzero(results['rank_test_score'] == i)
        for candidate in candidates:
            print("Mean validation score: {0:.3f} (std: {1:.3f})".format(
                results['mean_test_score'][candidate],
                results['std_test_score'][candidate]))
            print("Parameters: {0}".format(results['params'][candidate]))
            print("")


print("Run Random Search CV")
dataset = datasets.load_diabetes()
random_search = RandomizedSearchCV(
    algorithms.GRNN(std=0.1, verbose=False),
    param_distributions={'std': np.arange(1e-2, 1, 1e-3)},
    n_iter=100,
    cv=3,
    scoring=scorer,
)
random_search.fit(dataset.data, dataset.target)
report(random_search.cv_results_)
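# Note: scikit-learn search objects treat higher scores as better, while RMSLE is an
# error to be minimised. If the ranking in report() is meant to surface the lowest
# errors, one option is to build the scorer with make_scorer(..., greater_is_better=False)
# so the metric is negated internally and rank_test_score == 1 corresponds to the
# smallest RMSLE. A minimal sketch; the names rmsle_metric and neg_rmsle_scorer are
# illustrative, not part of the original code.
import numpy as np
from sklearn.metrics import make_scorer


def rmsle_metric(y_true, y_pred):
    # Root mean squared logarithmic error computed directly with numpy
    return np.sqrt(np.mean((np.log1p(y_pred) - np.log1p(y_true)) ** 2))


neg_rmsle_scorer = make_scorer(rmsle_metric, greater_is_better=False)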
def test_handle_errors(self):
    networks = [
        algorithms.GradientDescent((1, 20, 1), step=0.2, verbose=False),
        algorithms.GradientDescent((1, 20, 1), step=0.2, verbose=False),
    ]

    with self.assertRaises(ValueError):
        # Invalid network (not GradientDescent)
        algorithms.MixtureOfExperts(
            networks=networks + [algorithms.GRNN(verbose=False)],
            gating_network=algorithms.GradientDescent(
                layers.Sigmoid(1) > layers.Output(3),
                verbose=False,
            ))

    with self.assertRaises(ValueError):
        # Invalid number of outputs in the third network
        algorithms.MixtureOfExperts(
            networks=networks + [
                algorithms.GradientDescent(
                    (1, 20, 2), step=0.2, verbose=False)
            ],
            gating_network=algorithms.GradientDescent(
                layers.Sigmoid(1) > layers.Output(3),
                verbose=False,
            ))

    with self.assertRaises(ValueError):
        # Invalid gating network output layer size
        algorithms.MixtureOfExperts(
            networks=networks,
            gating_network=algorithms.GradientDescent(
                layers.Softmax(1) > layers.Output(1),
                verbose=False,
            ))

    with self.assertRaises(ValueError):
        # Invalid gating network input layer
        algorithms.MixtureOfExperts(
            networks=networks,
            gating_network=algorithms.GradientDescent(
                layers.Sigmoid(1) > layers.Output(2),
                verbose=False,
            ))

    with self.assertRaises(ValueError):
        # Invalid gating network output layer
        algorithms.MixtureOfExperts(
            networks=networks,
            gating_network=algorithms.GradientDescent(
                layers.Softmax(1) > layers.RoundedOutput(2),
                verbose=False,
            ))

    with self.assertRaises(ValueError):
        # Invalid network error function
        algorithms.MixtureOfExperts(
            networks=networks + [
                algorithms.GradientDescent(
                    (1, 20, 1),
                    step=0.2,
                    error='rmsle',
                    verbose=False,
                )
            ],
            gating_network=algorithms.GradientDescent(
                layers.Sigmoid(1) > layers.Output(3),
                verbose=False,
            ),
        )

    with self.assertRaises(ValueError):
        # Invalid gating error function
        moe = algorithms.MixtureOfExperts(
            networks=networks,
            gating_network=algorithms.GradientDescent(
                layers.Softmax(1) > layers.Output(2),
                error='rmsle',
                verbose=False),
        )

    # Valid mixture of experts used for the training checks below
    moe = algorithms.MixtureOfExperts(
        networks=networks,
        gating_network=algorithms.GradientDescent(
            layers.Softmax(1) > layers.Output(2),
            verbose=False),
    )

    with self.assertRaises(ValueError):
        # Wrong number of features in the training input
        moe.train(np.array([[1, 2]]), np.array([[0]]))

    with self.assertRaises(ValueError):
        # Wrong number of features in the training output
        moe.train(np.array([[1]]), np.array([[0, 0]]))
environment.reproducible()

# best_model = None
# best_error = 99
# for std_in in np.array([0.01, 0.1, 0.5, 0.8, 1.0, 1.2, 1.4, 2.0, 5.0, 10.0]):
#     grnnet = algorithms.GRNN(std=std_in, verbose=True)
#     grnnet.train(training_X, training_Y)
#     predicted = grnnet.predict(testing_X)
#     error = scorer(testing_Y, predicted)
#     print("GRNN RMSE = {:.5f}\n".format(error))
#     if error < best_error:
#         print("New Best Error Found: " + str(error))
#         best_error = error
#         best_model = grnnet

grnnet2 = algorithms.GRNN(std=0.2, verbose=True)
grnnet2.train(training_X, training_Y)
y_predicted = grnnet2.predict(testing_X)

print("RMSE = " + str(estimators.rmse(y_predicted, testing_Y.ravel())))
print("MAE = " + str(estimators.mae(y_predicted, testing_Y.ravel())))

actual_mae = y_data_scaler.inverse_transform(
    estimators.mae(y_predicted, testing_Y))
print("MAE (no. of shares) = " + str(actual_mae.squeeze()))

# Save the best GRNN model
import _pickle

with open(
        '/home/pier/Machine_Learning/KE5206NN/regression/regression_models/grnn.pkl',
        'wb') as fid:
    _pickle.dump(grnnet2, fid)
def test_train_different_inputs(self):
    self.assertInvalidVectorTrain(
        algorithms.GRNN(verbose=False),
        np.array([1, 2, 3]),
        np.array([1, 2, 3])
    )
y_train2, batch_size=128, epochs=3, validation_split=0.05)


def plot_predict(predicted_data, true_data):
    fig = plt.figure(facecolor='white')
    ax = fig.add_subplot(111)
    ax.plot(true_data, label='True Data')
    plt.plot(predicted_data, label='Prediction')
    plt.title('LSTM model predictions')
    plt.legend()
    plt.show()


# Plot LSTM predictions against the test data and the training histories
lstm_model_preds = lstm_model.predict(X_test2)
lstm_model_rmse = np.sqrt(mean_squared_error(lstm_model_preds, y_test2))
plot_predict(lstm_model_preds, y_test2)
plotHistory(lstm_model_history, "LSTM Model", "loss", "epochs")
plotHistory(ann_model_history, "ANN Model", "loss", "epochs")

from neupy import algorithms, estimators, environment

environment.reproducible()
grnn_model = algorithms.GRNN(std=0.1, verbose=False)
grnn_model.train(X_train, y_train)
grnn_predicted = grnn_model.predict(X_test)
grnn_model_rmse = estimators.rmse(grnn_predicted, y_test)

print('ANN RMSE', ann_model_rmse)
print('LSTM RMSE', lstm_model_rmse)
print('GRNN RMSE', grnn_model_rmse)
def create_grnn(sigma=0.1, verbose=False):
    grnn = algorithms.GRNN(std=sigma, verbose=verbose)
    return grnn
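# A minimal, self-contained usage sketch for a factory like create_grnn above,
# assuming neupy is installed; the synthetic data and the candidate sigma values
# are illustrative only.
import numpy as np
from neupy import algorithms


def create_grnn(sigma=0.1, verbose=False):
    return algorithms.GRNN(std=sigma, verbose=verbose)


x_train = np.linspace(0, 1, 50).reshape(-1, 1)
y_train = np.sin(2 * np.pi * x_train).ravel()
x_test = np.linspace(0, 1, 20).reshape(-1, 1)
y_test = np.sin(2 * np.pi * x_test).ravel()

for sigma in (0.05, 0.1, 0.2):
    grnn = create_grnn(sigma=sigma)
    grnn.train(x_train, y_train)
    predictions = grnn.predict(x_test)
    rmse = np.sqrt(np.mean((predictions.ravel() - y_test) ** 2))
    print("sigma={:.2f}  RMSE={:.4f}".format(sigma, rmse))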
df = df[df[' timedelta'] > 60]

# Select the features and standardise them before splitting
data = df[df.columns[2:60]]
target = df[' shares'].ravel()
data_norm = StandardScaler().fit_transform(data)

x_train, x_test, y_train, y_test = train_test_split(
    data_norm, target, test_size=0.3)

network = architectures.mixture_of_experts([
    algorithms.GRNN(std=0.5, verbose=True),
    algorithms.PNN(std=0.1, verbose=True),
])
network.train(x_train, y_train, epochs=500)

result = network.predict(x_test)
error = estimators.rmse(result, y_test)
print("GRNN RMSE = {}\n".format(error))

r2_score = metrics.r2_score(result, y_test)
print("GRNN R_SCORE = {}\n".format(r2_score))
epochs = 10000  # epochs for training

## Data Processing
# Load the data
x_train, x_test, y_train_do, y_test_do, scaler_do_y = prepare_do()
print('x_train:{}'.format(x_train.shape))
print('x_test:{}'.format(x_test.shape))
print('y_train_do:{}'.format(y_train_do.shape))
print('y_test_do:{}'.format(y_test_do.shape))

## GRNN Model
nw = algorithms.GRNN(std=1, verbose=False)
nw.train(x_train, y_train_do)
y_pred = nw.predict(x_test)
print(y_pred)

# # Example 2: use Anisotropic GRNN with the Limited-Memory BFGS algorithm
# # to select the optimal bandwidths
# AGRNN = GRNN()
# AGRNN.fit(x_train, y_train_do.ravel())
# sigma = AGRNN.sigma
# y_pred = AGRNN.predict(X_test)

# x_test_ori = data_scaler.inverse_transform(x_test.reshape(-1, n_memory_steps))
# y_test_ori = scaler_do_y.inverse_transform(y_test_do)
y_predicted = scaler_do_y.inverse_transform(y_pred)
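# After mapping the predictions back to the original scale, an error metric can be
# computed against the similarly inverse-transformed targets. A small sketch,
# assuming y_test_do, scaler_do_y and y_predicted from the snippet above:
import numpy as np
from sklearn.metrics import mean_squared_error

y_test_ori = scaler_do_y.inverse_transform(y_test_do)
rmse = np.sqrt(mean_squared_error(y_test_ori, y_predicted))
print('GRNN RMSE (original units): {:.4f}'.format(rmse))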