def create_error_models(self):
    """
    In order to calculate the total error of the individual GPs, an error
    model is created for each GP. The inputs to this are the absolute
    errors between the individual GP predictions and the truth values at
    all available truth data points. The mean error (err_mean) is
    subtracted from the difference to ensure that the points are centred
    around 0.
    """
    gp_error_models = []
    for i in range(self.num_models):
        # Predict with the i-th GP and convert back to physical units.
        gpmodel_mean, gpmodel_var = self.gp_models[i].predict_var(self.x_true)
        gpmodel_mean = gpmodel_mean * self.model_std[i] + self.model_mean[i]
        # Absolute discrepancy between the model and the truth data.
        error = np.abs(self.y_true - gpmodel_mean)
        self.err_mean.append(np.mean(error))
        self.err_std.append(np.std(error))
        if self.err_std[i] == 0:
            self.err_std[i] = 1
        # Train the error GP on the centred, scaled discrepancies.
        new_model = gp_model(self.x_true,
                             (error - self.err_mean[i]) / self.err_std[i],
                             self.err_model_hp["l"][i],
                             self.err_model_hp["sf"][i],
                             self.err_model_hp["sn"][i],
                             self.num_dim, self.kernel)
        gp_error_models.append(new_model)
    return gp_error_models
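# The standardisation used for the error-model targets above can be checked in
# isolation. The helper below is a sketch added for illustration (it is not
# part of the original class): it assumes a fitted model exposing the same
# predict_var() interface plus the per-model mean/std scalers, and rebuilds
# the centred, scaled absolute errors that the error GPs are trained on.
def _standardised_error_targets(model, x_true, y_true, model_mean, model_std):
    pred_mean, _ = model.predict_var(x_true)           # standardised prediction
    pred_mean = pred_mean * model_std + model_mean     # back to physical units
    error = np.abs(y_true - pred_mean)                 # absolute discrepancy
    err_mean, err_std = np.mean(error), np.std(error)
    if err_std == 0:                                   # guard against zero spread
        err_std = 1
    return (error - err_mean) / err_std, err_mean, err_std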
def setup(self):
    data = pd.read_excel('data/rve_data.xlsx')
    # Scale the input columns to comparable ranges.
    data.iloc[:, 0] = (data.iloc[:, 0] - 650) / 200
    data.iloc[:, 2] = data.iloc[:, 2] / 3
    data.iloc[:, 3] = data.iloc[:, 3] / 2
    # Standardise the output column and keep the scalers for later use.
    self.mean = np.mean(data.iloc[:, 5])
    self.std = np.std(data.iloc[:, 5])
    data.iloc[:, 5] = (data.iloc[:, 5] - self.mean) / self.std
    # GP on the first four columns with fixed hyperparameters and an 'SE' kernel.
    self.gp = gp_model(data.iloc[:, 0:4], data.iloc[:, 5],
                       np.array([0.12274117, 0.08612411, 0.65729583, 0.23342798]),
                       0.16578065, 0.1, 4, 'SE')
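# A point to note about setup() above: the GP is trained on scaled inputs, so
# any direct query of self.gp has to apply the same column scaling first. The
# helper below is an illustrative sketch (not part of the original class) that
# mirrors the transformations used in setup(); the column meanings are taken
# from the slicing there.
def _scale_rve_inputs(x_raw):
    x = np.array(x_raw, dtype=float)
    x[:, 0] = (x[:, 0] - 650) / 200    # first input column
    x[:, 2] = x[:, 2] / 3
    x[:, 3] = x[:, 3] / 2
    return x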
def create_gps(self):
    """
    GPs need to be created for each of the lower dimension information
    sources as used in the reification method. These can be
    multi-dimensional models. As a result, the x_train and y_train data
    needs to be added to the class as a list of numpy arrays.
    """
    gp_models = []
    for i in range(self.num_models):
        new_model = gp_model(self.x_train[i],
                             (self.y_train[i] - self.model_mean[i]) / self.model_std[i],
                             self.model_hp["l"][i],
                             self.model_hp["sf"][i],
                             self.model_hp["sn"][i],
                             self.num_dim, self.kernel)
        gp_models.append(new_model)
    return gp_models
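# Minimal sketch of the training-data layout create_gps() expects: one numpy
# array per information source, with matching per-source mean/std lists used
# for the standardisation above. The shapes, values, and helper name here are
# illustrative assumptions only, not part of the original class.
def _example_training_lists(num_dim=2, seed=0):
    rng = np.random.default_rng(seed)
    x_train = [rng.random((20, num_dim)), rng.random((35, num_dim))]
    y_train = [np.sin(4 * x[:, 0]) for x in x_train]   # one target vector per source
    model_mean = [np.mean(y) for y in y_train]
    model_std = [np.std(y) if np.std(y) != 0 else 1.0 for y in y_train]
    return x_train, y_train, model_mean, model_std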
def setup(self):
    data = pd.read_excel("data/tc_data.xlsx")
    x_train = np.array(data.iloc[:, 1:5])
    # Scale the four input columns.
    x_train[:, 0] = (x_train[:, 0] - 650) / 200
    x_train[:, 1] = 100 * x_train[:, 1]
    x_train[:, 2] = 100 * x_train[:, 2] / 2
    x_train[:, 3] = 100 * x_train[:, 3] / 3
    # Fixed length-scale and signal-variance hyperparameters, one set per output.
    l_param_list = [[np.sqrt(0.28368), np.sqrt(0.44255),
                     np.sqrt(0.19912), np.sqrt(5.48465)],
                    [np.sqrt(2.86816), np.sqrt(2.57049),
                     np.sqrt(0.64243), np.sqrt(94.43864)],
                    [np.sqrt(6.41552), np.sqrt(12.16391),
                     np.sqrt(7.16226), np.sqrt(27.87327)],
                    [np.sqrt(34.57352), np.sqrt(12.83549),
                     np.sqrt(4.73291), np.sqrt(275.83489)]]
    sf_list = [4 * 1.57933, 4 * 5.5972, 4 * 78.32377, 4 * 14.79803]
    for k in range(4):
        # Standardise each output column and keep the scalers.
        self.y_mean.append(np.mean(np.array(data.iloc[:, k + 5])))
        self.y_max.append(np.max(np.array(data.iloc[:, k + 5])))
        self.y_std.append(np.std(np.array(data.iloc[:, k + 5])))
        y_train = (np.array(data.iloc[:, k + 5]) - self.y_mean[k]) / self.y_std[k]
        l_param = l_param_list[k]
        sf = sf_list[k]
        # One GP with the 'M52' kernel per output column.
        self.tc_gp.append(
            gp_model(x_train, y_train, np.array(l_param), sf, 0.05, 4, 'M52'))
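# Sketch (an added illustration, not part of the original class) of how the
# four GPs built above would be queried and mapped back to physical units
# using the stored y_mean / y_std scalers; predict_var is the same interface
# used elsewhere in this file, and x_query is assumed to be scaled already.
def _query_tc_gps(tc_gp, y_mean, y_std, x_query):
    outputs = []
    for k, gp in enumerate(tc_gp):
        mean_k, _ = gp.predict_var(x_query)             # standardised prediction
        outputs.append(mean_k * y_std[k] + y_mean[k])   # back to original scale
    return outputs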
def create_fused_GP(self, x_test, l_param, sigma_f, sigma_n, kernel):
    model_mean = []
    model_var = []
    for i in range(len(self.gp_models)):
        # Un-standardise the i-th model prediction.
        m_mean, m_var = self.gp_models[i].predict_var(x_test)
        m_mean = m_mean * self.model_std[i] + self.model_mean[i]
        m_var = m_var * (self.model_std[i] ** 2)
        model_mean.append(m_mean)
        # Un-standardise the predicted model error and combine it with the
        # GP variance to give the total variance for this model.
        err_mean, err_var = self.gp_err_models[i].predict_var(x_test)
        err_mean = err_mean * self.err_std[i] + self.err_mean[i]
        model_var.append(err_mean ** 2 + m_var)
    # Fuse the individual model predictions with the reification method.
    fused_mean, fused_var = reification(model_mean, model_var)
    self.fused_y_mean = np.mean(fused_mean[0:400:12])
    self.fused_y_std = np.std(fused_mean[0:400:12])
    if self.fused_y_std == 0:
        self.fused_y_std = 1
    fused_mean = (fused_mean - self.fused_y_mean) / self.fused_y_std
    # Fit the fused GP to a subsample of the fused predictions.
    self.fused_GP = gp_model(x_test[0:400:12], fused_mean[0:400:12],
                             l_param, sigma_f,
                             abs(fused_var[0:400:12]) ** 0.5,
                             self.num_dim, kernel)
    return self.fused_GP
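# For intuition only: a simplified stand-in for the reification() call above.
# It fuses the model means with inverse-variance weights and ignores the
# model-correlation adjustment that the full reification method applies, so it
# is a sketch of the general idea rather than the fusion actually used here.
def _inverse_variance_fusion(model_mean, model_var):
    means = np.asarray(model_mean, dtype=float)
    variances = np.asarray(model_var, dtype=float)
    precision = 1.0 / variances
    weights = precision / np.sum(precision, axis=0)     # weights sum to 1 per point
    fused_mean = np.sum(weights * means, axis=0)
    fused_var = 1.0 / np.sum(precision, axis=0)
    return fused_mean, fused_var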
def test_fit(self):
    data = pd.read_excel('data/rve_data.xlsx')
    # data_1 keeps the unscaled inputs used for the calls to self.predict.
    data_1 = deepcopy(data)
    data.iloc[:, 0] = (data.iloc[:, 0] - 650) / 200
    data.iloc[:, 2] = data.iloc[:, 2] / 3
    data.iloc[:, 3] = data.iloc[:, 3] / 2
    # Build 10 test folds of 150 unique indices each; an index may appear in
    # more than one fold, but not twice in the same fold.
    test_data = [[] for _ in range(10)]
    train_data = [[] for _ in range(10)]
    count = 1
    while count <= 1500:
        new_num = np.random.randint(0, 1522)
        for j in range(10):
            if (new_num not in test_data[j]) and (len(test_data[j]) < 150):
                test_data[j].append(new_num)
                count += 1
                break
    # Each training fold is every index not in the corresponding test fold.
    for i in range(1522):
        for j in range(10):
            if i not in test_data[j]:
                train_data[j].append(i)
    test_data = np.array(test_data)
    train_data = np.array(train_data)
    # Standardise the output column and keep the scalers.
    self.mean = np.mean(data.iloc[:, 5])
    self.std = np.std(data.iloc[:, 5])
    data.iloc[:, 5] = (data.iloc[:, 5] - self.mean) / self.std
    results = np.zeros((1500, 2))
    for i in range(10):
        # Refit the GP on the training fold and predict the held-out fold.
        self.gp = gp_model(data.iloc[train_data[i], [0, 1, 2, 3]],
                           data.iloc[train_data[i], 5],
                           [0.12274117, 0.08612411, 0.65729583, 0.23342798],
                           0.16578065, 0.1, 4, 'SE')
        out = self.predict(np.array(data_1.iloc[test_data[i], [0, 1, 2, 3]]))
        results[i * 150:(i + 1) * 150, 0] = out
        results[i * 150:(i + 1) * 150, 1] = (data.iloc[test_data[i], 5]
                                             * self.std + self.mean)
    # Restore the full-data GP and predict every point for comparison.
    self.setup()
    results_all = np.zeros((1522, 2))
    results_all[:, 1] = data.iloc[:, 5] * self.std + self.mean
    results_all[:, 0] = self.predict(np.array(data_1.iloc[:, [0, 1, 2, 3]]))
    return results, results_all
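# The arrays returned by test_fit() hold the cross-validated predictions in
# column 0 and the corresponding truth values in column 1, so a summary error
# is a one-liner. This helper is an added illustration, not part of the
# original test.
def _rmse(results):
    return float(np.sqrt(np.mean((results[:, 0] - results[:, 1]) ** 2)))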