Esempio n. 1
0
 def create_error_models(self):
     """
     Build one error GP per information source.

     For each model the trained GP in ``self.gp_models`` is evaluated at the
     truth inputs ``self.x_true``. The predicted mean is mapped back to the
     original scale with ``self.model_std[i]`` and ``self.model_mean[i]``,
     and the absolute difference to ``self.y_true`` is used as the error.
     The error is standardised with its own mean and standard deviation and
     a new ``gp_model`` is built on it with the hyper-parameters stored in
     ``self.err_model_hp``.

     ``self.err_mean`` and ``self.err_std`` are rebuilt on every call so that
     entry ``i`` always belongs to model ``i``. Appending to the existing
     lists would leave the statistics of an earlier call at indices
     ``0..num_models-1``, and a repeated call would then normalise the new
     errors with those stale values.

     Returns:
         list: the error ``gp_model`` instances, one per model, in model
         order.
     """
     err_means = []
     err_stds = []
     gp_error_models = []
     for i in range(self.num_models):
         # Only the predictive mean is used; the variance is discarded.
         gpmodel_mean, _ = self.gp_models[i].predict_var(self.x_true)
         gpmodel_mean = gpmodel_mean * self.model_std[i] + self.model_mean[i]
         error = np.abs(self.y_true - gpmodel_mean)

         err_mean = np.mean(error)
         err_std = np.std(error)
         if err_std == 0:
             # A constant error would otherwise cause a division by zero.
             err_std = 1
         err_means.append(err_mean)
         err_stds.append(err_std)

         new_model = gp_model(self.x_true,
                              (error - err_mean) / err_std,
                              self.err_model_hp["l"][i],
                              self.err_model_hp["sf"][i],
                              self.err_model_hp["sn"][i], self.num_dim,
                              self.kernel)
         gp_error_models.append(new_model)

     # Publish the fresh statistics only once every model has been processed.
     self.err_mean = err_means
     self.err_std = err_stds
     return gp_error_models
Esempio n. 2
0
 def setup(self):
     """
     Load the RVE data set and build the GP stored in ``self.gp``.

     Reads ``data/rve_data.xlsx``, rescales columns 0, 2 and 3 in place,
     standardises column 5 with its own mean and standard deviation (kept
     in ``self.mean`` and ``self.std``) and constructs a ``gp_model`` with
     the ``'SE'`` kernel from columns 0-3 (inputs) and column 5 (target).
     """
     frame = pd.read_excel('data/rve_data.xlsx')

     # Input rescaling; column 1 is used as read from the file.
     frame.iloc[:, 0] = (frame.iloc[:, 0] - 650) / 200
     frame.iloc[:, 2] = frame.iloc[:, 2] / 3
     frame.iloc[:, 3] = frame.iloc[:, 3] / 2

     # Standardise the target and remember the statistics on the instance.
     self.mean = np.mean(frame.iloc[:, 5])
     self.std = np.std(frame.iloc[:, 5])
     frame.iloc[:, 5] = (frame.iloc[:, 5] - self.mean) / self.std

     inputs = frame.iloc[:, 0:4]
     target = frame.iloc[:, 5]
     length_scales = np.array(
         [0.12274117, 0.08612411, 0.65729583, 0.23342798])
     self.gp = gp_model(inputs, target, length_scales, 0.16578065, 0.1, 4,
                        'SE')
Esempio n. 3
0
 def create_gps(self):
     """
     Build one ``gp_model`` per information source.

     Model ``i`` is constructed from ``self.x_train[i]`` and the targets
     ``self.y_train[i]`` standardised with ``self.model_mean[i]`` and
     ``self.model_std[i]``, using the ``"l"``, ``"sf"`` and ``"sn"`` entries
     of ``self.model_hp`` together with ``self.num_dim`` and ``self.kernel``.

     Returns:
         list: ``self.num_models`` ``gp_model`` instances in model order.
     """
     return [
         gp_model(
             self.x_train[idx],
             (self.y_train[idx] - self.model_mean[idx]) / self.model_std[idx],
             self.model_hp["l"][idx],
             self.model_hp["sf"][idx],
             self.model_hp["sn"][idx],
             self.num_dim,
             self.kernel,
         )
         for idx in range(self.num_models)
     ]
Esempio n. 4
0
    def setup(self):
        """
        Load the TC data set and append four GPs to ``self.tc_gp``.

        Columns 1-4 of ``data/tc_data.xlsx`` are the shared inputs (rescaled
        below) and columns 5-8 are the four outputs. For output ``k`` the
        column mean, maximum and standard deviation are appended to
        ``self.y_mean``, ``self.y_max`` and ``self.y_std``, and a
        ``gp_model`` with the ``'M52'`` kernel is built on the standardised
        column.
        """
        sheet = pd.read_excel("data/tc_data.xlsx")

        inputs = np.array(sheet.iloc[:, 1:5])
        inputs[:, 0] = (inputs[:, 0] - 650) / 200
        inputs[:, 1] = 100 * inputs[:, 1]
        inputs[:, 2] = 100 * inputs[:, 2] / 2
        inputs[:, 3] = 100 * inputs[:, 3] / 3

        # Squared length scales, one row per output; the square root of each
        # entry is what gets passed to the GP.
        squared_lengths = (
            (0.28368, 0.44255, 0.19912, 5.48465),
            (2.86816, 2.57049, 0.64243, 94.43864),
            (6.41552, 12.16391, 7.16226, 27.87327),
            (34.57352, 12.83549, 4.73291, 275.83489),
        )
        sf_list = [4 * 1.57933, 4 * 5.5972, 4 * 78.32377, 4 * 14.79803]

        for k, (sq_row, sf) in enumerate(zip(squared_lengths, sf_list)):
            column = np.array(sheet.iloc[:, k + 5])
            self.y_mean.append(np.mean(column))
            self.y_max.append(np.max(column))
            self.y_std.append(np.std(column))

            targets = (column - self.y_mean[k]) / self.y_std[k]
            lengths = np.array([np.sqrt(value) for value in sq_row])
            self.tc_gp.append(
                gp_model(inputs, targets, lengths, sf, 0.05, 4, 'M52'))
Esempio n. 5
0
 def create_fused_GP(self, x_test, l_param, sigma_f, sigma_n, kernel):
     """
     Fuse the individual model predictions and build a GP on the result.

     Every model in ``self.gp_models`` is evaluated at ``x_test`` and its
     mean and variance are mapped back to the original scale. The matching
     error GP supplies an error mean (also de-standardised) whose square is
     added to the model variance. The per-model means and total variances
     are passed to ``reification``. A ``gp_model`` is then built on every
     12th point of the first 400 test points, using the standardised fused
     mean as target and the square root of the absolute fused variance in
     the noise position.

     The ``sigma_n`` argument is accepted but not used by this method.

     Returns:
         The new ``gp_model``, which is also stored in ``self.fused_GP``.
     """
     subset = slice(0, 400, 12)

     means = []
     total_vars = []
     for idx, model in enumerate(self.gp_models):
         scale = self.model_std[idx]
         pred_mean, pred_var = model.predict_var(x_test)
         pred_mean = pred_mean * scale + self.model_mean[idx]
         pred_var = pred_var * (scale**2)
         means.append(pred_mean)

         # Only the mean of the error GP contributes to the total variance.
         err_pred, _ = self.gp_err_models[idx].predict_var(x_test)
         err_pred = err_pred * self.err_std[idx] + self.err_mean[idx]
         total_vars.append((err_pred)**2 + pred_var)

     fused_mean, fused_var = reification(means, total_vars)

     # Standardise with the statistics of the sub-sampled points only.
     self.fused_y_mean = np.mean(fused_mean[subset])
     self.fused_y_std = np.std(fused_mean[subset])
     if self.fused_y_std == 0:
         self.fused_y_std = 1
     fused_mean = (fused_mean - self.fused_y_mean) / self.fused_y_std

     noise = abs(fused_var[subset])**(0.5)
     self.fused_GP = gp_model(x_test[subset], fused_mean[subset], l_param,
                              sigma_f, noise, self.num_dim, kernel)
     return self.fused_GP
Esempio n. 6
0
    def test_fit(self):
        """
        Evaluate the RVE GP on ten random hold-out folds and on all rows.

        Ten folds of 150 row indices each are drawn at random from the 1522
        rows of ``data/rve_data.xlsx``. A drawn index goes into the first
        fold that does not already contain it and is not yet full, so the
        same row may appear in more than one fold. For every fold a GP is
        built on the remaining rows and used to predict the held-out rows.
        ``self.setup()`` is then called and predictions are made for every
        row.

        ``self.predict`` is given the unscaled copy of the inputs.
        NOTE(review): presumably it applies the input scaling itself; confirm.

        Returns:
            tuple: ``(results, results_all)``. ``results`` is a (1500, 2)
            array and ``results_all`` a (1522, 2) array. Column 0 holds the
            prediction and column 1 the de-standardised value from column 5.
        """
        n_rows, n_folds, fold_size = 1522, 10, 150
        feature_cols = [0, 1, 2, 3]

        scaled = pd.read_excel('data/rve_data.xlsx')
        raw = deepcopy(scaled)
        scaled.iloc[:, 0] = (scaled.iloc[:, 0] - 650) / 200
        scaled.iloc[:, 2] = scaled.iloc[:, 2] / 3
        scaled.iloc[:, 3] = scaled.iloc[:, 3] / 2

        # Draw indices until every fold holds exactly ``fold_size`` entries.
        test_data = [[] for _ in range(n_folds)]
        drawn = 0
        while drawn < n_folds * fold_size:
            candidate = np.random.randint(0, n_rows)
            for fold in test_data:
                if candidate not in fold and len(fold) < fold_size:
                    fold.append(candidate)
                    drawn += 1
                    break

        # Each training set is every row that is not in the fold's test set.
        train_data = [[row for row in range(n_rows) if row not in fold]
                      for fold in test_data]

        test_data = np.array(test_data)
        train_data = np.array(train_data)
        self.mean = np.mean(scaled.iloc[:, 5])
        self.std = np.std(scaled.iloc[:, 5])
        scaled.iloc[:, 5] = (scaled.iloc[:, 5] - self.mean) / self.std

        results = np.zeros((n_folds * fold_size, 2))
        for fold_idx in range(n_folds):
            train_rows = train_data[fold_idx]
            test_rows = test_data[fold_idx]
            self.gp = gp_model(
                scaled.iloc[train_rows, feature_cols],
                scaled.iloc[train_rows, 5],
                [0.12274117, 0.08612411, 0.65729583, 0.23342798], 0.16578065,
                0.1, 4, 'SE')
            predicted = self.predict(
                np.array(raw.iloc[test_rows, feature_cols]))

            block = slice(fold_idx * fold_size, (fold_idx + 1) * fold_size)
            results[block, 0] = predicted
            results[block, 1] = (scaled.iloc[test_rows, 5] * self.std +
                                 self.mean)

        # Rebuild the GP via setup() before predicting every row.
        self.setup()

        results_all = np.zeros((n_rows, 2))
        results_all[:, 1] = scaled.iloc[:, 5] * self.std + self.mean
        results_all[:, 0] = self.predict(
            np.array(raw.iloc[:, feature_cols]))

        return results, results_all