Example #1
import os

import numpy as np
import pandas as pd

# Project-specific helpers assumed to be importable from the surrounding code:
# load_and_split_descriptor, read_label, transform_data, KappaModel, mae,
# and the model factories svr, fc, krr and xgb_model.


def descriptors_analysis():
    """Compare the four models on different descriptor sets.

    Ten descriptor sets are tested in total: -Crystal part, -CW,
    -Structure, -Statistical part, Crystal part, CW, Structure,
    Statistical part, Crystal+CW and All descriptors. A leading '-'
    marks the held-out feature category; those descriptors are built
    from the ensemble of the other three categories.

    Args:
        None
    Returns:
        A numpy array of test MAEs with shape (10, 4): one row per
        descriptor set, one column per model (SVR, FC, KRR, XGBoost).
    """
    data_path = './data'
    labels = pd.read_csv(os.path.join(data_path,
                                      'labels.2020.1.29.csv')).to_numpy()
    raw_train_data = pd.read_csv(os.path.join(data_path,
                                              'td.2020.1.29.csv')).to_numpy()
    result = []
    for descriptors_i in range(10):
        new_train_data = load_and_split_descriptor(raw_train_data,
                                                   descriptors_i)
        length = new_train_data.shape[1]
        t, l = read_label(new_train_data, labels, label_index=0)
        MAEs_list = []
        for i in range(4):
            if i == 0:
                epochs = 1
                model = svr()
                model_type = 'SVR'
            elif i == 1:
                epochs = 200
                model = fc(length)
                model_type = 'FC'
            elif i == 2:
                epochs = 1
                model = krr()
                model_type = 'KRR'
            else:
                epochs = 1
                model = xgb_model()
                model_type = 'XGBoost'
            x_train, x_test, y_train, y_test = transform_data(
                t, l, 0.1, 4, model_type=model_type)
            kappa_model = KappaModel(x_train, x_test, y_train, y_test)
            kappa_model.train_model(model, epochs=epochs)
            predict_train = kappa_model.predict(model, 'train')
            predict_test = kappa_model.predict(model, 'test')

            MAEs_train = mae(np.exp(y_train), np.exp(predict_train))
            MAEs_test = mae(np.exp(y_test), np.exp(predict_test))
            print(MAEs_test)

            MAEs_list.append(MAEs_test)

        result.append(MAEs_list)
    return np.array(result)
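
A hedged post-processing sketch: the (10, 4) MAE matrix returned above can be labelled with the ten descriptor sets listed in the docstring and the four model types, then written to CSV. The row order and the output filename are assumptions for illustration, not part of the original example.

import numpy as np
import pandas as pd


def tabulate_descriptor_maes(result):
    """Label the (10, 4) test-MAE matrix and save it to CSV (illustrative)."""
    descriptor_sets = [
        '-Crystal part', '-CW', '-Structure', '-Statistical part',
        'Crystal part', 'CW', 'Structure', 'Statistical part',
        'Crystal+CW', 'All',
    ]
    models = ['SVR', 'FC', 'KRR', 'XGBoost']
    table = pd.DataFrame(np.asarray(result), index=descriptor_sets, columns=models)
    table.to_csv('descriptor_maes.csv')  # assumed output path
    return table
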
    def test(self):
        violated_const_dataset = 0
        violated_const_model = 0
        (X, y) = self._test_data._dataset
        y_pred = self._model.predict(Ten(X))

        # MAE and constraint violations are computed in the model's
        # (negative log10) output space.
        test_mae = util.mae(y, y_pred)
        violated_const_dataset += len(util.violated_const(X, y))
        violated_pairs = util.violated_const(X, y_pred)
        violated_const_model += len(violated_pairs)

        # Map predictions and targets back to the original scale.
        y_pred = np.array([10 ** -t for t in y_pred])
        y_true = np.array([10 ** -t for t in y])

        y_diff = np.abs(y_true - y_pred)
        median_ae = np.median(y_diff)
        mean_ae = np.mean(y_diff)
        rmse_ = np.sqrt(mse(y_true, y_pred))
        sum_mag_viol = np.sum([abs(y_pred[i] - y_pred[j])
                               for (i, j) in violated_pairs])

        self.y_true = y_true
        self.y_pred = y_pred
        self.violated_pairs = violated_pairs
        return (test_mae, violated_const_model, violated_const_dataset,
                median_ae, mean_ae, rmse_, sum_mag_viol)
    def validation_step(self, epoch):
        violated_const_model = 0
        violated_const_dataset = 0
        (X, y) = self._valid_data._dataset
        y_pred = self._model.predict(Ten(X))
        val_mae = util.mae(y, y_pred)
        violated_const_dataset += len(util.violated_const(X, y))
        violated_pairs = util.violated_const(X, y_pred)
        violated_const_model += len(violated_pairs)

        # Map predictions and targets back to the original scale.
        y_pred = np.array([10 ** -t for t in y_pred])
        y_true = np.array([10 ** -t for t in y])

        val_rmse = np.sqrt(mse(y_true, y_pred))
        y_diff = np.abs(y_true - y_pred)
        sum_mag_viol = np.sum([abs(y_pred[i] - y_pred[j])
                               for (i, j) in violated_pairs])
        median_ae = np.median(y_diff)
        mean_ae = np.mean(y_diff)

        if self.verbose:
            print(f"epoch: {epoch}, "
                  f"MAE: {val_mae}, "
                  f"Violated constraints dataset: {violated_const_dataset}, "
                  f"Violated constraints model: {violated_const_model}")

        self.logs.append([val_mae, violated_const_dataset, violated_const_model,
                          median_ae, mean_ae, val_rmse, sum_mag_viol])
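
The validation metrics appended to self.logs above can be summarized after training. A minimal sketch, assuming self.logs holds one row per epoch in the order used above; the column names and the choice of MAE as the selection criterion are illustrative, not part of the original class.

import numpy as np
import pandas as pd

LOG_COLUMNS = ['val_mae', 'violated_const_dataset', 'violated_const_model',
               'median_ae', 'mean_ae', 'val_rmse', 'sum_mag_viol']


def summarize_validation_logs(logs):
    """Return the per-epoch log table and the index of the epoch with the lowest MAE."""
    table = pd.DataFrame(logs, columns=LOG_COLUMNS)
    best_epoch = int(np.argmin(table['val_mae'].to_numpy()))
    return table, best_epoch
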
Example #4
import pickle

import numpy as np

# `mae` is the project's mean-absolute-error helper, assumed available in scope.


def significance_analysis():
    """Rank the 32 input features by how strongly they influence the model's MAE.

    For each feature k, the feature column is scaled by (1 + i) and by
    (1 + i + 0.05) for several relative perturbations i, and the average
    relative change in MAE between the two perturbations is the score p_k.
    """
    x = np.load('./data/trains/train_x.npy')
    y = np.load('./data/trains/train_y.npy')
    with open('./models/ptc_ab.pkl', 'rb') as f:
        model = pickle.load(f)
    perturbations = [-0.2, -0.15, -0.1, -0.05, 0.05, 0.1, 0.15]
    pk_list = []
    for k in range(32):
        total = 0.0
        for i in perturbations:
            tmp = np.copy(x)
            tmpp = np.copy(x)
            # Perturb feature k by i and by i + 0.05 relative to the original data.
            tmp[:, k] = tmp[:, k] * (1 + i)
            tmpp[:, k] = tmpp[:, k] * (1 + i + 0.05)
            predict_y = model.predict(tmp)
            predict_yy = model.predict(tmpp)
            delta_ki = mae(predict_y, y)
            delta_kii = mae(predict_yy, y)
            total += np.abs(delta_kii - delta_ki) / delta_ki
        pk_list.append(total / len(perturbations))
    return pk_list
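
In other words, each feature's score is p_k = (1/7) * sum_i |MAE_k(i + 0.05) - MAE_k(i)| / MAE_k(i), where MAE_k(i) is the model's MAE after scaling feature k by (1 + i). A minimal sketch of how the scores could be ranked; the placeholder feature names are illustrative, not from the original data.

import numpy as np


def rank_features(pk_list, feature_names=None):
    """Sort features from most to least influential by their p_k score."""
    names = feature_names or [f'feature_{k}' for k in range(len(pk_list))]
    order = np.argsort(pk_list)[::-1]
    return [(names[k], pk_list[k]) for k in order]
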
Example #5
 def test_one_epoch(self, loader, epoch):
     self.G.eval()
     self.D.eval()
     test_loss = 0.0
     num_examples = 0
     imgs = []
     pred_labels = []
     labels = []
     for data in tqdm(loader):
         img, label = data
         img = img.to(self.device)
         label = label.to(self.device)
         pred_label = self.predict(img)
         loss = self.criterion(pred_label, label)
         batch_size = img.size(0)
         test_loss += loss.item() * batch_size
         num_examples += batch_size
         imgs.append(img.cpu().numpy())
         labels.append(label.cpu().numpy())
         pred_labels.append(pred_label.detach().cpu().numpy())
     img = np.concatenate(imgs, axis=0)
     label = np.concatenate(labels, axis=0)
     pred_label = np.concatenate(pred_labels, axis=0)
     log = {
         'loss': test_loss / num_examples,
         'img': img,
         'label': label,
         'pred_label': pred_label,
         'pp_r2': pp_r2(pred_label, label),
         'mse': mse(pred_label, label),
         'rmse': rmse(pred_label, label),
         'mae': mae(pred_label, label),
         'pp_mse': pp_mse(pred_label, label).tolist(),
         'pp_rmse': pp_rmse(pred_label, label).tolist(),
         'pp_mae': pp_mae(pred_label, label).tolist(),
     }
     log['avg_r2'] = np.mean(log['pp_r2'])
     self.logger.write(log, epoch=epoch, stage='test')
     if test_loss < self.best_test_loss:
         self.best_test_loss = test_loss
         self.save(os.path.join(self.exp_path, 'models', 'model.best.t7'))
     return log
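
The epoch loops in this example and in Example #7 rely on per-parameter ('pp_') metric helpers that are not shown in the snippets. A minimal sketch of one plausible implementation, assuming predictions and labels are (N, P) arrays with one column per predicted parameter; the source project's own helpers may differ.

import numpy as np


def pp_mae(pred, label):
    """Mean absolute error computed separately for each output column."""
    return np.mean(np.abs(pred - label), axis=0)


def pp_mse(pred, label):
    """Mean squared error per output column."""
    return np.mean((pred - label) ** 2, axis=0)


def pp_rmse(pred, label):
    """Root mean squared error per output column."""
    return np.sqrt(pp_mse(pred, label))


def pp_r2(pred, label):
    """Coefficient of determination per output column."""
    ss_res = np.sum((label - pred) ** 2, axis=0)
    ss_tot = np.sum((label - np.mean(label, axis=0)) ** 2, axis=0)
    return 1.0 - ss_res / ss_tot
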
 def analysis(self):
     '''
     Analyze the distribution of the prediction errors.
     :return:
     '''
     n = len(self.current_prod)
     Y_pred, Y_future = np.array(self.Y_pred), np.array(self.Y_future)
     i, c = list(zip(*self.current_prod))
     _y_pred = Y_pred[:, i]
     _y_future = Y_future[:, i]
     _y_pred_mean = np.mean(_y_pred, axis=0)
     _y_future_mean = np.mean(_y_future, axis=0)
     _mae = mae(y_pred=_y_pred, y_true=_y_future, axis=0)
     _mer = mean_error_ratio(y_pred=_y_pred, y_true=_y_future, axis=0)
     # Column labels are kept in Chinese because they appear on the plots:
     # '数值' = value, '标签' = label, '井号' = well ID, '误差' = error,
     # '预测' = predicted, '实际' = actual.
     _y_pred_DF = pd.DataFrame(_y_pred_mean, columns=['数值'])
     _y_future_DF = pd.DataFrame(_y_future_mean, columns=['数值'])
     _y_pred_DF['标签'], _y_future_DF['标签'] = ['预测'] * n, ['实际'] * n
     _y_pred_DF['井号'] = _y_future_DF['井号'] = c
     _y_pred_DF['误差'] = _y_future_DF['误差'] = _mer
     _y_DF = pd.concat([_y_pred_DF, _y_future_DF], axis=0)
     # Top panel: predicted vs. actual mean values per well.
     plt.subplot(2, 1, 1)
     sns.barplot(
         data=_y_DF,
         x='井号',
         y='数值',
         hue='标签',
     )
     plt.xticks([])
     plt.yticks(fontsize=15)
     # Bottom panel: mean error ratio per well.
     plt.subplot(2, 1, 2)
     sns.pointplot(
         data=_y_DF,
         x='井号',
         y='误差',
     )
     plt.ylim(0, 1)
     plt.xticks(fontsize=15, rotation=15)
     plt.yticks(fontsize=15)
     plt.grid()
     plt.show()
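
analysis() above calls mae and mean_error_ratio with an axis argument; neither helper appears in the snippet. A minimal sketch of plausible definitions, assuming the error ratio is the absolute error normalized by the true value; the project's actual helpers may differ.

import numpy as np


def mae(y_pred, y_true, axis=None):
    """Mean absolute error, optionally reduced along a given axis."""
    return np.mean(np.abs(np.asarray(y_pred) - np.asarray(y_true)), axis=axis)


def mean_error_ratio(y_pred, y_true, axis=None):
    """Mean of |error| / |true value|, optionally reduced along a given axis."""
    y_pred, y_true = np.asarray(y_pred), np.asarray(y_true)
    return np.mean(np.abs(y_pred - y_true) / np.abs(y_true), axis=axis)
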
Example #7
 def train_one_epoch(self, loader, epoch):
     self.G.train()
     train_loss = 0.0
     num_examples = 0
     pred_labels = []
     labels = []
     for data in tqdm(loader):
         img, label = data
         img = img.to(self.device)
         label = label.to(self.device)
         self.opt.zero_grad()
         pred_label = self.predict(img)
         pred_labels.append(pred_label.detach().cpu().numpy())
         labels.append(label.cpu().numpy())
         loss = self.criterion(pred_label*self.param_scale, label*self.param_scale) \
                + torch.sum((self.psf*(pred_label-label))**2)
         loss.backward()
         self.opt.step()
         batch_size = img.size(0)
         train_loss += loss.item() * batch_size
         num_examples += batch_size
     # Step the LR scheduler once per epoch, after the optimizer updates
     # (PyTorch expects scheduler.step() to follow optimizer.step()).
     self.scheduler.step()
     pred_label = np.concatenate(pred_labels, axis=0)
     label = np.concatenate(labels, axis=0)
     log = {
         'loss': train_loss / num_examples,
         'pp_r2': pp_r2(pred_label, label),
         'mse': mse(pred_label, label),
         'rmse': rmse(pred_label, label),
         'mae': mae(pred_label, label),
         'pp_mse': pp_mse(pred_label, label).tolist(),
         'pp_rmse': pp_rmse(pred_label, label).tolist(),
         'pp_mae': pp_mae(pred_label, label).tolist(),
     }
     log['avg_r2'] = np.mean(log['pp_r2'])
     self.logger.write(log, epoch=epoch)
     self.save(os.path.join(self.exp_path, 'models', 'model.%d.t7' % epoch))
     return log
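
The training loss above combines the base criterion on rescaled outputs with a PSF-weighted squared-error penalty. A minimal standalone sketch of that composite loss on dummy tensors, assuming param_scale and psf are 1-D weight tensors broadcast across the batch; the shapes and values are illustrative only.

import torch


def composite_loss(pred_label, label, criterion, param_scale, psf):
    """Base criterion on rescaled outputs plus a PSF-weighted squared penalty."""
    return (criterion(pred_label * param_scale, label * param_scale)
            + torch.sum((psf * (pred_label - label)) ** 2))


# Illustrative call: a batch of 8 samples with 5 output parameters.
pred = torch.randn(8, 5)
target = torch.randn(8, 5)
print(composite_loss(pred, target, torch.nn.MSELoss(), torch.ones(5), torch.full((5,), 0.1)))
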
Example #8
            r2.insert(0, model_type)
            csv_writer.writerow(r2)

        if train_on_whole_data:
            x_train, x_test, y_train, y_test = transform_data(
                x_data=train_data, y_data=label, test_size=0.1, random_state=4)
            kappa_model = KappaModel(x_train, x_test, y_train, y_test)
            kappa_model.train_model(model, epochs=epochs)
            predict_train = kappa_model.predict(model, 'train')
            predict_test = kappa_model.predict(model, 'test')

            r2_train, mae_log_train, rmse_train = metric(
                y_train, predict_train)
            r2_test, mae_log_test, rmse_test = metric(y_cal=y_test,
                                                      y_pred=predict_test)
            MAEs_train = mae(np.exp(y_train), np.exp(predict_train))
            MAEs_test = mae(np.exp(y_test), np.exp(predict_test))
            print(mae_log_test, r2_test)

            metric_list = [
                MAEs_train, MAEs_test, r2_train, r2_test, mae_log_train,
                mae_log_test, rmse_train, rmse_test
            ]
            metric_matrix.append(metric_list)

    if train_on_whole_data:
        metric_matrix.insert(0, [
            'MAEs of train data', 'MAEs of test data', 'R2 of train data',
            'R2 of test data', 'Logarithmic mae of train data',
            'Logarithmic mae of test data', 'RMSE of train data',
            'RMSE of test data'
        ])
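
Example #8 assumes a metric helper that returns R2, MAE and RMSE computed in log space (the labels are exponentiated separately for the linear-scale MAEs). A minimal sketch of such a helper using scikit-learn, with the keyword names taken from the calls above; the original project's implementation may differ.

import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


def metric(y_cal, y_pred):
    """Return (R^2, MAE, RMSE) computed on the log-scale inputs."""
    r2 = r2_score(y_cal, y_pred)
    mae_log = mean_absolute_error(y_cal, y_pred)
    rmse = np.sqrt(mean_squared_error(y_cal, y_pred))
    return r2, mae_log, rmse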