    def metrics_calculator(self, x, test=True):
        mask = torch.isnan(x[:, -self.n_properties:])
        r2_scores = []
        mlls = []
        rmses = []
        inputs = copy.deepcopy(x)  # renamed from `input` to avoid shadowing the builtin
        inputs[torch.where(torch.isnan(inputs))] = 0.0

        for p in range(0, self.n_properties, 1):
            p_idx = torch.where(~mask[:, p])[0]
            input_batch = copy.deepcopy(inputs[p_idx, :])
            input_batch[:, -self.n_properties + p] = 0.0
            target = inputs[p_idx, -self.n_properties + p]

            output_batches = []

            for model in self.models:
                output_batch = model.forward(input_batch, self.n_cycles)
                output_batches.append(output_batch[:, -self.n_properties + p])
            output_batches = torch.stack(output_batches)
            predict_mean = torch.mean(output_batches, dim=0).detach()
            predict_std = torch.std(output_batches, dim=0).detach()

            if (self.means is not None) and (self.stds is not None):
                predict_mean = (
                    predict_mean.numpy() * self.stds[-self.n_properties + p] +
                    self.means[-self.n_properties + p])
                predict_std = predict_std.numpy() * self.stds[
                    -self.n_properties + p]
                target = (target.numpy() * self.stds[-self.n_properties + p] +
                          self.means[-self.n_properties + p])
                r2_scores.append(r2_score(target, predict_mean))
                mlls.append(mll(predict_mean, predict_std**2, target))
                rmses.append(np.sqrt(mean_squared_error(target, predict_mean)))

                path_to_save = self.dir_name + '/' + self.file_start + '_' + str(
                    p)

                if (self.epoch % 250) == 0 and (self.epoch > 0):
                    if test:
                        np.save(path_to_save + '_mean.npy', predict_mean)
                        np.save(path_to_save + '_std.npy', predict_std)
                        np.save(path_to_save + '_target.npy', target)

                    #confidence_curve(predict_mean, predict_std**2, target,
                    #filename=path_to_save + '_rmse_conf_curve.png',
                    #metric = 'rmse')
            else:
                r2_scores.append(r2_score(target.numpy(),
                                          predict_mean.numpy()))
                mlls.append(mll(predict_mean, predict_std**2, target))
                rmses.append(
                    np.sqrt(
                        mean_squared_error(target.numpy(),
                                           predict_mean.numpy())))
        return r2_scores, mlls, rmses
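# A minimal sketch of the `mll` helper called in every example above. This is an
# assumption (the original implementation is not shown): it computes the mean
# Gaussian log-likelihood of the targets under the predicted means and variances.
import numpy as np

def mll(predict_mean, predict_var, target):
    """Mean Gaussian log-likelihood of `target` under N(predict_mean, predict_var)."""
    predict_mean = np.asarray(predict_mean)
    predict_var = np.asarray(predict_var)
    target = np.asarray(target)
    log_lik = (-0.5 * np.log(2 * np.pi * predict_var)
               - 0.5 * (target - predict_mean) ** 2 / predict_var)
    return float(np.mean(log_lik))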
    def metrics_calculator(self, x, test=True):
        mask = torch.isnan(x[:, -self.n_properties:])
        r2_scores = []
        mlls = []
        rmses = []

        for p in range(0, self.n_properties, 1):
            p_idx = torch.where(~mask[:, p])[0]
            x_p = x[p_idx]

            input_p = copy.deepcopy(x_p)
            input_p[:, -self.n_properties:][mask[p_idx]] = 0.0
            input_p[:, (-self.n_properties + p)] = 0.0

            mask_p = torch.zeros_like(mask[p_idx, :]).fill_(True)
            mask_p[:, p] = False
            z = self.encoder(input_p)
            predict_mean, predict_var = self.decoder.forward(z, mask_p)

            predict_mean = predict_mean[p].reshape(-1).detach()
            predict_std = (predict_var[p]**0.5).reshape(-1).detach()

            target = x_p[:, (-self.n_properties + p)]

            if (self.means is not None) and (self.stds is not None):
                predict_mean = (
                    predict_mean.numpy() * self.stds[-self.n_properties + p] +
                    self.means[-self.n_properties + p])
                predict_std = predict_std.numpy() * self.stds[
                    -self.n_properties + p]
                target = (target.numpy() * self.stds[-self.n_properties + p] +
                          self.means[-self.n_properties + p])
                r2_scores.append(r2_score(target, predict_mean))
                mlls.append(mll(predict_mean, predict_std**2, target))
                rmses.append(np.sqrt(mean_squared_error(target, predict_mean)))

                path_to_save = self.dir_name + '/' + self.file_start + str(p)

                if (self.epoch % 500) == 0 and (self.epoch > 0):
                    if test:
                        np.save(path_to_save + '_mean.npy', predict_mean)
                        np.save(path_to_save + '_std.npy', predict_std)
                        np.save(path_to_save + '_target.npy', target)

            else:
                r2_scores.append(r2_score(target.numpy(),
                                          predict_mean.numpy()))
                mlls.append(mll(predict_mean, predict_std**2, target))
                rmses.append(
                    np.sqrt(
                        mean_squared_error(target.numpy(),
                                           predict_mean.numpy())))
        return r2_scores, mlls, rmses
    def metrics_calculator(self, x, save=False):
        mask = torch.isnan(x[:, -self.n_properties:])
        r2_scores = []
        mlls = []
        rmses = []

        for p in range(0, self.n_properties, 1):
            p_idx = torch.where(~mask[:, p])[0]
            x_p = x[p_idx]

            input_p = copy.deepcopy(x_p)

            if self.standardised:
                input_p[:, -self.n_properties:][mask[p_idx]] = 0.0
                input_p[:, (-self.n_properties + p)] = 0.0
            else:
                input_p[:, -self.n_properties:][mask[p_idx]] = torch.take(self.means, torch.where(mask[p_idx])[1])
                input_p[:, (-self.n_properties + p)] = self.means[p]

            mask_p = torch.zeros_like(mask[p_idx, :]).fill_(True)
            mask_p[:, p] = False

            predict_mean, predict_var = self.network.forward(input_p)
            predict_mean = predict_mean[:, p].reshape(-1).detach()
            predict_std = (predict_var[:, p] ** 0.5).reshape(-1).detach()

            target = x_p[:, (-self.n_properties + p)]

            if self.standardised:
                predict_mean = (predict_mean.numpy() * self.stds[p] +
                                self.means[p])
                predict_std = predict_std.numpy() * self.stds[p]
                target = (target.numpy() * self.stds[p] +
                          self.means[p])
            else:
                predict_mean = predict_mean.numpy()
                predict_std = predict_std.numpy()
                target = target.numpy()

            r2_scores.append(r2_score(target, predict_mean))
            mlls.append(mll(predict_mean, predict_std ** 2, target))
            rmses.append(np.sqrt(mean_squared_error(target, predict_mean)))

            if save:
                path_to_save = self.dir_name + '/predictions/' + self.file_start + '_' + str(p)
                np.save(path_to_save + '_mean.npy', predict_mean)
                np.save(path_to_save + '_std.npy', predict_std)
                np.save(path_to_save + '_target.npy', target)

        return r2_scores, mlls, rmses
    def metrics_calculator(self, x, n_samples=1, plot=True):
        mask = torch.isnan(x[:, -self.n_properties:])
        r2_scores = []
        mlls = []
        rmses = []
        for p in range(0, self.n_properties, 1):
            p_idx = torch.where(~mask[:, p])[0]
            if p_idx.shape[0] > 40:
                x_p = x[p_idx]
                target = x_p[:, (-self.n_properties + p)]

                mask_context = copy.deepcopy(mask[p_idx, :])
                mask_context[:, p] = True
                mask_p = torch.zeros_like(mask_context).fill_(True)
                mask_p[:, p] = False

                # [test_size, n_properties, z_dim]
                mu_priors, sigma_priors = self.encoder(
                    x_p[:, :-self.n_properties], x_p[:, -self.n_properties:],
                    mask_context)
                samples = []
                for i in range(n_samples):
                    z = mu_priors + sigma_priors * torch.randn_like(mu_priors)
                    recon_mu, recon_sigma = self.decoder(z, mask_p)
                    recon_mu = recon_mu.detach()
                    recon_sigma = recon_sigma.detach()
                    recon_mu = recon_mu[:, p]
                    recon_sigma = recon_sigma[:, p]
                    sample = recon_mu + recon_sigma * torch.randn_like(
                        recon_mu)
                    samples.append(sample.transpose(0, 1))

                samples = torch.cat(samples)
                predict_mean = torch.mean(samples, dim=0)
                predict_std = torch.std(samples, dim=0)

                if (self.means is not None) and (self.stds is not None):
                    predict_mean = (predict_mean.numpy() *
                                    self.stds[-self.n_properties + p] +
                                    self.means[-self.n_properties + p])
                    predict_std = predict_std.numpy() * self.stds[
                        -self.n_properties + p]
                    target = (
                        target.numpy() * self.stds[-self.n_properties + p] +
                        self.means[-self.n_properties + p])
                    r2_scores.append(r2_score(target, predict_mean))
                    mlls.append(mll(predict_mean, predict_std**2, target))
                    rmses.append(
                        np.sqrt(mean_squared_error(target, predict_mean)))

                    path_to_save = self.dir_name + '/' + self.file_start + str(
                        p)

                    #np.save(path_to_save + '_mean.npy', predict_mean)
                    #np.save(path_to_save + '_std.npy', predict_std)
                    #np.save(path_to_save + '_target.npy', target)

                    #if plot:
                    #    confidence_curve(predict_mean, predict_std**2, target,
                    #                     filename=path_to_save + '_rmse_conf_curve.png',
                    #                     metric='rmse')
                    #    confidence_curve(predict_mean, predict_std**2, target,
                    #                     filename=path_to_save + '_r2_conf_curve.png',
                    #                     metric='r2')

                else:
                    r2_scores.append(
                        r2_score(target.numpy(), predict_mean.numpy()))
                    mlls.append(
                        mll(predict_mean.numpy(),
                            predict_std.numpy()**2, target.numpy()))
                    rmses.append(
                        np.sqrt(
                            mean_squared_error(target.numpy(),
                                               predict_mean.numpy())))

        return r2_scores, mlls, rmses
    def metrics_calculator(self, x, n_samples, test=True):
        mask = torch.isnan(x[:, -self.n_properties:])
        r2_scores = []
        mlls = []
        rmses = []

        for p in range(0, self.n_properties, 1):
            p_idx = torch.where(~mask[:, p])[0]
            x_p = x[p_idx]

            input_p = copy.deepcopy(x_p)
            input_p[:, -self.n_properties:][mask[p_idx]] = 0.0
            input_p[:, (-self.n_properties + p)] = 0.0

            mask_p = torch.zeros_like(mask[p_idx, :]).fill_(True)
            mask_p[:, p] = False
            mu_priors, var_priors = self.encoder(input_p)

            samples = []
            for i in range(n_samples):
                z = mu_priors + var_priors**0.5 * torch.randn_like(mu_priors)
                recon_mus, recon_vars = self.decoder.forward(z, mask_p)
                recon_mu = recon_mus[p].detach().reshape(-1)
                recon_sigma = (recon_vars[p]**0.5).detach().reshape(-1)
                sample = recon_mu + recon_sigma * torch.randn_like(recon_mu)
                samples.append(sample)
            samples = torch.stack(samples)
            predict_mean = torch.mean(samples, dim=0).detach()
            predict_std = torch.std(samples, dim=0).detach()
            target = x_p[:, (-self.n_properties + p)]

            if (self.means is not None) and (self.stds is not None):
                predict_mean = (
                    predict_mean.numpy() * self.stds[-self.n_properties + p] +
                    self.means[-self.n_properties + p])
                predict_std = predict_std.numpy() * self.stds[
                    -self.n_properties + p]
                target = (target.numpy() * self.stds[-self.n_properties + p] +
                          self.means[-self.n_properties + p])
                r2_scores.append(r2_score(target, predict_mean))
                mlls.append(mll(predict_mean, predict_std**2, target))
                rmses.append(np.sqrt(mean_squared_error(target, predict_mean)))

                path_to_save = self.dir_name + '/' + self.file_start + '_' + str(
                    p)

                if (self.epoch % 2000) == 0 and (self.epoch > 0):
                    if test:
                        np.save(path_to_save + '_mean.npy', predict_mean)
                        np.save(path_to_save + '_std.npy', predict_std)
                        np.save(path_to_save + '_target.npy', target)

            else:
                r2_scores.append(r2_score(target.numpy(),
                                          predict_mean.numpy()))
                mlls.append(mll(predict_mean, predict_std**2, target))
                rmses.append(
                    np.sqrt(
                        mean_squared_error(target.numpy(),
                                           predict_mean.numpy())))
        return r2_scores, mlls, rmses
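# Each metrics_calculator variant above periodically saves, per property p, arrays
# of predictive means, standard deviations and targets (*_mean.npy, *_std.npy,
# *_target.npy). The main() scripts below reload those files across runs, average
# the member means and the member variances (std**2) into an ensemble prediction,
# and recompute R^2, MLL and RMSE on that ensemble.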
def main(args):
    """
    :return:
    """

    warnings.filterwarnings('ignore')
    torch.set_default_dtype(torch.float64)

    extra = ''

    filename = args.dataname + '_' + args.model_name + '_' + extra
    #run_number = 9
    #epochs = 250
    run_number = 10
    batches = 5
    epochs = 250
    n_properties = 5

    if args.model_name == 'npbasic':
        extra_dir = 'not_restrict_var/'
    else:
        extra_dir = ''

    with open(
            'results/rdkit_descriptors/lr0001/{}/summary/{}_ensemble.txt'.
            format(args.dataname, filename), 'w+') as f:
        r2_scores_list = []
        mlls_list = []
        rmses_list = []

        metric = 'rmse'
        percentiles = np.arange(100, 4, -5)
        fig_pts = 'results/{}/summary/{}_{}_{}_ensemble_confidence_curve.png'.format(
            args.dataname, args.dataname, args.model_name, metric)

        metric_model_mns = []
        metric_oracle_mns = []

        for batch in range(batches):
            metric_models = []
            metric_oracles = []
            r2_scores = []
            rmses = []
            mlls = []

            for p in range(n_properties):
                mns = []
                stds = []
                targets = []
                for i in range(run_number):
                    if args.model_name == 'npbasic':
                        filestart = '{}{}_{}_{}_{}_250_'.format(
                            args.dataname, args.num, args.model_name,
                            (batch * run_number + i), p)
                    else:
                        filestart = '{}{}_{}_{}_{}_'.format(
                            args.dataname, args.num, args.model_name,
                            (batch * run_number + i), p)
                    mn = np.load('results/{}/{}/{}{}mean.npy'.format(
                        args.dataname, args.model_name, extra_dir, filestart))
                    std = np.load('results/{}/{}/{}{}std.npy'.format(
                        args.dataname, args.model_name, extra_dir, filestart))
                    target = np.load('results/{}/{}/{}{}target.npy'.format(
                        args.dataname, args.model_name, extra_dir, filestart))
                    mns.append(mn)
                    stds.append(std)
                    targets.append(target)

                # Ensemble mean, var, target
                mean = np.mean(np.array(mns), axis=0)
                var = np.mean(np.array(stds)**2, axis=0)
                target = np.mean(np.array(targets), axis=0)

                r2_scores.append(r2_score(target, mean))
                mlls.append(mll(mean, var, target))
                rmses.append(np.sqrt(mean_squared_error(target, mean)))

                conf_percentile, metric_model, metric_oracle = metric_ordering(
                    mean, var, target, metric)
                indices = []
                for percentile in percentiles:
                    indices.append(find_nearest(conf_percentile, percentile))
                indices = np.array(indices)

                metric_models.append(metric_model[indices])
                metric_oracles.append(metric_oracle[indices])

            r2_scores_list.append(np.mean(np.array(r2_scores)))
            rmses_list.append(np.mean(np.array(rmses)))
            mlls_list.append(np.mean(np.array(mlls)))

            metric_models = np.array(metric_models)
            metric_oracles = np.array(metric_oracles)

            metric_model = np.mean(metric_models, axis=0)
            metric_oracle = np.mean(metric_oracles, axis=0)

            metric_model_mns.append(metric_model)
            metric_oracle_mns.append(metric_oracle)

        r2_scores_list = np.array(r2_scores_list)
        mlls_list = np.array(mlls_list)
        rmses_list = np.array(rmses_list)

        f.write('\n R^2 score: {:.4f}+- {:.4f}'.format(np.mean(r2_scores_list),
                                                       np.std(r2_scores_list)))
        f.write('\n MLL: {:.4f}+- {:.4f} \n'.format(np.mean(mlls_list),
                                                    np.std(mlls_list)))
        f.write('\n RMSE: {:.4f}+- {:.4f} \n'.format(np.mean(rmses_list),
                                                     np.std(rmses_list)))
        f.flush()

        metric_model_mns = np.array(metric_model_mns)
        metric_model_mn = np.mean(metric_model_mns, axis=0)
        metric_model_std = np.std(metric_model_mns, axis=0)
        metric_oracle_mns = np.array(metric_oracle_mns)
        metric_oracle_mn = np.mean(metric_oracle_mns, axis=0)
        metric_oracle_std = np.std(metric_oracle_mns, axis=0)

        print(metric_model_mn)
        print(metric_model_std)

        confidence_curve(percentiles, metric_model_mn, metric_oracle_mn,
                         fig_pts, metric_model_std, metric_oracle_std, metric)
def main(args):
    """
    :return:
    """

    warnings.filterwarnings('ignore')
    torch.set_default_dtype(torch.float64)

    extra = 'single'
    extra_dir = ''

    filename = args.dataname + '_' + args.model_name + '_' + extra
    task_type = 'regression'
    run_number = 1
    batches = [
        0,
    ]
    epochs = 250
    n_properties = properties_map[args.dataname]

    with open(
            'results/{}/{}/summary/{}_ensemble.txt'.format(
                args.dataname, task_type, filename), 'a') as f:
        r2_scores_list = []
        mlls_list = []
        rmses_list = []
        f1_scores_list = []
        roc_aucs_list = []
        roc_aucs_binary_list = []

        metric = 'rmse'
        percentiles = np.arange(100, 4, -5)
        dir_name = os.path.dirname(f.name)
        fig_pts = '{}/{}_{}_ensemble_confidence_curve.png'.format(
            dir_name, filename, metric)

        metric_model_mns = []
        metric_oracle_mns = []

        for batch in batches:
            metric_models = []
            metric_oracles = []
            f1_scores = []
            r2_scores = []
            roc_aucs = []
            roc_aucs_binary = []
            rmses = []
            mlls = []

            for p in range(n_properties):
                mns = []
                stds = []
                targets = []
                for i in range(run_number):
                    filestart = '{}{}_{}_{}_{}_'.format(
                        args.dataname, args.num, args.model_name,
                        (batch * run_number + i), p)
                    mn = np.load('results/{}/{}/{}/{}{}mean.npy'.format(
                        args.dataname, task_type, args.model_name, extra_dir,
                        filestart))
                    std = np.load('results/{}/{}/{}/{}{}std.npy'.format(
                        args.dataname, task_type, args.model_name, extra_dir,
                        filestart))
                    target = np.load('results/{}/{}/{}/{}{}target.npy'.format(
                        args.dataname, task_type, args.model_name, extra_dir,
                        filestart))
                    mns.append(mn)
                    stds.append(std)
                    targets.append(target)

                # Ensemble mean, var, target
                mean = np.mean(np.array(mns), axis=0)
                var = np.mean(np.array(stds)**2, axis=0)
                target = np.mean(np.array(targets), axis=0)
                binary_target = np.zeros_like(target)
                binary_target[target > 6] = 1.0

                binary_mean = np.zeros_like(mean)
                binary_mean[mean > 6] = 1.0
                f1_scores.append(f1_score(binary_target, binary_mean))
                try:
                    roc_aucs_binary.append(
                        roc_auc_score(binary_target, binary_mean))
                    roc_aucs.append(roc_auc_score(binary_target, binary_mean))
                except ValueError:
                    # roc_auc_score raises ValueError when only one class is
                    # present in the targets; skip this property.
                    continue
                r2_scores.append(r2_score(target, mean))
                mlls.append(mll(mean, var, target))
                rmses.append(np.sqrt(mean_squared_error(target, mean)))

                conf_percentile, metric_model, metric_oracle = metric_ordering(
                    mean, var, target, metric)
                indices = []
                for percentile in percentiles:
                    indices.append(find_nearest(conf_percentile, percentile))
                indices = np.array(indices)

                metric_models.append(metric_model[indices])
                metric_oracles.append(metric_oracle[indices])

            f1_scores_list.append(np.mean(np.array(f1_scores)))
            roc_aucs_list.append(np.mean(np.array(roc_aucs)))
            roc_aucs_binary_list.append(np.mean(np.array(roc_aucs_binary)))
            r2_scores_list.append(np.mean(np.array(r2_scores)))
            rmses_list.append(np.mean(np.array(rmses)))
            mlls_list.append(np.mean(np.array(mlls)))

            metric_models = np.array(metric_models)
            metric_oracles = np.array(metric_oracles)

            metric_model = np.mean(metric_models, axis=0)
            metric_oracle = np.mean(metric_oracles, axis=0)

            metric_model_mns.append(metric_model)
            metric_oracle_mns.append(metric_oracle)

        r2_scores_list = np.array(r2_scores_list)
        mlls_list = np.array(mlls_list)
        rmses_list = np.array(rmses_list)
        roc_aucs_list = np.array(roc_aucs_list)
        roc_aucs_binary_list = np.array(roc_aucs_binary_list)
        f1_scores_list = np.array(f1_scores_list)

        f.write('\n R^2 score: {:.4f}+- {:.4f}'.format(np.mean(r2_scores_list),
                                                       np.std(r2_scores_list)))
        f.write('\n MLL: {:.4f}+- {:.4f} \n'.format(np.mean(mlls_list),
                                                    np.std(mlls_list)))
        f.write('\n RMSE: {:.4f}+- {:.4f} \n'.format(np.mean(rmses_list),
                                                     np.std(rmses_list)))
        f.write('\n F^1 score: {:.4f}+- {:.4f}'.format(np.mean(f1_scores_list),
                                                       np.std(f1_scores_list)))
        f.write('\n ROC-AUC: {:.4f}+- {:.4f} \n'.format(
            np.mean(roc_aucs_list), np.std(roc_aucs_list)))
        f.write('\n ROC-AUC (binary): {:.4f}+- {:.4f} \n'.format(
            np.mean(roc_aucs_binary_list), np.std(roc_aucs_binary_list)))

        f.flush()

        metric_model_mns = np.array(metric_model_mns)
        metric_model_mn = np.mean(metric_model_mns, axis=0)
        metric_model_std = np.std(metric_model_mns, axis=0)
        metric_oracle_mns = np.array(metric_oracle_mns)
        metric_oracle_mn = np.mean(metric_oracle_mns, axis=0)
        metric_oracle_std = np.std(metric_oracle_mns, axis=0)

        print(metric_model_mn)
        print(metric_model_std)

        confidence_curve(percentiles, metric_model_mn, metric_oracle_mn,
                         fig_pts, metric_model_std, metric_oracle_std, metric)
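# The main() functions above expect an `args` namespace with `dataname`, `num` and
# `model_name` attributes. A minimal entry point, assuming argparse and that one of
# the main() variants above is in scope; argument names are taken from their usage,
# but defaults and help text are illustrative only.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--dataname', required=True,
                        help='Dataset name used in the results/ paths.')
    parser.add_argument('--num', default='',
                        help='Run/dataset suffix inserted into result file names.')
    parser.add_argument('--model_name', required=True,
                        help='Model identifier, e.g. npbasic or cnpbasic.')
    main(parser.parse_args())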