def main(task, path, representation, use_pca, test_set_size, r_size,
         det_encoder_n_hidden, lat_encoder_n_hidden, decoder_n_hidden):
    """
    Grid-search Attentive Neural Process hyperparameters on a single held-out split and
    save the configuration with the lowest held-out RMSE.

    :param task: str specifying the task name. One of [Photoswitch, ESOL, FreeSolv, Lipophilicity].
    :param path: str specifying the path to the dataset file (e.g. photoswitches.csv)
    :param representation: str specifying the representation. One of [fingerprints, fragments, fragprints]
    :param use_pca: bool specifying whether or not to use PCA to perform Principal Components Regression
    :param test_set_size: float specifying the train/test split ratio. e.g. 0.2 is 80/20 train/test split
    :param r_size: Dimensionality of context encoding r.
    :param det_encoder_n_hidden: Number of deterministic encoder hidden layers.
    :param lat_encoder_n_hidden: Number of latent encoder hidden layers.
    :param decoder_n_hidden: Number of decoder hidden layers.
    :return: None. The best hyperparameters are printed and written to
        cross_val_hypers/{task}/ANP/hypers_{representation}.txt
    """
    # Local imports keep this block self-contained.
    import itertools
    import os

    data_loader = TaskDataLoader(task, path)
    smiles_list, y = data_loader.load_property_data()
    X = featurise_mols(smiles_list, representation)
    y_size = 1

    # If True we perform Principal Components Regression

    n_components = 50 if use_pca else None

    print('\nBeginning training loop...')

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_set_size, random_state=42)

    if task != 'Photoswitch':
        # Artificially create a 80/10/10 train/validation/test split discarding the validation set.
        split_in_two = int(len(y_test) / 2)
        X_test = X_test[0:split_in_two]
        y_test = y_test[0:split_in_two]

    else:
        # We subdivide the train set in order to run cross-validation.
        X_train, X_test, y_train, y_test = train_test_split(X_train,
                                                            y_train,
                                                            test_size=0.1,
                                                            random_state=42)

    y_train = y_train.reshape(-1, 1)
    y_test = y_test.reshape(-1, 1)

    # The transformed test targets are discarded: y_test stays in its original units and
    # predictions are mapped back with y_scaler before scoring.

    X_train, y_train, X_test, _, y_scaler = transform_data(
        X_train,
        y_train,
        X_test,
        y_test,
        n_components=n_components,
        use_pca=use_pca)

    # Add a leading dimension of size 1 to each tensor.
    X_train = torch.from_numpy(X_train).float().unsqueeze(dim=0)
    X_test = torch.from_numpy(X_test).float().unsqueeze(dim=0)
    y_train = torch.from_numpy(y_train).float().unsqueeze(dim=0)

    det_encoder_hidden_sizes = [8, 16]
    lat_encoder_hidden_sizes = [8, 16]
    decoder_hidden_sizes = [8, 16]
    learning_rates = [0.01, 0.001]
    batch_sizes = [16, 32]
    iteration_numbers = [250, 500]

    # itertools.product iterates in the same order as the equivalent nested loops
    # (last factor varies fastest).
    search_grid = itertools.product(det_encoder_hidden_sizes,
                                    lat_encoder_hidden_sizes,
                                    decoder_hidden_sizes,
                                    learning_rates,
                                    batch_sizes,
                                    iteration_numbers)

    # Infinity guarantees that the first finite RMSE is always accepted.
    best_rmse = float('inf')
    best_params = {
        'det_encs': 0,
        'lat_encs': 0,
        'dec_hid': 0,
        'lr': 0,
        'batch_size': 0,
        'iterations': 0
    }

    for det_encs, lat_encs, dec_hid, l_rate, batch_s, iter_num in search_grid:

        m = AttentiveNP(
            x_size=X_train.shape[2],
            y_size=y_size,
            r_size=r_size,
            det_encoder_hidden_size=det_encs,
            det_encoder_n_hidden=det_encoder_n_hidden,
            lat_encoder_hidden_size=lat_encs,
            lat_encoder_n_hidden=lat_encoder_n_hidden,
            decoder_hidden_size=dec_hid,
            decoder_n_hidden=decoder_n_hidden,
            lr=l_rate,
            attention_type="multihead")

        print('...training.')

        m.train(X_train,
                y_train,
                batch_size=batch_s,
                iterations=iter_num,
                print_freq=None)

        # Now, the context set comprises the training x / y values, the target set comprises the test x values.

        y_pred, _ = m.predict(X_train, y_train, X_test, n_samples=100)

        # Map the predictions back to the original target scale once and reuse them.
        y_pred = y_scaler.inverse_transform(y_pred)

        # Metrics on the held-out split, in original target units

        score = r2_score(y_test, y_pred)
        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        mae = mean_absolute_error(y_test, y_pred)

        print("\nR^2: {:.3f}".format(score))
        print("RMSE: {:.3f}".format(rmse))
        print("MAE: {:.3f}".format(mae))

        if rmse < best_rmse:
            best_rmse = rmse
            best_params.update({
                'det_encs': det_encs,
                'lat_encs': lat_encs,
                'dec_hid': dec_hid,
                'lr': l_rate,
                'batch_size': batch_s,
                'iterations': iter_num
            })
        print('Best parameters are \n')
        print(best_params)

    print('Final best parameters are \n')
    print(best_params)

    # Create the output directory first so a completed search is never lost to a missing folder.
    out_path = f'cross_val_hypers/{task}/ANP/hypers_{representation}.txt'
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    with open(out_path, 'w') as f:
        f.write(str(best_params))
Exemple #2
0
def main(path, representation):
    """
    Fit a Gaussian Process with a Tanimoto kernel on every molecule except five fixed
    held-out ones, then print train and test error statistics for those five.

    :param path: str specifying path to dataset.
    :param representation: str specifying the molecular representation. One of ['fingerprints, 'fragments', 'fragprints']
    :return: None. All results are printed.
    """

    task = 'e_iso_pi'  # Always e_iso_pi for human performance comparison
    data_loader = TaskDataLoader(task, path)
    smiles_list, y = data_loader.load_property_data()
    X = featurise_mols(smiles_list, representation)

    # 5 test molecules (SMILES listed for reference only):
    #   BrC1=CC=C(/N=N/C2=CC=CC=C2)C=C1
    #   O=[N+]([O-])C1=CC=C(/N=N/C2=CC=CC=C2)C=C1
    #   CC(C=C1)=CC=C1/N=N/C2=CC=C(N(C)C)C=C2
    #   BrC1=CC([N+]([O-])=O)=CC([N+]([O-])=O)=C1/N=N/C2=CC([H])=C(C=C2[H])N(CC)CC
    #   ClC%11=CC([N+]([O-])=O)=CC(C#N)=C%11/N=N/C%12=CC([H])=C(C=C%12OC)N(CC)CC

    # and their indices in the loaded data. Single source of truth for the split, so the
    # train and test selections cannot drift apart.
    test_smiles_indices = np.array([116, 131, 168, 221, 229])

    X_train = np.delete(X, test_smiles_indices, axis=0)
    y_train = np.delete(y, test_smiles_indices)
    X_test = X[test_smiles_indices]

    # experimental wavelength values in EtOH. Main csv file has 400nm instead of 407nm because measurement was
    # under a different solvent
    y_test = y[test_smiles_indices]  # index-array selection, as in the original code
    y_test[2] = 407.

    y_train = y_train.reshape(-1, 1)
    y_test = y_test.reshape(-1, 1)

    #  We standardise the outputs but leave the inputs unchanged

    _, y_train, _, y_test, y_scaler = transform_data(X_train, y_train, X_test,
                                                     y_test)

    X_train = X_train.astype(np.float64)
    X_test = X_test.astype(np.float64)

    # We define the Gaussian Process Regression Model using the Tanimoto kernel

    k = Tanimoto()
    m = gpflow.models.GPR(data=(X_train, y_train),
                          mean_function=Constant(np.mean(y_train)),
                          kernel=k,
                          noise_variance=1)

    def objective_closure():
        # Negative log marginal likelihood of the model built above.
        return -m.log_marginal_likelihood()

    # Optimise the kernel variance and noise level by the marginal likelihood

    opt = gpflow.optimizers.Scipy()
    opt.minimize(objective_closure,
                 m.trainable_variables,
                 options=dict(maxiter=100))
    print_summary(m)

    # mean GP prediction (the predictive variance is not used here)

    y_pred, _ = m.predict_f(X_test)
    y_pred = y_scaler.inverse_transform(y_pred)
    y_test = y_scaler.inverse_transform(y_test)

    # Output Standardised RMSE and RMSE on Train Set

    y_pred_train, _ = m.predict_f(X_train)
    train_rmse_stan = np.sqrt(mean_squared_error(y_train, y_pred_train))
    train_rmse = np.sqrt(
        mean_squared_error(y_scaler.inverse_transform(y_train),
                           y_scaler.inverse_transform(y_pred_train)))
    print("\nStandardised Train RMSE: {:.3f}".format(train_rmse_stan))
    print("Train RMSE: {:.3f}".format(train_rmse))

    r2 = r2_score(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mae = mean_absolute_error(y_test, y_pred)
    per_molecule = abs(y_pred - y_test)

    print("\n Averaged test statistics are")
    print("\nR^2: {:.3f}".format(r2))
    print("RMSE: {:.3f}".format(rmse))
    print("MAE: {:.3f}".format(mae))
    print("\nAbsolute error per molecule is {} ".format(per_molecule))
Exemple #3
0
    num_features = np.shape(X)[1]

    r2_list = []
    rmse_list = []
    mae_list = []

    print('\nBeginning training loop...')
    j = 0  # index for saving results

    for i in range(0, 25):

        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.2,
                                                            random_state=i)
        X_train, y_train, X_test, y_test, y_scaler = transform_data(
            X_train, y_train, X_test, y_test, n_components, use_pca)

        regr_rf = RandomForestRegressor(n_estimators=100,
                                        max_depth=30,
                                        random_state=2)
        regr_rf.fit(X_train, y_train)

        # Predict on new data
        y_rf = regr_rf.predict(X_test)
        y_rf = y_scaler.inverse_transform(y_rf)
        y_test = y_scaler.inverse_transform(y_test)
        score = r2_score(y_test, y_rf)
        rmse = np.sqrt(mean_squared_error(y_test, y_rf))
        mae = mean_absolute_error(y_test, y_rf)

        print("\nR^2: {:.3f}".format(score))
Exemple #4
0
def main(path, task, representation, use_pca, n_trials, test_set_size,
         use_rmse_conf):
    """
    Train a Tanimoto-kernel Gaussian Process over several random train/test splits, print
    per-split and averaged metrics, and plot a confidence-error curve.

    :param path: str specifying path to dataset.
    :param task: str specifying the task. One of ['e_iso_pi', 'z_iso_pi', 'e_iso_n', 'z_iso_n']
    :param representation: str specifying the molecular representation. One of ['fingerprints, 'fragments', 'fragprints']
    :param use_pca: bool. If True apply PCA to perform Principal Components Regression.
        Note: the transformed inputs returned by transform_data are discarded below, so the
        model is always fitted on the untransformed features.
    :param n_trials: int specifying number of random train/test splits to use
    :param test_set_size: float in range [0, 1] specifying fraction of dataset to use as test set
    :param use_rmse_conf: bool specifying whether to compute the rmse confidence-error curves or the mae confidence-
    error curves. True is the option for rmse.
    :return: None. Metrics are printed; the plot is saved under {task}/results/gpr/ and shown.
    """

    data_loader = TaskDataLoader(task, path)
    smiles_list, y = data_loader.load_property_data()
    X = featurise_mols(smiles_list, representation)

    # If True we perform Principal Components Regression

    n_components = 100 if use_pca else None

    # We define the Gaussian Process Regression Model using the Tanimoto kernel.
    # `m` is rebound on every trial; the closure always sees the current model.

    m = None

    def objective_closure():
        return -m.log_marginal_likelihood()

    r2_list = []
    rmse_list = []
    mae_list = []

    # We pre-allocate arrays for plotting confidence-error curves

    _, _, _, y_test = train_test_split(
        X, y, test_size=test_set_size)  # To get test set size
    n_test = len(y_test)

    rmse_confidence_list = np.zeros((n_trials, n_test))
    mae_confidence_list = np.zeros((n_trials, n_test))

    print('\nBeginning training loop...')

    for i in range(0, n_trials):

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_set_size, random_state=i)

        y_train = y_train.reshape(-1, 1)
        y_test = y_test.reshape(-1, 1)

        #  We standardise the outputs but leave the inputs unchanged

        _, y_train, _, y_test, y_scaler = transform_data(
            X_train,
            y_train,
            X_test,
            y_test,
            n_components=n_components,
            use_pca=use_pca)

        X_train = X_train.astype(np.float64)
        X_test = X_test.astype(np.float64)

        kernel = Tanimoto()
        m = gpflow.models.GPR(data=(X_train, y_train),
                              mean_function=Constant(np.mean(y_train)),
                              kernel=kernel,
                              noise_variance=1)

        # Optimise the kernel variance and noise level by the marginal likelihood

        opt = gpflow.optimizers.Scipy()
        opt.minimize(objective_closure,
                     m.trainable_variables,
                     options=dict(maxiter=100))
        print_summary(m)

        # mean and variance GP prediction

        y_pred, y_var = m.predict_f(X_test)
        y_pred = y_scaler.inverse_transform(y_pred)
        y_test = y_scaler.inverse_transform(y_test)

        # Compute scores for confidence curve plotting.
        # Ascending predictive variance: the most confident predictions come first.

        ranked_confidence_list = np.argsort(y_var, axis=0).flatten()

        for n_kept in range(len(y_test)):

            # Errors over the (n_kept + 1) most confident predictions

            conf = ranked_confidence_list[0:n_kept + 1]

            # Construct the RMSE error for each level of confidence

            rmse = np.sqrt(mean_squared_error(y_test[conf], y_pred[conf]))
            rmse_confidence_list[i, n_kept] = rmse

            # Construct the MAE error for each level of confidence

            mae = mean_absolute_error(y_test[conf], y_pred[conf])
            mae_confidence_list[i, n_kept] = mae

        # Output Standardised RMSE and RMSE on Train Set

        y_pred_train, _ = m.predict_f(X_train)
        train_rmse_stan = np.sqrt(mean_squared_error(y_train, y_pred_train))
        train_rmse = np.sqrt(
            mean_squared_error(y_scaler.inverse_transform(y_train),
                               y_scaler.inverse_transform(y_pred_train)))
        print("\nStandardised Train RMSE: {:.3f}".format(train_rmse_stan))
        print("Train RMSE: {:.3f}".format(train_rmse))

        score = r2_score(y_test, y_pred)
        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        mae = mean_absolute_error(y_test, y_pred)

        print("\nR^2: {:.3f}".format(score))
        print("RMSE: {:.3f}".format(rmse))
        print("MAE: {:.3f}".format(mae))

        r2_list.append(score)
        rmse_list.append(rmse)
        mae_list.append(mae)

    r2_list = np.array(r2_list)
    rmse_list = np.array(rmse_list)
    mae_list = np.array(mae_list)

    print("\nmean R^2: {:.4f} +- {:.4f}".format(
        np.mean(r2_list),
        np.std(r2_list) / np.sqrt(len(r2_list))))
    print("mean RMSE: {:.4f} +- {:.4f}".format(
        np.mean(rmse_list),
        np.std(rmse_list) / np.sqrt(len(rmse_list))))
    print("mean MAE: {:.4f} +- {:.4f}\n".format(
        np.mean(mae_list),
        np.std(mae_list) / np.sqrt(len(mae_list))))

    # Plot confidence-error curves

    # Integer arange scaled afterwards always yields exactly n_test points; a float-step
    # arange can come out one element long or short, which would break the plot calls.
    confidence_percentiles = 100.0 * np.arange(n_test) / n_test

    # Both curve types share one plotting path; only the data, axis label and file
    # suffix differ.
    if use_rmse_conf:
        curves, y_label, suffix = rmse_confidence_list, 'RMSE (nm)', 'rmse'
    else:
        curves, y_label, suffix = mae_confidence_list, 'MAE (nm)', 'mae'

    # We flip because we want the most confident predictions on the right-hand side of the plot

    curve_mean = np.flip(np.mean(curves, axis=0))
    curve_std = np.flip(np.std(curves, axis=0))

    # One-sigma error bars

    lower = curve_mean - curve_std
    upper = curve_mean + curve_std

    plt.plot(confidence_percentiles, curve_mean, label='mean')
    plt.fill_between(confidence_percentiles, lower, upper, alpha=0.2)
    plt.xlabel('Confidence Percentile')
    plt.ylabel(y_label)
    plt.ylim([0, np.max(upper) + 1])
    plt.xlim([0, 100 * ((n_test - 1) / n_test)])
    plt.yticks(np.arange(0, np.max(upper) + 1, 5.0))
    plt.savefig(
        task +
        '/results/gpr/{}_confidence_curve_{}.png'.format(representation, suffix))
    plt.show()
Exemple #5
0
def main(path, path_to_dft_dataset, representation, theory_level):
    """
    Leave-one-out comparison of a Tanimoto-kernel Gaussian Process against TD-DFT values.
    Each molecule that has a DFT value is held out in turn; molecules without a DFT value
    are always part of the training set.

    :param path: str specifying path to photoswitches.csv file.
    :param path_to_dft_dataset: str specifying path to dft_comparison.csv file.
    :param representation: str specifying the molecular representation. One of ['fingerprints, 'fragments', 'fragprints']
    :param theory_level: str giving the level of theory to compare against - CAM-B3LYP or PBE0 ['CAM-B3LYP', 'PBE0'].
        Any value other than 'CAM-B3LYP' selects the PBE0 values.
    :return: None. Per-trial and averaged absolute errors are printed.
    """

    task = 'e_iso_pi'  # e_iso_pi only task supported for TD-DFT comparison
    data_loader = TaskDataLoader(task, path)
    smiles_list, _, pbe0_vals, cam_vals, experimental_vals = data_loader.load_dft_comparison_data(
        path_to_dft_dataset)

    X = featurise_mols(smiles_list, representation)

    # Keep only non-duplicate entries because we're not considering effects of solvent.
    # One pass with a set keeps the first occurrence of each SMILES.

    seen_smiles = set()
    first_occurrences = []
    for idx, smiles in enumerate(smiles_list):
        if smiles not in seen_smiles:
            seen_smiles.add(smiles)
            first_occurrences.append(idx)
    # Explicit integer dtype so an empty selection is still a valid index array.
    non_duplicate_indices = np.array(first_occurrences, dtype=int)

    X = X[non_duplicate_indices, :]
    experimental_vals = experimental_vals[non_duplicate_indices]
    pbe0_vals = pbe0_vals[non_duplicate_indices]
    cam_vals = cam_vals[non_duplicate_indices]

    # DFT values for the requested level of theory
    reference_vals = cam_vals if theory_level == 'CAM-B3LYP' else pbe0_vals
    missing_dft = np.argwhere(np.isnan(reference_vals))
    present_dft = np.argwhere(~np.isnan(reference_vals))

    # molecules with dft values to be split into train/test
    X_with_dft = np.delete(X, missing_dft, axis=0)
    y_with_dft = np.delete(experimental_vals, missing_dft)
    dft_vals = np.delete(reference_vals, missing_dft)
    # molecules with no dft vals must go into the training set.
    X_no_dft = np.delete(X, present_dft, axis=0)
    y_no_dft = np.delete(experimental_vals, present_dft)

    mae_list = []
    dft_mae_list = []

    # We define the Gaussian Process optimisation objective.
    # `m` is rebound on every trial; the closure always sees the current model.

    m = None

    def objective_closure():
        return -m.log_marginal_likelihood()

    print('\nBeginning training loop...')

    for i in range(len(y_with_dft)):

        # Hold out molecule i; everything else (plus the no-DFT molecules) is training data.
        X_train = np.delete(X_with_dft, i, axis=0)
        y_train = np.delete(y_with_dft, i)
        X_test = X_with_dft[i].reshape(1, -1)
        y_test = y_with_dft[i]
        dft_test = dft_vals[i]

        X_train = np.concatenate((X_train, X_no_dft))
        y_train = np.concatenate((y_train, y_no_dft))
        y_train = y_train.reshape(-1, 1)
        y_test = y_test.reshape(-1, 1)

        #  We standardise the outputs but leave the inputs unchanged

        _, y_train, _, y_test, y_scaler = transform_data(
            X_train, y_train, X_test, y_test)

        X_train = X_train.astype(np.float64)
        X_test = X_test.astype(np.float64)

        k = Tanimoto()
        m = gpflow.models.GPR(data=(X_train, y_train),
                              mean_function=Constant(np.mean(y_train)),
                              kernel=k,
                              noise_variance=1)

        # Optimise the kernel variance and noise level by the marginal likelihood

        opt = gpflow.optimizers.Scipy()
        opt.minimize(objective_closure,
                     m.trainable_variables,
                     options=dict(maxiter=100))
        print_summary(m)

        # Output Standardised RMSE and RMSE on Train Set

        y_pred_train, _ = m.predict_f(X_train)
        train_rmse_stan = np.sqrt(mean_squared_error(y_train, y_pred_train))
        train_rmse = np.sqrt(
            mean_squared_error(y_scaler.inverse_transform(y_train),
                               y_scaler.inverse_transform(y_pred_train)))
        print("\nStandardised Train RMSE: {:.3f}".format(train_rmse_stan))
        print("Train RMSE: {:.3f}".format(train_rmse))

        # mean GP prediction (the predictive variance is not used here)

        y_pred, _ = m.predict_f(X_test)
        y_pred = y_scaler.inverse_transform(y_pred)
        y_test = y_scaler.inverse_transform(y_test)

        # Output MAE for this trial

        mae = abs(y_test - y_pred)

        print("MAE: {}".format(mae))

        # Store values in order to compute the mean and standard error of the statistics across trials

        mae_list.append(mae)

        # DFT prediction scores on the same trial

        dft_mae = abs(y_test - dft_test)

        dft_mae_list.append(dft_mae)

    mae_list = np.array(mae_list)
    dft_mae_list = np.array(dft_mae_list)

    print("\nmean GP-Tanimoto MAE: {:.4f} +- {:.4f}\n".format(
        np.mean(mae_list),
        np.std(mae_list) / np.sqrt(len(mae_list))))

    print("mean {} MAE: {:.4f} +- {:.4f}\n".format(
        theory_level, np.mean(dft_mae_list),
        np.std(dft_mae_list) / np.sqrt(len(dft_mae_list))))
    elif TASK == 'e_iso_n':
        X_train, X_test, y_train, y_test, dft_vals = dft_train_test_split(PATH, TASK)
    elif TASK == 'z_iso_n':
        X_train, X_test, y_train, y_test, dft_vals = dft_train_test_split(PATH, TASK)
    else:
        raise Exception('Must specify a valid task')

    rdkit_train_mols = [MolFromSmiles(smiles) for smiles in X_train]
    X_train = [AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=512) for mol in rdkit_train_mols]
    X_train = np.asarray(X_train)

    rdkit_test_mols = [MolFromSmiles(smiles) for smiles in X_test]
    X_test = [AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=512) for mol in rdkit_test_mols]
    X_test = np.asarray(X_test)

    X_train, y_train, X_test, y_test, y_scaler = transform_data(X_train, y_train, X_test, y_test)

    regr_rf = RandomForestRegressor(n_estimators=100, max_depth=30, random_state=2)
    regr_rf.fit(X_train, y_train)

    # Predict on new data
    y_rf = regr_rf.predict(X_test)
    y_rf = y_scaler.inverse_transform(y_rf)
    y_test = y_scaler.inverse_transform(y_test)
    score = r2_score(y_test, y_rf)
    rmse = np.sqrt(mean_squared_error(y_test, y_rf))
    mae = mean_absolute_error(y_test, y_rf)

    dft_rmse = np.sqrt(mean_squared_error(y_test, dft_vals))
    dft_mae = mean_absolute_error(y_test, dft_vals)
    dft_score = r2_score(y_test, dft_vals)
Exemple #7
0
def main(path, task, representation, use_pca, n_trials, test_set_size):
    """
    Train a scikit-learn Gaussian Process with a Tanimoto kernel over several random
    train/test splits, time prediction on a 10000x tiled copy of the test inputs, and
    print per-split and averaged metrics.

    :param path: str specifying path to dataset.
    :param task: str specifying the task. One of ['e_iso_pi', 'z_iso_pi', 'e_iso_n', 'z_iso_n']
    :param representation: str specifying the molecular representation. One of ['fingerprints, 'fragments', 'fragprints']
    :param use_pca: bool. If True apply PCA to perform Principal Components Regression.
        Note: the transformed inputs returned by transform_data are discarded below, so the
        model is always fitted on the untransformed features.
    :param n_trials: int specifying number of random train/test splits to use
    :param test_set_size: float in range [0, 1] specifying fraction of dataset to use as test set
    :return: None. All results are printed.
    """
    import time

    data_loader = TaskDataLoader(task, path)
    smiles_list, y = data_loader.load_property_data()
    X = featurise_mols(smiles_list, representation)

    # If True we perform Principal Components Regression

    n_components = 100 if use_pca else None

    r2_list = []
    rmse_list = []
    mae_list = []

    print('\nBeginning training loop...')

    for i in range(0, n_trials):

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_set_size, random_state=i)

        y_train = y_train.reshape(-1, 1)
        y_test = y_test.reshape(-1, 1)

        #  We standardise the outputs but leave the inputs unchanged

        _, y_train, _, y_test, y_scaler = transform_data(
            X_train,
            y_train,
            X_test,
            y_test,
            n_components=n_components,
            use_pca=use_pca)

        X_train = X_train.astype(np.float64)
        X_test = X_test.astype(np.float64)

        gp_kernel = TanimotoKernel()
        gpr = GaussianProcessRegressor(kernel=gp_kernel)
        gpr.fit(X_train, y_train)

        # Prediction-time benchmark on a tiled copy of the test inputs. The copy is kept
        # separate from X_test so the metrics below still compare one prediction per
        # test target.

        X_benchmark = np.tile(X_test, (10000, 1))

        start = time.time()

        gpr.predict(X_benchmark, return_std=False)

        end = time.time()
        print(f'time elapsed is {end - start}')

        # mean GP prediction

        y_pred = gpr.predict(X_test, return_std=False)
        y_pred = y_scaler.inverse_transform(y_pred)
        y_test = y_scaler.inverse_transform(y_test)

        # Output Standardised RMSE and RMSE on Train Set

        y_pred_train = gpr.predict(X_train, return_std=False)
        train_rmse_stan = np.sqrt(mean_squared_error(y_train, y_pred_train))
        train_rmse = np.sqrt(
            mean_squared_error(y_scaler.inverse_transform(y_train),
                               y_scaler.inverse_transform(y_pred_train)))
        print("\nStandardised Train RMSE: {:.3f}".format(train_rmse_stan))
        print("Train RMSE: {:.3f}".format(train_rmse))

        score = r2_score(y_test, y_pred)
        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        mae = mean_absolute_error(y_test, y_pred)

        print("\nR^2: {:.3f}".format(score))
        print("RMSE: {:.3f}".format(rmse))
        print("MAE: {:.3f}".format(mae))

        r2_list.append(score)
        rmse_list.append(rmse)
        mae_list.append(mae)

    r2_list = np.array(r2_list)
    rmse_list = np.array(rmse_list)
    mae_list = np.array(mae_list)

    print("\nmean R^2: {:.4f} +- {:.4f}".format(
        np.mean(r2_list),
        np.std(r2_list) / np.sqrt(len(r2_list))))
    print("mean RMSE: {:.4f} +- {:.4f}".format(
        np.mean(rmse_list),
        np.std(rmse_list) / np.sqrt(len(rmse_list))))
    print("mean MAE: {:.4f} +- {:.4f}\n".format(
        np.mean(mae_list),
        np.std(mae_list) / np.sqrt(len(mae_list))))
def main(path, task, representation, use_pca, n_trials, test_set_size):
    """
    Fit a random forest regressor on several random train/test splits and print the
    per-split and averaged R^2, RMSE and MAE.

    :param path: str specifying path to dataset.
    :param task: str specifying the task. One of ['e_iso_pi', 'z_iso_pi', 'e_iso_n', 'z_iso_n']
    :param representation: str specifying the molecular representation. One of ['fingerprints, 'fragments', 'fragprints']
    :param use_pca: bool. If True apply PCA to perform Principal Components Regression.
    :param n_trials: int specifying number of random train/test splits to use
    :param test_set_size: float in range [0, 1] specifying fraction of dataset to use as test set.
    """

    loader = TaskDataLoader(task, path)
    smiles_list, y = loader.load_property_data()

    X = featurise_mols(smiles_list, representation)

    # Number of principal components handed to transform_data (None when PCA is off).
    n_components = 50 if use_pca else None

    r2_scores, rmse_scores, mae_scores = [], [], []

    print('\nBeginning training loop...')

    for seed in range(n_trials):

        # The trial index doubles as the split seed.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_set_size, random_state=seed)
        y_train, y_test = y_train.reshape(-1, 1), y_test.reshape(-1, 1)
        X_train, y_train, X_test, y_test, y_scaler = transform_data(
            X_train, y_train, X_test, y_test, n_components, use_pca)

        forest = RandomForestRegressor(n_estimators=1519,
                                       random_state=4,
                                       max_features=0.086,
                                       bootstrap=False,
                                       min_samples_leaf=2)
        forest.fit(X_train, y_train)

        # Train-set RMSE, both on the transformed scale and after inverting the scaler

        train_fit = forest.predict(X_train)
        scaled_train_mse = mean_squared_error(y_train, train_fit)
        raw_train_mse = mean_squared_error(
            y_scaler.inverse_transform(y_train),
            y_scaler.inverse_transform(train_fit))
        print("\nStandardised Train RMSE: {:.3f}".format(
            np.sqrt(scaled_train_mse)))
        print("Train RMSE: {:.3f}".format(np.sqrt(raw_train_mse)))

        # Held-out predictions, scored after inverting the scaler
        test_fit = y_scaler.inverse_transform(forest.predict(X_test))
        y_test = y_scaler.inverse_transform(y_test)
        trial_r2 = r2_score(y_test, test_fit)
        trial_rmse = np.sqrt(mean_squared_error(y_test, test_fit))
        trial_mae = mean_absolute_error(y_test, test_fit)

        print("\nR^2: {:.3f}".format(trial_r2))
        print("RMSE: {:.3f}".format(trial_rmse))
        print("MAE: {:.3f}".format(trial_mae))

        r2_scores.append(trial_r2)
        rmse_scores.append(trial_rmse)
        mae_scores.append(trial_mae)

    # Mean and standard error of each metric across trials
    summary = [
        ("\nmean R^2: {:.4f} +- {:.4f}", np.array(r2_scores)),
        ("mean RMSE: {:.4f} +- {:.4f}", np.array(rmse_scores)),
        ("mean MAE: {:.4f} +- {:.4f}\n", np.array(mae_scores)),
    ]
    for template, values in summary:
        standard_error = np.std(values) / np.sqrt(len(values))
        print(template.format(np.mean(values), standard_error))
Exemple #9
0
def main(path, task, representation, use_pca, n_trials, test_set_size,
         use_rmse_conf, precompute_repr):
    """
    Train a BB-alpha Bayesian neural network on random train/test splits and report
    R^2 / RMSE / MAE together with a confidence-error curve and a calibration curve.

    When representation is 'SMILES' no model is trained: each split is only written to
    fixed_train_test_splits/{task}/ and the function returns without metrics or plots.

    :param path: str specifying path to dataset.
    :param task: str specifying the task. One of ['Photoswitch', 'ESOL', 'FreeSolv', 'Lipophilicity']
    :param representation: str specifying the molecular representation. One of ['SMILES', 'fingerprints',
    'fragments', 'fragprints']
    :param use_pca: bool. If True apply PCA to perform Principal Components Regression.
    :param n_trials: int specifying number of random train/test splits to use
    :param test_set_size: float in range [0, 1] specifying fraction of dataset to use as test set
    :param use_rmse_conf: bool specifying whether to compute the rmse confidence-error curves or the mae confidence-
    error curves. True is the option for rmse.
    :param precompute_repr: bool indicating whether to precompute representations or not.
    """

    data_loader = TaskDataLoader(task, path)
    smiles_list, y = data_loader.load_property_data()
    X = featurise_mols(smiles_list, representation)

    if precompute_repr:
        repr_path = f'precomputed_representations/{task}_{representation}.txt'
        if representation == 'SMILES':
            with open(repr_path, 'w') as f:
                for smiles in X:
                    f.write(smiles + '\n')
        else:
            np.savetxt(repr_path, X)

    # If True we perform Principal Components Regression
    n_components = 100 if use_pca else None

    r2_list = []
    rmse_list = []
    mae_list = []

    # We pre-allocate arrays for plotting confidence-error curves. A throwaway split is
    # used purely to learn how many test points each trial will produce.
    _, _, _, y_test = train_test_split(X,
                                       y,
                                       test_size=test_set_size,
                                       random_state=42)

    # Photoswitch dataset requires 80/20 splitting. Other datasets are 80/10/10, i.e. only
    # the first half of the held-out points is kept as the test set.
    if task != 'Photoswitch':
        n_test = len(y_test) // 2
    else:
        n_test = len(y_test)

    rmse_confidence_list = np.zeros((n_trials, n_test))
    mae_confidence_list = np.zeros((n_trials, n_test))

    # For the calibration curve: z_scores[m] is the half-width (in predictive standard
    # deviations) of the central Gaussian interval with nominal coverage qs[m].
    z_scores = [
        0.13, 0.26, 0.39, 0.53, 0.68, 0.85, 1.04, 1.15, 1.28, 1.44, 1.645, 1.96
    ]
    qs = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]
    prediction_prop = [[] for _ in range(n_trials)]

    print('\nBeginning training loop...')

    for i in range(0, n_trials):

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_set_size, random_state=i)

        if representation == 'SMILES':
            # SMILES strings are not modelled here; only persist the split and move on.
            split_dir = f'fixed_train_test_splits/{task}'
            np.savetxt(f'{split_dir}/X_train_split_{i}.txt', X_train, fmt="%s")
            np.savetxt(f'{split_dir}/X_test_split_{i}.txt', X_test, fmt="%s")
            np.savetxt(f'{split_dir}/y_train_split_{i}.txt', y_train)
            np.savetxt(f'{split_dir}/y_test_split_{i}.txt', y_test)
            continue

        if task != 'Photoswitch':
            # Artificially create a 80/10/10 train/validation/test split discarding the validation set.
            split_in_two = int(len(y_test) / 2)
            X_test = X_test[0:split_in_two]
            y_test = y_test[0:split_in_two]

        y_train = y_train.reshape(-1, 1)
        y_test = y_test.reshape(-1, 1)

        #  We standardise the outputs but leave the inputs unchanged
        _, y_train, _, y_test, y_scaler = transform_data(
            X_train,
            y_train,
            X_test,
            y_test,
            n_components=n_components,
            use_pca=use_pca)

        X_train = X_train.astype(np.float64)
        X_test = X_test.astype(np.float64)

        np.random.seed(42)

        datasets, n, d, mean_y_train, std_y_train = load_reg_data(
            X_train, y_train, X_test, y_test)

        train_set_x, train_set_y = datasets[0]
        test_set_x, test_set_y = datasets[1]

        N_train = train_set_x.get_value(borrow=True).shape[0]
        N_test = test_set_x.get_value(borrow=True).shape[0]
        layer_sizes = [d, 20, 20, len(mean_y_train)]
        n_samples = 100
        alpha = 0.5
        learning_rate = 0.01
        v_prior = 1.0
        batch_size = 32
        print('... building model')
        sys.stdout.flush()
        bb_alpha = BB_alpha(layer_sizes, n_samples, alpha, learning_rate,
                            v_prior, batch_size, train_set_x, train_set_y,
                            N_train, test_set_x, test_set_y, N_test,
                            mean_y_train, std_y_train)
        print('... training')
        sys.stdout.flush()

        test_error, test_ll = bb_alpha.train_ADAM(100)

        print('Test RMSE: ', test_error)
        print('Test ll: ', test_ll)

        samples = bb_alpha.sample_predictive_distribution(X_test)
        y_pred = np.mean(samples, axis=0)
        var = np.var(samples, axis=0)

        # For producing the calibration curve: fraction of test targets that fall strictly
        # inside the +-k sigma predictive interval, compared in the original data space.
        y_test_orig = y_scaler.inverse_transform(y_test)
        pred_std = np.sqrt(var)
        for k in z_scores:
            below_upper = (y_test_orig <
                           y_scaler.inverse_transform(y_pred + k * pred_std))
            above_lower = (y_test_orig >
                           y_scaler.inverse_transform(y_pred - k * pred_std))
            prediction_prop[i].append(
                np.count_nonzero(below_upper & above_lower) / len(y_test))

        # We transform the standardised predictions back to the original data space
        y_pred = y_scaler.inverse_transform(y_pred)
        y_test = y_test_orig

        # Compute scores for confidence curve plotting: indices sorted from the most
        # confident (lowest predictive variance) to the least confident prediction.
        ranked_confidence_list = np.argsort(var, axis=0).flatten()

        for k in range(len(y_test)):
            # Errors restricted to the k + 1 most confident predictions
            conf = ranked_confidence_list[0:k + 1]
            rmse_confidence_list[i, k] = np.sqrt(
                mean_squared_error(y_test[conf], y_pred[conf]))
            mae_confidence_list[i, k] = mean_absolute_error(
                y_test[conf], y_pred[conf])

        # Output Standardised RMSE and RMSE on Train Set
        train_samples = bb_alpha.sample_predictive_distribution(X_train)
        y_pred_train = np.mean(train_samples, axis=0)

        train_rmse_stan = np.sqrt(mean_squared_error(y_train, y_pred_train))
        train_rmse = np.sqrt(
            mean_squared_error(y_scaler.inverse_transform(y_train),
                               y_scaler.inverse_transform(y_pred_train)))
        print("\nStandardised Train RMSE: {:.3f}".format(train_rmse_stan))
        print("Train RMSE: {:.3f}".format(train_rmse))

        score = r2_score(y_test, y_pred)
        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        mae = mean_absolute_error(y_test, y_pred)

        print("\nR^2: {:.3f}".format(score))
        print("RMSE: {:.3f}".format(rmse))
        print("MAE: {:.3f}".format(mae))

        r2_list.append(score)
        rmse_list.append(rmse)
        mae_list.append(mae)

    if representation == 'SMILES':
        # Only the splits were written; there is nothing to summarise or plot.
        return

    r2_list = np.array(r2_list)
    rmse_list = np.array(rmse_list)
    mae_list = np.array(mae_list)

    # NB the spread reported here is the standard deviation over trials.
    print("\nmean R^2: {:.4f} +- {:.4f}".format(np.mean(r2_list),
                                                np.std(r2_list)))
    print("mean RMSE: {:.4f} +- {:.4f}".format(np.mean(rmse_list),
                                               np.std(rmse_list)))
    print("mean MAE: {:.4f} +- {:.4f}\n".format(np.mean(mae_list),
                                                np.std(mae_list)))

    # Plot confidence-error curves

    # linspace guarantees exactly n_test points; a float-step arange can produce one
    # point too many because of rounding.
    confidence_percentiles = np.linspace(0, 100, n_test, endpoint=False)

    if use_rmse_conf:
        confidence_errors, error_name, file_tag = rmse_confidence_list, 'RMSE', 'rmse'
    else:
        confidence_errors, error_name, file_tag = mae_confidence_list, 'MAE', 'mae'

    # We flip because we want the most confident predictions on the right-hand side of the plot
    error_mean = np.flip(np.mean(confidence_errors, axis=0))
    error_std = np.flip(np.std(confidence_errors, axis=0))

    # One-sigma error bars
    lower = error_mean - error_std
    upper = error_mean + error_std

    # Start from a fresh figure so nothing left on the current figure leaks into the plot.
    plt.figure()
    plt.plot(confidence_percentiles, error_mean, label='mean')
    plt.fill_between(confidence_percentiles, lower, upper, alpha=0.2)
    plt.xlabel('Confidence Percentile')
    plt.ylabel(error_name)
    plt.ylim([0, np.max(upper) + 1])
    plt.xlim([0, 100 * ((n_test - 1) / n_test)])
    plt.yticks(np.arange(0, np.max(upper) + 1, 5.0))
    plt.savefig(
        f'{task}/results/BNN/{representation}_{task}_confidence_curve_{file_tag}.png'
    )
    plt.show()

    # Plot the calibration curve

    mean_props = np.mean(prediction_prop, axis=0)
    sd_props = np.std(prediction_prop, axis=0)
    lower = mean_props - sd_props
    upper = mean_props + sd_props

    # plt.show() does not discard the figure on non-interactive backends, so open a new
    # one explicitly; otherwise the calibration curve is drawn over the confidence curve.
    plt.figure()
    plt.plot(qs, mean_props, label='mean')
    plt.fill_between(qs, lower, upper, alpha=0.2)
    plt.plot(qs, qs, color="red")
    plt.xlabel('q')
    plt.ylabel('C(q)')
    plt.savefig(task + '/results/BNN/{}_{}_calibration_curve.png'.format(
        representation, task))
    plt.show()

    np.savetxt(
        task + '/results/BNN/{}_{}_mean_props'.format(representation, task),
        mean_props)
    np.savetxt(
        task + '/results/BNN/{}_{}_sd_props'.format(representation, task),
        sd_props)
Exemple #10
0
def main(path, path_to_large_dataset, task, representation, test_set_size,
         augment_photo_dataset, n_trials):
    """
    Random forest generalisation experiment between the photoswitch dataset and the large
    comparison dataset loaded from path_to_large_dataset.

    If augment_photo_dataset is False the model is trained once on the large dataset and
    tested on the full photoswitch dataset (n_trials is forced to 1). Otherwise, for each
    trial, the photoswitch data is randomly split and the large dataset is appended to the
    training portion.

    :param path: str giving path to the photoswitches.csv file.
    :param path_to_large_dataset: str giving path to paper_allDB.csv file
    :param task: str specifying the task. Always e_iso_pi for the generalization experiment
    :param representation: str specifying the molecular representation. One of [fingerprints, fragments, fragprints].
    :param test_set_size: float in range [0, 1] specifying fraction of dataset to use as test set
    :param augment_photo_dataset: If True augment the photoswitch dataset with the Beard et al. 2019 dataset
    :param n_trials: int specifying the number of random train/test splits.
    """

    data_loader = TaskDataLoader(task, path)

    photo_smiles_list, y_vals_photo = data_loader.load_property_data()
    beard_smiles_list, y_vals_beard = data_loader.load_large_comparison_data(
        path_to_large_dataset)

    r2_list = []
    rmse_list = []
    mae_list = []

    if not augment_photo_dataset:
        # test set is now fixed
        n_trials = 1
        # We train on the Beard dataset and test on the photoswitch dataset
        X_train = featurise_mols(beard_smiles_list, representation)
        X_test = featurise_mols(photo_smiles_list, representation)
        y_train = y_vals_beard
        y_test = y_vals_photo

    for i in range(0, n_trials):

        if augment_photo_dataset:
            # We add the Beard dataset as additional training data
            X_train, X_test, y_train, y_test = train_test_split(
                photo_smiles_list,
                y_vals_photo,
                test_size=test_set_size,
                random_state=i)
            X_train = X_train + beard_smiles_list
            y_train = np.concatenate((y_train, y_vals_beard))
            X_train = featurise_mols(X_train, representation)
            X_test = featurise_mols(X_test, representation)

        y_train = y_train.reshape(-1, 1)
        y_test = y_test.reshape(-1, 1)
        X_train, y_train, X_test, y_test, y_scaler = transform_data(
            X_train, y_train, X_test, y_test)

        regr_rf = RandomForestRegressor(n_estimators=1000,
                                        max_depth=300,
                                        random_state=2)
        # The forest expects a 1-D target for single-output regression; passing the
        # (n, 1) column only triggers a DataConversionWarning.
        regr_rf.fit(X_train, y_train.ravel())

        # Output Standardised RMSE and RMSE on Train Set

        # predict() returns a 1-D array; restore the column shape the scaler was fitted on
        # (presumably a StandardScaler, which rejects 1-D input in recent scikit-learn).
        y_pred_train = regr_rf.predict(X_train).reshape(-1, 1)
        train_rmse_stan = np.sqrt(mean_squared_error(y_train, y_pred_train))
        train_rmse = np.sqrt(
            mean_squared_error(y_scaler.inverse_transform(y_train),
                               y_scaler.inverse_transform(y_pred_train)))
        print("\nStandardised Train RMSE: {:.3f}".format(train_rmse_stan))
        print("Train RMSE: {:.3f}".format(train_rmse))

        # Predict on new data and score in the original (unstandardised) data space
        y_rf = regr_rf.predict(X_test).reshape(-1, 1)
        y_rf = y_scaler.inverse_transform(y_rf)
        y_test = y_scaler.inverse_transform(y_test)
        score = r2_score(y_test, y_rf)
        rmse = np.sqrt(mean_squared_error(y_test, y_rf))
        mae = mean_absolute_error(y_test, y_rf)

        r2_list.append(score)
        rmse_list.append(rmse)
        mae_list.append(mae)

        print("\nR^2: {:.3f}".format(score))
        print("RMSE: {:.3f}".format(rmse))
        print("MAE: {:.3f}".format(mae))

    r2_list = np.array(r2_list)
    rmse_list = np.array(rmse_list)
    mae_list = np.array(mae_list)

    # Report the mean and its standard error over trials
    print("\nmean R^2: {:.4f} +- {:.4f}".format(
        np.mean(r2_list),
        np.std(r2_list) / np.sqrt(len(r2_list))))
    print("mean RMSE: {:.4f} +- {:.4f}".format(
        np.mean(rmse_list),
        np.std(rmse_list) / np.sqrt(len(rmse_list))))
    print("mean MAE: {:.4f} +- {:.4f}\n".format(
        np.mean(mae_list),
        np.std(mae_list) / np.sqrt(len(mae_list))))
def main(batch_size, learning_rate, iterations, r_size,
         det_encoder_hidden_size, det_encoder_n_hidden,
         lat_encoder_hidden_size, lat_encoder_n_hidden, decoder_hidden_size,
         decoder_n_hidden, testing, plotting):
    """
    Train an Attentive Neural Process on the 1D regression data stored under data/ and
    report R^2, RMSE, MAE, NLPD and execution time. Predicted means and variances are
    saved to .npy files in the working directory.

    :param batch_size: Integer, describing the number of times we should sample the set
                                of context points used to form the aggregated embedding during
                                training, given the number of context points to be sampled
                                N_context. When testing this is set to 1
    :param learning_rate: A float number, describing the optimiser's learning rate
    :param iterations: An integer, describing the number of iterations. In this case it also
                       corresponds to the number of times we sample the number of context points
                       N_context
    :param r_size: An integer describing the dimensionality of the embedding / context vector r
    :param det_encoder_hidden_size: An integer describing the number of nodes per hidden layer in the
                                deterministic encoder neural network
    :param det_encoder_n_hidden: An integer describing the number of hidden layers in the
                                deterministic encoder neural network
    :param lat_encoder_hidden_size: An integer describing the number of nodes per hidden layer in the
                                latent encoder neural network
    :param lat_encoder_n_hidden: An integer describing the number of hidden layers in the latent
                                encoder neural network
    :param decoder_hidden_size: An integer describing the number of nodes per hidden layer in the
                                decoder neural network
    :param decoder_n_hidden: An integer describing the number of hidden layers in the decoder neural
                             network
    :param testing: A Boolean variable; if true, during testing the RMSE on test and train data
                             will be printed after a specific number of iterations.
    :param plotting: A Boolean variable; if true, during testing the context points and predicted mean
                             and variance will be plotted after a specific number of iterations.
    :return:
    """
    # NB this silences all warnings for the rest of the process, not just this function.
    warnings.filterwarnings('ignore')

    r2_list = []
    rmse_list = []
    mae_list = []
    time_list = []
    print('\nBeginning training loop...')

    # Only dataset number 1 is currently processed.
    for i in range(1, 2):
        start_time = time.time()

        # Load training data
        x_train = np.load(f'data/xtrain_1dreg{i}.npy')
        y_train = np.load(f'data/ytrain_1dreg{i}.npy')

        # Generate target values of x and y for sampling from later on
        x_test = np.load(f'data/xtest_1dreg{i}.npy')
        y_test = np.load(f'data/ytest_1dreg{i}.npy')

        # Transform the data: standardise to zero mean and unit variance
        x_train, y_train, x_test, y_test, x_scaler, y_scaler = transform_data(
            x_train, y_train, x_test, y_test)

        print('... building model.')

        # Build the Attentive Neural Process model. The encoder and decoder functions are
        # neural networks, with size and number of layers being hyperparameters to be
        # selected.
        anp = AttentiveNP(x_size=x_train.shape[1],
                          y_size=y_train.shape[1],
                          r_size=r_size,
                          det_encoder_hidden_size=det_encoder_hidden_size,
                          det_encoder_n_hidden=det_encoder_n_hidden,
                          lat_encoder_hidden_size=lat_encoder_hidden_size,
                          lat_encoder_n_hidden=lat_encoder_n_hidden,
                          decoder_hidden_size=decoder_hidden_size,
                          decoder_n_hidden=decoder_n_hidden,
                          attention_type="multihead")

        print('... training.')

        # Train the model (NB can replace x_test, y_test with x_valid and y_valid if planning
        # to use a cross validation set)
        anp.train(x_train=x_train,
                  y_train=y_train,
                  x_test=x_test,
                  y_test=y_test,
                  x_scaler=x_scaler,
                  y_scaler=y_scaler,
                  batch_size=batch_size,
                  lr=learning_rate,
                  iterations=iterations,
                  testing=testing,
                  plotting=plotting)

        # Testing: the 'context points' when testing are the entire training set, and the
        # 'target points' are the entire test set. A leading batch axis of size 1 is added.
        x_context = torch.tensor(np.expand_dims(x_train, axis=0))
        y_context = torch.tensor(np.expand_dims(y_train, axis=0))
        x_test = torch.tensor(np.expand_dims(x_test, axis=0))

        # Predict mean and error in y given the test inputs x_test
        _, predict_test_mean, predict_test_var = anp.predict(
            x_context, y_context, x_test)

        # Drop the batch axis again
        predict_test_mean = np.squeeze(predict_test_mean.data.numpy(), axis=0)
        predict_test_var = np.squeeze(predict_test_var.data.numpy(), axis=0)

        # We transform the standardised predicted and actual y values back to the original
        # data space; a variance is rescaled by the scaler's variance.
        y_mean_pred = y_scaler.inverse_transform(predict_test_mean)
        y_var_pred = y_scaler.var_ * predict_test_var
        y_test = y_scaler.inverse_transform(y_test)

        # Calculate relevant metrics
        score = r2_score(y_test, y_mean_pred)
        rmse = np.sqrt(mean_squared_error(y_test, y_mean_pred))
        mae = mean_absolute_error(y_test, y_mean_pred)
        nlpd_test = nlpd(y_mean_pred, y_var_pred, y_test)
        time_taken = time.time() - start_time

        np.save(f'ytest_mean_pred_1dreg{i}_anp.npy', y_mean_pred)
        np.save(f'ytest_var_pred_1dreg{i}_anp.npy', y_var_pred)

        print("\nR^2: {:.3f}".format(score))
        print("RMSE: {:.3f}".format(rmse))
        print("MAE: {:.3f}".format(mae))
        print("NLPD: {:.4f}".format(nlpd_test))
        print("Execution time: {:.3f}".format(time_taken))
        r2_list.append(score)
        rmse_list.append(rmse)
        mae_list.append(mae)
        time_list.append(time_taken)

    r2_list = np.array(r2_list)
    rmse_list = np.array(rmse_list)
    mae_list = np.array(mae_list)
    time_list = np.array(time_list)

    # Report the mean and its standard error over runs
    print("\nmean R^2: {:.4f} +- {:.4f}".format(
        np.mean(r2_list),
        np.std(r2_list) / np.sqrt(len(r2_list))))
    print("mean RMSE: {:.4f} +- {:.4f}".format(
        np.mean(rmse_list),
        np.std(rmse_list) / np.sqrt(len(rmse_list))))
    print("mean MAE: {:.4f} +- {:.4f}\n".format(
        np.mean(mae_list),
        np.std(mae_list) / np.sqrt(len(mae_list))))
    print("mean Execution time: {:.3f} +- {:.3f}\n".format(
        np.mean(time_list),
        np.std(time_list) / np.sqrt(len(time_list))))
def main(task, path, representation, use_pca, n_trials, test_set_size,
         batch_size, lr, iterations, r_size, det_encoder_hidden_size,
         det_encoder_n_hidden, lat_encoder_hidden_size, lat_encoder_n_hidden,
         decoder_hidden_size, decoder_n_hidden):
    """
    Train an Attentive Neural Process on random train/test splits of a molecular property
    dataset. Per-split predictions, a summary text file and RMSE / MAE confidence-error
    curves are written under {task}/results/anp/.

    :param task: str specifying the task name. One of [e_iso_pi, e_iso_n, z_iso_pi, z_iso_n]
    :param path: str specifying the path to the photoswitches.csv file
    :param representation: str specifying the representation. One of [fingerprints, fragments, fragprints]
    :param use_pca: bool specifying whether or not to use PCA to perform Principal Components Regression
    :param n_trials: int specifying the number of random train/test splits.
    :param test_set_size: float specifying the train/test split ratio. e.g. 0.2 is 80/20 train/test split
    :param batch_size: int specifying the number of samples to take of the context set, given the number of
    context points that should be selected.
    :param lr: float specifying the learning rate.
    :param iterations: int specifying the number of training iterations
    :param r_size: Dimensionality of context encoding r.
    :param det_encoder_hidden_size: Dimensionality of deterministic encoder hidden layers.
    :param det_encoder_n_hidden: Number of deterministic encoder hidden layers.
    :param lat_encoder_hidden_size: Dimensionality of latent encoder hidden layers.
    :param lat_encoder_n_hidden: Number of latent encoder hidden layers.
    :param decoder_hidden_size: Dimensionality of decoder hidden layers.
    :param decoder_n_hidden: Number of decoder hidden layers.
    :return:
    """

    path_to_save = task + '/results/anp/'
    data_loader = TaskDataLoader(task, path)
    smiles_list, y = data_loader.load_property_data()
    y_size = 1

    # Use the function argument rather than a module-level `args` object, so that main()
    # also works when it is not driven by the command-line parser.
    X = featurise_mols(smiles_list, representation)

    # If True we perform Principal Components Regression
    n_components = 50 if use_pca else None

    r2_list = []
    rmse_list = []
    mae_list = []

    # We pre-allocate arrays for plotting confidence-error curves. A throwaway split is
    # used purely to learn how many test points each trial will produce.
    _, _, _, y_test = train_test_split(X, y, test_size=test_set_size)
    n_test = len(y_test)

    rmse_confidence_list = np.zeros((n_trials, n_test))
    mae_confidence_list = np.zeros((n_trials, n_test))

    print('\nBeginning training loop...')

    for i in range(0, n_trials):

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_set_size, random_state=i)

        y_train = y_train.reshape(-1, 1)
        y_test = y_test.reshape(-1, 1)

        # The transformed test targets are discarded, so y_test stays in the original data
        # space and is compared against inverse-transformed predictions below.
        X_train, y_train, X_test, _, y_scaler = transform_data(
            X_train,
            y_train,
            X_test,
            y_test,
            n_components=n_components,
            use_pca=use_pca)

        # Add a leading batch axis of size 1
        X_train = torch.from_numpy(X_train).float().unsqueeze(dim=0)
        X_test = torch.from_numpy(X_test).float().unsqueeze(dim=0)
        y_train = torch.from_numpy(y_train).float().unsqueeze(dim=0)

        m = AttentiveNP(x_size=X_train.shape[2],
                        y_size=y_size,
                        r_size=r_size,
                        det_encoder_hidden_size=det_encoder_hidden_size,
                        det_encoder_n_hidden=det_encoder_n_hidden,
                        lat_encoder_hidden_size=lat_encoder_hidden_size,
                        lat_encoder_n_hidden=lat_encoder_n_hidden,
                        decoder_hidden_size=decoder_hidden_size,
                        decoder_n_hidden=decoder_n_hidden,
                        lr=lr,
                        attention_type="multihead")

        print('...training.')

        m.train(X_train,
                y_train,
                batch_size=batch_size,
                iterations=iterations,
                print_freq=None)

        # Now, the context set comprises the training x / y values, the target set comprises the test x values.

        y_pred, y_var = m.predict(X_train, y_train, X_test, n_samples=100)

        y_pred = y_scaler.inverse_transform(y_pred)

        # Compute scores for confidence curve plotting: indices sorted from the most
        # confident (lowest predictive variance) to the least confident prediction.
        ranked_confidence_list = np.argsort(y_var.numpy(), axis=0).flatten()

        for k in range(len(y_test)):
            # Errors restricted to the k + 1 most confident predictions
            conf = ranked_confidence_list[0:k + 1]
            rmse_confidence_list[i, k] = np.sqrt(
                mean_squared_error(y_test[conf], y_pred[conf]))
            mae_confidence_list[i, k] = mean_absolute_error(
                y_test[conf], y_pred[conf])

        # Output test-set metrics in the original data space
        score = r2_score(y_test, y_pred)
        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        mae = mean_absolute_error(y_test, y_pred)

        print("\nR^2: {:.3f}".format(score))
        print("RMSE: {:.3f}".format(rmse))
        print("MAE: {:.3f}".format(mae))

        r2_list.append(score)
        rmse_list.append(rmse)
        mae_list.append(mae)

        # Results are indexed by the trial number, which is also the split's random seed.
        np.savetxt(
            path_to_save + '_seed_' + str(i) + '_ypred_' + representation +
            '.txt', y_pred)
        np.savetxt(path_to_save + '_seed_' + str(i) + '_ytest.txt', y_test)
        # NOTE(review): y_var is not rescaled by the scaler, so this looks like it is
        # still in standardised units - confirm.
        np.savetxt(
            path_to_save + '_seed_' + str(i) + '_ystd_' + representation +
            '.txt', np.sqrt(y_var))

    r2_list = np.array(r2_list)
    rmse_list = np.array(rmse_list)
    mae_list = np.array(mae_list)

    # Mean and standard error over trials, computed once for both console and file output
    r2_mean, r2_sem = np.mean(r2_list), np.std(r2_list) / np.sqrt(len(r2_list))
    rmse_mean_all, rmse_sem = (np.mean(rmse_list),
                               np.std(rmse_list) / np.sqrt(len(rmse_list)))
    mae_mean_all, mae_sem = (np.mean(mae_list),
                             np.std(mae_list) / np.sqrt(len(mae_list)))

    print("\nmean R^2: {:.4f} +- {:.4f}".format(r2_mean, r2_sem))
    print("mean RMSE: {:.4f} +- {:.4f}".format(rmse_mean_all, rmse_sem))
    print("mean MAE: {:.4f} +- {:.4f}\n".format(mae_mean_all, mae_sem))

    # Record the hyperparameters together with the summary metrics
    with open(path_to_save + representation + '.txt', 'w+') as f:
        f.write('\n Representation = ' + str(representation))
        f.write('\n Task = ' + str(task))
        f.write('\n Use PCA? = ' + str(use_pca))
        f.write('\n Number of trials = {} \n'.format(n_trials))
        f.write('\n Deterministic encoder hidden size = ' +
                str(det_encoder_hidden_size))
        f.write('\n Deterministic encoder number of layers = ' +
                str(det_encoder_n_hidden))
        f.write('\n Latent encoder hidden size = ' +
                str(lat_encoder_hidden_size))
        f.write('\n Latent encoder number of layers = ' +
                str(lat_encoder_n_hidden))
        f.write('\n Decoder hidden size = ' + str(decoder_hidden_size))
        f.write('\n Decoder number of layers = ' + str(decoder_n_hidden))
        f.write('\n Latent variable size = ' + str(r_size))
        f.write('\n Batch size = {}'.format(batch_size))
        f.write('\n Learning rate = {}'.format(lr))
        f.write('\n Number of iterations = {} \n'.format(iterations))
        f.write("\nmean R^2: {:.4f} +- {:.4f}".format(r2_mean, r2_sem))
        f.write("\nmean RMSE: {:.4f} +- {:.4f}".format(rmse_mean_all,
                                                       rmse_sem))
        f.write("\nmean MAE: {:.4f} +- {:.4f}\n".format(mae_mean_all,
                                                        mae_sem))

    # Plot confidence-error curves

    # linspace guarantees exactly n_test points; a float-step arange can produce one
    # point too many because of rounding.
    confidence_percentiles = np.linspace(0, 100, n_test, endpoint=False)

    # We flip because we want the most confident predictions on the right-hand side of the plot
    rmse_mean = np.flip(np.mean(rmse_confidence_list, axis=0))
    rmse_std = np.flip(np.std(rmse_confidence_list, axis=0))

    # One-sigma error bars
    lower = rmse_mean - rmse_std
    upper = rmse_mean + rmse_std

    # Each curve gets its own figure so the two saved images do not share content.
    plt.figure()
    plt.plot(confidence_percentiles, rmse_mean, label='mean')
    plt.fill_between(confidence_percentiles, lower, upper, alpha=0.2)
    plt.xlabel('Confidence Percentile')
    plt.ylabel('RMSE (nm)')
    plt.ylim([0, np.max(upper) + 1])
    plt.xlim([0, 100 * ((n_test - 1) / n_test)])
    plt.yticks(np.arange(0, np.max(upper) + 1, 5.0))
    plt.savefig(path_to_save + 'confidence_curve_rmse.png')

    # We plot the Mean-absolute error confidence-error curves

    mae_mean = np.flip(np.mean(mae_confidence_list, axis=0))
    mae_std = np.flip(np.std(mae_confidence_list, axis=0))

    lower = mae_mean - mae_std
    upper = mae_mean + mae_std

    plt.figure()
    plt.plot(confidence_percentiles, mae_mean, label='mean')
    plt.fill_between(confidence_percentiles, lower, upper, alpha=0.2)
    plt.xlabel('Confidence Percentile')
    plt.ylabel('MAE (nm)')
    plt.ylim([0, np.max(upper) + 1])
    plt.xlim([0, 100 * ((n_test - 1) / n_test)])
    plt.yticks(np.arange(0, np.max(upper) + 1, 5.0))
    plt.savefig(path_to_save + 'confidence_curve_mae.png')