Example #1
# Imports inferred from the function body: json for saving results, plus the
# project-local modules util, gfp_gp, and optimization_algs.
import json

import gfp_gp
import optimization_algs
import util


def run_gomez_bombarelli(constrained=True):
    """Runs the GFP comparative tests on the Gomez-Bombarelli optimization algorithm."""
    TRAIN_SIZE = 5000
    train_size_str = "%ik" % (TRAIN_SIZE // 1000)  # e.g. "5k"
    for it in range(3):
        RANDOM_STATE = it + 1

        # Load the training sequences and the GP ground-truth model.
        X_train, _, _ = util.get_experimental_X_y(random_state=RANDOM_STATE,
                                                  train_size=TRAIN_SIZE)
        ground_truth = gfp_gp.SequenceGP(load=True,
                                         load_prefix="../data/gfp_gp")

        L = X_train.shape[1]  # sequence length
        LD = 20               # latent dimension
        gt_var = 0.01         # fixed predictive variance for the predictor head
        # Build the predictor-VAE with the same architecture used in training.
        pred_vae = util.build_pred_vae_model(latent_dim=LD,
                                             n_tokens=X_train.shape[2],
                                             seq_length=L,
                                             enc1_units=50,
                                             pred_var=gt_var)
        suffix = "_%s_%i" % (train_size_str, RANDOM_STATE)

        # Restore the pretrained weights for each component of the model.
        pred_vae.encoder_.load_weights(
            "../models/pred_vae_encoder_weights%s.h5" % suffix)
        pred_vae.decoder_.load_weights(
            "../models/pred_vae_decoder_weights%s.h5" % suffix)
        pred_vae.predictor_.load_weights(
            "../models/pred_vae_predictor_weights%s.h5" % suffix)
        pred_vae.vae_.load_weights("../models/pred_vae_vae_weights%s.h5" %
                                   suffix)
        if not constrained:
            suffix = "_unconstrained" + suffix
        # Run latent-space optimization over the predictor-VAE (the
        # Gomez-Bombarelli approach), scoring candidates with the GP oracle.
        bomb_results, test_max = optimization_algs.bombarelli_opt(
            X_train,
            pred_vae,
            ground_truth,
            total_it=1000,
            constrained=constrained)
        with open('../results/%s_max%s.json' % ('bombarelli', suffix),
                  'w') as outfile:
            json.dump(test_max, outfile)
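
A minimal driver (hypothetical, not part of the original listing) could run both variants back to back, assuming the pretrained weight files under ../models/ and the GP files under ../data/gfp_gp are present:

if __name__ == "__main__":
    # The constrained flag is forwarded to optimization_algs.bombarelli_opt
    # and also selects the output filename prefix.
    run_gomez_bombarelli(constrained=True)
    run_gomez_bombarelli(constrained=False)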
Example #2
# Imports inferred from the function body: Keras/TensorFlow (TF1-style
# sessions) for the models, NumPy and json for saving results, plus the
# project-local modules.
import json

import keras
import numpy as np
import tensorflow as tf
from keras import backend as K

import gfp_gp
import losses
import optimization_algs
import util


def run_killoran(killoran=True):
    """Runs the GFP comparative tests on the Killoran (aka AM-VAE) optimization
    algorithm; with killoran=False, runs the MALA variant instead."""
    TRAIN_SIZE = 5000
    train_size_str = "%ik" % (TRAIN_SIZE // 1000)  # e.g. "5k"
    for i in range(3):
        RANDOM_STATE = i + 1
        print(RANDOM_STATE)
        # Each seed is paired with a different oracle ensemble size.
        num_models = [1, 5, 20][i]
        X_train, _, _ = util.get_experimental_X_y(random_state=RANDOM_STATE,
                                                  train_size=TRAIN_SIZE)

        LD = 20
        L = X_train.shape[1]

        vae_suffix = '_%s_%i' % (train_size_str, RANDOM_STATE)

        ground_truth = gfp_gp.SequenceGP(load=True,
                                         load_prefix="../data/gfp_gp")
        # Register the custom loss so keras.models.load_model can deserialize
        # the saved oracle models.
        loss = losses.neg_log_likelihood
        keras.utils.get_custom_objects().update({"neg_log_likelihood": loss})
        oracle_suffix = '_%s_%i_%i' % (train_size_str, num_models,
                                       RANDOM_STATE)

        # Attach a fresh TF1 session to the default graph for this iteration.
        sess = tf.Session(graph=tf.get_default_graph())
        K.set_session(sess)
        vae = util.build_vae(latent_dim=LD,
                             n_tokens=20,
                             seq_length=L,
                             enc1_units=50)
        vae.encoder_.load_weights("../models/vae_0_encoder_weights%s.h5" %
                                  vae_suffix)
        vae.decoder_.load_weights("../models/vae_0_decoder_weights%s.h5" %
                                  vae_suffix)
        vae.vae_.load_weights("../models/vae_0_vae_weights%s.h5" % vae_suffix)

        # Load the oracle ensemble; use `j` here so the outer loop index `i`
        # is not shadowed inside the comprehension.
        oracles = [
            keras.models.load_model("../models/oracle_%i%s.h5" %
                                    (j, oracle_suffix))
            for j in range(num_models)
        ]
        if not killoran:
            # MALA branch: plain gradient steps with injected noise
            # (adam=False); results are saved under the 'mala' prefix.
            results, test_max = optimization_algs.killoran_opt(X_train,
                                                               vae,
                                                               oracles,
                                                               ground_truth,
                                                               steps=30000,
                                                               epsilon1=1e-5,
                                                               epsilon2=1.,
                                                               noise_std=1e-5,
                                                               LD=20,
                                                               verbose=False,
                                                               adam=False)

            np.save(
                "../results/mala_results_%s_%i.npy" %
                (train_size_str, RANDOM_STATE), results)
            suffix = "_%s_%i" % (train_size_str, RANDOM_STATE)
            with open('../results/%s_max%s.json' % ('mala', suffix),
                      'w') as outfile:
                json.dump(test_max, outfile)

        else:
            # Killoran / AM-VAE branch: Adam-driven updates (adam=True).
            results, test_max = optimization_algs.killoran_opt(X_train,
                                                               vae,
                                                               oracles,
                                                               ground_truth,
                                                               steps=10000,
                                                               epsilon1=0.,
                                                               epsilon2=0.1,
                                                               noise_std=1e-6,
                                                               LD=20,
                                                               verbose=False,
                                                               adam=True)
            np.save(
                "../results/killoran_may_results_%s_%i.npy" %
                (train_size_str, RANDOM_STATE), results)
            suffix = "_%s_%i" % (train_size_str, RANDOM_STATE)
            with open('../results/%s_max%s.json' % ('killoran', suffix),
                      'w') as outfile:
                json.dump(test_max, outfile)
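
A hypothetical driver (not in the original listing) that exercises both branches, assuming the pretrained VAE and oracle files referenced above exist:

if __name__ == "__main__":
    run_killoran(killoran=True)   # Killoran / AM-VAE settings (adam=True)
    run_killoran(killoran=False)  # MALA settings, saved under the 'mala' prefix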
Example #3
# Imports inferred from the function body; build_vae and neg_log_likelihood
# are assumed to live in the project's util and losses modules, matching
# their use as util.build_vae and losses.neg_log_likelihood in the previous
# example.
import json

import keras
import numpy as np

import gfp_gp
import optimization_algs
import util
from losses import neg_log_likelihood
from util import build_vae


def run_experimental_weighted_ml(it, repeats=3):
    """Runs the GFP comparative tests on the weighted ML models and FBVAE."""

    assert it in [0, 1, 2]  # selects the seed and the oracle ensemble size

    TRAIN_SIZE = 5000
    train_size_str = "%ik" % (TRAIN_SIZE // 1000)  # e.g. "5k"
    num_models = [1, 5, 20][it]
    RANDOM_STATE = it + 1

    X_train, y_train, gt_train = util.get_experimental_X_y(
        random_state=RANDOM_STATE, train_size=TRAIN_SIZE)

    vae_suffix = '_%s_%i' % (train_size_str, RANDOM_STATE)
    oracle_suffix = '_%s_%i_%i' % (train_size_str, num_models, RANDOM_STATE)

    vae_0 = build_vae(latent_dim=20,
                      n_tokens=20,
                      seq_length=X_train.shape[1],
                      enc1_units=50)

    vae_0.encoder_.load_weights("../models/vae_0_encoder_weights%s.h5" %
                                vae_suffix)
    vae_0.decoder_.load_weights("../models/vae_0_decoder_weights%s.h5" %
                                vae_suffix)
    vae_0.vae_.load_weights("../models/vae_0_vae_weights%s.h5" % vae_suffix)

    ground_truth = gfp_gp.SequenceGP(load=True, load_prefix="../data/gfp_gp")

    # Register the custom loss so the saved oracle models can be deserialized.
    loss = neg_log_likelihood
    keras.utils.get_custom_objects().update({"neg_log_likelihood": loss})
    oracles = [
        keras.models.load_model("../models/oracle_%i%s.h5" %
                                (i, oracle_suffix)) for i in range(num_models)
    ]

    test_kwargs = [
        {'weights_type': 'cbas', 'quantile': 1},
        {'weights_type': 'rwr', 'alpha': 20},
        {'weights_type': 'dbas', 'quantile': 0.95},
        {'weights_type': 'cem-pi', 'quantile': 0.8},
        {'weights_type': 'fbvae', 'quantile': 0.8},
    ]

    # Arguments shared across all weighting schemes.
    base_kwargs = {
        'homoscedastic': False,
        'homo_y_var': 0.01,
        'train_gt_evals': gt_train,
        'samples': 100,
        'cutoff': 1e-6,
        'it_epochs': 10,
        'verbose': True,
        'LD': 20,
        'enc1_units': 50,
        'iters': 50
    }

    if num_models == 1:
        # With a single oracle there is no ensemble variance, so estimate a
        # homoscedastic noise variance from the oracle's training residuals.
        base_kwargs['homoscedastic'] = True
        base_kwargs['homo_y_var'] = np.mean(
            (util.get_balaji_predictions(oracles, X_train)[0] - y_train)**2)

    for k in range(repeats):
        for j in range(len(test_kwargs)):
            test_name = test_kwargs[j]['weights_type']
            suffix = "_%s_%i_%i" % (train_size_str, RANDOM_STATE, k)
            # Runs configured for more than 100 iterations get a '_long' tag.
            if base_kwargs['iters'] > 100:
                suffix += '_long'
            kwargs = {}
            kwargs.update(test_kwargs[j])
            kwargs.update(base_kwargs)
            if test_name == 'fbvae':
                print(suffix)
                # FBVAE does not accept the weighted-ML-specific arguments,
                # so drop them before dispatching. Use a distinct loop
                # variable so the repeat index `k` is not shadowed.
                for key in ['homoscedastic', 'homo_y_var', 'cutoff',
                            'it_epochs']:
                    kwargs.pop(key)
                test_traj, test_oracle_samples, test_gt_samples, test_max = \
                    optimization_algs.fb_opt(np.copy(X_train), oracles,
                                             ground_truth, vae_0, **kwargs)
            else:
                test_traj, test_oracle_samples, test_gt_samples, test_max = \
                    optimization_algs.weighted_ml_opt(np.copy(X_train),
                                                      oracles, ground_truth,
                                                      vae_0, **kwargs)
            np.save('../results/%s_traj%s.npy' % (test_name, suffix),
                    test_traj)
            np.save('../results/%s_oracle_samples%s.npy' % (test_name, suffix),
                    test_oracle_samples)
            np.save('../results/%s_gt_samples%s.npy' % (test_name, suffix),
                    test_gt_samples)

            with open('../results/%s_max%s.json' % (test_name, suffix),
                      'w') as outfile:
                json.dump(test_max, outfile)
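
A hypothetical driver (not in the original listing) that sweeps all three splits; `it` picks RANDOM_STATE = it + 1 and the oracle ensemble size [1, 5, 20][it], per the function body:

if __name__ == "__main__":
    for it in range(3):
        run_experimental_weighted_ml(it, repeats=3)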