예제 #1
0
def main():
    """Train a convolutional VAE on FS-Peptide data and save the results.

    Command-line arguments:
        --data_path: location handed to ``FSPeptide`` for loading data.
        --weight_path: directory in which ``cvae_fspeptide.h5`` is written
            (required).
        --embedding_path: location handed to
            ``EmbeddingCallback.save_embeddings``.

    The model is trained for 100 epochs with a batch size of 512, after
    which the weights and the callback's embeddings are saved.
    """
    parser = argparse.ArgumentParser(
        description='Convolutional VAE for FS-Peptide data.')
    parser.add_argument('--data_path',
                        type=str,
                        help='Path to load fs-peptide data.')
    # Required: this value is fed to os.path.join() only after training has
    # finished, and os.path.join(None, ...) raises TypeError. Rejecting a
    # missing value at parse time avoids losing a full training run.
    parser.add_argument('--weight_path',
                        type=str,
                        required=True,
                        help='Path to save network weights.')
    parser.add_argument('--embedding_path',
                        type=str,
                        help='Path to save embeddings.')

    args = parser.parse_args()

    train_data = FSPeptide(args.data_path, partition='train', download=True)
    val_data = FSPeptide(args.data_path, partition='validation', download=True)

    # load_data() returns a pair; only the first element is used here.
    x_train, _ = train_data.load_data()
    x_val, _ = val_data.load_data()

    # Keras complains if height and width dimensions are odd, so append one
    # zero-filled row and column to axes 1 and 2.
    x_train = np.pad(x_train, [(0, 0), (0, 1), (0, 1), (0, 0)],
                     mode='constant')
    x_val = np.pad(x_val, [(0, 0), (0, 1), (0, 1), (0, 0)], mode='constant')
    input_shape = (22, 22, 1)

    optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)

    encoder = EncoderConvolution2D(input_shape=input_shape)

    # NOTE(review): this call appears to populate the two attributes read
    # right after it -- confirm against EncoderConvolution2D.
    encoder._get_final_conv_params()
    num_conv_params = encoder.total_conv_params
    encode_conv_shape = encoder.final_conv_shape

    # The decoder is configured from the encoder's final convolution layer.
    decoder = DecoderConvolution2D(output_shape=input_shape,
                                   enc_conv_params=num_conv_params,
                                   enc_conv_shape=encode_conv_shape)

    cvae = VAE(input_shape=input_shape,
               latent_dim=3,
               encoder=encoder,
               decoder=decoder,
               optimizer=optimizer)

    callback = EmbeddingCallback(x_train, cvae)
    cvae.train(x_train,
               validation_data=x_val,
               batch_size=512,
               epochs=100,
               callbacks=[callback])

    weight_path = os.path.join(args.weight_path, 'cvae_fspeptide.h5')
    cvae.save_weights(weight_path)
    callback.save_embeddings(filename='fspeptide', path=args.embedding_path)
예제 #2
0
def main():
    """Embed FS-Peptide data with a previously trained convolutional VAE.

    Command-line arguments:
        --data_path: location handed to ``FSPeptide`` for loading data.
        --weight_path: directory containing ``cvae_fspeptide.h5``
            (required).
        --embedding_path: directory in which the train and validation
            embeddings are written (required).

    The weights are loaded into a freshly built model, both partitions are
    embedded, and the results are stored with ``np.save``.
    """
    parser = argparse.ArgumentParser(
        description='Convolutional VAE for FS-Peptide data.')
    parser.add_argument('--data_path',
                        type=str,
                        help='Path to load fs-peptide data.')
    # Both paths below go through os.path.join(), which raises TypeError on
    # None, so a missing value is rejected at parse time instead.
    # The help text used to say "save", but this script only loads weights.
    parser.add_argument('--weight_path',
                        type=str,
                        required=True,
                        help='Path to load network weights.')
    parser.add_argument('--embedding_path',
                        type=str,
                        required=True,
                        help='Path to save embeddings.')

    args = parser.parse_args()

    train_data = FSPeptide(args.data_path, partition='train', download=True)
    val_data = FSPeptide(args.data_path, partition='validation', download=True)

    # load_data() returns a pair; only the first element is used here.
    x_train, _ = train_data.load_data()
    x_val, _ = val_data.load_data()

    # Keras complains if height and width dimensions are odd, so append one
    # zero-filled row and column to axes 1 and 2.
    x_train = np.pad(x_train, [(0, 0), (0, 1), (0, 1), (0, 0)],
                     mode='constant')
    x_val = np.pad(x_val, [(0, 0), (0, 1), (0, 1), (0, 0)], mode='constant')
    input_shape = (22, 22, 1)

    optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)

    encoder = EncoderConvolution2D(input_shape=input_shape)

    # The decoder is configured from the encoder's final convolution layer.
    encode_conv_shape, num_conv_params = encoder.get_final_conv_params()

    decoder = DecoderConvolution2D(output_shape=input_shape,
                                   enc_conv_params=num_conv_params,
                                   enc_conv_shape=encode_conv_shape)

    cvae = VAE(input_shape=input_shape,
               encoder=encoder,
               decoder=decoder,
               optimizer=optimizer)

    weight_path = os.path.join(args.weight_path, 'cvae_fspeptide.h5')
    cvae.load_weights(weight_path)

    embeddings_train = cvae.embed(x_train)
    embeddings_val = cvae.embed(x_val)

    # np.save appends the '.npy' extension to names that lack it.
    embed_train_path = os.path.join(args.embedding_path, 'train_embeddings')
    embed_val_path = os.path.join(args.embedding_path, 'validation_embeddings')
    np.save(embed_train_path, embeddings_train)
    np.save(embed_val_path, embeddings_val)
예제 #3
0
def main():
    """Train a convolutional VAE on 1FME data and save the results.

    Command-line arguments:
        --data_path: location handed to ``OneFME`` for loading data.
        --weight_path: directory in which ``cvae_onefme.h5`` is written
            (required).
        --embedding_path: location handed to
            ``EmbeddingCallback.save_embeddings``.

    The model is trained for 100 epochs with a batch size of 512, after
    which the weights and the callback's embeddings are saved.
    """
    parser = argparse.ArgumentParser(
        description='Convolutional VAE for 1FME data.')
    parser.add_argument('--data_path',
                        type=str,
                        help='Path to load 1FME data.')
    # Required: this value is fed to os.path.join() only after training has
    # finished, and os.path.join(None, ...) raises TypeError. Rejecting a
    # missing value at parse time avoids losing a full training run.
    parser.add_argument('--weight_path',
                        type=str,
                        required=True,
                        help='Path to save network weights.')
    parser.add_argument('--embedding_path',
                        type=str,
                        help='Path to save embeddings.')
    args = parser.parse_args()

    train_data = OneFME(args.data_path, partition='train', download=True)
    val_data = OneFME(args.data_path, partition='validation', download=True)

    x_train = train_data.load_data()
    x_val = val_data.load_data()
    input_shape = (28, 28, 1)

    optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)

    encoder = EncoderConvolution2D(input_shape=input_shape)

    # NOTE(review): this call appears to populate the two attributes read
    # right after it -- confirm against EncoderConvolution2D.
    encoder._get_final_conv_params()
    num_conv_params = encoder.total_conv_params
    encode_conv_shape = encoder.final_conv_shape

    # The decoder is configured from the encoder's final convolution layer.
    decoder = DecoderConvolution2D(output_shape=input_shape,
                                   enc_conv_params=num_conv_params,
                                   enc_conv_shape=encode_conv_shape)

    cvae = VAE(input_shape=input_shape,
               latent_dim=3,
               encoder=encoder,
               decoder=decoder,
               optimizer=optimizer)

    callback = EmbeddingCallback(x_train, cvae)
    cvae.train(x_train,
               validation_data=x_val,
               batch_size=512,
               epochs=100,
               callbacks=[callback])

    weight_path = os.path.join(args.weight_path, 'cvae_onefme.h5')
    cvae.save_weights(weight_path)
    callback.save_embeddings(filename='onefme', path=args.embedding_path)
예제 #4
0
파일: mnist.py 프로젝트: sborah53/molecules
def main():
    """Train a convolutional VAE on MNIST and save its weights.

    Command-line arguments:
        --weight_path: directory in which ``cvae_mnist.h5`` is written
            (required).

    The model is trained for 10 epochs with a batch size of 128, using the
    MNIST test images as validation data.
    """
    parser = argparse.ArgumentParser(
        description='Convolutional VAE for MNIST.')
    # Required: this value is fed to os.path.join() only after training has
    # finished, and os.path.join(None, ...) raises TypeError. Rejecting a
    # missing value at parse time avoids losing a full training run.
    parser.add_argument('--weight_path',
                        type=str,
                        required=True,
                        help='Path to save network weights.')
    args = parser.parse_args()

    img_rows = img_cols = 28
    # The labels are not needed for training the autoencoder.
    (x_train, _), (x_test, _) = mnist.load_data()
    # Add a trailing channel axis of size 1.
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

    # Convert to float32 and divide by 255.
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255
    x_test /= 255

    optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)

    encoder = EncoderConvolution2D(input_shape=input_shape)

    # NOTE(review): this call appears to populate the two attributes read
    # right after it -- confirm against EncoderConvolution2D.
    encoder._get_final_conv_params()
    num_conv_params = encoder.total_conv_params
    encode_conv_shape = encoder.final_conv_shape

    # The decoder is configured from the encoder's final convolution layer.
    decoder = DecoderConvolution2D(output_shape=input_shape,
                                   enc_conv_params=num_conv_params,
                                   enc_conv_shape=encode_conv_shape)

    cvae = VAE(input_shape=input_shape,
               latent_dim=3,
               encoder=encoder,
               decoder=decoder,
               optimizer=optimizer)

    cvae.train(x_train, validation_data=x_test, batch_size=128, epochs=10)
    weight_path = os.path.join(args.weight_path, 'cvae_mnist.h5')
    cvae.save_weights(weight_path)
예제 #5
0
def generate_embeddings(encoder_hparams_path, encoder_weight_path, cm_path):
    """Embed the contact maps of an h5 file using a saved encoder.

    Args:
        encoder_hparams_path: Path given to ``EncoderHyperparams.load``.
        encoder_weight_path: Path given to the encoder's ``load_weights``.
        cm_path: Path of the h5 file whose ``'contact_maps'`` entry is
            embedded.

    Returns:
        The first item unpacked from the result of ``encoder.embed``.
    """
    hparams = EncoderHyperparams.load(encoder_hparams_path)

    with open_h5(cm_path) as h5_file:
        contact_maps = h5_file['contact_maps']

        # The leading axis counts the matrices; the remaining axes give the
        # shape of a single contact matrix, which is the encoder's input.
        model = EncoderConvolution2D(input_shape=contact_maps.shape[1:],
                                     hyperparameters=hparams)

        # Restore the trained weights before embedding.
        model.load_weights(encoder_weight_path)

        # Embedding happens while the h5 file is still open; everything
        # after the first unpacked item is discarded.
        cm_embeddings, *_ = model.embed(contact_maps)

    return cm_embeddings
예제 #6
0
def main(input_path, out_path, model_id, gpu, epochs, batch_size, latent_dim):
    """Train a convolutional VAE on contact maps and save all artifacts.

    Args:
        input_path: Path of the h5 file holding a ``'contact_maps'`` entry.
        out_path: Directory receiving every output file; created if it
            does not exist.
        model_id: Identifier interpolated into each output file name.
        gpu: Value written (as a string) to ``CUDA_VISIBLE_DEVICES``.
        epochs: Number of epochs passed to ``cvae.train``.
        batch_size: Batch size passed to ``cvae.train``.
        latent_dim: Latent dimension stored in the encoder and decoder
            hyperparameters.
    """
    # Set CUDA environment variables
    os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
    os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu)

    # Every artifact is written into out_path only after training ends, so
    # make sure the directory exists before spending time on training.
    os.makedirs(out_path, exist_ok=True)

    with open_h5(input_path) as input_file:

        # Copy the contact matrix data out of the h5 file into memory
        data = np.array(input_file['contact_maps'])

    # Shuffle data (in place, along the first axis) before the split
    np.random.shuffle(data)

    # 80-20 train validation split index
    split = int(0.8 * len(data))

    # Partition input data into 80-20 train valid split
    train, valid = data[:split], data[split:]

    # Get shape of an individual contact matrix
    # (ignore total number of matrices)
    input_shape = train.shape[1:]

    # Set model hyperparameters shared by encoder and decoder
    shared_hparams = {
        'num_conv_layers': 4,
        'filters': [64, 64, 64, 64],
        'kernels': [3, 3, 3, 3],
        'strides': [1, 2, 1, 1],
        'num_affine_layers': 1,
        'affine_widths': [128],
        'latent_dim': latent_dim,
    }

    # Dropout setting that only the encoder hyperparameters take
    affine_dropouts = [0]

    encoder_hparams = EncoderHyperparams(affine_dropouts=affine_dropouts,
                                         **shared_hparams)
    decoder_hparams = DecoderHyperparams(**shared_hparams)

    encoder = EncoderConvolution2D(input_shape=input_shape,
                                   hyperparameters=encoder_hparams)

    # Get shape attributes of the last encoder layer to define the decoder
    encode_conv_shape, num_conv_params = encoder.get_final_conv_params()

    decoder = DecoderConvolution2D(output_shape=input_shape,
                                   enc_conv_params=num_conv_params,
                                   enc_conv_shape=encode_conv_shape,
                                   hyperparameters=decoder_hparams)

    optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)

    cvae = VAE(input_shape=input_shape,
               encoder=encoder,
               decoder=decoder,
               optimizer=optimizer)

    # Define callbacks to report model performance for analysis
    embed_callback = EmbeddingCallback(train, cvae)
    loss_callback = LossHistory()

    cvae.train(data=train, validation_data=valid,
               batch_size=batch_size, epochs=epochs,
               callbacks=[embed_callback, loss_callback])

    # Define file paths to store model performance and weights
    ae_weight_path = os.path.join(out_path, f'ae-weight-{model_id}.h5')
    encoder_weight_path = os.path.join(out_path, f'encoder-weight-{model_id}.h5')
    encoder_hparams_path = os.path.join(out_path, f'encoder-hparams-{model_id}.pkl')
    decoder_hparams_path = os.path.join(out_path, f'decoder-hparams-{model_id}.pkl')
    embed_path = os.path.join(out_path, f'embed-{model_id}.npy')
    idx_path = os.path.join(out_path, f'embed-idx-{model_id}.npy')
    loss_path = os.path.join(out_path, f'loss-{model_id}.npy')
    val_loss_path = os.path.join(out_path, f'val-loss-{model_id}.npy')

    # Save weights, hyperparameters, and model performance.
    # Save encoder weights separately so the full model doesn't need to be
    # loaded during the outlier detection stage.
    cvae.save_weights(ae_weight_path)
    encoder.save_weights(encoder_weight_path)
    encoder_hparams.save(encoder_hparams_path)
    decoder_hparams.save(decoder_hparams_path)
    embed_callback.save(embed_path=embed_path, idx_path=idx_path)
    loss_callback.save(loss_path=loss_path, val_loss_path=val_loss_path)