def main():
    """Train a convolutional VAE on the FS-Peptide data set.

    Command-line arguments
    ----------------------
    --data_path : str
        Passed to ``FSPeptide`` for both the train and validation partitions.
    --weight_path : str (required)
        Directory in which ``cvae_fspeptide.h5`` is written.
    --embedding_path : str
        Passed to ``EmbeddingCallback.save_embeddings`` as ``path``.

    The model is trained for 100 epochs with a batch size of 512, after
    which the weights and the embeddings collected by the callback are saved.
    """
    parser = argparse.ArgumentParser(
        description='Convolutional VAE for FS-Peptide data.')
    parser.add_argument('--data_path', type=str,
                        help='Path to load fs-peptide data.')
    # Required: the value is only consumed by os.path.join *after* training,
    # so a missing flag would otherwise raise TypeError once all epochs have
    # already run. Failing during argument parsing is much cheaper.
    parser.add_argument('--weight_path', type=str, required=True,
                        help='Path to save network weights.')
    parser.add_argument('--embedding_path', type=str,
                        help='Path to save embeddings.')
    args = parser.parse_args()

    train_data = FSPeptide(args.data_path, partition='train', download=True)
    val_data = FSPeptide(args.data_path, partition='validation',
                         download=True)

    # load_data() returns a pair here; only the first element is used.
    x_train, _ = train_data.load_data()
    x_val, _ = val_data.load_data()

    # Keras complains if height and width dimensions are odd, so append one
    # constant-valued element at the end of axes 1 and 2.
    pad_width = [(0, 0), (0, 1), (0, 1), (0, 0)]
    x_train = np.pad(x_train, pad_width, mode='constant')
    x_val = np.pad(x_val, pad_width, mode='constant')

    input_shape = (22, 22, 1)

    optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)

    encoder = EncoderConvolution2D(input_shape=input_shape)

    # NOTE(review): presumably this call populates the two attributes read
    # below -- confirm against the EncoderConvolution2D implementation.
    encoder._get_final_conv_params()
    num_conv_params = encoder.total_conv_params
    encode_conv_shape = encoder.final_conv_shape

    decoder = DecoderConvolution2D(output_shape=input_shape,
                                   enc_conv_params=num_conv_params,
                                   enc_conv_shape=encode_conv_shape)

    cvae = VAE(input_shape=input_shape,
               latent_dim=3,
               encoder=encoder,
               decoder=decoder,
               optimizer=optimizer)

    callback = EmbeddingCallback(x_train, cvae)
    cvae.train(x_train, validation_data=x_val, batch_size=512, epochs=100,
               callbacks=[callback])

    weight_path = os.path.join(args.weight_path, 'cvae_fspeptide.h5')
    cvae.save_weights(weight_path)

    callback.save_embeddings(filename='fspeptide', path=args.embedding_path)
def main():
    """Embed the FS-Peptide data with a previously trained convolutional VAE.

    Command-line arguments
    ----------------------
    --data_path : str
        Passed to ``FSPeptide`` for both the train and validation partitions.
    --weight_path : str (required)
        Directory containing ``cvae_fspeptide.h5``, which is loaded into the
        model.
    --embedding_path : str (required)
        Directory that receives ``train_embeddings`` and
        ``validation_embeddings`` (``np.save`` appends ``.npy``).
    """
    parser = argparse.ArgumentParser(
        description='Convolutional VAE for FS-Peptide data.')
    parser.add_argument('--data_path', type=str,
                        help='Path to load fs-peptide data.')
    # Both paths below are fed to os.path.join, which raises TypeError on
    # None; requiring them turns that into a clear usage error up front.
    # The help text previously said "save" although this script only loads
    # the weights.
    parser.add_argument('--weight_path', type=str, required=True,
                        help='Path to load network weights.')
    parser.add_argument('--embedding_path', type=str, required=True,
                        help='Path to save embeddings.')
    args = parser.parse_args()

    train_data = FSPeptide(args.data_path, partition='train', download=True)
    val_data = FSPeptide(args.data_path, partition='validation',
                         download=True)

    # load_data() returns a pair here; only the first element is used.
    x_train, _ = train_data.load_data()
    x_val, _ = val_data.load_data()

    # Keras complains if height and width dimensions are odd, so append one
    # constant-valued element at the end of axes 1 and 2.
    pad_width = [(0, 0), (0, 1), (0, 1), (0, 0)]
    x_train = np.pad(x_train, pad_width, mode='constant')
    x_val = np.pad(x_val, pad_width, mode='constant')

    input_shape = (22, 22, 1)

    optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)

    encoder = EncoderConvolution2D(input_shape=input_shape)

    # The decoder is defined from the encoder's final convolution layer.
    encode_conv_shape, num_conv_params = encoder.get_final_conv_params()

    decoder = DecoderConvolution2D(output_shape=input_shape,
                                   enc_conv_params=num_conv_params,
                                   enc_conv_shape=encode_conv_shape)

    cvae = VAE(input_shape=input_shape,
               encoder=encoder,
               decoder=decoder,
               optimizer=optimizer)

    weight_path = os.path.join(args.weight_path, 'cvae_fspeptide.h5')
    cvae.load_weights(weight_path)

    embeddings_train = cvae.embed(x_train)
    embeddings_val = cvae.embed(x_val)

    embed_train_path = os.path.join(args.embedding_path, 'train_embeddings')
    embed_val_path = os.path.join(args.embedding_path,
                                  'validation_embeddings')

    np.save(embed_train_path, embeddings_train)
    np.save(embed_val_path, embeddings_val)
def main():
    """Train a convolutional VAE on the 1FME data set.

    Command-line arguments
    ----------------------
    --data_path : str
        Passed to ``OneFME`` for both the train and validation partitions.
    --weight_path : str (required)
        Directory in which ``cvae_onefme.h5`` is written.
    --embedding_path : str
        Passed to ``EmbeddingCallback.save_embeddings`` as ``path``.

    The model is trained for 100 epochs with a batch size of 512, after
    which the weights and the embeddings collected by the callback are saved.
    """
    parser = argparse.ArgumentParser(
        description='Convolutional VAE for 1FME data.')
    parser.add_argument('--data_path', type=str,
                        help='Path to load 1FME data.')
    # Required: the value is only consumed by os.path.join *after* training,
    # so a missing flag would otherwise raise TypeError once all epochs have
    # already run. Failing during argument parsing is much cheaper.
    parser.add_argument('--weight_path', type=str, required=True,
                        help='Path to save network weights.')
    parser.add_argument('--embedding_path', type=str,
                        help='Path to save embeddings.')
    args = parser.parse_args()

    train_data = OneFME(args.data_path, partition='train', download=True)
    val_data = OneFME(args.data_path, partition='validation', download=True)

    # Unlike a (data, labels) pair, the whole return value is used as input.
    x_train = train_data.load_data()
    x_val = val_data.load_data()

    input_shape = (28, 28, 1)

    optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)

    encoder = EncoderConvolution2D(input_shape=input_shape)

    # NOTE(review): presumably this call populates the two attributes read
    # below -- confirm against the EncoderConvolution2D implementation.
    encoder._get_final_conv_params()
    num_conv_params = encoder.total_conv_params
    encode_conv_shape = encoder.final_conv_shape

    decoder = DecoderConvolution2D(output_shape=input_shape,
                                   enc_conv_params=num_conv_params,
                                   enc_conv_shape=encode_conv_shape)

    cvae = VAE(input_shape=input_shape,
               latent_dim=3,
               encoder=encoder,
               decoder=decoder,
               optimizer=optimizer)

    callback = EmbeddingCallback(x_train, cvae)
    cvae.train(x_train, validation_data=x_val, batch_size=512, epochs=100,
               callbacks=[callback])

    weight_path = os.path.join(args.weight_path, 'cvae_onefme.h5')
    cvae.save_weights(weight_path)

    callback.save_embeddings(filename='onefme', path=args.embedding_path)
def main():
    """Train a convolutional VAE on MNIST and save its weights.

    Command-line arguments
    ----------------------
    --weight_path : str (required)
        Directory in which ``cvae_mnist.h5`` is written.

    Images are reshaped to ``(n, 28, 28, 1)``, converted to float32 and
    divided by 255. The model is trained for 10 epochs with a batch size of
    128, using the MNIST test split as validation data.
    """
    parser = argparse.ArgumentParser(
        description='Convolutional VAE for MNIST.')
    # Required: the value is only consumed by os.path.join *after* training,
    # so a missing flag would otherwise raise TypeError once all epochs have
    # already run. Failing during argument parsing is much cheaper.
    parser.add_argument('--weight_path', type=str, required=True,
                        help='Path to save network weights.')
    args = parser.parse_args()

    img_rows = img_cols = 28

    # Labels are not needed to train the autoencoder, so they are discarded.
    (x_train, _), (x_test, _) = mnist.load_data()

    # Add an explicit trailing channel axis.
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255
    x_test /= 255

    optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)

    encoder = EncoderConvolution2D(input_shape=input_shape)

    # NOTE(review): presumably this call populates the two attributes read
    # below -- confirm against the EncoderConvolution2D implementation.
    encoder._get_final_conv_params()
    num_conv_params = encoder.total_conv_params
    encode_conv_shape = encoder.final_conv_shape

    decoder = DecoderConvolution2D(output_shape=input_shape,
                                   enc_conv_params=num_conv_params,
                                   enc_conv_shape=encode_conv_shape)

    cvae = VAE(input_shape=input_shape,
               latent_dim=3,
               encoder=encoder,
               decoder=decoder,
               optimizer=optimizer)

    cvae.train(x_train, validation_data=x_test, batch_size=128, epochs=10)

    weight_path = os.path.join(args.weight_path, 'cvae_mnist.h5')
    cvae.save_weights(weight_path)
def generate_embeddings(encoder_hparams_path, encoder_weight_path, cm_path):
    """Embed the contact maps stored in an h5 file with a saved encoder.

    Parameters
    ----------
    encoder_hparams_path
        Location handed to ``EncoderHyperparams.load``.
    encoder_weight_path
        Location handed to ``EncoderConvolution2D.load_weights``.
    cm_path
        Location handed to ``open_h5``; the opened file must expose a
        ``'contact_maps'`` entry.

    Returns
    -------
    The first item of whatever ``encoder.embed`` yields for the contact
    maps; any further items are discarded.
    """
    hparams = EncoderHyperparams.load(encoder_hparams_path)

    with open_h5(cm_path) as h5_file:
        # Contact matrix data from the h5 file.
        contact_maps = h5_file['contact_maps']

        # Per-matrix shape: drop the leading axis (the number of matrices).
        matrix_shape = contact_maps.shape[1:]

        model = EncoderConvolution2D(input_shape=matrix_shape,
                                     hyperparameters=hparams)

        # Restore the saved model weights before embedding.
        model.load_weights(encoder_weight_path)

        # Keep only the first item of the embed() result.
        embeddings, *_ = model.embed(contact_maps)

        return embeddings
def main(input_path, out_path, model_id, gpu, epochs, batch_size, latent_dim):
    """Train a convolutional VAE on contact maps and save every artifact.

    Parameters
    ----------
    input_path
        Location handed to ``open_h5``; the opened file must expose a
        ``'contact_maps'`` entry.
    out_path
        Directory that receives the weights, hyperparameters, embeddings and
        loss histories. It is created if it does not exist.
    model_id
        Identifier interpolated into every output file name.
    gpu
        Value exported (as a string) through ``CUDA_VISIBLE_DEVICES``.
    epochs, batch_size
        Forwarded unchanged to ``cvae.train``.
    latent_dim
        Stored under ``'latent_dim'`` in the hyperparameters shared by the
        encoder and the decoder.
    """
    # Set CUDA environment variables
    os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
    os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu)

    # Every artifact is written into out_path only after training finishes.
    # Creating the directory now prevents losing a whole training run to a
    # missing output directory. An empty out_path means the current
    # directory, which os.makedirs cannot be asked to create.
    if out_path:
        os.makedirs(out_path, exist_ok=True)

    with open_h5(input_path) as input_file:
        # Access contact matrix data from h5 file; np.array copies it into
        # memory so it stays usable after the file is closed.
        data = np.array(input_file['contact_maps'])

    # Shuffle data (in place, along the first axis) before the
    # train validation split
    np.random.shuffle(data)

    # 80-20 train validation split index
    split = int(0.8 * len(data))

    # Partition input data into 80-20 train valid split
    train, valid = data[:split], data[split:]

    # Get shape of an individual contact matrix
    # (ignore total number of matrices)
    input_shape = train.shape[1:]

    # Set model hyperparameters for encoder and decoder
    shared_hparams = {
        'num_conv_layers': 4,
        'filters': [64, 64, 64, 64],
        'kernels': [3, 3, 3, 3],
        'strides': [1, 2, 1, 1],
        'num_affine_layers': 1,
        'affine_widths': [128],
        'latent_dim': latent_dim,
    }

    affine_dropouts = [0]

    encoder_hparams = EncoderHyperparams(affine_dropouts=affine_dropouts,
                                         **shared_hparams)
    decoder_hparams = DecoderHyperparams(**shared_hparams)

    encoder = EncoderConvolution2D(input_shape=input_shape,
                                   hyperparameters=encoder_hparams)

    # Get shape attributes of the last encoder layer to define the decoder
    encode_conv_shape, num_conv_params = encoder.get_final_conv_params()

    decoder = DecoderConvolution2D(output_shape=input_shape,
                                   enc_conv_params=num_conv_params,
                                   enc_conv_shape=encode_conv_shape,
                                   hyperparameters=decoder_hparams)

    optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)

    cvae = VAE(input_shape=input_shape,
               encoder=encoder,
               decoder=decoder,
               optimizer=optimizer)

    # Define callbacks to report model performance for analysis
    embed_callback = EmbeddingCallback(train, cvae)
    loss_callback = LossHistory()

    cvae.train(data=train, validation_data=valid,
               batch_size=batch_size, epochs=epochs,
               callbacks=[embed_callback, loss_callback])

    # Define file paths to store model performance and weights
    ae_weight_path = os.path.join(out_path, f'ae-weight-{model_id}.h5')
    encoder_weight_path = os.path.join(out_path,
                                       f'encoder-weight-{model_id}.h5')
    encoder_hparams_path = os.path.join(out_path,
                                        f'encoder-hparams-{model_id}.pkl')
    decoder_hparams_path = os.path.join(out_path,
                                        f'decoder-hparams-{model_id}.pkl')
    embed_path = os.path.join(out_path, f'embed-{model_id}.npy')
    idx_path = os.path.join(out_path, f'embed-idx-{model_id}.npy')
    loss_path = os.path.join(out_path, f'loss-{model_id}.npy')
    val_loss_path = os.path.join(out_path, f'val-loss-{model_id}.npy')

    # Save weights, hyperparameters, and model performance.
    # Save encoder weights separately so the full model doesn't need to be
    # loaded during the outlier detection stage.
    cvae.save_weights(ae_weight_path)
    encoder.save_weights(encoder_weight_path)
    encoder_hparams.save(encoder_hparams_path)
    decoder_hparams.save(decoder_hparams_path)
    embed_callback.save(embed_path=embed_path, idx_path=idx_path)
    loss_callback.save(loss_path=loss_path, val_loss_path=val_loss_path)