def train_final_vae(self, model_config):
    """Fit one VAE on the union of all folds and save its latent space to disk.

    Mutates `model_config` in place (appends "_FULL" to the name and sets
    "model_dir") before handing it to the VAE constructor.
    """
    model_config["name"] = model_config["name"] + "_FULL"
    model_dir = self.get_model_dir(model_config["name"])
    create_dir(model_dir)
    model_config["model_dir"] = model_dir

    # Short run in debug mode, full schedule otherwise.
    n_epochs = 2 if self.debug else 200

    full_dataset = Dataset.concatenate(*self.datasets)
    final_vae = VAE(model_config)
    # NOTE(review): validates on the training data itself — presumably
    # intentional for the final full-data fit.
    final_vae.train(full_dataset,
                    epochs=n_epochs,
                    batch_size=50,
                    validation_dataset=full_dataset)

    latent_reps = final_vae.encode(full_dataset.features)

    # Column layout: cell id | latent dims | cell type | cell subtype.
    # sample_data[0..2] are assumed to hold ids/types/subtypes — TODO confirm
    # against the dataset builder.
    results = np.hstack((
        np.expand_dims(full_dataset.sample_data[0], axis=1),
        latent_reps,
        np.expand_dims(full_dataset.sample_data[1], axis=1),
        np.expand_dims(full_dataset.sample_data[2], axis=1),
    ))

    header = np.array(
        ["cell_ids"]
        + ["dim{}".format(d) for d in range(1, model_config["latent_size"] + 1)]
        + ["cell_type", "cell_subtype"])
    results = np.vstack((header, results))
    save_data_table(
        results,
        model_config["model_dir"] + "/latent_representations.txt")
def train_vae(self, case_config):
    """Cross-validate a VAE for one hyperparameter case.

    Trains one model per held-out fold in `self.datasets`, logs each fold's
    validation loss, and returns a hyperopt-style result dict whose "loss"
    is the mean validation loss over the folds actually run.
    """
    model_config = self.get_model_config(case_config)
    create_dir(model_config["model_dir"])

    # Generalized from a hard-coded 10: fold count follows the dataset split.
    n_folds = len(self.datasets)
    total_valid_loss = 0.0
    folds_run = 0
    for k in range(n_folds):
        # Hold out fold k; train on the concatenation of the rest.
        train_dataset = Dataset.concatenate(
            *(self.datasets[:k] + self.datasets[(k + 1):]))
        valid_dataset = self.datasets[k]

        vae = VAE(model_config)
        epochs = 2 if self.debug else 100
        vae.train(train_dataset,
                  epochs=epochs,
                  batch_size=50,
                  validation_dataset=valid_dataset)

        fold_valid_loss = vae.evaluate(valid_dataset)
        self.logger.info("{}|Fold #{} Loss = {:f}".format(
            model_config["name"], k + 1, fold_valid_loss))
        total_valid_loss += fold_valid_loss
        folds_run += 1
        if self.debug:
            # One fold is enough for a smoke test.
            break

    # BUG FIX: the original divided by 10 unconditionally, so in debug mode
    # (loop broken after fold 1) the reported average was the single fold's
    # loss divided by 10. Divide by the number of folds actually completed.
    avg_valid_loss = total_valid_loss / folds_run
    self.logger.info("{}|Avg Validation Loss = {:f}".format(
        model_config["name"], avg_valid_loss))
    self.case_counter += 1

    return {
        "status": STATUS_OK,
        "loss": avg_valid_loss,
        "name": model_config["name"],
        "model_config": model_config
    }
writer = csv.writer( f, delimiter=delimiter, quoting=csv.QUOTE_MINIMAL) for r in data: writer.writerow(r) cell_ids, features, cell_types, cell_subtypes = load_data() datasets = stratified_kfold( features, cell_subtypes, [cell_ids, cell_types, cell_subtypes], n_folds=5, convert_labels_to_int=True) full_dataset = Dataset.concatenate(*datasets) n_epochs = 200 final_vae = VAE(model_config) final_vae.train(full_dataset, epochs=n_epochs, batch_size=model_config["batch_size"]) loss = final_vae.evaluate(full_dataset) print(loss) latent_reps = final_vae.encode(full_dataset.features) results = np.hstack(( np.expand_dims(full_dataset.sample_data[0], axis=1), latent_reps, np.expand_dims(full_dataset.sample_data[1], axis=1), np.expand_dims(full_dataset.sample_data[2], axis=1) )) header = ["cell_ids"] for l in range(1, model_config["latent_size"] + 1):
def run(self):
    """Train a 2-D latent VAE on MNIST and visualize the test-set embedding."""
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # Center on 127.5 then divide by 255: pixels land roughly in [-0.5, 0.5].
    x_train = (x_train.astype("float32") - 127.5) / 255
    x_test = (x_test.astype("float32") - 127.5) / 255

    train_dataset = Dataset(x_train, y_train, flatten=True, to_one_hot=False)
    test_dataset = Dataset(x_test, y_test, flatten=True, to_one_hot=False)

    model_name = "MNIST_VAE"
    model_dir = self.get_model_dir(model_name)
    create_dir(model_dir)
    model_config = {
        "name": model_name,
        "model_dir": model_dir,
        "input_shape": (784, ),
        "continuous": True,
        "encoder_layers": [
            "Dense:256:activation='elu'",
            "BatchNormalization",
        ],
        "latent_size": 2,
        "optimizer": "adam",
    }

    epochs = 3 if self.debug else 50
    vae = VAE(model_config)
    vae.train(train_dataset,
              epochs=epochs,
              batch_size=100,
              validation_dataset=test_dataset)

    # Embed the held-out digits and append their labels as the last column.
    latent_reps = vae.encode(test_dataset.features)
    results = np.hstack(
        (latent_reps, np.expand_dims(test_dataset.labels, axis=1)))

    header = ["dim{}".format(d)
              for d in range(1, model_config["latent_size"] + 1)]
    header.append("digit")
    results = np.vstack((np.array(header), results))

    self.logger.info("Saving results")
    save_data_table(
        results,
        model_config["model_dir"] + "/latent_representations.txt")

    # 2-D scatter of the latent space, colored by digit label.
    plt.figure(figsize=(6, 6))
    plt.scatter(latent_reps[:, 0], latent_reps[:, 1],
                c=y_test, cmap="rainbow")
    plt.colorbar()
    plt.show()