def train_final_vae(self, model_config):
        """Train the final VAE on the full dataset and save its latent space.

        Forces continuous (non-Bernoulli) decoding, tensorboard logging,
        checkpointing, and loss-based early-stopping/checkpoint metrics on
        *model_config* (mutated in place), trains via train_final_model,
        encodes every sample, and writes a header + data table to
        <model_dir>/latent_representations.txt.
        """
        model_config["bernoulli"] = False
        model_config["tensorboard"] = True
        model_config["checkpoint"] = True
        model_config["early_stopping_metric"] = "loss"
        model_config["checkpoint_metric"] = "loss"

        train_results = self.train_final_model(model_config)
        final_vae = train_results["model"]
        full_dataset = train_results["dataset"]

        # "represenations" typo in the log message fixed
        self.logger.info("Encoding latent representations...")
        latent_reps = final_vae.encode(full_dataset.features)

        # Columns: cell_ids | latent dims | cell_type | cell_subtype
        # NOTE(review): assumes sample_data[0..2] are 1-D arrays aligned
        # row-for-row with full_dataset.features — confirm upstream.
        table = np.hstack((
            np.expand_dims(full_dataset.sample_data[0], axis=1),
            latent_reps,
            np.expand_dims(full_dataset.sample_data[1], axis=1),
            np.expand_dims(full_dataset.sample_data[2], axis=1)
        ))

        header = ["cell_ids"]
        for dim in range(1, model_config["latent_size"] + 1):
            header.append("dim{}".format(dim))
        header.append("cell_type")
        header.append("cell_subtype")

        table = np.vstack((np.array(header), table))

        self.logger.info("Saving results")
        save_data_table(
            table,
            model_config["model_dir"] + "/latent_representations.txt")
    def run(self, debug=False):
        """Run the hyperopt search, save per-trial results, and train the
        final VAEs for the best total/reconstruction/KL-divergence losses."""
        self.logger.info("EXPERIMENT START")

        trials, _, best_loss_case_config = self.run_hyperopt(
            self.train_case_model)
        self.logger.info("Finished hyperopt optimization")

        # Save experiment results
        losses = []
        experiment_results = [[
            "model_name",
            "encoder_layers",
            "latent_size",
            "optimizer",
            "batch_size",
            "cv_reconstruction_loss",
            "cv_kl_divergence_loss",
            "cv_total_loss"
        ]]
        for trial in trials.results:
            config = trial["model_config"]
            metrics = trial["avg_valid_metrics"]
            # Trials with any missing metric are reported but excluded from
            # the best-model selection below.
            if None not in metrics.values():
                losses.append((
                    config,
                    metrics["reconstruction_loss"],
                    metrics["kl_divergence_loss"],
                    metrics["loss"]))
            experiment_results.append([
                config["name"],
                config["encoder_layers"],
                config["latent_size"],
                config["optimizer"],
                config["batch_size"],
                metrics["reconstruction_loss"],
                metrics["kl_divergence_loss"],
                metrics["loss"]
            ])
        save_data_table(
            experiment_results,
            self.experiment_dir + "/experiment_results.txt")
        self.logger.info("Saved experiment results")

        # Train the final VAE using the best model configs
        best_loss_model_config = self.get_model_config(best_loss_case_config)
        best_loss_model_config["name"] = "UsokinVAE_BestTotalLoss"

        best_recon_loss_model_config = min(losses, key=lambda e: e[1])[0]
        best_recon_loss_model_config["name"] = "UsokinVAE_BestReconLoss"

        best_kl_loss_model_config = min(losses, key=lambda e: e[2])[0]
        best_kl_loss_model_config["name"] = "UsokinVAE_BestKLDivergenceLoss"

        self.train_final_vae(best_loss_model_config)
        self.train_final_vae(best_recon_loss_model_config)
        self.train_final_vae(best_kl_loss_model_config)

        self.logger.info("EXPERIMENT END")
# Example #3
    def run(self):
        """Run the hyperopt search for the AAE, save per-trial results, and
        train the final AAE with the best total-loss configuration."""
        self.logger.info("EXPERIMENT START")

        trials, _, best_loss_case_config = self.run_hyperopt(
            self.train_case_model)
        self.logger.info("Finished hyperopt optimization")

        # Save experiment results
        losses = []
        experiment_results = [[
            "model_name",
            "encoder_layers",
            "latent_size",
            "discriminator_layers",
            "ae_optimizer",
            "disc_optimizer",
            "batch_size",
            "cv_ae_loss",
            "cv_disc_loss_prior",
            "cv_disc_loss_posterior",
            "cv_disc_loss",
            "cv_adv_loss",
            "cv_total_loss"
        ]]
        for trial in trials.results:
            config = trial["model_config"]
            metrics = trial["avg_valid_metrics"]
            # Only fully-evaluated trials enter the loss ranking.
            if None not in metrics.values():
                losses.append((config, metrics["loss"]))
            experiment_results.append([
                config["name"],
                # TODO: Format the encoder layers better
                config["encoder_layers"],
                config["latent_size"],
                config["discriminator_layers"],
                config["autoencoder_optimizer"],
                config["discriminator_optimizer"],
                config["batch_size"],
                metrics["ae_loss"],
                metrics["disc_loss_prior"],
                metrics["disc_loss_posterior"],
                metrics["disc_loss"],
                metrics["adv_loss"],
                metrics["loss"]
            ])
        save_data_table(
            experiment_results,
            self.experiment_dir + "/experiment_results.txt")
        self.logger.info("Saved experiment results")

        # Train the final AAE using the best model config
        best_loss_model_config = self.get_model_config(best_loss_case_config)
        best_loss_model_config["name"] = "PollenAAE_Final"

        self.train_final_aae(best_loss_model_config)
        self.logger.info("EXPERIMENT END")
    def run(self):
        """Run the hyperopt search, save per-trial results, and train the
        final VAE with the best total-loss configuration."""
        self.logger.info("EXPERIMENT START")

        trials, _, best_loss_case_config = self.run_hyperopt(
            self.train_case_model)
        self.logger.info("Finished hyperopt optimization")

        # Save experiment results
        losses = []
        experiment_results = [[
            "model_name",
            "n_layers",
            "encoder_layers",
            "latent_size",
            "optimizer",
            "batch_size",
            "cv_reconstruction_loss",
            "cv_kl_divergence_loss",
            "cv_total_loss"
        ]]
        for result in trials.results:
            config = result["model_config"]
            metrics = result["avg_valid_metrics"]
            # Only fully-evaluated trials enter the loss ranking.
            if None not in metrics.values():
                losses.append((
                    config,
                    metrics["reconstruction_loss"],
                    metrics["kl_divergence_loss"],
                    metrics["loss"]))
            experiment_results.append([
                config["name"],
                # n_layers is a count, so use integer division; "/" would
                # write a float (e.g. "1.0") into the results table.
                # (Each logical layer is an op pair, e.g. Dense + BatchNorm.)
                len(config["encoder_layers"]) // 2,
                "|".join(config["encoder_layers"]),
                config["latent_size"],
                config["optimizer"],
                config["batch_size"],
                metrics["reconstruction_loss"],
                metrics["kl_divergence_loss"],
                metrics["loss"]
            ])
        save_data_table(
            experiment_results,
            self.experiment_dir + "/experiment_results.txt")
        self.logger.info("Saved experiment results")

        # Train the final VAE using the best model config
        best_loss_model_config = self.get_model_config(best_loss_case_config)
        best_loss_model_config["name"] = "MelanomaVAE_Final"

        self.train_final_vae(best_loss_model_config)
        self.logger.info("EXPERIMENT END")
    def train_final_vae(self, model_config):
        """Train the final VAE on the full dataset; save latents and losses.

        Enables tensorboard, loss-based checkpointing, and loss-based early
        stopping on *model_config* (mutated in place), trains via
        train_final_model, then writes:
          - <model_dir>/latent_representations.txt — per-sample latent table
          - <model_dir>/final_losses.txt — total/reconstruction/KL losses
        """
        model_config["autoencoder_callbacks"]["tensorboard"] = True
        model_config["autoencoder_callbacks"]["checkpoint"] = {
            "metric": "loss",
            "file": "autoencoder_model.weights.h5"
        }
        model_config["autoencoder_callbacks"]["early_stopping"]["metric"] \
            = "loss"

        train_results = self.train_final_model(model_config)
        final_vae = train_results["model"]
        full_dataset = train_results["dataset"]

        # "represenations" typo in the log message fixed
        self.logger.info("Encoding latent representations...")
        latent_reps = final_vae.encode(full_dataset.features)

        # Columns: cell_ids | latent dims | cell_type | cell_subtype
        # NOTE(review): assumes sample_data[0..2] are 1-D arrays aligned
        # row-for-row with full_dataset.features — confirm upstream.
        table = np.hstack((
            np.expand_dims(full_dataset.sample_data[0], axis=1),
            latent_reps,
            np.expand_dims(full_dataset.sample_data[1], axis=1),
            np.expand_dims(full_dataset.sample_data[2], axis=1)
        ))

        header = ["cell_ids"]
        for dim in range(1, model_config["latent_size"] + 1):
            header.append("dim{}".format(dim))
        header.append("cell_type")
        header.append("cell_subtype")

        table = np.vstack((np.array(header), table))

        self.logger.info("Saving results")
        save_data_table(
            table,
            model_config["model_dir"] + "/latent_representations.txt")

        self.logger.info("Saving losses")
        metrics = final_vae.evaluate(full_dataset)
        save_data_table(
            [["metric", "value"],
             ["total_loss", metrics["loss"]],
             ["reconstruction_loss", metrics["reconstruction_loss"]],
             ["kl_divergence_loss", metrics["kl_divergence_loss"]]],
            model_config["model_dir"] + "/final_losses.txt"
        )
    def run(self):
        """Train a 2-D-latent VAE on MNIST, save the test-set latent
        representations, and show a scatter plot colored by digit."""
        (x_train, y_train), (x_test, y_test) = mnist.load_data()
        # NOTE(review): (x - 127.5) / 255 maps pixels to [-0.5, 0.5];
        # confirm intended (a /127.5 divisor would give [-1, 1]).
        x_train = (x_train.astype("float32") - 127.5) / 255
        x_test = (x_test.astype("float32") - 127.5) / 255

        train_dataset = Dataset(x_train,
                                y_train,
                                flatten=True,
                                to_one_hot=False)
        test_dataset = Dataset(x_test, y_test, flatten=True, to_one_hot=False)

        model_name = "MNIST_VAE"
        model_dir = self.get_model_dir(model_name)
        create_dir(model_dir)

        model_config = {
            "name": model_name,
            "model_dir": model_dir,
            "input_shape": (784, ),
            "continuous": True,
            "encoder_layers":
            ["Dense:256:activation='elu'", "BatchNormalization"],
            "latent_size": 2,
            "optimizer": "adam"
        }

        # Short run in debug mode, full training otherwise.
        epochs = 3 if self.debug else 50

        vae = VAE(model_config)
        vae.train(train_dataset,
                  epochs=epochs,
                  batch_size=100,
                  validation_dataset=test_dataset)

        latent_reps = vae.encode(test_dataset.features)

        # Columns: latent dims | digit label
        table = np.hstack(
            (latent_reps, np.expand_dims(test_dataset.labels, axis=1)))

        header = ["dim{}".format(d)
                  for d in range(1, model_config["latent_size"] + 1)]
        header.append("digit")

        table = np.vstack((np.array(header), table))

        self.logger.info("Saving results")
        save_data_table(
            table, model_config["model_dir"] + "/latent_representations.txt")

        # 2-D latent scatter, one color per digit class.
        plt.figure(figsize=(6, 6))
        plt.scatter(latent_reps[:, 0],
                    latent_reps[:, 1],
                    c=y_test,
                    cmap="rainbow")
        plt.colorbar()
        plt.show()