Example #1
    def train_final_vae(self, model_config):
        model_config["name"] = model_config["name"] + "_FULL"
        model_dir = self.get_model_dir(model_config["name"])
        create_dir(model_dir)
        model_config["model_dir"] = model_dir

        n_epochs = 2 if self.debug else 200
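        # Combine all CV folds back into one training set.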
        full_dataset = Dataset.concatenate(*self.datasets)
        final_vae = VAE(model_config)
        final_vae.train(full_dataset,
                        epochs=n_epochs,
                        batch_size=50,
                        validation_dataset=full_dataset)
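        # Map each sample to its learned latent representation.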
        latent_reps = final_vae.encode(full_dataset.features)
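        # Column layout: cell_ids | dim1..dimN | cell_type | cell_subtype.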
        results = np.hstack((np.expand_dims(full_dataset.sample_data[0],
                                            axis=1), latent_reps,
                             np.expand_dims(full_dataset.sample_data[1],
                                            axis=1),
                             np.expand_dims(full_dataset.sample_data[2],
                                            axis=1)))

        header = ["cell_ids"]
        for dim in range(1, model_config["latent_size"] + 1):
            header.append("dim{}".format(dim))
        header.append("cell_type")
        header.append("cell_subtype")
        header = np.array(header)

        results = np.vstack((header, results))

        save_data_table(
            results, model_config["model_dir"] + "/latent_representations.txt")
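
save_data_table is a project helper that isn't shown in these examples. A plausible minimal implementation, assuming rows of printable values and a tab delimiter:

def save_data_table(data, filepath, delimiter="\t"):
    # Write a 2-D table (header row + data rows) as delimited text.
    with open(filepath, "w") as f:
        for row in data:
            f.write(delimiter.join(str(value) for value in row) + "\n")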
Example #2
    def train_vae(self, case_config):
        model_config = self.get_model_config(case_config)
        create_dir(model_config["model_dir"])

        avg_valid_loss = 0.0
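        # 10-fold cross-validation: fold k is held out for validation.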
        for k in range(0, 10):
            train_dataset = Dataset.concatenate(*(self.datasets[:k] +
                                                  self.datasets[(k + 1):]))
            valid_dataset = self.datasets[k]
            # Start training!
            vae = VAE(model_config)

            if self.debug:
                epochs = 2
            else:
                epochs = 100

            vae.train(train_dataset,
                      epochs=epochs,
                      batch_size=50,
                      validation_dataset=valid_dataset)

            fold_valid_loss = vae.evaluate(valid_dataset)
            self.logger.info("{}|Fold #{} Loss = {:f}".format(
                model_config["name"], k + 1, fold_valid_loss))

            avg_valid_loss += fold_valid_loss

            if self.debug:
                break

        # Average over the folds that actually ran (1 in debug mode, 10 otherwise).
        avg_valid_loss /= (k + 1)
        self.logger.info("{}|Avg Validation Loss = {:f}".format(
            model_config["name"], avg_valid_loss))

        self.case_counter += 1

        return {
            "status": STATUS_OK,
            "loss": avg_valid_loss,
            "name": model_config["name"],
            "model_config": model_config
        }
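
The dictionary returned above matches hyperopt's objective contract (a "loss" key plus "status": STATUS_OK), so train_vae can serve directly as the objective in a hyperparameter search. A minimal sketch, where the experiment object and the search space are stand-in assumptions:

from hyperopt import fmin, tpe, hp, Trials

# Hypothetical search space; the real experiment likely tunes more fields
# (encoder_layers, optimizer, n_warmup_epochs, batch_size, ...).
space = {"latent_size": hp.choice("latent_size", [2, 8, 16, 32])}

trials = Trials()
best = fmin(fn=experiment.train_vae,  # experiment: instance of the class above
            space=space,
            algo=tpe.suggest,
            max_evals=20,
            trials=trials)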
Example #3
import six
import csv
import os

from framework.common.dataset import Dataset
from framework.common.sampling import stratified_kfold
from framework.keras.autoencoder import VariationalAutoencoder as VAE
import numpy as np

exp_name = "train_usokin-1000g-2layer-vae"
ref_model = "11_UsokinVAE"
model_config = VAE.load_config("results/{}/{}".format(exp_name, ref_model))
model_config["name"] = "UsokinVAE_BestTotalLoss"
model_config["model_dir"] = \
    "/pylon5/mc4s8ap/kchen8/single-cell-deep-learning/results/{}/{}".format(
        exp_name, model_config["name"])
model_config["tensorboard"] = True
model_config["bernoulli"] = False
model_config["checkpoint"] = True
model_config["early_stopping_metric"] = "loss"
model_config["checkpoint_metric"] = "loss"

if not os.path.exists(model_config["model_dir"]):
    os.makedirs(model_config["model_dir"])


def read_data_table(filepath, delimiter="\t"):
    # Read a delimited text file into a list of rows (lists of strings).
    with open(filepath, "r") as f:
        data = [line.rstrip("\n").split(delimiter) for line in f]
    return data
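
A hypothetical call, with an illustrative path, would look like:

table = read_data_table(model_config["model_dir"] + "/latent_representations.txt")
header, rows = table[0], table[1:]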
Example #4
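        # Note: this snippet begins inside a loop over training-log lines;
        # recon_losses and total_losses are built by analogous branches.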
        if "Avg kl_divergence_loss" in line:
            loss = line.rstrip("\n").split("Avg kl_divergence_loss = ")[-1]
            model_name = line.split(" - ")[1].split("|")[0]
            kl_losses[model_name] = float(loss)

experiment_results = [[
    "model_name", "encoder_layers", "latent_size", "optimizer",
    "n_warmup_epochs", "batch_size", "cv_reconstruction_loss",
    "cv_kl_divergence_loss", "cv_total_loss"
]]

for model_name in os.listdir(experiment_dir):
    model_dir = os.path.join(experiment_dir, model_name)
    if os.path.isfile(model_dir) or "_FINAL" in model_name:
        continue
    model_config = VAE.load_config(model_dir)

    recon_loss = recon_losses.get(model_name, "NaN")
    kl_loss = kl_losses.get(model_name, "NaN")
    total_loss = total_losses.get(model_name, "NaN")

    experiment_results.append([
        model_name,
        str("|".join(model_config["encoder_layers"])),
        model_config["latent_size"], model_config["optimizer"],
        model_config["n_warmup_epochs"], model_config["batch_size"],
        recon_loss, kl_loss, total_loss
    ])
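
To persist the summary table, the standard library's csv writer is enough. A minimal sketch, with the output filename being an assumption:

import csv
import os

with open(os.path.join(experiment_dir, "cv_summary.csv"), "w") as f:
    writer = csv.writer(f)
    writer.writerows(experiment_results)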
Example #5
    def run(self):
        (x_train, y_train), (x_test, y_test) = mnist.load_data()
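        # Shift and scale pixels from [0, 255] to [-0.5, 0.5].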
        x_train = (x_train.astype("float32") - 127.5) / 255
        x_test = (x_test.astype("float32") - 127.5) / 255

        train_dataset = Dataset(x_train,
                                y_train,
                                flatten=True,
                                to_one_hot=False)
        test_dataset = Dataset(x_test, y_test, flatten=True, to_one_hot=False)

        model_name = "MNIST_VAE"
        model_dir = self.get_model_dir(model_name)

        create_dir(model_dir)

        model_config = {
            "name": model_name,
            "model_dir": model_dir,
            "input_shape": (784, ),
            "continuous": True,
            "encoder_layers":
            ["Dense:256:activation='elu'", "BatchNormalization"],
            "latent_size": 2,
            "optimizer": "adam"
        }

        if self.debug:
            epochs = 3
        else:
            epochs = 50

        vae = VAE(model_config)
        vae.train(train_dataset,
                  epochs=epochs,
                  batch_size=100,
                  validation_dataset=test_dataset)

        latent_reps = vae.encode(test_dataset.features)

        results = np.hstack(
            (latent_reps, np.expand_dims(test_dataset.labels, axis=1)))

        header = []
        for dim in range(1, model_config["latent_size"] + 1):
            header.append("dim{}".format(dim))
        header.append("digit")
        header = np.array(header)

        results = np.vstack((header, results))

        self.logger.info("Saving results")
        save_data_table(
            results, model_config["model_dir"] + "/latent_representations.txt")

        plt.figure(figsize=(6, 6))
        plt.scatter(latent_reps[:, 0],
                    latent_reps[:, 1],
                    c=y_test,
                    cmap="rainbow")
        plt.colorbar()
        plt.show()
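
plt.show() requires an attached display; on a headless cluster node, a reasonable alternative (standard matplotlib, not part of the original script) is to replace it with a call that saves the figure alongside the latent representations:

        plt.savefig(model_config["model_dir"] + "/latent_scatter.png",
                    dpi=150, bbox_inches="tight")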