Ejemplo n.º 1
0
    def train_vae(self, case_config):
        """Cross-validate a VAE for one hyperparameter case.

        Trains one VAE per fold of ``self.datasets`` (each fold held out
        as the validation set exactly once), logs the per-fold validation
        loss, and returns the average loss over the folds actually run.

        Args:
            case_config: Hyperparameter case forwarded to
                ``self.get_model_config``.

        Returns:
            dict: hyperopt-style result with ``status``, ``loss`` (average
            validation loss), ``name`` and the resolved ``model_config``.
        """
        model_config = self.get_model_config(case_config)
        create_dir(model_config["model_dir"])

        # Derive the fold count from the split itself rather than
        # hard-coding 10, so any k-fold configuration works.
        n_folds = len(self.datasets)
        total_valid_loss = 0.0
        folds_run = 0
        for k in range(n_folds):
            # Every fold except the k-th forms the training set.
            train_dataset = Dataset.concatenate(*(self.datasets[:k] +
                                                  self.datasets[(k + 1):]))
            valid_dataset = self.datasets[k]
            # Start training!
            vae = VAE(model_config)

            # Keep debug runs short.
            epochs = 2 if self.debug else 100

            vae.train(train_dataset,
                      epochs=epochs,
                      batch_size=50,
                      validation_dataset=valid_dataset)

            fold_valid_loss = vae.evaluate(valid_dataset)
            self.logger.info("{}|Fold #{} Loss = {:f}".format(
                model_config["name"], k + 1, fold_valid_loss))

            total_valid_loss += fold_valid_loss
            folds_run += 1

            if self.debug:
                break

        # BUG FIX: the original divided by a hard-coded 10 even when debug
        # mode stopped after a single fold, under-reporting the loss.
        # Divide by the number of folds actually evaluated instead.
        avg_valid_loss = total_valid_loss / folds_run if folds_run else 0.0
        self.logger.info("{}|Avg Validation Loss = {:f}".format(
            model_config["name"], avg_valid_loss))

        self.case_counter += 1

        return {
            "status": STATUS_OK,
            "loss": avg_valid_loss,
            "name": model_config["name"],
            "model_config": model_config
        }
Ejemplo n.º 2
0
    def train_final_vae(self, model_config):
        """Train a final VAE on all folds combined and save latent reps.

        Renames the model with a ``_FULL`` suffix, trains on the
        concatenation of every fold in ``self.datasets``, encodes the full
        feature matrix, and writes a table of
        ``cell_id | dim1..dimK | cell_type | cell_subtype`` to
        ``<model_dir>/latent_representations.txt``.

        Args:
            model_config: Mutable model configuration dict; its ``name``
                and ``model_dir`` entries are updated in place.
        """
        model_config["name"] = model_config["name"] + "_FULL"
        model_dir = self.get_model_dir(model_config["name"])
        create_dir(model_dir)
        model_config["model_dir"] = model_dir

        n_epochs = 2 if self.debug else 100
        full_dataset = Dataset.concatenate(*self.datasets)

        self.logger.info("Training Final VAE: " + model_config["name"])
        final_vae = VAE(model_config)
        # The full dataset doubles as the validation set here: there is no
        # held-out fold for the final model.
        final_vae.train(full_dataset,
                        epochs=n_epochs,
                        batch_size=50,
                        validation_dataset=full_dataset)
        loss = final_vae.evaluate(full_dataset)
        self.logger.info("{}|Loss = {:f}".format(model_config["name"], loss))

        # Typo fixed in the log message ("represenations").
        self.logger.info("Creating latent representations...")
        latent_reps = final_vae.encode(full_dataset.features)

        # Column layout: sample_data[0] = cell ids, then the latent
        # dimensions, then sample_data[1] = cell types and
        # sample_data[2] = cell subtypes.
        results = np.hstack((np.expand_dims(full_dataset.sample_data[0],
                                            axis=1), latent_reps,
                             np.expand_dims(full_dataset.sample_data[1],
                                            axis=1),
                             np.expand_dims(full_dataset.sample_data[2],
                                            axis=1)))

        header = ["cell_ids"]
        for dim in range(1, model_config["latent_size"] + 1):
            header.append("dim{}".format(dim))
        header.append("cell_type")
        header.append("cell_subtype")
        header = np.array(header)

        results = np.vstack((header, results))

        self.logger.info("Saving results")
        # BUG FIX: a dangling, over-indented `writer.writerow(r)` followed
        # this call in the original (undefined `writer`/`r`, broken
        # indentation) — removed; save_data_table performs the write.
        save_data_table(
            results, model_config["model_dir"] + "/latent_representations.txt")


# Standalone script fragment: train a VAE on the full dataset and start
# assembling a latent-representation table.
# NOTE(review): this fragment appears truncated — the `header` built below
# is never applied/saved within the visible lines, and `model_config` is
# used but never defined here; presumably both come from surrounding code.
# Confirm before running in isolation.
cell_ids, features, cell_types, cell_subtypes = load_data()

# Stratify folds by cell subtype; carry ids/types/subtypes along as
# per-sample metadata (sample_data[0..2] below).
datasets = stratified_kfold(
    features, cell_subtypes,
    [cell_ids, cell_types, cell_subtypes],
    n_folds=5, convert_labels_to_int=True)
full_dataset = Dataset.concatenate(*datasets)
n_epochs = 200

final_vae = VAE(model_config)
# No held-out fold here: the final model trains on everything.
final_vae.train(full_dataset,
                epochs=n_epochs, batch_size=model_config["batch_size"])
loss = final_vae.evaluate(full_dataset)
print(loss)

latent_reps = final_vae.encode(full_dataset.features)
# Column layout: cell ids | latent dims | cell types | cell subtypes.
results = np.hstack((
    np.expand_dims(full_dataset.sample_data[0], axis=1),
    latent_reps,
    np.expand_dims(full_dataset.sample_data[1], axis=1),
    np.expand_dims(full_dataset.sample_data[2], axis=1)
))

# Header row matching the column layout of `results` above.
header = ["cell_ids"]
for l in range(1, model_config["latent_size"] + 1):
    header.append("dim{}".format(l))
header.append("cell_type")
header.append("cell_subtype")