예제 #1
0
    def run(self, debug=False):
        """Run the hyperparameter search, train the final VAE, save results.

        Loads the data, builds 10 folds with ``stratified_kfold``, minimizes
        ``self.train_vae`` over the generated search space using
        ``tpe.suggest``, trains a final model from the best configuration and
        writes one ``[name, loss]`` row per trial to
        ``experiment_results.txt`` under ``self.experiment_dir``.

        Args:
            debug: When true, the search is limited to a single evaluation.
                ``self.debug`` has the same effect, so either switch enables
                the short run.
        """
        cell_ids, features, cell_types, cell_subtypes = self.load_data()
        self.datasets = stratified_kfold(features,
                                         cell_subtypes,
                                         [cell_ids, cell_types, cell_subtypes],
                                         n_folds=10,
                                         convert_labels_to_int=True)

        trials = Trials()
        search_space = self.generate_search_space()
        # The ``debug`` argument was previously accepted but never read;
        # honor it together with the instance-level flag.
        n_evals = 1 if (debug or self.debug) else 30
        best = fmin(self.train_vae,
                    space=search_space,
                    algo=tpe.suggest,
                    max_evals=n_evals,
                    trials=trials)

        self.logger.info("Finished hyperopt optimization")

        # fmin reports the best point in hyperopt's own encoding; space_eval
        # maps it back onto the search space before building the config.
        best_model_config = self.get_model_config(
            space_eval(search_space, best))
        self.train_final_vae(best_model_config)

        # Header row first, then one row per recorded trial.
        experiment_results = [["model_name", "10foldcv_loss"]]
        experiment_results.extend(
            [result["name"], result["loss"]] for result in trials.results)
        save_data_table(experiment_results,
                        self.experiment_dir + "/experiment_results.txt")
예제 #2
0
    def __init__(self, n_genes, debug=False):
        """Set up a cross-validated VAE experiment for the Usoskin data.

        Args:
            n_genes: Stored as ``self.input_size`` and echoed in the
                "Loaded ..." log message.
            debug: Forwarded as-is to the parent initializer.
        """
        super(TrainUsoskinVAE, self).__init__(debug=debug)

        self.input_size = n_genes

        # Directory, logger and hyperopt setup all happen before data loading.
        self.setup_dir()
        self.setup_logger()
        self.setup_hyperopt(n_evals=N_EVALS)

        ids, feats, types, subtypes = self.load_data()
        # The subtypes are passed twice: as the second argument (presumably
        # the stratification labels -- confirm against stratified_kfold) and
        # inside the per-sample list handed over as the third argument.
        per_sample = [ids, types, subtypes]
        self.datasets = stratified_kfold(
            feats,
            subtypes,
            per_sample,
            n_folds=N_FOLDS,
            convert_labels_to_int=True)

        loaded_msg = "Loaded {}g Usoskin dataset".format(self.input_size)
        self.logger.info(loaded_msg)

        self.setup_cross_validation(
            n_folds=N_FOLDS,
            datasets=self.datasets,
            model_class=VAE,
            epochs=MAX_EPOCHS)
    def __init__(self, debug=False):
        """Set up the 100-gene, 5-fold VAE experiment.

        Args:
            debug: Forwarded positionally to the parent initializer. When
                ``self.debug`` is true afterwards, the experiment name gets a
                ``DEBUG_`` prefix.
        """
        super(Experiment, self).__init__(debug)

        # NOTE(review): "usokin" looks like a misspelling of "Usoskin"; it is
        # kept verbatim because the name is a runtime identifier.
        base_name = "train_usokin-100g-2layer-vae"
        self.experiment_name = (
            "DEBUG_" + base_name if self.debug else base_name)

        self.setup_dir()
        self.setup_logger()
        self.setup_hyperopt(n_evals=50)

        self.input_size = 100
        fold_count = 5  # shared by the split and the cross-validation setup

        ids, feats, types, subtypes = self.load_data()
        per_sample = [ids, types, subtypes]
        self.datasets = stratified_kfold(
            feats,
            subtypes,
            per_sample,
            n_folds=fold_count,
            convert_labels_to_int=True)
        self.logger.info("Loaded 100g, standardized Usokin dataset")

        self.setup_cross_validation(
            n_folds=fold_count,
            datasets=self.datasets,
            model_class=VAE)
    if root is not None:
        filepath = root + "/" + filepath

    delimiter = str(delimiter) if six.PY2 else delimiter

    with open(filepath, "w") as f:
        writer = csv.writer(
            f, delimiter=delimiter, quoting=csv.QUOTE_MINIMAL)
        for r in data:
            writer.writerow(r)


# Script section: train one VAE on the complete dataset and encode every
# sample. load_data, stratified_kfold, Dataset, VAE and model_config come
# from outside this excerpt.
cell_ids, features, cell_types, cell_subtypes = load_data()

# The data is split into 5 folds and immediately concatenated back into a
# single dataset. Presumably the split is only used for the Dataset
# construction / label-to-int conversion -- TODO confirm.
datasets = stratified_kfold(
    features, cell_subtypes,
    [cell_ids, cell_types, cell_subtypes],
    n_folds=5, convert_labels_to_int=True)
full_dataset = Dataset.concatenate(*datasets)
n_epochs = 200

# Train on the full dataset using the batch size stored in model_config.
final_vae = VAE(model_config)
final_vae.train(full_dataset,
                epochs=n_epochs, batch_size=model_config["batch_size"])
# NOTE: the loss is evaluated on the same dataset the model was trained on.
loss = final_vae.evaluate(full_dataset)
print(loss)

# Encode the features of every sample with the trained model.
latent_reps = final_vae.encode(full_dataset.features)
results = np.hstack((
    np.expand_dims(full_dataset.sample_data[0], axis=1),
    latent_reps,
    np.expand_dims(full_dataset.sample_data[1], axis=1),