Ejemplo n.º 1
0
def train_autoencoder_all(methylation_array, latent_dimension):
    """Fit a ``Giskard`` autoencoder on ``methylation_array`` and return it.

    The leading 10% of the rows (by position) are wrapped in the validation
    generator and the remaining rows in the training generator.

    Args:
        methylation_array: Table indexed with ``.iloc`` whose ``shape[1]`` is
            passed to ``Giskard`` as its first argument.
        latent_dimension: Forwarded to ``Giskard`` as ``latent_dimension``.

    Returns:
        The ``Giskard`` instance after ``fit`` has been called on it.
    """
    # Number of leading rows held out for validation (10%, truncated).
    holdout_rows = int(0.1 * methylation_array.shape[0])

    # Validation generator is built first, then the training one.
    holdout_generator = AutoencoderGenerator(
        methylation_array.iloc[:holdout_rows, :])
    fitting_generator = AutoencoderGenerator(
        methylation_array.iloc[holdout_rows:, :])

    # Autoencoder training
    model = Giskard(
        methylation_array.shape[1],
        latent_dimension=latent_dimension,
        model_serialization_path="../data/models/",
    )
    stop_on_plateau = EarlyStopping(
        monitor="val_loss", min_delta=0.05, patience=10)
    # NOTE(review): 500 is the third positional argument of ``fit`` --
    # presumably the maximum number of epochs; confirm against Giskard.fit.
    model.fit(fitting_generator,
              holdout_generator,
              500,
              callbacks=[stop_on_plateau])
    return model
Ejemplo n.º 2
0
# NOTE(review): `dataset` must already be bound before this point; the
# commented-out load below looks like where it originally came from -- confirm.
# dataset = pickle.load(open("../data/mrna_exp_all.pkl", "rb"))[over_rate_mrna]
dataset = dataset[over_rate_genes]

# Generation of training and validation set: the first 10% of the rows (by
# position) are held out for validation, the remaining rows are used to train.
val_size = int(dataset.shape[0] * 0.1)
validation_set = AutoencoderGenerator(dataset.iloc[:val_size, :])
training_set = AutoencoderGenerator(dataset.iloc[val_size:, :])

# Autoencoder training
mrna_encoder = Giskard(dataset.shape[1],
                       latent_dimension=ld,
                       model_serialization_path="../data/models/")
# NOTE(review): 2000 is the third positional argument of `fit` -- presumably
# the maximum number of epochs; confirm against Giskard.fit.
mrna_encoder.fit(
    training_set,
    validation_set,
    2000,
    callbacks=[EarlyStopping(monitor="val_loss", min_delta=0.05, patience=10)])

# Creating an embedded representation of the mRNA data.
# The input file is opened in a `with` block so the handle is closed as soon
# as the pickle has been read.
with open("../data/mrna_exp_ma.pkl", "rb") as mrna_file:
    mrna_to_encode = pickle.load(mrna_file)
# Keep only the part of each column name before the first '.', then select
# the same `over_rate_genes` columns the encoder was trained on.
mrna_to_encode["beta"] = mrna_to_encode["beta"].rename(
    columns=lambda g: g.split('.')[0])[over_rate_genes]
mrna_dataset = mrna_encoder.encode_methylation_array(mrna_to_encode)
# Closing the output handle explicitly guarantees the pickle is flushed to
# disk before the script moves on.
with open("../data/mrna_embedded_pam.pkl", "wb") as embedded_file:
    pickle.dump(mrna_dataset, embedded_file)

# Just a check on ground truth
# na_filter=False turns off pandas' missing-value detection for this file.
gt_check = pd.read_csv("../data/brca_tcga_pub_clinical_data.tsv",
                       sep="\t",
                       na_filter=False,
                       index_col="Patient ID")