Ejemplo n.º 1
0
def train_autoencoder_all(methylation_array, latent_dimension):
    """Fit a Giskard autoencoder on ``methylation_array`` and return it.

    The leading 10% of the rows (by position) are held out as the
    validation set and the remaining rows form the training set; the rows
    are not shuffled here. Both subsets are wrapped in an
    ``AutoencoderGenerator`` before being passed to ``fit``.

    Training stops early when ``val_loss`` fails to improve by at least
    0.05 for 10 consecutive checks. The value 500 is passed as the third
    positional argument of ``fit`` (presumably the maximum number of
    epochs -- confirm against ``Giskard.fit``).

    Args:
        methylation_array: Two-dimensional table supporting ``.shape`` and
            ``.iloc`` (e.g. a pandas DataFrame); the second axis gives the
            input size of the model.
        latent_dimension: Forwarded to ``Giskard`` as ``latent_dimension``.

    Returns:
        The ``Giskard`` instance after ``fit`` has been called on it.
    """
    # Size of the validation split: 10% of the rows, truncated towards zero.
    holdout_rows = int(0.1 * methylation_array.shape[0])
    validation_data = AutoencoderGenerator(
        methylation_array.iloc[:holdout_rows, :])
    training_data = AutoencoderGenerator(
        methylation_array.iloc[holdout_rows:, :])

    # Build the model; its input width is the number of columns.
    input_width = methylation_array.shape[1]
    autoencoder = Giskard(input_width,
                          latent_dimension=latent_dimension,
                          model_serialization_path="../data/models/")

    stop_on_plateau = EarlyStopping(monitor="val_loss",
                                    min_delta=0.05,
                                    patience=10)
    autoencoder.fit(training_data,
                    validation_data,
                    500,
                    callbacks=[stop_on_plateau])
    return autoencoder
Ejemplo n.º 2
0
# Collect, for every PAM50 miRNA, the first field of each record in
# ``mirnas[m]`` whose third field exceeds 0.8 (presumably gene name and
# interaction score -- confirm against how ``mirnas`` is built).
# NOTE(review): a gene matched by several miRNAs is appended more than once,
# which yields duplicated columns in the selections below -- confirm this is
# intended.
for m in pam50_mirnas:
    for gene in mirnas[m]:
        if gene[2] > 0.8:
            over_rate_genes.append(gene[0])

# dataset = pickle.load(open("../data/mrna_exp_all.pkl", "rb"))[over_rate_mrna]
# Restrict the expression table to the selected gene columns.
dataset = dataset[over_rate_genes]

# Generation of training and validation set: the first 10% of the rows (by
# position) are held out for validation, the rest is used for training.
val_size = int(dataset.shape[0] * 0.1)
validation_set = AutoencoderGenerator(dataset.iloc[:val_size, :])
training_set = AutoencoderGenerator(dataset.iloc[val_size:, :])

# Autoencoder training. Early stopping triggers when ``val_loss`` does not
# improve by at least 0.05 for 10 consecutive checks; 2000 is the third
# positional argument of ``fit`` (presumably the epoch limit -- confirm).
mrna_encoder = Giskard(dataset.shape[1],
                       latent_dimension=ld,
                       model_serialization_path="../data/models/")
mrna_encoder.fit(
    training_set,
    validation_set,
    2000,
    callbacks=[EarlyStopping(monitor="val_loss", min_delta=0.05, patience=10)])

# Creating an embedded representation of the mRNA methylation array.
# The input file is opened in a ``with`` block so the handle is closed
# deterministically instead of being left to the garbage collector.
with open("../data/mrna_exp_ma.pkl", "rb") as mrna_file:
    mrna_to_encode = pickle.load(mrna_file)

# Column names are truncated at the first '.' before selecting the same gene
# columns the encoder was trained on.
mrna_to_encode["beta"] = mrna_to_encode["beta"].rename(
    columns=lambda g: g.split('.')[0])[over_rate_genes]
mrna_dataset = mrna_encoder.encode_methylation_array(mrna_to_encode)

# Closing the output file explicitly guarantees the pickle is fully flushed
# to disk before the script continues.
with open("../data/mrna_embedded_pam.pkl", "wb") as embedded_file:
    pickle.dump(mrna_dataset, embedded_file)

# Just a check on ground truth