def train_dnn_classifier_deep_cc(classifier, train_set, val_set, test_data):
    """
    Train a subtype classifier and score it by arg-max label matching.

    The model is built from ``classifier`` with an input width taken from the
    training beta matrix and one output per distinct training subtype, then
    fitted on generators wrapping the training and validation sets.

    :param classifier: callable returning a model exposing ``fit`` and
        ``predict``
    :param train_set: mapping with a ``"beta"`` table and a ``"pheno"`` table
        that has a ``"subtype"`` column
    :param val_set: validation data handed to ``MethylationArrayGenerator``
    :param test_data: mapping with a ``"beta"`` table and a ``"pheno"`` table
    :return: ``(model, accuracy)`` where accuracy is the mean of the 0/1
        per-sample matches between predicted and expected labels
    """
    model = classifier(
        input_shape=train_set["beta"].shape[1],
        model_serialization_path="../data/models/classifier/",
        dropout_rate=0.3,
        output_shape=len(train_set["pheno"]["subtype"].unique()),
    )

    training_batches = MethylationArrayGenerator(train_set, "subtype")
    validation_batches = MethylationArrayGenerator(val_set, "subtype")
    stopper = EarlyStopping(monitor="val_loss", min_delta=0.05, patience=20)
    # Third positional argument is presumably the epoch budget -- confirm
    # against the classifier's ``fit`` signature.
    model.fit(training_batches, validation_batches, 40, verbose=0,
              callbacks=[stopper])

    # Column order of the one-hot encoding maps an output index to its label.
    label_names = pd.get_dummies(train_set["pheno"]["subtype"]).columns.to_list()

    predictions = model.predict(test_data["beta"].to_numpy())
    # NOTE(review): every value of the pheno table is flattened here, which
    # presumably assumes it holds a single label column -- confirm.
    expected_labels = test_data["pheno"].values.ravel()

    hits = [
        1 if label_names[np.argmax(scores)] == expected else 0
        for scores, expected in zip(predictions, expected_labels)
    ]
    return model, np.mean(hits)
def train_dnn_classifier(classifier, train_set, val_set, test_data):
    """
    Train a subtype classifier and evaluate it on the test data.

    :param classifier: callable returning a model exposing ``fit`` and
        ``evaluate``
    :param train_set: mapping with a ``"beta"`` table and a ``"pheno"`` table
        that has a ``"subtype"`` column
    :param val_set: validation data handed to ``MethylationArrayGenerator``
    :param test_data: mapping with a ``"beta"`` table and a ``"pheno"`` table
        that has a ``"subtype"`` column
    :return: ``(model, test_accuracy)`` where ``test_accuracy`` is whatever
        ``model.evaluate`` returns for the one-hot encoded test labels
    """
    feature_count = train_set["beta"].shape[1]
    class_count = len(train_set["pheno"]["subtype"].unique())
    model = classifier(
        input_shape=feature_count,
        model_serialization_path="../data/models/classifier/",
        dropout_rate=0.3,
        output_shape=class_count,
    )

    training_batches = MethylationArrayGenerator(train_set, "subtype")
    validation_batches = MethylationArrayGenerator(val_set, "subtype")
    stopper = EarlyStopping(monitor="val_loss", min_delta=0.05, patience=20)
    # Third positional argument is presumably the epoch budget -- confirm
    # against the classifier's ``fit`` signature.
    model.fit(training_batches, validation_batches, 500, verbose=0,
              callbacks=[stopper])

    test_features = test_data["beta"].to_numpy()
    test_targets = pd.get_dummies(test_data["pheno"]["subtype"]).to_numpy()
    return model, model.evaluate(test_features, test_targets)
def methylation_array_kcv(dataset, model_class, model_params, output_target,
                          k=10, verbose=0, callbacks=None):
    """
    Repeated-split evaluation of a model that implements AbstractClassifier.

    For each of the ``k`` rounds the dataset is split again with
    ``split_methylation_array_by_pheno``, a fresh model is built and fitted,
    and the model is evaluated on both the test and the validation split.

    :param dataset: the methylation array filename or the methylation array
        itself
    :param model_class: the model class
    :param model_params: a dictionary containing the parameters to init the
        class
    :param output_target: the label (column of the ``"pheno"`` table)
    :param k: the number of rounds; must be positive, otherwise the final
        averaging raises ``ZeroDivisionError``
    :param verbose: verbose mode for fit method
    :param callbacks: the callbacks to pass to the fit method; ``None``
        (the default) means no callbacks
    :return: a tuple ``(average validation accuracy, average test accuracy)``
    """
    # A ``None`` sentinel avoids sharing one mutable default list across calls.
    if callbacks is None:
        callbacks = []

    test_accuracies, val_accuracies = [], []
    for _ in range(k):
        training_set, test_set, validation_set = split_methylation_array_by_pheno(
            dataset, output_target)

        model = model_class(**model_params)
        # Third positional argument is presumably the epoch budget -- confirm
        # against the classifier's ``fit`` signature.
        model.fit(MethylationArrayGenerator(training_set, output_target),
                  MethylationArrayGenerator(validation_set, output_target),
                  500,
                  verbose=verbose,
                  callbacks=callbacks)

        test_accuracies.append(
            model.evaluate(
                test_set["beta"].to_numpy(),
                pd.get_dummies(test_set["pheno"][output_target]).to_numpy()))
        # Encode only the target column and convert the result to NumPy,
        # mirroring the test-set evaluation above. The previous code one-hot
        # encoded the whole pheno table and passed a DataFrame to evaluate.
        val_accuracies.append(
            model.evaluate(
                validation_set["beta"].to_numpy(),
                pd.get_dummies(validation_set["pheno"][output_target]).to_numpy()))

    return (sum(val_accuracies) / len(val_accuracies),
            sum(test_accuracies) / len(test_accuracies))