def __get_consensus_from_files(self, folder_consensus: str):
    """Load the stored consensus of every dataset, for every scoring scheme.

    For each scoring scheme, ``self.__consensus[scheme]`` is reset to a new
    list and filled with one ``(dataset, consensus)`` tuple per dataset of
    ``self._datasets``, in iteration order. Each consensus is whatever
    ``Consensus.get_consensus_from_file`` returns for the path
    ``join_paths(folder_consensus, str(scheme.b5), name_file(dataset.name))``.

    :param folder_consensus: root folder under which the consensus files
        are looked up.
    """
    for scheme in self.__scoring_schemes:
        # Register the (still empty) list first, then fill it in place.
        loaded = self.__consensus[scheme] = []
        for current_dataset in self._datasets:
            consensus_path = join_paths(
                folder_consensus,
                str(scheme.b5),
                name_file(current_dataset.name),
            )
            loaded.append(
                (current_dataset, Consensus.get_consensus_from_file(consensus_path))
            )
def __init__(self, dataset_folder: str, dataset_selector: DatasetSelector = None):
    """Set up the experiment and its Orphanet parser.

    Forwards both arguments to the parent initializer, then builds the
    Orphanet parser from the ``supplementary_data`` folder joined to the
    result of applying ``get_parent_path`` twice to ``dataset_folder``.
    Finally creates an empty ``_datasets_gs`` dict and calls
    ``_remove_datasets_empty_goldstandard()``.

    :param dataset_folder: folder given to the parent initializer and used
        to locate the supplementary data.
    :param dataset_selector: forwarded unchanged to the parent initializer.
    """
    super().__init__(dataset_folder, dataset_selector)

    # Resolve <parent of parent of dataset_folder>/supplementary_data step by step.
    parent_folder = get_parent_path(dataset_folder)
    grandparent_folder = get_parent_path(parent_folder)
    supplementary_folder = join_paths(grandparent_folder, "supplementary_data")
    self._orphanetParser = OrphanetParser.get_orpha_base_for_vldb(supplementary_folder)

    # The dict must exist before the filtering step below is run.
    self._datasets_gs = {}
    self._remove_datasets_empty_goldstandard()
def __init__(self,
             dataset_folder: str,
             scoring_schemes: List[ScoringScheme],
             top_k_to_test: List[int],
             algo: MedianRanking = None,
             dataset_selector: DatasetSelector = None,
             ):
    """Set up the experiment, its Orphanet parser and its parameters.

    :param dataset_folder: folder forwarded to the parent initializer; the
        Orphanet data is read from the ``supplementary_data`` folder joined
        to the result of applying ``get_parent_path`` twice to it.
    :param scoring_schemes: scoring schemes stored for the experiment.
    :param top_k_to_test: values of k stored for the experiment.
    :param algo: algorithm to use. When ``None`` (the default), a new
        ``Algorithm.ParCons`` instance with ``bound_for_exact`` set to 150
        is created for this object.
    :param dataset_selector: forwarded unchanged to the parent initializer.
    """
    super().__init__(dataset_folder=dataset_folder, dataset_selector=dataset_selector)

    supplementary_folder = join_paths(
        get_parent_path(get_parent_path(dataset_folder)), "supplementary_data")
    self.__orphanetParser = OrphanetParser.get_orpha_base_for_vldb(supplementary_folder)

    # Build the default here rather than in the signature: a call used as a
    # default value runs once, when the function is defined, so every
    # instance would share that single algorithm object.
    if algo is None:
        algo = get_algorithm(Algorithm.ParCons, parameters={"bound_for_exact": 150})
    self.__algo = algo

    # Kept before the remaining assignments, as in the original ordering.
    self.__remove_useless_datasets()

    self.__consensus = {}
    self.__scoring_schemes = scoring_schemes
    self.__top_k_to_test = top_k_to_test
from corankco.experiments.orphanet_parser import OrphanetParser
from corankco.experiments.geneNcbiParser import GeneNcbiParser
from corankco.utils import join_paths

# Script: for every Orphanet gene, collect the NCBI genes whose similarity is
# strictly positive, then use the gene names to disambiguate between the
# first two candidates.
folder_input = input("folder path containing data NCBI and orphanet ?")
orphaParser = OrphanetParser(join_paths(folder_input, "en_product6.xml"))
geneNcbiParser = GeneNcbiParser(join_paths(folder_input, "dataGeneNCBI.txt"))

# Orphanet gene -> list of (NCBI gene, similarity, number of common databases).
res = {}
cpt = 0
# print(str(geneNcbiParser.get_gene(285362)))
for geneOrpha in orphaParser.get_genes():
    cpt += 1
    candidates = []
    for geneNCBI in geneNcbiParser.get_genes():
        sim, nb_db_common = geneOrpha.similarity(geneNCBI)
        if sim > 0:
            candidates.append((geneNCBI, sim, nb_db_common))

    if len(candidates) > 1:
        # Only the first two candidates are compared by name: when exactly
        # one of them has the same name as the Orphanet gene, keep that one
        # alone; otherwise the candidate list is left untouched.
        first_matches = geneOrpha.has_same_name(candidates[0][0])
        second_matches = geneOrpha.has_same_name(candidates[1][0])
        if first_matches and not second_matches:
            candidates = [candidates[0]]
        elif second_matches and not first_matches:
            candidates = [candidates[1]]

    res[geneOrpha] = candidates

# The output path is built with join_paths, like the input paths above, so it
# does not depend on folder_input ending with a path separator.
# NOTE(review): the visible code creates/truncates this file but never writes
# to it - confirm whether the write step is missing.
with open(join_paths(folder_input, "mapping_genes_geneNCBI_orphanet.csv"), "w") as mapping_final:
    for geneOrpha, mapping_ncbi in res.items():
        if not mapping_ncbi:
            # No NCBI gene had a positive similarity: nothing to associate.
            continue
        ncbi_associated = mapping_ncbi[0][0]
def get_orpha_base_for_vldb(folder_data_files: str):
    """Build the Orphanet base from the three data files of a folder.

    The files ``en_product6.xml``, ``mapping_genes_geneNCBI_orphanet.csv``
    and ``mappingDiseaseID.csv`` are each joined to ``folder_data_files``
    and passed, in that order, to ``OrphanetParser.get_orpha_base``.

    :param folder_data_files: folder that contains the three files.
    :return: the value returned by ``OrphanetParser.get_orpha_base``.
    """
    file_names = (
        "en_product6.xml",
        "mapping_genes_geneNCBI_orphanet.csv",
        "mappingDiseaseID.csv",
    )
    # Paths are built in the same order as the file names listed above.
    orphanet_xml, ncbi_mapping_csv, disease_mapping_csv = [
        join_paths(folder_data_files, file_name) for file_name in file_names
    ]
    return OrphanetParser.get_orpha_base(orphanet_xml, ncbi_mapping_csv, disease_mapping_csv)