예제 #1
0
 def __get_consensus_from_files(self, folder_consensus: str):
     """Load, for every scoring scheme, the stored consensus of each dataset.

     Fills self.__consensus[scheme] with a list of (dataset, consensus) pairs,
     reading each consensus from folder_consensus/<scheme.b5>/<dataset file>.
     """
     for scheme in self.__scoring_schemes:
         self.__consensus[scheme] = [
             (ds, Consensus.get_consensus_from_file(
                 join_paths(folder_consensus, str(scheme.b5), name_file(ds.name))))
             for ds in self._datasets
         ]
예제 #2
0
 def __init__(self, dataset_folder: str, dataset_selector: DatasetSelector = None):
     """Initialise the experiment: load the Orphanet base from the
     'supplementary_data' folder two levels above dataset_folder, then drop
     every dataset whose gold standard is empty.
     """
     super().__init__(dataset_folder, dataset_selector)
     supplementary_folder = join_paths(
         get_parent_path(get_parent_path(dataset_folder)), "supplementary_data")
     self._orphanetParser = OrphanetParser.get_orpha_base_for_vldb(supplementary_folder)
     self._datasets_gs = {}
     self._remove_datasets_empty_goldstandard()
예제 #3
0
 def __init__(self,
              dataset_folder: str,
              scoring_schemes: List[ScoringScheme],
              top_k_to_test: List[int],
              algo: MedianRanking = get_algorithm(Algorithm.ParCons, parameters={"bound_for_exact": 150}),
              dataset_selector: DatasetSelector = None,
              ):
     """Set up the experiment.

     :param dataset_folder: folder containing the input datasets
     :param scoring_schemes: scoring schemes to evaluate
     :param top_k_to_test: the k values to test
     :param algo: median-ranking algorithm; NOTE(review): the default instance
                  is created once at definition time and shared across calls —
                  presumably stateless, confirm before relying on it
     :param dataset_selector: optional selector restricting the datasets
     """
     super().__init__(dataset_folder=dataset_folder, dataset_selector=dataset_selector)
     # Supplementary data (Orphanet base files) lives two levels above the dataset folder.
     self.__orphanetParser = OrphanetParser.get_orpha_base_for_vldb(
         join_paths(get_parent_path(get_parent_path(dataset_folder)), "supplementary_data"))
     self.__algo = algo
     self.__remove_useless_datasets()
     # Fix: the original initialised self.__scoring_schemes = [] and then
     # immediately overwrote it with the parameter; the dead assignment is removed.
     self.__consensus = {}
     self.__scoring_schemes = scoring_schemes
     self.__top_k_to_test = top_k_to_test
from corankco.experiments.orphanet_parser import OrphanetParser
from corankco.experiments.geneNcbiParser import GeneNcbiParser
from corankco.utils import join_paths

# Interactive script: match each Orphanet gene against NCBI genes by similarity
# and collect the candidate mapping(s) per Orphanet gene.
folder_input = input("folder path containing data NCBI and orphanet ?")

# Parse the Orphanet XML export and the NCBI gene data file from the given folder.
orphaParser = OrphanetParser(join_paths(folder_input, "en_product6.xml"))

geneNcbiParser = GeneNcbiParser(join_paths(folder_input, "dataGeneNCBI.txt"))

# res maps each Orphanet gene -> list of (NCBI gene, similarity, nb common DBs) candidates.
res = {}
cpt = 0  # count of Orphanet genes seen; not used below — presumably a debugging leftover
# print(str(geneNcbiParser.get_gene(285362)))
for geneOrpha in orphaParser.get_genes():
    cpt += 1
    res[geneOrpha] = []
    for geneNCBI in geneNcbiParser.get_genes():
        sim, nb_db_common = geneOrpha.similarity(geneNCBI)
        if sim > 0:
            res[geneOrpha].append((geneNCBI, sim, nb_db_common))
            # Tie-break as soon as two candidates exist: when exactly one of the
            # first two candidates shares the Orphanet gene's name, keep only it.
            if len(res[geneOrpha]) > 1:
                if geneOrpha.has_same_name(res[geneOrpha][0][0]):
                    if not geneOrpha.has_same_name(res[geneOrpha][1][0]):
                        res[geneOrpha] = [res[geneOrpha][0]]
                elif geneOrpha.has_same_name(res[geneOrpha][1][0]):
                    res[geneOrpha] = [res[geneOrpha][1]]

# NOTE(review): plain string concatenation — if folder_input has no trailing
# separator the CSV is created beside the folder, not inside it; join_paths
# (used above) would be safer. Confirm intended path before changing.
mapping_final = open(folder_input + "mapping_genes_geneNCBI_orphanet.csv", "w")

# NOTE(review): res[geneOrpha][0] raises IndexError for genes with no candidate;
# the loop body in this excerpt never writes to mapping_final and the handle is
# never closed — the script looks truncated here, verify against the full file.
for geneOrpha, mapping_ncbi in res.items():
    ncbi_associated = res[geneOrpha][0][0]
예제 #5
0
 def get_orpha_base_for_vldb(folder_data_files: str):
     """Build an Orphanet base from the standard VLDB file layout.

     Expects folder_data_files to contain the Orphanet XML export plus the two
     mapping CSV files, and delegates to OrphanetParser.get_orpha_base.
     """
     xml_export = join_paths(folder_data_files, "en_product6.xml")
     ncbi_mapping = join_paths(folder_data_files, "mapping_genes_geneNCBI_orphanet.csv")
     disease_mapping = join_paths(folder_data_files, "mappingDiseaseID.csv")
     return OrphanetParser.get_orpha_base(xml_export, ncbi_mapping, disease_mapping)