def test_get_common_ancestors(self):
     """Check common-ancestor lookup on GO terms, with and without an excluded term.

     First verifies that the experimental 'P' terms of gene WB:WBGene00000912
     have at least one common ancestor. Then four experimental 'P' annotations
     are added for gene WB:WBGene00003931, GO:0040024 is appended to the
     configured ``exclude_terms``, and the test verifies that GO:0040024 is
     not among the common ancestors computed for that gene.
     """
     self.load_go_ontology()
     first_generator = OntologySentenceGenerator(gene_id="WB:WBGene00000912", module=Module.GO,
                                                 data_manager=self.df, config=self.conf_parser)
     first_terms = first_generator.terms_groups[('P', '')]["EXPERIMENTAL"]
     first_ancestors = get_all_common_ancestors(first_terms, first_generator.ontology)
     self.assertTrue(len(first_ancestors) > 0, "Common ancestors not found")

     # Flatten the existing per-subject annotations, then add the extra records.
     all_annotations = []
     for gene_annotations in self.df.go_associations.associations_by_subj.values():
         all_annotations.extend(gene_annotations)
     for go_term in ("GO:0043055", "GO:0061065", "GO:0043054", "GO:0043053"):
         all_annotations.append(
             DataManager.create_annotation_record(source_line="", gene_id="WB:WBGene00003931",
                                                  gene_symbol="", gene_type="gene", taxon_id="",
                                                  object_id=go_term, qualifiers="", aspect="P",
                                                  ecode="EXP", references="", prvdr="WB", date=""))
     self.df.go_associations = AssociationSetFactory().create_from_assocs(
         assocs=all_annotations, ontology=self.df.go_ontology)

     self.conf_parser.config["go_sentences_options"]["exclude_terms"].append("GO:0040024")
     second_generator = OntologySentenceGenerator(gene_id="WB:WBGene00003931", module=Module.GO,
                                                  data_manager=self.df, config=self.conf_parser)
     second_terms = second_generator.terms_groups[('P', '')]["EXPERIMENTAL"]
     second_ancestors = get_all_common_ancestors(second_terms, second_generator.ontology)
     self.assertTrue("GO:0040024" not in second_ancestors, "Common ancestors contain blacklisted term")
 def test_set_associations(self):
     """Set two molecular-function ('F') GO annotations and check the stored set is truthy."""
     # (gene id, gene symbol, GO term) for each record; all other fields are shared.
     record_specs = (("1", "a", "GO:0019901"),
                     ("2", "b", "GO:0005515"))
     records = [
         DataManager.create_annotation_record("", gene, symbol, "protein_coding", "001", go_term,
                                              "", "F", "EXP", None, "WB", "")
         for gene, symbol, go_term in record_specs
     ]
     association_set = AssociationSetFactory().create_from_assocs(assocs=records,
                                                                  ontology=self.df.go_ontology)
     self.df.set_associations(associations_type=DataType.GO, associations=association_set,
                              config=self.conf_parser)
     self.assertTrue(self.df.go_associations)
 def test_remap_associations(self):
     """Check that an annotation to GO:0018996 is stored with object id GO:0042303.

     A single record for subject "1" is passed through ``set_associations``;
     the first stored association for that subject must point to GO:0042303.
     """
     record = DataManager.create_annotation_record("", "1", "a", "protein_coding", "001", "GO:0018996",
                                                   "", "F", "EXP", None, "WB", "")
     association_set = AssociationSetFactory().create_from_assocs(assocs=[record],
                                                                  ontology=self.df.go_ontology)
     self.df.set_associations(associations_type=DataType.GO, associations=association_set,
                              config=self.conf_parser)
     stored = self.df.go_associations.associations_by_subj["1"][0]
     self.assertEqual(stored["object"]["id"], "GO:0042303")
 def _load_expression_cluster_file(
         self,
         file_cache_path,
         file_url,
         load_into_data,
         add_to_expression_ontology_annotations: bool = False):
     """Load a tab-separated expression cluster file into ``load_into_data``.

     The first line of the file is skipped as a header. For every other line
     the first column is used as key and the remaining columns are stored as
     a list under that key. Within that list, element 2 is split on commas
     and passed through ``WBDataManager.get_replaced_terms_arr`` with the
     RENAME_TERMS property of the expression module, and element 3 (when
     non-empty) is split on commas with " study" and " analysis" removed
     from each item.

     Args:
         file_cache_path: cache location forwarded to ``_get_cached_file``.
         file_url: source URL forwarded to ``_get_cached_file``.
         load_into_data: mapping that receives one entry per data line; it
             is modified in place (assumed to store values by reference,
             like a ``dict``).
         add_to_expression_ontology_annotations: when True, each term in
             element 2 that ``expression_ontology.resolve_names`` can resolve
             is added as an "Enriched" annotation for gene ``"WB:" + key``
             on top of the current expression associations, and the
             resulting set is stored through ``set_associations``.
     """
     expr_clust_file = self._get_cached_file(cache_path=file_cache_path,
                                             file_source_url=file_url)
     associations = []
     # Cache of term name -> resolved ontology id (None when unresolved), so
     # each distinct name is resolved at most once.
     terms_ids_map = {}
     if add_to_expression_ontology_annotations:
         associations = [
             association for subj_associations in
             self.expression_associations.associations_by_subj.values()
             for association in subj_associations
         ]
     terms_replacement_regex = self.config.get_module_property(
         module=Module.EXPRESSION, prop=ConfigModuleProperty.RENAME_TERMS)
     # The context manager guarantees the file handle is closed, also when a
     # malformed line raises part-way through.
     with open(expr_clust_file) as cluster_file:
         next(cluster_file, None)  # skip the header line (no-op on an empty file)
         for line in cluster_file:
             linearr = line.strip().split("\t")
             fields = linearr[1:]
             load_into_data[linearr[0]] = fields
             fields[2] = WBDataManager.get_replaced_terms_arr(
                 fields[2].split(","), terms_replacement_regex)
             if fields[3]:
                 fields[3] = [
                     word.replace(" study", "").replace(" analysis", "")
                     for word in fields[3].split(",")
                 ]
             if not add_to_expression_ontology_annotations:
                 continue
             for term in fields[2]:
                 if term not in terms_ids_map:
                     term_ids = self.expression_ontology.resolve_names(
                         [term])
                     terms_ids_map[term] = term_ids[0] if term_ids else None
                 term_id = terms_ids_map[term]
                 if term_id:
                     associations.append(
                         DataManager.create_annotation_record(
                             line, "WB:" + linearr[0], "", "gene", "",
                             term_id, ["Enriched"], "A", "IDA", "", "", ""))
     if add_to_expression_ontology_annotations:
         self.set_associations(
             DataType.EXPR,
             associations=AssociationSetFactory().create_from_assocs(
                 assocs=associations, ontology=self.expression_ontology),
             config=self.config)
Beispiel #5
0
 def get_associations(gene_id, term_ids, qualifiers, aspect, ecode):
     """Build one annotation record per term id for the given gene.

     Every record shares ``gene_id``, ``qualifiers``, ``aspect`` and ``ecode``;
     gene type is "gene", provider is "WB" and the remaining fields are empty
     strings. Records are returned in the iteration order of ``term_ids``.
     """
     records = []
     for term_id in term_ids:
         record = DataManager.create_annotation_record(
             source_line="", gene_id=gene_id, gene_symbol="", gene_type="gene", taxon_id="",
             object_id=term_id, qualifiers=qualifiers, aspect=aspect, ecode=ecode,
             references="", prvdr="WB", date="")
         records.append(record)
     return records
Beispiel #6
0
 def load_associations_from_file(
         self,
         associations_type: DataType,
         associations_url: str,
         associations_cache_path: str,
         config: GenedescConfigParser,
         association_additional_url: str = None,
         association_additional_cache_path: str = None) -> None:
     """Load the associations of the given type from a (cached) file.

     Behaviour per ``associations_type``:

     * ``DataType.GO``: delegated unchanged to the parent implementation.
     * ``DataType.EXPR``: the tab-separated file is read line by line,
       skipping lines that start with "!". Rows whose column 4 is a node of
       the expression ontology become annotation records; the resulting set
       is stored in ``self.expression_associations`` after removing the
       terms listed in the EXCLUDE_TERMS property of the expression module.
     * ``DataType.DO``: the file is parsed with ``GafParser``. When both
       additional arguments are given, only the "IEA" associations from that
       file are kept and the non-"IEA" rows of the additional tab-separated
       file are added to them. The resulting set is stored in
       ``self.do_associations`` after removing the terms listed in the
       EXCLUDE_TERMS property of the DO_EXPERIMENTAL module.

     Any other type only produces the log message.

     Args:
         associations_type: which kind of associations to load.
         associations_url: source URL of the main associations file.
         associations_cache_path: cache location of the main associations file.
         config: configuration used to look up the excluded terms.
         association_additional_url: source URL of the additional file
             (used only for ``DataType.DO``).
         association_additional_cache_path: cache location of the additional
             file (used only for ``DataType.DO``).
     """
     logger.info("Loading associations from file")
     if associations_type == DataType.GO:
         super().load_associations_from_file(
             associations_type=associations_type,
             associations_url=associations_url,
             associations_cache_path=associations_cache_path,
             config=config)
     elif associations_type == DataType.EXPR:
         associations = []
         file_path = self._get_cached_file(
             cache_path=associations_cache_path,
             file_source_url=associations_url)
         # The context manager closes the handle even if a row fails to parse.
         with open(file_path) as associations_file:
             for line in associations_file:
                 stripped = line.strip()
                 if stripped.startswith("!"):
                     continue
                 linearr = stripped.split("\t")
                 if not self.expression_ontology.node(linearr[4]):
                     continue
                 gene_id = linearr[0] + ":" + linearr[1]
                 qualifiers = linearr[3].split("|")
                 # NOTE(review): str.split with a separator always returns
                 # at least one element, so the emptiness check can never be
                 # true and an empty column yields [""]. Kept as-is to avoid
                 # changing the stored qualifiers; confirm the intent.
                 if (not qualifiers or "Partial" in qualifiers
                         or "Certain" in qualifiers):
                     qualifiers = ["Verified"]
                 associations.append(
                     DataManager.create_annotation_record(
                         line, gene_id, linearr[2], linearr[11],
                         linearr[12], linearr[4], qualifiers,
                         linearr[8], linearr[6], linearr[5].split("|"),
                         linearr[14], linearr[13]))
         self.expression_associations = AssociationSetFactory(
         ).create_from_assocs(assocs=associations,
                              ontology=self.expression_ontology)
         self.expression_associations = self.remove_blacklisted_annotations(
             association_set=self.expression_associations,
             ontology=self.expression_ontology,
             terms_blacklist=config.get_module_property(
                 module=Module.EXPRESSION,
                 prop=ConfigModuleProperty.EXCLUDE_TERMS))
     elif associations_type == DataType.DO:
         self.do_associations = AssociationSetFactory().create_from_assocs(
             assocs=GafParser().parse(file=self._get_cached_file(
                 cache_path=associations_cache_path,
                 file_source_url=associations_url),
                                      skipheader=True),
             ontology=self.do_ontology)
         if association_additional_cache_path and association_additional_url:
             # Keep only the IEA associations from the main file; all other
             # evidence types are taken from the additional file instead.
             associations = [
                 association for subj_associations in
                 self.do_associations.associations_by_subj.values()
                 for association in subj_associations
                 if association["evidence"]["type"] == "IEA"
             ]
             file_path = self._get_cached_file(
                 cache_path=association_additional_cache_path,
                 file_source_url=association_additional_url)
             header = True
             with open(file_path) as additional_file:
                 for line in additional_file:
                     stripped = line.strip()
                     if stripped.startswith("!"):
                         continue
                     if header:
                         # The first non-"!" line is the column header.
                         header = False
                         continue
                     linearr = stripped.split("\t")
                     if self.do_ontology.node(
                             linearr[10]) and linearr[16] != "IEA":
                         gene_ids = [linearr[2]]
                         if linearr[1] == "allele":
                             # For "allele" rows the gene ids come from the
                             # comma-separated column 4 instead of column 2.
                             gene_ids = linearr[4].split(",")
                         for gene_id in gene_ids:
                             associations.append(
                                 DataManager.create_annotation_record(
                                     line, gene_id, linearr[3],
                                     linearr[1], linearr[0],
                                     linearr[10], linearr[9].split("|"),
                                     "D", linearr[16],
                                     linearr[18].split("|"),
                                     linearr[20], linearr[19]))
             self.do_associations = AssociationSetFactory(
             ).create_from_assocs(assocs=associations,
                                  ontology=self.do_ontology)
         self.do_associations = self.remove_blacklisted_annotations(
             association_set=self.do_associations,
             ontology=self.do_ontology,
             terms_blacklist=config.get_module_property(
                 module=Module.DO_EXPERIMENTAL,
                 prop=ConfigModuleProperty.EXCLUDE_TERMS))
 def test_set_covering_with_ontology(self):
     """Generate DO-via-orthology sentences for MGI:88452 with "ic" trimming.

     Fifteen ISS disease annotations are attached to the gene, trimming is
     configured to the "ic" algorithm with at most 5 terms, and the resulting
     description is printed. The test makes no assertion on the output.
     """
     self.load_do_ontology()
     orth_options = self.conf_parser.config["do_via_orth_sentences_options"]
     orth_options["trimming_algorithm"] = "ic"
     orth_options["max_num_terms"] = 5

     # The first six annotated terms are also the high-priority terms below.
     priority_terms = ["DOID:0080028", "DOID:0080056", "DOID:14789",
                       "DOID:0080026", "DOID:14415", "DOID:0080045"]
     remaining_terms = ["DOID:3371", "DOID:8886", "DOID:674", "DOID:5614", "DOID:11830",
                        "DOID:8398", "DOID:2256", "DOID:5327", "DOID:1123"]
     disease_records = [
         DataManager.create_annotation_record(source_line="", gene_id="MGI:88452",
                                              gene_symbol="", gene_type="gene", taxon_id="",
                                              object_id=disease_id, qualifiers="", aspect="D",
                                              ecode="ISS", references="", prvdr="WB", date="")
         for disease_id in priority_terms + remaining_terms
     ]
     self.df.do_associations = AssociationSetFactory().create_from_assocs(
         assocs=disease_records, ontology=self.df.do_ontology)
     sentence_generator = OntologySentenceGenerator(gene_id="MGI:88452", module=Module.DO_ORTHOLOGY,
                                                    data_manager=self.df, config=self.conf_parser)
     module_sentences = sentence_generator.get_module_sentences(
         config=self.conf_parser, aspect='D', qualifier='', merge_groups_with_same_prefix=True,
         keep_only_best_group=True, high_priority_term_ids=priority_terms)
     print(module_sentences.get_description())