def load_data(organism, conf_parser: GenedescConfigParser):
    """Create and populate the data managers used to describe ``organism``.

    Up to three managers are produced:

    * the main ``WBDataManager`` for ``organism``, populated through
      ``load_all_data_from_file``;
    * a ``DataManager`` holding the human-orthologs GO ontology and
      associations, built only when ``organism`` is ``"c_elegans"``;
    * a sister-species ``WBDataManager`` with GO ontology and associations
      loaded, built only when the organism's entry in the configured
      organisms info carries a truthy ``main_sister_species`` value.

    Args:
        organism: key of the organism in the configured WB organisms info.
        conf_parser: configuration object supplying URLs, the cache
            directory and the organisms metadata.

    Returns:
        The tuple ``(df, sister_df, df_agr)``; ``sister_df`` and ``df_agr``
        are ``None`` when their respective condition does not hold.
    """
    logger = logging.getLogger("WB Gene Description Pipeline - Data loader")
    organisms_info = conf_parser.get_wb_organisms_info()
    df = WBDataManager(
        species=organism,
        do_relations=None,
        go_relations=["subClassOf", "BFO:0000050"],
        config=conf_parser,
    )

    # Human-orthologs GO data is only loaded for C. elegans.
    df_agr = None
    if organism == "c_elegans":
        df_agr = DataManager(
            go_relations=["subClassOf", "BFO:0000050"],
            do_relations=None,
        )
        human_ontology_url = conf_parser.get_wb_human_orthologs_go_ontology()
        human_ontology_cache = os.path.join(
            conf_parser.get_cache_dir(), "wormbase_agr_human", "go_ontology.obo")
        df_agr.load_ontology_from_file(
            ontology_type=DataType.GO,
            ontology_url=human_ontology_url,
            ontology_cache_path=human_ontology_cache,
            config=conf_parser,
        )
        human_assoc_url = conf_parser.get_wb_human_orthologs_go_associations()
        human_assoc_cache = os.path.join(
            conf_parser.get_cache_dir(), "wormbase_agr_human", "go_assoc.daf.gz")
        df_agr.load_associations_from_file(
            associations_type=DataType.GO,
            associations_url=human_assoc_url,
            associations_cache_path=human_assoc_cache,
            config=conf_parser,
        )

    # A missing key and an empty value both mean "no sister species".
    organism_entry = organisms_info[organism]
    if "main_sister_species" in organism_entry:
        sister_species = organism_entry["main_sister_species"]
    else:
        sister_species = None

    sister_df = None
    if sister_species:
        sister_df = WBDataManager(
            species=sister_species,
            do_relations=None,
            go_relations=["subClassOf", "BFO:0000050"],
            config=conf_parser,
        )
        logger.info("Loading GO data for sister species")
        sister_df.load_ontology_from_file(
            ontology_type=DataType.GO,
            ontology_url=sister_df.go_ontology_url,
            ontology_cache_path=sister_df.go_ontology_cache_path,
            config=conf_parser,
        )
        sister_df.load_associations_from_file(
            associations_type=DataType.GO,
            associations_url=sister_df.go_associations_url,
            associations_cache_path=sister_df.go_associations_cache_path,
            config=conf_parser,
        )

    logger.info("Loading all data for main species")
    df.load_all_data_from_file()
    return df, sister_df, df_agr
class TestGOModule(unittest.TestCase):
    """Tests for loading expression data through ``WBDataManager``.

    NOTE(review): a second class named ``TestGOModule`` is defined after
    this one; if both live in the same module the later definition rebinds
    the name and the tests here would not be collected -- confirm whether
    the duplicate is intended.
    """

    def setUp(self):
        """Configure logging and build a fresh ``c_elegans`` data manager."""
        logging.basicConfig(
            filename=None,
            level="ERROR",
            format='%(asctime)s - %(name)s - %(levelname)s: %(message)s')
        logger.info("Starting DataManager tests")
        self.this_dir = os.path.split(__file__)[0]
        self.conf_parser = GenedescConfigParser(
            os.path.join(self.this_dir, "config_test_wb.yml"))
        self.df = WBDataManager(do_relations=None,
                                go_relations=["subClassOf", "BFO:0000050"],
                                config=self.conf_parser,
                                species="c_elegans")

    def test_load_expression_data(self):
        """Load the local anatomy test files and check the parsed annotations."""
        self.df.load_ontology_from_file(
            ontology_type=DataType.EXPR,
            ontology_url="file://" + os.path.join(
                self.this_dir, "data", "anatomy_gd_test.obo"),
            ontology_cache_path=os.path.join(
                self.this_dir, "cache", "anatomy_gd_test.obo"),
            config=self.conf_parser)
        self.df.load_associations_from_file(
            associations_type=DataType.EXPR,
            associations_url="file://" + os.path.join(
                self.this_dir, "data", "anatomy_gd_test.wb"),
            associations_cache_path=os.path.join(
                self.this_dir, "cache", "anatomy_gd_test.wb"),
            config=self.conf_parser)
        # Dedicated assert methods report the offending values on failure,
        # unlike assertTrue(<bool expression>).
        self.assertIsNotNone(self.df.expression_ontology)
        associations_by_subj = (
            self.df.expression_associations.associations_by_subj)
        self.assertIn('WB:WBGene00000001', associations_by_subj)
        # Every annotation in the test file is expected to carry IDA evidence.
        for annotations in associations_by_subj.values():
            for annotation in annotations:
                self.assertEqual(annotation["evidence"]["type"], "IDA")
class TestGOModule(unittest.TestCase):
    """Tests for the data-loading entry points of ``WBDataManager``.

    Each test starts from the fresh ``c_elegans`` manager built in
    ``setUp`` (or builds its own) and loads one kind of data.
    """

    def setUp(self):
        """Configure logging and build a fresh ``c_elegans`` data manager."""
        logging.basicConfig(
            filename=None,
            level="ERROR",
            format='%(asctime)s - %(name)s - %(levelname)s: %(message)s')
        logger.info("Starting DataManager tests")
        self.this_dir = os.path.split(__file__)[0]
        self.conf_parser = GenedescConfigParser(
            os.path.join(self.this_dir, "config_test_wb.yml"))
        self.df = WBDataManager(do_relations=None,
                                go_relations=["subClassOf", "BFO:0000050"],
                                config=self.conf_parser,
                                species="c_elegans")

    def test_load_expression_data(self):
        """Load the local anatomy test files and check the parsed annotations."""
        self.df.load_ontology_from_file(
            ontology_type=DataType.EXPR,
            ontology_url="file://" + os.path.join(
                self.this_dir, "data", "anatomy_gd_test.obo"),
            ontology_cache_path=os.path.join(
                self.this_dir, "cache", "anatomy_gd_test.obo"),
            config=self.conf_parser)
        self.df.load_associations_from_file(
            associations_type=DataType.EXPR,
            associations_url="file://" + os.path.join(
                self.this_dir, "data", "anatomy_gd_test.wb"),
            associations_cache_path=os.path.join(
                self.this_dir, "cache", "anatomy_gd_test.wb"),
            config=self.conf_parser)
        # Dedicated assert methods report the offending values on failure,
        # unlike assertTrue(<bool expression>).
        self.assertIsNotNone(self.df.expression_ontology)
        associations_by_subj = (
            self.df.expression_associations.associations_by_subj)
        self.assertIn('WB:WBGene00000001', associations_by_subj)
        # Every annotation in the test file is expected to carry IDA evidence.
        for annotations in associations_by_subj.values():
            for annotation in annotations:
                self.assertEqual(annotation["evidence"]["type"], "IDA")

    def test_load_disease_data(self):
        """Load DO ontology and associations and expect some IMP evidence."""
        self.df.load_ontology_from_file(
            ontology_type=DataType.DO,
            ontology_url="file://" + os.path.join(
                self.this_dir, os.pardir, "data", "doid.obo"),
            ontology_cache_path=os.path.join(
                self.this_dir, "cache", "doid.obo"),
            config=self.conf_parser)
        self.df.load_associations_from_file(
            associations_type=DataType.DO,
            associations_url=self.df.do_associations_url,
            associations_cache_path=os.path.join(
                self.this_dir, "cache", "do_ann.gaf"),
            association_additional_cache_path=os.path.join(
                self.this_dir, "cache", "do_ann.daf"),
            association_additional_url=self.df.do_associations_new_url,
            config=self.conf_parser)
        # Inspect the disease annotations that were just loaded. The previous
        # version looked at ``expression_associations``, which this test never
        # populates on the fresh fixture.
        # NOTE(review): assumes the manager exposes loaded DO annotations as
        # ``do_associations`` (mirroring ``expression_associations``) --
        # confirm against the data manager.
        self.assertTrue(
            any(annotation["evidence"]["type"] == "IMP"
                for annotations
                in self.df.do_associations.associations_by_subj.values()
                for annotation in annotations))

    def test_load_orthology_data(self):
        """Loading orthology for ``c_remanei`` yields at least one ortholog."""
        df = WBDataManager(do_relations=None,
                           go_relations=["subClassOf", "BFO:0000050"],
                           config=self.conf_parser,
                           species="c_remanei")
        df.load_orthology_from_file()
        self.assertGreater(len(df.orthologs), 0)

    def test_load_protein_domain_data(self):
        """Smoke test: protein domain loading completes without raising."""
        df = WBDataManager(do_relations=None,
                           go_relations=["subClassOf", "BFO:0000050"],
                           config=self.conf_parser,
                           species="c_elegans")
        df.load_protein_domain_information()
        # No property of the loaded data is checked; reaching this line
        # without an exception is the only success criterion.
        self.assertTrue(True)

    def test_expression_the_cell_renaming_to_widely(self):
        """The expression sentence for cdc-48.1 contains "is expressed widely"."""
        self.df.load_ontology_from_file(
            ontology_type=DataType.EXPR,
            ontology_url=self.df.expression_ontology_url,
            ontology_cache_path=self.df.expression_ontology_cache_path,
            config=self.conf_parser)
        self.df.load_associations_from_file(
            associations_type=DataType.EXPR,
            associations_url=self.df.expression_associations_url,
            associations_cache_path=self.df.expression_associations_cache_path,
            config=self.conf_parser)
        gene_desc = GeneDescription(gene_id="WB:WBGene00007352",
                                    gene_name="cdc-48.1",
                                    add_gene_name=False)
        expr_sentence_generator = OntologySentenceGenerator(
            gene_id=gene_desc.gene_id,
            module=Module.EXPRESSION,
            data_manager=self.df,
            config=self.conf_parser)
        expression_module_sentences = (
            expr_sentence_generator.get_module_sentences(
                config=self.conf_parser,
                aspect='A',
                qualifier="Verified",
                merge_groups_with_same_prefix=True,
                keep_only_best_group=False))
        gene_desc.set_or_extend_module_description_and_final_stats(
            module_sentences=expression_module_sentences,
            module=Module.EXPRESSION)
        self.assertIn("is expressed widely", gene_desc.description)