def load_slim(self, module: Module, slim_url: str, slim_cache_path: str):
    """Load a slim ontology and store the ids of its class nodes.

    The slim file is fetched through ``self._get_cached_file`` and parsed with
    ``OntologyFactory``; the resulting ontology is then reduced with
    ``subontology(relations=...)``. The ids of all nodes whose ``"type"``
    attribute equals ``"CLASS"`` are collected into a set, which is assigned
    to ``self.go_slim``, ``self.do_slim`` or ``self.exp_slim`` depending on
    ``module``.

    The method does nothing when ``slim_url`` or ``slim_cache_path`` is falsy.

    Args:
        module: selects both the relations passed to ``subontology`` and the
            attribute that receives the slim set.
        slim_url: source location of the slim file, forwarded as
            ``file_source_url`` to ``self._get_cached_file``.
        slim_cache_path: forwarded as ``cache_path`` to
            ``self._get_cached_file``.
    """
    if not (slim_url and slim_cache_path):
        return

    # Only GO and DO supply their own relation set; every other module
    # (including EXPRESSION) passes relations=None.
    if module == Module.GO:
        relations = self.go_relations
    elif module == Module.DO_EXPERIMENTAL:
        relations = self.do_relations
    else:
        relations = None

    slim_file = self._get_cached_file(file_source_url=slim_url, cache_path=slim_cache_path)
    slim_onto = OntologyFactory().create(slim_file).subontology(relations=relations)

    # Keep only nodes explicitly typed as "CLASS"; fetch each node's
    # attributes once instead of once per test.
    slim_set = set()
    for node_id in slim_onto.nodes():
        node_attrs = slim_onto.node(node_id)
        if "type" in node_attrs and node_attrs["type"] == "CLASS":
            slim_set.add(node_id)

    # NOTE(review): for a module other than the three below the slim is still
    # loaded above but the result is discarded -- confirm whether that is
    # intended.
    if module == Module.GO:
        logger.info("Setting GO Slim")
        self.go_slim = slim_set
    elif module == Module.DO_EXPERIMENTAL:
        logger.info("Setting DO Slim")
        self.do_slim = slim_set
    elif module == Module.EXPRESSION:
        logger.info("Setting Expression Slim")
        self.exp_slim = slim_set
def test_semsearch():
    """Exercise ``SemSearchEngine`` on the POMBASE associations for ``GENES``.

    Steps:
      1. Parse the POMBASE GAF file and keep only associations whose subject
         label is in ``GENES``.
      2. Build an association set over the ontology loaded from ``ONT`` and
         replace its ontology with the set's own sub-ontology.
      3. Compute all MICAs, then score every ordered pair of subjects.

    The only assertion is that the cosine score of a subject with itself is
    greater than 0.9999; the Resnik best-match results are printed for every
    pair but not checked.
    """
    afa = AssociationSetFactory()
    ont = OntologyFactory().create(ONT)

    assocs = GafParser().parse(POMBASE, skipheader=True)
    assocs = [a for a in assocs if a['subject']['label'] in GENES]

    aset = afa.create_from_assocs(assocs, ontology=ont)
    # Swap in the sub-ontology derived from the association set itself.
    ont = aset.subontology()
    aset.ontology = ont
    logging.info('Genes={} Terms={}'.format(len(aset.subjects), len(ont.nodes())))

    print('STATS={}'.format(aset.as_dataframe().describe()))

    sse = SemSearchEngine(assocmodel=aset)

    logging.info('Calculating all MICAs')
    sse.calculate_all_micas()
    # NOTE: sse.mica_ic_df is not persisted (e.g. to HDF5) by this test.

    logging.info('Doing pairwise')
    subjects = aset.subjects
    for i in subjects:
        for j in subjects:
            sim = sse.pw_score_cosine(i, j)
            if i == j:
                # A subject compared with itself must score (numerically) 1.
                assert sim > 0.9999
            tups = sse.pw_score_resnik_bestmatches(i, j)
            print('{} x {} = {} // {}'.format(i, j, sim, tups))