Beispiel #1
0
 def load_slim(self, module: Module, slim_url: str, slim_cache_path: str):
     """Load a slim ontology and remember its class nodes for ``module``.

     The slim file is obtained through ``self._get_cached_file`` and turned
     into an ontology restricted by the relations configured for the
     module. The ids of all nodes whose ``"type"`` is ``"CLASS"`` are then
     stored on the instance.

     Args:
         module: Selects the relations used for the subontology and the
             attribute that receives the result: ``Module.GO`` ->
             ``self.go_slim``, ``Module.DO_EXPERIMENTAL`` ->
             ``self.do_slim``, ``Module.EXPRESSION`` -> ``self.exp_slim``.
             For any other value the ontology is still loaded but nothing
             is stored.
         slim_url: Source URL handed to ``self._get_cached_file``.
         slim_cache_path: Cache path handed to ``self._get_cached_file``.

     Returns:
         None. The method does nothing when either ``slim_url`` or
         ``slim_cache_path`` is falsy.
     """
     if not slim_url or not slim_cache_path:
         return

     # Resolve everything that depends on the module up front: the relations
     # used to build the subontology, the log line, and the target attribute.
     if module == Module.GO:
         relations = self.go_relations
         log_message, target_attr = "Setting GO Slim", "go_slim"
     elif module == Module.DO_EXPERIMENTAL:
         relations = self.do_relations
         log_message, target_attr = "Setting DO Slim", "do_slim"
     elif module == Module.EXPRESSION:
         # Expression slims are built without a relation filter.
         relations = None
         log_message, target_attr = "Setting Expression Slim", "exp_slim"
     else:
         relations = None
         log_message, target_attr = None, None

     factory = OntologyFactory()
     slim_file = self._get_cached_file(file_source_url=slim_url,
                                       cache_path=slim_cache_path)
     slim_onto = factory.create(slim_file).subontology(relations=relations)

     # Keep only the nodes explicitly typed as classes.
     slim_classes = set()
     for node_id in slim_onto.nodes():
         node_info = slim_onto.node(node_id)
         if "type" in node_info and node_info["type"] == "CLASS":
             slim_classes.add(node_id)

     if target_attr is not None:
         logger.info(log_message)
         setattr(self, target_attr, slim_classes)
Beispiel #2
0
def test_semsearch():
    """Exercise ``SemSearchEngine`` on the POMBASE associations for ``GENES``.

    Steps:
      1. Parse the POMBASE GAF file and keep only associations whose
         subject label is listed in ``GENES``.
      2. Build an association set over the ``ONT`` ontology, then replace
         its ontology with the set's own ``subontology()``.
      3. Precompute all MICAs and score every ordered pair of subjects.

    The only hard check is that the cosine score of a subject with itself
    is (numerically) 1; everything else is printed for manual inspection.
    """
    afa = AssociationSetFactory()
    ont = OntologyFactory().create(ONT)
    parser = GafParser()
    assocs = parser.parse(POMBASE, skipheader=True)
    assocs = [a for a in assocs if a['subject']['label'] in GENES]
    aset = afa.create_from_assocs(assocs, ontology=ont)

    # Swap the full ontology for the association set's subontology
    # (presumably just the terms the retained associations need -- confirm).
    ont = aset.subontology()
    aset.ontology = ont
    logging.info('Genes=%s Terms=%s', len(aset.subjects), len(ont.nodes()))

    print('STATS={}'.format(aset.as_dataframe().describe()))

    sse = SemSearchEngine(assocmodel=aset)

    logging.info('Calculating all MICAs')
    sse.calculate_all_micas()

    logging.info('Doing pairwise')
    for i in aset.subjects:
        for j in aset.subjects:
            sim = sse.pw_score_cosine(i, j)
            if i == j:
                # A subject compared with itself must be maximally similar;
                # the threshold leaves room for floating-point error.
                assert sim > 0.9999, (
                    'cosine self-similarity of {} is {}'.format(i, sim))
            tups = sse.pw_score_resnik_bestmatches(i, j)
            print('{} x {} = {} // {}'.format(i, j, sim, tups))