Example #1
    def testObjectKeyGeneration(self):
        ''' ensures that the diskcache object's location does not change '''
        CACHE_DIR = get_cache_dir(3)
        d = DiskCache(CACHE_DIR)
        getCacheLocation = lambda x: join(CACHE_DIR, Cache.getObjectId(x))

        d.fetchObjectId(1, str, 1)
        assert exists(getCacheLocation(1))

        d.fetch(str, 2)
        assert exists(getCacheLocation(((2, ), ())))
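For orientation, here is a minimal standalone sketch of the two lookup styles the test above exercises: fetchObjectId(key, fn, *args) stores fn(*args) under an explicit key, while fetch(fn, *args) derives the key from the call arguments themselves. The import path and cache directory are assumptions and are not part of this listing.

# hedged sketch of the DiskCache calls used in the test above
from eWRT.util.cache import DiskCache   # import path is an assumption

cache = DiskCache(".example-cache")      # directory name is illustrative

# explicit key: str(1) is computed once and stored under the object id 1
assert cache.fetchObjectId(1, str, 1) == "1"

# implicit key: str(2) is stored under the argument-derived key ((2,), ())
assert cache.fetch(str, 2) == "2"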
def evalRelationTypes(fname, fGoldStd, methodLabel, method):
    """ evaluates the relation set of the given ontology against the gold standard and returns the results
    @param[in] fname        file name of the ontology to evaluate
    @param[in] fGoldStd     file name of the gold standard ontology
    @param[in] methodLabel  label of the method used in the evaluation
    @param[in] method       method used in the evaluator
    """
    goldStd  = _readOntology( fGoldStd )
    ontology = _readOntology( fname )

    goldStdConcepts  = set(map(str, extractRelationSet(goldStd)))
    ontologyConcepts = set(map(str, extractRelationSet(ontology)))

    log.info("Comparing the relation set %s to the gold standard %s." % (ontologyConcepts, goldStdConcepts))

    res = [ 1 ]
    for scoringMethod in (EqualRel, EqualGroup, SimilarGroup):
        __cache__ = DiskCache(".diskCache-%s-%s" % (scoringMethod.__name__, os.path.basename(fGoldStd)) )
        c = ConceptScoring(ontologyConcepts, goldStdConcepts, scoringMethod, '|')
        key = "%s, %s |" % (ontologyConcepts, goldStdConcepts)
        score = __cache__.fetchObjectId(key, c.score)
        res.append(score)
        # compute precision and recall
        p = float(score) / len(ontologyConcepts)
        r = float(score) / len(goldStdConcepts)
        if p == 0. and r == 0.:
            res.append(0.)
        else:
            res.append(metrics.fMeasure(p, r))

    #print ">>>", len(goldStdConcepts), len(ontologyConcepts), "***", res
    return res
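The loop above derives precision as score / |ontology relations| and recall as score / |gold standard relations|, then combines them with metrics.fMeasure. The implementation of metrics.fMeasure is not shown in this listing; the sketch below assumes the standard F1 definition, i.e. the harmonic mean of precision and recall.

def f_measure(p, r):
    """ hedged stand-in for metrics.fMeasure, assuming the standard
        F1 definition (harmonic mean of precision and recall) """
    if p + r == 0.0:
        return 0.0
    return 2.0 * p * r / (p + r)

# e.g. precision 0.5 and recall 0.25 combine to an F1 of 1/3
assert abs(f_measure(0.5, 0.25) - 1.0 / 3.0) < 1e-9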
Example #4
    def __init__(self, dataSource, cache=True):
        """ @param[in] dataSource implementing the TagInfoService Interface """
        assert isinstance(dataSource, TagInfoService)
        self.dataSource = dataSource
        if cache:
            diskCache = DiskCache("./.coherence-tagcount-cache", 2)
            self.getTagCount = lambda tt: diskCache.fetchObjectId(self.dataSource.__class__.__name__ + str(tt),
                                                                  self.dataSource.getTagInfo, tt)
        else:
            self.getTagCount = self.dataSource.getTagInfo
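The constructor above either forwards getTagCount straight to the data source or routes it through a DiskCache keyed by the data source's class name plus the tag tuple. Below is a standalone sketch of that memoization pattern; the function name, cache path, and import path are hypothetical.

from eWRT.util.cache import DiskCache   # import path is an assumption

disk_cache = DiskCache("./.example-tagcount-cache", 2)   # illustrative path

def cached_tag_count(tag_source, tags):
    """ hedged sketch: fetchObjectId only invokes getTagInfo on a cache
        miss; the key mirrors the one built in __init__ above """
    key = tag_source.__class__.__name__ + str(tags)
    return disk_cache.fetchObjectId(key, tag_source.getTagInfo, tags)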
def evalOntology(fname, fGoldStd, methodLabel, method):
    """ evaluates the concept set of the given ontology against the gold standard and returns the results
    @param[in] fname        file name of the ontology to evaluate
    @param[in] fGoldStd     file name of the gold standard ontology
    @param[in] methodLabel  label of the method used in the evaluation
    @param[in] method       method used in the evaluator
    """
    
    goldStd  = _readOntology( fGoldStd )
    ontology = _readOntology( fname )

    goldStdConcepts  = OntologyConcept.sequenceToOntologyConceptList(extractConceptSet(goldStd))
    ontologyConcepts = OntologyConcept.sequenceToOntologyConceptList(extractConceptSet(ontology))

    log.info("Comparing the ontology concepts %s to the gold standard %s." % (ontologyConcepts, goldStdConcepts))

    res = [ conceptTermCount( ontology ) ]
    for scoringMethod in (EqualTerm, StringEditTerm, PhoneticTerm, WordNetTerm, WikipediaTerm, WebDocumentTerm, GoogleDistanceTerm, OntologyTerm, ):
        __cache__ = DiskCache(".diskCache-%s-%s" % (scoringMethod.__name__, os.path.basename(fGoldStd)) )
        # Methods using neighbor concepts
        if scoringMethod in (WebDocumentTerm, ):
            goldNeighborConcepts = OntologyConcept.statementsToDirectNeighborOntologyConceptList( extractSPO(goldStd) )
            ontoNeighborConcepts = OntologyConcept.statementsToDirectNeighborOntologyConceptList( extractSPO(ontology) )
            c = ConceptScoring(ontoNeighborConcepts, goldNeighborConcepts, scoringMethod, '|', poolSize=1)
            key = "%s, %s |" % (ontoNeighborConcepts, goldNeighborConcepts)

        # methods using all concepts
        else:
            ps = 1 if scoringMethod == OntologyTerm else 4
            c = ConceptScoring(ontologyConcepts, goldStdConcepts, scoringMethod, '|', poolSize=ps)
            key = "%s, %s |" % (ontologyConcepts, goldStdConcepts)

        score = __cache__.fetchObjectId(key, c.score)
        print(scoringMethod, score)
        res.append(score)
        # compute precision and recall
        p = float(score) / len(ontologyConcepts)
        r = float(score) / len(goldStdConcepts)
        if p == 0. and r == 0.:
            res.append(0.)
        else:
            res.append(metrics.fMeasure(p, r))

    return res
Example #7
class SkipTestDiskCached(TestCached):
    @staticmethod
    @DiskCached(get_cache_dir(1))
    def add(a=1, b=2):
        return a + b

    @staticmethod
    @DiskCached(get_cache_dir(2))
    def sub(a, b):
        return a - b

    def setUp(self):
        self.diskCache = DiskCache(get_cache_dir(4))

    def tearDown(self):
        ''' remove the cache directories '''
        for cacheDirNo in range(10):
            if exists(get_cache_dir(cacheDirNo)):
                rmtree(get_cache_dir(cacheDirNo))

    def testObjectKeyGeneration(self):
        ''' ensures that the diskcache object's location does not change '''
        CACHE_DIR = get_cache_dir(3)
        d = DiskCache(CACHE_DIR)
        getCacheLocation = lambda x: join(CACHE_DIR, Cache.getObjectId(x))

        d.fetchObjectId(1, str, 1)
        assert exists(getCacheLocation(1))

        d.fetch(str, 2)
        assert exists(getCacheLocation(((2, ), ())))

    def testContains(self):
        ''' verifies that 'key' in cache works '''
        # diskcache
        assert self.diskCache.fetchObjectId(1, str, 1) == "1"

        assert 1 in self.diskCache
        assert 2 not in self.diskCache

        # diskcached
        assert self.add(12, 14) == 26
        assert self.add.getKey(12, 14) in self.add
        assert 9 not in self.add

    def testDelItem(self):
        ''' verifies that delitem works '''
        # diskcache
        assert self.diskCache.fetch(str, 2) == "2"
        key = self.diskCache.getKey(2)
        assert key in self.diskCache
        del self.diskCache[key]
        assert key not in self.diskCache

        # diskcached
        assert self.add(12, 13) == 25
        key = self.add.getKey(12, 13)
        assert key == ((12, 13), ())
        assert key in self.add
        del self.add[key]
        assert key not in self.add

    def testDirectCall(self):
        ''' tests directly calling the cache object using __call__ '''
        CACHE_DIR = get_cache_dir(4)
        cached_str = DiskCache(CACHE_DIR, fn=str)

        assert cached_str(7) == "7"
        assert cached_str.getKey(7) in cached_str

    def testIterableCache(self):
        ''' tests the iterable cache '''
        CACHE_DIR = get_cache_dir(5)
        i = IterableCache(CACHE_DIR)

        getTestIterator = lambda x: list(range(x))

        for iteratorSize in (4, 5, 6):
            cachedIterator = i.fetch(getTestIterator, iteratorSize)

            for x, y in zip(cachedIterator, getTestIterator(iteratorSize)):
                assert x == y

    @pytest.mark.slow
    def testThreadSafety(self):
        '''  tests whether everything is thread safe '''

        for a in range(1000):
            c = DiskCache(get_cache_dir(6))
            p = Pool(12)

            p.map(f, 60 * [c])
            p.map(g, 60 * [c])

            p.close()
            p.join()
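testThreadSafety maps two module-level helpers, f and g, over the worker pool; they are not part of this listing. A hedged sketch of what such helpers could look like, assuming each one simply exercises the shared DiskCache instance through the calls shown earlier (if Pool is multiprocessing.Pool, the workers must be top-level, picklable functions, which is why they cannot be defined inside the test method).

def f(cache):
    """ hypothetical worker: resolves the same explicit key on every call """
    return cache.fetchObjectId("f-key", str, 42)

def g(cache):
    """ hypothetical worker: resolves an argument-derived key """
    return cache.fetch(str, 23)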