def testObjectKeyGeneration(self):
    '''
    checks that DiskCache stores its entries at the location derived
    from Cache.getObjectId, i.e. that the on-disk location is stable
    '''
    cache_dir = get_cache_dir(3)
    disk_cache = DiskCache(cache_dir)

    def cache_location(key):
        ''' path below cache_dir at which the entry for key is expected '''
        return join(cache_dir, Cache.getObjectId(key))

    # explicit object id: the id itself determines the location
    disk_cache.fetchObjectId(1, str, 1)
    assert exists(cache_location(1))

    # implicit object id: the key is the (args, kwargs) pair of the call
    disk_cache.fetch(str, 2)
    assert exists(cache_location(((2, ), ())))
def testObjectKeyGeneration(self):
    '''
    checks that DiskCache stores its entries at the location derived
    from Cache.getObjectId, i.e. that the on-disk location is stable
    '''
    cache_dir = get_cache_dir(3)
    disk_cache = DiskCache(cache_dir)

    def cache_location(key):
        ''' path below cache_dir at which the entry for key is expected '''
        return join(cache_dir, Cache.getObjectId(key))

    # explicit object id: the id itself determines the location
    disk_cache.fetchObjectId(1, str, 1)
    assert exists(cache_location(1))

    # implicit object id: the key is the (args, kwargs) pair of the call
    disk_cache.fetch(str, 2)
    assert exists(cache_location(((2,), ())))
class Calais(object):
    """
    Handler for submitting text to the OpenCalais web service and
    turning its XML response into a list of dictionaries. Responses are
    cached on disk when a cache directory is configured.
    """

    # class-level defaults; __init__ overrides them per instance
    submitter = USER_AGENT % "Calais"
    allow_distro = "false"
    allow_search = "false"
    api_key = ""

    def __init__(self, submitter, api_key=OPENCALAIS_KEY,
                 allow_distro="false", allow_search="false",
                 cache_dir=OPENCALAIS_CACHE_DIR):
        """
        Creates a new handler for communicating with OpenCalais.

        The parameter 'submitter' must contain a string, identifying
        your application.

        'api_key' must contain a string with your OpenCalais API key
        (get it here: http://developer.opencalais.com/apps/register).

        The optional parameter 'allow_distro', if set to 'true' gives
        OpenCalais permission to distribute the metadata extracted from
        your submissions. The default value for 'allow_distro' is
        'false'.

        The optional parameter 'allow_search', if set to 'true' tells
        OpenCalais that future searches can be performed on the
        extracted metadata. The default value for 'allow_search' is
        'false'.

        The optional parameter 'cache_dir' names the directory in which
        responses are cached; a false value disables caching.
        """
        # kept as an assertion so that the exception type seen by
        # existing callers does not change
        assert api_key, "an OpenCalais API key is required"
        self.submitter = submitter
        # honour the caller's choice instead of hard-coding "false"
        self.allow_distro = allow_distro
        self.allow_search = allow_search
        self.api_key = api_key
        # always define self.cache: analyze() tests it against None
        if cache_dir:
            self.cache = DiskCache(cache_dir, cache_nesting_level=2,
                                   cache_file_suffix=".xml")
        else:
            self.cache = None

    @staticmethod
    def random_id(self=None):
        """
        Creates a random 10-character ID for your submission.

        The 'self' argument is ignored. It is only kept (and made
        optional) so that calls which pass a placeholder argument to
        this static method keep working.
        """
        import string
        chars = string.ascii_letters + string.digits
        return "".join(choice(chars) for _ in range(10))

    @staticmethod
    def content_id(text):
        """
        Creates a SHA1 hash of the text of your submission and returns
        it as a hexadecimal string. Text that is not already a byte
        string is encoded as UTF-8 before hashing.
        """
        import hashlib
        # hash functions operate on bytes only
        if not isinstance(text, bytes):
            text = text.encode("utf-8")
        return hashlib.sha1(text).hexdigest()

    def analyze(self, text, content_type="text/txt"):
        """
        Submits 'text' to OpenCalais for analysis and returns the
        extracted metadata in the format produced by parse().

        Set the content-type to 'text/html' if you are submitting HTML
        data. If a cache is configured, the response is obtained through
        the cache (keyed by the request parameters).
        """
        externalID = self.content_id(text)
        paramsXML = PARAMS_XML % (content_type, self.allow_distro,
                                  self.allow_search, externalID,
                                  self.submitter)
        param = urlencode({
            'licenseID': self.api_key,
            'content': text,
            'paramsXML': paramsXML,
        })

        def get_calais_data(request_body):
            # performs the actual web service request
            return Retrieve(Calais.__name__).open(
                OPENCALAIS_URL, request_body).read()

        # do not fetch the data again, if a file exists in the cache
        if self.cache is None:
            calais_data = get_calais_data(param)
        else:
            calais_data = self.cache.fetch(get_calais_data, param)

        return self.parse(self.unpack(calais_data))

    @staticmethod
    def unpack(calais_data):
        """
        extracts calais' xml response from the data sent by the calais
        webservice: the text of the first 'string' element, prefixed
        with an XML declaration
        """
        dom = minidom.parseString(calais_data)
        payload = dom.getElementsByTagName("string")[0].firstChild.data
        return """<?xml version="1.0" encoding="utf-8"?>\n""" + payload

    @staticmethod
    def cleanup_xml(xml_data):
        """
        removes comments from xml-data-streams provided by opencalais

        @param[in] xml_data
        @returns the xml data without any comments; an unterminated
                 '<!--' (one without a closing '-->') is left untouched
        """
        comment = re.compile(r'<!--[\s\S]*?-->')
        while True:
            cleaned = comment.sub('', xml_data)
            # stop once a pass removes nothing; testing only for the
            # presence of '<!--' never terminates on an unclosed comment
            if cleaned == xml_data:
                return cleaned
            xml_data = cleaned

    @staticmethod
    def parse(xml_data):
        """
        parses opencalais' xml output and returns its dictionary
        representation: a list with one {node name: attributes} entry
        per annotation, where the attributes additionally carry the text
        of the node's first child under the key 'data'
        """
        things = []
        xml_data = Calais.cleanup_xml(xml_data)
        dom = minidom.parseString(xml_data.encode("utf8"))
        for document in dom.getElementsByTagName("CalaisSimpleOutputFormat"):
            for node in document.childNodes:
                if not node.hasChildNodes():
                    continue
                # for 'Topics' the first child is reported instead of
                # the container element itself
                if node.nodeName == 'Topics':
                    node = node.firstChild

                nodeAttr = dict(node.attributes.items())
                nodeAttr['data'] = node.firstChild.data
                things.append({node.nodeName: nodeAttr})
        return things
class SkipTestDiskCached(TestCached):
    ''' tests for DiskCache, the DiskCached decorator and IterableCache '''

    # disk-cached helper living in cache directory 1
    @staticmethod
    @DiskCached(get_cache_dir(1))
    def add(a=1, b=2):
        return a + b

    # disk-cached helper living in cache directory 2
    @staticmethod
    @DiskCached(get_cache_dir(2))
    def sub(a, b):
        return a - b

    def setUp(self):
        ''' provides a DiskCache on cache directory 4 to every test '''
        self.diskCache = DiskCache(get_cache_dir(4))

    def tearDown(self):
        ''' removes every cache directory (0-9) that exists '''
        for cacheDirNo in range(10):
            if not exists(get_cache_dir(cacheDirNo)):
                continue
            rmtree(get_cache_dir(cacheDirNo))

    def testObjectKeyGeneration(self):
        '''
        checks that the location of the diskcache objects is the one
        derived from Cache.getObjectId and therefore does not change
        '''
        cache_dir = get_cache_dir(3)
        disk_cache = DiskCache(cache_dir)

        def cache_location(key):
            return join(cache_dir, Cache.getObjectId(key))

        disk_cache.fetchObjectId(1, str, 1)
        assert exists(cache_location(1))

        disk_cache.fetch(str, 2)
        assert exists(cache_location(((2, ), ())))

    def testContains(self):
        ''' checks the "key in cache" membership test '''
        # diskcache
        fetched = self.diskCache.fetchObjectId(1, str, 1)
        assert fetched == "1"
        assert 1 in self.diskCache
        assert 2 not in self.diskCache

        # diskcached
        total = self.add(12, 14)
        assert total == 26
        assert self.add.getKey(12, 14) in self.add
        assert 9 not in self.add

    def testDelItem(self):
        ''' checks that entries can be removed with "del cache[key]" '''
        # diskcache
        assert self.diskCache.fetch(str, 2) == "2"
        disk_key = self.diskCache.getKey(2)
        assert disk_key in self.diskCache
        del self.diskCache[disk_key]
        assert disk_key not in self.diskCache

        # diskcached
        assert self.add(12, 13) == 25
        add_key = self.add.getKey(12, 13)
        assert add_key == ((12, 13), ())
        assert add_key in self.add
        del self.add[add_key]
        assert add_key not in self.add

    def testDirectCall(self):
        ''' checks calling the cache object directly through __call__ '''
        cache_dir = get_cache_dir(4)
        cached_str = DiskCache(cache_dir, fn=str)

        result = cached_str(7)
        assert result == "7"
        assert cached_str.getKey(7) in cached_str

    def testIterableCache(self):
        ''' checks that IterableCache yields the items of the original '''
        cache_dir = get_cache_dir(5)
        iterable_cache = IterableCache(cache_dir)

        getTestIterator = lambda size: list(range(size))

        for iteratorSize in (4, 5, 6):
            cachedIterator = iterable_cache.fetch(getTestIterator,
                                                  iteratorSize)
            expected = getTestIterator(iteratorSize)
            for cached_item, expected_item in zip(cachedIterator, expected):
                assert cached_item == expected_item

    @pytest.mark.slow
    def testThreadSafety(self):
        '''
        runs the module level workers f and g on one shared DiskCache
        from a pool of 12 workers, 1000 times in a row
        '''
        for _ in range(1000):
            shared_cache = DiskCache(get_cache_dir(6))
            pool = Pool(12)
            pool.map(f, 60 * [shared_cache])
            pool.map(g, 60 * [shared_cache])
            pool.close()
            pool.join()
class Calais:
    """
    Handler for submitting text to the OpenCalais web service and
    turning its XML response into a list of dictionaries. Responses are
    cached on disk when a cache directory is configured.
    """

    # class-level defaults; __init__ overrides them per instance
    submitter = USER_AGENT % "Calais"
    allow_distro = "false"
    allow_search = "false"
    api_key = ""

    def __init__(self, submitter, api_key=OPENCALAIS_KEY,
                 allow_distro="false", allow_search="false",
                 cache_dir=OPENCALAIS_CACHE_DIR):
        """
        Creates a new handler for communicating with OpenCalais.

        The parameter 'submitter' must contain a string, identifying
        your application.

        'api_key' must contain a string with your OpenCalais API key
        (get it here: http://developer.opencalais.com/apps/register).

        The optional parameter 'allow_distro', if set to 'true' gives
        OpenCalais permission to distribute the metadata extracted from
        your submissions. The default value for 'allow_distro' is
        'false'.

        The optional parameter 'allow_search', if set to 'true' tells
        OpenCalais that future searches can be performed on the
        extracted metadata. The default value for 'allow_search' is
        'false'.

        The optional parameter 'cache_dir' names the directory in which
        responses are cached; a false value disables caching.
        """
        # kept as an assertion so that the exception type seen by
        # existing callers does not change
        assert api_key, "an OpenCalais API key is required"
        self.submitter = submitter
        # honour the caller's choice instead of hard-coding "false"
        self.allow_distro = allow_distro
        self.allow_search = allow_search
        self.api_key = api_key
        # always define self.cache: analyze() tests it against None
        if cache_dir:
            self.cache = DiskCache(cache_dir, cache_nesting_level=2,
                                   cache_file_suffix=".xml")
        else:
            self.cache = None

    @staticmethod
    def random_id(self=None):
        """
        Creates a random 10-character ID for your submission.

        The 'self' argument is ignored. It is only kept (and made
        optional) so that calls which pass a placeholder argument to
        this static method keep working.
        """
        import string
        chars = string.ascii_letters + string.digits
        return "".join(choice(chars) for _ in range(10))

    @staticmethod
    def content_id(text):
        """
        Creates a SHA1 hash of the text of your submission and returns
        it as a hexadecimal string. Text that is not already a byte
        string is encoded as UTF-8 before hashing.
        """
        import hashlib
        # hash functions operate on bytes only
        if not isinstance(text, bytes):
            text = text.encode("utf-8")
        return hashlib.sha1(text).hexdigest()

    def analyze(self, text, content_type="text/txt"):
        """
        Submits 'text' to OpenCalais for analysis and returns the
        extracted metadata in the format produced by parse().

        Set the content-type to 'text/html' if you are submitting HTML
        data. If a cache is configured, the response is obtained through
        the cache (keyed by the request parameters).
        """
        externalID = self.content_id(text)
        paramsXML = PARAMS_XML % (content_type, self.allow_distro,
                                  self.allow_search, externalID,
                                  self.submitter)
        param = urlencode({
            'licenseID': self.api_key,
            'content': text,
            'paramsXML': paramsXML,
        })

        def get_calais_data(request_body):
            # performs the actual web service request
            return Retrieve(Calais.__name__).open(
                OPENCALAIS_URL, request_body).read()

        # do not fetch the data again, if a file exists in the cache
        if self.cache is None:
            calais_data = get_calais_data(param)
        else:
            calais_data = self.cache.fetch(get_calais_data, param)

        return self.parse(self.unpack(calais_data))

    @staticmethod
    def unpack(calais_data):
        """
        extracts calais' xml response from the data sent by the calais
        webservice: the text of the first 'string' element, prefixed
        with an XML declaration
        """
        dom = minidom.parseString(calais_data)
        payload = dom.getElementsByTagName("string")[0].firstChild.data
        return """<?xml version="1.0" encoding="utf-8"?>\n""" + payload

    @staticmethod
    def cleanup_xml(xml_data):
        """
        removes comments from xml-data-streams provided by opencalais

        @param[in] xml_data
        @returns the xml data without any comments; an unterminated
                 '<!--' (one without a closing '-->') is left untouched
        """
        comment = re.compile(r'<!--[\s\S]*?-->')
        while True:
            cleaned = comment.sub('', xml_data)
            # stop once a pass removes nothing; testing only for the
            # presence of '<!--' never terminates on an unclosed comment
            if cleaned == xml_data:
                return cleaned
            xml_data = cleaned

    @staticmethod
    def parse(xml_data):
        """
        parses opencalais' xml output and returns its dictionary
        representation: a list with one {node name: attributes} entry
        per annotation, where the attributes additionally carry the text
        of the node's first child under the key 'data'
        """
        things = []
        xml_data = Calais.cleanup_xml(xml_data)
        dom = minidom.parseString(xml_data.encode("utf8"))
        for document in dom.getElementsByTagName("CalaisSimpleOutputFormat"):
            for node in document.childNodes:
                if not node.hasChildNodes():
                    continue
                # for 'Topics' the first child is reported instead of
                # the container element itself
                if node.nodeName == 'Topics':
                    node = node.firstChild

                nodeAttr = dict(node.attributes.items())
                nodeAttr['data'] = node.firstChild.data
                things.append({node.nodeName: nodeAttr})
        return things
class SkipTestDiskCached(TestCached):
    ''' tests for DiskCache, the DiskCached decorator and IterableCache '''

    # disk-cached helper living in cache directory 1
    @staticmethod
    @DiskCached(get_cache_dir(1))
    def add(a=1, b=2):
        return a + b

    # disk-cached helper living in cache directory 2
    @staticmethod
    @DiskCached(get_cache_dir(2))
    def sub(a, b):
        return a - b

    def setUp(self):
        ''' provides a DiskCache on cache directory 4 to every test '''
        self.diskCache = DiskCache(get_cache_dir(4))

    def tearDown(self):
        ''' removes every cache directory (0-9) that exists '''
        for cacheDirNo in range(10):
            if not exists(get_cache_dir(cacheDirNo)):
                continue
            rmtree(get_cache_dir(cacheDirNo))

    def testObjectKeyGeneration(self):
        '''
        checks that the location of the diskcache objects is the one
        derived from Cache.getObjectId and therefore does not change
        '''
        cache_dir = get_cache_dir(3)
        disk_cache = DiskCache(cache_dir)

        def cache_location(key):
            return join(cache_dir, Cache.getObjectId(key))

        disk_cache.fetchObjectId(1, str, 1)
        assert exists(cache_location(1))

        disk_cache.fetch(str, 2)
        assert exists(cache_location(((2,), ())))

    def testContains(self):
        ''' checks the "key in cache" membership test '''
        # diskcache
        fetched = self.diskCache.fetchObjectId(1, str, 1)
        assert fetched == "1"
        assert 1 in self.diskCache
        assert 2 not in self.diskCache

        # diskcached
        total = self.add(12, 14)
        assert total == 26
        assert self.add.getKey(12, 14) in self.add
        assert 9 not in self.add

    def testDelItem(self):
        ''' checks that entries can be removed with "del cache[key]" '''
        # diskcache
        assert self.diskCache.fetch(str, 2) == "2"
        disk_key = self.diskCache.getKey(2)
        assert disk_key in self.diskCache
        del self.diskCache[disk_key]
        assert disk_key not in self.diskCache

        # diskcached
        assert self.add(12, 13) == 25
        add_key = self.add.getKey(12, 13)
        assert add_key == ((12, 13), ())
        assert add_key in self.add
        del self.add[add_key]
        assert add_key not in self.add

    def testDirectCall(self):
        ''' checks calling the cache object directly through __call__ '''
        cache_dir = get_cache_dir(4)
        cached_str = DiskCache(cache_dir, fn=str)

        result = cached_str(7)
        assert result == "7"
        assert cached_str.getKey(7) in cached_str

    def testIterableCache(self):
        ''' checks that IterableCache yields the items of the original '''
        cache_dir = get_cache_dir(5)
        iterable_cache = IterableCache(cache_dir)

        getTestIterator = lambda size: range(size)

        for iteratorSize in (4, 5, 6):
            cachedIterator = iterable_cache.fetch(getTestIterator,
                                                  iteratorSize)
            expected = getTestIterator(iteratorSize)
            for cached_item, expected_item in zip(cachedIterator, expected):
                assert cached_item == expected_item

    @pytest.mark.slow
    def testThreadSafety(self):
        '''
        runs the module level workers f and g on one shared DiskCache
        from a pool of 12 workers, 1000 times in a row
        '''
        for _ in range(1000):
            shared_cache = DiskCache(get_cache_dir(6))
            pool = Pool(12)
            pool.map(f, 60 * [shared_cache])
            pool.map(g, 60 * [shared_cache])
            pool.close()
            pool.join()