class CachedPersistentStorage(PersistentStorage):
    """Persistent (object store) storage with a read-through cache on local disk.

    Objects are looked up in a LocalStorage folder first; on a miss they are
    fetched from the object-store provider, deserialized with cloudpickle and
    written back to the cache on a best-effort basis.
    """

    def __init__(self, bucket_name=None):
        """Set up the object-store backend and the local cache folder.

        Args:
            bucket_name: forwarded unchanged to PersistentStorage.__init__.
        """
        PersistentStorage.__init__(self, bucket_name)
        self.cache = LocalStorage(folder_name="/mnt/molnsarea/cache")

    def get(self, name, validate=False):
        """Return the object stored under `name`, preferring the local cache.

        Args:
            name: key of the object to fetch.
            validate: forwarded to the provider's get() on a cache miss.

        Returns:
            The deserialized object.

        Raises:
            Whatever the provider's get() or cloudpickle.loads raises when
            the object cannot be fetched or deserialized.
        """
        # setup_provider() is defined outside this class; it is called first,
        # exactly as before, so self.provider is ready for the fallback path.
        self.setup_provider()

        # Try to read it from the cache.  The cache is populated with the
        # already-deserialized object (see put() below), so a hit is returned
        # as-is instead of being run through cloudpickle.loads a second time.
        # Assumes LocalStorage.get() hands back the object that was given to
        # put() -- TODO confirm against LocalStorage.
        try:
            cached = self.cache.get(name)
        except Exception:
            cached = None
        if cached is not None:
            return cached

        # Not in the cache: read it from the object store.
        # NOTE: cloudpickle.loads can execute arbitrary code embedded in the
        # payload, so the bucket contents must be trusted.
        data = cloudpickle.loads(self.provider.get(name, validate))

        # Best-effort write-back.  Errors such as a full disk are deliberately
        # ignored; `except Exception` (rather than a bare except) keeps
        # KeyboardInterrupt and SystemExit propagating.
        try:
            self.cache.put(name, data)
        except Exception:
            pass
        return data
class CachedPersistentStorage(PersistentStorage):
    """Persistent (object store) storage with a read-through cache on local disk.

    Objects are looked up in a LocalStorage folder first; on a miss they are
    fetched from the object-store provider, deserialized with cloudpickle and
    written back to the cache on a best-effort basis.
    """

    def __init__(self, bucket_name=None):
        """Set up the object-store backend and the local cache folder.

        Args:
            bucket_name: forwarded unchanged to PersistentStorage.__init__.
        """
        PersistentStorage.__init__(self, bucket_name)
        self.cache = LocalStorage(folder_name="/mnt/molnsarea/cache")

    def get(self, name, validate=False):
        """Return the object stored under `name`, preferring the local cache.

        Args:
            name: key of the object to fetch.
            validate: forwarded to the provider's get() on a cache miss.

        Returns:
            The deserialized object.

        Raises:
            Whatever the provider's get() or cloudpickle.loads raises when
            the object cannot be fetched or deserialized.
        """
        # setup_provider() is defined outside this class; it is called first,
        # exactly as before, so self.provider is ready for the fallback path.
        self.setup_provider()

        # Try to read it from the cache.  The cache is populated with the
        # already-deserialized object (see put() below), so a hit is returned
        # as-is instead of being run through cloudpickle.loads a second time.
        # Assumes LocalStorage.get() hands back the object that was given to
        # put() -- TODO confirm against LocalStorage.
        try:
            cached = self.cache.get(name)
        except Exception:
            cached = None
        if cached is not None:
            return cached

        # Not in the cache: read it from the object store.
        # NOTE: cloudpickle.loads can execute arbitrary code embedded in the
        # payload, so the bucket contents must be trusted.
        data = cloudpickle.loads(self.provider.get(name, validate))

        # Best-effort write-back.  Errors such as a full disk are deliberately
        # ignored; `except Exception` (rather than a bare except) keeps
        # KeyboardInterrupt and SystemExit propagating.
        try:
            self.cache.put(name, data)
        except Exception:
            pass
        return data
def map_and_aggregate(results, param_set_id, mapper, aggregator=None, cache_results=False):
    """ Reduces a list of results by applying the map function 'mapper'.

        When this function is applied on an engine, it will first look for
        the result object in the local ephemeral storage (cache), then in
        the Shared area (global non-persistent), then in the Object Store
        (global persistent). The first store that yields a non-None object
        wins.

        If cache_results=True, then result objects fetched from the Shared
        area or the Object Store will be written to the local ephemeral
        storage (file cache), so subsequent postprocessing jobs may run
        faster.

        Args:
            results: sequence of filenames (storage keys) to process. It is
                iterated once and len() is taken of it.
            param_set_id: opaque identifier, copied unchanged into the
                returned dict.
            mapper: callable applied to each fetched result object.
            aggregator: callable invoked as aggregator(mapped_value,
                accumulator); the accumulator starts as None. Defaults to
                builtin_aggregator_list_append when None.
            cache_results: when true, copy remotely fetched objects into
                the local store.

        Returns:
            dict with the keys 'result' (final accumulator), 'param_set_id',
            'num_sucessful' (sic) and 'num_failed'.

        Raises:
            MolnsUtilException: if a file is found in none of the three
                stores, or if the mapper or aggregator raises.
    """
    # dill and numpy are not referenced by name below; presumably imported
    # here so they are loaded on the remote engine -- TODO confirm.
    import dill
    import numpy
    from molnsutil import PersistentStorage, LocalStorage, SharedStorage
    ps = PersistentStorage()
    ss = SharedStorage()
    ls = LocalStorage()
    if aggregator is None:
        aggregator = builtin_aggregator_list_append
    num_processed = 0
    res = None  # running accumulator handed to the aggregator
    result = None
    for i, filename in enumerate(results):
        enotes = ''  # collects per-store failure details for the error message
        result = None
        # Tier 1: local ephemeral storage.
        try:
            result = ls.get(filename)
        except Exception as e:
            enotes += "In fetching from local store, caught {0}: {1}\n".format(
                type(e), e)
        # Tier 2: shared storage.
        if result is None:
            try:
                result = ss.get(filename)
                # A failure while writing the cache copy lands in the same
                # handler; result is already set, so processing continues.
                if cache_results:
                    ls.put(filename, result)
            except Exception as e:
                enotes += "In fetching from shared store, caught {0}: {1}\n".format(
                    type(e), e)
        # Tier 3: persistent object store.
        if result is None:
            try:
                result = ps.get(filename)
                if cache_results:
                    ls.put(filename, result)
            except Exception as e:
                enotes += "In fetching from global store, caught {0}: {1}\n".format(
                    type(e), e)
        # Nothing found anywhere: abort with the collected notes.
        if result is None:
            notes = "Error could not find file '{0}' in storage\n".format(
                filename)
            notes += enotes
            raise MolnsUtilException(notes)
        try:
            mapres = mapper(result)
            res = aggregator(mapres, res)
            num_processed += 1
        except Exception as e:
            notes = "Error running mapper and aggregator, caught {0}: {1}\n".format(
                type(e), e)
            notes += "type(mapper) = {0}\n".format(type(mapper))
            notes += "type(aggregator) = {0}\n".format(type(aggregator))
            # dir() without arguments lists the names in the local scope, so
            # the local variable names of this function are part of the
            # emitted message.
            notes += "dir={0}\n".format(dir())
            raise MolnsUtilException(notes)
    # Every failure path above raises, so on a normal return num_processed
    # counts every item that was iterated.
    return {
        'result': res,
        'param_set_id': param_set_id,
        'num_sucessful': num_processed,
        'num_failed': len(results) - num_processed
    }
def map_and_aggregate(results, param_set_id, mapper, aggregator=None, cache_results=False):
    """ Reduces a list of results by applying the map function 'mapper'.

        When this function is applied on an engine, it will first look for
        the result object in the local ephemeral storage (cache), then in
        the Shared area (global non-persistent), then in the Object Store
        (global persistent). The first store that yields a non-None object
        wins.

        If cache_results=True, then result objects fetched from the Shared
        area or the Object Store will be written to the local ephemeral
        storage (file cache), so subsequent postprocessing jobs may run
        faster.

        Args:
            results: sequence of filenames (storage keys) to process. It is
                iterated once and len() is taken of it.
            param_set_id: opaque identifier, copied unchanged into the
                returned dict.
            mapper: callable applied to each fetched result object.
            aggregator: callable invoked as aggregator(mapped_value,
                accumulator); the accumulator starts as None. Defaults to
                builtin_aggregator_list_append when None.
            cache_results: when true, copy remotely fetched objects into
                the local store.

        Returns:
            dict with the keys 'result' (final accumulator), 'param_set_id',
            'num_sucessful' (sic) and 'num_failed'.

        Raises:
            MolnsUtilException: if a file is found in none of the three
                stores, or if the mapper or aggregator raises.
    """
    # dill and numpy are not referenced by name below; presumably imported
    # here so they are loaded on the remote engine -- TODO confirm.
    import dill
    import numpy
    from molnsutil import PersistentStorage, LocalStorage, SharedStorage
    ps = PersistentStorage()
    ss = SharedStorage()
    ls = LocalStorage()
    if aggregator is None:
        aggregator = builtin_aggregator_list_append
    num_processed=0
    res = None  # running accumulator handed to the aggregator
    result = None
    for i,filename in enumerate(results):
        enotes = ''  # collects per-store failure details for the error message
        result = None
        # Tier 1: local ephemeral storage.
        try:
            result = ls.get(filename)
        except Exception as e:
            enotes += "In fetching from local store, caught {0}: {1}\n".format(type(e),e)
        # Tier 2: shared storage.
        if result is None:
            try:
                result = ss.get(filename)
                # A failure while writing the cache copy lands in the same
                # handler; result is already set, so processing continues.
                if cache_results:
                    ls.put(filename, result)
            except Exception as e:
                enotes += "In fetching from shared store, caught {0}: {1}\n".format(type(e),e)
        # Tier 3: persistent object store.
        if result is None:
            try:
                result = ps.get(filename)
                if cache_results:
                    ls.put(filename, result)
            except Exception as e:
                enotes += "In fetching from global store, caught {0}: {1}\n".format(type(e),e)
        # Nothing found anywhere: abort with the collected notes.
        if result is None:
            notes = "Error could not find file '{0}' in storage\n".format(filename)
            notes += enotes
            raise MolnsUtilException(notes)
        try:
            mapres = mapper(result)
            res = aggregator(mapres, res)
            num_processed +=1
        except Exception as e:
            notes = "Error running mapper and aggregator, caught {0}: {1}\n".format(type(e),e)
            notes += "type(mapper) = {0}\n".format(type(mapper))
            notes += "type(aggregator) = {0}\n".format(type(aggregator))
            # dir() without arguments lists the names in the local scope, so
            # the local variable names of this function are part of the
            # emitted message.
            notes += "dir={0}\n".format(dir())
            raise MolnsUtilException(notes)
    # Every failure path above raises, so on a normal return num_processed
    # counts every item that was iterated.
    return {'result':res, 'param_set_id':param_set_id, 'num_sucessful':num_processed,
            'num_failed':len(results)-num_processed}