def map_and_aggregate(results, param_set_id, mapper, aggregator=None, cache_results=False):
    """Reduce a list of result files by applying 'mapper' to each object and
    folding the mapped values together with 'aggregator'.

    When this function is applied on an engine, each result object is looked up
    first in the local ephemeral storage (cache), then in the shared area
    (global, non-persistent), then in the object store (global, persistent).

    Args:
        results: iterable of storage filenames identifying result objects.
        param_set_id: opaque identifier, passed through to the returned dict.
        mapper: callable applied to each fetched result object.
        aggregator: callable (mapped_value, accumulator) -> accumulator.
            Defaults to builtin_aggregator_list_append.
        cache_results: if True, objects fetched from shared/global storage are
            written to the local ephemeral storage (file cache), so subsequent
            postprocessing jobs may run faster.

    Returns:
        dict with keys 'result', 'param_set_id', 'num_sucessful' (sic — the
        misspelled key is kept for backward compatibility with callers) and
        'num_failed'.

    Raises:
        MolnsUtilException: if a file cannot be found in any storage tier, or
            if the mapper/aggregator raises.
    """
    import dill
    import numpy
    from molnsutil import PersistentStorage, LocalStorage, SharedStorage
    ps = PersistentStorage()
    ss = SharedStorage()
    ls = LocalStorage()
    if aggregator is None:
        aggregator = builtin_aggregator_list_append
    num_processed = 0
    res = None
    for filename in results:  # fixed: index from enumerate() was unused
        enotes = ''
        result = None
        # Tiered lookup: local cache -> shared store -> persistent store.
        try:
            result = ls.get(filename)
        except Exception as e:
            enotes += "In fetching from local store, caught {0}: {1}\n".format(type(e), e)
        if result is None:
            try:
                result = ss.get(filename)
                if cache_results:
                    ls.put(filename, result)
            except Exception as e:
                enotes += "In fetching from shared store, caught {0}: {1}\n".format(type(e), e)
        if result is None:
            try:
                result = ps.get(filename)
                if cache_results:
                    ls.put(filename, result)
            except Exception as e:
                enotes += "In fetching from global store, caught {0}: {1}\n".format(type(e), e)
        if result is None:
            notes = "Error could not find file '{0}' in storage\n".format(filename)
            notes += enotes
            raise MolnsUtilException(notes)
        try:
            mapres = mapper(result)
            res = aggregator(mapres, res)
            num_processed += 1
        except Exception as e:
            notes = "Error running mapper and aggregator, caught {0}: {1}\n".format(type(e), e)
            notes += "type(mapper) = {0}\n".format(type(mapper))
            notes += "type(aggregator) = {0}\n".format(type(aggregator))
            notes += "dir={0}\n".format(dir())
            raise MolnsUtilException(notes)
    return {'result': res, 'param_set_id': param_set_id,
            'num_sucessful': num_processed,
            'num_failed': len(results) - num_processed}
# NOTE(review): this is a verbatim duplicate of the map_and_aggregate defined
# earlier in this file; at import time this later definition wins. The two
# copies should be consolidated.
def map_and_aggregate(results, param_set_id, mapper, aggregator=None, cache_results=False):
    """Reduce a list of result files by applying the map function 'mapper' and
    folding the mapped values together with 'aggregator'.

    When this function is applied on an engine, each result object is looked up
    first in the local ephemeral storage (cache), then in the shared area
    (global, non-persistent), then in the object store (global, persistent).

    Args:
        results: iterable of storage filenames identifying result objects.
        param_set_id: opaque identifier, passed through to the returned dict.
        mapper: callable applied to each fetched result object.
        aggregator: callable (mapped_value, accumulator) -> accumulator.
            Defaults to builtin_aggregator_list_append.
        cache_results: if True, objects fetched from shared/global storage are
            written to the local ephemeral storage (file cache), so subsequent
            postprocessing jobs may run faster.

    Returns:
        dict with keys 'result', 'param_set_id', 'num_sucessful' (sic — the
        misspelled key is kept for backward compatibility with callers) and
        'num_failed'.

    Raises:
        MolnsUtilException: if a file cannot be found in any storage tier, or
            if the mapper/aggregator raises.
    """
    import dill
    import numpy
    from molnsutil import PersistentStorage, LocalStorage, SharedStorage
    ps = PersistentStorage()
    ss = SharedStorage()
    ls = LocalStorage()
    if aggregator is None:
        aggregator = builtin_aggregator_list_append
    num_processed = 0
    res = None
    for filename in results:  # fixed: index from enumerate() was unused
        enotes = ''
        result = None
        # Tiered lookup: local cache -> shared store -> persistent store.
        try:
            result = ls.get(filename)
        except Exception as e:
            enotes += "In fetching from local store, caught {0}: {1}\n".format(type(e), e)
        if result is None:
            try:
                result = ss.get(filename)
                if cache_results:
                    ls.put(filename, result)
            except Exception as e:
                enotes += "In fetching from shared store, caught {0}: {1}\n".format(type(e), e)
        if result is None:
            try:
                result = ps.get(filename)
                if cache_results:
                    ls.put(filename, result)
            except Exception as e:
                enotes += "In fetching from global store, caught {0}: {1}\n".format(type(e), e)
        if result is None:
            notes = "Error could not find file '{0}' in storage\n".format(filename)
            notes += enotes
            raise MolnsUtilException(notes)
        try:
            mapres = mapper(result)
            res = aggregator(mapres, res)
            num_processed += 1
        except Exception as e:
            notes = "Error running mapper and aggregator, caught {0}: {1}\n".format(type(e), e)
            notes += "type(mapper) = {0}\n".format(type(mapper))
            notes += "type(aggregator) = {0}\n".format(type(aggregator))
            notes += "dir={0}\n".format(dir())
            raise MolnsUtilException(notes)
    return {'result': res, 'param_set_id': param_set_id,
            'num_sucessful': num_processed,
            'num_failed': len(results) - num_processed}
class ParameterSweepResult():
    """Container pairing one parameter point with the result computed for it."""

    def __init__(self, result, parameters):
        # The output produced for this parameter point.
        self.result = result
        # The parameter point itself (presumably a dict of name -> value;
        # TODO confirm against callers).
        self.parameters = parameters

    def __str__(self):
        return "{0} => {1}".format(self.parameters, self.result)


class ParameterSweepResultList(list):
    """List of ParameterSweepResult objects with a readable str() form."""

    def __str__(self):
        # Join element strings instead of building an intermediate list by hand.
        return "[{0}]".format(", ".join(str(item) for item in self))


if __name__ == '__main__':
    # Ad-hoc smoke test of the persistent (global) storage backend.
    ga = PersistentStorage()
    # print(ga.list_buckets())
    ga.put('testtest.pyb', "fdkjshfkjdshfjdhsfkjhsdkjfhdskjf")
    # fixed: Python 2 'print x' statement -> print() function (Py2/Py3 compatible)
    print(ga.get('testtest.pyb'))
    ga.delete('testtest.pyb')
    ga.list()
    ga.put('file1', "fdlsfjdkls")
    ga.put('file2', "fdlsfjdkls")
    ga.put('file2', "fdlsfjdkls")
    ga.delete_all()
# NOTE(review): this is a verbatim duplicate of the ParameterSweepResult /
# ParameterSweepResultList definitions and __main__ block earlier in the file;
# the copies should be consolidated.
class ParameterSweepResult():
    """Pair of one parameter point and the result computed for it."""

    def __init__(self, result, parameters):
        # The output produced for this parameter point.
        self.result = result
        # The parameter point itself (presumably a dict of name -> value;
        # TODO confirm against callers).
        self.parameters = parameters

    def __str__(self):
        return "{0} => {1}".format(self.parameters, self.result)


class ParameterSweepResultList(list):
    """List of ParameterSweepResult objects with a readable str() form."""

    def __str__(self):
        # Join element strings instead of building an intermediate list by hand.
        return "[{0}]".format(", ".join(str(item) for item in self))


if __name__ == '__main__':
    # Ad-hoc smoke test of the persistent (global) storage backend.
    ga = PersistentStorage()
    # print(ga.list_buckets())
    ga.put('testtest.pyb', "fdkjshfkjdshfjdhsfkjhsdkjfhdskjf")
    # fixed: Python 2 'print x' statement -> print() function (Py2/Py3 compatible)
    print(ga.get('testtest.pyb'))
    ga.delete('testtest.pyb')
    ga.list()
    ga.put('file1', "fdlsfjdkls")
    ga.put('file2', "fdlsfjdkls")
    ga.put('file2', "fdlsfjdkls")
    ga.delete_all()