def run_ensemble(model_class, parameters, param_set_id, seed_base,
                 number_of_trajectories, storage_mode="Shared"):
    """ Generate an ensemble of realizations and write them to MOLNs storage.

    The model class is unpickled and instantiated, then run once with
    ``seed=seed_base`` and ``number_of_trajectories=number_of_trajectories``.
    Each result object returned by the run is written to the selected
    storage under its own freshly generated uuid1 filename.

    The default behavior is to write the files to the Shared storage
    location (global non-persistent). Optionally, files can be written to
    the Object Store (global persistent) with storage_mode="Persistent".

    Args:
        model_class: cloudpickle-serialized model class.
        parameters: dict of keyword arguments for the model constructor, or
            None to call the constructor without arguments.
        param_set_id: identifier echoed back unchanged in the return value.
        seed_base: seed passed to ``model.run``.
        number_of_trajectories: number of realizations requested from
            ``model.run``.
        storage_mode: "Shared" (default) or "Persistent".

    Returns:
        A dict with the keys 'filenames' (list of the names the result
        objects were stored under) and 'param_set_id'.

    Raises:
        MolnsUtilException: if storage_mode is not recognized, or if the
            model class cannot be unpickled or instantiated.
    """
    # Imports are kept function-local, as in the original code
    # (presumably because this function is executed on remote engines --
    # TODO confirm).
    import uuid
    from molnsutil import PersistentStorage, SharedStorage

    if storage_mode == "Shared":
        storage = SharedStorage()
    elif storage_mode == "Persistent":
        storage = PersistentStorage()
    else:
        raise MolnsUtilException(
            "Unknown storage type '{0}'".format(storage_mode))

    # Create the model.
    # NOTE(review): unpickling executes arbitrary code; model_class must come
    # from a trusted source.
    try:
        model_class_cls = cloudpickle.loads(model_class)
        if parameters is not None:
            model = model_class_cls(**parameters)
        else:
            model = model_class_cls()
    except Exception as e:
        notes = "Error instantiation the model class, caught {0}: {1}\n".format(
            type(e), e)
        # dir() without arguments lists the local names, as a debugging aid.
        notes += "dir={0}\n".format(dir())
        raise MolnsUtilException(notes)

    # Run the solver; a single result object is normalized to a list.
    results = model.run(seed=seed_base,
                        number_of_trajectories=number_of_trajectories)
    if not isinstance(results, list):
        results = [results]

    filenames = []
    for result in results:
        # We should try to thread this to hide latency in file upload...
        filename = str(uuid.uuid1())
        storage.put(filename, result)
        filenames.append(filename)

    return {'filenames': filenames, 'param_set_id': param_set_id}
def write_file(storage_mode, filename, result):
    """ Write a result object to one of the MOLNs storage locations.

    Args:
        storage_mode: "Shared" to use SharedStorage, "Persistent" to use
            PersistentStorage.
        filename: name under which the object is stored.
        result: the object handed to the storage's ``put``.

    Raises:
        MolnsUtilException: if storage_mode is neither "Shared" nor
            "Persistent".
    """
    from molnsutil import LocalStorage, SharedStorage, PersistentStorage

    # Pick the storage class first, instantiate it only once the mode is
    # known to be valid.
    if storage_mode == "Shared":
        backend_cls = SharedStorage
    elif storage_mode == "Persistent":
        backend_cls = PersistentStorage
    else:
        raise MolnsUtilException("Unknown storage type '{0}'".format(storage_mode))

    backend_cls().put(filename, result)
def write_file(storage_mode, filename, result):
    """ Store `result` under `filename` in the storage chosen by storage_mode.

    Args:
        storage_mode: "Shared" selects SharedStorage, "Persistent" selects
            PersistentStorage.
        filename: name under which the object is stored.
        result: the object passed to the storage's ``put``.

    Raises:
        MolnsUtilException: for any other storage_mode value.
    """
    from molnsutil import LocalStorage, SharedStorage, PersistentStorage

    use_shared = storage_mode == "Shared"
    # Guard clause: reject anything that is not one of the two known modes.
    if not use_shared and not storage_mode == "Persistent":
        raise MolnsUtilException(
            "Unknown storage type '{0}'".format(storage_mode))

    target = SharedStorage() if use_shared else PersistentStorage()
    target.put(filename, result)
def run_ensemble(model_class, parameters, param_set_id, seed_base,
                 number_of_trajectories, storage_mode="Shared"):
    """ Generate an ensemble of realizations and write them to MOLNs storage.

    The model class is unpickled and instantiated, then run once with
    ``seed=seed_base`` and ``number_of_trajectories=number_of_trajectories``.
    Each result object returned by the run is written to the selected
    storage under its own freshly generated uuid1 filename.

    The default behavior is to write the files to the Shared storage
    location (global non-persistent). Optionally, files can be written to
    the Object Store (global persistent) with storage_mode="Persistent".

    Args:
        model_class: cloudpickle-serialized model class.
        parameters: dict of keyword arguments for the model constructor, or
            None to call the constructor without arguments.
        param_set_id: identifier echoed back unchanged in the return value.
        seed_base: seed passed to ``model.run``.
        number_of_trajectories: number of realizations requested from
            ``model.run``.
        storage_mode: "Shared" (default) or "Persistent".

    Returns:
        A dict with the keys 'filenames' (list of the names the result
        objects were stored under) and 'param_set_id'.

    Raises:
        MolnsUtilException: if storage_mode is not recognized, or if the
            model class cannot be unpickled or instantiated.
    """
    # Imports are kept function-local, as in the original code
    # (presumably because this function is executed on remote engines --
    # TODO confirm).
    import uuid
    from molnsutil import PersistentStorage, SharedStorage

    if storage_mode == "Shared":
        storage = SharedStorage()
    elif storage_mode == "Persistent":
        storage = PersistentStorage()
    else:
        raise MolnsUtilException("Unknown storage type '{0}'".format(storage_mode))

    # Create the model.
    # NOTE(review): unpickling executes arbitrary code; model_class must come
    # from a trusted source.
    try:
        model_class_cls = cloudpickle.loads(model_class)
        if parameters is not None:
            model = model_class_cls(**parameters)
        else:
            model = model_class_cls()
    except Exception as e:
        notes = "Error instantiation the model class, caught {0}: {1}\n".format(type(e), e)
        # dir() without arguments lists the local names, as a debugging aid.
        notes += "dir={0}\n".format(dir())
        raise MolnsUtilException(notes)

    # Run the solver; a single result object is normalized to a list.
    results = model.run(seed=seed_base,
                        number_of_trajectories=number_of_trajectories)
    if not isinstance(results, list):
        results = [results]

    filenames = []
    for result in results:
        # We should try to thread this to hide latency in file upload...
        filename = str(uuid.uuid1())
        storage.put(filename, result)
        filenames.append(filename)

    return {'filenames': filenames, 'param_set_id': param_set_id}
def delete_realizations(self):
    """ Delete realizations from the storage.

    Every filename found in self.result_list (indexed by parameter-set id)
    is deleted from the storage selected by self.storage_mode. Nothing is
    done when self.storage_mode is None.

    Raises:
        MolnsUtilException: if self.storage_mode is neither None, "Shared"
            nor "Persistent". Previously such a value left the storage
            handle unbound and failed with UnboundLocalError on the first
            filename.
    """
    if self.storage_mode is None:
        return
    elif self.storage_mode == "Shared":
        ss = SharedStorage()
    elif self.storage_mode == "Persistent":
        ss = PersistentStorage()
    else:
        raise MolnsUtilException(
            "Unknown storage type '{0}'".format(self.storage_mode))

    for param_set_id in self.result_list:
        for filename in self.result_list[param_set_id]:
            try:
                ss.delete(filename)
            except OSError:
                # Deliberately best-effort: an OSError while deleting one
                # file must not stop the cleanup of the remaining ones.
                pass
def map_and_aggregate(results, param_set_id, mapper, aggregator=None, cache_results=False):
    """ Reduces a list of results by applying the map function 'mapper'.

    When this function is applied on an engine, it will first look for the
    result object in the local ephemeral storage (cache), then in the Shared
    area (global non-persistent), then in the Object Store (global
    persistent).

    If cache_results=True, then result objects fetched from the Shared area
    or the Object Store will be written to the local ephemeral storage (file
    cache), so subsequent postprocessing jobs may run faster.

    Args:
        results: sized iterable of filenames of stored result objects.
        param_set_id: identifier echoed back unchanged in the return value.
        mapper: callable applied to each fetched result object.
        aggregator: callable invoked as ``aggregator(mapped, accumulated)``
            whose return value becomes the new accumulated value (None
            before the first file). Defaults to
            builtin_aggregator_list_append.
        cache_results: whether to copy remotely fetched objects to the
            local storage.

    Returns:
        A dict with the keys 'result' (the aggregated value, None if
        `results` is empty), 'param_set_id', 'num_sucessful' and
        'num_failed'. Because any failure raises, 'num_failed' is 0 whenever
        the function returns normally.

    Raises:
        MolnsUtilException: if a file cannot be found in any storage, or if
            the mapper or aggregator raises.
    """
    # dill and numpy are not referenced below; the imports are kept from the
    # original (presumably needed on the engine when result objects are
    # deserialized -- TODO confirm before removing).
    import dill
    import numpy
    from molnsutil import PersistentStorage, LocalStorage, SharedStorage

    ps = PersistentStorage()
    ss = SharedStorage()
    ls = LocalStorage()

    if aggregator is None:
        aggregator = builtin_aggregator_list_append

    # Lookup order: (storage, error message used when the fetch fails,
    # whether a hit may be copied into the local cache).
    lookup_order = (
        (ls, "In fetching from local store, caught {0}: {1}\n", False),
        (ss, "In fetching from shared store, caught {0}: {1}\n", True),
        (ps, "In fetching from global store, caught {0}: {1}\n", True),
    )

    num_processed = 0
    res = None
    for filename in results:
        enotes = ''
        result = None
        for store, error_template, cacheable in lookup_order:
            try:
                result = store.get(filename)
                if cacheable and cache_results:
                    ls.put(filename, result)
            except Exception as e:
                # A failed cache write lands here too; the fetched object is
                # still used because `result` has already been assigned.
                enotes += error_template.format(type(e), e)
            if result is not None:
                break

        if result is None:
            notes = "Error could not find file '{0}' in storage\n".format(
                filename)
            notes += enotes
            raise MolnsUtilException(notes)

        try:
            mapres = mapper(result)
            res = aggregator(mapres, res)
            num_processed += 1
        except Exception as e:
            notes = "Error running mapper and aggregator, caught {0}: {1}\n".format(
                type(e), e)
            notes += "type(mapper) = {0}\n".format(type(mapper))
            notes += "type(aggregator) = {0}\n".format(type(aggregator))
            # dir() without arguments lists the local names, as a debugging aid.
            notes += "dir={0}\n".format(dir())
            raise MolnsUtilException(notes)

    return {
        'result': res,
        'param_set_id': param_set_id,
        'num_sucessful': num_processed,
        'num_failed': len(results) - num_processed,
    }
def map_and_aggregate(results, param_set_id, mapper, aggregator=None, cache_results=False):
    """ Reduces a list of results by applying the map function 'mapper'.

    When this function is applied on an engine, it will first look for the
    result object in the local ephemeral storage (cache), then in the Shared
    area (global non-persistent), then in the Object Store (global
    persistent).

    If cache_results=True, then result objects fetched from the Shared area
    or the Object Store will be written to the local ephemeral storage (file
    cache), so subsequent postprocessing jobs may run faster.

    Args:
        results: sized iterable of filenames of stored result objects.
        param_set_id: identifier echoed back unchanged in the return value.
        mapper: callable applied to each fetched result object.
        aggregator: callable invoked as ``aggregator(mapped, accumulated)``
            whose return value becomes the new accumulated value (None
            before the first file). Defaults to
            builtin_aggregator_list_append.
        cache_results: whether to copy remotely fetched objects to the
            local storage.

    Returns:
        A dict with the keys 'result' (the aggregated value, None if
        `results` is empty), 'param_set_id', 'num_sucessful' and
        'num_failed'. Because any failure raises, 'num_failed' is 0 whenever
        the function returns normally.

    Raises:
        MolnsUtilException: if a file cannot be found in any storage, or if
            the mapper or aggregator raises.
    """
    # dill and numpy are not referenced below; the imports are kept from the
    # original (presumably needed on the engine when result objects are
    # deserialized -- TODO confirm before removing).
    import dill
    import numpy
    from molnsutil import PersistentStorage, LocalStorage, SharedStorage

    ps = PersistentStorage()
    ss = SharedStorage()
    ls = LocalStorage()

    if aggregator is None:
        aggregator = builtin_aggregator_list_append

    # Lookup order: (storage, error message used when the fetch fails,
    # whether a hit may be copied into the local cache).
    lookup_order = (
        (ls, "In fetching from local store, caught {0}: {1}\n", False),
        (ss, "In fetching from shared store, caught {0}: {1}\n", True),
        (ps, "In fetching from global store, caught {0}: {1}\n", True),
    )

    num_processed = 0
    res = None
    for filename in results:
        enotes = ''
        result = None
        for store, error_template, cacheable in lookup_order:
            try:
                result = store.get(filename)
                if cacheable and cache_results:
                    ls.put(filename, result)
            except Exception as e:
                # A failed cache write lands here too; the fetched object is
                # still used because `result` has already been assigned.
                enotes += error_template.format(type(e), e)
            if result is not None:
                break

        if result is None:
            notes = "Error could not find file '{0}' in storage\n".format(filename)
            notes += enotes
            raise MolnsUtilException(notes)

        try:
            mapres = mapper(result)
            res = aggregator(mapres, res)
            num_processed += 1
        except Exception as e:
            notes = "Error running mapper and aggregator, caught {0}: {1}\n".format(type(e), e)
            notes += "type(mapper) = {0}\n".format(type(mapper))
            notes += "type(aggregator) = {0}\n".format(type(aggregator))
            # dir() without arguments lists the local names, as a debugging aid.
            notes += "dir={0}\n".format(dir())
            raise MolnsUtilException(notes)

    return {
        'result': res,
        'param_set_id': param_set_id,
        'num_sucessful': num_processed,
        'num_failed': len(results) - num_processed,
    }