def get(self, name, validate=False):
    self.setup_provider()
    # Try to read it from the cache
    try:
        data = cloudpickle.loads(self.cache.get(name))
    except Exception:
        # If it is not there, read it from the Object Store and write it to the cache
        data = cloudpickle.loads(self.provider.get(name, validate))
        try:
            self.cache.put(name, data)
        except Exception:
            # For now, we just ignore cache write errors, e.g. if the disk is full.
            pass
    return data
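# Hedged sketch (an assumption, not taken from the original class): a possible
# write-side counterpart to the cached get() above, using the same
# provider/cache attributes. The class's actual put() may serialize or handle
# the cache differently.
def put(self, name, data):
    self.setup_provider()
    # get() above applies cloudpickle.loads to provider data, so serialize
    # symmetrically before handing it to the provider.
    self.provider.put(name, cloudpickle.dumps(data))
    try:
        # Keep the cache warm; cache write failures are non-fatal, as in get().
        self.cache.put(name, data)
    except Exception:
        pass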
def get(self, filename):
    with open(self.folder_name + "/" + filename, 'rb') as fh:
        if self.serialization_method == "cloudpickle":
            data = cloudpickle.loads(fh.read())
        elif self.serialization_method == "json":
            data = json.loads(fh.read())
        else:
            raise MolnsUtilException("Unknown serialization method '{0}'".format(self.serialization_method))
    return data
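# Hedged sketch (an assumption): the write-side counterpart to get() above,
# dispatching on the same serialization_method attribute. The class's actual
# put() may differ in details such as error handling.
def put(self, filename, data):
    with open(self.folder_name + "/" + filename, 'wb') as fh:
        if self.serialization_method == "cloudpickle":
            fh.write(cloudpickle.dumps(data))
        elif self.serialization_method == "json":
            fh.write(json.dumps(data).encode())
        else:
            raise MolnsUtilException("Unknown serialization method '{0}'".format(self.serialization_method))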
def run_ensemble(model_class, parameters, param_set_id, seed_base, number_of_trajectories, storage_mode="Shared"): """ Generates an ensemble consisting of number_of_trajectories realizations by running the model 'nt' number of times. The resulting result objects are serialized and written to one of the MOLNs storage locations, each assigned a random filename. The default behavior is to write the files to the Shared storage location (global non-persistent). Optionally, files can be written to the Object Store (global persistent), storage_model="Persistent" Returns: a list of filenames for the serialized result objects. """ import sys import uuid from molnsutil import PersistentStorage, LocalStorage, SharedStorage if storage_mode == "Shared": storage = SharedStorage() elif storage_mode == "Persistent": storage = PersistentStorage() else: raise MolnsUtilException( "Unknown storage type '{0}'".format(storage_mode)) # Create the model try: model_class_cls = cloudpickle.loads(model_class) if parameters is not None: model = model_class_cls(**parameters) else: model = model_class_cls() except Exception as e: notes = "Error instantiation the model class, caught {0}: {1}\n".format( type(e), e) notes += "dir={0}\n".format(dir()) raise MolnsUtilException(notes) # Run the solver filenames = [] processes = [] results = model.run(seed=seed_base, number_of_trajectories=number_of_trajectories) if not isinstance(results, list): results = [results] for result in results: try: # We should try to thread this to hide latency in file upload... filename = str(uuid.uuid1()) storage.put(filename, result) filenames.append(filename) except: raise return {'filenames': filenames, 'param_set_id': param_set_id}
def create_model(model_class, parameters):
    try:
        model_class_cls = cloudpickle.loads(model_class)
        if parameters is not None:
            model = model_class_cls(**parameters)
        else:
            model = model_class_cls()
        return model
    except Exception as e:
        notes = "Error instantiating the model class, caught {0}: {1}\n".format(type(e), e)
        notes += "dir={0}\n".format(dir())
        raise MolnsUtilException(notes)
def run_ensemble_map_and_aggregate(model_class, parameters, param_set_id, seed_base, number_of_trajectories, mapper, aggregator=None): """ Generate an ensemble, then run the mappers are aggreator. This will not store the results. """ import sys import uuid if aggregator is None: aggregator = builtin_aggregator_list_append # Create the model try: model_class_cls = cloudpickle.loads(model_class) if parameters is not None: model = model_class_cls(**parameters) else: model = model_class_cls() except Exception as e: notes = "Error instantiation the model class, caught {0}: {1}\n".format( type(e), e) notes += "dir={0}\n".format(dir()) raise MolnsUtilException(notes) # Run the solver res = None num_processed = 0 results = model.run(seed=seed_base, number_of_trajectories=number_of_trajectories) if not isinstance(results, list): results = [results] #for i in range(number_of_trajectories): for result in results: try: mapres = mapper(result) res = aggregator(mapres, res) num_processed += 1 except Exception as e: notes = "Error running mapper and aggregator, caught {0}: {1}\n".format( type(e), e) notes += "type(mapper) = {0}\n".format(type(mapper)) notes += "type(aggregator) = {0}\n".format(type(aggregator)) notes += "dir={0}\n".format(dir()) raise MolnsUtilException(notes) return { 'result': res, 'param_set_id': param_set_id, 'num_sucessful': num_processed, 'num_failed': number_of_trajectories - num_processed }
def run_ensemble(model_class, parameters, param_set_id, seed_base, number_of_trajectories, storage_mode="Shared"): """ Generates an ensemble consisting of number_of_trajectories realizations by running the model 'nt' number of times. The resulting result objects are serialized and written to one of the MOLNs storage locations, each assigned a random filename. The default behavior is to write the files to the Shared storage location (global non-persistent). Optionally, files can be written to the Object Store (global persistent), storage_model="Persistent" Returns: a list of filenames for the serialized result objects. """ import sys import uuid from molnsutil import PersistentStorage, LocalStorage, SharedStorage if storage_mode=="Shared": storage = SharedStorage() elif storage_mode=="Persistent": storage = PersistentStorage() else: raise MolnsUtilException("Unknown storage type '{0}'".format(storage_mode)) # Create the model try: model_class_cls = cloudpickle.loads(model_class) if parameters is not None: model = model_class_cls(**parameters) else: model = model_class_cls() except Exception as e: notes = "Error instantiation the model class, caught {0}: {1}\n".format(type(e),e) notes += "dir={0}\n".format(dir()) raise MolnsUtilException(notes) # Run the solver filenames = [] processes=[] results = model.run(seed=seed_base, number_of_trajectories=number_of_trajectories) if not isinstance(results, list): results = [results] for result in results: try: # We should try to thread this to hide latency in file upload... filename = str(uuid.uuid1()) storage.put(filename, result) filenames.append(filename) except: raise return {'filenames':filenames, 'param_set_id':param_set_id}
def run_ensemble_map_and_aggregate(model_class, parameters, param_set_id, seed_base, number_of_trajectories, mapper, aggregator=None): """ Generate an ensemble, then run the mappers are aggreator. This will not store the results. """ import sys import uuid if aggregator is None: aggregator = builtin_aggregator_list_append # Create the model try: model_class_cls = cloudpickle.loads(model_class) if parameters is not None: model = model_class_cls(**parameters) else: model = model_class_cls() except Exception as e: notes = "Error instantiation the model class, caught {0}: {1}\n".format(type(e),e) notes += "dir={0}\n".format(dir()) raise MolnsUtilException(notes) # Run the solver res = None num_processed = 0 results = model.run(seed=seed_base, number_of_trajectories=number_of_trajectories) if not isinstance(results, list): results = [results] #for i in range(number_of_trajectories): for result in results: try: mapres = mapper(result) res = aggregator(mapres, res) num_processed +=1 except Exception as e: notes = "Error running mapper and aggregator, caught {0}: {1}\n".format(type(e),e) notes += "type(mapper) = {0}\n".format(type(mapper)) notes += "type(aggregator) = {0}\n".format(type(aggregator)) notes += "dir={0}\n".format(dir()) raise MolnsUtilException(notes) return {'result':res, 'param_set_id':param_set_id, 'num_sucessful':num_processed, 'num_failed':number_of_trajectories-num_processed}
def get(self, name, validate=False):
    self.setup_provider()
    return cloudpickle.loads(self.provider.get(name, validate))