def start_tasks(self, **kwargs):
    try:
        logger.info('Adding %i particles as tasks' % self.total_particle_count())
        tasks = []

        for part in self.particles:
            forcer = BaseForcer(self.hydrodataset, particle=part, common_variables=self.common_variables,
                                timevar=self.timevar, times=self.times, start_time=self.start, models=self._models,
                                release_location_centroid=self.reference_location.point,
                                usebathy=self._use_bathymetry, useshore=self._use_shoreline,
                                usesurface=self._use_seasurface, reverse_distance=self.reverse_distance,
                                bathy_path=self.bathy_path, shoreline_path=self.shoreline_path,
                                shoreline_feature=self.shoreline_feature, time_method=self.time_method,
                                shoreline_index_buffer=self.shoreline_index_buffer)
            tasks.append(forcer)

        logger.progress((5, 'Running model'))
        return self.pool.map_async(Runner(), tasks)
    except Exception:
        logger.exception("Something didn't start correctly!")
        raise
def fill_polygon_with_points(cls, goal=None, polygon=None):
    """ Fill a shapely polygon with X number of points """
    if goal is None:
        raise ValueError("Must specify the number of points (goal) to fill the polygon with")

    if polygon is None or (not isinstance(polygon, Polygon) and not isinstance(polygon, MultiPolygon)):
        raise ValueError("Must specify a polygon to fill points with")

    minx = polygon.bounds[0]
    maxx = polygon.bounds[2]
    miny = polygon.bounds[1]
    maxy = polygon.bounds[3]

    points = []
    now = time.time()
    while len(points) < goal:
        random_x = random.uniform(minx, maxx)
        random_y = random.uniform(miny, maxy)
        p = Point(random_x, random_y)
        if p.within(polygon):
            points.append(p)

    logger.info("Filling polygon with points took %f seconds" % (time.time() - now))
    return points
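# --- Usage sketch (not part of the original source) ---
# A minimal, hypothetical example of calling fill_polygon_with_points() the way the
# controllers below do (AsaTransport.fill_polygon_with_points(goal=..., polygon=...)).
# The import path and the square polygon coordinates are assumptions for illustration only.
from shapely.geometry import Polygon
from paegan.transport.utils.asatransport import AsaTransport  # assumed import path

# Fill an arbitrary release area with 10 random start points that lie inside the polygon.
release_area = Polygon([(-70.0, 40.0), (-69.5, 40.0), (-69.5, 40.5), (-70.0, 40.5)])
start_points = AsaTransport.fill_polygon_with_points(goal=10, polygon=release_area)
for p in start_points:
    print(p.x, p.y)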
def listen_for_logs():
    pubsub = r.pubsub()
    pubsub.subscribe("%s:log" % run_id)

    for msg in pubsub.listen():
        if msg['type'] != "message":
            continue

        if msg["data"] == "FINISHED":
            break

        try:
            prog = json.loads(msg["data"])
            if prog is not None:
                if prog.get("level", "").lower() == "progress":
                    job.meta["progress"] = float(prog.get("value", job.meta.get("progress", None)))
                    job.meta["message"] = prog.get("message", job.meta.get("message", ""))
                    job.meta["updated"] = prog.get("time", datetime.utcnow().replace(tzinfo=pytz.utc).astimezone(pytz.utc))
                    job.save()
                    logger.info("PROGRESS: %(value).2f - %(message)s" % prog)
                else:
                    getattr(logger, prog["level"].lower())(prog.get("message"))
        except Exception:
            logger.info("Got strange result: %s" % msg["data"])
            pass

    pubsub.close()
    sys.exit()
def listen_for_results(output_h5_file, total_particles):
    # Create output file (hdf5)
    particles_finished = 0
    results = ResultsPyTable(output_h5_file)

    pubsub = r.pubsub()
    pubsub.subscribe("%s:results" % run_id)

    for msg in pubsub.listen():
        if msg['type'] != "message":
            continue

        if msg["data"] == "FINISHED":
            break

        try:
            json_msg = json.loads(msg["data"])
            if json_msg.get("status", None):
                # "COMPLETED" or "FAILED" when a particle finishes
                particles_finished += 1
                # Add the 5 progress that was used prior to the particles starting (controller)
                percent_complete = 90. * (float(particles_finished) / float(total_particles)) + 5
                r.publish("%s:log" % run_id, json.dumps({"time": datetime.utcnow().isoformat(),
                                                         "level": "progress",
                                                         "value": percent_complete,
                                                         "message": "Particle #%s %s!" % (particles_finished, json_msg.get("status"))}))
                if particles_finished == total_particles:
                    break
            else:
                # Write to HDF file
                results.write(json_msg)
        except Exception:
            logger.info("Got strange result: %s" % msg["data"])
            pass

    pubsub.close()
    results.compute()
    results.close()
    sys.exit()
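# --- Producer-side sketch (not part of the original source) ---
# For context: what a worker might publish so that listen_for_results() above can count
# finished particles and write per-timestep rows.  Based only on how messages are parsed
# above and on the redis_connection.publish(...) call in the forcer's run() further down;
# the Redis URL, run id, and payload keys other than "status" are assumptions.
import json
from datetime import datetime
import redis

redis_connection = redis.from_url("redis://localhost:6379/0")  # assumed URL
run_id = "example-run"                                          # assumed run id

# A per-timestep record (no "status" key) is written straight to the HDF5 table.
redis_connection.publish("%s:results" % run_id,
                         json.dumps({"uid": 1, "latitude": 40.1, "longitude": -69.9,
                                     "depth": -2.0, "time": datetime.utcnow().isoformat()}))

# A final message carrying a "status" key marks the particle as finished.
redis_connection.publish("%s:results" % run_id, json.dumps({"uid": 1, "status": "COMPLETED"}))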
def run(self):
    while True:
        try:
            next_task = self.task_queue.get(True, 10)
        except queue.Empty:
            logger.info("No tasks left to complete, closing %s" % self.name)
            break
        else:
            answer = (None, None)
            try:
                answer = (1, next_task(self.active))
            except Exception:
                logger.exception("Disabling Error")
                if isinstance(next_task, CachingDataController):
                    answer = (-2, "CachingDataController")
                    # Tell the particles that the CachingDataController is releasing file
                    self.get_data.value = False
                    # The data controller has died, so don't process any more tasks
                    self.active.value = False
                elif isinstance(next_task, BaseForcer):
                    answer = (-1, next_task.particle)
                else:
                    logger.warn("Strange task raised an exception: %s" % str(next_task.__class__))
                    answer = (None, None)
            finally:
                self.result_queue.put(answer)

                self.nproc_lock.acquire()
                self.n_run.value = self.n_run.value - 1
                self.nproc_lock.release()

                self.task_queue.task_done()
def listen_for_results(self, output_h5_file, total_particles):
    logger.info("Waiting for %i particle results" % total_particles)

    particles = []
    retrieved = 0
    timeout = 200

    while retrieved < total_particles:
        try:
            # self.result is an iterator that can timeout on next()
            particle = self.result.next(timeout)
            retrieved += 1
            particles.append(particle)
        except StopIteration:
            assert retrieved >= total_particles
            break
        except:
            logger.exception("Particle has FAILED!!")
            continue

        # We multiply by 90 here to save 10% for the exporting
        logger.progress((round((float(retrieved) / total_particles) * 90., 1), "%s Particle(s) complete" % retrieved))

    logger.info(particles)

    results = ex.ResultsPyTable(output_h5_file)
    for p in particles:
        for x in range(len(p.locations)):
            results.write(p.timestep_index_dump(x))
    results.compute()
    results.close()

    return
def __call__(self, active):
    if active.value is True:
        while self.get_data.value is True:
            logger.info("Waiting for DataController to start...")
            timer.sleep(5)
            pass

    return super(CachingForcer, self).__call__(active)
def run(self, **kwargs):

    logger.progress((4, "Starting tasks"))
    self.result = self.start_tasks(**kwargs)
    if self.result is None:
        raise BaseDataControllerError("Not all tasks started! Exiting.")

    # Store results in hdf5 file for processing later
    output_h5_file = None
    if kwargs.get('output_path') is not None:
        output_h5_file = os.path.join(kwargs.get('output_path'), 'results.h5')

    if self.thread_result_listener is True:
        rl = threading.Thread(name="ResultListener", target=self.listen_for_results, args=(output_h5_file, self.total_particle_count()))
        rl.daemon = True
        rl.start()
        rl.join()  # This blocks until the tasks are all done.
    else:
        self.listen_for_results(output_h5_file, self.total_particle_count())  # This blocks until the tasks are all done.

    logger.info('Tasks are all finished... Cleaning up!!')

    self.cleanup()

    # If output_formats and path specified,
    # output particle run data to disk when completed
    if "output_formats" in kwargs:
        logger.progress((96, "Exporting results"))

        # Make sure output_path is also included
        if kwargs.get("output_path", None) is not None:
            formats = kwargs.get("output_formats")
            output_path = kwargs.get("output_path")

            if isinstance(formats, list):
                for fmt in formats:
                    logger.info("Exporting to: %s" % fmt)
                    try:
                        # Calls the export function
                        fmt.export(output_path, output_h5_file)
                    except:
                        logger.exception("Failed to export to: %s" % fmt)
            else:
                logger.warn('The output_formats parameter should be a list, not saving any output!')
        else:
            logger.warn('No output path defined, not saving any output!')
    else:
        logger.warn('No output_formats parameter was defined, not saving any output!')

    logger.progress((97, "Model Run Complete"))

    return
def setup_run(self, **kwargs):

    super(CachingModelController, self).setup_run(**kwargs)

    # Should we remove the cache file at the end of the run?
    self.remove_cache = kwargs.get("remove_cache", False)
    self.cache_path = kwargs.get("cache_path", None)

    # Create a temp file for the cache if nothing was passed in
    if self.cache_path is None:
        default_cache_dir = os.path.join(os.path.dirname(__file__), "_cache")
        temp_name = AsaRandom.filename(prefix=str(datetime.now().microsecond), suffix=".nc")
        self.cache_path = os.path.join(default_cache_dir, temp_name)

    # Be sure the cache directory exists
    if not os.path.exists(os.path.dirname(self.cache_path)):
        logger.info("Creating cache directory: %s" % self.cache_path)
        os.makedirs(os.path.dirname(self.cache_path))

    # Create the shared state objects

    # Particles use this to tell the Data Controller to "get_data".
    # The CachingDataController sets this to False when it is done writing to the cache file.
    # Particles will wait for this to be False before reading from the cache file.
    # If we are caching, this starts as True so the Particles don't take off.  If we
    # are not caching, this is False so the Particles can start immediately.
    self.get_data = self.mgr.Value('bool', True)

    # Particles use this to tell the DataController which indices to 'get_data' for
    self.point_get = self.mgr.Value('list', [0, 0, 0])

    # This locks access to the 'has_data_request_lock' value
    self.data_request_lock = self.mgr.Lock()
    # This tracks which Particle PID is asking the CachingDataController for data
    self.has_data_request_lock = self.mgr.Value('int', -1)

    # The lock that controls access to modifying 'has_read_lock' and 'read_count'
    self.read_lock = self.mgr.Lock()
    # List of Particle PIDs that are reading from the cache
    self.has_read_lock = self.mgr.list()
    # The number of Particles that are reading from the cache
    self.read_count = self.mgr.Value('int', 0)

    # When something is writing to the cache file
    self.write_lock = self.mgr.Lock()
    # PID of process with lock
    self.has_write_lock = self.mgr.Value('int', -1)
def start_tasks(self):
    # @TODO: this is more initialization, but need to prevent derived classes from doing this
    if self.pool is None:
        self.pool = multiprocessing.Pool()

    try:
        logger.info('Adding %i particles as tasks' % len(self.particles))
        tasks = []

        for part in self.particles:
            forcer = BaseForcer(self.hydrodataset, particle=part, common_variables=self.common_variables,
                                timevar=self.timevar, times=self.times, start_time=self.start, models=self._models,
                                release_location_centroid=self.reference_location.point,
                                usebathy=self._use_bathymetry, useshore=self._use_shoreline,
                                usesurface=self._use_seasurface, reverse_distance=self.reverse_distance,
                                bathy_path=self.bathy_path, shoreline_path=self.shoreline_path,
                                shoreline_feature=self.shoreline_feature, time_method=self.time_method,
                                redis_url=self.redis_url, redis_results_channel=self.redis_results_channel,
                                shoreline_index_buffer=self.shoreline_index_buffer)
            tasks.append(forcer)

        # @TODO: better mechanism than switching on type
        if isinstance(self.pool, multiprocessing.pool.Pool):
            aiter = self.pool.imap(Runner(), tasks)
        else:
            # IPython parallel View
            aiter = self.pool.map_async(Runner(), tasks)

        return aiter
    except Exception:
        logger.exception("Something didn't start correctly!")
        raise
def attempt(self, particle, depth):

    # We may want to have settlement affect the u/v/w in the future
    u = 0
    v = 0
    w = 0

    # If the particle is settled, don't move it anywhere
    if particle.settled:
        return (0, 0, 0)

    # A particle is negative down from the sea surface, so "-3" is 3 meters below the surface.
    # We are assuming here that the bathymetry is also negative down.
    if self.type.lower() == "benthic":
        # Is the sea floor within the upper and lower bounds?
        if self.upper >= depth >= self.lower:
            # Move the particle to the sea floor.
            # TODO: Should the particle just swim downwards?
            newloc = Location4D(location=particle.location)
            newloc.depth = depth
            particle.location = newloc
            particle.settle()
            logger.info("Particle %d settled in %s mode" % (particle.uid, self.type))
    elif self.type.lower() == "pelagic":
        # Are we in enough water to settle?
        # Ignore this bathymetry test since we would need a high resolution
        # dataset for this to work.
        # if self.upper >= depth:
        # Is the particle within the range?
        if self.upper >= particle.location.depth >= self.lower:
            # Just settle the particle
            particle.settle()
            logger.info("Particle %d settled in %s mode" % (particle.uid, self.type))
        else:
            logger.debug("Particle did NOT settle. Depth conditions not met. Upper limit: %d - Lower limit: %d - Particle: %d" % (self.upper, self.lower, particle.location.depth))
        # else:
        #     logger.info("Particle did NOT settle. Water not deep enough. Upper limit: %d - Bathymetry: %d" % (self.upper, depth))
    else:
        logger.warn("Settlement type %s not recognized, not trying to settle Particle %d." % (self.type, particle.uid))

    return (u, v, w)
def listen_for_results(self):
    logger.info("Waiting for %i particle results" % len(self.particles))
    logger.progress((5, "Running model"))

    particles = []
    retrieved = 0
    timeout = 200

    while retrieved < len(self.particles):
        try:
            # @TODO: better mechanism than switching on type
            if isinstance(self.pool, multiprocessing.pool.Pool):
                # self.result is an iterator that can timeout on next()
                particle = self.result.next(timeout)
                retrieved += 1
                particles.append(particle)
            else:
                # IPython parallel View
                # self.result is an AsyncMapResult
                from IPython.parallel import TimeoutError
                try:
                    new_particles = self.result.get(timeout=1)
                except TimeoutError:
                    pass  # this is fine, get incremental progress below
                else:
                    particles = new_particles

                # progress is absolute, not incremental
                retrieved = self.result.progress
        except StopIteration:
            assert retrieved >= len(self.particles)
            break
        except:
            logger.exception("Particle has FAILED!!")
            # logger.warn("Particle %s has FAILED!!" % particle.uid)
            continue

        # We multiply by 90 here to save 10% for the exporting
        logger.progress((round((float(retrieved) / self.number_of_tasks) * 90., 1), "%s Particle(s) complete" % retrieved))

    return particles
def listen_for_results(self, output_h5_file, total_particles):
    logger.info("Waiting for %i particle results" % total_particles)

    particles = []
    retrieved = 0

    while retrieved < total_particles:
        try:
            # IPython parallel View
            # self.result is an AsyncMapResult
            from IPython.parallel import TimeoutError
            try:
                new_particles = self.result.get(timeout=1)
            except TimeoutError:
                pass  # this is fine, get incremental progress below
            else:
                particles = new_particles

            # progress is absolute, not incremental
            retrieved = self.result.progress
        except StopIteration:
            assert retrieved >= total_particles
            break
        except:
            logger.exception("Particle has FAILED!!")
            continue

        # We multiply by 90 here to save 10% for the exporting
        logger.progress((round((float(retrieved) / total_particles) * 90., 1), "%s Particle(s) complete" % retrieved))

    results = ex.ResultsPyTable(output_h5_file)
    for p in particles:
        for x in range(len(p.locations)):
            results.write(p.timestep_index_dump(x))
    results.compute()
    results.close()

    return
def start_tasks(self):
    try:
        logger.info('Adding %i particles as tasks' % len(self.particles))

        for part in self.particles:
            forcer = BaseForcer(self.hydrodataset, particle=part, common_variables=self.common_variables,
                                timevar=self.timevar, times=self.times, start_time=self.start, models=self._models,
                                release_location_centroid=self.reference_location.point,
                                usebathy=self._use_bathymetry, useshore=self._use_shoreline,
                                usesurface=self._use_seasurface, reverse_distance=self.reverse_distance,
                                bathy_path=self.bathy_path, shoreline_path=self.shoreline_path,
                                shoreline_feature=self.shoreline_feature, time_method=self.time_method,
                                redis_url=self.redis_url, redis_results_channel=self.redis_results_channel,
                                shoreline_index_buffer=self.shoreline_index_buffer)
            self.tasks.put(forcer)

        # Create workers for the particles.
        self.procs = [Consumer(self.tasks, self.results, self.n_run, self.nproc_lock, self.active, None, name="BaseForcer-%d" % i)
                      for i in xrange(self.nproc - 1)]
        for w in self.procs:
            w.start()
            logger.info('Started %s' % w.name)

        return True
    except Exception:
        logger.exception("Something didn't start correctly!")
        return False
def run(self):
    while True:
        try:
            next_task = self.task_queue.get(True, 10)
        except Queue.Empty:
            logger.info("No tasks left to complete, closing %s" % self.name)
            break
        else:
            answer = (None, None)
            try:
                answer = (1, next_task(self.name, self.active))
            except Exception as detail:
                exc_type, exc_value, exc_traceback = sys.exc_info()
                logger.error("Disabling Error: " + repr(traceback.format_exception(exc_type, exc_value, exc_traceback)))
                if isinstance(next_task, DataController):
                    answer = (-2, "DataController")
                    # Tell the particles that the DataController is releasing file
                    self.get_data.value = False
                    # The data controller has died, so don't process any more tasks
                    self.active.value = False
                elif isinstance(next_task, ForceParticle):
                    answer = (-1, next_task.part)
                else:
                    logger.warn("Strange task raised an exception: %s" % str(next_task.__class__))
                    answer = (None, None)
            finally:
                self.result_queue.put(answer)

                self.nproc_lock.acquire()
                self.n_run.value = self.n_run.value - 1
                self.nproc_lock.release()

                self.task_queue.task_done()
def start_tasks(self):
    try:
        logger.info('Starting CachingDataController')

        # Add data controller to the queue first so that it
        # can get the initial data and is not blocked
        data_controller = CachingDataController(self.hydrodataset, self.common_variables, self.n_run, self.get_data,
                                                self.write_lock, self.has_write_lock, self.read_lock, self.read_count,
                                                self.time_chunk, self.horiz_chunk, self.times, self.start,
                                                self.point_get, self.reference_location, cache_path=self.cache_path)
        self.tasks.put(data_controller)

        # Create CachingDataController worker
        self.data_controller_process = Consumer(self.tasks, self.results, self.n_run, self.nproc_lock, self.active,
                                                self.get_data, name="CachingDataController")
        self.data_controller_process.start()

        logger.info('Adding %i particles as tasks' % len(self.particles))

        for part in self.particles:
            forcer = CachingForcer(self.cache_path, particle=part, common_variables=self.common_variables,
                                   timevar=self.timevar, times=self.times, start_time=self.start, models=self._models,
                                   release_location_centroid=self.reference_location.point,
                                   usebathy=self._use_bathymetry, useshore=self._use_shoreline,
                                   usesurface=self._use_seasurface, reverse_distance=self.reverse_distance,
                                   bathy_path=self.bathy_path, shoreline_path=self.shoreline_path,
                                   shoreline_feature=self.shoreline_feature, time_method=self.time_method,
                                   redis_url=self.redis_url, redis_results_channel=self.redis_results_channel,
                                   shoreline_index_buffer=self.shoreline_index_buffer,
                                   get_data=self.get_data, read_lock=self.read_lock, has_read_lock=self.has_read_lock,
                                   read_count=self.read_count, point_get=self.point_get,
                                   data_request_lock=self.data_request_lock,
                                   has_data_request_lock=self.has_data_request_lock)
            self.tasks.put(forcer)

        # Create workers for the particles.
        self.procs = [Consumer(self.tasks, self.results, self.n_run, self.nproc_lock, self.active, self.get_data, name="CachingForcer-%d" % i)
                      for i in xrange(self.nproc - 1)]
        for w in self.procs:
            w.start()
            logger.info('Started %s' % w.name)

        return True
    except Exception:
        logger.exception("Something didn't start correctly!")
        return False
def run(self, hydrodataset, **kwargs):

    self.hydrodataset = hydrodataset

    self.setup_run(**kwargs)

    logger.progress((4, "Starting tasks"))
    self.result = self.start_tasks()
    if self.result is None:
        raise BaseDataControllerError("Not all tasks started! Exiting.")

    # This blocks until the tasks are all done.
    self.particles = self.listen_for_results()

    logger.info('Consumers are all finished!')

    logger.info('Cleaning up')
    self.cleanup()

    if len(self.particles) > 0:
        # If output_formats and path specified,
        # output particle run data to disk when completed
        if "output_formats" in kwargs:
            logger.progress((96, "Exporting results"))

            # Make sure output_path is also included
            if kwargs.get("output_path", None) is not None:
                formats = kwargs.get("output_formats")
                output_path = kwargs.get("output_path")

                if isinstance(formats, list):
                    for format in formats:
                        logger.info("Exporting to: %s" % format)
                        try:
                            self.export(output_path, format=format)
                        except:
                            logger.exception("Failed to export to: %s" % format)
                else:
                    logger.warn('The output_formats parameter should be a list, not saving any output!')
            else:
                logger.warn('No output path defined, not saving any output!')
        else:
            logger.warn('No output format defined, not saving any output!')
    else:
        logger.warn("Model didn't actually do anything, check the log.")
        if self.error_code == -2:
            raise BaseDataControllerError("Error in the BaseDataController")
        else:
            raise ModelError("Error in the model")

    logger.progress((97, "Model Run Complete"))

    return self.particles
def listen_for_results(self):
    try:
        # Get results back from queue, test for failed particles
        return_particles = []
        retrieved = 0.
        self.error_code = 0

        logger.info("Waiting for %i particle results" % len(self.particles))
        logger.progress((5, "Running model"))
        while retrieved < self.number_of_tasks:
            try:
                # Returns a tuple of code, result
                code, tempres = self.results.get(timeout=240)
            except Queue.Empty:
                # Poll the active processes to make sure they are all alive and then continue with loop
                if not self.data_controller_process.is_alive() and self.data_controller_process.exitcode != 0:
                    # Data controller is zombied, kill off other processes.
                    self.get_data.value = False
                    self.results.put((-2, "CachingDataController"))

                new_procs = []
                old_procs = []
                for p in self.procs:
                    if not p.is_alive() and p.exitcode != 0:
                        # Do what the Consumer would do if something finished.
                        # Add something to results queue
                        self.results.put((-3, "ZombieParticle"))
                        # Decrement nproc (CachingDataController exits when this is 0)
                        with self.nproc_lock:
                            self.n_run.value = self.n_run.value - 1

                        # Remove task from queue (so they can be joined later on)
                        self.tasks.task_done()

                        # Start a new Consumer.  It will exit if there are no tasks available.
                        np = Consumer(self.tasks, self.results, self.n_run, self.nproc_lock, self.active, self.get_data, name=p.name)
                        new_procs.append(np)
                        old_procs.append(p)

                        # Release any locks the PID had
                        if p.pid in self.has_read_lock:
                            with self.read_lock:
                                self.read_count.value -= 1
                                self.has_read_lock.remove(p.pid)

                        if self.has_data_request_lock.value == p.pid:
                            self.has_data_request_lock.value = -1
                            try:
                                self.data_request_lock.release()
                            except:
                                pass

                        if self.has_write_lock.value == p.pid:
                            self.has_write_lock.value = -1
                            try:
                                self.write_lock.release()
                            except:
                                pass

                for p in old_procs:
                    try:
                        self.procs.remove(p)
                    except ValueError:
                        logger.warn("Did not find %s in the list of processes. Continuing on." % p.name)

                for p in new_procs:
                    self.procs.append(p)
                    logger.warn("Started a new consumer (%s) to replace a zombie consumer" % p.name)
                    p.start()
            else:
                # We got one.
                retrieved += 1
                if code is None:
                    logger.warn("Got an unrecognized response from a task.")
                elif code == -1:
                    logger.warn("Particle %s has FAILED!!" % tempres.uid)
                elif code == -2:
                    self.error_code = code
                    logger.warn("CachingDataController has FAILED!! Removing cache file so the particles fail.")
                    try:
                        os.remove(self.cache_path)
                    except OSError:
                        logger.debug("Could not remove cache file, it probably never existed")
                        pass
                elif code == -3:
                    self.error_code = code
                    logger.info("A zombie process was caught and task was removed from queue")
                elif isinstance(tempres, Particle):
                    logger.info("Particle %d finished" % tempres.uid)
                    return_particles.append(tempres)
                    # We multiply by 90 here to save 10% for the exporting
                    logger.progress((round((retrieved / self.number_of_tasks) * 90., 1), "Particle %d finished" % tempres.uid))
                elif tempres == "CachingDataController":
                    logger.info("CachingDataController finished")
                    logger.progress((round((retrieved / self.number_of_tasks) * 90., 1), "CachingDataController finished"))
                else:
                    logger.info("Got a strange result on results queue")
                    logger.info(str(tempres))

                logger.info("Retrieved %i/%i results" % (int(retrieved), self.number_of_tasks))

        if len(return_particles) != len(self.particles):
            logger.warn("Some particles failed and are not included in the output")

        # The results queue should be empty at this point
        assert self.results.empty() is True

        # Should be good to join on the tasks now that the queue is empty
        logger.info("Joining the task queue")
        self.tasks.join()

        self.particles = return_particles

    finally:
        # Join all processes
        logger.info("Joining the processes")
        for w in self.procs + [self.data_controller_process]:
            # Wait 20 seconds
            w.join(20.)
            if w.is_alive():
                # Process is hanging, kill it.
                logger.info("Terminating %s forcefully. This should have exited itself." % w.name)
                w.terminate()
def run(self):

    self.load_initial_dataset()

    redis_connection = None
    if self.redis_url is not None and self.redis_results_channel is not None:
        import redis
        redis_connection = redis.from_url(self.redis_url)

    # Setup shoreline
    self._shoreline = None
    if self.useshore is True:
        self._shoreline = Shoreline(path=self.shoreline_path, feature_name=self.shoreline_feature,
                                    point=self.release_location_centroid, spatialbuffer=self.shoreline_index_buffer)

        # Make sure we are not starting on land.  Raises exception if we are.
        self._shoreline.intersect(start_point=self.release_location_centroid, end_point=self.release_location_centroid)

    # Setup Bathymetry
    if self.usebathy is True:
        try:
            self._bathymetry = Bathymetry(file=self.bathy_path)
        except Exception:
            logger.exception("Could not load Bathymetry file: %s, using no Bathymetry for this run!" % self.bathy_path)
            self.usebathy = False

    # Calculate datetime at every timestep
    modelTimestep, newtimes = AsaTransport.get_time_objects_from_model_timesteps(self.times, start=self.start_time)

    if self.time_method == 'interp':
        time_indexs = self.timevar.nearest_index(newtimes, select='before')
    elif self.time_method == 'nearest':
        time_indexs = self.timevar.nearest_index(newtimes)
    else:
        logger.warn("Method for computing u,v,w,temp,salt not supported!")

    try:
        assert len(newtimes) == len(time_indexs)
    except AssertionError:
        logger.exception("Time indexes are messed up. Need to have equal datetime and time indexes")
        raise

    # Keep track of how much time we spend in each area.
    tot_boundary_time = 0.
    tot_model_time = {}
    tot_read_data = 0.
    for m in self.models:
        tot_model_time[m.name] = 0.

    # Set the base conditions
    # If using Redis, send the results
    if redis_connection is not None:
        redis_connection.publish(self.redis_results_channel, json.dumps(self.particle.timestep_dump()))

    # loop over timesteps
    # We don't loop over the last time_index because
    # we need to query in the time_index and set the particle's
    # location as the 'newtime' object.
    for loop_i, i in enumerate(time_indexs[0:-1]):

        if self.active and self.active.value is False:
            raise ValueError("Particle exiting due to Failure.")

        newloc = None

        st = time.clock()
        # Get the variable data required by the models
        if self.time_method == 'nearest':
            u, v, w, temp, salt = self.get_nearest_data(i)
        elif self.time_method == 'interp':
            u, v, w, temp, salt = self.get_linterp_data(i, newtimes[loop_i])
        else:
            logger.warn("Method for computing u,v,w,temp,salt is unknown. Only 'nearest' and 'interp' are supported.")
        tot_read_data += (time.clock() - st)

        # Get the bathy value at the particles location
        if self.usebathy is True:
            bathymetry_value = self._bathymetry.get_depth(self.particle.location)
        else:
            bathymetry_value = -999999999999999

        # Age the particle by the modelTimestep (seconds)
        # 'Age' meaning the amount of time it has been forced.
        self.particle.age(seconds=modelTimestep[loop_i])

        # loop over models - sort these in the order you want them to run
        for model in self.models:
            st = time.clock()
            movement = model.move(self.particle, u, v, w, modelTimestep[loop_i], temperature=temp, salinity=salt, bathymetry_value=bathymetry_value)
            newloc = Location4D(latitude=movement['latitude'], longitude=movement['longitude'], depth=movement['depth'], time=newtimes[loop_i + 1])
            # Attribute the elapsed time to the model that was just run
            tot_model_time[model.name] += (time.clock() - st)
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug("%s - moved %.3f meters (horizontally) and %.3f meters (vertically) by %s with data from %s" % (self.particle.logstring(), movement['distance'], movement['vertical_distance'], model.__class__.__name__, newtimes[loop_i].isoformat()))
            if newloc:
                st = time.clock()
                self.boundary_interaction(particle=self.particle, starting=self.particle.location, ending=newloc,
                                          distance=movement['distance'], angle=movement['angle'],
                                          azimuth=movement['azimuth'], reverse_azimuth=movement['reverse_azimuth'],
                                          vertical_distance=movement['vertical_distance'], vertical_angle=movement['vertical_angle'])
                tot_boundary_time += (time.clock() - st)
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug("%s - was forced by %s and is now at %s" % (self.particle.logstring(), model.__class__.__name__, self.particle.location.logstring()))

        self.particle.note = self.particle.outputstring()
        # Each timestep, save the particles status and environmental variables.
        # This keeps fields such as temp, salt, halted, settled, and dead matched up with the number of timesteps
        self.particle.save()

        # If using Redis, send the results
        if redis_connection is not None:
            redis_connection.publish(self.redis_results_channel, json.dumps(self.particle.timestep_dump()))

    self.dataset.closenc()

    # We won't pull data for the last entry in locations, but we need to populate it with fill data.
    self.particle.fill_gap()

    if self.usebathy is True:
        self._bathymetry.close()

    if self.useshore is True:
        self._shoreline.close()

    logger.info(textwrap.dedent('''Particle %i Stats:
                    Data read: %f seconds
                    Model forcing: %s seconds
                    Boundary intersection: %f seconds''' % (self.particle.uid, tot_read_data, {s: '{:g} seconds'.format(f) for s, f in list(tot_model_time.items())}, tot_boundary_time)))

    return self.particle
def setup_run(self, **kwargs):

    logger.setLevel(logging.PROGRESS)

    self.redis_url = None
    self.redis_log_channel = None
    self.redis_results_channel = None
    if "redis" in kwargs.get("output_formats", []):
        from paegan.logger.redis_handler import RedisHandler
        self.redis_url = kwargs.get("redis_url")
        self.redis_log_channel = kwargs.get("redis_log_channel")
        self.redis_results_channel = kwargs.get("redis_results_channel")
        rhandler = RedisHandler(self.redis_log_channel, self.redis_url)
        rhandler.setLevel(logging.PROGRESS)
        logger.addHandler(rhandler)

    # Relax.
    time.sleep(0.5)

    # Add ModelController description to logfile
    logger.info(unicode(self))

    # Add the model descriptions to logfile
    for m in self._models:
        logger.info(unicode(m))

    # Calculate the model timesteps
    # We need times = len(self._nstep) + 1 since data is stored one timestep
    # after a particle is forced with the final timestep's data.
    self.times = range(0, (self._step * self._nstep) + 1, self._step)
    # Calculate a datetime object for each model timestep
    # This method is duplicated in CachingDataController and CachingForcer
    # using the 'times' variables above.  Will be useful in those other
    # locations for particles released at different times
    # i.e. released over a few days
    self.modelTimestep, self.datetimes = AsaTransport.get_time_objects_from_model_timesteps(self.times, start=self.start)

    logger.progress((1, "Setting up particle start locations"))
    point_locations = []
    if isinstance(self.geometry, Point):
        point_locations = [self.reference_location] * self._npart
    elif isinstance(self.geometry, Polygon) or isinstance(self.geometry, MultiPolygon):
        point_locations = [Location4D(latitude=loc.y, longitude=loc.x, depth=self._depth, time=self.start) for loc in AsaTransport.fill_polygon_with_points(goal=self._npart, polygon=self.geometry)]

    # Initialize the particles
    logger.progress((2, "Initializing particles"))
    for x in xrange(0, self._npart):
        p = LarvaParticle(id=x)
        p.location = point_locations[x]
        # We don't need to fill the location gaps here for environment variables
        # because the first data collected actually relates to this original
        # position.
        # We do need to fill in fields such as settled, halted, etc.
        p.fill_status_gap()
        # Set the initial note
        p.note = p.outputstring()
        p.notes.append(p.note)
        self.particles.append(p)

    if kwargs.get("manager", True):
        # Get the number of cores (may take some tuning) and create that
        # many workers then pass particles into the queue for the workers
        self.mgr = multiprocessing.Manager()

        # This tracks if the system is 'alive'.  Most looping whiles will check this
        # and break out if it is False.  This is True until something goes very wrong.
        self.active = self.mgr.Value('bool', True)

        # Each particle is a task, plus the CachingDataController
        self.number_of_tasks = self.get_number_of_tasks()

        # Either spin up the number of cores, or the number of tasks
        self.nproc = min(multiprocessing.cpu_count() - 1, self.number_of_tasks)

        # Number of tasks that we need to run.  This is decremented every time something exits.
        self.n_run = self.mgr.Value('int', self.number_of_tasks)

        # The lock that controls access to the 'n_run' variable
        self.nproc_lock = self.mgr.Lock()

        # Create the task queue for all of the particles and the CachingDataController
        self.tasks = multiprocessing.JoinableQueue(self.number_of_tasks)

        # Create the result queue for all of the particles and the CachingDataController
        self.results = self.mgr.Queue(self.number_of_tasks)

    logger.progress((3, "Initializing and caching hydro model's grid"))
    try:
        ds = CommonDataset.open(self.hydrodataset)
    except Exception:
        logger.exception("Failed to access dataset %s" % self.hydrodataset)
        raise BaseDataControllerError("Inaccessible Dataset: %s" % self.hydrodataset)

    # Query the dataset for common variable names
    # and the time variable.
    logger.debug("Retrieving variable information from dataset")
    self.common_variables = self.get_common_variables_from_dataset(ds)

    self.timevar = None
    try:
        assert self.common_variables.get("u") in ds._current_variables
        assert self.common_variables.get("v") in ds._current_variables
        assert self.common_variables.get("x") in ds._current_variables
        assert self.common_variables.get("y") in ds._current_variables

        self.timevar = ds.gettimevar(self.common_variables.get("u"))
    except AssertionError:
        logger.exception("Could not locate variables needed to run model: %s" % unicode(self.common_variables))
        raise BaseDataControllerError("A required data variable was not found in %s" % self.hydrodataset)

    model_start = self.timevar.get_dates()[0]
    model_end = self.timevar.get_dates()[-1]

    try:
        assert self.start > model_start
        assert self.start < model_end
    except AssertionError:
        raise BaseDataControllerError("Start time for model (%s) is not available in source dataset (%s/%s)" % (self.datetimes[0], model_start, model_end))

    try:
        assert self.datetimes[-1] > model_start
        assert self.datetimes[-1] < model_end
    except AssertionError:
        raise BaseDataControllerError("End time for model (%s) is not available in source dataset (%s/%s)" % (self.datetimes[-1], model_start, model_end))

    ds.closenc()
def setup_run(self, hydrodataset, **kwargs):

    self.hydrodataset = hydrodataset

    logger.setLevel(logging.PROGRESS)

    # Relax.
    time.sleep(0.5)

    # Add ModelController description to logfile
    logger.info(str(self))

    # Add the model descriptions to logfile
    for m in self._models:
        logger.info(str(m))

    # Calculate the model timesteps
    # We need times = len(self._nstep) + 1 since data is stored one timestep
    # after a particle is forced with the final timestep's data.
    self.times = list(range(0, (self._step * self._nstep) + 1, self._step))
    # Calculate a datetime object for each model timestep
    # This method is duplicated in CachingDataController and CachingForcer
    # using the 'times' variables above.  Will be useful in those other
    # locations for particles released at different times
    # i.e. released over a few days
    self.modelTimestep, self.datetimes = AsaTransport.get_time_objects_from_model_timesteps(self.times, start=self.start)

    logger.progress((1, "Setting up particle start locations"))
    point_locations = []
    if isinstance(self.geometry, Point):
        point_locations = [self.reference_location] * self._npart
    elif isinstance(self.geometry, Polygon) or isinstance(self.geometry, MultiPolygon):
        point_locations = [Location4D(latitude=loc.y, longitude=loc.x, depth=self._depth, time=self.start) for loc in AsaTransport.fill_polygon_with_points(goal=self._npart, polygon=self.geometry)]

    # Initialize the particles
    logger.progress((2, "Initializing particles"))
    for x in range(0, self._npart):
        p = LarvaParticle(id=x)
        p.location = point_locations[x]
        # We don't need to fill the location gaps here for environment variables
        # because the first data collected actually relates to this original
        # position.
        # We do need to fill in fields such as settled, halted, etc.
        p.fill_status_gap()
        # Set the initial note
        p.note = p.outputstring()
        p.notes.append(p.note)
        self.particles.append(p)

    logger.progress((3, "Initializing and caching hydro model's grid %s" % self.hydrodataset))
    try:
        ds = CommonDataset.open(self.hydrodataset)
        # Query the dataset for common variable names
        # and the time variable.
        logger.debug("Retrieving variable information from dataset")
        self.common_variables = self.get_common_variables_from_dataset(ds)
    except Exception:
        logger.exception("Failed to access dataset %s" % self.hydrodataset)
        raise BaseDataControllerError("Inaccessible Dataset: %s" % self.hydrodataset)

    self.timevar = None
    try:
        assert self.common_variables.get("u") in ds._current_variables
        assert self.common_variables.get("v") in ds._current_variables
        assert self.common_variables.get("x") in ds._current_variables
        assert self.common_variables.get("y") in ds._current_variables
        self.timevar = ds.gettimevar(self.common_variables.get("u"))
        model_start = self.timevar.get_dates()[0]
        model_end = self.timevar.get_dates()[-1]
    except AssertionError:
        logger.exception("Could not locate variables needed to run model: %s" % str(self.common_variables))
        raise BaseDataControllerError("A required data variable was not found in %s" % self.hydrodataset)
    finally:
        ds.closenc()

    try:
        assert self.start > model_start
        assert self.start < model_end
    except AssertionError:
        raise BaseDataControllerError("Start time for model (%s) is not available in source dataset (%s/%s)" % (self.datetimes[0], model_start, model_end))

    try:
        assert self.datetimes[-1] > model_start
        assert self.datetimes[-1] < model_end
    except AssertionError:
        raise BaseDataControllerError("End time for model (%s) is not available in source dataset (%s/%s)" % (self.datetimes[-1], model_start, model_end))
def listen_for_results(self):
    try:
        # Get results back from queue, test for failed particles
        return_particles = []
        retrieved = 0.
        self.error_code = 0

        logger.info("Waiting for %i particle results" % len(self.particles))
        logger.progress((5, "Running model"))
        while retrieved < self.number_of_tasks:
            try:
                # Returns a tuple of code, result
                code, tempres = self.results.get(timeout=240)
            except Queue.Empty:
                new_procs = []
                old_procs = []
                for p in self.procs:
                    if not p.is_alive() and p.exitcode != 0:
                        # Do what the Consumer would do if something finished.
                        # Add something to results queue
                        self.results.put((-3, "Zombie"))
                        # Decrement nproc (Consumer exits when this is 0)
                        with self.nproc_lock:
                            self.n_run.value = self.n_run.value - 1

                        # Remove task from queue (so they can be joined later on)
                        self.tasks.task_done()

                        # Start a new Consumer.  It will exit if there are no tasks available.
                        np = Consumer(self.tasks, self.results, self.n_run, self.nproc_lock, self.active, None, name=p.name)
                        new_procs.append(np)
                        old_procs.append(p)

                for p in old_procs:
                    try:
                        self.procs.remove(p)
                    except ValueError:
                        logger.warn("Did not find %s in the list of processes. Continuing on." % p.name)

                for p in new_procs:
                    self.procs.append(p)
                    logger.warn("Started a new consumer (%s) to replace a zombie consumer" % p.name)
                    p.start()
            else:
                # We got one.
                retrieved += 1
                if code is None:
                    logger.warn("Got an unrecognized response from a task.")
                elif code == -1:
                    logger.warn("Particle %s has FAILED!!" % tempres.uid)
                elif code == -3:
                    self.error_code = code
                    logger.info("A zombie process was caught and task was removed from queue")
                elif isinstance(tempres, Particle):
                    logger.info("Particle %d finished" % tempres.uid)
                    return_particles.append(tempres)
                    # We multiply by 90 here to save 10% for the exporting
                    logger.progress((round((retrieved / self.number_of_tasks) * 90., 1), "Particle %d finished" % tempres.uid))
                else:
                    logger.info("Got a strange result on results queue: %s" % str(tempres))

                logger.info("Retrieved %i/%i results" % (int(retrieved), self.number_of_tasks))

        if len(return_particles) != len(self.particles):
            logger.warn("Some particles failed and are not included in the output")

        # The results queue should be empty at this point
        assert self.results.empty() is True

        # Should be good to join on the tasks now that the queue is empty
        logger.info("Joining the task queue")
        self.tasks.join()

        self.particles = return_particles

    finally:
        # Join all processes
        logger.info("Joining the processes")
        for w in self.procs:
            # Wait 20 seconds
            w.join(20.)
            if w.is_alive():
                # Process is hanging, kill it.
                logger.info("Terminating %s forcefully. This should have exited itself." % w.name)
                w.terminate()
def setup_run(self, **kwargs):

    super(CachingModelController, self).setup_run(**kwargs)

    # Get the number of cores (may take some tuning) and create that
    # many workers then pass particles into the queue for the workers
    self.mgr = multiprocessing.Manager()

    # This tracks if the system is 'alive'.  Most looping whiles will check this
    # and break out if it is False.  This is True until something goes very wrong.
    self.active = self.mgr.Value('bool', True)

    # Either spin up the number of cores, or the number of tasks
    self.nproc = min(multiprocessing.cpu_count() - 1, self.number_of_tasks)

    # Number of tasks that we need to run.  This is decremented every time something exits.
    self.n_run = self.mgr.Value('int', self.number_of_tasks)

    # The lock that controls access to the 'n_run' variable
    self.nproc_lock = self.mgr.Lock()

    # Create the task queue for all of the particles and the CachingDataController
    self.tasks = multiprocessing.JoinableQueue(self.number_of_tasks)

    # Create the result queue for all of the particles and the CachingDataController
    self.results = self.mgr.Queue(self.number_of_tasks)

    # Should we remove the cache file at the end of the run?
    self.remove_cache = kwargs.get("remove_cache", False)
    self.cache_path = kwargs.get("cache_path", None)

    # Create a temp file for the cache if nothing was passed in
    if self.cache_path is None:
        default_cache_dir = os.path.join(os.path.dirname(__file__), "_cache")
        temp_name = AsaRandom.filename(prefix=str(datetime.now().microsecond), suffix=".nc")
        self.cache_path = os.path.join(default_cache_dir, temp_name)

    # Be sure the cache directory exists
    if not os.path.exists(os.path.dirname(self.cache_path)):
        logger.info("Creating cache directory: %s" % self.cache_path)
        os.makedirs(os.path.dirname(self.cache_path))

    # Create the shared state objects

    # Particles use this to tell the Data Controller to "get_data".
    # The CachingDataController sets this to False when it is done writing to the cache file.
    # Particles will wait for this to be False before reading from the cache file.
    # If we are caching, this starts as True so the Particles don't take off.  If we
    # are not caching, this is False so the Particles can start immediately.
    self.get_data = self.mgr.Value('bool', True)

    # Particles use this to tell the DataController which indices to 'get_data' for
    self.point_get = self.mgr.Value('list', [0, 0, 0])

    # This locks access to the 'has_data_request_lock' value
    self.data_request_lock = self.mgr.Lock()
    # This tracks which Particle PID is asking the CachingDataController for data
    self.has_data_request_lock = self.mgr.Value('int', -1)

    # The lock that controls access to modifying 'has_read_lock' and 'read_count'
    self.read_lock = self.mgr.Lock()
    # List of Particle PIDs that are reading from the cache
    self.has_read_lock = self.mgr.list()
    # The number of Particles that are reading from the cache
    self.read_count = self.mgr.Value('int', 0)

    # When something is writing to the cache file
    self.write_lock = self.mgr.Lock()
    # PID of process with lock
    self.has_write_lock = self.mgr.Value('int', -1)
def run(self, hydrodataset, **kwargs):

    # Add ModelController description to logfile
    logger.info(self)

    # Add the model descriptions to logfile
    for m in self._models:
        logger.info(m)

    # Calculate the model timesteps
    # We need times = len(self._nstep) + 1 since data is stored one timestep
    # after a particle is forced with the final timestep's data.
    times = range(0, (self._step * self._nstep) + 1, self._step)
    # Calculate a datetime object for each model timestep
    # This method is duplicated in DataController and ForceParticle
    # using the 'times' variables above.  Will be useful in those other
    # locations for particles released at different times
    # i.e. released over a few days
    modelTimestep, self.datetimes = AsaTransport.get_time_objects_from_model_timesteps(times, start=self.start)

    time_chunk = self._time_chunk
    horiz_chunk = self._horiz_chunk
    low_memory = kwargs.get("low_memory", False)

    # Should we remove the cache file at the end of the run?
    remove_cache = kwargs.get("remove_cache", True)

    self.bathy_path = kwargs.get("bathy", None)

    self.cache_path = kwargs.get("cache", None)
    if self.cache_path is None:
        # Generate temp filename for dataset cache
        default_cache_dir = os.path.join(os.path.dirname(__file__), "_cache")
        temp_name = AsaRandom.filename(prefix=str(datetime.now().microsecond), suffix=".nc")
        self.cache_path = os.path.join(default_cache_dir, temp_name)

    logger.progress((1, "Setting up particle start locations"))
    point_locations = []
    if isinstance(self.geometry, Point):
        point_locations = [self.reference_location] * self._npart
    elif isinstance(self.geometry, Polygon) or isinstance(self.geometry, MultiPolygon):
        point_locations = [Location4D(latitude=loc.y, longitude=loc.x, depth=self._depth, time=self.start) for loc in AsaTransport.fill_polygon_with_points(goal=self._npart, polygon=self.geometry)]

    # Initialize the particles
    logger.progress((2, "Initializing particles"))
    for x in xrange(0, self._npart):
        p = LarvaParticle(id=x)
        p.location = point_locations[x]
        # We don't need to fill the location gaps here for environment variables
        # because the first data collected actually relates to this original
        # position.
        # We do need to fill in fields such as settled, halted, etc.
        p.fill_status_gap()
        # Set the initial note
        p.note = p.outputstring()
        p.notes.append(p.note)
        self.particles.append(p)

    # This is where it makes sense to implement the multiprocessing
    # looping for particles and models.  Can handle each particle in
    # parallel probably.
    #
    # Get the number of cores (may take some tuning) and create that
    # many workers then pass particles into the queue for the workers
    mgr = multiprocessing.Manager()

    nproc = multiprocessing.cpu_count() - 1
    if nproc <= 0:
        raise ValueError("Model does not run using less than two CPU cores")

    # Each particle is a task, plus the DataController
    number_of_tasks = len(self.particles) + 1

    # We need a process for each particle and one for the data controller
    nproc = min(number_of_tasks, nproc)

    # When a particle requests data
    data_request_lock = mgr.Lock()
    # PID of process with lock
    has_data_request_lock = mgr.Value('int', -1)

    nproc_lock = mgr.Lock()

    # Create the task queue for all of the particles and the DataController
    tasks = multiprocessing.JoinableQueue(number_of_tasks)
    # Create the result queue for all of the particles and the DataController
    results = mgr.Queue(number_of_tasks)

    # Create the shared state objects
    get_data = mgr.Value('bool', True)

    # Number of tasks
    n_run = mgr.Value('int', number_of_tasks)

    updating = mgr.Value('bool', False)

    # When something is reading from cache file
    read_lock = mgr.Lock()
    # list of PIDs that are reading
    has_read_lock = mgr.list()
    read_count = mgr.Value('int', 0)

    # When something is writing to the cache file
    write_lock = mgr.Lock()
    # PID of process with lock
    has_write_lock = mgr.Value('int', -1)

    point_get = mgr.Value('list', [0, 0, 0])
    active = mgr.Value('bool', True)

    logger.progress((3, "Initializing and caching hydro model's grid"))
    try:
        ds = CommonDataset.open(hydrodataset)
        # Query the dataset for common variable names
        # and the time variable.
        logger.debug("Retrieving variable information from dataset")
        common_variables = self.get_common_variables_from_dataset(ds)

        logger.debug("Pickling time variable to disk for particles")
        timevar = ds.gettimevar(common_variables.get("u"))
        f, timevar_pickle_path = tempfile.mkstemp()
        os.close(f)
        f = open(timevar_pickle_path, "wb")
        pickle.dump(timevar, f)
        f.close()
        ds.closenc()
    except:
        logger.warn("Failed to access remote dataset %s" % hydrodataset)
        raise DataControllerError("Inaccessible DAP endpoint: %s" % hydrodataset)

    # Add data controller to the queue first so that it
    # can get the initial data and is not blocked
    logger.debug('Starting DataController')
    logger.progress((4, "Starting processes"))
    data_controller = parallel.DataController(hydrodataset, common_variables, n_run, get_data, write_lock,
                                              has_write_lock, read_lock, read_count, time_chunk, horiz_chunk,
                                              times, self.start, point_get, self.reference_location,
                                              low_memory=low_memory, cache=self.cache_path)
    tasks.put(data_controller)

    # Create DataController worker
    data_controller_process = parallel.Consumer(tasks, results, n_run, nproc_lock, active, get_data, name="DataController")
    data_controller_process.start()

    logger.debug('Adding %i particles as tasks' % len(self.particles))
    for part in self.particles:
        forcing = parallel.ForceParticle(part, hydrodataset, common_variables, timevar_pickle_path, times,
                                         self.start, self._models, self.reference_location.point,
                                         self._use_bathymetry, self._use_shoreline, self._use_seasurface,
                                         get_data, n_run, read_lock, has_read_lock, read_count, point_get,
                                         data_request_lock, has_data_request_lock,
                                         reverse_distance=self.reverse_distance, bathy=self.bathy_path,
                                         shoreline_path=self.shoreline_path, cache=self.cache_path,
                                         time_method=self.time_method)
        tasks.put(forcing)

    # Create workers for the particles.
    procs = [parallel.Consumer(tasks, results, n_run, nproc_lock, active, get_data, name="ForceParticle-%d" % i)
             for i in xrange(nproc - 1)]
    for w in procs:
        w.start()
        logger.debug('Started %s' % w.name)

    # Get results back from queue, test for failed particles
    return_particles = []
    retrieved = 0.
    error_code = 0

    logger.info("Waiting for %i particle results" % len(self.particles))
    logger.progress((5, "Running model"))
    while retrieved < number_of_tasks:
        try:
            # Returns a tuple of code, result
            code, tempres = results.get(timeout=240)
        except Queue.Empty:
            # Poll the active processes to make sure they are all alive and then continue with loop
            if not data_controller_process.is_alive() and data_controller_process.exitcode != 0:
                # Data controller is zombied, kill off other processes.
                get_data.value = False
                results.put((-2, "DataController"))

            new_procs = []
            old_procs = []
            for p in procs:
                if not p.is_alive() and p.exitcode != 0:
                    # Do what the Consumer would do if something finished.
                    # Add something to results queue
                    results.put((-3, "ZombieParticle"))
                    # Decrement nproc (DataController exits when this is 0)
                    with nproc_lock:
                        n_run.value = n_run.value - 1

                    # Remove task from queue (so they can be joined later on)
                    tasks.task_done()

                    # Start a new Consumer.  It will exit if there are no tasks available.
                    np = parallel.Consumer(tasks, results, n_run, nproc_lock, active, get_data, name=p.name)
                    new_procs.append(np)
                    old_procs.append(p)

                    # Release any locks the PID had
                    if p.pid in has_read_lock:
                        with read_lock:
                            read_count.value -= 1
                            has_read_lock.remove(p.pid)

                    if has_data_request_lock.value == p.pid:
                        has_data_request_lock.value = -1
                        try:
                            data_request_lock.release()
                        except:
                            pass

                    if has_write_lock.value == p.pid:
                        has_write_lock.value = -1
                        try:
                            write_lock.release()
                        except:
                            pass

            for p in old_procs:
                try:
                    procs.remove(p)
                except ValueError:
                    logger.warn("Did not find %s in the list of processes. Continuing on." % p.name)

            for p in new_procs:
                procs.append(p)
                logger.warn("Started a new consumer (%s) to replace a zombie consumer" % p.name)
                p.start()
        else:
            # We got one.
            retrieved += 1
            if code is None:
                logger.warn("Got an unrecognized response from a task.")
            elif code == -1:
                logger.warn("Particle %s has FAILED!!" % tempres.uid)
            elif code == -2:
                error_code = code
                logger.warn("DataController has FAILED!! Removing cache file so the particles fail.")
                try:
                    os.remove(self.cache_path)
                except OSError:
                    logger.debug("Could not remove cache file, it probably never existed")
                    pass
            elif code == -3:
                error_code = code
                logger.info("A zombie process was caught and task was removed from queue")
            elif isinstance(tempres, Particle):
                logger.info("Particle %d finished" % tempres.uid)
                return_particles.append(tempres)
                # We multiply by 90 here to save 10% for the exporting
                logger.progress((round((retrieved / number_of_tasks) * 90., 1), "Particle %d finished" % tempres.uid))
            elif tempres == "DataController":
                logger.info("DataController finished")
                logger.progress((round((retrieved / number_of_tasks) * 90., 1), "DataController finished"))
            else:
                logger.info("Got a strange result on results queue")
                logger.info(str(tempres))

            logger.info("Retrieved %i/%i results" % (int(retrieved), number_of_tasks))

    if len(return_particles) != len(self.particles):
        logger.warn("Some particles failed and are not included in the output")

    # The results queue should be empty at this point
    assert results.empty() is True

    # Should be good to join on the tasks now that the queue is empty
    logger.info("Joining the task queue")
    tasks.join()

    # Join all processes
    logger.info("Joining the processes")
    for w in procs + [data_controller_process]:
        # Wait 10 seconds
        w.join(10.)
        if w.is_alive():
            # Process is hanging, kill it.
            logger.info("Terminating %s forcefully. This should have exited itself." % w.name)
            w.terminate()

    logger.info('Workers complete')

    self.particles = return_particles

    # Remove Manager so it shuts down
    del mgr

    # Remove pickled timevar
    os.remove(timevar_pickle_path)

    # Remove the cache file
    if remove_cache is True:
        try:
            os.remove(self.cache_path)
        except OSError:
            logger.debug("Could not remove cache file, it probably never existed")

    logger.progress((96, "Exporting results"))

    if len(self.particles) > 0:
        # If output_formats and path specified,
        # output particle run data to disk when completed
        if "output_formats" in kwargs:
            # Make sure output_path is also included
            if kwargs.get("output_path", None) is not None:
                formats = kwargs.get("output_formats")
                output_path = kwargs.get("output_path")
                if isinstance(formats, list):
                    for format in formats:
                        logger.info("Exporting to: %s" % format)
                        try:
                            self.export(output_path, format=format)
                        except:
                            logger.error("Failed to export to: %s" % format)
                else:
                    logger.warn('The output_formats parameter should be a list, not saving any output!')
            else:
                logger.warn('No output path defined, not saving any output!')
        else:
            logger.warn('No output format defined, not saving any output!')
    else:
        logger.warn("Model didn't actually do anything, check the log.")
        if error_code == -2:
            raise DataControllerError("Error in the DataController")
        else:
            raise ModelError("Error in the model")

    logger.progress((99, "Model Run Complete"))

    return
def run(self, hydrodataset, **kwargs): # Add ModelController description to logfile logger.info(self) # Add the model descriptions to logfile for m in self._models: logger.info(m) # Calculate the model timesteps # We need times = len(self._nstep) + 1 since data is stored one timestep # after a particle is forced with the final timestep's data. times = range(0,(self._step*self._nstep)+1,self._step) # Calculate a datetime object for each model timestep # This method is duplicated in DataController and ForceParticle # using the 'times' variables above. Will be useful in those other # locations for particles released at different times # i.e. released over a few days modelTimestep, self.datetimes = AsaTransport.get_time_objects_from_model_timesteps(times, start=self.start) time_chunk = self._time_chunk horiz_chunk = self._horiz_chunk low_memory = kwargs.get("low_memory", False) # Should we remove the cache file at the end of the run? remove_cache = kwargs.get("remove_cache", True) self.bathy_path = kwargs.get("bathy", None) self.cache_path = kwargs.get("cache", None) if self.cache_path is None: # Generate temp filename for dataset cache default_cache_dir = os.path.join(os.path.dirname(__file__), "_cache") temp_name = AsaRandom.filename(prefix=str(datetime.now().microsecond), suffix=".nc") self.cache_path = os.path.join(default_cache_dir, temp_name) logger.progress((1, "Setting up particle start locations")) point_locations = [] if isinstance(self.geometry, Point): point_locations = [self.reference_location] * self._npart elif isinstance(self.geometry, Polygon) or isinstance(self.geometry, MultiPolygon): point_locations = [Location4D(latitude=loc.y, longitude=loc.x, depth=self._depth, time=self.start) for loc in AsaTransport.fill_polygon_with_points(goal=self._npart, polygon=self.geometry)] # Initialize the particles logger.progress((2, "Initializing particles")) for x in xrange(0, self._npart): p = LarvaParticle(id=x) p.location = point_locations[x] # We don't need to fill the location gaps here for environment variables # because the first data collected actually relates to this original # position. # We do need to fill in fields such as settled, halted, etc. p.fill_status_gap() # Set the inital note p.note = p.outputstring() p.notes.append(p.note) self.particles.append(p) # This is where it makes sense to implement the multiprocessing # looping for particles and models. Can handle each particle in # parallel probably. 
# # Get the number of cores (may take some tuning) and create that # many workers then pass particles into the queue for the workers mgr = multiprocessing.Manager() nproc = multiprocessing.cpu_count() - 1 if nproc <= 0: raise ValueError("Model does not run using less than two CPU cores") # Each particle is a task, plus the DataController number_of_tasks = len(self.particles) + 1 # We need a process for each particle and one for the data controller nproc = min(number_of_tasks, nproc) # When a particle requests data data_request_lock = mgr.Lock() # PID of process with lock has_data_request_lock = mgr.Value('int',-1) nproc_lock = mgr.Lock() # Create the task queue for all of the particles and the DataController tasks = multiprocessing.JoinableQueue(number_of_tasks) # Create the result queue for all of the particles and the DataController results = mgr.Queue(number_of_tasks) # Create the shared state objects get_data = mgr.Value('bool', True) # Number of tasks n_run = mgr.Value('int', number_of_tasks) updating = mgr.Value('bool', False) # When something is reading from cache file read_lock = mgr.Lock() # list of PIDs that are reading has_read_lock = mgr.list() read_count = mgr.Value('int', 0) # When something is writing to the cache file write_lock = mgr.Lock() # PID of process with lock has_write_lock = mgr.Value('int',-1) point_get = mgr.Value('list', [0, 0, 0]) active = mgr.Value('bool', True) logger.progress((3, "Initializing and caching hydro model's grid")) try: ds = CommonDataset.open(hydrodataset) # Query the dataset for common variable names # and the time variable. logger.debug("Retrieving variable information from dataset") common_variables = self.get_common_variables_from_dataset(ds) logger.debug("Pickling time variable to disk for particles") timevar = ds.gettimevar(common_variables.get("u")) f, timevar_pickle_path = tempfile.mkstemp() os.close(f) f = open(timevar_pickle_path, "wb") pickle.dump(timevar, f) f.close() ds.closenc() except: logger.warn("Failed to access remote dataset %s" % hydrodataset) raise DataControllerError("Inaccessible DAP endpoint: %s" % hydrodataset) # Add data controller to the queue first so that it # can get the initial data and is not blocked logger.debug('Starting DataController') logger.progress((4, "Starting processes")) data_controller = parallel.DataController(hydrodataset, common_variables, n_run, get_data, write_lock, has_write_lock, read_lock, read_count, time_chunk, horiz_chunk, times, self.start, point_get, self.reference_location, low_memory=low_memory, cache=self.cache_path) tasks.put(data_controller) # Create DataController worker data_controller_process = parallel.Consumer(tasks, results, n_run, nproc_lock, active, get_data, name="DataController") data_controller_process.start() logger.debug('Adding %i particles as tasks' % len(self.particles)) for part in self.particles: forcing = parallel.ForceParticle(part, hydrodataset, common_variables, timevar_pickle_path, times, self.start, self._models, self.reference_location.point, self._use_bathymetry, self._use_shoreline, self._use_seasurface, get_data, n_run, read_lock, has_read_lock, read_count, point_get, data_request_lock, has_data_request_lock, reverse_distance=self.reverse_distance, bathy=self.bathy_path, shoreline_path=self.shoreline_path, shoreline_feature=self.shoreline_feature, cache=self.cache_path, time_method=self.time_method) tasks.put(forcing) # Create workers for the particles. 
    procs = [parallel.Consumer(tasks, results, n_run, nproc_lock, active, get_data, name="ForceParticle-%d" % i)
             for i in xrange(nproc - 1)]
    for w in procs:
        w.start()
        logger.debug('Started %s' % w.name)

    # Get results back from queue, test for failed particles
    return_particles = []
    retrieved = 0.
    error_code = 0

    logger.info("Waiting for %i particle results" % len(self.particles))
    logger.progress((5, "Running model"))
    while retrieved < number_of_tasks:
        try:
            # Returns a tuple of code, result
            code, tempres = results.get(timeout=240)
        except Queue.Empty:
            # Poll the active processes to make sure they are all alive and then continue with loop
            if not data_controller_process.is_alive() and data_controller_process.exitcode != 0:
                # Data controller is zombied, kill off other processes.
                get_data.value = False
                results.put((-2, "DataController"))

            new_procs = []
            old_procs = []
            for p in procs:
                if not p.is_alive() and p.exitcode != 0:
                    # Do what the Consumer would do if something finished.
                    # Add something to results queue
                    results.put((-3, "ZombieParticle"))
                    # Decrement nproc (DataController exits when this is 0)
                    with nproc_lock:
                        n_run.value = n_run.value - 1

                    # Remove task from queue (so they can be joined later on)
                    tasks.task_done()

                    # Start a new Consumer.  It will exit if there are no tasks available.
                    np = parallel.Consumer(tasks, results, n_run, nproc_lock, active, get_data, name=p.name)
                    new_procs.append(np)
                    old_procs.append(p)

                    # Release any locks the PID had
                    if p.pid in has_read_lock:
                        with read_lock:
                            read_count.value -= 1
                            has_read_lock.remove(p.pid)

                    if has_data_request_lock.value == p.pid:
                        has_data_request_lock.value = -1
                        try:
                            data_request_lock.release()
                        except:
                            pass

                    if has_write_lock.value == p.pid:
                        has_write_lock.value = -1
                        try:
                            write_lock.release()
                        except:
                            pass

            for p in old_procs:
                try:
                    procs.remove(p)
                except ValueError:
                    logger.warn("Did not find %s in the list of processes. Continuing on." % p.name)

            for p in new_procs:
                procs.append(p)
                logger.warn("Started a new consumer (%s) to replace a zombie consumer" % p.name)
                p.start()
        else:
            # We got one.
            retrieved += 1
            if code is None:
                logger.warn("Got an unrecognized response from a task.")
            elif code == -1:
                logger.warn("Particle %s has FAILED!!" % tempres.uid)
            elif code == -2:
                error_code = code
                logger.warn("DataController has FAILED!! Removing cache file so the particles fail.")
                try:
                    os.remove(self.cache_path)
                except OSError:
                    logger.debug("Could not remove cache file, it probably never existed")
            elif code == -3:
                error_code = code
                logger.info("A zombie process was caught and task was removed from queue")
            elif isinstance(tempres, Particle):
                logger.info("Particle %d finished" % tempres.uid)
                return_particles.append(tempres)
                # We multiply by 90 here to leave room for the exporting progress
                logger.progress((round((retrieved / number_of_tasks) * 90., 1), "Particle %d finished" % tempres.uid))
            elif tempres == "DataController":
                logger.info("DataController finished")
                logger.progress((round((retrieved / number_of_tasks) * 90., 1), "DataController finished"))
            else:
                logger.info("Got a strange result on results queue")
                logger.info(str(tempres))

            logger.info("Retrieved %i/%i results" % (int(retrieved), number_of_tasks))

    if len(return_particles) != len(self.particles):
        logger.warn("Some particles failed and are not included in the output")

    # The results queue should be empty at this point
    assert results.empty() is True

    # Should be good to join on the tasks now that the queue is empty
    logger.info("Joining the task queue")
    tasks.join()

    # Join all processes
    logger.info("Joining the processes")
    for w in procs + [data_controller_process]:
        # Wait 10 seconds
        w.join(10.)
        if w.is_alive():
            # Process is hanging, kill it.
            logger.info("Terminating %s forcefully. This should have exited itself." % w.name)
            w.terminate()

    logger.info('Workers complete')

    self.particles = return_particles

    # Remove Manager so it shuts down
    del mgr

    # Remove pickled timevar
    os.remove(timevar_pickle_path)

    # Remove the cache file
    if remove_cache is True:
        try:
            os.remove(self.cache_path)
        except OSError:
            logger.debug("Could not remove cache file, it probably never existed")

    logger.progress((96, "Exporting results"))

    if len(self.particles) > 0:
        # If output_formats and path specified,
        # output particle run data to disk when completed
        if "output_formats" in kwargs:
            # Make sure output_path is also included
            if kwargs.get("output_path", None) is not None:
                formats = kwargs.get("output_formats")
                output_path = kwargs.get("output_path")
                if isinstance(formats, list):
                    for fmt in formats:
                        logger.info("Exporting to: %s" % fmt)
                        try:
                            self.export(output_path, format=fmt)
                        except Exception:
                            logger.exception("Failed to export to: %s" % fmt)
                else:
                    logger.warn('The output_formats parameter should be a list, not saving any output!')
            else:
                logger.warn('No output path defined, not saving any output!')
        else:
            logger.warn('No output format defined, not saving any output!')
    else:
        logger.warn("Model didn't actually do anything, check the log.")
        if error_code == -2:
            raise DataControllerError("Error in the DataController")
        else:
            raise ModelError("Error in the model")

    logger.progress((99, "Model Run Complete"))

    return
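# --- Illustrative sketch (not part of the original module) ---
# The Queue.Empty branch above boils down to: if a worker died with a non-zero
# exit code, account for its unfinished task and start a replacement with the
# same name. The standalone example below demonstrates that respawn pattern with
# plain multiprocessing primitives; the worker function, queue payloads and
# helper names are assumptions made up for illustration and are not part of
# this codebase.
import multiprocessing


def _illustrative_worker(tasks, results):
    # Consume tasks until a None sentinel is seen.
    for task in iter(tasks.get, None):
        results.put(task * 2)
        tasks.task_done()
    tasks.task_done()  # account for the sentinel itself


def _respawn_dead_workers(procs, tasks, results):
    # Replace any worker that exited abnormally, mirroring the loop above.
    replacements = []
    for p in list(procs):
        if not p.is_alive() and p.exitcode not in (None, 0):
            procs.remove(p)
            np = multiprocessing.Process(target=_illustrative_worker,
                                         args=(tasks, results), name=p.name)
            np.start()
            replacements.append(np)
    procs.extend(replacements)
    return replacements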
def export(cls, folder, particles, datetimes): shape_schema = {'geometry': 'Point', 'properties': OrderedDict([('particle', 'int'), ('date', 'str'), ('latitude', 'float'), ('longitude', 'float'), ('depth', 'float'), ('u_vector', 'float'), ('v_vector', 'float'), ('w_vector', 'float'), ('temp', 'float'), ('salinity', 'float'), ('age', 'float'), ('settled', 'str'), ('dead', 'str'), ('halted', 'str'), ('notes', 'str')])} shape_crs = {'no_defs': True, 'ellps': 'WGS84', 'datum': 'WGS84', 'proj': 'longlat'} if not os.path.exists(folder): os.makedirs(folder) filepath = os.path.join(folder, "gdalshape.shp") with collection(filepath, "w", driver='ESRI Shapefile', schema=shape_schema, crs=shape_crs) as shape: for particle in particles: normalized_locations = particle.normalized_locations(datetimes) normalized_temps = particle.temps normalized_salts = particle.salts normalized_u = particle.u_vectors normalized_v = particle.v_vectors normalized_w = particle.w_vectors normalized_settled = particle.settles normalized_dead = particle.deads normalized_halted = particle.halts normalized_ages = particle.ages normalized_notes = particle.notes if len(normalized_locations) != len(normalized_temps): logger.info("No temperature being added to shapefile.") # Create list of 'None' equal to the length of locations normalized_temps = [-9999.9] * len(normalized_locations) else: # Replace any None with fill value normalized_temps = (-9999.9 if not x else x for x in normalized_temps) if len(normalized_locations) != len(normalized_salts): logger.info("No salinity being added to shapefile.") # Create list of 'None' equal to the length of locations normalized_salts = [-9999.9] * len(normalized_locations) else: # Replace any None with fill value normalized_salts = (-9999.9 if not x else x for x in normalized_salts) if len(normalized_locations) != len(normalized_u): logger.info("No U being added to shapefile.") # Create list of 'None' equal to the length of locations normalized_u = [-9999.9] * len(normalized_locations) else: # Replace any None with fill value normalized_u = (-9999.9 if not x else x for x in normalized_u) if len(normalized_locations) != len(normalized_v): logger.info("No V being added to shapefile.") # Create list of 'None' equal to the length of locations normalized_v = [-9999.9] * len(normalized_locations) else: # Replace any None with fill value normalized_v = (-9999.9 if not x else x for x in normalized_v) if len(normalized_locations) != len(normalized_w): logger.info("No W being added to shapefile.") # Create list of 'None' equal to the length of locations normalized_w = [-9999.9] * len(normalized_locations) else: # Replace any None with fill value normalized_w = (-9999.9 if not x else x for x in normalized_w) if len(normalized_locations) != len(normalized_settled): logger.info("No Settled being added to shapefile.") # Create list of 'None' equal to the length of locations normalized_settled = [None] * len(normalized_locations) if len(normalized_locations) != len(normalized_dead): logger.info("No Dead being added to shapefile.") # Create list of 'None' equal to the length of locations normalized_dead = [None] * len(normalized_locations) if len(normalized_locations) != len(normalized_halted): logger.info("No Halted being added to shapefile.") # Create list of 'None' equal to the length of locations normalized_halted = [None] * len(normalized_locations) if len(normalized_locations) != len(normalized_ages): logger.info("No W being added to shapefile.") # Create list of 'None' equal to the length of 
locations normalized_ages = [-9999.9] * len(normalized_locations) else: # Replace any None with fill value normalized_ages = (-9999.9 if not x else round(x, 3) for x in normalized_ages) if len(normalized_locations) != len(normalized_notes): logger.info("No Notes being added to shapefile.") # Create list of 'None' equal to the length of locations normalized_notes = [None] * len(normalized_locations) for loc, temp, salt, u, v, w, settled, dead, halted, age, note in zip(normalized_locations, normalized_temps, normalized_salts, normalized_u, normalized_v, normalized_w, normalized_settled, normalized_dead, normalized_halted, normalized_ages, normalized_notes): shape.write({ 'geometry': mapping(loc.point), 'properties': OrderedDict([('particle', particle.uid), ('date', unicode(loc.time.isoformat())), ('latitude', float(loc.latitude)), ('longitude', float(loc.longitude)), ('depth', float(loc.depth)), ('temp', float(temp)), ('salinity', float(salt)), ('u_vector', float(u)), ('v_vector', float(v)), ('w_vector', float(w)), ('settled', unicode(settled)), ('dead', unicode(dead)), ('halted', unicode(halted)), ('age', float(age)), ('notes' , unicode(note))])}) # Zip the output shpzip = zipfile.ZipFile(os.path.join(folder, "shapefile.shp.zip"), mode='w') for f in glob.glob(os.path.join(folder, "gdalshape*")): shpzip.write(f, os.path.basename(f)) os.remove(f) shpzip.close()
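# --- Illustrative sketch (not part of the original module) ---
# Every attribute above goes through the same dance: if the list length does not
# match the number of locations, fall back to a list of fill values; otherwise
# substitute the fill value for missing entries. The hypothetical helper below
# expresses that once, only to clarify the pattern; note the original code treats
# any falsy value (not just None) as missing.
def _normalized_or_fill(values, locations, fill=-9999.9):
    # Return one value per location, substituting `fill` for gaps.
    if len(values) != len(locations):
        return [fill] * len(locations)
    return [fill if v is None else v for v in values]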
def __call__(self, proc, active): self.active = active if self.usebathy is True: try: self._bathymetry = Bathymetry(file=self.bathy) except Exception: logger.exception("Could not load Bathymetry file: %s, using no Bathymetry for this run!" % self.bathy) self.usebathy = False self._shoreline = None if self.useshore is True: self._shoreline = Shoreline(path=self.shoreline_path, feature_name=self.shoreline_feature, point=self.release_location_centroid, spatialbuffer=0.25) # Make sure we are not starting on land. Raises exception if we are. self._shoreline.intersect(start_point=self.release_location_centroid, end_point=self.release_location_centroid) if self.active.value is True: while self.get_data.value is True: logger.info("Waiting for DataController to start...") timer.sleep(5) pass # Initialize commondataset of local cache, then # close the related netcdf file try: if self.caching is True: with self.read_lock: self.read_count.value += 1 self.has_read_lock.append(os.getpid()) self.dataset = CommonDataset.open(self.hydrodataset) self.dataset.closenc() except StandardError: logger.warn("No source dataset: %s. Particle exiting" % self.hydrodataset) raise finally: if self.caching is True: with self.read_lock: self.read_count.value -= 1 self.has_read_lock.remove(os.getpid()) # Calculate datetime at every timestep modelTimestep, newtimes = AsaTransport.get_time_objects_from_model_timesteps(self.times, start=self.start_time) if self.time_method == 'interp': time_indexs = self.timevar.nearest_index(newtimes, select='before') elif self.time_method == 'nearest': time_indexs = self.timevar.nearest_index(newtimes) else: logger.warn("Method for computing u,v,w,temp,salt not supported!") try: assert len(newtimes) == len(time_indexs) except AssertionError: logger.error("Time indexes are messed up. Need to have equal datetime and time indexes") raise # loop over timesteps # We don't loop over the last time_index because # we need to query in the time_index and set the particle's # location as the 'newtime' object. for loop_i, i in enumerate(time_indexs[0:-1]): if self.active.value is False: raise ValueError("Particle exiting due to Failure.") newloc = None # Get the variable data required by the models if self.time_method == 'nearest': u, v, w, temp, salt = self.data_nearest(i, newtimes[loop_i]) elif self.time_method == 'interp': u, v, w, temp, salt = self.data_interp(i, newtimes[loop_i]) else: logger.warn("Method for computing u,v,w,temp,salt is unknown. Only 'nearest' and 'interp' are supported.") # Get the bathy value at the particles location if self.usebathy is True: bathymetry_value = self._bathymetry.get_depth(self.part.location) else: bathymetry_value = -999999999999999 # Age the particle by the modelTimestep (seconds) # 'Age' meaning the amount of time it has been forced. 
self.part.age(seconds=modelTimestep[loop_i]) # loop over models - sort these in the order you want them to run for model in self.models: movement = model.move(self.part, u, v, w, modelTimestep[loop_i], temperature=temp, salinity=salt, bathymetry_value=bathymetry_value) newloc = Location4D(latitude=movement['latitude'], longitude=movement['longitude'], depth=movement['depth'], time=newtimes[loop_i+1]) logger.debug("%s - moved %.3f meters (horizontally) and %.3f meters (vertically) by %s with data from %s" % (self.part.logstring(), movement['distance'], movement['vertical_distance'], model.__class__.__name__, newtimes[loop_i].isoformat())) if newloc: self.boundary_interaction(particle=self.part, starting=self.part.location, ending=newloc, distance=movement['distance'], angle=movement['angle'], azimuth=movement['azimuth'], reverse_azimuth=movement['reverse_azimuth'], vertical_distance=movement['vertical_distance'], vertical_angle=movement['vertical_angle']) logger.debug("%s - was forced by %s and is now at %s" % (self.part.logstring(), model.__class__.__name__, self.part.location.logstring())) self.part.note = self.part.outputstring() # Each timestep, save the particles status and environmental variables. # This keep fields such as temp, salt, halted, settled, and dead matched up with the number of timesteps self.part.save() self.dataset.closenc() # We won't pull data for the last entry in locations, but we need to populate it with fill data. self.part.fill_environment_gap() if self.usebathy is True: self._bathymetry.close() if self.useshore is True: self._shoreline.close() return self.part
def run(self): self.load_initial_dataset() redis_connection = None if self.redis_url is not None and self.redis_results_channel is not None: import redis redis_connection = redis.from_url(self.redis_url) # Setup shoreline self._shoreline = None if self.useshore is True: self._shoreline = Shoreline(path=self.shoreline_path, feature_name=self.shoreline_feature, point=self.release_location_centroid, spatialbuffer=self.shoreline_index_buffer) # Make sure we are not starting on land. Raises exception if we are. self._shoreline.intersect(start_point=self.release_location_centroid, end_point=self.release_location_centroid) # Setup Bathymetry if self.usebathy is True: try: self._bathymetry = Bathymetry(file=self.bathy_path) except Exception: logger.exception("Could not load Bathymetry file: %s, using no Bathymetry for this run!" % self.bathy_path) self.usebathy = False # Calculate datetime at every timestep modelTimestep, newtimes = AsaTransport.get_time_objects_from_model_timesteps(self.times, start=self.start_time) if self.time_method == 'interp': time_indexs = self.timevar.nearest_index(newtimes, select='before') elif self.time_method == 'nearest': time_indexs = self.timevar.nearest_index(newtimes) else: logger.warn("Method for computing u,v,w,temp,salt not supported!") try: assert len(newtimes) == len(time_indexs) except AssertionError: logger.exception("Time indexes are messed up. Need to have equal datetime and time indexes") raise # Keep track of how much time we spend in each area. tot_boundary_time = 0. tot_model_time = {} tot_read_data = 0. for m in self.models: tot_model_time[m.name] = 0. # Set the base conditions # If using Redis, send the results if redis_connection is not None: redis_connection.publish(self.redis_results_channel, json.dumps(self.particle.timestep_dump())) # loop over timesteps # We don't loop over the last time_index because # we need to query in the time_index and set the particle's # location as the 'newtime' object. for loop_i, i in enumerate(time_indexs[0:-1]): if self.active and self.active.value is False: raise ValueError("Particle exiting due to Failure.") newloc = None st = time.clock() # Get the variable data required by the models if self.time_method == 'nearest': u, v, w, temp, salt = self.get_nearest_data(i) elif self.time_method == 'interp': u, v, w, temp, salt = self.get_linterp_data(i, newtimes[loop_i]) else: logger.warn("Method for computing u,v,w,temp,salt is unknown. Only 'nearest' and 'interp' are supported.") tot_read_data += (time.clock() - st) # Get the bathy value at the particles location if self.usebathy is True: bathymetry_value = self._bathymetry.get_depth(self.particle.location) else: bathymetry_value = -999999999999999 # Age the particle by the modelTimestep (seconds) # 'Age' meaning the amount of time it has been forced. 
        self.particle.age(seconds=modelTimestep[loop_i])

        # loop over models - sort these in the order you want them to run
        for model in self.models:
            st = time.clock()
            movement = model.move(self.particle, u, v, w, modelTimestep[loop_i], temperature=temp, salinity=salt, bathymetry_value=bathymetry_value)
            newloc = Location4D(latitude=movement['latitude'], longitude=movement['longitude'], depth=movement['depth'], time=newtimes[loop_i + 1])
            tot_model_time[model.name] += (time.clock() - st)
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug("%s - moved %.3f meters (horizontally) and %.3f meters (vertically) by %s with data from %s" % (self.particle.logstring(), movement['distance'], movement['vertical_distance'], model.__class__.__name__, newtimes[loop_i].isoformat()))
            if newloc:
                st = time.clock()
                self.boundary_interaction(particle=self.particle, starting=self.particle.location, ending=newloc,
                                          distance=movement['distance'], angle=movement['angle'],
                                          azimuth=movement['azimuth'], reverse_azimuth=movement['reverse_azimuth'],
                                          vertical_distance=movement['vertical_distance'], vertical_angle=movement['vertical_angle'])
                tot_boundary_time += (time.clock() - st)
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug("%s - was forced by %s and is now at %s" % (self.particle.logstring(), model.__class__.__name__, self.particle.location.logstring()))

        self.particle.note = self.particle.outputstring()
        # Each timestep, save the particle's status and environmental variables.
        # This keeps fields such as temp, salt, halted, settled, and dead matched up with the number of timesteps
        self.particle.save()

        # If using Redis, send the results
        if redis_connection is not None:
            redis_connection.publish(self.redis_results_channel, json.dumps(self.particle.timestep_dump()))

    self.dataset.closenc()

    # We won't pull data for the last entry in locations, but we need to populate it with fill data.
    self.particle.fill_gap()

    if self.usebathy is True:
        self._bathymetry.close()

    if self.useshore is True:
        self._shoreline.close()

    logger.info(textwrap.dedent('''Particle %i Stats:
                  Data read: %f seconds
                  Model forcing: %s seconds
                  Boundary intersection: %f seconds''' % (self.particle.uid, tot_read_data, {s: '{:g} seconds'.format(f) for s, f in list(tot_model_time.items())}, tot_boundary_time)))

    return self.particle
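# --- Illustrative sketch (not part of the original module) ---
# The forcing loop above expects each model to expose a move() method returning a
# dict with at least 'latitude', 'longitude', 'depth', 'distance', 'angle',
# 'azimuth', 'reverse_azimuth', 'vertical_distance' and 'vertical_angle' keys,
# plus a name attribute used for the timing summary. The stub below is a
# hypothetical, minimal example of that interface; the class name and the
# zero-displacement behaviour are assumptions for illustration only.
class NullTransportModel(object):
    name = "nulltransport"

    def move(self, particle, u, v, w, seconds, **kwargs):
        # Leave the particle exactly where it is.
        loc = particle.location
        return {'latitude': loc.latitude, 'longitude': loc.longitude,
                'depth': loc.depth, 'distance': 0., 'angle': 0.,
                'azimuth': 0., 'reverse_azimuth': 0.,
                'vertical_distance': 0., 'vertical_angle': 0.}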
def setup_run(self, hydrodataset, **kwargs): super(CachingModelController, self).setup_run(hydrodataset, **kwargs) # Get the number of cores (may take some tuning) and create that # many workers then pass particles into the queue for the workers self.mgr = multiprocessing.Manager() # This tracks if the system is 'alive'. Most looping whiles will check this # and break out if it is False. This is True until something goes very wrong. self.active = self.mgr.Value('bool', True) # Either spin up the number of cores, or the number of tasks self.nproc = min(multiprocessing.cpu_count() - 1, self.total_task_count()) # Number of tasks that we need to run. This is decremented everytime something exits. self.n_run = self.mgr.Value('int', self.total_task_count()) # The lock that controls access to the 'n_run' variable self.nproc_lock = self.mgr.Lock() # Create the task queue for all of the particles and the CachingDataController self.tasks = multiprocessing.JoinableQueue(self.total_task_count()) # Create the result queue for all of the particles and the CachingDataController self.results = self.mgr.Queue(self.total_task_count()) # Should we remove the cache file at the end of the run? self.remove_cache = kwargs.get("remove_cache", False) self.cache_path = kwargs.get("cache_path", None) # Create a temp file for the cache if nothing was passed in if self.cache_path is None: default_cache_dir = os.path.join(os.path.dirname(__file__), "_cache") temp_name = AsaRandom.filename(prefix=str( datetime.now().microsecond), suffix=".nc") self.cache_path = os.path.join(default_cache_dir, temp_name) # Be sure the cache directory exists if not os.path.exists(os.path.dirname(self.cache_path)): logger.info("Creating cache directory: %s" % self.cache_path) os.makedirs(os.path.dirname(self.cache_path)) # Create the shared state objects # Particles use this to tell the Data Controller to "get_data". # The CachingDataController sets this to False when it is done writing to the cache file. # Particles will wait for this to be False before reading from the cache file. # If we are caching, this starts as True so the Particles don't take off. If we # are not caching, this is False so the Particles can start immediatly. self.get_data = self.mgr.Value('bool', True) # Particles use this to tell the DataContoller which indices to 'get_data' for self.point_get = self.mgr.Value('list', [0, 0, 0]) # This locks access to the 'has_data_request_lock' value self.data_request_lock = self.mgr.Lock() # This tracks which Particle PID is asking the CachingDataController for data self.has_data_request_lock = self.mgr.Value('int', -1) # The lock that controls access to modifying 'has_read_lock' and 'read_count' self.read_lock = self.mgr.Lock() # List of Particle PIDs that are reading from the cache self.has_read_lock = self.mgr.list() # The number of Particles that are reading from the cache self.read_count = self.mgr.Value('int', 0) # When something is writing to the cache file self.write_lock = self.mgr.Lock() # PID of process with lock self.has_write_lock = self.mgr.Value('int', -1)
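# --- Illustrative sketch (not part of the original module) ---
# The read_lock / read_count / has_read_lock trio created above is used by the
# forcers as a manual readers count: take the lock, bump the count and record
# your PID before touching the cache file, then undo all three afterwards. The
# hypothetical context manager below shows that protocol in one place; the name
# and structure are assumptions for illustration, not a drop-in replacement.
import os
from contextlib import contextmanager


@contextmanager
def cache_reader(read_lock, read_count, has_read_lock):
    # Register this process as a reader of the shared cache file.
    with read_lock:
        read_count.value += 1
        has_read_lock.append(os.getpid())
    try:
        yield
    finally:
        with read_lock:
            read_count.value -= 1
            has_read_lock.remove(os.getpid())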
def listen_for_results(self, output_h5_file, total_particles):
    try:
        # Get results back from queue, test for failed particles
        return_particles = []
        retrieved = 0.
        self.error_code = 0

        logger.info("Waiting for %i particle results" % total_particles)
        while retrieved < self.total_task_count():  # One for the CachingDataController
            logger.info("looping in listen_for_results")
            try:
                # Returns a tuple of code, result
                code, tempres = self.results.get(timeout=240)
            except queue.Empty:
                # Poll the active processes to make sure they are all alive and then continue with loop
                if not self.data_controller_process.is_alive() and self.data_controller_process.exitcode != 0:
                    # Data controller is zombied, kill off other processes.
                    self.get_data.value = False
                    self.results.put((-2, "CachingDataController"))

                new_procs = []
                old_procs = []
                for p in self.procs:
                    if not p.is_alive() and p.exitcode != 0:
                        # Do what the Consumer would do if something finished.
                        # Add something to results queue
                        self.results.put((-3, "ZombieParticle"))
                        # Decrement nproc (CachingDataController exits when this is 0)
                        with self.nproc_lock:
                            self.n_run.value = self.n_run.value - 1

                        # Remove task from queue (so they can be joined later on)
                        self.tasks.task_done()

                        # Start a new Consumer.  It will exit if there are no tasks available.
                        np = Consumer(self.tasks, self.results, self.n_run, self.nproc_lock, self.active, self.get_data, name=p.name)
                        new_procs.append(np)
                        old_procs.append(p)

                        # Release any locks the PID had
                        if p.pid in self.has_read_lock:
                            with self.read_lock:
                                self.read_count.value -= 1
                                self.has_read_lock.remove(p.pid)

                        if self.has_data_request_lock.value == p.pid:
                            self.has_data_request_lock.value = -1
                            try:
                                self.data_request_lock.release()
                            except:
                                pass

                        if self.has_write_lock.value == p.pid:
                            self.has_write_lock.value = -1
                            try:
                                self.write_lock.release()
                            except:
                                pass

                for p in old_procs:
                    try:
                        self.procs.remove(p)
                    except ValueError:
                        logger.warn("Did not find %s in the list of processes. Continuing on." % p.name)

                for p in new_procs:
                    self.procs.append(p)
                    logger.warn("Started a new consumer (%s) to replace a zombie consumer" % p.name)
                    p.start()
            else:
                # We got one.
                retrieved += 1
                if code is None:
                    logger.warn("Got an unrecognized response from a task.")
                elif code == -1:
                    logger.warn("Particle %s has FAILED!!" % tempres.uid)
                elif code == -2:
                    self.error_code = code
                    logger.warn("CachingDataController has FAILED!! Removing cache file so the particles fail.")
                    try:
                        os.remove(self.cache_path)
                    except OSError:
                        logger.debug("Could not remove cache file, it probably never existed")
                elif code == -3:
                    self.error_code = code
                    logger.info("A zombie process was caught and task was removed from queue")
                elif isinstance(tempres, Particle):
                    logger.info("Particle %d finished" % tempres.uid)
                    return_particles.append(tempres)
                    # We multiply by 90 here to leave room for the exporting progress
                    logger.progress((round((retrieved / self.total_task_count()) * 90., 1), "Particle %d finished" % tempres.uid))
                elif tempres == "CachingDataController":
                    logger.info("CachingDataController finished")
                    logger.progress((round((retrieved / self.total_task_count()) * 90., 1), "CachingDataController finished"))
                else:
                    logger.info("Got a strange result on results queue")
                    logger.info(str(tempres))

                logger.info("Retrieved %i/%i results" % (int(retrieved), self.total_task_count()))

            # Relax
            time.sleep(1)

        if len(return_particles) != total_particles:
            logger.warn("Some particles failed and are not included in the output")

        # The results queue should be empty at this point
        assert self.results.empty() is True

        # Should be good to join on the tasks now that the queue is empty
        logger.info("Joining the task queue")
        self.tasks.join()
        self.tasks.close()
        self.tasks.join_thread()
    finally:
        # Join all processes
        logger.info("Joining the processes")
        for w in self.procs + [self.data_controller_process]:
            # Wait 20 seconds
            w.join(20.)
            if w.is_alive():
                # Process is hanging, kill it.
                logger.info("Terminating %s forcefully. This should have exited itself." % w.name)
                w.terminate()

    if self.error_code == -2:
        raise ValueError("Error in the BaseDataController (error_code was -2)")

    results = ex.ResultsPyTable(output_h5_file)
    for p in return_particles:
        for x in range(len(p.locations)):
            results.write(p.timestep_index_dump(x))
    results.compute()
    results.close()

    return
def start_tasks(self, **kwargs): try: logger.info('Starting CachingDataController') # Add data controller to the queue first so that it # can get the initial data and is not blocked data_controller = CachingDataController(self.hydrodataset, self.common_variables, self.n_run, self.get_data, self.write_lock, self.has_write_lock, self.read_lock, self.read_count, self.time_chunk, self.horiz_chunk, self.times, self.start, self.point_get, self.reference_location, cache_path=self.cache_path) self.tasks.put(data_controller) # Create CachingDataController worker self.data_controller_process = Consumer( self.tasks, self.results, self.n_run, self.nproc_lock, self.active, self.get_data, name="CachingDataController") self.data_controller_process.start() logger.info('Adding %i particles as tasks' % self.total_particle_count()) for part in self.particles: forcer = CachingForcer( self.cache_path, particle=part, common_variables=self.common_variables, timevar=self.timevar, times=self.times, start_time=self.start, models=self._models, release_location_centroid=self.reference_location.point, usebathy=self._use_bathymetry, useshore=self._use_shoreline, usesurface=self._use_seasurface, reverse_distance=self.reverse_distance, bathy_path=self.bathy_path, shoreline_path=self.shoreline_path, shoreline_feature=self.shoreline_feature, time_method=self.time_method, shoreline_index_buffer=self.shoreline_index_buffer, get_data=self.get_data, read_lock=self.read_lock, has_read_lock=self.has_read_lock, read_count=self.read_count, point_get=self.point_get, data_request_lock=self.data_request_lock, has_data_request_lock=self.has_data_request_lock) self.tasks.put(forcer) # Create workers for the particles. self.procs = [ Consumer(self.tasks, self.results, self.n_run, self.nproc_lock, self.active, self.get_data, name="CachingForcer-%d" % i) for i in range(self.nproc - 1) ] logger.progress((5, 'Running model')) for w in self.procs: w.start() logger.info('Started %s' % w.name) return True except Exception: logger.exception("Something didn't start correctly!") return False
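# --- Illustrative sketch (not part of the original module) ---
# The Consumer workers started above are used as: pull a forcer or controller
# task off the joinable queue, call it, push a (code, result) tuple onto the
# results queue, mark the task done, and decrement the shared n_run counter on
# exit. The stripped-down process below is a hypothetical approximation of that
# contract inferred from how the workers are used here; it is not the library's
# actual Consumer class.
import multiprocessing


class SketchConsumer(multiprocessing.Process):
    def __init__(self, tasks, results, n_run, nproc_lock, name=None):
        super(SketchConsumer, self).__init__(name=name)
        self.tasks = tasks
        self.results = results
        self.n_run = n_run
        self.nproc_lock = nproc_lock

    def run(self):
        try:
            while True:
                try:
                    task = self.tasks.get(timeout=5)
                except Exception:
                    break  # no tasks available, exit
                try:
                    # Tasks are assumed to be callables here
                    self.results.put((0, task()))
                except Exception as e:
                    self.results.put((-1, e))
                finally:
                    self.tasks.task_done()
        finally:
            with self.nproc_lock:
                self.n_run.value -= 1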
def export(cls, folder, particles, datetimes, summary, **kwargs): """ Export particle data to CF trajectory convention netcdf file """ time_units = 'seconds since 1990-01-01 00:00:00' # Create netcdf file, overwrite existing if not os.path.exists(folder): os.makedirs(folder) filepath = os.path.join(folder, 'trajectories.nc') nc = netCDF4.Dataset(filepath, 'w') # Create netcdf dimensions nc.createDimension('time', None) nc.createDimension('particle', None) fillvalue = -9999.9 # Create netcdf variables time = nc.createVariable('time', 'i', ('time',)) part = nc.createVariable('particle', 'i', ('particle',)) depth = nc.createVariable('depth', 'f', ('time', 'particle')) lat = nc.createVariable('lat', 'f', ('time', 'particle'), fill_value=fillvalue) lon = nc.createVariable('lon', 'f', ('time', 'particle'), fill_value=fillvalue) salt = nc.createVariable('salt', 'f', ('time', 'particle'), fill_value=fillvalue) temp = nc.createVariable('temp', 'f', ('time', 'particle'), fill_value=fillvalue) u = nc.createVariable('u', 'f', ('time', 'particle'), fill_value=fillvalue) v = nc.createVariable('v', 'f', ('time', 'particle'), fill_value=fillvalue) w = nc.createVariable('w', 'f', ('time', 'particle'), fill_value=fillvalue) settled = nc.createVariable('settled', 'f', ('time', 'particle'), fill_value=fillvalue) dead = nc.createVariable('dead', 'f', ('time', 'particle'), fill_value=fillvalue) halted = nc.createVariable('halted', 'f', ('time', 'particle'), fill_value=fillvalue) # Loop through locations in each particle, # add to netcdf file for j, particle in enumerate(particles): part[j] = particle.uid i = 0 normalized_locations = particle.normalized_locations(datetimes) normalized_temps = [x if x is not None and not math.isnan(x) else fillvalue for x in particle.temps] normalized_salts = [x if x is not None and not math.isnan(x) else fillvalue for x in particle.salts] normalized_u = [x if x is not None and not math.isnan(x) else fillvalue for x in particle.u_vectors] normalized_v = [x if x is not None and not math.isnan(x) else fillvalue for x in particle.v_vectors] normalized_w = [x if x is not None and not math.isnan(x) else fillvalue for x in particle.w_vectors] normalized_settled = [x if x is not None and not math.isnan(x) else fillvalue for x in particle.settles] normalized_dead = [x if x is not None and not math.isnan(x) else fillvalue for x in particle.deads] normalized_halted = [x if x is not None and not math.isnan(x) else fillvalue for x in particle.halts] if len(normalized_locations) != len(normalized_temps): logger.info("No temperature being added to netcdf.") # Create list of 'fillvalue' equal to the length of locations normalized_temps = [fillvalue] * len(normalized_locations) if len(normalized_locations) != len(normalized_salts): logger.info("No salinity being added to netcdf.") # Create list of 'fillvalue' equal to the length of locations normalized_salts = [fillvalue] * len(normalized_locations) if len(normalized_locations) != len(normalized_u): logger.info("No U being added to netcdf.") # Create list of 'fillvalue' equal to the length of locations normalized_u = [fillvalue] * len(normalized_locations) if len(normalized_locations) != len(normalized_v): logger.info("No V being added to netcdf.") # Create list of 'fillvalue' equal to the length of locations normalized_v = [fillvalue] * len(normalized_locations) if len(normalized_locations) != len(normalized_w): logger.info("No W being added to netcdf.") # Create list of 'fillvalue' equal to the length of locations normalized_w = [fillvalue] * 
len(normalized_locations) if len(normalized_locations) != len(normalized_settled): logger.info("No Settled being added to shapefile.") # Create list of 'fillvalue' equal to the length of locations normalized_settled = [fillvalue] * len(normalized_locations) if len(normalized_locations) != len(normalized_dead): logger.info("No Dead being added to shapefile.") # Create list of 'fillvalue' equal to the length of locations normalized_dead = [fillvalue] * len(normalized_locations) if len(normalized_locations) != len(normalized_halted): logger.info("No Halted being added to shapefile.") # Create list of 'fillvalue' equal to the length of locations normalized_halted = [fillvalue] * len(normalized_locations) for loc, _temp, _salt, _u, _v, _w, _settled, _dead, _halted in zip(normalized_locations, normalized_temps, normalized_salts, normalized_u, normalized_v, normalized_w, normalized_settled, normalized_dead, normalized_halted): if j == 0: time[i] = int(round(netCDF4.date2num(loc.time, time_units))) depth[i, j] = loc.depth lat[i, j] = loc.latitude lon[i, j] = loc.longitude salt[i, j] = _salt temp[i, j] = _temp u[i, j] = _u v[i, j] = _v w[i, j] = _w settled[i, j] = _settled dead[i, j] = _dead halted[i, j] = _halted i += 1 # Variable attributes depth.coordinates = "time particle lat lon" depth.standard_name = "depth_below_sea_surface" depth.units = "m" depth.POSITIVE = "up" depth.positive = "up" salt.coordinates = "time particle lat lon" salt.standard_name = "sea_water_salinity" salt.units = "psu" temp.coordinates = "time particle lat lon" temp.standard_name = "sea_water_temperature" temp.units = "degrees_C" u.coordinates = "time particle lat lon" u.standard_name = "eastward_sea_water_velocity" u.units = "m/s" v.coordinates = "time particle lat lon" v.standard_name = "northward_sea_water_velocity" v.units = "m/s" w.coordinates = "time particle lat lon" w.standard_name = "upward_sea_water_velocity" w.units = "m/s" settled.coordinates = "time particle lat lon" settled.description = "Is the particle settled" settled.standard_name = "particle_settled" dead.coordinates = "time particle lat lon" dead.description = "Is the particle dead" dead.standard_name = "particle_dead" halted.coordinates = "time particle lat lon" halted.description = "Is the particle prevented from being forced by currents" halted.standard_name = "particle_halted" time.units = time_units time.standard_name = "time" lat.units = "degrees_north" lon.units = "degrees_east" part.cf_role = "trajectory_id" # Global attributes nc.featureType = "trajectory" nc.summary = str(summary) for key in kwargs: nc.__setattr__(key, kwargs.get(key)) nc.sync() nc.close()
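# --- Illustrative usage sketch (not part of the original module) ---
# Reading the trajectory file written above back in is straightforward with
# netCDF4; the variable names are exactly the ones created by this export, but
# the file path is an assumption for illustration.
import netCDF4

nc = netCDF4.Dataset("trajectories.nc")
times = netCDF4.num2date(nc.variables["time"][:], nc.variables["time"].units)
lats = nc.variables["lat"][:]   # shape (time, particle), fill value -9999.9
lons = nc.variables["lon"][:]
print("%i timesteps, %i particles" % (lats.shape[0], lats.shape[1]))
nc.close()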
def export(cls, folder, particles, datetimes): shape_schema = { 'geometry': 'Point', 'properties': OrderedDict([('Particle', 'int'), ('Date', 'str'), ('Lat', 'float'), ('Lon', 'float'), ('Depth', 'float'), ('Temp', 'float'), ('Salt', 'float'), ('U', 'float'), ('V', 'float'), ('W', 'float'), ('Settled', 'str'), ('Dead', 'str'), ('Halted', 'str'), ('Age', 'float'), ('Notes', 'str')]) } shape_crs = { 'no_defs': True, 'ellps': 'WGS84', 'datum': 'WGS84', 'proj': 'longlat' } filepath = os.path.join(folder, "gdalshape.shp") with collection(filepath, "w", driver='ESRI Shapefile', schema=shape_schema, crs=shape_crs) as shape: for particle in particles: normalized_locations = particle.normalized_locations(datetimes) normalized_temps = particle.temps normalized_salts = particle.salts normalized_u = particle.u_vectors normalized_v = particle.v_vectors normalized_w = particle.w_vectors normalized_settled = particle.settles normalized_dead = particle.deads normalized_halted = particle.halts normalized_ages = particle.ages normalized_notes = particle.notes if len(normalized_locations) != len(normalized_temps): logger.info("No temperature being added to shapefile.") # Create list of 'None' equal to the length of locations normalized_temps = [-9999.9] * len(normalized_locations) else: # Replace any None with fill value normalized_temps = (-9999.9 if not x else x for x in normalized_temps) if len(normalized_locations) != len(normalized_salts): logger.info("No salinity being added to shapefile.") # Create list of 'None' equal to the length of locations normalized_salts = [-9999.9] * len(normalized_locations) else: # Replace any None with fill value normalized_salts = (-9999.9 if not x else x for x in normalized_salts) if len(normalized_locations) != len(normalized_u): logger.info("No U being added to shapefile.") # Create list of 'None' equal to the length of locations normalized_u = [-9999.9] * len(normalized_locations) else: # Replace any None with fill value normalized_u = (-9999.9 if not x else x for x in normalized_u) if len(normalized_locations) != len(normalized_v): logger.info("No V being added to shapefile.") # Create list of 'None' equal to the length of locations normalized_v = [-9999.9] * len(normalized_locations) else: # Replace any None with fill value normalized_v = (-9999.9 if not x else x for x in normalized_v) if len(normalized_locations) != len(normalized_w): logger.info("No W being added to shapefile.") # Create list of 'None' equal to the length of locations normalized_w = [-9999.9] * len(normalized_locations) else: # Replace any None with fill value normalized_w = (-9999.9 if not x else x for x in normalized_w) if len(normalized_locations) != len(normalized_settled): logger.info("No Settled being added to shapefile.") # Create list of 'None' equal to the length of locations normalized_settled = [None] * len(normalized_locations) if len(normalized_locations) != len(normalized_dead): logger.info("No Dead being added to shapefile.") # Create list of 'None' equal to the length of locations normalized_dead = [None] * len(normalized_locations) if len(normalized_locations) != len(normalized_halted): logger.info("No Halted being added to shapefile.") # Create list of 'None' equal to the length of locations normalized_halted = [None] * len(normalized_locations) if len(normalized_locations) != len(normalized_ages): logger.info("No W being added to shapefile.") # Create list of 'None' equal to the length of locations normalized_ages = [-9999.9] * len(normalized_locations) else: # Replace any None 
with fill value normalized_ages = (-9999.9 if not x else round(x, 3) for x in normalized_ages) if len(normalized_locations) != len(normalized_notes): logger.info("No Notes being added to shapefile.") # Create list of 'None' equal to the length of locations normalized_notes = [None] * len(normalized_locations) for loc, temp, salt, u, v, w, settled, dead, halted, age, note in zip( normalized_locations, normalized_temps, normalized_salts, normalized_u, normalized_v, normalized_w, normalized_settled, normalized_dead, normalized_halted, normalized_ages, normalized_notes): shape.write({ 'geometry': mapping(loc.point), 'properties': OrderedDict([('Particle', particle.uid), ('Date', unicode(loc.time.isoformat())), ('Lat', float(loc.latitude)), ('Lon', float(loc.longitude)), ('Depth', float(loc.depth)), ('Temp', float(temp)), ('Salt', float(salt)), ('U', float(u)), ('V', float(v)), ('W', float(w)), ('Settled', unicode(settled)), ('Dead', unicode(dead)), ('Halted', unicode(halted)), ('Age', float(age)), ('Notes', unicode(note))]) }) # Zip the output shpzip = zipfile.ZipFile(os.path.join(folder, "shapefile.shp.zip"), mode='w') for f in glob.glob(os.path.join(folder, "gdalshape*")): shpzip.write(f, os.path.basename(f)) os.remove(f) shpzip.close()
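# --- Illustrative usage sketch (not part of the original module) ---
# The collection(...) writer above appears to be fiona's; if so, the zipped
# shapefile written by this export can be read back as below. The paths are
# assumptions for illustration (the loose gdalshape.* files are deleted after
# zipping, so the archive has to be extracted first).
import zipfile

import fiona

with zipfile.ZipFile("shapefile.shp.zip") as z:
    z.extractall("unzipped")

with fiona.open("unzipped/gdalshape.shp") as source:
    for rec in source:
        props = rec["properties"]
        print(props["Particle"], props["Date"], props["Lat"], props["Lon"])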