def __reverse(self, **kwargs):
    """
    Reverse the particle to just off of the shore in the direction that it came in.
    Adds a slight random factor to the distance and angle it is reversed in.
    """
    #st = time.clock()
    start_point = kwargs.pop('start_point')
    hit_point = kwargs.pop('hit_point')
    reverse_azimuth = kwargs.pop('reverse_azimuth')
    reverse_distance = kwargs.get('reverse_distance', None)
    if reverse_distance is None:
        reverse_distance = 100

    # Randomize the reverse angle slightly (+/- 5 degrees)
    random_azimuth = reverse_azimuth + AsaRandom.random() * 5

    count = 0
    nudge_distance = 0.01
    nudge_point = AsaGreatCircle.great_circle(distance=nudge_distance, azimuth=reverse_azimuth, start_point=hit_point)
    nudge_loc = Location4D(latitude=nudge_point['latitude'], longitude=nudge_point['longitude'], depth=start_point.depth)

    # Find a point just offshore to do testing with.  Try up to 16 times (~350m).  This makes sure
    # the start_point is in the water for the next call to intersect (next while loop).
    while self.intersect(single_point=nudge_loc.point) and count < 16:
        nudge_distance *= 2
        nudge_point = AsaGreatCircle.great_circle(distance=nudge_distance, azimuth=reverse_azimuth, start_point=hit_point)
        nudge_loc = Location4D(latitude=nudge_point['latitude'], longitude=nudge_point['longitude'], depth=start_point.depth)
        count += 1

    # We tried 16 times and couldn't find a point.  This should never happen.
    if count == 16:
        logger.warn("LOOK: Could not find location in water to do shoreline calculation with. Assuming particle did not move from original location")
        return start_point

    # Keep trying to throw the particle back, halving the distance each time until it is in water.
    # Only halve it 6 times before giving up and returning the point which the particle came from.
    count = 0
    # Distance to halve each iteration
    changing_distance = reverse_distance
    new_point = AsaGreatCircle.great_circle(distance=reverse_distance, azimuth=random_azimuth, start_point=hit_point)
    new_loc = Location4D(latitude=new_point['latitude'], longitude=new_point['longitude'], depth=start_point.depth)

    # We don't want to reverse further than the current spatial buffer, because we would reindex the
    # source file every time we reverse, which would slow down the calculations considerably.
    while (not self._spatial_query_object.contains(new_loc.point) or self.intersect(start_point=nudge_loc.point, end_point=new_loc.point)) and count < 6:
        changing_distance /= 2
        new_point = AsaGreatCircle.great_circle(distance=changing_distance, azimuth=random_azimuth, start_point=hit_point)
        new_loc = Location4D(latitude=new_point['latitude'], longitude=new_point['longitude'], depth=start_point.depth)
        count += 1

    # We tried 6 times and the particle was still on shore; return the point the particle started from.
    # No randomization.
    if count == 6:
        logger.warn("LOOK: Could not react particle with shoreline. Assuming particle did not move from original location")
        return start_point

    #logger.info("Reaction time: %f" % (time.clock() - st))
    return new_loc
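# The core of the reversal above is a distance-halving retry: throw the particle back along the
# reverse azimuth and halve the throw until the candidate point stops intersecting the shoreline,
# giving up after six halvings.  Below is a self-contained sketch of just that loop, with a toy
# 1-D "shore at x >= 0" standing in for Shoreline.intersect(); it is illustrative only and uses
# none of the real great-circle or Shoreline machinery.
def reverse_until_clear(hit_x, reverse_distance=100.0, max_halvings=6):
    def on_shore(x):                          # stand-in intersect test
        return x >= 0.0

    count = 0
    changing_distance = reverse_distance
    new_x = hit_x - changing_distance         # initial throw straight back
    while on_shore(new_x) and count < max_halvings:
        changing_distance /= 2.0              # halve the throw and try again
        new_x = hit_x - changing_distance
        count += 1

    if count == max_halvings:
        return hit_x                          # give up: particle stays where it hit
    return new_x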
def index(self, **kwargs):
    """
    Queries the shapefile around a buffer of a point.
    The results of this spatial query are used for shoreline detection.

    Using the entire shapefile without the spatial query takes over 30 times
    longer with world land polygons.
    """
    point = kwargs.pop("point", None)
    spatialbuffer = kwargs.pop("spatialbuffer", self._spatialbuffer)

    self._layer.SetSpatialFilter(None)
    self._spatial_query_object = None

    if point:
        self._spatial_query_object = point.buffer(spatialbuffer)
        poly = ogr.CreateGeometryFromWkt(self._spatial_query_object.wkt)
        self._layer.SetSpatialFilter(poly)
        poly.Destroy()

    self._geoms = []
    # The _geoms should be only Polygons, not MultiPolygons
    for element in self._layer:
        try:
            geom = wkb.loads(element.GetGeometryRef().ExportToWkb())
            if isinstance(geom, Polygon):
                self._geoms.append(geom)
            elif isinstance(geom, MultiPolygon):
                for poly in geom:
                    self._geoms.append(poly)
        except:
            logger.warn("Could not find valid geometry in shoreline element. Point: %s, Buffer: %s" % (str(point), str(spatialbuffer)))
def index(self, point=None, spatialbuffer=None):
    """
    Queries the shapefile around a buffer of a point.
    The results of this spatial query are used for shoreline detection.

    Using the entire shapefile without the spatial query takes over 30 times
    longer with world land polygons.
    """
    spatialbuffer = spatialbuffer or self._spatialbuffer

    self._spatial_query_object = None
    geoms = []

    if point:
        self._spatial_query_object = point.buffer(spatialbuffer)
        geoms = self.get_geoms_for_bounds(self._spatial_query_object.envelope.bounds)

    self._geoms = []
    # The _geoms should be only Polygons, not MultiPolygons
    for geom in geoms:
        try:
            if isinstance(geom, Polygon):
                self._geoms.append(geom)
            elif isinstance(geom, MultiPolygon):
                for poly in geom:
                    self._geoms.append(poly)
        except:
            logger.warn("Could not find valid geometry in shoreline element. Point: %s, Buffer: %s" % (str(point), str(spatialbuffer)))
def run(self):
    while True:
        try:
            next_task = self.task_queue.get(True, 10)
        except queue.Empty:
            logger.info("No tasks left to complete, closing %s" % self.name)
            break
        else:
            answer = (None, None)
            try:
                answer = (1, next_task(self.active))
            except Exception:
                logger.exception("Disabling Error")
                if isinstance(next_task, CachingDataController):
                    answer = (-2, "CachingDataController")
                    # Tell the particles that the CachingDataController is releasing file
                    self.get_data.value = False
                    # The data controller has died, so don't process any more tasks
                    self.active.value = False
                elif isinstance(next_task, BaseForcer):
                    answer = (-1, next_task.particle)
                else:
                    logger.warn("Strange task raised an exception: %s" % str(next_task.__class__))
                    answer = (None, None)
            finally:
                self.result_queue.put(answer)

                self.nproc_lock.acquire()
                self.n_run.value = self.n_run.value - 1
                self.nproc_lock.release()

                self.task_queue.task_done()
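# A self-contained sketch of the worker pattern the Consumer above implements: pull tasks from a
# JoinableQueue, push (code, result) tuples onto a results queue, decrement a shared run counter,
# and mark the task done.  The names here are illustrative; only the overall queue/counter
# handshake mirrors the class above.
import multiprocessing
import queue


class SquareTask(object):
    """Stand-in for the picklable task objects (forcers, data controllers) used above."""
    def __init__(self, value):
        self.value = value

    def __call__(self):
        return self.value * self.value


def worker(task_queue, result_queue, n_run, nproc_lock, timeout=10):
    while True:
        try:
            next_task = task_queue.get(True, timeout)
        except queue.Empty:
            break
        try:
            result_queue.put((1, next_task()))    # success code, mirroring the class above
        except Exception:
            result_queue.put((-1, None))          # failure code
        finally:
            with nproc_lock:
                n_run.value -= 1
            task_queue.task_done()


if __name__ == "__main__":
    mgr = multiprocessing.Manager()
    tasks = multiprocessing.JoinableQueue()
    results = mgr.Queue()
    n_run = mgr.Value('int', 2)
    lock = mgr.Lock()
    for x in (1, 2):
        tasks.put(SquareTask(x))
    worker(tasks, results, n_run, lock, timeout=1)   # run inline for the sketch
    print([results.get() for _ in (1, 2)])           # -> [(1, 1), (1, 4)]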
def load_initial_dataset(self):
    """
    Initialize self.dataset, then close it.

    A cacher will have to wrap this in locks, while a straight runner will not.
    """
    try:
        self.dataset = CommonDataset.open(self.hydrodataset)
        if self.timevar is None:
            self.timevar = self.dataset.gettimevar(self.common_variables.get("u"))
    except Exception:
        logger.warn("No source dataset: %s. Particle exiting" % self.hydrodataset)
        raise
def attempt(self, particle, depth):

    # We may want to have settlement affect the u/v/w in the future
    u = 0
    v = 0
    w = 0

    # If the particle is settled, don't move it anywhere
    if particle.settled:
        return (0, 0, 0)

    # A particle is negative down from the sea surface, so "-3" is 3 meters below the surface.
    # We are assuming here that the bathymetry is also negative down.
    if self.type.lower() == "benthic":
        # Is the sea floor within the upper and lower bounds?
        if self.upper >= depth >= self.lower:
            # Move the particle to the sea floor.
            # TODO: Should the particle just swim downwards?
            newloc = Location4D(location=particle.location)
            newloc.depth = depth
            particle.location = newloc
            particle.settle()
            logger.info("Particle %d settled in %s mode" % (particle.uid, self.type))

    elif self.type.lower() == "pelagic":
        # Are we in enough water to settle?
        # Ignore this bathymetry test since we would need a high resolution
        # dataset for this to work.
        #if self.upper >= depth:

        # Is the particle within the range?
        if self.upper >= particle.location.depth >= self.lower:
            # Just settle the particle
            particle.settle()
            logger.info("Particle %d settled in %s mode" % (particle.uid, self.type))
        else:
            logger.debug("Particle did NOT settle. Depth conditions not met. Upper limit: %d - Lower limit: %d - Particle: %d" % (self.upper, self.lower, particle.location.depth))
        #else:
        #    logger.info("Particle did NOT settle. Water not deep enough. Upper limit: %d - Bathymetry: %d" % (self.upper, depth))

    else:
        logger.warn("Settlement type %s not recognized, not trying to settle Particle %d." % (self.type, particle.uid))

    return (u, v, w)
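# Hedged usage sketch: attempt() is written to be called once per timestep with the particle and
# the (negative-down) bathymetric depth at its location, and its (u, v, w) return value is reserved
# for a future where settlement also nudges velocity.  The objects named below are placeholders,
# not values from this codebase.
#
#   u, v, w = settlement.attempt(particle, bathymetry_value)
#   if particle.settled:
#       ...  # stop applying further movement models to this particle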
def react(self, **kwargs):
    """
    Bounce off of a shoreline.

    feature = LineString of two points, being the line segment the particle hit.
    angle = decimal degrees from 0 (x-axis), counter-clockwise (math style)
    """
    if self._type == "bounce":
        logger.warn("This shoreline type is NOT SUPPORTED and is broken")
        return self.__bounce(**kwargs)
    elif self._type == "reverse":
        return self.__reverse(**kwargs)
    else:
        logger.warn("Not reacting to shoreline (sticky with infinite concentration)")
        return kwargs.get('hit_point')
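# Hedged sketch of a "reverse" reaction request.  The keyword names match what __reverse() pops
# from kwargs; the shoreline object and the Location4D points themselves are placeholders.
#
#   new_location = shoreline.react(start_point=start_location,        # last in-water position
#                                  hit_point=hit_location,            # where the path crossed the shore
#                                  reverse_azimuth=reverse_azimuth,   # bearing back the way it came
#                                  reverse_distance=100)              # meters; defaults to 100 when omitted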
def load_initial_dataset(self):
    """
    Initialize self.dataset, then close it.

    A cacher will have to wrap this in locks, while a straight runner will not.
    """
    try:
        with self.read_lock:
            self.read_count.value += 1
            self.has_read_lock.append(os.getpid())
        self.dataset = CommonDataset.open(self.hydrodataset)
        self.dataset.closenc()
    except Exception:
        logger.warn("No source dataset: %s. Particle exiting" % self.hydrodataset)
        raise
    finally:
        with self.read_lock:
            self.read_count.value -= 1
            self.has_read_lock.remove(os.getpid())
def run(self, hydrodataset, **kwargs):

    self.hydrodataset = hydrodataset

    self.setup_run(**kwargs)

    logger.progress((4, "Starting tasks"))
    self.result = self.start_tasks()
    if self.result is None:
        raise BaseDataControllerError("Not all tasks started! Exiting.")

    # This blocks until the tasks are all done.
    self.particles = self.listen_for_results()

    logger.info('Consumers are all finished!')

    logger.info('Cleaning up')
    self.cleanup()

    if len(self.particles) > 0:
        # If output_formats and path specified,
        # output particle run data to disk when completed
        if "output_formats" in kwargs:
            logger.progress((96, "Exporting results"))

            # Make sure output_path is also included
            if kwargs.get("output_path", None) is not None:
                formats = kwargs.get("output_formats")
                output_path = kwargs.get("output_path")
                if isinstance(formats, list):
                    for format in formats:
                        logger.info("Exporting to: %s" % format)
                        try:
                            self.export(output_path, format=format)
                        except:
                            logger.exception("Failed to export to: %s" % format)
                else:
                    logger.warn('The output_formats parameter should be a list, not saving any output!')
            else:
                logger.warn('No output path defined, not saving any output!')
        else:
            logger.warn('No output format defined, not saving any output!')
    else:
        logger.warn("Model didn't actually do anything, check the log.")
        if self.error_code == -2:
            raise BaseDataControllerError("Error in the BaseDataController")
        else:
            raise ModelError("Error in the model")

    logger.progress((97, "Model Run Complete"))
    return self.particles
def __reverse(self, **kwargs):
    """
    Reverse the particle to just off of the shore in the direction that it came in.
    Adds a slight random factor to the distance and angle it is reversed in.
    """
    start_point = kwargs.pop('start_point')
    hit_point = kwargs.pop('hit_point')
    distance = kwargs.pop('distance')
    azimuth = kwargs.pop('azimuth')
    reverse_azimuth = kwargs.pop('reverse_azimuth')
    reverse_distance = kwargs.get('reverse_distance', None)
    if reverse_distance is None:
        reverse_distance = 100

    # Randomize the reverse angle slightly (+/- 5 degrees)
    random_azimuth = reverse_azimuth + AsaRandom.random() * 5

    # Nudge the hit_point off of the shore by a tiny bit to test shoreline intersection in the while loop.
    nudged_hit_point = AsaGreatCircle.great_circle(distance=0.01, azimuth=random_azimuth, start_point=hit_point)
    nudged_hit_location = Location4D(latitude=nudged_hit_point['latitude'], longitude=nudged_hit_point['longitude'], depth=start_point.depth)

    new_point = AsaGreatCircle.great_circle(distance=reverse_distance, azimuth=random_azimuth, start_point=hit_point)
    new_loc = Location4D(latitude=new_point['latitude'], longitude=new_point['longitude'], depth=start_point.depth)

    # Keep trying to throw the particle back, halving the distance each time until it is in water.
    # Only halve it 10 times before giving up and returning the point which the particle came from.
    count = 0
    # Distance to halve each iteration
    changing_distance = reverse_distance
    while self.intersect(start_point=nudged_hit_location.point, end_point=new_loc.point) and count < 10:
        changing_distance /= 2
        new_point = AsaGreatCircle.great_circle(distance=changing_distance, azimuth=random_azimuth, start_point=hit_point)
        new_loc = Location4D(latitude=new_point['latitude'], longitude=new_point['longitude'], depth=start_point.depth)
        count += 1

    # We tried 10 times and the particle was still on shore; return the point the particle started from.
    # No randomization.
    if count == 10:
        logger.warn("Could not react particle with shoreline. Assuming particle did not move from original location")
        new_loc = start_point

    return new_loc
def index(self, point=None, spatialbuffer=None):
    spatialbuffer = spatialbuffer or self._spatialbuffer

    self._spatial_query_object = None
    geoms = []

    if point:
        self._spatial_query_object = point.buffer(spatialbuffer)
        bounds = point.buffer(spatialbuffer).envelope.wkt
        geoms = self.get_geoms_for_bounds(bounds)

    self._geoms = []
    for geom in geoms:
        try:
            if isinstance(geom, Polygon):
                self._geoms.append(geom)
            elif isinstance(geom, MultiPolygon):
                for poly in geom:
                    self._geoms.append(poly)
        except:
            logger.warn("Could not find valid geometry in shoreline element. Point: %s, Buffer: %s" % (str(point), str(spatialbuffer)))
def run(self, **kwargs):

    logger.progress((4, "Starting tasks"))
    self.result = self.start_tasks(**kwargs)
    if self.result is None:
        raise BaseDataControllerError("Not all tasks started! Exiting.")

    # Store results in hdf5 file for processing later
    output_h5_file = None
    if kwargs.get('output_path') is not None:
        output_h5_file = os.path.join(kwargs.get('output_path'), 'results.h5')

    if self.thread_result_listener is True:
        rl = threading.Thread(name="ResultListener", target=self.listen_for_results, args=(output_h5_file, self.total_particle_count()))
        rl.daemon = True
        rl.start()
        rl.join()  # This blocks until the tasks are all done.
    else:
        self.listen_for_results(output_h5_file, self.total_particle_count())  # This blocks until the tasks are all done.

    logger.info('Tasks are all finished... Cleaning up!!')
    self.cleanup()

    # If output_formats and path specified,
    # output particle run data to disk when completed
    if "output_formats" in kwargs:
        logger.progress((96, "Exporting results"))

        # Make sure output_path is also included
        if kwargs.get("output_path", None) is not None:
            formats = kwargs.get("output_formats")
            output_path = kwargs.get("output_path")
            if isinstance(formats, list):
                for fmt in formats:
                    logger.info("Exporting to: %s" % fmt)
                    try:
                        # Calls the export function
                        fmt.export(output_path, output_h5_file)
                    except:
                        logger.exception("Failed to export to: %s" % fmt)
            else:
                logger.warn('The output_formats parameter should be a list, not saving any output!')
        else:
            logger.warn('No output path defined, not saving any output!')
    else:
        logger.warn('No output_formats parameter was defined, not saving any output!')

    logger.progress((97, "Model Run Complete"))
    return
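# Hedged sketch of the keyword arguments run() above inspects.  output_formats is expected to be a
# list of exporter objects exposing export(output_path, output_h5_file); the class name below is
# illustrative, not an exporter from this codebase.
#
#   model.run(output_path="/tmp/run-output",
#             output_formats=[SomeExporter()])   # each entry gets .export(output_path, output_h5_file)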
def run(self):
    while True:
        try:
            next_task = self.task_queue.get(True, 10)
        except Queue.Empty:
            logger.info("No tasks left to complete, closing %s" % self.name)
            break
        else:
            answer = (None, None)
            try:
                answer = (1, next_task(self.name, self.active))
            except Exception as detail:
                exc_type, exc_value, exc_traceback = sys.exc_info()
                logger.error("Disabling Error: " + repr(traceback.format_exception(exc_type, exc_value, exc_traceback)))
                if isinstance(next_task, DataController):
                    answer = (-2, "DataController")
                    # Tell the particles that the DataController is releasing file
                    self.get_data.value = False
                    # The data controller has died, so don't process any more tasks
                    self.active.value = False
                elif isinstance(next_task, ForceParticle):
                    answer = (-1, next_task.part)
                else:
                    logger.warn("Strange task raised an exception: %s" % str(next_task.__class__))
                    answer = (None, None)
            finally:
                self.result_queue.put(answer)

                self.nproc_lock.acquire()
                self.n_run.value = self.n_run.value - 1
                self.nproc_lock.release()

                self.task_queue.task_done()
def fill_cache_with_linterp_data(self, i, currenttime):
    """
    Method to streamline requesting data from the cache.
    Uses linear interpolation between timesteps to get u, v, w, temp, salt.
    """
    if self.active.value is True:
        while self.get_data.value is True:
            logger.debug("Waiting for DataController to release cache file so I can read from it...")
            timer.sleep(2)
            pass

    if self.need_data(i + 1):
        # Acquire lock for asking for data
        self.data_request_lock.acquire()
        self.has_data_request_lock.value = os.getpid()
        try:
            # Do I still need data?
            if self.need_data(i + 1):

                # Tell the DataController that we are going to be reading from the file
                with self.read_lock:
                    self.read_count.value += 1
                    self.has_read_lock.append(os.getpid())

                # Open netcdf file on disk from commondataset
                self.dataset.opennc()
                # Get the indices for the current particle location
                indices = self.dataset.get_indices('u', timeinds=[np.asarray([i - 1])], point=self.particle.location)
                self.dataset.closenc()

                with self.read_lock:
                    self.read_count.value -= 1
                    self.has_read_lock.remove(os.getpid())

                # Override the time
                # get the current time index data
                self.point_get.value = [indices[0] + 1, indices[-2], indices[-1]]

                # Request that the data controller update the cache
                self.get_data.value = True
                # Wait until the data controller is done
                if self.active.value is True:
                    while self.get_data.value is True:
                        logger.debug("Waiting for DataController to update cache with the CURRENT time index")
                        timer.sleep(2)
                        pass

                # Do we still need to get the next timestep?
                if self.need_data(i + 1):

                    # get the next time index data
                    self.point_get.value = [indices[0] + 2, indices[-2], indices[-1]]

                    # Request that the data controller update the cache
                    self.get_data.value = True
                    # Wait until the data controller is done
                    if self.active.value is True:
                        while self.get_data.value is True:
                            logger.debug("Waiting for DataController to update cache with the NEXT time index")
                            timer.sleep(2)
                            pass

        except Exception:
            logger.warn("Particle failed to request data correctly")
            raise
        finally:
            # Release lock for asking for data
            self.has_data_request_lock.value = -1
            self.data_request_lock.release()
def run(self):
    self.load_initial_dataset()

    redis_connection = None
    if self.redis_url is not None and self.redis_results_channel is not None:
        import redis
        redis_connection = redis.from_url(self.redis_url)

    # Setup shoreline
    self._shoreline = None
    if self.useshore is True:
        self._shoreline = Shoreline(path=self.shoreline_path, feature_name=self.shoreline_feature, point=self.release_location_centroid, spatialbuffer=self.shoreline_index_buffer)
        # Make sure we are not starting on land.  Raises exception if we are.
        self._shoreline.intersect(start_point=self.release_location_centroid, end_point=self.release_location_centroid)

    # Setup Bathymetry
    if self.usebathy is True:
        try:
            self._bathymetry = Bathymetry(file=self.bathy_path)
        except Exception:
            logger.exception("Could not load Bathymetry file: %s, using no Bathymetry for this run!" % self.bathy_path)
            self.usebathy = False

    # Calculate datetime at every timestep
    modelTimestep, newtimes = AsaTransport.get_time_objects_from_model_timesteps(self.times, start=self.start_time)

    if self.time_method == 'interp':
        time_indexs = self.timevar.nearest_index(newtimes, select='before')
    elif self.time_method == 'nearest':
        time_indexs = self.timevar.nearest_index(newtimes)
    else:
        logger.warn("Method for computing u,v,w,temp,salt not supported!")
    try:
        assert len(newtimes) == len(time_indexs)
    except AssertionError:
        logger.exception("Time indexes are messed up. Need to have equal datetime and time indexes")
        raise

    # Keep track of how much time we spend in each area.
    tot_boundary_time = 0.
    tot_model_time = {}
    tot_read_data = 0.
    for m in self.models:
        tot_model_time[m.name] = 0.

    # Set the base conditions
    # If using Redis, send the results
    if redis_connection is not None:
        redis_connection.publish(self.redis_results_channel, json.dumps(self.particle.timestep_dump()))

    # Loop over timesteps.
    # We don't loop over the last time_index because we need to query in the
    # time_index and set the particle's location as the 'newtime' object.
    for loop_i, i in enumerate(time_indexs[0:-1]):

        if self.active and self.active.value is False:
            raise ValueError("Particle exiting due to Failure.")

        newloc = None

        st = time.clock()
        # Get the variable data required by the models
        if self.time_method == 'nearest':
            u, v, w, temp, salt = self.get_nearest_data(i)
        elif self.time_method == 'interp':
            u, v, w, temp, salt = self.get_linterp_data(i, newtimes[loop_i])
        else:
            logger.warn("Method for computing u,v,w,temp,salt is unknown. Only 'nearest' and 'interp' are supported.")
        tot_read_data += (time.clock() - st)

        # Get the bathy value at the particle's location
        if self.usebathy is True:
            bathymetry_value = self._bathymetry.get_depth(self.particle.location)
        else:
            bathymetry_value = -999999999999999

        # Age the particle by the modelTimestep (seconds).
        # 'Age' meaning the amount of time it has been forced.
        self.particle.age(seconds=modelTimestep[loop_i])

        # Loop over models - sort these in the order you want them to run
        for model in self.models:
            st = time.clock()
            movement = model.move(self.particle, u, v, w, modelTimestep[loop_i], temperature=temp, salinity=salt, bathymetry_value=bathymetry_value)
            newloc = Location4D(latitude=movement['latitude'], longitude=movement['longitude'], depth=movement['depth'], time=newtimes[loop_i + 1])
            tot_model_time[model.name] += (time.clock() - st)
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug("%s - moved %.3f meters (horizontally) and %.3f meters (vertically) by %s with data from %s" % (self.particle.logstring(), movement['distance'], movement['vertical_distance'], model.__class__.__name__, newtimes[loop_i].isoformat()))
            if newloc:
                st = time.clock()
                self.boundary_interaction(particle=self.particle, starting=self.particle.location, ending=newloc, distance=movement['distance'], angle=movement['angle'], azimuth=movement['azimuth'], reverse_azimuth=movement['reverse_azimuth'], vertical_distance=movement['vertical_distance'], vertical_angle=movement['vertical_angle'])
                tot_boundary_time += (time.clock() - st)
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug("%s - was forced by %s and is now at %s" % (self.particle.logstring(), model.__class__.__name__, self.particle.location.logstring()))

        self.particle.note = self.particle.outputstring()
        # Each timestep, save the particle's status and environmental variables.
        # This keeps fields such as temp, salt, halted, settled, and dead matched up with the number of timesteps.
        self.particle.save()

        # If using Redis, send the results
        if redis_connection is not None:
            redis_connection.publish(self.redis_results_channel, json.dumps(self.particle.timestep_dump()))

    self.dataset.closenc()

    # We won't pull data for the last entry in locations, but we need to populate it with fill data.
    self.particle.fill_gap()

    if self.usebathy is True:
        self._bathymetry.close()

    if self.useshore is True:
        self._shoreline.close()

    logger.info(textwrap.dedent('''Particle %i Stats:
                Data read: %f seconds
                Model forcing: %s seconds
                Boundary intersection: %f seconds''' % (self.particle.uid, tot_read_data, {s: '{:g} seconds'.format(f) for s, f in list(tot_model_time.items())}, tot_boundary_time)))

    return self.particle
def run(self, hydrodataset, **kwargs): # Add ModelController description to logfile logger.info(self) # Add the model descriptions to logfile for m in self._models: logger.info(m) # Calculate the model timesteps # We need times = len(self._nstep) + 1 since data is stored one timestep # after a particle is forced with the final timestep's data. times = range(0, (self._step * self._nstep) + 1, self._step) # Calculate a datetime object for each model timestep # This method is duplicated in DataController and ForceParticle # using the 'times' variables above. Will be useful in those other # locations for particles released at different times # i.e. released over a few days modelTimestep, self.datetimes = AsaTransport.get_time_objects_from_model_timesteps( times, start=self.start) time_chunk = self._time_chunk horiz_chunk = self._horiz_chunk low_memory = kwargs.get("low_memory", False) # Should we remove the cache file at the end of the run? remove_cache = kwargs.get("remove_cache", True) self.bathy_path = kwargs.get("bathy", None) self.cache_path = kwargs.get("cache", None) if self.cache_path is None: # Generate temp filename for dataset cache default_cache_dir = os.path.join(os.path.dirname(__file__), "_cache") temp_name = AsaRandom.filename(prefix=str( datetime.now().microsecond), suffix=".nc") self.cache_path = os.path.join(default_cache_dir, temp_name) logger.progress((1, "Setting up particle start locations")) point_locations = [] if isinstance(self.geometry, Point): point_locations = [self.reference_location] * self._npart elif isinstance(self.geometry, Polygon) or isinstance( self.geometry, MultiPolygon): point_locations = [ Location4D(latitude=loc.y, longitude=loc.x, depth=self._depth, time=self.start) for loc in AsaTransport.fill_polygon_with_points( goal=self._npart, polygon=self.geometry) ] # Initialize the particles logger.progress((2, "Initializing particles")) for x in xrange(0, self._npart): p = LarvaParticle(id=x) p.location = point_locations[x] # We don't need to fill the location gaps here for environment variables # because the first data collected actually relates to this original # position. # We do need to fill in fields such as settled, halted, etc. p.fill_status_gap() # Set the inital note p.note = p.outputstring() p.notes.append(p.note) self.particles.append(p) # This is where it makes sense to implement the multiprocessing # looping for particles and models. Can handle each particle in # parallel probably. 
# # Get the number of cores (may take some tuning) and create that # many workers then pass particles into the queue for the workers mgr = multiprocessing.Manager() nproc = multiprocessing.cpu_count() - 1 if nproc <= 0: raise ValueError( "Model does not run using less than two CPU cores") # Each particle is a task, plus the DataController number_of_tasks = len(self.particles) + 1 # We need a process for each particle and one for the data controller nproc = min(number_of_tasks, nproc) # When a particle requests data data_request_lock = mgr.Lock() # PID of process with lock has_data_request_lock = mgr.Value('int', -1) nproc_lock = mgr.Lock() # Create the task queue for all of the particles and the DataController tasks = multiprocessing.JoinableQueue(number_of_tasks) # Create the result queue for all of the particles and the DataController results = mgr.Queue(number_of_tasks) # Create the shared state objects get_data = mgr.Value('bool', True) # Number of tasks n_run = mgr.Value('int', number_of_tasks) updating = mgr.Value('bool', False) # When something is reading from cache file read_lock = mgr.Lock() # list of PIDs that are reading has_read_lock = mgr.list() read_count = mgr.Value('int', 0) # When something is writing to the cache file write_lock = mgr.Lock() # PID of process with lock has_write_lock = mgr.Value('int', -1) point_get = mgr.Value('list', [0, 0, 0]) active = mgr.Value('bool', True) logger.progress((3, "Initializing and caching hydro model's grid")) try: ds = CommonDataset.open(hydrodataset) # Query the dataset for common variable names # and the time variable. logger.debug("Retrieving variable information from dataset") common_variables = self.get_common_variables_from_dataset(ds) logger.debug("Pickling time variable to disk for particles") timevar = ds.gettimevar(common_variables.get("u")) f, timevar_pickle_path = tempfile.mkstemp() os.close(f) f = open(timevar_pickle_path, "wb") pickle.dump(timevar, f) f.close() ds.closenc() except: logger.warn("Failed to access remote dataset %s" % hydrodataset) raise DataControllerError("Inaccessible DAP endpoint: %s" % hydrodataset) # Add data controller to the queue first so that it # can get the initial data and is not blocked logger.debug('Starting DataController') logger.progress((4, "Starting processes")) data_controller = parallel.DataController(hydrodataset, common_variables, n_run, get_data, write_lock, has_write_lock, read_lock, read_count, time_chunk, horiz_chunk, times, self.start, point_get, self.reference_location, low_memory=low_memory, cache=self.cache_path) tasks.put(data_controller) # Create DataController worker data_controller_process = parallel.Consumer(tasks, results, n_run, nproc_lock, active, get_data, name="DataController") data_controller_process.start() logger.debug('Adding %i particles as tasks' % len(self.particles)) for part in self.particles: forcing = parallel.ForceParticle( part, hydrodataset, common_variables, timevar_pickle_path, times, self.start, self._models, self.reference_location.point, self._use_bathymetry, self._use_shoreline, self._use_seasurface, get_data, n_run, read_lock, has_read_lock, read_count, point_get, data_request_lock, has_data_request_lock, reverse_distance=self.reverse_distance, bathy=self.bathy_path, shoreline_path=self.shoreline_path, cache=self.cache_path, time_method=self.time_method) tasks.put(forcing) # Create workers for the particles. 
procs = [ parallel.Consumer(tasks, results, n_run, nproc_lock, active, get_data, name="ForceParticle-%d" % i) for i in xrange(nproc - 1) ] for w in procs: w.start() logger.debug('Started %s' % w.name) # Get results back from queue, test for failed particles return_particles = [] retrieved = 0. error_code = 0 logger.info("Waiting for %i particle results" % len(self.particles)) logger.progress((5, "Running model")) while retrieved < number_of_tasks: try: # Returns a tuple of code, result code, tempres = results.get(timeout=240) except Queue.Empty: # Poll the active processes to make sure they are all alive and then continue with loop if not data_controller_process.is_alive( ) and data_controller_process.exitcode != 0: # Data controller is zombied, kill off other processes. get_data.value == False results.put((-2, "DataController")) new_procs = [] old_procs = [] for p in procs: if not p.is_alive() and p.exitcode != 0: # Do what the Consumer would do if something finished. # Add something to results queue results.put((-3, "ZombieParticle")) # Decrement nproc (DataController exits when this is 0) with nproc_lock: n_run.value = n_run.value - 1 # Remove task from queue (so they can be joined later on) tasks.task_done() # Start a new Consumer. It will exit if there are no tasks available. np = parallel.Consumer(tasks, results, n_run, nproc_lock, active, get_data, name=p.name) new_procs.append(np) old_procs.append(p) # Release any locks the PID had if p.pid in has_read_lock: with read_lock: read_count.value -= 1 has_read_lock.remove(p.pid) if has_data_request_lock.value == p.pid: has_data_request_lock.value = -1 try: data_request_lock.release() except: pass if has_write_lock.value == p.pid: has_write_lock.value = -1 try: write_lock.release() except: pass for p in old_procs: try: procs.remove(p) except ValueError: logger.warn( "Did not find %s in the list of processes. Continuing on." % p.name) for p in new_procs: procs.append(p) logger.warn( "Started a new consumer (%s) to replace a zombie consumer" % p.name) p.start() else: # We got one. retrieved += 1 if code == None: logger.warn("Got an unrecognized response from a task.") elif code == -1: logger.warn("Particle %s has FAILED!!" % tempres.uid) elif code == -2: error_code = code logger.warn( "DataController has FAILED!! Removing cache file so the particles fail." 
) try: os.remove(self.cache_path) except OSError: logger.debug( "Could not remove cache file, it probably never existed" ) pass elif code == -3: error_code = code logger.info( "A zombie process was caught and task was removed from queue" ) elif isinstance(tempres, Particle): logger.info("Particle %d finished" % tempres.uid) return_particles.append(tempres) # We mulitply by 95 here to save 5% for the exporting logger.progress( (round((retrieved / number_of_tasks) * 90., 1), "Particle %d finished" % tempres.uid)) elif tempres == "DataController": logger.info("DataController finished") logger.progress((round((retrieved / number_of_tasks) * 90., 1), "DataController finished")) else: logger.info("Got a strange result on results queue") logger.info(str(tempres)) logger.info("Retrieved %i/%i results" % (int(retrieved), number_of_tasks)) if len(return_particles) != len(self.particles): logger.warn( "Some particles failed and are not included in the output") # The results queue should be empty at this point assert results.empty() is True # Should be good to join on the tasks now that the queue is empty logger.info("Joining the task queue") tasks.join() # Join all processes logger.info("Joining the processes") for w in procs + [data_controller_process]: # Wait 10 seconds w.join(10.) if w.is_alive(): # Process is hanging, kill it. logger.info( "Terminating %s forcefully. This should have exited itself." % w.name) w.terminate() logger.info('Workers complete') self.particles = return_particles # Remove Manager so it shuts down del mgr # Remove pickled timevar os.remove(timevar_pickle_path) # Remove the cache file if remove_cache is True: try: os.remove(self.cache_path) except OSError: logger.debug( "Could not remove cache file, it probably never existed") logger.progress((96, "Exporting results")) if len(self.particles) > 0: # If output_formats and path specified, # output particle run data to disk when completed if "output_formats" in kwargs: # Make sure output_path is also included if kwargs.get("output_path", None) != None: formats = kwargs.get("output_formats") output_path = kwargs.get("output_path") if isinstance(formats, list): for format in formats: logger.info("Exporting to: %s" % format) try: self.export(output_path, format=format) except: logger.error("Failed to export to: %s" % format) else: logger.warn( 'The output_formats parameter should be a list, not saving any output!' ) else: logger.warn( 'No output path defined, not saving any output!') else: logger.warn('No output format defined, not saving any output!') else: logger.warn("Model didn't actually do anything, check the log.") if error_code == -2: raise DataControllerError("Error in the DataController") else: raise ModelError("Error in the model") logger.progress((99, "Model Run Complete")) return
def listen_for_results(self):
    try:
        # Get results back from queue, test for failed particles
        return_particles = []
        retrieved = 0.
        self.error_code = 0

        logger.info("Waiting for %i particle results" % len(self.particles))
        logger.progress((5, "Running model"))
        while retrieved < self.number_of_tasks:
            try:
                # Returns a tuple of code, result
                code, tempres = self.results.get(timeout=240)
            except Queue.Empty:
                # Poll the active processes to make sure they are all alive and then continue with loop
                if not self.data_controller_process.is_alive() and self.data_controller_process.exitcode != 0:
                    # Data controller is zombied, kill off other processes.
                    self.get_data.value = False
                    self.results.put((-2, "CachingDataController"))

                new_procs = []
                old_procs = []
                for p in self.procs:
                    if not p.is_alive() and p.exitcode != 0:
                        # Do what the Consumer would do if something finished.
                        # Add something to results queue
                        self.results.put((-3, "ZombieParticle"))

                        # Decrement nproc (CachingDataController exits when this is 0)
                        with self.nproc_lock:
                            self.n_run.value = self.n_run.value - 1

                        # Remove task from queue (so they can be joined later on)
                        self.tasks.task_done()

                        # Start a new Consumer.  It will exit if there are no tasks available.
                        np = Consumer(self.tasks, self.results, self.n_run, self.nproc_lock, self.active, self.get_data, name=p.name)
                        new_procs.append(np)
                        old_procs.append(p)

                        # Release any locks the PID had
                        if p.pid in self.has_read_lock:
                            with self.read_lock:
                                self.read_count.value -= 1
                                self.has_read_lock.remove(p.pid)

                        if self.has_data_request_lock.value == p.pid:
                            self.has_data_request_lock.value = -1
                            try:
                                self.data_request_lock.release()
                            except:
                                pass

                        if self.has_write_lock.value == p.pid:
                            self.has_write_lock.value = -1
                            try:
                                self.write_lock.release()
                            except:
                                pass

                for p in old_procs:
                    try:
                        self.procs.remove(p)
                    except ValueError:
                        logger.warn("Did not find %s in the list of processes. Continuing on." % p.name)

                for p in new_procs:
                    self.procs.append(p)
                    logger.warn("Started a new consumer (%s) to replace a zombie consumer" % p.name)
                    p.start()

            else:
                # We got one.
                retrieved += 1
                if code is None:
                    logger.warn("Got an unrecognized response from a task.")
                elif code == -1:
                    logger.warn("Particle %s has FAILED!!" % tempres.uid)
                elif code == -2:
                    self.error_code = code
                    logger.warn("CachingDataController has FAILED!! Removing cache file so the particles fail.")
                    try:
                        os.remove(self.cache_path)
                    except OSError:
                        logger.debug("Could not remove cache file, it probably never existed")
                        pass
                elif code == -3:
                    self.error_code = code
                    logger.info("A zombie process was caught and task was removed from queue")
                elif isinstance(tempres, Particle):
                    logger.info("Particle %d finished" % tempres.uid)
                    return_particles.append(tempres)
                    # We multiply by 90 here to save 10% for the exporting
                    logger.progress((round((retrieved / self.number_of_tasks) * 90., 1), "Particle %d finished" % tempres.uid))
                elif tempres == "CachingDataController":
                    logger.info("CachingDataController finished")
                    logger.progress((round((retrieved / self.number_of_tasks) * 90., 1), "CachingDataController finished"))
                else:
                    logger.info("Got a strange result on results queue")
                    logger.info(str(tempres))

                logger.info("Retrieved %i/%i results" % (int(retrieved), self.number_of_tasks))

        if len(return_particles) != len(self.particles):
            logger.warn("Some particles failed and are not included in the output")

        # The results queue should be empty at this point
        assert self.results.empty() is True

        # Should be good to join on the tasks now that the queue is empty
        logger.info("Joining the task queue")
        self.tasks.join()

        self.particles = return_particles

    finally:
        # Join all processes
        logger.info("Joining the processes")
        for w in self.procs + [self.data_controller_process]:
            # Wait 20 seconds
            w.join(20.)
            if w.is_alive():
                # Process is hanging, kill it.
                logger.info("Terminating %s forcefully. This should have exited itself." % w.name)
                w.terminate()
def manager(run_id):

    with app.app_context():
        job = get_current_job()

        output_path = os.path.join(current_app.config['OUTPUT_PATH'], run_id)
        shutil.rmtree(output_path, ignore_errors=True)
        os.makedirs(output_path)

        cache_path = os.path.join(current_app.config['CACHE_PATH'], run_id)
        shutil.rmtree(cache_path, ignore_errors=True)
        os.makedirs(cache_path)

        f, log_file = tempfile.mkstemp(dir=cache_path, prefix=run_id, suffix=".log")
        os.close(f)
        os.chmod(log_file, 0644)

        # Set up Logger
        logger = logging.getLogger(run_id)
        handler = FileHandler(log_file)
        handler.setLevel(logging.INFO)
        formatter = logging.Formatter('[%(asctime)s] - %(levelname)s - %(name)s - %(processName)s - %(message)s')
        handler.setFormatter(formatter)
        logger.addHandler(handler)

        res = urlparse(current_app.config.get("RESULTS_REDIS_URI"))
        redis_pool = redis.ConnectionPool(host=res.hostname, port=res.port, db=res.path[1:])
        r = redis.Redis(connection_pool=redis_pool)

        run = db.Run.find_one({'_id': ObjectId(run_id)})
        if run is None:
            return "Failed to locate run %s. May have been deleted while task was in the queue?" % run_id

        def listen_for_logs():
            pubsub = r.pubsub()
            pubsub.subscribe("%s:log" % run_id)
            for msg in pubsub.listen():
                if msg['type'] != "message":
                    continue

                if msg["data"] == "FINISHED":
                    break

                try:
                    prog = json.loads(msg["data"])
                    if prog is not None:
                        if prog.get("level", "").lower() == "progress":
                            job.meta["progress"] = float(prog.get("value", job.meta.get("progress", None)))
                            job.meta["message"] = prog.get("message", job.meta.get("message", ""))
                            job.meta["updated"] = prog.get("time", datetime.utcnow().replace(tzinfo=pytz.utc).astimezone(pytz.utc))
                            job.save()
                            logger.info("PROGRESS: %(value).2f - %(message)s" % prog)
                        else:
                            getattr(logger, prog["level"].lower())(prog.get("message"))
                except Exception:
                    logger.info("Got strange result: %s" % msg["data"])
                    pass

            pubsub.close()
            sys.exit()

        def listen_for_results(output_h5_file, total_particles):
            # Create output file (hdf5)
            particles_finished = 0
            results = ResultsPyTable(output_h5_file)
            pubsub = r.pubsub()
            pubsub.subscribe("%s:results" % run_id)
            for msg in pubsub.listen():
                if msg['type'] != "message":
                    continue

                if msg["data"] == "FINISHED":
                    break

                try:
                    json_msg = json.loads(msg["data"])

                    if json_msg.get("status", None):
                        # "COMPLETED" or "FAILED" when a particle finishes
                        particles_finished += 1
                        # Add the 5 progress that was used prior to the particles starting (controller)
                        percent_complete = 90. * (float(particles_finished) / float(total_particles)) + 5
                        r.publish("%s:log" % run_id, json.dumps({"time": datetime.utcnow().isoformat(), "level": "progress", "value": percent_complete, "message": "Particle #%s %s!" % (particles_finished, json_msg.get("status"))}))
                        if particles_finished == total_particles:
                            break
                    else:
                        # Write to HDF file
                        results.write(json_msg)
                except Exception:
                    logger.info("Got strange result: %s" % msg["data"])
                    pass

            pubsub.close()
            results.compute()
            results.close()
            sys.exit()

        pl = threading.Thread(name="LogListener", target=listen_for_logs)
        pl.daemon = True
        pl.start()

        output_h5_file = os.path.join(output_path, "results.h5")
        rl = threading.Thread(name="ResultListener", target=listen_for_results, args=(output_h5_file, run['particles']))
        rl.daemon = True
        rl.start()

        # Wait for PubSub listening to begin
        time.sleep(1)

        model = None

        try:
            r.publish("%s:log" % run_id, json.dumps({"time": datetime.utcnow().isoformat(), "level": "progress", "value": 0, "message": "Setting up model"}))

            hydropath = run['hydro_path']
            geometry = loads(run['geometry'])
            start_depth = run['release_depth']
            num_particles = run['particles']
            time_step = run['timestep']
            num_steps = int(math.ceil((run['duration'] * 24 * 60 * 60) / time_step))
            start_time = run['start'].replace(tzinfo=pytz.utc)
            shoreline_path = run['shoreline_path'] or app.config.get("SHORE_PATH")
            shoreline_feat = run['shoreline_feature']

            # Setup Models
            models = []
            if run['cached_behavior'] is not None and run['cached_behavior'].get('results', None) is not None:
                behavior_data = run['cached_behavior']['results'][0]
                l = LarvaBehavior(data=behavior_data)
                models.append(l)
            models.append(Transport(horizDisp=run['horiz_dispersion'], vertDisp=run['vert_dispersion']))

            model = DistributedModelController(geometry=geometry, depth=start_depth, start=start_time, step=time_step, nstep=num_steps, npart=num_particles, models=models, use_bathymetry=True, bathy_path=current_app.config['BATHY_PATH'], use_shoreline=True, time_method=run['time_method'], shoreline_path=shoreline_path, shoreline_feature=shoreline_feat, shoreline_index_buffer=0.05)
            model.setup_run(hydropath, output_formats=["redis"], redis_url=current_app.config.get("RESULTS_REDIS_URI"), redis_results_channel="%s:results" % run_id, redis_log_channel="%s:log" % run_id)
        except Exception as exception:
            logger.warn("Run failed to initialize, cleaning up.")
            logger.warn(exception.message)
            job.meta["outcome"] = "failed"
            job.save()
            raise

        try:
            r.publish("%s:log" % run_id, json.dumps({"time": datetime.utcnow().isoformat(), "level": "progress", "value": 4, "message": "Adding particles to queue"}))
            for part in model.particles:
                particle_queue.enqueue_call(func=particle, args=(hydropath, part, model,))
        except Exception as exception:
            logger.warn("Failed to start particles, cleaning up.")
            logger.warn(exception.message)
            r.publish("%s:results" % run_id, "FINISHED")
            job.meta["outcome"] = "failed"
            job.save()
            raise
        finally:
def fill_cache_with_linterp_data(self, i, currenttime):
    """
    Streamline requests for data from the cache.  Uses linear interpolation
    between timesteps to get u, v, w, temp and salt.
    """
    if self.active.value is True:
        while self.get_data.value is True:
            logger.debug("Waiting for DataController to release cache file so I can read from it...")
            timer.sleep(2)

    if self.need_data(i + 1):
        # Acquire lock for asking for data
        self.data_request_lock.acquire()
        self.has_data_request_lock.value = os.getpid()
        try:
            # Do I still need data?
            if self.need_data(i + 1):

                # Tell the DataController that we are going to be reading from the file
                with self.read_lock:
                    self.read_count.value += 1
                    self.has_read_lock.append(os.getpid())

                # Open netcdf file on disk from commondataset
                self.dataset.opennc()
                # Get the indices for the current particle location
                indices = self.dataset.get_indices('u', timeinds=[np.asarray([i - 1])], point=self.particle.location)
                self.dataset.closenc()

                with self.read_lock:
                    self.read_count.value -= 1
                    self.has_read_lock.remove(os.getpid())

                # Override the time
                # get the current time index data
                self.point_get.value = [indices[0] + 1, indices[-2], indices[-1]]

                # Request that the data controller update the cache
                self.get_data.value = True
                # Wait until the data controller is done
                if self.active.value is True:
                    while self.get_data.value is True:
                        logger.debug("Waiting for DataController to update cache with the CURRENT time index")
                        timer.sleep(2)

                # Do we still need to get the next timestep?
                if self.need_data(i + 1):
                    # get the next time index data
                    self.point_get.value = [indices[0] + 2, indices[-2], indices[-1]]
                    # Request that the data controller update the cache
                    self.get_data.value = True
                    # Wait until the data controller is done
                    if self.active.value is True:
                        while self.get_data.value is True:
                            logger.debug("Waiting for DataController to update cache with the NEXT time index")
                            timer.sleep(2)
        except Exception:
            logger.warn("Particle failed to request data correctly")
            raise
        finally:
            # Release lock for asking for data
            self.has_data_request_lock.value = -1
            self.data_request_lock.release()
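# fill_cache_with_linterp_data() repeats the same handshake twice: point the
# DataController at a time/space index, flip get_data, and poll until the
# controller clears it.  The pattern distilled into a helper -- a sketch only;
# the helper name is hypothetical and the attribute names mirror the shared
# values created in the model controller's run():
def request_cache_update(self, point_get_value):
    self.point_get.value = point_get_value   # [time index, y index, x index]
    self.get_data.value = True               # wake the DataController
    if self.active.value is True:
        while self.get_data.value is True:   # controller sets this back to False when done
            timer.sleep(2)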
def __call__(self, proc, active): self.active = active if self.usebathy == True: self._bathymetry = Bathymetry(file=self.bathy) self._shoreline = None if self.useshore == True: self._shoreline = Shoreline(file=self.shoreline_path, point=self.release_location_centroid, spatialbuffer=0.25) # Make sure we are not starting on land. Raises exception if we are. self._shoreline.intersect(start_point=self.release_location_centroid, end_point=self.release_location_centroid) self.proc = proc part = self.part if self.active.value == True: while self.get_data.value == True: logger.debug("Waiting for DataController to start...") timer.sleep(10) pass # Initialize commondataset of local cache, then # close the related netcdf file try: with self.read_lock: self.read_count.value += 1 self.has_read_lock.append(os.getpid()) self.dataset = CommonDataset.open(self.localpath) self.dataset.closenc() except StandardError: logger.warn("No cache file: %s. Particle exiting" % self.localpath) raise finally: with self.read_lock: self.read_count.value -= 1 self.has_read_lock.remove(os.getpid()) # Calculate datetime at every timestep modelTimestep, newtimes = AsaTransport.get_time_objects_from_model_timesteps(self.times, start=self.start_time) # Load Timevar from pickle serialization f = open(self.timevar_pickle_path,"rb") timevar = pickle.load(f) f.close() if self.time_method == 'interp': time_indexs = timevar.nearest_index(newtimes, select='before') elif self.time_method == 'nearest': time_indexs = timevar.nearest_index(newtimes) else: logger.warn("Method for computing u,v,w,temp,salt not supported!") try: assert len(newtimes) == len(time_indexs) except AssertionError: logger.error("Time indexes are messed up. Need to have equal datetime and time indexes") raise # loop over timesteps # We don't loop over the last time_index because # we need to query in the time_index and set the particle's # location as the 'newtime' object. for loop_i, i in enumerate(time_indexs[0:-1]): if self.active.value == False: raise ValueError("Particle exiting due to Failure.") newloc = None # if need a time that is outside of what we have #if self.active.value == True: # while self.get_data.value == True: # logger.info("Waiting for DataController to get out...") # timer.sleep(4) # pass # Get the variable data required by the models if self.time_method == 'nearest': u, v, w, temp, salt = self.data_nearest(i, newtimes[loop_i]) elif self.time_method == 'interp': u, v, w, temp, salt = self.data_interp(i, timevar, newtimes[loop_i]) else: logger.warn("Method for computing u,v,w,temp,salt not supported!") #logger.info("U: %.4f, V: %.4f, W: %.4f" % (u,v,w)) #logger.info("Temp: %.4f, Salt: %.4f" % (temp,salt)) # Get the bathy value at the particles location if self.usebathy == True: bathymetry_value = self._bathymetry.get_depth(part.location) else: bathymetry_value = -999999999999999 # Age the particle by the modelTimestep (seconds) # 'Age' meaning the amount of time it has been forced. 
part.age(seconds=modelTimestep[loop_i]) # loop over models - sort these in the order you want them to run for model in self.models: movement = model.move(part, u, v, w, modelTimestep[loop_i], temperature=temp, salinity=salt, bathymetry_value=bathymetry_value) newloc = Location4D(latitude=movement['latitude'], longitude=movement['longitude'], depth=movement['depth'], time=newtimes[loop_i+1]) logger.debug("%s - moved %.3f meters (horizontally) and %.3f meters (vertically) by %s with data from %s" % (part.logstring(), movement['distance'], movement['vertical_distance'], model.__class__.__name__, newtimes[loop_i].isoformat())) if newloc: self.boundary_interaction(particle=part, starting=part.location, ending=newloc, distance=movement['distance'], angle=movement['angle'], azimuth=movement['azimuth'], reverse_azimuth=movement['reverse_azimuth'], vertical_distance=movement['vertical_distance'], vertical_angle=movement['vertical_angle']) logger.debug("%s - was forced by %s and is now at %s" % (part.logstring(), model.__class__.__name__, part.location.logstring())) part.note = part.outputstring() # Each timestep, save the particles status and environmental variables. # This keep fields such as temp, salt, halted, settled, and dead matched up with the number of timesteps part.save() # We won't pull data for the last entry in locations, but we need to populate it with fill data. part.fill_environment_gap() if self.usebathy == True: self._bathymetry.close() if self.useshore == True: self._shoreline.close() return part
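# The forcing loop above treats each model as an object whose move() returns a
# dict of movement components consumed by Location4D and boundary_interaction().
# A null stub that satisfies that contract -- purely illustrative; the real
# Transport and LarvaBehavior models are far richer:
class NullModel(object):
    def move(self, particle, u, v, w, seconds, **kwargs):
        # No displacement; just return the keys the loop reads.
        return {'latitude': particle.location.latitude,
                'longitude': particle.location.longitude,
                'depth': particle.location.depth,
                'distance': 0.0,
                'angle': 0.0,
                'azimuth': 0.0,
                'reverse_azimuth': 180.0,
                'vertical_distance': 0.0,
                'vertical_angle': 0.0}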
def data_interp(self, i, timevar, currenttime): """ Method to streamline request for data from cache, Uses linear interpolation bewtween timesteps to get u,v,w,temp,salt """ if self.active.value == True: while self.get_data.value == True: logger.debug("Waiting for DataController to release cache file so I can read from it...") timer.sleep(4) pass if self.need_data(i+1): # Acquire lock for asking for data self.data_request_lock.acquire() self.has_data_request_lock.value = os.getpid() try: # Do I still need data? if self.need_data(i+1): # Tell the DataController that we are going to be reading from the file with self.read_lock: self.read_count.value += 1 self.has_read_lock.append(os.getpid()) # Open netcdf file on disk from commondataset self.dataset.opennc() # Get the indices for the current particle location indices = self.dataset.get_indices('u', timeinds=[np.asarray([i-1])], point=self.part.location ) self.dataset.closenc() with self.read_lock: self.read_count.value -= 1 self.has_read_lock.remove(os.getpid()) # Override the time # get the current time index data self.point_get.value = [indices[0] + 1, indices[-2], indices[-1]] # Request that the data controller update the cache self.get_data.value = True # Wait until the data controller is done if self.active.value == True: while self.get_data.value == True: logger.debug("Waiting for DataController to update cache with the CURRENT time index") timer.sleep(4) pass # get the next time index data self.point_get.value = [indices[0] + 2, indices[-2], indices[-1]] # Request that the data controller update the cache self.get_data.value = True # Wait until the data controller is done if self.active.value == True: while self.get_data.value == True: logger.debug("Waiting for DataController to update cache with the NEXT time index") timer.sleep(4) pass except StandardError: logger.warn("Particle failed to request data correctly") raise finally: # Release lock for asking for data self.has_data_request_lock.value = -1 self.data_request_lock.release() # Tell the DataController that we are going to be reading from the file with self.read_lock: self.read_count.value += 1 self.has_read_lock.append(os.getpid()) try: # Open netcdf file on disk from commondataset self.dataset.opennc() # Grab data at time index closest to particle location u = [np.mean(np.mean(self.dataset.get_values('u', timeinds=[np.asarray([i])], point=self.part.location ))), np.mean(np.mean(self.dataset.get_values('u', timeinds=[np.asarray([i+1])], point=self.part.location )))] v = [np.mean(np.mean(self.dataset.get_values('v', timeinds=[np.asarray([i])], point=self.part.location ))), np.mean(np.mean(self.dataset.get_values('v', timeinds=[np.asarray([i+1])], point=self.part.location )))] # if there is vertical velocity inthe dataset, get it if 'w' in self.dataset.nc.variables: w = [np.mean(np.mean(self.dataset.get_values('w', timeinds=[np.asarray([i])], point=self.part.location ))), np.mean(np.mean(self.dataset.get_values('w', timeinds=[np.asarray([i+1])], point=self.part.location )))] else: w = [0.0, 0.0] # If there is salt and temp in the dataset, get it if self.temp_name != None and self.salt_name != None: temp = [np.mean(np.mean(self.dataset.get_values('temp', timeinds=[np.asarray([i])], point=self.part.location ))), np.mean(np.mean(self.dataset.get_values('temp', timeinds=[np.asarray([i+1])], point=self.part.location )))] salt = [np.mean(np.mean(self.dataset.get_values('salt', timeinds=[np.asarray([i])], point=self.part.location ))), np.mean(np.mean(self.dataset.get_values('salt', 
timeinds=[np.asarray([i+1])], point=self.part.location )))] # Check for nans that occur in the ocean (happens because # of model and coastline resolution mismatches) if np.isnan(u).any() or np.isnan(v).any() or np.isnan(w).any(): # Take the mean of the closest 4 points # If this includes nan which it will, result is nan uarray1 = self.dataset.get_values('u', timeinds=[np.asarray([i])], point=self.part.location, num=2) varray1 = self.dataset.get_values('v', timeinds=[np.asarray([i])], point=self.part.location, num=2) uarray2 = self.dataset.get_values('u', timeinds=[np.asarray([i+1])], point=self.part.location, num=2) varray2 = self.dataset.get_values('v', timeinds=[np.asarray([i+1])], point=self.part.location, num=2) if 'w' in self.dataset.nc.variables: warray1 = self.dataset.get_values('w', timeinds=[np.asarray([i])], point=self.part.location, num=2) warray2 = self.dataset.get_values('w', timeinds=[np.asarray([i+1])], point=self.part.location, num=2) w = [warray1.mean(), warray2.mean()] else: w = [0.0, 0.0] if self.temp_name != None and self.salt_name != None: temparray1 = self.dataset.get_values('temp', timeinds=[np.asarray([i])], point=self.part.location, num=2) saltarray1 = self.dataset.get_values('salt', timeinds=[np.asarray([i])], point=self.part.location, num=2) temparray2 = self.dataset.get_values('temp', timeinds=[np.asarray([i+1])], point=self.part.location, num=2) saltarray2 = self.dataset.get_values('salt', timeinds=[np.asarray([i+1])], point=self.part.location, num=2) temp = [temparray1.mean(), temparray2.mean()] salt = [saltarray1.mean(), saltarray2.mean()] u = [uarray1.mean(), uarray2.mean()] v = [varray1.mean(), varray2.mean()] # Linear interp of data between timesteps currenttime = date2num(currenttime) timevar = timevar.datenum u = self.linterp(timevar[i:i+2], u, currenttime) v = self.linterp(timevar[i:i+2], v, currenttime) w = self.linterp(timevar[i:i+2], w, currenttime) if self.temp_name != None and self.salt_name != None: temp = self.linterp(timevar[i:i+2], temp, currenttime) salt = self.linterp(timevar[i:i+2], salt, currenttime) if self.temp_name is None: temp = np.nan if self.salt_name is None: salt = np.nan #logger.info(self.dataset.get_xyind_from_point('u', self.part.location, num=1)) except StandardError: logger.error("Error in data_interp method on ForceParticle") raise finally: self.dataset.closenc() with self.read_lock: self.read_count.value -= 1 self.has_read_lock.remove(os.getpid()) return u, v, w, temp, salt
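# data_interp() finishes by handing each two-element list (the value at time
# index i and at i+1) to self.linterp() along with the datenum of the current
# model time.  That interpolation is presumably equivalent to a two-point
# np.interp; a sketch under that assumption:
def linterp_sketch(times, values, currenttime):
    # times:  [t_i, t_i+1] as datenums (timevar.datenum[i:i+2])
    # values: [v_i, v_i+1] for u, v, w, temp or salt
    return np.interp(currenttime, times, values)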
def __call__(self, proc, active): c = 0 self.dataset = CommonDataset.open(self.url) self.proc = proc self.remote = self.dataset.nc cachepath = self.cache_path # Calculate the datetimes of the model timesteps like # the particle objects do, so we can figure out unique # time indices modelTimestep, newtimes = AsaTransport.get_time_objects_from_model_timesteps(self.times, start=self.start_time) timevar = self.dataset.gettimevar(self.uname) # Don't need to grab the last datetime, as it is not needed for forcing, only # for setting the time of the final particle forcing time_indexs = timevar.nearest_index(newtimes[0:-1], select='before') # Have to make sure that we get the plus 1 for the # linear interpolation of u,v,w,temp,salt self.inds = np.unique(time_indexs) self.inds = np.append(self.inds, self.inds.max()+1) # While there is at least 1 particle still running, # stay alive, if not break while self.n_run.value > 1: logger.debug("Particles are still running, waiting for them to request data...") timer.sleep(2) # If particle asks for data, do the following if self.get_data.value == True: logger.debug("Particle asked for data!") # Wait for particles to get out while True: self.read_lock.acquire() logger.debug("Read count: %d" % self.read_count.value) if self.read_count.value > 0: logger.debug("Waiting for write lock on cache file (particles must stop reading)...") self.read_lock.release() timer.sleep(4) else: break # Get write lock on the file. Already have read lock. self.write_lock.acquire() self.has_write_lock.value = os.getpid() if c == 0: logger.debug("Creating cache file") try: # Open local cache for writing, overwrites # existing file with same name self.local = netCDF4.Dataset(cachepath, 'w') indices = self.dataset.get_indices(self.uname, timeinds=[np.asarray([0])], point=self.start) self.point_get.value = [self.inds[0], indices[-2], indices[-1]] # Create dimensions for u and v variables self.local.createDimension('time', None) self.local.createDimension('level', None) self.local.createDimension('x', None) self.local.createDimension('y', None) # Create 3d or 4d u and v variables if self.remote.variables[self.uname].ndim == 4: self.ndim = 4 dimensions = ('time', 'level', 'y', 'x') coordinates = "time z lon lat" elif self.remote.variables[self.uname].ndim == 3: self.ndim = 3 dimensions = ('time', 'y', 'x') coordinates = "time lon lat" shape = self.remote.variables[self.uname].shape # If there is no FillValue defined in the dataset, use np.nan. # Sometimes it will work out correctly and other times we will # have a huge cache file. 
try: fill = self.remote.variables[self.uname].missing_value except Exception: fill = np.nan # Create domain variable that specifies # where there is data geographically/by time # and where there is not data, # Used for testing if particle needs to # ask cache to update domain = self.local.createVariable('domain', 'i', dimensions, zlib=False, fill_value=0) domain.coordinates = coordinates # Create local u and v variables u = self.local.createVariable('u', 'f', dimensions, zlib=False, fill_value=fill) v = self.local.createVariable('v', 'f', dimensions, zlib=False, fill_value=fill) v.coordinates = coordinates u.coordinates = coordinates localvars = [u, v,] remotevars = [self.remote.variables[self.uname], self.remote.variables[self.vname]] # Create local w variable if self.wname != None: w = self.local.createVariable('w', 'f', dimensions, zlib=False, fill_value=fill) w.coordinates = coordinates localvars.append(w) remotevars.append(self.remote.variables[self.wname]) if self.temp_name != None and self.salt_name != None: # Create local temp and salt vars temp = self.local.createVariable('temp', 'f', dimensions, zlib=False, fill_value=fill) salt = self.local.createVariable('salt', 'f', dimensions, zlib=False, fill_value=fill) temp.coordinates = coordinates salt.coordinates = coordinates localvars.append(temp) localvars.append(salt) remotevars.append(self.remote.variables[self.temp_name]) remotevars.append(self.remote.variables[self.salt_name]) # Create local lat/lon coordinate variables if self.remote.variables[self.xname].ndim == 2: lon = self.local.createVariable('lon', 'f', ("y", "x"), zlib=False) lon[:] = self.remote.variables[self.xname][:, :] lat = self.local.createVariable('lat', 'f', ("y", "x"), zlib=False) lat[:] = self.remote.variables[self.yname][:, :] if self.remote.variables[self.xname].ndim == 1: lon = self.local.createVariable('lon', 'f', ("x"), zlib=False) lon[:] = self.remote.variables[self.xname][:] lat = self.local.createVariable('lat', 'f', ("y"), zlib=False) lat[:] = self.remote.variables[self.yname][:] # Create local z variable if self.zname != None: if self.remote.variables[self.zname].ndim == 4: z = self.local.createVariable('z', 'f', ("time","level","y","x"), zlib=False) remotez = self.remote.variables[self.zname] localvars.append(z) remotevars.append(remotez) elif self.remote.variables[self.zname].ndim == 3: z = self.local.createVariable('z', 'f', ("level","y","x"), zlib=False) z[:] = self.remote.variables[self.zname][:, :, :] elif self.remote.variables[self.zname].ndim ==1: z = self.local.createVariable('z', 'f', ("level",), zlib=False) z[:] = self.remote.variables[self.zname][:] # Create local time variable time = self.local.createVariable('time', 'f8', ("time",), zlib=False) if self.tname != None: time[:] = self.remote.variables[self.tname][self.inds] if self.point_get.value[0]+self.time_size > np.max(self.inds): current_inds = np.arange(self.point_get.value[0], np.max(self.inds)+1) else: current_inds = np.arange(self.point_get.value[0],self.point_get.value[0] + self.time_size) # Get data from remote dataset and add # to local cache while True: try: self.get_remote_data(localvars, remotevars, current_inds, shape) except: logger.warn("DataController failed to get remote data. 
Trying again in 30 seconds") timer.sleep(30) else: break c += 1 except StandardError: logger.error("DataController failed to get data (first request)") raise finally: self.local.sync() self.local.close() self.has_write_lock.value = -1 self.write_lock.release() self.get_data.value = False self.read_lock.release() logger.debug("Done updating cache file, closing file, and releasing locks") else: logger.debug("Updating cache file") try: # Open local cache dataset for appending self.local = netCDF4.Dataset(cachepath, 'a') # Create local and remote variable objects # for the variables of interest u = self.local.variables['u'] v = self.local.variables['v'] time = self.local.variables['time'] remoteu = self.remote.variables[self.uname] remotev = self.remote.variables[self.vname] # Create lists of variable objects for # the data updater localvars = [u, v, ] remotevars = [remoteu, remotev, ] if self.salt_name != None and self.temp_name != None: salt = self.local.variables['salt'] temp = self.local.variables['temp'] remotesalt = self.remote.variables[self.salt_name] remotetemp = self.remote.variables[self.temp_name] localvars.append(salt) localvars.append(temp) remotevars.append(remotesalt) remotevars.append(remotetemp) if self.wname != None: w = self.local.variables['w'] remotew = self.remote.variables[self.wname] localvars.append(w) remotevars.append(remotew) if self.zname != None: remotez = self.remote.variables[self.zname] if remotez.ndim == 4: z = self.local.variables['z'] localvars.append(z) remotevars.append(remotez) if self.tname != None: remotetime = self.remote.variables[self.tname] time[self.inds] = remotetime[self.inds] if self.point_get.value[0]+self.time_size > np.max(self.inds): current_inds = np.arange(self.point_get.value[0], np.max(self.inds)+1) else: current_inds = np.arange(self.point_get.value[0],self.point_get.value[0] + self.time_size) # Get data from remote dataset and add # to local cache while True: try: self.get_remote_data(localvars, remotevars, current_inds, shape) except: logger.warn("DataController failed to get remote data. Trying again in 30 seconds") timer.sleep(30) else: break c += 1 except StandardError: logger.error("DataController failed to get data (not first request)") raise finally: self.local.sync() self.local.close() self.has_write_lock.value = -1 self.write_lock.release() self.get_data.value = False self.read_lock.release() logger.debug("Done updating cache file, closing file, and releasing locks") else: pass self.dataset.closenc() return "DataController"
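# Access to the cache file is coordinated with a shared read_count plus read and
# write locks: particle processes bump read_count under read_lock before reading,
# while the DataController spins until read_count drops to zero, keeps read_lock
# so no new readers can start, and only then takes write_lock.  The two sides
# condensed into helpers -- names and the do_read/do_write callables are
# illustrative, and timer is the module's time alias:
def reader_side(read_lock, read_count, do_read):
    with read_lock:
        read_count.value += 1
    try:
        do_read()
    finally:
        with read_lock:
            read_count.value -= 1

def writer_side(read_lock, write_lock, read_count, do_write):
    while True:
        read_lock.acquire()
        if read_count.value > 0:
            read_lock.release()      # readers still active, check again shortly
            timer.sleep(2)
        else:
            break                    # hold read_lock so no new readers can start
    write_lock.acquire()
    try:
        do_write()
    finally:
        write_lock.release()
        read_lock.release()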
def run(self, hydrodataset, **kwargs): # Add ModelController description to logfile logger.info(self) # Add the model descriptions to logfile for m in self._models: logger.info(m) # Calculate the model timesteps # We need times = len(self._nstep) + 1 since data is stored one timestep # after a particle is forced with the final timestep's data. times = range(0,(self._step*self._nstep)+1,self._step) # Calculate a datetime object for each model timestep # This method is duplicated in DataController and ForceParticle # using the 'times' variables above. Will be useful in those other # locations for particles released at different times # i.e. released over a few days modelTimestep, self.datetimes = AsaTransport.get_time_objects_from_model_timesteps(times, start=self.start) time_chunk = self._time_chunk horiz_chunk = self._horiz_chunk low_memory = kwargs.get("low_memory", False) # Should we remove the cache file at the end of the run? remove_cache = kwargs.get("remove_cache", True) self.bathy_path = kwargs.get("bathy", None) self.cache_path = kwargs.get("cache", None) if self.cache_path is None: # Generate temp filename for dataset cache default_cache_dir = os.path.join(os.path.dirname(__file__), "_cache") temp_name = AsaRandom.filename(prefix=str(datetime.now().microsecond), suffix=".nc") self.cache_path = os.path.join(default_cache_dir, temp_name) logger.progress((1, "Setting up particle start locations")) point_locations = [] if isinstance(self.geometry, Point): point_locations = [self.reference_location] * self._npart elif isinstance(self.geometry, Polygon) or isinstance(self.geometry, MultiPolygon): point_locations = [Location4D(latitude=loc.y, longitude=loc.x, depth=self._depth, time=self.start) for loc in AsaTransport.fill_polygon_with_points(goal=self._npart, polygon=self.geometry)] # Initialize the particles logger.progress((2, "Initializing particles")) for x in xrange(0, self._npart): p = LarvaParticle(id=x) p.location = point_locations[x] # We don't need to fill the location gaps here for environment variables # because the first data collected actually relates to this original # position. # We do need to fill in fields such as settled, halted, etc. p.fill_status_gap() # Set the inital note p.note = p.outputstring() p.notes.append(p.note) self.particles.append(p) # This is where it makes sense to implement the multiprocessing # looping for particles and models. Can handle each particle in # parallel probably. 
# # Get the number of cores (may take some tuning) and create that # many workers then pass particles into the queue for the workers mgr = multiprocessing.Manager() nproc = multiprocessing.cpu_count() - 1 if nproc <= 0: raise ValueError("Model does not run using less than two CPU cores") # Each particle is a task, plus the DataController number_of_tasks = len(self.particles) + 1 # We need a process for each particle and one for the data controller nproc = min(number_of_tasks, nproc) # When a particle requests data data_request_lock = mgr.Lock() # PID of process with lock has_data_request_lock = mgr.Value('int',-1) nproc_lock = mgr.Lock() # Create the task queue for all of the particles and the DataController tasks = multiprocessing.JoinableQueue(number_of_tasks) # Create the result queue for all of the particles and the DataController results = mgr.Queue(number_of_tasks) # Create the shared state objects get_data = mgr.Value('bool', True) # Number of tasks n_run = mgr.Value('int', number_of_tasks) updating = mgr.Value('bool', False) # When something is reading from cache file read_lock = mgr.Lock() # list of PIDs that are reading has_read_lock = mgr.list() read_count = mgr.Value('int', 0) # When something is writing to the cache file write_lock = mgr.Lock() # PID of process with lock has_write_lock = mgr.Value('int',-1) point_get = mgr.Value('list', [0, 0, 0]) active = mgr.Value('bool', True) logger.progress((3, "Initializing and caching hydro model's grid")) try: ds = CommonDataset.open(hydrodataset) # Query the dataset for common variable names # and the time variable. logger.debug("Retrieving variable information from dataset") common_variables = self.get_common_variables_from_dataset(ds) logger.debug("Pickling time variable to disk for particles") timevar = ds.gettimevar(common_variables.get("u")) f, timevar_pickle_path = tempfile.mkstemp() os.close(f) f = open(timevar_pickle_path, "wb") pickle.dump(timevar, f) f.close() ds.closenc() except: logger.warn("Failed to access remote dataset %s" % hydrodataset) raise DataControllerError("Inaccessible DAP endpoint: %s" % hydrodataset) # Add data controller to the queue first so that it # can get the initial data and is not blocked logger.debug('Starting DataController') logger.progress((4, "Starting processes")) data_controller = parallel.DataController(hydrodataset, common_variables, n_run, get_data, write_lock, has_write_lock, read_lock, read_count, time_chunk, horiz_chunk, times, self.start, point_get, self.reference_location, low_memory=low_memory, cache=self.cache_path) tasks.put(data_controller) # Create DataController worker data_controller_process = parallel.Consumer(tasks, results, n_run, nproc_lock, active, get_data, name="DataController") data_controller_process.start() logger.debug('Adding %i particles as tasks' % len(self.particles)) for part in self.particles: forcing = parallel.ForceParticle(part, hydrodataset, common_variables, timevar_pickle_path, times, self.start, self._models, self.reference_location.point, self._use_bathymetry, self._use_shoreline, self._use_seasurface, get_data, n_run, read_lock, has_read_lock, read_count, point_get, data_request_lock, has_data_request_lock, reverse_distance=self.reverse_distance, bathy=self.bathy_path, shoreline_path=self.shoreline_path, shoreline_feature=self.shoreline_feature, cache=self.cache_path, time_method=self.time_method) tasks.put(forcing) # Create workers for the particles. 
procs = [ parallel.Consumer(tasks, results, n_run, nproc_lock, active, get_data, name="ForceParticle-%d"%i) for i in xrange(nproc - 1) ] for w in procs: w.start() logger.debug('Started %s' % w.name) # Get results back from queue, test for failed particles return_particles = [] retrieved = 0. error_code = 0 logger.info("Waiting for %i particle results" % len(self.particles)) logger.progress((5, "Running model")) while retrieved < number_of_tasks: try: # Returns a tuple of code, result code, tempres = results.get(timeout=240) except Queue.Empty: # Poll the active processes to make sure they are all alive and then continue with loop if not data_controller_process.is_alive() and data_controller_process.exitcode != 0: # Data controller is zombied, kill off other processes. get_data.value = False results.put((-2, "DataController")) new_procs = [] old_procs = [] for p in procs: if not p.is_alive() and p.exitcode != 0: # Do what the Consumer would do if something finished. # Add something to results queue results.put((-3, "ZombieParticle")) # Decrement nproc (DataController exits when this is 0) with nproc_lock: n_run.value = n_run.value - 1 # Remove task from queue (so they can be joined later on) tasks.task_done() # Start a new Consumer. It will exit if there are no tasks available. np = parallel.Consumer(tasks, results, n_run, nproc_lock, active, get_data, name=p.name) new_procs.append(np) old_procs.append(p) # Release any locks the PID had if p.pid in has_read_lock: with read_lock: read_count.value -= 1 has_read_lock.remove(p.pid) if has_data_request_lock.value == p.pid: has_data_request_lock.value = -1 try: data_request_lock.release() except: pass if has_write_lock.value == p.pid: has_write_lock.value = -1 try: write_lock.release() except: pass for p in old_procs: try: procs.remove(p) except ValueError: logger.warn("Did not find %s in the list of processes. Continuing on." % p.name) for p in new_procs: procs.append(p) logger.warn("Started a new consumer (%s) to replace a zombie consumer" % p.name) p.start() else: # We got one. retrieved += 1 if code is None: logger.warn("Got an unrecognized response from a task.") elif code == -1: logger.warn("Particle %s has FAILED!!" % tempres.uid) elif code == -2: error_code = code logger.warn("DataController has FAILED!! 
Removing cache file so the particles fail.") try: os.remove(self.cache_path) except OSError: logger.debug("Could not remove cache file, it probably never existed") pass elif code == -3: error_code = code logger.info("A zombie process was caught and task was removed from queue") elif isinstance(tempres, Particle): logger.info("Particle %d finished" % tempres.uid) return_particles.append(tempres) # We mulitply by 95 here to save 5% for the exporting logger.progress((round((retrieved / number_of_tasks) * 90.,1), "Particle %d finished" % tempres.uid)) elif tempres == "DataController": logger.info("DataController finished") logger.progress((round((retrieved / number_of_tasks) * 90.,1), "DataController finished")) else: logger.info("Got a strange result on results queue") logger.info(str(tempres)) logger.info("Retrieved %i/%i results" % (int(retrieved),number_of_tasks)) if len(return_particles) != len(self.particles): logger.warn("Some particles failed and are not included in the output") # The results queue should be empty at this point assert results.empty() is True # Should be good to join on the tasks now that the queue is empty logger.info("Joining the task queue") tasks.join() # Join all processes logger.info("Joining the processes") for w in procs + [data_controller_process]: # Wait 10 seconds w.join(10.) if w.is_alive(): # Process is hanging, kill it. logger.info("Terminating %s forcefully. This should have exited itself." % w.name) w.terminate() logger.info('Workers complete') self.particles = return_particles # Remove Manager so it shuts down del mgr # Remove pickled timevar os.remove(timevar_pickle_path) # Remove the cache file if remove_cache is True: try: os.remove(self.cache_path) except OSError: logger.debug("Could not remove cache file, it probably never existed") logger.progress((96, "Exporting results")) if len(self.particles) > 0: # If output_formats and path specified, # output particle run data to disk when completed if "output_formats" in kwargs: # Make sure output_path is also included if kwargs.get("output_path", None) != None: formats = kwargs.get("output_formats") output_path = kwargs.get("output_path") if isinstance(formats, list): for format in formats: logger.info("Exporting to: %s" % format) try: self.export(output_path, format=format) except: logger.error("Failed to export to: %s" % format) else: logger.warn('The output_formats parameter should be a list, not saving any output!') else: logger.warn('No output path defined, not saving any output!') else: logger.warn('No output format defined, not saving any output!') else: logger.warn("Model didn't actually do anything, check the log.") if error_code == -2: raise DataControllerError("Error in the DataController") else: raise ModelError("Error in the model") logger.progress((99, "Model Run Complete")) return
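# run() only exports results when both output_formats (a list) and output_path
# are supplied, and it deletes the cache file afterwards unless
# remove_cache=False is passed.  A usage sketch for a configured ModelController
# instance; the DAP endpoint and paths below are placeholders, not real resources:
model.run("http://example.com/thredds/dodsC/model_run",
          bathy="/data/bathy/global_bathy.nc",
          cache="/tmp/larva-run-cache.nc",
          remove_cache=False,
          output_formats=["NetCDF", "Trackline", "Shapefile"],
          output_path="/tmp/larva-run-output")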
def run(run_id): # Sleep to give the Run object enough time to save time.sleep(10) with app.app_context(): from paegan.logger import logger job = get_current_job() output_path = os.path.join(current_app.config['OUTPUT_PATH'], run_id) shutil.rmtree(output_path, ignore_errors=True) os.makedirs(output_path) cache_path = os.path.join(current_app.config['CACHE_PATH'], run_id) shutil.rmtree(cache_path, ignore_errors=True) os.makedirs(cache_path) temp_animation_path = os.path.join(current_app.config['OUTPUT_PATH'], "temp_images_" + run_id) shutil.rmtree(temp_animation_path, ignore_errors=True) os.makedirs(temp_animation_path) # Set up Logger queue = multiprocessing.Queue(-1) f, log_file = tempfile.mkstemp(dir=cache_path, prefix=run_id, suffix=".log") os.close(f) # Close any existing handlers (hand.close() for hand in logger.handlers) # Remove any existing handlers logger.handlers = [] logger.setLevel(logging.PROGRESS) handler = MultiProcessingLogHandler(log_file, queue) handler.setLevel(logging.PROGRESS) formatter = logging.Formatter('[%(asctime)s] - %(levelname)s - %(name)s - %(processName)s - %(message)s') handler.setFormatter(formatter) logger.addHandler(handler) # Progress stuff. Hokey! progress_deque = collections.deque(maxlen=1) progress_handler = ProgressHandler(progress_deque) progress_handler.setLevel(logging.PROGRESS) logger.addHandler(progress_handler) e = threading.Event() def save_progress(): while e.wait(5) is not True: try: record = progress_deque.pop() if record == StopIteration: break job.meta["updated"] = record[0] if record is not None and record[1] >= 0: job.meta["progress"] = record[1] if isinstance(record[2], unicode) or isinstance(record[2], str): job.meta["message"] = record[2] job.save() except IndexError: pass except Exception: raise return t = threading.Thread(name="ProgressUpdater", target=save_progress) t.daemon = True t.start() model = None try: logger.progress((0, "Configuring model")) run = db.Run.find_one( { '_id' : ObjectId(run_id) } ) if run is None: return "Failed to locate run %s. May have been deleted while task was in the queue?" 
% run_id geometry = loads(run['geometry']) start_depth = run['release_depth'] num_particles = run['particles'] time_step = run['timestep'] num_steps = int(math.ceil((run['duration'] * 24 * 60 * 60) / time_step)) start_time = run['start'].replace(tzinfo = pytz.utc) shoreline_path = run['shoreline_path'] or app.config.get("SHORE_PATH") shoreline_feat = run['shoreline_feature'] # Set up output directory/bucket for run output_formats = ['Shapefile', 'NetCDF', 'Trackline'] # Setup Models models = [] if run['cached_behavior'] is not None and run['cached_behavior'].get('results', None) is not None: behavior_data = run['cached_behavior']['results'][0] l = LarvaBehavior(data=behavior_data) models.append(l) models.append(Transport(horizDisp=run['horiz_dispersion'], vertDisp=run['vert_dispersion'])) # Setup ModelController model = ModelController(geometry=geometry, depth=start_depth, start=start_time, step=time_step, nstep=num_steps, npart=num_particles, models=models, use_bathymetry=True, use_shoreline=True, time_chunk=run['time_chunk'], horiz_chunk=run['horiz_chunk'], time_method=run['time_method'], shoreline_path=shoreline_path, shoreline_feature=shoreline_feat, reverse_distance=1500) # Run the model cache_file = os.path.join(cache_path, run_id + ".nc.cache") bathy_file = current_app.config['BATHY_PATH'] model.run(run['hydro_path'], output_path=output_path, bathy=bathy_file, output_formats=output_formats, cache=cache_file, remove_cache=False, caching=run['caching']) # Skip creating movie output_path """ from paegan.viz.trajectory import CFTrajectory logger.info("Creating animation...") for filename in os.listdir(output_path): if os.path.splitext(filename)[1][1:] == "nc": # Found netCDF file netcdf_file = os.path.join(output_path,filename) traj = CFTrajectory(netcdf_file) success = traj.plot_animate(os.path.join(output_path,'animation.avi'), temp_folder=temp_animation_path, bathy=app.config['BATHY_PATH']) if not success: logger.info("Could not create animation") else: logger.info("Animation saved") """ job.meta["outcome"] = "success" job.save() return "Successfully ran %s" % run_id except Exception as exception: logger.warn("Run FAILED, cleaning up and uploading log.") logger.warn(exception.message) job.meta["outcome"] = "failed" job.save() raise finally: logger.progress((99, "Processing output files")) # Close the handler so we can upload the log file without a file lock (hand.close() for hand in logger.handlers) queue.put(StopIteration) # Break out of the progress loop e.set() t.join() # Move logfile to output directory shutil.move(log_file, os.path.join(output_path, 'model.log')) # Move cachefile to output directory if we made one if run['caching']: shutil.move(cache_file, output_path) output_files = [] for filename in os.listdir(output_path): outfile = os.path.join(output_path, filename) output_files.append(outfile) result_files = [] base_access_url = current_app.config.get('NON_S3_OUTPUT_URL', None) # Handle results and cleanup if current_app.config['USE_S3'] is True: base_access_url = urljoin("http://%s.s3.amazonaws.com/output/" % current_app.config['S3_BUCKET'], run_id) # Upload results to S3 and remove the local copies conn = S3Connection() bucket = conn.get_bucket(current_app.config['S3_BUCKET']) for outfile in output_files: # Don't upload the cache file if os.path.basename(outfile) == os.path.basename(cache_file): continue # Upload the outfile with the same as the run name _, ext = os.path.splitext(outfile) new_filename = slugify(unicode(run['name'])) + ext k = Key(bucket) k.key = 
"output/%s/%s" % (run_id, new_filename) k.set_contents_from_filename(outfile) k.set_acl('public-read') result_files.append(base_access_url + "/" + new_filename) os.remove(outfile) shutil.rmtree(output_path, ignore_errors=True) else: for outfile in output_files: result_files.append(urljoin(base_access_url, run_id) + "/" + os.path.basename(outfile)) shutil.rmtree(temp_animation_path, ignore_errors=True) # Set output fields run.output = result_files run.ended = datetime.utcnow() run.compute() run.save() # Cleanup logger.removeHandler(handler) del formatter del handler del logger del model queue.close() job.meta["message"] = "Complete" job.save()
def __call__(self, active): c = 0 self.dataset = CommonDataset.open(self.hydrodataset) self.remote = self.dataset.nc # Calculate the datetimes of the model timesteps like # the particle objects do, so we can figure out unique # time indices modelTimestep, newtimes = AsaTransport.get_time_objects_from_model_timesteps( self.times, start=self.start_time) timevar = self.dataset.gettimevar(self.uname) # Don't need to grab the last datetime, as it is not needed for forcing, only # for setting the time of the final particle forcing time_indexs = timevar.nearest_index(newtimes[0:-1], select='before') # Have to make sure that we get the plus 1 for the # linear interpolation of u,v,w,temp,salt self.inds = np.unique(time_indexs) self.inds = np.append(self.inds, self.inds.max() + 1) # While there is at least 1 particle still running, # stay alive, if not break while self.n_run.value > 1: if self.caching is False: logger.debug( "Caching is False, not doing much. Just hanging out until all of the particles finish." ) timer.sleep(10) continue # If particle asks for data, do the following if self.get_data.value is True: logger.debug("Particle asked for data!") # Wait for particles to get out while True: self.read_lock.acquire() logger.debug("Read count: %d" % self.read_count.value) if self.read_count.value > 0: logger.debug( "Waiting for write lock on cache file (particles must stop reading)..." ) self.read_lock.release() timer.sleep(2) else: break # Get write lock on the file. Already have read lock. self.write_lock.acquire() self.has_write_lock.value = os.getpid() if c == 0: logger.debug("Creating cache file") try: # Open local cache for writing, overwrites # existing file with same name self.local = netCDF4.Dataset(self.cache_path, 'w') indices = self.dataset.get_indices( self.uname, timeinds=[np.asarray([0])], point=self.start) self.point_get.value = [ self.inds[0], indices[-2], indices[-1] ] # Create dimensions for u and v variables self.local.createDimension('time', None) self.local.createDimension('level', None) self.local.createDimension('x', None) self.local.createDimension('y', None) # Create 3d or 4d u and v variables if self.remote.variables[self.uname].ndim == 4: self.ndim = 4 dimensions = ('time', 'level', 'y', 'x') coordinates = "time z lon lat" elif self.remote.variables[self.uname].ndim == 3: self.ndim = 3 dimensions = ('time', 'y', 'x') coordinates = "time lon lat" shape = self.remote.variables[self.uname].shape # If there is no FillValue defined in the dataset, use np.nan. # Sometimes it will work out correctly and other times we will # have a huge cache file. 
try: fill = self.remote.variables[ self.uname].missing_value except Exception: fill = np.nan # Create domain variable that specifies # where there is data geographically/by time # and where there is not data, # Used for testing if particle needs to # ask cache to update domain = self.local.createVariable('domain', 'i', dimensions, zlib=False, fill_value=0) domain.coordinates = coordinates # Create local u and v variables u = self.local.createVariable('u', 'f', dimensions, zlib=False, fill_value=fill) v = self.local.createVariable('v', 'f', dimensions, zlib=False, fill_value=fill) v.coordinates = coordinates u.coordinates = coordinates localvars = [ u, v, ] remotevars = [ self.remote.variables[self.uname], self.remote.variables[self.vname] ] # Create local w variable if self.wname is not None: w = self.local.createVariable('w', 'f', dimensions, zlib=False, fill_value=fill) w.coordinates = coordinates localvars.append(w) remotevars.append( self.remote.variables[self.wname]) if self.temp_name is not None and self.salt_name is not None: # Create local temp and salt vars temp = self.local.createVariable('temp', 'f', dimensions, zlib=False, fill_value=fill) salt = self.local.createVariable('salt', 'f', dimensions, zlib=False, fill_value=fill) temp.coordinates = coordinates salt.coordinates = coordinates localvars.append(temp) localvars.append(salt) remotevars.append( self.remote.variables[self.temp_name]) remotevars.append( self.remote.variables[self.salt_name]) # Create local lat/lon coordinate variables if self.remote.variables[self.xname].ndim == 2: lon = self.local.createVariable('lon', 'f', ("y", "x"), zlib=False) lon[:] = self.remote.variables[self.xname][:, :] lat = self.local.createVariable('lat', 'f', ("y", "x"), zlib=False) lat[:] = self.remote.variables[self.yname][:, :] if self.remote.variables[self.xname].ndim == 1: lon = self.local.createVariable('lon', 'f', ("x"), zlib=False) lon[:] = self.remote.variables[self.xname][:] lat = self.local.createVariable('lat', 'f', ("y"), zlib=False) lat[:] = self.remote.variables[self.yname][:] # Create local z variable if self.zname is not None: if self.remote.variables[self.zname].ndim == 4: z = self.local.createVariable( 'z', 'f', ("time", "level", "y", "x"), zlib=False) remotez = self.remote.variables[self.zname] localvars.append(z) remotevars.append(remotez) elif self.remote.variables[self.zname].ndim == 3: z = self.local.createVariable( 'z', 'f', ("level", "y", "x"), zlib=False) z[:] = self.remote.variables[ self.zname][:, :, :] elif self.remote.variables[self.zname].ndim == 1: z = self.local.createVariable('z', 'f', ("level", ), zlib=False) z[:] = self.remote.variables[self.zname][:] # Create local time variable time = self.local.createVariable('time', 'f8', ("time", ), zlib=False) if self.tname is not None: time[:] = self.remote.variables[self.tname][ self.inds] if self.point_get.value[0] + self.time_size > np.max( self.inds): current_inds = np.arange(self.point_get.value[0], np.max(self.inds) + 1) else: current_inds = np.arange( self.point_get.value[0], self.point_get.value[0] + self.time_size) # Get data from remote dataset and add # to local cache. # Try 20 times on the first attempt current_attempt = 1 max_attempts = 20 while True: try: assert current_attempt <= max_attempts self.get_remote_data(localvars, remotevars, current_inds, shape) except AssertionError: raise except: logger.warn( "CachingDataController failed to get remote data. Trying again in 20 seconds. %s attempts left." 
% str(max_attempts - current_attempt)) logger.exception("Data Access Error") timer.sleep(20) current_attempt += 1 else: break c += 1 except (Exception, AssertionError): logger.error( "CachingDataController failed to get data (first request)" ) raise finally: self.local.sync() self.local.close() self.has_write_lock.value = -1 self.write_lock.release() self.get_data.value = False self.read_lock.release() logger.debug( "Done updating cache file, closing file, and releasing locks" ) else: logger.debug("Updating cache file") try: # Open local cache dataset for appending self.local = netCDF4.Dataset(self.cache_path, 'a') # Create local and remote variable objects # for the variables of interest u = self.local.variables['u'] v = self.local.variables['v'] time = self.local.variables['time'] remoteu = self.remote.variables[self.uname] remotev = self.remote.variables[self.vname] # Create lists of variable objects for # the data updater localvars = [ u, v, ] remotevars = [ remoteu, remotev, ] if self.salt_name is not None and self.temp_name is not None: salt = self.local.variables['salt'] temp = self.local.variables['temp'] remotesalt = self.remote.variables[self.salt_name] remotetemp = self.remote.variables[self.temp_name] localvars.append(salt) localvars.append(temp) remotevars.append(remotesalt) remotevars.append(remotetemp) if self.wname is not None: w = self.local.variables['w'] remotew = self.remote.variables[self.wname] localvars.append(w) remotevars.append(remotew) if self.zname is not None: remotez = self.remote.variables[self.zname] if remotez.ndim == 4: z = self.local.variables['z'] localvars.append(z) remotevars.append(remotez) if self.tname is not None: # remotetime = self.remote.variables[self.tname] time[self.inds] = self.remote.variables[self.inds] if self.point_get.value[0] + self.time_size > np.max( self.inds): current_inds = np.arange(self.point_get.value[0], np.max(self.inds) + 1) else: current_inds = np.arange( self.point_get.value[0], self.point_get.value[0] + self.time_size) # Get data from remote dataset and add # to local cache while True: try: self.get_remote_data(localvars, remotevars, current_inds, shape) except: logger.warn( "CachingDataController failed to get remote data. Trying again in 30 seconds" ) timer.sleep(30) else: break c += 1 except Exception: logger.error( "CachingDataController failed to get data (not first request)" ) raise finally: self.local.sync() self.local.close() self.has_write_lock.value = -1 self.write_lock.release() self.get_data.value = False self.read_lock.release() logger.debug( "Done updating cache file, closing file, and releasing locks" ) else: logger.debug( "Particles are still running, waiting for them to request data..." ) timer.sleep(2) self.dataset.closenc() return "CachingDataController"
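# The first-request branch above retries get_remote_data() up to max_attempts
# times, logging and sleeping between attempts and bailing out once the budget
# is exhausted.  The same idea as a small standalone helper -- name, signature
# and defaults are illustrative, and logger/timer are the module-level aliases
# used above:
def retry(func, max_attempts=20, delay=20):
    for attempt in range(1, max_attempts + 1):
        try:
            return func()
        except Exception:
            if attempt == max_attempts:
                raise                         # out of attempts, propagate the failure
            logger.exception("Data Access Error")
            timer.sleep(delay)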
def listen_for_results(self): try: # Get results back from queue, test for failed particles return_particles = [] retrieved = 0. self.error_code = 0 logger.info("Waiting for %i particle results" % len(self.particles)) logger.progress((5, "Running model")) while retrieved < self.number_of_tasks: try: # Returns a tuple of code, result code, tempres = self.results.get(timeout=240) except Queue.Empty: new_procs = [] old_procs = [] for p in self.procs: if not p.is_alive() and p.exitcode != 0: # Do what the Consumer would do if something finished. # Add something to results queue self.results.put((-3, "Zombie")) # Decrement nproc (Consumer exits when this is 0) with self.nproc_lock: self.n_run.value = self.n_run.value - 1 # Remove task from queue (so they can be joined later on) self.tasks.task_done() # Start a new Consumer. It will exit if there are no tasks available. np = Consumer(self.tasks, self.results, self.n_run, self.nproc_lock, self.active, None, name=p.name) new_procs.append(np) old_procs.append(p) for p in old_procs: try: self.procs.remove(p) except ValueError: logger.warn("Did not find %s in the list of processes. Continuing on." % p.name) for p in new_procs: self.procs.append(p) logger.warn("Started a new consumer (%s) to replace a zombie consumer" % p.name) p.start() else: # We got one. retrieved += 1 if code is None: logger.warn("Got an unrecognized response from a task.") elif code == -1: logger.warn("Particle %s has FAILED!!" % tempres.uid) elif code == -3: self.error_code = code logger.info("A zombie process was caught and task was removed from queue") elif isinstance(tempres, Particle): logger.info("Particle %d finished" % tempres.uid) return_particles.append(tempres) # We mulitply by 95 here to save 5% for the exporting logger.progress((round((retrieved / self.number_of_tasks) * 90., 1), "Particle %d finished" % tempres.uid)) else: logger.info("Got a strange result on results queue: %s" % str(tempres)) logger.info("Retrieved %i/%i results" % (int(retrieved), self.number_of_tasks)) if len(return_particles) != len(self.particles): logger.warn("Some particles failed and are not included in the output") # The results queue should be empty at this point assert self.results.empty() is True # Should be good to join on the tasks now that the queue is empty logger.info("Joining the task queue") self.tasks.join() self.particles = return_particles finally: # Join all processes logger.info("Joining the processes") for w in self.procs: # Wait 20 seconds w.join(20.) if w.is_alive(): # Process is hanging, kill it. logger.info("Terminating %s forcefully. This should have exited itself." % w.name) w.terminate()
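# When a worker dies mid-task, the loop above does "what the Consumer would do":
# report a stand-in result, decrement the run counter, mark the task done, and
# start a replacement Consumer.  Those steps gathered into one helper -- a
# sketch; the names mirror the shared objects used by listen_for_results():
def replace_zombie(p, tasks, results, n_run, nproc_lock, active):
    results.put((-3, "Zombie"))                  # stand-in result for the lost task
    with nproc_lock:
        n_run.value -= 1
    tasks.task_done()                            # so tasks.join() can still complete
    replacement = Consumer(tasks, results, n_run, nproc_lock, active, None, name=p.name)
    replacement.start()
    return replacement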
def listen_for_results(self, output_h5_file, total_particles): try: # Get results back from queue, test for failed particles return_particles = [] retrieved = 0. self.error_code = 0 logger.info("Waiting for %i particle results" % total_particles) while retrieved < self.total_task_count( ): # One for the CachingDataController logger.info("looping in listen_for_results") try: # Returns a tuple of code, result code, tempres = self.results.get(timeout=240) except queue.Empty: # Poll the active processes to make sure they are all alive and then continue with loop if not self.data_controller_process.is_alive( ) and self.data_controller_process.exitcode != 0: # Data controller is zombied, kill off other processes. self.get_data.value is False self.results.put((-2, "CachingDataController")) new_procs = [] old_procs = [] for p in self.procs: if not p.is_alive() and p.exitcode != 0: # Do what the Consumer would do if something finished. # Add something to results queue self.results.put((-3, "ZombieParticle")) # Decrement nproc (CachingDataController exits when this is 0) with self.nproc_lock: self.n_run.value = self.n_run.value - 1 # Remove task from queue (so they can be joined later on) self.tasks.task_done() # Start a new Consumer. It will exit if there are no tasks available. np = Consumer(self.tasks, self.results, self.n_run, self.nproc_lock, self.active, self.get_data, name=p.name) new_procs.append(np) old_procs.append(p) # Release any locks the PID had if p.pid in self.has_read_lock: with self.read_lock: self.read_count.value -= 1 self.has_read_lock.remove(p.pid) if self.has_data_request_lock.value == p.pid: self.has_data_request_lock.value = -1 try: self.data_request_lock.release() except: pass if self.has_write_lock.value == p.pid: self.has_write_lock.value = -1 try: self.write_lock.release() except: pass for p in old_procs: try: self.procs.remove(p) except ValueError: logger.warn( "Did not find %s in the list of processes. Continuing on." % p.name) for p in new_procs: self.procs.append(p) logger.warn( "Started a new consumer (%s) to replace a zombie consumer" % p.name) p.start() else: # We got one. retrieved += 1 if code is None: logger.warn( "Got an unrecognized response from a task.") elif code == -1: logger.warn("Particle %s has FAILED!!" % tempres.uid) elif code == -2: self.error_code = code logger.warn( "CachingDataController has FAILED!! Removing cache file so the particles fail." 
) try: os.remove(self.cache_path) except OSError: logger.debug( "Could not remove cache file, it probably never existed" ) pass elif code == -3: self.error_code = code logger.info( "A zombie process was caught and task was removed from queue" ) elif isinstance(tempres, Particle): logger.info("Particle %d finished" % tempres.uid) return_particles.append(tempres) # We mulitply by 95 here to save 5% for the exporting logger.progress( (round((retrieved / self.total_task_count()) * 90., 1), "Particle %d finished" % tempres.uid)) elif tempres == "CachingDataController": logger.info("CachingDataController finished") logger.progress( (round((retrieved / self.total_task_count()) * 90., 1), "CachingDataController finished")) else: logger.info("Got a strange result on results queue") logger.info(str(tempres)) logger.info("Retrieved %i/%i results" % (int(retrieved), self.total_task_count())) # Relax time.sleep(1) if len(return_particles) != total_particles: logger.warn( "Some particles failed and are not included in the output") # The results queue should be empty at this point assert self.results.empty() is True # Should be good to join on the tasks now that the queue is empty logger.info("Joining the task queue") self.tasks.join() self.tasks.close() self.tasks.join_thread() finally: # Join all processes logger.info("Joining the processes") for w in self.procs + [self.data_controller_process]: # Wait 20 seconds w.join(20.) if w.is_alive(): # Process is hanging, kill it. logger.info( "Terminating %s forcefully. This should have exited itself." % w.name) w.terminate() if self.error_code == -2: raise ValueError( "Error in the BaseDataController (error_code was -2)") results = ex.ResultsPyTable(output_h5_file) for p in return_particles: for x in range(len(p.locations)): results.write(p.timestep_index_dump(x)) results.compute() results.close() return
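# total_task_count() is not shown in this excerpt; the "One for the
# CachingDataController" comment above and the non-caching run() method suggest
# it is simply the particle count plus the single controller task.  A sketch of
# that assumption:
def total_task_count_sketch(self):
    return len(self.particles) + 1   # all particles + the CachingDataController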