def cleanup(self):
    """
        Run the parent class cleanup, then delete the on-disk cache file
        when ``self.remove_cache`` is exactly True.

        A missing cache file is not an error; it is only logged at debug level.
    """
    super(CachingModelController, self).cleanup()

    # Nothing more to do unless cache removal was explicitly requested
    if self.remove_cache is not True:
        return

    try:
        os.remove(self.cache_path)
    except OSError:
        logger.debug("Could not remove cache file, it probably never existed")
def fill_cache_with_nearest_data(self, i):
    """
        Method to streamline request for data from cache,
        Uses nearest time to get u,v,w,temp,salt

        If ``self.need_data(i)`` reports that data is needed for time index
        ``i``, look up the grid indices for the particle's current location,
        publish them through ``self.point_get`` and set ``self.get_data`` so
        the DataController updates the cache.  Blocks (polling every 2
        seconds) while ``self.get_data`` is True and ``self.active`` is True.

        The reader bookkeeping (``read_count`` / ``has_read_lock``) and the
        data request lock are always released, even when the index lookup
        raises; the exception itself is propagated to the caller.
    """
    # Wait for any in-flight cache update to finish before looking at the cache
    if self.active.value is True:
        while self.get_data.value is True:
            logger.debug("Waiting for DataController to release cache file so I can read from it...")
            timer.sleep(2)

    if not self.need_data(i):
        return

    # Acquire lock for asking for data
    self.data_request_lock.acquire()
    self.has_data_request_lock.value = os.getpid()
    try:
        # Re-check now that we hold the lock; the answer may have changed
        # while we were waiting (presumably another requester filled the cache).
        if self.need_data(i):

            # Register this process as a reader of the cache file
            with self.read_lock:
                self.read_count.value += 1
                self.has_read_lock.append(os.getpid())

            try:
                # Open netcdf file on disk from commondataset
                self.dataset.opennc()
                try:
                    # Get the indices for the current particle location
                    indices = self.dataset.get_indices('u', timeinds=[np.asarray([i - 1])], point=self.particle.location)
                finally:
                    # Close the file even if the index lookup failed
                    self.dataset.closenc()
            finally:
                # Always undo the reader registration so a failed lookup
                # does not leave a stale reader count / PID behind.
                with self.read_lock:
                    self.read_count.value -= 1
                    self.has_read_lock.remove(os.getpid())

            # Override the time
            self.point_get.value = [indices[0] + 1, indices[-2], indices[-1]]

            # Request that the data controller update the cache
            # DATA CONTROLLER STARTS
            self.get_data.value = True

            # Wait until the data controller is done
            if self.active.value is True:
                while self.get_data.value is True:
                    logger.debug("Waiting for DataController to update cache...")
                    timer.sleep(2)
    finally:
        self.has_data_request_lock.value = -1
        self.data_request_lock.release()
def move(self, particle, u, v, w, modelTimestep, **kwargs):
    """
        Try to settle the particle using the ``bathymetry_value`` keyword.

        Returns a dict with 'u', 'v' and 'w' keys.  Without a bathymetry
        value no settlement is attempted and all three are 0; otherwise the
        values come from ``self.attempt``.
    """
    seafloor = kwargs.pop("bathymetry_value", None)

    if seafloor is not None:
        du, dv, dw = self.attempt(particle, seafloor)
        return dict(u=du, v=dv, w=dw)

    logger.debug("No bathymetry so can not attempt to settle particle")
    return dict(u=0, v=0, w=0)
def move(self, particle, u, v, w, modelTimestep, **kwargs):
    """
        Attempt settlement of the particle at the supplied sea floor depth.

        The depth is read from the ``bathymetry_value`` keyword argument.
        Returns a dict keyed by 'u', 'v' and 'w': zeros when no bathymetry
        value was given, otherwise whatever ``self.attempt`` produced.
    """
    floor_depth = kwargs.pop("bathymetry_value", None)

    if floor_depth is None:
        logger.debug("No bathymetry so can not attempt to settle particle")
        velocities = (0, 0, 0)
    else:
        velocities = self.attempt(particle, floor_depth)

    # Unpack explicitly so a malformed result from attempt() still fails loudly
    out_u, out_v, out_w = velocities
    return {'u': out_u, 'v': out_v, 'w': out_w}
def __reverse(self, **kwargs):
    """
        Reverse particle just off of the shore in the direction that it came in.
        Adds a slight random factor to the angle it is reversed in.

        Required keywords: start_point, hit_point, distance, azimuth,
        reverse_azimuth.  Optional: reverse_distance (falls back to 100 when
        missing or None).

        Returns a Location4D at the depth of start_point, or start_point
        itself when no usable in-water location could be found.
    """
    origin = kwargs.pop('start_point')
    shore_hit = kwargs.pop('hit_point')
    # 'distance' and 'azimuth' are required keywords but are not used below
    kwargs.pop('distance')
    kwargs.pop('azimuth')
    back_azimuth = kwargs.pop('reverse_azimuth')

    throw_distance = kwargs.get('reverse_distance', None)
    if throw_distance is None:
        throw_distance = 100

    def project(dist, az):
        # Location reached from the hit point along `az` after `dist`, kept at the starting depth
        pt = AsaGreatCircle.great_circle(distance=dist, azimuth=az, start_point=shore_hit)
        return Location4D(latitude=pt['latitude'], longitude=pt['longitude'], depth=origin.depth)

    # Randomize the reverse angle slightly (scaled by 5 degrees)
    jittered_azimuth = back_azimuth + AsaRandom.random() * 5

    # Step 1: find a point just offshore to test from, doubling the nudge
    # distance (starting at 0.01) up to 16 times.  This makes sure the
    # start of the next intersect test is in the water.
    nudges = 0
    nudge_by = 0.01
    water_probe = project(nudge_by, back_azimuth)
    while self.intersect(single_point=water_probe.point) and nudges < 16:
        nudge_by *= 2
        water_probe = project(nudge_by, back_azimuth)
        nudges += 1

    # All 16 doublings were used up.  This should totally never happen.
    if nudges == 16:
        logger.debug("WOW. Could not find location in water to do shoreline calculation with. Assuming particle did not move from original location")
        return origin

    # Step 2: throw the particle back, halving the distance each time the
    # path still crosses the shoreline.  Give up after 12 halvings.
    halvings = 0
    remaining = throw_distance
    landing = project(throw_distance, jittered_azimuth)
    while self.intersect(start_point=water_probe.point, end_point=landing.point) and halvings < 12:
        remaining /= 2
        landing = project(remaining, jittered_azimuth)
        halvings += 1

    # After 12 halvings give up and return the point the particle started from.
    # No randomization.
    if halvings == 12:
        logger.debug("Could not react particle with shoreline. Assuming particle did not move from original location")
        return origin

    return landing
def move(self, particle, u, v, w, modelTimestep, **kwargs):
    """
        Project the particle forward one model timestep.

        u, v and w are velocity components; a value that is None or NaN is
        replaced by the particle's last stored value.  Unless the particle
        is halted, a random displacement term scaled by
        sqrt(2 * coefficient / modelTimestep) is added to each component,
        using the horizontal coefficient for u and v and the vertical one
        for w.  A halted particle gets zero velocities.

        Returns the mapping produced by
        AsaTransport.distance_from_location_using_u_v_w with the final
        'u', 'v' and 'w' stored on it.  Per the original documentation the
        mapping looks like:
        {   'latitude': x,
            'azimuth': x,
            'reverse_azimuth': x,
            'longitude': x,
            'depth': x,
            'u': x,
            'v': x,
            'w': x,
            'distance': x,
            'angle': x,
            'vertical_distance': x,
            'vertical_angle': x }
    """
    logger.debug("U: %s, V: %s, W: %s" % (str(u), str(v), str(w)))

    # IMPORTANT:
    # If we got no data from the model, we are using the last available value stored in the particles!
    if u is None or math.isnan(u):
        u = particle.last_u()
    if v is None or math.isnan(v):
        v = particle.last_v()
    if w is None or math.isnan(w):
        w = particle.last_w()

    # Remember the (pre-randomization) vectors on the particle
    particle.u_vector = u
    particle.v_vector = v
    particle.w_vector = w

    def random_kick(coefficient):
        # Random displacement velocity for one component
        return AsaRandom.random() * ((2 * coefficient / modelTimestep) ** 0.5)

    if particle.halted:
        u = v = w = 0
    else:
        u += random_kick(self._horizDisp)
        v += random_kick(self._horizDisp)
        w += random_kick(self._vertDisp)

    projected = AsaTransport.distance_from_location_using_u_v_w(
        u=u, v=v, w=w, timestep=modelTimestep, location=particle.location)
    for component, value in (('u', u), ('v', v), ('w', w)):
        projected[component] = value
    return projected
def cleanup(self):
    """
        Run the parent class cleanup, drop the Manager reference held in
        ``self.mgr`` and, when ``self.remove_cache`` is exactly True, delete
        the on-disk cache file.

        A missing cache file is not an error; it is only logged at debug level.
    """
    super(CachingModelController, self).cleanup()

    # Remove Manager so it shuts down
    del self.mgr

    # Only remove the cache file when explicitly requested
    if self.remove_cache is not True:
        return

    try:
        os.remove(self.cache_path)
    except OSError:
        logger.debug("Could not remove cache file, it probably never existed")
def get_remote_data(self, localvars, remotevars, inds, shape):
    """
        Method that does the updating of local netcdf cache with remote data

        Copies a time/space window from every variable in `remotevars` into
        the matching variable in `localvars` and flags the same window with
        ones in the local 'domain' variable.  The time range runs from
        inds[0] through inds[-1] inclusive.  `shape` is treated as 4-D or
        3-D, with the horizontal axes last.
    """
    # If user specifies 'all' then entire xy domain is grabbed,
    # otherwise a square of +/- horiz_size cells around the requested
    # point, clipped to the grid.
    if self.horiz_size == 'all':
        row_lo, row_hi = 0, shape[-2]
        col_lo, col_hi = 0, shape[-1]
    else:
        radius = self.horiz_size
        col_lo = self.point_get.value[2] - radius
        col_hi = self.point_get.value[2] + radius + 1
        row_lo = self.point_get.value[1] - radius
        row_hi = self.point_get.value[1] + radius + 1
        # The stored indices are sequences; use their first element
        col_lo, col_hi = col_lo[0], col_hi[0]
        row_lo, row_hi = row_lo[0], row_hi[0]
        row_lo = max(row_lo, 0)
        col_lo = max(col_lo, 0)
        row_hi = min(row_hi, shape[-2])
        col_hi = min(col_hi, shape[-1])

    time_span = slice(inds[0], inds[-1] + 1)
    rows = slice(row_lo, row_hi)
    cols = slice(col_lo, col_hi)
    window_4d = (time_span, slice(0, shape[1]), rows, cols)
    window_3d = (time_span, rows, cols)
    rank = len(shape)

    # Update domain variable for where we will add data
    domain = self.local.variables['domain']
    if rank == 4:
        domain[window_4d] = np.ones((inds[-1] + 1 - inds[0], shape[1], row_hi - row_lo, col_hi - col_lo))
    elif rank == 3:
        domain[window_3d] = np.ones((inds[-1] + 1 - inds[0], row_hi - row_lo, col_hi - col_lo))

    # Update the local variables with remote data
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug("Filling cache with: Time - %s:%s, Lat - %s:%s, Lon - %s:%s" % (
            str(inds[0]), str(inds[-1] + 1), str(row_lo), str(row_hi), str(col_lo), str(col_hi)))

    # Anything that is not 4-D is copied with the 3-D window
    copy_window = window_4d if rank == 4 else window_3d
    for local, remote in zip(localvars, remotevars):
        local[copy_window] = remote[copy_window]
def move(self, particle, u, v, w, modelTimestep, **kwargs):
    """
        Advance the particle by one model timestep and report where it ends up.

        Missing velocity components (None or NaN) are taken from the
        particle's last stored values.  For a particle that is not halted,
        each component receives a random term of
        AsaRandom.random() * sqrt(2 * coefficient / modelTimestep), with the
        horizontal coefficient applied to u and v and the vertical
        coefficient applied to w.  A halted particle moves with zero
        velocity.

        Returns the mapping from
        AsaTransport.distance_from_location_using_u_v_w, with 'u', 'v' and
        'w' set to the velocities actually used.  Per the original
        documentation it looks like:
        {   'latitude': x,
            'azimuth': x,
            'reverse_azimuth': x,
            'longitude': x,
            'depth': x,
            'u': x,
            'v': x,
            'w': x,
            'distance': x,
            'angle': x,
            'vertical_distance': x,
            'vertical_angle': x }
    """
    logger.debug("U: %s, V: %s, W: %s" % (str(u), str(v), str(w)))

    # IMPORTANT:
    # If we got no data from the model, we are using the last available value stored in the particles!
    u = particle.last_u() if (u is None or math.isnan(u)) else u
    v = particle.last_v() if (v is None or math.isnan(v)) else v
    w = particle.last_w() if (w is None or math.isnan(w)) else w

    particle.u_vector, particle.v_vector, particle.w_vector = u, v, w

    if not particle.halted:
        # Random displacement added to each component
        u = u + AsaRandom.random() * ((2 * self._horizDisp / modelTimestep) ** 0.5)
        v = v + AsaRandom.random() * ((2 * self._horizDisp / modelTimestep) ** 0.5)
        w = w + AsaRandom.random() * ((2 * self._vertDisp / modelTimestep) ** 0.5)
    else:
        u, v, w = 0, 0, 0

    outcome = AsaTransport.distance_from_location_using_u_v_w(
        u=u, v=v, w=w, timestep=modelTimestep, location=particle.location)
    outcome['u'] = u
    outcome['v'] = v
    outcome['w'] = w
    return outcome
def attempt(self, particle, depth):
    """
        Settle the particle if the depth conditions for this settlement
        type are met.

        `depth` is the bathymetry value at the particle.  Depths are
        negative down from the sea surface, so "-3" is 3 meters below the
        surface; the bathymetry is assumed to follow the same convention.

        Always returns (0, 0, 0): settlement does not currently contribute
        any velocity.
    """
    # We may want to have settlement affect the u/v/w in the future
    no_motion = (0, 0, 0)

    # If the particle is settled, don't move it anywhere
    if particle.settled:
        return no_motion

    mode = self.type.lower()

    if mode == "benthic":
        # Is the sea floor within the upper and lower bounds?
        floor_in_range = self.upper >= depth >= self.lower
        if floor_in_range:
            # Move the particle to the sea floor.
            # TODO: Should the particle just swim downwards?
            on_floor = Location4D(location=particle.location)
            on_floor.depth = depth
            particle.location = on_floor
            particle.settle()
            logger.info("Particle %d settled in %s mode" % (particle.uid, self.type))

    elif mode == "pelagic":
        # The bathymetry is deliberately not tested here, since that
        # would need a high resolution dataset to work.
        # Is the particle within the range?
        particle_in_range = self.upper >= particle.location.depth >= self.lower
        if particle_in_range:
            # Just settle the particle
            particle.settle()
            logger.info("Particle %d settled in %s mode" % (particle.uid, self.type))
        else:
            logger.debug("Particle did NOT settle. Depth conditions not met. Upper limit: %d - Lower limit: %d - Particle: %d" % (self.upper, self.lower, particle.location.depth))

    else:
        logger.warn("Settlement type %s not recognized, not trying to settle Particle %d." % (self.type, particle.uid))

    return no_motion
def boundary_interaction(self, **kwargs):
    """
        Apply shoreline, bathymetry and sea-surface constraints to a move.

        Required keywords: particle, starting, ending.  The optional
        distance, angle, azimuth and reverse_azimuth keywords are forwarded
        to the shoreline reaction.

        `ending` is adjusted in place and assigned to the particle's
        location.  Returns None.
    """
    particle = kwargs.pop('particle')
    origin = kwargs.pop('starting')
    target = kwargs.pop('ending')

    # shoreline
    if self.useshore:
        crossing = self._shoreline.intersect(start_point=origin.point, end_point=target.point)
        if crossing:
            # Set the intersection point.
            hitpoint = Location4D(point=crossing['point'],
                                  time=origin.time + (target.time - origin.time))
            particle.location = hitpoint

            # This relies on the shoreline to put the particle in water and not on shore.
            bounced = self._shoreline.react(start_point=origin,
                                            end_point=target,
                                            hit_point=hitpoint,
                                            reverse_distance=self.reverse_distance,
                                            feature=crossing['feature'],
                                            distance=kwargs.get('distance'),
                                            angle=kwargs.get('angle'),
                                            azimuth=kwargs.get('azimuth'),
                                            reverse_azimuth=kwargs.get('reverse_azimuth'))
            target.latitude = bounced.latitude
            target.longitude = bounced.longitude
            target.depth = bounced.depth
            logger.debug("%s - hit the shoreline at %s. Setting location to %s." % (particle.logstring(), hitpoint.logstring(), target.logstring()))

    # bathymetry (settled particles are left alone)
    if self.usebathy and not particle.settled:
        if self._bathymetry.intersect(start_point=origin, end_point=target):
            corrected = self._bathymetry.react(type='reverse', start_point=origin, end_point=target)
            # Logged before the target is updated, so it shows the rejected position
            logger.debug("%s - hit the bottom at %s. Setting location to %s." % (particle.logstring(), target.logstring(), corrected.logstring()))
            target.latitude = corrected.latitude
            target.longitude = corrected.longitude
            target.depth = corrected.depth

    # sea-surface: clamp anything above the surface back to depth 0
    if self.usesurface and target.depth > 0:
        target.depth = 0

    particle.location = target
    return
def fill_cache_with_nearest_data(self, i):
    """
        Method to streamline request for data from cache,
        Uses nearest time to get u,v,w,temp,salt

        When ``self.need_data(i)`` says data is needed for time index ``i``,
        the grid indices of the particle's current location are looked up,
        written to ``self.point_get`` and ``self.get_data`` is set so the
        DataController updates the cache.  While ``self.active`` is True the
        method polls every 2 seconds until ``self.get_data`` is cleared.

        Reader bookkeeping (``read_count`` / ``has_read_lock``), the open
        dataset and the data request lock are released on every exit path;
        any exception from the lookup is propagated unchanged.
    """
    # Do not touch the cache while the DataController is still writing to it
    if self.active.value is True:
        while self.get_data.value is True:
            logger.debug("Waiting for DataController to release cache file so I can read from it...")
            timer.sleep(2)

    if not self.need_data(i):
        return

    # Acquire lock for asking for data
    self.data_request_lock.acquire()
    self.has_data_request_lock.value = os.getpid()
    try:
        # Check again under the lock; the situation may have changed while
        # waiting for it (presumably another requester filled the cache).
        if not self.need_data(i):
            return

        pid = os.getpid()
        with self.read_lock:
            self.read_count.value += 1
            self.has_read_lock.append(pid)

        try:
            # Open netcdf file on disk from commondataset
            self.dataset.opennc()
            try:
                # Get the indices for the current particle location
                indices = self.dataset.get_indices('u', timeinds=[np.asarray([i - 1])], point=self.particle.location)
            finally:
                # Do not leave the file open when the lookup fails
                self.dataset.closenc()
        finally:
            # Undo the reader registration on success and on failure alike,
            # otherwise a failed lookup leaves a stale count / PID behind.
            with self.read_lock:
                self.read_count.value -= 1
                self.has_read_lock.remove(pid)

        # Override the time
        self.point_get.value = [indices[0] + 1, indices[-2], indices[-1]]

        # Request that the data controller update the cache
        # DATA CONTROLLER STARTS
        self.get_data.value = True

        # Wait until the data controller is done
        if self.active.value is True:
            while self.get_data.value is True:
                logger.debug("Waiting for DataController to update cache...")
                timer.sleep(2)
    finally:
        self.has_data_request_lock.value = -1
        self.data_request_lock.release()
def attempt(self, particle, depth):
    """
        Check whether the particle can settle and settle it if so.

        `depth` is the bathymetry value at the particle's position.  A
        particle is negative down from the sea surface, so "-3" is 3 meters
        below the surface, and the bathymetry is assumed to be negative
        down as well.

        The return value is always (0, 0, 0); settlement adds no velocity.
    """
    # We may want to have settlement affect the u/v/w in the future
    u = v = w = 0

    # If the particle is settled, don't move it anywhere
    if particle.settled:
        return (0, 0, 0)

    settle_type = self.type.lower()
    is_benthic = settle_type == "benthic"
    is_pelagic = settle_type == "pelagic"

    if not (is_benthic or is_pelagic):
        logger.warn("Settlement type %s not recognized, not trying to settle Particle %d." % (self.type, particle.uid))
        return (u, v, w)

    if is_benthic:
        # Is the sea floor within the upper and lower bounds?
        if self.upper >= depth >= self.lower:
            # Move the particle to the sea floor.
            # TODO: Should the particle just swim downwards?
            seafloor_location = Location4D(location=particle.location)
            seafloor_location.depth = depth
            particle.location = seafloor_location
            particle.settle()
            logger.info("Particle %d settled in %s mode" % (particle.uid, self.type))
        return (u, v, w)

    # Pelagic: the water depth itself is not tested since that would need
    # a high resolution bathymetry dataset.  Only the particle depth counts.
    if self.upper >= particle.location.depth >= self.lower:
        # Just settle the particle
        particle.settle()
        logger.info("Particle %d settled in %s mode" % (particle.uid, self.type))
    else:
        logger.debug("Particle did NOT settle. Depth conditions not met. Upper limit: %d - Lower limit: %d - Particle: %d" % (self.upper, self.lower, particle.location.depth))

    return (u, v, w)
def get_remote_data(self, localvars, remotevars, inds, shape):
    """
        Method that does the updating of local netcdf cache with remote data

        For the time range inds[0]..inds[-1] (inclusive) and a horizontal
        window, mark the local 'domain' variable with ones and copy the
        same window from each remote variable into its local counterpart.
        `shape` is handled as 4-D or 3-D, with the two horizontal axes last.
    """
    t_start, t_stop = inds[0], inds[-1] + 1
    ndims = len(shape)

    # If user specifies 'all' then entire xy domain is grabbed;
    # otherwise grab horiz_size cells either side of the requested point.
    if self.horiz_size == 'all':
        y, y_1 = 0, shape[-2]
        x, x_1 = 0, shape[-1]
    else:
        half_width = self.horiz_size
        x_center = self.point_get.value[2]
        x, x_1 = x_center - half_width, self.point_get.value[2] + half_width + 1
        y_center = self.point_get.value[1]
        y, y_1 = y_center - half_width, self.point_get.value[1] + half_width + 1
        # The stored indices are sequences; take the first entry of each
        x, x_1 = x[0], x_1[0]
        y, y_1 = y[0], y_1[0]
        # Keep the window inside the grid
        y = 0 if y < 0 else y
        x = 0 if x < 0 else x
        y_1 = shape[-2] if y_1 > shape[-2] else y_1
        x_1 = shape[-1] if x_1 > shape[-1] else x_1

    n_times = t_stop - inds[0]

    # Update domain variable for where we will add data
    domain = self.local.variables['domain']
    if ndims == 4:
        filled = np.ones((n_times, shape[1], y_1 - y, x_1 - x))
        domain[t_start:t_stop, 0:shape[1], y:y_1, x:x_1] = filled
    elif ndims == 3:
        filled = np.ones((n_times, y_1 - y, x_1 - x))
        domain[t_start:t_stop, y:y_1, x:x_1] = filled

    # Update the local variables with remote data
    if logger.isEnabledFor(logging.DEBUG):
        bounds = (str(inds[0]), str(inds[-1] + 1), str(y), str(y_1), str(x), str(x_1))
        logger.debug("Filling cache with: Time - %s:%s, Lat - %s:%s, Lon - %s:%s" % bounds)

    for local, remote in zip(localvars, remotevars):
        if ndims == 4:
            local[t_start:t_stop, 0:shape[1], y:y_1, x:x_1] = remote[t_start:t_stop, 0:shape[1], y:y_1, x:x_1]
        else:
            # Anything that is not 4-D is copied as 3-D
            local[t_start:t_stop, y:y_1, x:x_1] = remote[t_start:t_stop, y:y_1, x:x_1]
def run(self, hydrodataset, **kwargs):
    """
        Run the model: create the particles, start the DataController and
        particle worker processes, collect their results and export them.

        hydrodataset is the dataset location handed to CommonDataset.open
        and to the worker tasks.

        Recognized keyword arguments:
            low_memory     - passed through to the DataController (default False)
            remove_cache   - remove the cache file at the end of the run (default True)
            bathy          - bathymetry path handed to each particle task
            cache          - path of the cache file; a temp name under
                             "_cache" next to this module is generated when omitted
            output_formats - list of formats to export
            output_path    - where to export; required for any export to happen

        Raises ValueError when fewer than two CPU cores are available,
        DataControllerError when the dataset can not be accessed or the
        DataController failed with no particles returned, and ModelError
        when no particles were returned for any other reason.
    """
    # Add ModelController description to logfile
    logger.info(self)

    # Add the model descriptions to logfile
    for m in self._models:
        logger.info(m)

    # Calculate the model timesteps
    # We need times = len(self._nstep) + 1 since data is stored one timestep
    # after a particle is forced with the final timestep's data.
    times = range(0, (self._step * self._nstep) + 1, self._step)
    # Calculate a datetime object for each model timestep
    # This method is duplicated in DataController and ForceParticle
    # using the 'times' variables above.  Will be useful in those other
    # locations for particles released at different times
    # i.e. released over a few days
    modelTimestep, self.datetimes = AsaTransport.get_time_objects_from_model_timesteps(times, start=self.start)

    time_chunk = self._time_chunk
    horiz_chunk = self._horiz_chunk
    low_memory = kwargs.get("low_memory", False)

    # Should we remove the cache file at the end of the run?
    remove_cache = kwargs.get("remove_cache", True)

    self.bathy_path = kwargs.get("bathy", None)

    self.cache_path = kwargs.get("cache", None)
    if self.cache_path is None:
        # Generate temp filename for dataset cache
        default_cache_dir = os.path.join(os.path.dirname(__file__), "_cache")
        temp_name = AsaRandom.filename(prefix=str(datetime.now().microsecond), suffix=".nc")
        self.cache_path = os.path.join(default_cache_dir, temp_name)

    logger.progress((1, "Setting up particle start locations"))
    point_locations = []
    if isinstance(self.geometry, Point):
        point_locations = [self.reference_location] * self._npart
    elif isinstance(self.geometry, Polygon) or isinstance(self.geometry, MultiPolygon):
        point_locations = [
            Location4D(latitude=loc.y, longitude=loc.x, depth=self._depth, time=self.start)
            for loc in AsaTransport.fill_polygon_with_points(goal=self._npart, polygon=self.geometry)
        ]

    # Initialize the particles
    logger.progress((2, "Initializing particles"))
    for x in xrange(0, self._npart):
        p = LarvaParticle(id=x)
        p.location = point_locations[x]
        # We don't need to fill the location gaps here for environment variables
        # because the first data collected actually relates to this original
        # position.
        # We do need to fill in fields such as settled, halted, etc.
        p.fill_status_gap()
        # Set the initial note
        p.note = p.outputstring()
        p.notes.append(p.note)
        self.particles.append(p)

    # This is where it makes sense to implement the multiprocessing
    # looping for particles and models. Can handle each particle in
    # parallel probably.
    #
    # Get the number of cores (may take some tuning) and create that
    # many workers then pass particles into the queue for the workers
    mgr = multiprocessing.Manager()
    nproc = multiprocessing.cpu_count() - 1
    if nproc <= 0:
        raise ValueError("Model does not run using less than two CPU cores")

    # Each particle is a task, plus the DataController
    number_of_tasks = len(self.particles) + 1

    # We need a process for each particle and one for the data controller
    nproc = min(number_of_tasks, nproc)

    # When a particle requests data
    data_request_lock = mgr.Lock()
    # PID of process with lock
    has_data_request_lock = mgr.Value('int', -1)

    nproc_lock = mgr.Lock()

    # Create the task queue for all of the particles and the DataController
    tasks = multiprocessing.JoinableQueue(number_of_tasks)
    # Create the result queue for all of the particles and the DataController
    results = mgr.Queue(number_of_tasks)

    # Create the shared state objects
    get_data = mgr.Value('bool', True)
    # Number of tasks
    n_run = mgr.Value('int', number_of_tasks)
    # NOTE: created but not passed to any worker in this method
    updating = mgr.Value('bool', False)

    # When something is reading from cache file
    read_lock = mgr.Lock()
    # list of PIDs that are reading
    has_read_lock = mgr.list()
    read_count = mgr.Value('int', 0)

    # When something is writing to the cache file
    write_lock = mgr.Lock()
    # PID of process with lock
    has_write_lock = mgr.Value('int', -1)

    point_get = mgr.Value('list', [0, 0, 0])
    active = mgr.Value('bool', True)

    logger.progress((3, "Initializing and caching hydro model's grid"))
    try:
        ds = CommonDataset.open(hydrodataset)
        # Query the dataset for common variable names
        # and the time variable.
        logger.debug("Retrieving variable information from dataset")
        common_variables = self.get_common_variables_from_dataset(ds)

        logger.debug("Pickling time variable to disk for particles")
        timevar = ds.gettimevar(common_variables.get("u"))
        fd, timevar_pickle_path = tempfile.mkstemp()
        os.close(fd)
        with open(timevar_pickle_path, "wb") as pickle_file:
            pickle.dump(timevar, pickle_file)
        ds.closenc()
    except Exception:
        # Exception (not a bare except) so KeyboardInterrupt / SystemExit
        # are not reported as an inaccessible endpoint.
        logger.warn("Failed to access remote dataset %s" % hydrodataset)
        raise DataControllerError("Inaccessible DAP endpoint: %s" % hydrodataset)

    # Add data controller to the queue first so that it
    # can get the initial data and is not blocked
    logger.debug('Starting DataController')
    logger.progress((4, "Starting processes"))
    data_controller = parallel.DataController(
        hydrodataset, common_variables, n_run, get_data, write_lock,
        has_write_lock, read_lock, read_count, time_chunk, horiz_chunk,
        times, self.start, point_get, self.reference_location,
        low_memory=low_memory, cache=self.cache_path)
    tasks.put(data_controller)
    # Create DataController worker
    data_controller_process = parallel.Consumer(tasks, results, n_run, nproc_lock, active, get_data, name="DataController")
    data_controller_process.start()

    logger.debug('Adding %i particles as tasks' % len(self.particles))
    for part in self.particles:
        forcing = parallel.ForceParticle(
            part, hydrodataset, common_variables, timevar_pickle_path,
            times, self.start, self._models, self.reference_location.point,
            self._use_bathymetry, self._use_shoreline, self._use_seasurface,
            get_data, n_run, read_lock, has_read_lock, read_count,
            point_get, data_request_lock, has_data_request_lock,
            reverse_distance=self.reverse_distance,
            bathy=self.bathy_path,
            shoreline_path=self.shoreline_path,
            cache=self.cache_path,
            time_method=self.time_method)
        tasks.put(forcing)

    # Create workers for the particles.
    procs = [
        parallel.Consumer(tasks, results, n_run, nproc_lock, active, get_data, name="ForceParticle-%d" % i)
        for i in xrange(nproc - 1)
    ]
    for w in procs:
        w.start()
        logger.debug('Started %s' % w.name)

    # Get results back from queue, test for failed particles
    return_particles = []
    # Kept as a float so the progress ratio below is a true division
    retrieved = 0.
    error_code = 0

    logger.info("Waiting for %i particle results" % len(self.particles))
    logger.progress((5, "Running model"))
    while retrieved < number_of_tasks:
        try:
            # Returns a tuple of code, result
            code, tempres = results.get(timeout=240)
        except Queue.Empty:
            # Poll the active processes to make sure they are all alive and then continue with loop
            if not data_controller_process.is_alive() and data_controller_process.exitcode != 0:
                # Data controller is zombied, kill off other processes.
                # This must be an assignment; the previous `==` comparison
                # was a no-op and never cleared the flag.
                get_data.value = False
                results.put((-2, "DataController"))

            new_procs = []
            old_procs = []
            for p in procs:
                if not p.is_alive() and p.exitcode != 0:
                    # Do what the Consumer would do if something finished.
                    # Add something to results queue
                    results.put((-3, "ZombieParticle"))
                    # Decrement nproc (DataController exits when this is 0)
                    with nproc_lock:
                        n_run.value = n_run.value - 1

                    # Remove task from queue (so they can be joined later on)
                    tasks.task_done()

                    # Start a new Consumer.  It will exit if there are no tasks available.
                    replacement = parallel.Consumer(tasks, results, n_run, nproc_lock, active, get_data, name=p.name)
                    new_procs.append(replacement)
                    old_procs.append(p)

                    # Release any locks the PID had
                    if p.pid in has_read_lock:
                        with read_lock:
                            read_count.value -= 1
                            has_read_lock.remove(p.pid)

                    if has_data_request_lock.value == p.pid:
                        has_data_request_lock.value = -1
                        try:
                            data_request_lock.release()
                        except Exception:
                            # Best effort: the lock may not actually be held
                            pass

                    if has_write_lock.value == p.pid:
                        has_write_lock.value = -1
                        try:
                            write_lock.release()
                        except Exception:
                            # Best effort: the lock may not actually be held
                            pass

            for p in old_procs:
                try:
                    procs.remove(p)
                except ValueError:
                    logger.warn("Did not find %s in the list of processes. Continuing on." % p.name)

            for p in new_procs:
                procs.append(p)
                logger.warn("Started a new consumer (%s) to replace a zombie consumer" % p.name)
                p.start()

        else:
            # We got one.
            retrieved += 1
            if code is None:
                logger.warn("Got an unrecognized response from a task.")
            elif code == -1:
                logger.warn("Particle %s has FAILED!!" % tempres.uid)
            elif code == -2:
                error_code = code
                logger.warn("DataController has FAILED!! Removing cache file so the particles fail.")
                try:
                    os.remove(self.cache_path)
                except OSError:
                    logger.debug("Could not remove cache file, it probably never existed")
            elif code == -3:
                error_code = code
                logger.info("A zombie process was caught and task was removed from queue")
            elif isinstance(tempres, Particle):
                logger.info("Particle %d finished" % tempres.uid)
                return_particles.append(tempres)
                # We multiply by 90 here to leave the rest of the progress range for the exporting
                logger.progress((round((retrieved / number_of_tasks) * 90., 1), "Particle %d finished" % tempres.uid))
            elif tempres == "DataController":
                logger.info("DataController finished")
                logger.progress((round((retrieved / number_of_tasks) * 90., 1), "DataController finished"))
            else:
                logger.info("Got a strange result on results queue")
                logger.info(str(tempres))

            logger.info("Retrieved %i/%i results" % (int(retrieved), number_of_tasks))

    if len(return_particles) != len(self.particles):
        logger.warn("Some particles failed and are not included in the output")

    # The results queue should be empty at this point
    assert results.empty() is True

    # Should be good to join on the tasks now that the queue is empty
    logger.info("Joining the task queue")
    tasks.join()

    # Join all processes
    logger.info("Joining the processes")
    for w in procs + [data_controller_process]:
        # Wait 10 seconds
        w.join(10.)
        if w.is_alive():
            # Process is hanging, kill it.
            logger.info("Terminating %s forcefully. This should have exited itself." % w.name)
            w.terminate()

    logger.info('Workers complete')

    self.particles = return_particles

    # Remove Manager so it shuts down
    del mgr

    # Remove pickled timevar
    os.remove(timevar_pickle_path)

    # Remove the cache file
    if remove_cache is True:
        try:
            os.remove(self.cache_path)
        except OSError:
            logger.debug("Could not remove cache file, it probably never existed")

    logger.progress((96, "Exporting results"))

    if len(self.particles) > 0:
        # If output_formats and path specified,
        # output particle run data to disk when completed
        if "output_formats" in kwargs:
            # Make sure output_path is also included
            if kwargs.get("output_path", None) is not None:
                formats = kwargs.get("output_formats")
                output_path = kwargs.get("output_path")
                if isinstance(formats, list):
                    for fmt in formats:
                        logger.info("Exporting to: %s" % fmt)
                        try:
                            self.export(output_path, format=fmt)
                        except Exception:
                            # One failed format should not stop the others
                            logger.error("Failed to export to: %s" % fmt)
                else:
                    logger.warn('The output_formats parameter should be a list, not saving any output!')
            else:
                logger.warn('No output path defined, not saving any output!')
        else:
            logger.warn('No output format defined, not saving any output!')
    else:
        logger.warn("Model didn't actually do anything, check the log.")
        if error_code == -2:
            raise DataControllerError("Error in the DataController")
        else:
            raise ModelError("Error in the model")

    logger.progress((99, "Model Run Complete"))

    return
def run(self):
    """
        Force ``self.particle`` through every model timestep and return it.

        For each timestep the environmental data is read (using the
        'nearest' or 'interp' time method), each model in ``self.models``
        moves the particle in turn, and the resulting location is passed
        through ``self.boundary_interaction``.  The particle's status is
        saved once per timestep and, when a Redis URL and results channel
        are configured, published to that channel as JSON.

        Raises ValueError when ``self.active`` is set to False while the
        loop is running, and re-raises the AssertionError from the time
        index length check.
    """
    self.load_initial_dataset()

    redis_connection = None
    if self.redis_url is not None and self.redis_results_channel is not None:
        # Imported lazily so redis is only needed when it is configured
        import redis
        redis_connection = redis.from_url(self.redis_url)

    # time.clock was removed in Python 3.8; prefer perf_counter when it
    # exists and fall back to time.clock on interpreters without it.
    clock = getattr(time, 'perf_counter', None) or time.clock

    # Setup shoreline
    self._shoreline = None
    if self.useshore is True:
        self._shoreline = Shoreline(path=self.shoreline_path,
                                    feature_name=self.shoreline_feature,
                                    point=self.release_location_centroid,
                                    spatialbuffer=self.shoreline_index_buffer)
        # Make sure we are not starting on land.  Raises exception if we are.
        self._shoreline.intersect(start_point=self.release_location_centroid,
                                  end_point=self.release_location_centroid)

    # Setup Bathymetry
    if self.usebathy is True:
        try:
            self._bathymetry = Bathymetry(file=self.bathy_path)
        except Exception:
            logger.exception("Could not load Bathymetry file: %s, using no Bathymetry for this run!" % self.bathy_path)
            self.usebathy = False

    # Calculate datetime at every timestep
    modelTimestep, newtimes = AsaTransport.get_time_objects_from_model_timesteps(self.times, start=self.start_time)

    if self.time_method == 'interp':
        time_indexs = self.timevar.nearest_index(newtimes, select='before')
    elif self.time_method == 'nearest':
        time_indexs = self.timevar.nearest_index(newtimes)
    else:
        # NOTE: time_indexs is left undefined here, so the length check
        # below fails with a NameError for unsupported methods.
        logger.warn("Method for computing u,v,w,temp,salt not supported!")
    try:
        assert len(newtimes) == len(time_indexs)
    except AssertionError:
        logger.exception("Time indexes are messed up. Need to have equal datetime and time indexes")
        raise

    # Keep track of how much time we spend in each area.
    tot_boundary_time = 0.
    tot_model_time = {}
    tot_read_data = 0.
    for m in self.models:
        tot_model_time[m.name] = 0.

    # Set the base conditions
    # If using Redis, send the results
    if redis_connection is not None:
        redis_connection.publish(self.redis_results_channel, json.dumps(self.particle.timestep_dump()))

    # loop over timesteps
    # We don't loop over the last time_index because
    # we need to query in the time_index and set the particle's
    # location as the 'newtime' object.
    for loop_i, i in enumerate(time_indexs[0:-1]):

        if self.active and self.active.value is False:
            raise ValueError("Particle exiting due to Failure.")

        newloc = None

        st = clock()
        # Get the variable data required by the models
        if self.time_method == 'nearest':
            u, v, w, temp, salt = self.get_nearest_data(i)
        elif self.time_method == 'interp':
            u, v, w, temp, salt = self.get_linterp_data(i, newtimes[loop_i])
        else:
            logger.warn("Method for computing u,v,w,temp,salt is unknown. Only 'nearest' and 'interp' are supported.")
        tot_read_data += (clock() - st)

        # Get the bathy value at the particles location
        if self.usebathy is True:
            bathymetry_value = self._bathymetry.get_depth(self.particle.location)
        else:
            # Placeholder depth used when no bathymetry is loaded
            bathymetry_value = -999999999999999

        # Age the particle by the modelTimestep (seconds)
        # 'Age' meaning the amount of time it has been forced.
        self.particle.age(seconds=modelTimestep[loop_i])

        # loop over models - sort these in the order you want them to run
        for model in self.models:
            st = clock()
            movement = model.move(self.particle, u, v, w, modelTimestep[loop_i],
                                  temperature=temp, salinity=salt,
                                  bathymetry_value=bathymetry_value)
            newloc = Location4D(latitude=movement['latitude'],
                                longitude=movement['longitude'],
                                depth=movement['depth'],
                                time=newtimes[loop_i + 1])
            # Charge the time to the model that just ran.  This used to be
            # keyed on `m`, the stale variable from the setup loop above,
            # which credited every model's time to the last model.
            tot_model_time[model.name] += (clock() - st)
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug("%s - moved %.3f meters (horizontally) and %.3f meters (vertically) by %s with data from %s" % (
                    self.particle.logstring(), movement['distance'], movement['vertical_distance'],
                    model.__class__.__name__, newtimes[loop_i].isoformat()))
            if newloc:
                st = clock()
                self.boundary_interaction(particle=self.particle,
                                          starting=self.particle.location,
                                          ending=newloc,
                                          distance=movement['distance'],
                                          angle=movement['angle'],
                                          azimuth=movement['azimuth'],
                                          reverse_azimuth=movement['reverse_azimuth'],
                                          vertical_distance=movement['vertical_distance'],
                                          vertical_angle=movement['vertical_angle'])
                tot_boundary_time += (clock() - st)
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug("%s - was forced by %s and is now at %s" % (
                    self.particle.logstring(), model.__class__.__name__, self.particle.location.logstring()))

        self.particle.note = self.particle.outputstring()
        # Each timestep, save the particles status and environmental variables.
        # This keep fields such as temp, salt, halted, settled, and dead matched up with the number of timesteps
        self.particle.save()

        # If using Redis, send the results
        if redis_connection is not None:
            redis_connection.publish(self.redis_results_channel, json.dumps(self.particle.timestep_dump()))

    self.dataset.closenc()

    # We won't pull data for the last entry in locations, but we need to populate it with fill data.
    self.particle.fill_gap()

    if self.usebathy is True:
        self._bathymetry.close()

    if self.useshore is True:
        self._shoreline.close()

    logger.info(textwrap.dedent('''Particle %i Stats: Data read: %f seconds Model forcing: %s seconds Boundary intersection: %f seconds''' % (
        self.particle.uid,
        tot_read_data,
        {s: '{:g} seconds'.format(f) for s, f in list(tot_model_time.items())},
        tot_boundary_time)))

    return self.particle
def move(self, particle, u, v, w, modelTimestep, **kwargs):
    """
    Grow the particle for one timestep and combine its behavior velocities.

    The incoming ``u``, ``v`` and ``w`` are not used: they are reset to zero
    and rebuilt by summing the results of the settlement, active diel and
    taxis behaviors (each is called with u=0, v=0 and w set to the
    capability's ``calculated_vss``, or 0 when that attribute is missing).

    :param particle: particle to update; its ``temp`` and ``salt`` are set
        and ``grow()`` is called exactly once.
    :param modelTimestep: length of the timestep in seconds.
    :param kwargs: may carry ``temperature`` and ``salinity``; all keyword
        arguments are forwarded unchanged to every behavior's ``move``.
    :return: the dict produced by
        ``AsaTransport.distance_from_location_using_u_v_w`` with the summed
        ``u``, ``v`` and ``w`` stored under those keys.
    """
    temp = kwargs.get('temperature', None)
    salt = kwargs.get('salinity', None)

    # Either value may legitimately be None at this point, and a "%.4f"
    # conversion raises TypeError for None, so log with "%s" instead.
    logger.debug("Temp: %s, Salt: %s", temp, salt)

    # IMPORTANT:
    # If we got no data from the model, we are using the last available
    # value stored in the particles!
    if temp is None or math.isnan(temp):
        temp = particle.last_temp()
    if salt is None or math.isnan(salt):
        salt = particle.last_salt()

    particle.temp = temp
    particle.salt = salt

    # Grow the particle. Growth affects which lifestage the particle is in.
    modelTimestepDays = modelTimestep / 60. / 60. / 24.
    growth = None
    if self.linear_a is not None and self.linear_b is not None:
        if particle.temp is not None and not math.isnan(particle.temp):
            # linear growth, compute q = t / (Ax+B)
            # Where timestep t (days), at temperature x (deg C),
            # proportion of stage completed (q)
            growth = modelTimestepDays / (self.linear_a * particle.temp + self.linear_b)
        else:
            logger.debug(
                "No temperature found for Particle %s at this location and timestep, skipping linear temperature growth and using duration growth"
                % particle.uid)

    if growth is None:
        # Fall back to growth by stage duration
        growth = modelTimestepDays / self.duration
    particle.grow(growth)

    active_diel = self.get_active_diel(particle.location)

    # Run the settlement, active diel and taxis behaviors, skipping any
    # that are unset/falsy. The list order is the execution order.
    behaviors_to_run = [
        behavior
        for behavior in [self.settlement, active_diel] + self.taxis
        if behavior
    ]

    try:
        vss = self.capability.calculated_vss
    except AttributeError:
        logger.debug(
            "No VSS found, vertical behaviors will not act upon particle")
        vss = 0

    # u, v, and w store the accumulated results from all of the behaviors.
    u = 0
    v = 0
    w = 0
    for behave in behaviors_to_run:
        behave_results = behave.move(particle, 0, 0, vss, modelTimestep, **kwargs)
        u += behave_results['u']
        v += behave_results['v']
        w += behave_results['w']

    # Determine the new location after running the behaviors
    result = AsaTransport.distance_from_location_using_u_v_w(
        u=u, v=v, w=w, timestep=modelTimestep, location=particle.location)
    result['u'] = u
    result['v'] = v
    result['w'] = w
    return result
def setup_run(self, **kwargs):
    """
    Prepare a run: logging, timesteps, particles, workers, dataset checks.

    Steps, in order: configure logging (optionally adding a Redis handler),
    compute ``self.times`` / ``self.modelTimestep`` / ``self.datetimes``,
    create ``self._npart`` particles at their start locations, optionally
    build the multiprocessing manager, counters and queues, and finally
    open the hydro dataset to verify it has the needed variables and
    strictly contains the run's start and end times.

    :param kwargs: ``output_formats`` (Redis logging is enabled when it
        contains ``"redis"``), ``redis_url``, ``redis_log_channel``,
        ``redis_results_channel`` and ``manager`` (default True; when
        truthy the multiprocessing objects are created).
    :raises BaseDataControllerError: if the dataset cannot be opened, lacks
        the u/v/x/y variables, or does not cover the run's time span.
    """
    logger.setLevel(logging.PROGRESS)

    self.redis_url = None
    self.redis_log_channel = None
    self.redis_results_channel = None
    if "redis" in kwargs.get("output_formats", []):
        from paegan.logger.redis_handler import RedisHandler
        self.redis_url = kwargs.get("redis_url")
        self.redis_log_channel = kwargs.get("redis_log_channel")
        self.redis_results_channel = kwargs.get("redis_results_channel")
        rhandler = RedisHandler(self.redis_log_channel, self.redis_url)
        rhandler.setLevel(logging.PROGRESS)
        logger.addHandler(rhandler)

    # Relax.
    time.sleep(0.5)

    # Add ModelController description to logfile
    logger.info(unicode(self))

    # Add the model descriptions to logfile
    for m in self._models:
        logger.info(unicode(m))

    # Calculate the model timesteps
    # We need times = len(self._nstep) + 1 since data is stored one timestep
    # after a particle is forced with the final timestep's data.
    self.times = range(0, (self._step*self._nstep)+1, self._step)
    # Calculate a datetime object for each model timestep
    # This method is duplicated in CachingDataController and CachingForcer
    # using the 'times' variables above. Will be useful in those other
    # locations for particles released at different times
    # i.e. released over a few days
    self.modelTimestep, self.datetimes = AsaTransport.get_time_objects_from_model_timesteps(self.times, start=self.start)

    logger.progress((1, "Setting up particle start locations"))
    point_locations = []
    if isinstance(self.geometry, Point):
        point_locations = [self.reference_location] * self._npart
    elif isinstance(self.geometry, (Polygon, MultiPolygon)):
        point_locations = [
            Location4D(latitude=loc.y, longitude=loc.x, depth=self._depth, time=self.start)
            for loc in AsaTransport.fill_polygon_with_points(goal=self._npart, polygon=self.geometry)
        ]

    # Initialize the particles
    logger.progress((2, "Initializing particles"))
    for x in xrange(0, self._npart):
        p = LarvaParticle(id=x)
        p.location = point_locations[x]
        # We don't need to fill the location gaps here for environment variables
        # because the first data collected actually relates to this original
        # position.
        # We do need to fill in fields such as settled, halted, etc.
        p.fill_status_gap()
        # Set the inital note
        p.note = p.outputstring()
        p.notes.append(p.note)
        self.particles.append(p)

    if kwargs.get("manager", True):
        # Get the number of cores (may take some tuning) and create that
        # many workers then pass particles into the queue for the workers
        self.mgr = multiprocessing.Manager()

        # This tracks if the system is 'alive'. Most looping whiles will check this
        # and break out if it is False. This is True until something goes very wrong.
        self.active = self.mgr.Value('bool', True)

        # Each particle is a task, plus the CachingDataController
        self.number_of_tasks = self.get_number_of_tasks()

        # Either spin up the number of cores, or the number of tasks.
        # One core is held back, but never drop to zero cores on a
        # single-core host: that would leave no worker to run the tasks.
        usable_cores = max(multiprocessing.cpu_count() - 1, 1)
        self.nproc = min(usable_cores, self.number_of_tasks)

        # Number of tasks that we need to run. This is decremented everytime something exits.
        self.n_run = self.mgr.Value('int', self.number_of_tasks)
        # The lock that controls access to the 'n_run' variable
        self.nproc_lock = self.mgr.Lock()

        # Create the task queue for all of the particles and the CachingDataController
        self.tasks = multiprocessing.JoinableQueue(self.number_of_tasks)
        # Create the result queue for all of the particles and the CachingDataController
        self.results = self.mgr.Queue(self.number_of_tasks)

    logger.progress((3, "Initializing and caching hydro model's grid"))
    try:
        ds = CommonDataset.open(self.hydrodataset)
    except Exception:
        logger.exception("Failed to access dataset %s" % self.hydrodataset)
        raise BaseDataControllerError("Inaccessible Dataset: %s" % self.hydrodataset)

    # Everything below validates with explicit checks (asserts vanish under
    # "python -O") and always closes the dataset, even on failure.
    try:
        # Query the dataset for common variable names
        # and the time variable.
        logger.debug("Retrieving variable information from dataset")
        self.common_variables = self.get_common_variables_from_dataset(ds)

        self.timevar = None
        available = ds._current_variables
        missing = [
            key for key in ("u", "v", "x", "y")
            if self.common_variables.get(key) not in available
        ]
        if missing:
            logger.error("Could not locate variables needed to run model: %s" % unicode(self.common_variables))
            raise BaseDataControllerError("A required data variable was not found in %s" % self.hydrodataset)
        self.timevar = ds.gettimevar(self.common_variables.get("u"))

        model_dates = self.timevar.get_dates()
        model_start = model_dates[0]
        model_end = model_dates[-1]

        # Both ends of the run must fall strictly inside the dataset's span
        if not (model_start < self.start < model_end):
            raise BaseDataControllerError("Start time for model (%s) is not available in source dataset (%s/%s)" % (self.datetimes[0], model_start, model_end))

        run_end = self.datetimes[-1]
        if not (model_start < run_end < model_end):
            raise BaseDataControllerError("End time for model (%s) is not available in source dataset (%s/%s)" % (self.datetimes[-1], model_start, model_end))
    finally:
        ds.closenc()
def listen_for_results(self):
    """
    Drain the results queue until every task has reported, then join workers.

    Each 240 second timeout on the queue triggers a health check. A data
    controller that died with a non-zero exit code is reported as code -2
    and waiting particles are released by clearing ``get_data``. Every
    consumer that died with a non-zero exit code is reported as code -3,
    has its bookkeeping released (run counter, task queue slot, any locks
    recorded under its PID) and is replaced by a new ``Consumer``.

    On success ``self.particles`` is replaced by the particles that came
    back and ``self.error_code`` holds the last -2/-3 code seen (else 0).
    Worker processes are always joined, and terminated if still alive
    after 20 seconds, even when an exception escapes.

    :raises AssertionError: if the results queue is not empty after all
        expected results were retrieved.
    """
    try:
        # Get results back from queue, test for failed particles
        return_particles = []
        retrieved = 0.
        self.error_code = 0

        logger.info("Waiting for %i particle results" % len(self.particles))
        logger.progress((5, "Running model"))
        while retrieved < self.number_of_tasks:
            try:
                # Returns a tuple of code, result
                code, tempres = self.results.get(timeout=240)
            except Queue.Empty:
                # Poll the active processes to make sure they are all alive and then continue with loop
                if not self.data_controller_process.is_alive() and self.data_controller_process.exitcode != 0:
                    # Data controller is zombied, kill off other processes.
                    # This must be an assignment: the previous
                    # "value is False" was a comparison with no effect.
                    self.get_data.value = False
                    self.results.put((-2, "CachingDataController"))

                new_procs = []
                old_procs = []
                for p in self.procs:
                    if not p.is_alive() and p.exitcode != 0:
                        # Do what the Consumer would do if something finished.
                        # Add something to results queue
                        self.results.put((-3, "ZombieParticle"))
                        # Decrement nproc (CachingDataController exits when this is 0)
                        with self.nproc_lock:
                            self.n_run.value = self.n_run.value - 1

                        # Remove task from queue (so they can be joined later on)
                        self.tasks.task_done()

                        # Start a new Consumer. It will exit if there are no tasks available.
                        replacement = Consumer(self.tasks, self.results, self.n_run, self.nproc_lock, self.active, self.get_data, name=p.name)
                        new_procs.append(replacement)
                        old_procs.append(p)

                        # Release any locks the PID had
                        if p.pid in self.has_read_lock:
                            with self.read_lock:
                                self.read_count.value -= 1
                                self.has_read_lock.remove(p.pid)

                        if self.has_data_request_lock.value == p.pid:
                            self.has_data_request_lock.value = -1
                            try:
                                self.data_request_lock.release()
                            except Exception:
                                # Best effort: the lock may not actually be held
                                pass

                        if self.has_write_lock.value == p.pid:
                            self.has_write_lock.value = -1
                            try:
                                self.write_lock.release()
                            except Exception:
                                # Best effort: the lock may not actually be held
                                pass

                for p in old_procs:
                    try:
                        self.procs.remove(p)
                    except ValueError:
                        logger.warning("Did not find %s in the list of processes. Continuing on." % p.name)

                for p in new_procs:
                    self.procs.append(p)
                    logger.warning("Started a new consumer (%s) to replace a zombie consumer" % p.name)
                    p.start()

            else:
                # We got one.
                retrieved += 1
                if code is None:
                    logger.warning("Got an unrecognized response from a task.")
                elif code == -1:
                    logger.warning("Particle %s has FAILED!!" % tempres.uid)
                elif code == -2:
                    self.error_code = code
                    logger.warning("CachingDataController has FAILED!! Removing cache file so the particles fail.")
                    try:
                        os.remove(self.cache_path)
                    except OSError:
                        logger.debug("Could not remove cache file, it probably never existed")
                elif code == -3:
                    self.error_code = code
                    logger.info("A zombie process was caught and task was removed from queue")
                elif isinstance(tempres, Particle):
                    logger.info("Particle %d finished" % tempres.uid)
                    return_particles.append(tempres)
                    # Progress is scaled to 90 so the remainder is left for the exporting
                    logger.progress((round((retrieved / self.number_of_tasks) * 90., 1), "Particle %d finished" % tempres.uid))
                elif tempres == "CachingDataController":
                    logger.info("CachingDataController finished")
                    logger.progress((round((retrieved / self.number_of_tasks) * 90., 1), "CachingDataController finished"))
                else:
                    logger.info("Got a strange result on results queue")
                    logger.info(str(tempres))

                logger.info("Retrieved %i/%i results" % (int(retrieved), self.number_of_tasks))

        if len(return_particles) != len(self.particles):
            logger.warning("Some particles failed and are not included in the output")

        # The results queue should be empty at this point
        assert self.results.empty() is True

        # Should be good to join on the tasks now that the queue is empty
        logger.info("Joining the task queue")
        self.tasks.join()

        self.particles = return_particles

    finally:
        # Join all processes
        logger.info("Joining the processes")
        for w in self.procs + [self.data_controller_process]:
            # Wait 20 seconds
            w.join(20.)
            if w.is_alive():
                # Process is hanging, kill it.
                logger.info("Terminating %s forcefully. This should have exited itself." % w.name)
                w.terminate()
def move(self, particle, u, v, w, modelTimestep, **kwargs):
    """
    Steer the particle vertically toward the band [max_depth, min_depth].

    A depth smaller than ``self.max_depth`` is treated as below the band
    (the particle moves up); a depth larger than ``self.min_depth`` is
    treated as above it (the particle moves down). A band therefore only
    exists when ``self.max_depth`` is numerically no greater than
    ``self.min_depth``; nothing here checks that, so callers must.

    :param particle: provides ``settled``, ``halted`` and ``location.depth``.
    :param w: vertical speed available to the particle for this step.
    :param modelTimestep: step length, in the time unit of ``w``.
    :return: dict with keys ``'u'``, ``'v'``, ``'w'``. All zero when the
        particle is settled or halted; otherwise ``u``/``v`` pass through
        and ``w`` is the (possibly reduced) upward speed, its negation
        when moving down, or 0 when already inside the band.
    """
    # Settled particles, and halted (but not settled) ones, do not move
    if particle.settled or particle.halted:
        return {'u': 0, 'v': 0, 'w': 0}

    # How far could we travel this step? We don't want to overshoot.
    travel = w * modelTimestep
    depth = particle.location.depth

    def _capped_speed(far_edge):
        # If a full step would carry the particle past the far edge of the
        # band, slow down so it lands in the middle of the band instead.
        gap = abs(depth - far_edge)
        if gap < abs(travel):
            half_band = abs((self.max_depth - self.min_depth) / 2)
            return (gap - half_band) / modelTimestep
        return w

    # ~~~~~~~~ surface
    # -------- min
    # -------- max
    #    x  me      -> below the band, swim up
    if depth < self.max_depth:
        logger.debug("DIEL: %s - Moving UP to desired depth from %f" %
                     (self.logstring(), depth))
        return {'u': u, 'v': v, 'w': _capped_speed(self.min_depth)}

    # ~~~~~~~~ surface
    #    x  me      -> above the band, swim down
    # -------- min
    # -------- max
    if depth > self.min_depth:
        logger.debug("DIEL: %s - Moving DOWN to desired depth from %f" %
                     (self.logstring(), depth))
        return {'u': u, 'v': v, 'w': -_capped_speed(self.max_depth)}

    # Already inside the band: no vertical movement
    return {'u': u, 'v': v, 'w': 0}
def listen_for_results(self, output_h5_file, total_particles):
    """
    Collect task results, replace dead workers, then export the particles.

    Each 240 second timeout on the queue triggers a health check. A data
    controller that died with a non-zero exit code is reported as code -2
    and waiting particles are released by clearing ``get_data``. Every
    consumer that died with a non-zero exit code is reported as code -3,
    has its bookkeeping released (run counter, task queue slot, any locks
    recorded under its PID) and is replaced by a new ``Consumer``.

    Worker processes are always joined, and terminated if still alive
    after 20 seconds. Afterwards every timestep of every returned particle
    is written through ``ex.ResultsPyTable``.

    :param output_h5_file: passed to ``ex.ResultsPyTable``.
    :param total_particles: number of particles expected back; used for
        logging and to warn when some are missing.
    :raises AssertionError: if the results queue is not empty after all
        expected results were retrieved.
    :raises ValueError: if the data controller reported failure (code -2).
    """
    try:
        # Get results back from queue, test for failed particles
        return_particles = []
        retrieved = 0.
        self.error_code = 0

        logger.info("Waiting for %i particle results" % total_particles)
        while retrieved < self.total_task_count():  # One for the CachingDataController
            logger.info("looping in listen_for_results")
            try:
                # Returns a tuple of code, result
                code, tempres = self.results.get(timeout=240)
            except queue.Empty:
                # Poll the active processes to make sure they are all alive and then continue with loop
                if not self.data_controller_process.is_alive() and self.data_controller_process.exitcode != 0:
                    # Data controller is zombied, kill off other processes.
                    # This must be an assignment: the previous
                    # "value is False" was a comparison with no effect.
                    self.get_data.value = False
                    self.results.put((-2, "CachingDataController"))

                new_procs = []
                old_procs = []
                for p in self.procs:
                    if not p.is_alive() and p.exitcode != 0:
                        # Do what the Consumer would do if something finished.
                        # Add something to results queue
                        self.results.put((-3, "ZombieParticle"))
                        # Decrement nproc (CachingDataController exits when this is 0)
                        with self.nproc_lock:
                            self.n_run.value = self.n_run.value - 1

                        # Remove task from queue (so they can be joined later on)
                        self.tasks.task_done()

                        # Start a new Consumer. It will exit if there are no tasks available.
                        replacement = Consumer(self.tasks, self.results, self.n_run, self.nproc_lock, self.active, self.get_data, name=p.name)
                        new_procs.append(replacement)
                        old_procs.append(p)

                        # Release any locks the PID had
                        if p.pid in self.has_read_lock:
                            with self.read_lock:
                                self.read_count.value -= 1
                                self.has_read_lock.remove(p.pid)

                        if self.has_data_request_lock.value == p.pid:
                            self.has_data_request_lock.value = -1
                            try:
                                self.data_request_lock.release()
                            except Exception:
                                # Best effort: the lock may not actually be held
                                pass

                        if self.has_write_lock.value == p.pid:
                            self.has_write_lock.value = -1
                            try:
                                self.write_lock.release()
                            except Exception:
                                # Best effort: the lock may not actually be held
                                pass

                for p in old_procs:
                    try:
                        self.procs.remove(p)
                    except ValueError:
                        logger.warning(
                            "Did not find %s in the list of processes. Continuing on."
                            % p.name)

                for p in new_procs:
                    self.procs.append(p)
                    logger.warning(
                        "Started a new consumer (%s) to replace a zombie consumer"
                        % p.name)
                    p.start()

            else:
                # We got one.
                retrieved += 1
                if code is None:
                    logger.warning("Got an unrecognized response from a task.")
                elif code == -1:
                    logger.warning("Particle %s has FAILED!!" % tempres.uid)
                elif code == -2:
                    self.error_code = code
                    logger.warning(
                        "CachingDataController has FAILED!! Removing cache file so the particles fail."
                    )
                    try:
                        os.remove(self.cache_path)
                    except OSError:
                        logger.debug(
                            "Could not remove cache file, it probably never existed"
                        )
                elif code == -3:
                    self.error_code = code
                    logger.info(
                        "A zombie process was caught and task was removed from queue"
                    )
                elif isinstance(tempres, Particle):
                    logger.info("Particle %d finished" % tempres.uid)
                    return_particles.append(tempres)
                    # Progress is scaled to 90 so the remainder is left for the exporting
                    logger.progress(
                        (round((retrieved / self.total_task_count()) * 90., 1),
                         "Particle %d finished" % tempres.uid))
                elif tempres == "CachingDataController":
                    logger.info("CachingDataController finished")
                    logger.progress(
                        (round((retrieved / self.total_task_count()) * 90., 1),
                         "CachingDataController finished"))
                else:
                    logger.info("Got a strange result on results queue")
                    logger.info(str(tempres))

                logger.info("Retrieved %i/%i results" %
                            (int(retrieved), self.total_task_count()))

            # Relax
            time.sleep(1)

        if len(return_particles) != total_particles:
            logger.warning(
                "Some particles failed and are not included in the output")

        # The results queue should be empty at this point
        assert self.results.empty() is True

        # Should be good to join on the tasks now that the queue is empty
        logger.info("Joining the task queue")
        self.tasks.join()

        self.tasks.close()
        self.tasks.join_thread()

    finally:
        # Join all processes
        logger.info("Joining the processes")
        for w in self.procs + [self.data_controller_process]:
            # Wait 20 seconds
            w.join(20.)
            if w.is_alive():
                # Process is hanging, kill it.
                logger.info(
                    "Terminating %s forcefully. This should have exited itself."
                    % w.name)
                w.terminate()

    if self.error_code == -2:
        raise ValueError(
            "Error in the BaseDataController (error_code was -2)")

    results = ex.ResultsPyTable(output_h5_file)
    try:
        for p in return_particles:
            for x in range(len(p.locations)):
                results.write(p.timestep_index_dump(x))
        results.compute()
    finally:
        # Close the table even if writing or computing fails
        results.close()

    return
def data_nearest(self, i, currenttime):
    """
    Method to streamline request for data from cache,
    Uses nearest time to get u,v,w,temp,salt

    If the cache does not yet hold time index ``i`` (per ``need_data``),
    the data controller is asked to fetch it and this call blocks, polling
    every 4 seconds, until ``get_data`` is cleared. The values are then
    read from the cached dataset at the particle's location.

    :param i: time index to read from the dataset.
    :param currenttime: not used by this method.
    :return: tuple ``(u, v, w, temp, salt)``. ``w`` is 0.0 when the dataset
        has no ``'w'`` variable; ``temp`` and ``salt`` are NaN unless both
        ``self.temp_name`` and ``self.salt_name`` are set.
    """
    def _wait_for_controller(message):
        # Block while the data controller holds the cache file
        if self.active.value:
            while self.get_data.value:
                logger.debug(message)
                timer.sleep(4)

    def _point_mean(name):
        # Mean of the values at time index i nearest the particle
        values = self.dataset.get_values(name, timeinds=[np.asarray([i])], point=self.part.location)
        return np.mean(np.mean(values))

    def _neighbor_mean(name):
        # Same read with num=2 (the original note calls this "the closest
        # 4 points"); the result is NaN if any of them is NaN.
        values = self.dataset.get_values(name, timeinds=[np.asarray([i])], point=self.part.location, num=2)
        return values.mean()

    _wait_for_controller("Waiting for DataController to release cache file so I can read from it...")

    if self.need_data(i):
        # Acquire lock for asking for data
        self.data_request_lock.acquire()
        self.has_data_request_lock.value = os.getpid()
        try:
            # Another particle may have filled the cache while we waited
            if self.need_data(i):
                with self.read_lock:
                    self.read_count.value += 1
                    self.has_read_lock.append(os.getpid())
                # Always close the file and drop our reader registration,
                # otherwise a failure here leaves a reader counted forever.
                try:
                    # Open netcdf file on disk from commondataset
                    self.dataset.opennc()
                    try:
                        # Get the indices for the current particle location
                        indices = self.dataset.get_indices('u', timeinds=[np.asarray([i - 1])], point=self.part.location)
                    finally:
                        self.dataset.closenc()
                finally:
                    with self.read_lock:
                        self.read_count.value -= 1
                        self.has_read_lock.remove(os.getpid())

                # Override the time
                self.point_get.value = [indices[0] + 1, indices[-2], indices[-1]]
                # Request that the data controller update the cache
                # DATA CONTOLLER STARTS
                self.get_data.value = True
                # Wait until the data controller is done
                _wait_for_controller("Waiting for DataController to update cache...")
        finally:
            self.has_data_request_lock.value = -1
            self.data_request_lock.release()

    # Tell the DataController that we are going to be reading from the file
    with self.read_lock:
        self.read_count.value += 1
        self.has_read_lock.append(os.getpid())

    # Default to NaN so both names are always bound, including when only
    # one of temp_name / salt_name is configured.
    temp = np.nan
    salt = np.nan
    try:
        # Open netcdf file on disk from commondataset
        self.dataset.opennc()

        has_w = 'w' in self.dataset.nc.variables
        has_temp_and_salt = self.temp_name is not None and self.salt_name is not None

        # Grab data at time index closest to particle location
        u = _point_mean('u')
        v = _point_mean('v')
        # if there is vertical velocity in the dataset, get it
        w = _point_mean('w') if has_w else 0.0
        # If there is salt and temp in the dataset, get it
        if has_temp_and_salt:
            temp = _point_mean('temp')
            salt = _point_mean('salt')

        # Check for nans that occur in the ocean (happens because
        # of model and coastline resolution mismatches)
        if np.isnan(u).any() or np.isnan(v).any() or np.isnan(w).any():
            u = _neighbor_mean('u')
            v = _neighbor_mean('v')
            w = _neighbor_mean('w') if has_w else 0.0
            if has_temp_and_salt:
                temp = _neighbor_mean('temp')
                salt = _neighbor_mean('salt')

    except Exception:
        logger.error("Error in data_nearest on ForceParticle")
        raise
    finally:
        self.dataset.closenc()
        with self.read_lock:
            self.read_count.value -= 1
            self.has_read_lock.remove(os.getpid())

    return u, v, w, temp, salt
def __call__(self, proc, active):
    """
    Force ``self.part`` through every model timestep and return it.

    Opens the bathymetry and shoreline helpers when enabled, waits for the
    data controller, opens the local cache dataset, then for every
    timestep but the last reads u/v/w/temp/salt, ages the particle, runs
    each model and applies boundary interaction before saving the
    particle's state.

    :param proc: stored on ``self.proc``.
    :param active: shared flag object; its ``.value`` going false makes the
        particle abort.
    :return: the forced particle (``self.part``).
    :raises ValueError: if ``self.time_method`` is neither ``'nearest'``
        nor ``'interp'``, or if the run is flagged inactive mid-loop.
    :raises AssertionError: if the number of time indexes does not match
        the number of datetimes.
    """
    self.active = active
    self.proc = proc
    part = self.part

    bathymetry = None
    shoreline = None
    self._shoreline = None
    try:
        if self.usebathy:
            bathymetry = self._bathymetry = Bathymetry(file=self.bathy)

        if self.useshore:
            shoreline = self._shoreline = Shoreline(file=self.shoreline_path, point=self.release_location_centroid, spatialbuffer=0.25)
            # Make sure we are not starting on land. Raises exception if we are.
            self._shoreline.intersect(start_point=self.release_location_centroid, end_point=self.release_location_centroid)

        if self.active.value:
            while self.get_data.value:
                logger.debug("Waiting for DataController to start...")
                timer.sleep(10)

        # Initialize commondataset of local cache, then
        # close the related netcdf file. Register as a reader before the
        # try so the finally only undoes a registration that happened.
        with self.read_lock:
            self.read_count.value += 1
            self.has_read_lock.append(os.getpid())
        try:
            self.dataset = CommonDataset.open(self.localpath)
            self.dataset.closenc()
        except Exception:
            logger.warning("No cache file: %s. Particle exiting" % self.localpath)
            raise
        finally:
            with self.read_lock:
                self.read_count.value -= 1
                self.has_read_lock.remove(os.getpid())

        # Calculate datetime at every timestep
        modelTimestep, newtimes = AsaTransport.get_time_objects_from_model_timesteps(self.times, start=self.start_time)

        # Load Timevar from pickle serialization
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # confirm timevar_pickle_path is only ever written by this program.
        with open(self.timevar_pickle_path, "rb") as f:
            timevar = pickle.load(f)

        if self.time_method == 'interp':
            time_indexs = timevar.nearest_index(newtimes, select='before')
        elif self.time_method == 'nearest':
            time_indexs = timevar.nearest_index(newtimes)
        else:
            # Previously this only warned and then failed on an unbound
            # name; fail explicitly instead.
            logger.warning("Method for computing u,v,w,temp,salt not supported!")
            raise ValueError("Method for computing u,v,w,temp,salt not supported!")

        # Explicit check rather than assert, which vanishes under -O
        if len(newtimes) != len(time_indexs):
            logger.error("Time indexes are messed up. Need to have equal datetime and time indexes")
            raise AssertionError("Time indexes are messed up. Need to have equal datetime and time indexes")

        # loop over timesteps
        # We don't loop over the last time_index because
        # we need to query in the time_index and set the particle's
        # location as the 'newtime' object.
        for loop_i, i in enumerate(time_indexs[0:-1]):

            if not self.active.value:
                raise ValueError("Particle exiting due to Failure.")

            newloc = None

            # Get the variable data required by the models
            # (time_method was validated above, so these are the only cases)
            if self.time_method == 'nearest':
                u, v, w, temp, salt = self.data_nearest(i, newtimes[loop_i])
            else:
                u, v, w, temp, salt = self.data_interp(i, timevar, newtimes[loop_i])

            # Get the bathy value at the particles location
            if self.usebathy:
                bathymetry_value = self._bathymetry.get_depth(part.location)
            else:
                bathymetry_value = -999999999999999

            # Age the particle by the modelTimestep (seconds)
            # 'Age' meaning the amount of time it has been forced.
            part.age(seconds=modelTimestep[loop_i])

            # loop over models - sort these in the order you want them to run
            for model in self.models:
                movement = model.move(part, u, v, w, modelTimestep[loop_i], temperature=temp, salinity=salt, bathymetry_value=bathymetry_value)
                newloc = Location4D(latitude=movement['latitude'], longitude=movement['longitude'], depth=movement['depth'], time=newtimes[loop_i + 1])
                logger.debug("%s - moved %.3f meters (horizontally) and %.3f meters (vertically) by %s with data from %s" % (part.logstring(), movement['distance'], movement['vertical_distance'], model.__class__.__name__, newtimes[loop_i].isoformat()))
                if newloc:
                    self.boundary_interaction(particle=part, starting=part.location, ending=newloc,
                                              distance=movement['distance'], angle=movement['angle'],
                                              azimuth=movement['azimuth'], reverse_azimuth=movement['reverse_azimuth'],
                                              vertical_distance=movement['vertical_distance'], vertical_angle=movement['vertical_angle'])
                logger.debug("%s - was forced by %s and is now at %s" % (part.logstring(), model.__class__.__name__, part.location.logstring()))

            part.note = part.outputstring()
            # Each timestep, save the particles status and environmental variables.
            # This keep fields such as temp, salt, halted, settled, and dead matched up with the number of timesteps
            part.save()

        # We won't pull data for the last entry in locations, but we need to populate it with fill data.
        part.fill_environment_gap()

        return part
    finally:
        # Release the helpers on success and on failure alike
        if bathymetry is not None:
            bathymetry.close()
        if shoreline is not None:
            shoreline.close()
def fill_cache_with_linterp_data(self, i, currenttime):
    """
    Method to streamline request for data from cache,
    Uses linear interpolation between timesteps to
    get u,v,w,temp,salt

    Makes sure the cache holds time index ``i + 1`` (per ``need_data``).
    When it does not, the data controller is asked to fetch the current
    time index and then, if still needed, the next one. Each request
    blocks, polling every 2 seconds, until ``get_data`` is cleared.
    Nothing is returned; only the cache is filled.

    :param i: time index of the current timestep.
    :param currenttime: not used by this method.
    """
    def _wait_for_controller(message):
        # Block while the data controller is working on the cache file
        if self.active.value is True:
            while self.get_data.value is True:
                logger.debug(message)
                timer.sleep(2)

    def _request_time_index(time_index, message):
        # Point the data controller at the wanted index and wait for it
        self.point_get.value = [time_index, indices[-2], indices[-1]]
        self.get_data.value = True
        _wait_for_controller(message)

    _wait_for_controller(
        "Waiting for DataController to release cache file so I can read from it..."
    )

    if self.need_data(i + 1):
        # Acquire lock for asking for data
        self.data_request_lock.acquire()
        self.has_data_request_lock.value = os.getpid()
        try:
            # Do I still need data?
            if self.need_data(i + 1):

                # Tell the DataController that we are going to be reading from the file
                with self.read_lock:
                    self.read_count.value += 1
                    self.has_read_lock.append(os.getpid())
                # Always close the file and drop our reader registration,
                # otherwise a failure here leaves a reader counted forever.
                try:
                    # Open netcdf file on disk from commondataset
                    self.dataset.opennc()
                    try:
                        # Get the indices for the current particle location
                        indices = self.dataset.get_indices(
                            'u',
                            timeinds=[np.asarray([i - 1])],
                            point=self.particle.location)
                    finally:
                        self.dataset.closenc()
                finally:
                    with self.read_lock:
                        self.read_count.value -= 1
                        self.has_read_lock.remove(os.getpid())

                # Override the time
                # get the current time index data
                _request_time_index(
                    indices[0] + 1,
                    "Waiting for DataController to update cache with the CURRENT time index"
                )

                # Do we still need to get the next timestep?
                if self.need_data(i + 1):
                    # get the next time index data
                    _request_time_index(
                        indices[0] + 2,
                        "Waiting for DataController to update cache with the NEXT time index"
                    )
        except Exception:
            logger.warning("Particle failed to request data correctly")
            raise
        finally:
            # Release lock for asking for data
            self.has_data_request_lock.value = -1
            self.data_request_lock.release()
def data_interp(self, i, timevar, currenttime):
    """
    Return u, v, w, temp and salt at the particle, interpolated in time.

    First makes sure the cache holds the required data: if
    ``self.need_data(i + 1)`` is true, two cache updates are requested by
    setting ``self.point_get`` / ``self.get_data`` (labelled CURRENT and NEXT
    in the log messages) and each is waited on by polling every four seconds.

    Then reads every variable at time indices ``i`` and ``i + 1`` for
    ``self.part.location`` and linearly interpolates between the two with
    ``self.linterp`` at ``currenttime``.  If any of u, v or w comes back NaN
    the values are re-read with ``num=2`` and averaged instead.

    :param i: time index of the first of the two timesteps
    :param timevar: time variable; its ``datenum`` attribute is sliced with
        ``[i:i + 2]`` to get the interpolation abscissae
    :param currenttime: time to interpolate to; converted with ``date2num``
    :returns: tuple ``(u, v, w, temp, salt)``.  ``w`` is interpolated from
        ``[0.0, 0.0]`` when the dataset has no ``'w'`` variable; ``temp`` and
        ``salt`` are ``np.nan`` unless both ``self.temp_name`` and
        ``self.salt_name`` are set
    :raises Exception: errors are logged and re-raised after the locks and
        reader registration have been released
    """

    def wait_while_controller_busy(message):
        # Equality (not identity) comparison is kept on purpose: the type of
        # the shared flags is not visible here, so ``== True`` is the only
        # form guaranteed to behave as before.
        if self.active.value == True:  # noqa: E712
            while self.get_data.value == True:  # noqa: E712
                logger.debug(message)
                timer.sleep(4)

    wait_while_controller_busy(
        "Waiting for DataController to release cache file so I can read from it...")

    if self.need_data(i + 1):
        # Acquire lock for asking for data
        self.data_request_lock.acquire()
        self.has_data_request_lock.value = os.getpid()
        try:
            # Do I still need data?
            if self.need_data(i + 1):
                # Tell the DataController that we are going to be reading from the file
                with self.read_lock:
                    self.read_count.value += 1
                    self.has_read_lock.append(os.getpid())

                # Open netcdf file on disk from commondataset
                self.dataset.opennc()
                # Get the indices for the current particle location
                indices = self.dataset.get_indices(
                    'u', timeinds=[np.asarray([i - 1])], point=self.part.location)
                self.dataset.closenc()

                with self.read_lock:
                    self.read_count.value -= 1
                    self.has_read_lock.remove(os.getpid())

                # Override the time
                # get the current time index data
                self.point_get.value = [indices[0] + 1, indices[-2], indices[-1]]
                # Request that the data controller update the cache
                self.get_data.value = True
                # Wait until the data controller is done
                wait_while_controller_busy(
                    "Waiting for DataController to update cache with the CURRENT time index")

                # get the next time index data
                self.point_get.value = [indices[0] + 2, indices[-2], indices[-1]]
                # Request that the data controller update the cache
                self.get_data.value = True
                # Wait until the data controller is done
                wait_while_controller_busy(
                    "Waiting for DataController to update cache with the NEXT time index")
        except Exception:
            # ``StandardError`` does not exist on Python 3; ``Exception`` is
            # the equivalent catch-all for ordinary errors.
            logger.warning("Particle failed to request data correctly")
            raise
        finally:
            # Release lock for asking for data
            self.has_data_request_lock.value = -1
            self.data_request_lock.release()

    location = self.part.location
    has_temp_salt = self.temp_name is not None and self.salt_name is not None

    def values_at(name, time_index, **extra):
        # One read of ``name`` at a single time index around the particle.
        return self.dataset.get_values(
            name, timeinds=[np.asarray([time_index])], point=location, **extra)

    def nearest_pair(name):
        # Values at the two bracketing timesteps, collapsed with a double mean.
        return [np.mean(np.mean(values_at(name, i))),
                np.mean(np.mean(values_at(name, i + 1)))]

    def neighbourhood_pair(name):
        # Same two timesteps, but averaged over the ``num=2`` neighbourhood.
        return [values_at(name, i, num=2).mean(),
                values_at(name, i + 1, num=2).mean()]

    # Defaults so the return statement is always well defined, even when only
    # one of temp_name / salt_name is configured.
    temp = np.nan
    salt = np.nan

    # Tell the DataController that we are going to be reading from the file
    with self.read_lock:
        self.read_count.value += 1
        self.has_read_lock.append(os.getpid())

    try:
        # Open netcdf file on disk from commondataset
        self.dataset.opennc()
        has_w = 'w' in self.dataset.nc.variables

        # Grab data at time index closest to particle location
        u = nearest_pair('u')
        v = nearest_pair('v')
        # if there is vertical velocity in the dataset, get it
        w = nearest_pair('w') if has_w else [0.0, 0.0]
        # If there is salt and temp in the dataset, get it
        if has_temp_salt:
            temp = nearest_pair('temp')
            salt = nearest_pair('salt')

        # Check for nans that occur in the ocean (happens because
        # of model and coastline resolution mismatches)
        if np.isnan(u).any() or np.isnan(v).any() or np.isnan(w).any():
            # Take the mean of the closest points instead.
            # If this includes nan, the result is still nan.
            u = neighbourhood_pair('u')
            v = neighbourhood_pair('v')
            w = neighbourhood_pair('w') if has_w else [0.0, 0.0]
            if has_temp_salt:
                temp = neighbourhood_pair('temp')
                salt = neighbourhood_pair('salt')

        # Linear interp of data between timesteps
        currenttime = date2num(currenttime)
        timevar = timevar.datenum
        window = timevar[i:i + 2]
        u = self.linterp(window, u, currenttime)
        v = self.linterp(window, v, currenttime)
        w = self.linterp(window, w, currenttime)
        if has_temp_salt:
            temp = self.linterp(window, temp, currenttime)
            salt = self.linterp(window, salt, currenttime)
    except Exception:
        logger.error("Error in data_interp method on ForceParticle")
        raise
    finally:
        self.dataset.closenc()
        with self.read_lock:
            self.read_count.value -= 1
            self.has_read_lock.remove(os.getpid())

    return u, v, w, temp, salt
def boundary_interaction(self, **kwargs):
    """
    Adjust a proposed move for the shoreline, the bottom and the sea surface.

    ``ending`` is modified in place and finally assigned to
    ``particle.location``; nothing is returned.

    Required keyword arguments:
        particle -- the particle being moved
        starting -- location the particle is moving from
        ending   -- proposed location the particle is moving to

    Optional keyword arguments forwarded to the shoreline reaction:
        distance, angle, azimuth, reverse_azimuth
    """
    particle = kwargs.pop('particle')
    starting = kwargs.pop('starting')
    ending = kwargs.pop('ending')

    def relocate_ending(source):
        # Copy the horizontal position and depth of ``source`` onto ``ending``.
        ending.latitude = source.latitude
        ending.longitude = source.longitude
        ending.depth = source.depth

    # --- shoreline ---
    if self.useshore:
        crossing = self._shoreline.intersect(start_point=starting.point,
                                             end_point=ending.point)
        if crossing is not None:
            # Put the particle on the point where it hit the shoreline.
            hitpoint = Location4D(point=crossing['point'],
                                  time=starting.time + (ending.time - starting.time))
            particle.location = hitpoint

            # The shoreline reaction is relied upon to return a point that is
            # in the water and not on shore.
            reacted = self._shoreline.react(start_point=starting,
                                            end_point=ending,
                                            hit_point=hitpoint,
                                            reverse_distance=self.reverse_distance,
                                            feature=crossing['feature'],
                                            distance=kwargs.get('distance'),
                                            angle=kwargs.get('angle'),
                                            azimuth=kwargs.get('azimuth'),
                                            reverse_azimuth=kwargs.get('reverse_azimuth'))
            relocate_ending(reacted)
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug("%s - hit the shoreline at %s. Setting location to %s." % (particle.logstring(), hitpoint.logstring(), ending.logstring()))

    # --- bathymetry (settled particles are left alone) ---
    if self.usebathy and not particle.settled:
        if self._bathymetry.intersect(start_point=starting, end_point=ending):
            bounced = self._bathymetry.react(type='reverse',
                                             start_point=starting,
                                             end_point=ending)
            # Log before overwriting so the message shows where the bottom was hit.
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug("%s - hit the bottom at %s. Setting location to %s." % (particle.logstring(), ending.logstring(), bounced.logstring()))
            relocate_ending(bounced)

    # --- sea surface: a positive depth is clamped back to 0 ---
    if self.usesurface and ending.depth > 0:
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("%s - rose out of the water. Setting depth to 0." % particle.logstring())
        ending.depth = 0

    particle.location = ending
def move(self, particle, u, v, w, modelTimestep, **kwargs):
    """
    Steer the particle vertically towards the band between
    ``self.min_depth`` and ``self.max_depth``.

    Returns a dict with keys 'u', 'v' and 'w'.  'u' and 'v' are passed
    through unchanged unless the particle is settled or halted, in which case
    all three are 0.

    This only works if min and max are ordered consistently; no checks are
    done here, so that should be verified before calling this function.
    """
    # Settled particles, and halted (but not settled) ones, do not move.
    if particle.settled or particle.halted:
        return {'u': 0, 'v': 0, 'w': 0}

    # How far could I move?  We don't want to overshoot our desired depth.
    vertical_potential = w * modelTimestep
    depth = particle.location.depth

    def speed_towards(far_edge):
        # If a full step would carry the particle past ``far_edge``, use a
        # speed computed from the overshoot distance and half the band width
        # instead, so it lands in the middle of the range.
        overshoot_distance = abs(depth - far_edge)
        if overshoot_distance < abs(vertical_potential):
            halfway_distance = abs((self.max_depth - self.min_depth) / 2)
            return (overshoot_distance - halfway_distance) / modelTimestep
        return w

    # depth < max_depth: the particle is past the max edge of the band, so it
    # needs to go UP (the returned w keeps its sign).
    #
    #   ~~~~~~~~~~~~~~~~~~~~~~ surface
    #   ---------------------- min
    #   ---------------------- max
    #     x me
    #   ______________________ bottom
    if depth < self.max_depth:
        logger.debug("DIEL: %s - Moving UP to desired depth from %f" % (self.logstring(), particle.location.depth))
        return {'u': u, 'v': v, 'w': speed_towards(self.min_depth)}

    # depth > min_depth: the particle is past the min edge of the band, so it
    # needs to go DOWN (the returned w is negated).
    #
    #   ~~~~~~~~~~~~~~~~~~~~~~ surface
    #     x me
    #   ---------------------- min
    #   ---------------------- max
    #   ______________________ bottom
    if depth > self.min_depth:
        logger.debug("DIEL: %s - Moving DOWN to desired depth from %f" % (self.logstring(), particle.location.depth))
        return {'u': u, 'v': v, 'w': -speed_towards(self.max_depth)}

    # Already inside the desired band: no vertical movement.
    #
    #   ~~~~~~~~~~~~~~~~~~~~~~ surface
    #   ---------------------- min
    #     x me
    #   ---------------------- max
    #   ______________________ bottom
    return {'u': u, 'v': v, 'w': 0}
def setup_run(self, hydrodataset, **kwargs):
    """
    Prepare a model run: timesteps, particles and dataset validation.

    Stores ``hydrodataset`` on the controller, computes ``self.times``,
    ``self.modelTimestep`` and ``self.datetimes``, creates ``self._npart``
    particles at their start locations, and checks that the dataset exposes
    the u, v, x and y variables and covers the run's start and end times.

    :param hydrodataset: dataset reference handed to ``CommonDataset.open``
    :param kwargs: accepted for call compatibility; not used here
    :raises BaseDataControllerError: if the dataset cannot be opened, a
        required variable is missing, or the run's start or end time is not
        strictly inside the dataset's time range
    """
    self.hydrodataset = hydrodataset

    logger.setLevel(logging.PROGRESS)

    # Relax.
    time.sleep(0.5)

    # Add ModelController description to logfile
    logger.info(str(self))

    # Add the model descriptions to logfile
    for m in self._models:
        logger.info(str(m))

    # Calculate the model timesteps
    # We need times = len(self._nstep) + 1 since data is stored one timestep
    # after a particle is forced with the final timestep's data.
    self.times = list(range(0, (self._step * self._nstep) + 1, self._step))
    # Calculate a datetime object for each model timestep
    # This method is duplicated in CachingDataController and CachingForcer
    # using the 'times' variables above.  Will be useful in those other
    # locations for particles released at different times
    # i.e. released over a few days
    self.modelTimestep, self.datetimes = AsaTransport.get_time_objects_from_model_timesteps(
        self.times, start=self.start)

    logger.progress((1, "Setting up particle start locations"))
    point_locations = []
    if isinstance(self.geometry, Point):
        point_locations = [self.reference_location] * self._npart
    elif isinstance(self.geometry, (Polygon, MultiPolygon)):
        point_locations = [
            Location4D(latitude=loc.y, longitude=loc.x, depth=self._depth, time=self.start)
            for loc in AsaTransport.fill_polygon_with_points(goal=self._npart, polygon=self.geometry)
        ]

    # Initialize the particles
    logger.progress((2, "Initializing particles"))
    for x in range(0, self._npart):
        p = LarvaParticle(id=x)
        p.location = point_locations[x]
        # We don't need to fill the location gaps here for environment variables
        # because the first data collected actually relates to this original
        # position.
        # We do need to fill in fields such as settled, halted, etc.
        p.fill_status_gap()
        # Set the initial note
        p.note = p.outputstring()
        p.notes.append(p.note)
        self.particles.append(p)

    logger.progress((3, "Initializing and caching hydro model's grid %s" % self.hydrodataset))
    try:
        ds = CommonDataset.open(self.hydrodataset)
        # Query the dataset for common variable names
        # and the time variable.
        logger.debug("Retrieving variable information from dataset")
        self.common_variables = self.get_common_variables_from_dataset(ds)
    except Exception:
        logger.exception("Failed to access dataset %s" % self.hydrodataset)
        raise BaseDataControllerError("Inaccessible Dataset: %s" % self.hydrodataset)

    self.timevar = None
    try:
        # Explicit checks rather than ``assert``: assertions are removed when
        # Python runs with -O, which would silently skip this validation.
        required = ("u", "v", "x", "y")
        missing = [key for key in required
                   if self.common_variables.get(key) not in ds._current_variables]
        if missing:
            logger.error("Could not locate variables needed to run model: %s" % str(self.common_variables))
            raise BaseDataControllerError("A required data variable was not found in %s" % self.hydrodataset)

        self.timevar = ds.gettimevar(self.common_variables.get("u"))
        model_dates = self.timevar.get_dates()
        model_start = model_dates[0]
        model_end = model_dates[-1]
    finally:
        ds.closenc()

    # Both ends of the run must fall strictly inside the dataset's time range.
    if not (self.start > model_start and self.start < model_end):
        raise BaseDataControllerError(
            "Start time for model (%s) is not available in source dataset (%s/%s)"
            % (self.datetimes[0], model_start, model_end))

    run_end = self.datetimes[-1]
    if not (run_end > model_start and run_end < model_end):
        raise BaseDataControllerError(
            "End time for model (%s) is not available in source dataset (%s/%s)"
            % (self.datetimes[-1], model_start, model_end))
def run(self):
    """
    Force ``self.particle`` through every model timestep and return it.

    For each time index except the last, environmental data is read with the
    configured ``self.time_method`` ('nearest' or 'interp'), the particle is
    aged, every model in ``self.models`` is asked to move it, and the move is
    passed through ``self.boundary_interaction``.  The particle's state is
    saved each timestep and, when a Redis URL and channel are configured,
    published as JSON.  Timing statistics are logged at the end.

    :returns: the forced particle (``self.particle``)
    :raises ValueError: if ``self.time_method`` is not 'nearest' or 'interp',
        or if ``self.active`` reports that the run has failed
    :raises AssertionError: if the number of time indexes does not match the
        number of model datetimes
    """
    self.load_initial_dataset()

    redis_connection = None
    if self.redis_url is not None and self.redis_results_channel is not None:
        import redis
        redis_connection = redis.from_url(self.redis_url)

    # Setup shoreline
    self._shoreline = None
    if self.useshore is True:
        self._shoreline = Shoreline(path=self.shoreline_path,
                                    feature_name=self.shoreline_feature,
                                    point=self.release_location_centroid,
                                    spatialbuffer=self.shoreline_index_buffer)
        # Make sure we are not starting on land.  Raises exception if we are.
        self._shoreline.intersect(start_point=self.release_location_centroid,
                                  end_point=self.release_location_centroid)

    # Setup Bathymetry
    if self.usebathy is True:
        try:
            self._bathymetry = Bathymetry(file=self.bathy_path)
        except Exception:
            # Best effort: carry on without bathymetry rather than abort.
            logger.exception("Could not load Bathymetry file: %s, using no Bathymetry for this run!" % self.bathy_path)
            self.usebathy = False

    # Calculate datetime at every timestep
    modelTimestep, newtimes = AsaTransport.get_time_objects_from_model_timesteps(self.times, start=self.start_time)

    if self.time_method == 'interp':
        time_indexs = self.timevar.nearest_index(newtimes, select='before')
    elif self.time_method == 'nearest':
        time_indexs = self.timevar.nearest_index(newtimes)
    else:
        # Fail with a clear error instead of an unbound-variable crash on
        # the next statement.
        logger.warning("Method for computing u,v,w,temp,salt not supported!")
        raise ValueError("Method for computing u,v,w,temp,salt is unknown. Only 'nearest' and 'interp' are supported.")

    # Explicit check (not ``assert``) so it still runs under ``python -O``;
    # AssertionError is kept as the exception type for existing callers.
    if len(newtimes) != len(time_indexs):
        message = "Time indexes are messed up. Need to have equal datetime and time indexes"
        logger.error(message)
        raise AssertionError(message)

    # Keep track of how much time we spend in each area.
    tot_boundary_time = 0.
    tot_read_data = 0.
    tot_model_time = {m.name: 0. for m in self.models}

    # Set the base conditions
    # If using Redis, send the results
    if redis_connection is not None:
        redis_connection.publish(self.redis_results_channel, json.dumps(self.particle.timestep_dump()))

    # loop over timesteps
    # We don't loop over the last time_index because
    # we need to query in the time_index and set the particle's
    # location as the 'newtime' object.
    for loop_i, i in enumerate(time_indexs[0:-1]):

        if self.active and self.active.value is False:
            raise ValueError("Particle exiting due to Failure.")

        newloc = None

        # ``time.clock`` was removed in Python 3.8; ``perf_counter`` is the
        # documented replacement for measuring elapsed intervals.
        st = time.perf_counter()
        # Get the variable data required by the models
        # (time_method was validated above, so only these two cases remain)
        if self.time_method == 'nearest':
            u, v, w, temp, salt = self.get_nearest_data(i)
        else:
            u, v, w, temp, salt = self.get_linterp_data(i, newtimes[loop_i])
        tot_read_data += (time.perf_counter() - st)

        # Get the bathy value at the particles location
        if self.usebathy is True:
            bathymetry_value = self._bathymetry.get_depth(self.particle.location)
        else:
            bathymetry_value = -999999999999999

        # Age the particle by the modelTimestep (seconds)
        # 'Age' meaning the amount of time it has been forced.
        self.particle.age(seconds=modelTimestep[loop_i])

        # loop over models - sort these in the order you want them to run
        for model in self.models:
            st = time.perf_counter()
            movement = model.move(self.particle, u, v, w, modelTimestep[loop_i],
                                  temperature=temp, salinity=salt,
                                  bathymetry_value=bathymetry_value)
            newloc = Location4D(latitude=movement['latitude'],
                                longitude=movement['longitude'],
                                depth=movement['depth'],
                                time=newtimes[loop_i + 1])
            # Charge the time to the model that actually ran (not to a stale
            # variable left over from an earlier loop).
            tot_model_time[model.name] += (time.perf_counter() - st)
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug("%s - moved %.3f meters (horizontally) and %.3f meters (vertically) by %s with data from %s" % (self.particle.logstring(), movement['distance'], movement['vertical_distance'], model.__class__.__name__, newtimes[loop_i].isoformat()))
            if newloc:
                st = time.perf_counter()
                self.boundary_interaction(particle=self.particle,
                                          starting=self.particle.location,
                                          ending=newloc,
                                          distance=movement['distance'],
                                          angle=movement['angle'],
                                          azimuth=movement['azimuth'],
                                          reverse_azimuth=movement['reverse_azimuth'],
                                          vertical_distance=movement['vertical_distance'],
                                          vertical_angle=movement['vertical_angle'])
                tot_boundary_time += (time.perf_counter() - st)
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug("%s - was forced by %s and is now at %s" % (self.particle.logstring(), model.__class__.__name__, self.particle.location.logstring()))

        self.particle.note = self.particle.outputstring()
        # Each timestep, save the particles status and environmental variables.
        # This keep fields such as temp, salt, halted, settled, and dead matched up with the number of timesteps
        self.particle.save()

        # If using Redis, send the results
        if redis_connection is not None:
            redis_connection.publish(self.redis_results_channel, json.dumps(self.particle.timestep_dump()))

    self.dataset.closenc()

    # We won't pull data for the last entry in locations, but we need to populate it with fill data.
    self.particle.fill_gap()

    if self.usebathy is True:
        self._bathymetry.close()

    if self.useshore is True:
        self._shoreline.close()

    logger.info(textwrap.dedent('''Particle %i Stats:
                      Data read: %f seconds
                      Model forcing: %s seconds
                      Boundary intersection: %f seconds''' % (self.particle.uid, tot_read_data, {s: '{:g} seconds'.format(f) for s, f in list(tot_model_time.items())}, tot_boundary_time)))

    return self.particle
def need_data(self, i):
    """
    Test whether the cache is missing the data that the particle needs.

    Reads the ``'domain'`` variable from the cache at time index ``i`` around
    the particle's current location.  If the double mean of the result is the
    numpy masked constant, or if the lookup fails for any reason, data needs
    to be requested.

    :param i: time index to look up in the cache
    :returns: ``True`` if we need data, ``False`` if we don't
    """
    debugging = logger.isEnabledFor(logging.DEBUG)
    if debugging:
        logger.debug("Checking cache for data availability at %s." % self.particle.location.logstring())

    try:
        # Tell the DataController that we are going to be reading from the file
        with self.read_lock:
            self.read_count.value += 1
            self.has_read_lock.append(os.getpid())

        self.dataset.opennc()
        # Test if the cache has the data we need
        # If the point we request contains fill values,
        # we need data
        cached_lookup = self.dataset.get_values(
            'domain', timeinds=[np.asarray([i])], point=self.particle.location)
        # Computed once and reused for both the logging and the decision.
        lookup_mean = np.mean(np.mean(cached_lookup))

        if debugging:
            logger.debug("Type of result: %s", type(cached_lookup))
            logger.debug("Double mean of result: %s", lookup_mean)
            logger.debug("Type of Double mean of result: %s", type(lookup_mean))

        if isinstance(lookup_mean, np.ma.core.MaskedConstant):
            need = True
            if debugging:
                # Lazy argument passing: safe even if the lookup is a tuple.
                logger.debug("I NEED data. Got back: %s", cached_lookup)
        else:
            need = False
            logger.debug("I DO NOT NEED data")
    except Exception:
        # If the time index doesnt even exist, we need data.  The exception
        # is attached to the debug record so other failures are not hidden.
        need = True
        logger.debug("I NEED data (no time index exists in cache)", exc_info=True)
    finally:
        self.dataset.closenc()
        with self.read_lock:
            self.read_count.value -= 1
            self.has_read_lock.remove(os.getpid())

    return need  # Returns True if we need data or False if we dont
def need_data(self, i):
    """
    Test whether the cache is missing the data that the particle needs.

    Reads the ``'domain'`` variable from the cache at time index ``i`` around
    the particle's current location.  If the double mean of the result is the
    numpy masked constant, or if the lookup fails for any reason, data needs
    to be requested.

    :param i: time index to look up in the cache
    :returns: ``True`` if we need data, ``False`` if we don't
    """
    debugging = logger.isEnabledFor(logging.DEBUG)
    if debugging:
        logger.debug("Checking cache for data availability at %s." % self.particle.location.logstring())

    try:
        # Tell the DataController that we are going to be reading from the file
        with self.read_lock:
            self.read_count.value += 1
            self.has_read_lock.append(os.getpid())

        self.dataset.opennc()
        # Test if the cache has the data we need
        # If the point we request contains fill values,
        # we need data
        cached_lookup = self.dataset.get_values(
            'domain', timeinds=[np.asarray([i])], point=self.particle.location)
        # Computed once and reused for both the logging and the decision.
        lookup_mean = np.mean(np.mean(cached_lookup))

        if debugging:
            logger.debug("Type of result: %s", type(cached_lookup))
            logger.debug("Double mean of result: %s", lookup_mean)
            logger.debug("Type of Double mean of result: %s", type(lookup_mean))

        if isinstance(lookup_mean, np.ma.core.MaskedConstant):
            need = True
            if debugging:
                # Lazy argument passing: safe even if the lookup is a tuple.
                logger.debug("I NEED data. Got back: %s", cached_lookup)
        else:
            need = False
            logger.debug("I DO NOT NEED data")
    except Exception:
        # If the time index doesnt even exist, we need data.  The exception
        # is attached to the debug record so other failures are not hidden.
        need = True
        logger.debug("I NEED data (no time index exists in cache)", exc_info=True)
    finally:
        self.dataset.closenc()
        with self.read_lock:
            self.read_count.value -= 1
            self.has_read_lock.remove(os.getpid())

    return need  # Returns True if we need data or False if we dont
def setup_run(self, hydrodataset, **kwargs):
    """
    Prepare a model run: timesteps, particles and dataset validation.

    Stores ``hydrodataset`` on the controller, computes ``self.times``,
    ``self.modelTimestep`` and ``self.datetimes``, creates ``self._npart``
    particles at their start locations, and checks that the dataset exposes
    the u, v, x and y variables and covers the run's start and end times.

    :param hydrodataset: dataset reference handed to ``CommonDataset.open``
    :param kwargs: accepted for call compatibility; not used here
    :raises BaseDataControllerError: if the dataset cannot be opened, a
        required variable is missing, or the run's start or end time is not
        strictly inside the dataset's time range
    """
    self.hydrodataset = hydrodataset

    logger.setLevel(logging.PROGRESS)

    # Relax.
    time.sleep(0.5)

    # Add ModelController description to logfile
    logger.info(str(self))

    # Add the model descriptions to logfile
    for m in self._models:
        logger.info(str(m))

    # Calculate the model timesteps
    # We need times = len(self._nstep) + 1 since data is stored one timestep
    # after a particle is forced with the final timestep's data.
    self.times = list(range(0, (self._step * self._nstep) + 1, self._step))
    # Calculate a datetime object for each model timestep
    # This method is duplicated in CachingDataController and CachingForcer
    # using the 'times' variables above.  Will be useful in those other
    # locations for particles released at different times
    # i.e. released over a few days
    self.modelTimestep, self.datetimes = AsaTransport.get_time_objects_from_model_timesteps(
        self.times, start=self.start)

    logger.progress((1, "Setting up particle start locations"))
    point_locations = []
    if isinstance(self.geometry, Point):
        point_locations = [self.reference_location] * self._npart
    elif isinstance(self.geometry, (Polygon, MultiPolygon)):
        point_locations = [
            Location4D(latitude=loc.y, longitude=loc.x, depth=self._depth, time=self.start)
            for loc in AsaTransport.fill_polygon_with_points(goal=self._npart, polygon=self.geometry)
        ]

    # Initialize the particles
    logger.progress((2, "Initializing particles"))
    for x in range(0, self._npart):
        p = LarvaParticle(id=x)
        p.location = point_locations[x]
        # We don't need to fill the location gaps here for environment variables
        # because the first data collected actually relates to this original
        # position.
        # We do need to fill in fields such as settled, halted, etc.
        p.fill_status_gap()
        # Set the initial note
        p.note = p.outputstring()
        p.notes.append(p.note)
        self.particles.append(p)

    logger.progress((3, "Initializing and caching hydro model's grid %s" % self.hydrodataset))
    try:
        ds = CommonDataset.open(self.hydrodataset)
        # Query the dataset for common variable names
        # and the time variable.
        logger.debug("Retrieving variable information from dataset")
        self.common_variables = self.get_common_variables_from_dataset(ds)
    except Exception:
        logger.exception("Failed to access dataset %s" % self.hydrodataset)
        raise BaseDataControllerError("Inaccessible Dataset: %s" % self.hydrodataset)

    self.timevar = None
    try:
        # Explicit checks rather than ``assert``: assertions are removed when
        # Python runs with -O, which would silently skip this validation.
        required = ("u", "v", "x", "y")
        missing = [key for key in required
                   if self.common_variables.get(key) not in ds._current_variables]
        if missing:
            logger.error("Could not locate variables needed to run model: %s" % str(self.common_variables))
            raise BaseDataControllerError("A required data variable was not found in %s" % self.hydrodataset)

        self.timevar = ds.gettimevar(self.common_variables.get("u"))
        model_dates = self.timevar.get_dates()
        model_start = model_dates[0]
        model_end = model_dates[-1]
    finally:
        ds.closenc()

    # Both ends of the run must fall strictly inside the dataset's time range.
    if not (self.start > model_start and self.start < model_end):
        raise BaseDataControllerError(
            "Start time for model (%s) is not available in source dataset (%s/%s)"
            % (self.datetimes[0], model_start, model_end))

    run_end = self.datetimes[-1]
    if not (run_end > model_start and run_end < model_end):
        raise BaseDataControllerError(
            "End time for model (%s) is not available in source dataset (%s/%s)"
            % (self.datetimes[-1], model_start, model_end))
def fill_cache_with_linterp_data(self, i, currenttime):
    """
    Ask the DataController to cache the data needed for linear interpolation.

    If ``self.need_data(i + 1)`` reports that the cache is missing data, this
    looks up the grid indices of the particle's location and then requests up
    to two cache updates by setting ``self.point_get`` and raising the
    ``self.get_data`` flag: one labelled CURRENT (``indices[0] + 1``) and, if
    data is still needed afterwards, one labelled NEXT (``indices[0] + 2``).
    Each request is waited on by polling ``self.get_data`` every two seconds.

    :param i: time index; ``i + 1`` is checked with ``need_data`` and
        ``i - 1`` is passed to ``dataset.get_indices``
    :param currenttime: not used by this method; kept for the call signature
    :raises Exception: any error raised while requesting data is logged and
        re-raised once the data request lock has been released
    """

    def wait_while_controller_busy(message):
        # Only poll if the run is flagged active when we start waiting.
        # NOTE(review): the flag is presumably cleared by the DataController
        # in another process -- nothing in this method resets it.
        if self.active.value is True:
            while self.get_data.value is True:
                logger.debug(message)
                timer.sleep(2)

    wait_while_controller_busy(
        "Waiting for DataController to release cache file so I can read from it...")

    if not self.need_data(i + 1):
        return

    # Acquire lock for asking for data
    self.data_request_lock.acquire()
    self.has_data_request_lock.value = os.getpid()
    try:
        # Do I still need data?  Another holder of the request lock may have
        # triggered a cache update while we were waiting for it.
        if self.need_data(i + 1):

            # Tell the DataController that we are going to be reading from the file
            with self.read_lock:
                self.read_count.value += 1
                self.has_read_lock.append(os.getpid())

            try:
                # Open netcdf file on disk from commondataset
                self.dataset.opennc()
                # Get the indices for the current particle location
                indices = self.dataset.get_indices(
                    'u', timeinds=[np.asarray([i - 1])], point=self.particle.location)
            finally:
                # Always undo the reader registration, even when the lookup
                # fails, so the shared reader bookkeeping stays balanced.
                self.dataset.closenc()
                with self.read_lock:
                    self.read_count.value -= 1
                    self.has_read_lock.remove(os.getpid())

            # Override the time
            # get the current time index data
            self.point_get.value = [indices[0] + 1, indices[-2], indices[-1]]
            # Request that the data controller update the cache
            self.get_data.value = True
            # Wait until the data controller is done
            wait_while_controller_busy(
                "Waiting for DataController to update cache with the CURRENT time index")

            # Do we still need to get the next timestep?
            if self.need_data(i + 1):
                # get the next time index data
                self.point_get.value = [indices[0] + 2, indices[-2], indices[-1]]
                # Request that the data controller update the cache
                self.get_data.value = True
                # Wait until the data controller is done
                wait_while_controller_busy(
                    "Waiting for DataController to update cache with the NEXT time index")
    except Exception:
        logger.warning("Particle failed to request data correctly")
        raise
    finally:
        # Release lock for asking for data
        self.has_data_request_lock.value = -1
        self.data_request_lock.release()
def __reverse(self, **kwargs):
    """
    Throw a particle that hit the shore back in the direction it came from.

    The reverse azimuth is perturbed by ``AsaRandom.random() * 5`` and the
    reverse distance is halved until the path back no longer intersects the
    shoreline.

    Required keyword arguments: start_point, hit_point, distance, azimuth,
    reverse_azimuth (``distance`` and ``azimuth`` must be supplied but are
    not used in the calculation).
    Optional keyword argument: reverse_distance (defaults to 100 when missing
    or None).

    Returns the new Location4D, or ``start_point`` unchanged when no usable
    location could be found.
    """
    max_nudges = 16
    max_halvings = 12

    start_point = kwargs.pop('start_point')
    hit_point = kwargs.pop('hit_point')
    # Popped so that a missing key still raises; the values are not needed.
    distance = kwargs.pop('distance')
    azimuth = kwargs.pop('azimuth')
    reverse_azimuth = kwargs.pop('reverse_azimuth')

    reverse_distance = kwargs.get('reverse_distance', None)
    if reverse_distance is None:
        reverse_distance = 100

    def location_from_hit(how_far, heading):
        # Project ``how_far`` from the hit point along ``heading``, keeping
        # the depth of the start point.
        projected = AsaGreatCircle.great_circle(distance=how_far, azimuth=heading, start_point=hit_point)
        return Location4D(latitude=projected['latitude'],
                          longitude=projected['longitude'],
                          depth=start_point.depth)

    # Randomize the reverse angle slightly
    random_azimuth = reverse_azimuth + AsaRandom.random() * 5

    # Find a point just off the hit point to test with, doubling the nudge
    # distance (starting at 0.01) up to 16 times.  This makes sure the start
    # point is in the water for the intersect calls in the second loop.
    nudges = 0
    nudge_distance = 0.01
    nudge_loc = location_from_hit(nudge_distance, reverse_azimuth)
    while self.intersect(single_point=nudge_loc.point) and nudges < max_nudges:
        nudge_distance *= 2
        nudge_loc = location_from_hit(nudge_distance, reverse_azimuth)
        nudges += 1

    # All 16 doublings were used up.  This should totally never happen.
    if nudges == max_nudges:
        logger.debug("WOW. Could not find location in water to do shoreline calculation with. Assuming particle did not move from original location")
        return start_point

    # Keep trying to throw the particle back, halving the distance each time
    # until the path from the nudge point no longer crosses the shoreline.
    # Halve at most 12 times before giving up.
    halvings = 0
    changing_distance = reverse_distance
    new_loc = location_from_hit(reverse_distance, random_azimuth)
    while self.intersect(start_point=nudge_loc.point, end_point=new_loc.point) and halvings < max_halvings:
        changing_distance /= 2
        new_loc = location_from_hit(changing_distance, random_azimuth)
        halvings += 1

    # All 12 halvings were used up: return the point the particle started
    # from.  No randomization.
    if halvings == max_halvings:
        logger.debug("Could not react particle with shoreline. Assuming particle did not move from original location")
        return start_point

    return new_loc
def move(self, particle, u, v, w, modelTimestep, **kwargs):
    """
    Grow the particle and apply its behaviors for one model timestep.

    Parameters:
        particle: the particle being moved.  Its ``temp`` and ``salt`` are
            set here and ``grow`` is called on it.
        u, v, w: incoming velocities.  They are ignored; the behavior
            results are accumulated starting from zero.
        modelTimestep: length of the timestep in seconds (it is converted
            to days by dividing by 60 * 60 * 24).
        **kwargs: may contain ``temperature`` and ``salinity``; all keyword
            arguments are passed on to each behavior's ``move``.

    Returns:
        The result of AsaTransport.distance_from_location_using_u_v_w with
        the summed ``u``, ``v`` and ``w`` added under those keys.
    """
    temp = kwargs.get('temperature', None)
    salt = kwargs.get('salinity', None)

    # "%s" instead of "%.4f": either value may legitimately be None here,
    # and a float format would raise TypeError before the fallback below.
    logger.debug("Temp: %s, Salt: %s" % (temp, salt))

    # IMPORTANT:
    # If we got no data from the model, we are using the last available value stored in the particles!
    if temp is None or math.isnan(temp):
        temp = particle.last_temp()
    if salt is None or math.isnan(salt):
        salt = particle.last_salt()

    particle.temp = temp
    particle.salt = salt

    # Grow the particle.  Growth affects which lifestage the particle is in.
    do_duration_growth = True
    modelTimestepDays = modelTimestep / 60. / 60. / 24.
    if self.linear_a is not None and self.linear_b is not None:
        if particle.temp is not None:
            # linear growth, compute q = t / (Ax+B)
            # Where timestep t (days), at temperature x (deg C), proportion of stage completed (q)
            growth = modelTimestepDays / (self.linear_a * particle.temp + self.linear_b)
            particle.grow(growth)
            do_duration_growth = False
        else:
            logger.debug("No temperature found for Particle %s at this location and timestep, skipping linear temperature growth and using duration growth" % particle.uid)

    if do_duration_growth:
        growth = modelTimestepDays / self.duration
        particle.grow(growth)

    particle_time = particle.location.time

    # Find the closest Diel that the current particle time is AFTER (or
    # equal to), and use it as the active diel.
    active_diel = None
    closest_seconds = None
    for ad in self.diel:
        d_time = ad.get_time(loc4d=particle.location)
        if d_time <= particle_time:
            seconds = (particle_time - d_time).total_seconds()
            if active_diel is None or seconds < closest_seconds:
                active_diel = ad
                closest_seconds = seconds

    # Run the settlement, active diel and all of the taxis behaviors.
    # u, v, and w store the continuous results from all of the behavior models.
    u = 0
    v = 0
    w = 0

    # Behaviors run in this order; entries that are None (or otherwise
    # falsy) are skipped.
    behaviors_to_run = filter(None, [self.settlement] + [active_diel] + self.taxis)

    try:
        vss = self.capability.calculated_vss
    except AttributeError:
        logger.debug("No VSS found, vertical behaviors will not act upon particle")
        vss = 0

    for behave in behaviors_to_run:
        behave_results = behave.move(particle, 0, 0, vss, modelTimestep, **kwargs)
        u += behave_results['u']
        v += behave_results['v']
        w += behave_results['w']

    # Do the calculation to determine the new location after running the behaviors
    result = AsaTransport.distance_from_location_using_u_v_w(u=u, v=v, w=w, timestep=modelTimestep, location=particle.location)
    result['u'] = u
    result['v'] = v
    result['w'] = w
    return result
def run(self, hydrodataset, **kwargs):
    """
    Run the model against ``hydrodataset`` using one worker process per
    available core, then optionally export the results.

    Parameters:
        hydrodataset: dataset location handed to CommonDataset.open and to
            the DataController / ForceParticle tasks.

    Keyword arguments:
        low_memory: passed to the DataController (default False).
        remove_cache: remove the cache file when the run ends (default True).
        bathy: stored as ``self.bathy_path`` and passed to each particle task.
        cache: path of the cache file.  A random name inside a ``_cache``
            directory next to this module is generated when missing.
        output_formats: list of formats passed one by one to ``self.export``.
        output_path: destination passed to ``self.export``.

    Raises:
        ValueError: when fewer than two CPU cores are available.
        DataControllerError: when the dataset can not be opened, or when no
            particle finished and the DataController reported a failure.
        ModelError: when no particle finished for any other reason.
    """
    # Add ModelController description to logfile
    logger.info(self)

    # Add the model descriptions to logfile
    for m in self._models:
        logger.info(m)

    # Calculate the model timesteps
    # We need times = len(self._nstep) + 1 since data is stored one timestep
    # after a particle is forced with the final timestep's data.
    times = range(0, (self._step * self._nstep) + 1, self._step)
    # Calculate a datetime object for each model timestep
    # This method is duplicated in DataController and ForceParticle
    # using the 'times' variables above.  Will be useful in those other
    # locations for particles released at different times
    # i.e. released over a few days
    _, self.datetimes = AsaTransport.get_time_objects_from_model_timesteps(times, start=self.start)

    time_chunk = self._time_chunk
    horiz_chunk = self._horiz_chunk
    low_memory = kwargs.get("low_memory", False)

    # Should we remove the cache file at the end of the run?
    remove_cache = kwargs.get("remove_cache", True)

    self.bathy_path = kwargs.get("bathy", None)

    self.cache_path = kwargs.get("cache", None)
    if self.cache_path is None:
        # Generate temp filename for dataset cache
        default_cache_dir = os.path.join(os.path.dirname(__file__), "_cache")
        temp_name = AsaRandom.filename(prefix=str(datetime.now().microsecond), suffix=".nc")
        self.cache_path = os.path.join(default_cache_dir, temp_name)

    logger.progress((1, "Setting up particle start locations"))
    point_locations = []
    if isinstance(self.geometry, Point):
        point_locations = [self.reference_location] * self._npart
    elif isinstance(self.geometry, (Polygon, MultiPolygon)):
        point_locations = [
            Location4D(latitude=loc.y, longitude=loc.x, depth=self._depth, time=self.start)
            for loc in AsaTransport.fill_polygon_with_points(goal=self._npart, polygon=self.geometry)
        ]

    # Initialize the particles
    logger.progress((2, "Initializing particles"))
    for x in xrange(0, self._npart):
        p = LarvaParticle(id=x)
        p.location = point_locations[x]
        # We don't need to fill the location gaps here for environment variables
        # because the first data collected actually relates to this original
        # position.
        # We do need to fill in fields such as settled, halted, etc.
        p.fill_status_gap()
        # Set the initial note
        p.note = p.outputstring()
        p.notes.append(p.note)
        self.particles.append(p)

    # This is where it makes sense to implement the multiprocessing
    # looping for particles and models.  Can handle each particle in
    # parallel probably.
    #
    # Get the number of cores (may take some tuning) and create that
    # many workers then pass particles into the queue for the workers
    mgr = multiprocessing.Manager()
    nproc = multiprocessing.cpu_count() - 1
    if nproc <= 0:
        raise ValueError("Model does not run using less than two CPU cores")

    # Each particle is a task, plus the DataController
    number_of_tasks = len(self.particles) + 1

    # We need a process for each particle and one for the data controller
    nproc = min(number_of_tasks, nproc)

    # When a particle requests data
    data_request_lock = mgr.Lock()
    # PID of process with lock
    has_data_request_lock = mgr.Value('int', -1)

    nproc_lock = mgr.Lock()

    # Create the task queue for all of the particles and the DataController
    tasks = multiprocessing.JoinableQueue(number_of_tasks)
    # Create the result queue for all of the particles and the DataController
    results = mgr.Queue(number_of_tasks)

    # Create the shared state objects
    get_data = mgr.Value('bool', True)
    # Number of tasks
    n_run = mgr.Value('int', number_of_tasks)

    # When something is reading from cache file
    read_lock = mgr.Lock()
    # list of PIDs that are reading
    has_read_lock = mgr.list()
    read_count = mgr.Value('int', 0)

    # When something is writing to the cache file
    write_lock = mgr.Lock()
    # PID of process with lock
    has_write_lock = mgr.Value('int', -1)

    point_get = mgr.Value('list', [0, 0, 0])
    active = mgr.Value('bool', True)

    logger.progress((3, "Initializing and caching hydro model's grid"))
    try:
        ds = CommonDataset.open(hydrodataset)
        # Query the dataset for common variable names
        # and the time variable.
        logger.debug("Retrieving variable information from dataset")
        common_variables = self.get_common_variables_from_dataset(ds)

        logger.debug("Pickling time variable to disk for particles")
        timevar = ds.gettimevar(common_variables.get("u"))
        fd, timevar_pickle_path = tempfile.mkstemp()
        # Wrap the descriptor returned by mkstemp so that the file is always
        # closed, even when pickling fails.
        with os.fdopen(fd, "wb") as f:
            pickle.dump(timevar, f)
        ds.closenc()
    except Exception:
        logger.warn("Failed to access remote dataset %s" % hydrodataset)
        raise DataControllerError("Inaccessible DAP endpoint: %s" % hydrodataset)

    # Add data controller to the queue first so that it
    # can get the initial data and is not blocked
    logger.debug('Starting DataController')
    logger.progress((4, "Starting processes"))
    data_controller = parallel.DataController(
        hydrodataset, common_variables, n_run, get_data, write_lock,
        has_write_lock, read_lock, read_count, time_chunk, horiz_chunk,
        times, self.start, point_get, self.reference_location,
        low_memory=low_memory, cache=self.cache_path)
    tasks.put(data_controller)
    # Create DataController worker
    data_controller_process = parallel.Consumer(tasks, results, n_run, nproc_lock, active, get_data, name="DataController")
    data_controller_process.start()

    logger.debug('Adding %i particles as tasks' % len(self.particles))
    for part in self.particles:
        forcing = parallel.ForceParticle(
            part, hydrodataset, common_variables, timevar_pickle_path,
            times, self.start, self._models, self.reference_location.point,
            self._use_bathymetry, self._use_shoreline, self._use_seasurface,
            get_data, n_run, read_lock, has_read_lock, read_count,
            point_get, data_request_lock, has_data_request_lock,
            reverse_distance=self.reverse_distance,
            bathy=self.bathy_path,
            shoreline_path=self.shoreline_path,
            shoreline_feature=self.shoreline_feature,
            cache=self.cache_path,
            time_method=self.time_method)
        tasks.put(forcing)

    # Create workers for the particles.
    procs = [
        parallel.Consumer(tasks, results, n_run, nproc_lock, active, get_data, name="ForceParticle-%d" % i)
        for i in xrange(nproc - 1)
    ]
    for w in procs:
        w.start()
        logger.debug('Started %s' % w.name)

    # Get results back from queue, test for failed particles
    return_particles = []
    # Kept as a float so that the progress ratio below is a true division.
    retrieved = 0.
    error_code = 0

    logger.info("Waiting for %i particle results" % len(self.particles))
    logger.progress((5, "Running model"))
    while retrieved < number_of_tasks:
        try:
            # Returns a tuple of code, result
            code, tempres = results.get(timeout=240)
        except Queue.Empty:
            # Poll the active processes to make sure they are all alive and then continue with loop
            if not data_controller_process.is_alive() and data_controller_process.exitcode != 0:
                # Data controller is zombied, kill off other processes.
                # This must be an assignment: particles wait for get_data to
                # become False before they carry on.
                get_data.value = False
                results.put((-2, "DataController"))

            new_procs = []
            old_procs = []
            for p in procs:
                if not p.is_alive() and p.exitcode != 0:
                    # Do what the Consumer would do if something finished.
                    # Add something to results queue
                    results.put((-3, "ZombieParticle"))
                    # Decrement the running count (DataController exits when it runs out)
                    with nproc_lock:
                        n_run.value = n_run.value - 1

                    # Remove task from queue (so they can be joined later on)
                    tasks.task_done()

                    # Start a new Consumer.  It will exit if there are no tasks available.
                    replacement = parallel.Consumer(tasks, results, n_run, nproc_lock, active, get_data, name=p.name)
                    new_procs.append(replacement)
                    old_procs.append(p)

                    # Release any locks the PID had
                    if p.pid in has_read_lock:
                        with read_lock:
                            read_count.value -= 1
                            has_read_lock.remove(p.pid)

                    if has_data_request_lock.value == p.pid:
                        has_data_request_lock.value = -1
                        try:
                            data_request_lock.release()
                        except Exception:
                            # Best effort: the lock may not actually be held.
                            pass

                    if has_write_lock.value == p.pid:
                        has_write_lock.value = -1
                        try:
                            write_lock.release()
                        except Exception:
                            # Best effort: the lock may not actually be held.
                            pass

            for p in old_procs:
                try:
                    procs.remove(p)
                except ValueError:
                    logger.warn("Did not find %s in the list of processes. Continuing on." % p.name)

            for p in new_procs:
                procs.append(p)
                logger.warn("Started a new consumer (%s) to replace a zombie consumer" % p.name)
                p.start()

        else:
            # We got one.
            retrieved += 1
            if code is None:
                logger.warn("Got an unrecognized response from a task.")
            elif code == -1:
                logger.warn("Particle %s has FAILED!!" % tempres.uid)
            elif code == -2:
                error_code = code
                logger.warn("DataController has FAILED!! Removing cache file so the particles fail.")
                try:
                    os.remove(self.cache_path)
                except OSError:
                    logger.debug("Could not remove cache file, it probably never existed")
            elif code == -3:
                error_code = code
                logger.info("A zombie process was caught and task was removed from queue")
            elif isinstance(tempres, Particle):
                logger.info("Particle %d finished" % tempres.uid)
                return_particles.append(tempres)
                # We multiply by 90 here to leave the rest of the progress for the exporting
                logger.progress((round((retrieved / number_of_tasks) * 90., 1), "Particle %d finished" % tempres.uid))
            elif tempres == "DataController":
                logger.info("DataController finished")
                logger.progress((round((retrieved / number_of_tasks) * 90., 1), "DataController finished"))
            else:
                logger.info("Got a strange result on results queue")
                logger.info(str(tempres))

            logger.info("Retrieved %i/%i results" % (int(retrieved), number_of_tasks))

    if len(return_particles) != len(self.particles):
        logger.warn("Some particles failed and are not included in the output")

    # The results queue should be empty at this point
    assert results.empty() is True

    # Should be good to join on the tasks now that the queue is empty
    logger.info("Joining the task queue")
    tasks.join()

    # Join all processes
    logger.info("Joining the processes")
    for w in procs + [data_controller_process]:
        # Wait 10 seconds
        w.join(10.)
        if w.is_alive():
            # Process is hanging, kill it.
            logger.info("Terminating %s forcefully. This should have exited itself." % w.name)
            w.terminate()

    logger.info('Workers complete')

    self.particles = return_particles

    # Remove Manager so it shuts down
    del mgr

    # Remove pickled timevar
    os.remove(timevar_pickle_path)

    # Remove the cache file
    if remove_cache is True:
        try:
            os.remove(self.cache_path)
        except OSError:
            logger.debug("Could not remove cache file, it probably never existed")

    logger.progress((96, "Exporting results"))

    if len(self.particles) > 0:
        # If output_formats and path specified,
        # output particle run data to disk when completed
        if "output_formats" in kwargs:
            # Make sure output_path is also included
            output_path = kwargs.get("output_path", None)
            if output_path is not None:
                formats = kwargs.get("output_formats")
                if isinstance(formats, list):
                    for fmt in formats:
                        logger.info("Exporting to: %s" % fmt)
                        try:
                            self.export(output_path, format=fmt)
                        except Exception:
                            # One failing format should not stop the others.
                            logger.error("Failed to export to: %s" % fmt)
                else:
                    logger.warn('The output_formats parameter should be a list, not saving any output!')
            else:
                logger.warn('No output path defined, not saving any output!')
        else:
            logger.warn('No output format defined, not saving any output!')
    else:
        logger.warn("Model didn't actually do anything, check the log.")
        if error_code == -2:
            raise DataControllerError("Error in the DataController")
        else:
            raise ModelError("Error in the model")

    logger.progress((99, "Model Run Complete"))

    return
def __call__(self, active):
    """
    Serve cache-file update requests until the particles have finished.

    Opens the remote dataset, works out which time indices the run needs,
    and then loops while ``self.n_run.value`` is greater than 1.  Whenever
    ``self.get_data.value`` is True the local cache file is created (first
    request) or appended to (later requests) around the point stored in
    ``self.point_get``.  When ``self.caching`` is False the loop only sleeps.

    Parameters:
        active: not used by this method.

    Returns:
        The string "CachingDataController" once the loop has ended and the
        remote dataset has been closed.

    Raises:
        AssertionError: when the first request still fails after 20 attempts.
        Any exception raised while building or updating the cache is logged
        and re-raised.
    """
    c = 0

    self.dataset = CommonDataset.open(self.hydrodataset)
    self.remote = self.dataset.nc

    # Calculate the datetimes of the model timesteps like
    # the particle objects do, so we can figure out unique
    # time indices
    _, newtimes = AsaTransport.get_time_objects_from_model_timesteps(
        self.times, start=self.start_time)

    timevar = self.dataset.gettimevar(self.uname)

    # Don't need to grab the last datetime, as it is not needed for forcing, only
    # for setting the time of the final particle forcing
    time_indexs = timevar.nearest_index(newtimes[0:-1], select='before')

    # Have to make sure that we get the plus 1 for the
    # linear interpolation of u,v,w,temp,salt
    self.inds = np.unique(time_indexs)
    self.inds = np.append(self.inds, self.inds.max() + 1)

    # While there is at least 1 particle still running,
    # stay alive, if not break
    while self.n_run.value > 1:

        if self.caching is False:
            logger.debug(
                "Caching is False, not doing much. Just hanging out until all of the particles finish."
            )
            timer.sleep(10)
            continue

        # If particle asks for data, do the following
        if self.get_data.value is True:
            logger.debug("Particle asked for data!")

            # Wait for particles to get out
            while True:
                self.read_lock.acquire()

                logger.debug("Read count: %d" % self.read_count.value)
                if self.read_count.value > 0:
                    logger.debug(
                        "Waiting for write lock on cache file (particles must stop reading)..."
                    )
                    self.read_lock.release()
                    timer.sleep(2)
                else:
                    break

            # Get write lock on the file.  Already have read lock.
            self.write_lock.acquire()
            self.has_write_lock.value = os.getpid()

            if c == 0:
                logger.debug("Creating cache file")
                try:
                    # Open local cache for writing, overwrites
                    # existing file with same name
                    self.local = netCDF4.Dataset(self.cache_path, 'w')

                    indices = self.dataset.get_indices(
                        self.uname,
                        timeinds=[np.asarray([0])],
                        point=self.start)
                    self.point_get.value = [
                        self.inds[0], indices[-2], indices[-1]
                    ]

                    # Create dimensions for u and v variables
                    self.local.createDimension('time', None)
                    self.local.createDimension('level', None)
                    self.local.createDimension('x', None)
                    self.local.createDimension('y', None)

                    # Create 3d or 4d u and v variables
                    if self.remote.variables[self.uname].ndim == 4:
                        self.ndim = 4
                        dimensions = ('time', 'level', 'y', 'x')
                        coordinates = "time z lon lat"
                    elif self.remote.variables[self.uname].ndim == 3:
                        self.ndim = 3
                        dimensions = ('time', 'y', 'x')
                        coordinates = "time lon lat"
                    # Kept in a local so that later update requests (handled
                    # in the other branch of this loop) can reuse it.
                    shape = self.remote.variables[self.uname].shape

                    # If there is no FillValue defined in the dataset, use np.nan.
                    # Sometimes it will work out correctly and other times we will
                    # have a huge cache file.
                    try:
                        fill = self.remote.variables[self.uname].missing_value
                    except Exception:
                        fill = np.nan

                    # Create domain variable that specifies
                    # where there is data geographically/by time
                    # and where there is not data,
                    #   Used for testing if particle needs to
                    #   ask cache to update
                    domain = self.local.createVariable('domain', 'i', dimensions, zlib=False, fill_value=0)
                    domain.coordinates = coordinates

                    # Create local u and v variables
                    u = self.local.createVariable('u', 'f', dimensions, zlib=False, fill_value=fill)
                    v = self.local.createVariable('v', 'f', dimensions, zlib=False, fill_value=fill)

                    v.coordinates = coordinates
                    u.coordinates = coordinates

                    # localvars and remotevars are parallel lists: entry i of
                    # one is filled from entry i of the other.
                    localvars = [u, v]
                    remotevars = [
                        self.remote.variables[self.uname],
                        self.remote.variables[self.vname],
                    ]

                    # Create local w variable
                    if self.wname is not None:
                        w = self.local.createVariable('w', 'f', dimensions, zlib=False, fill_value=fill)
                        w.coordinates = coordinates
                        localvars.append(w)
                        remotevars.append(self.remote.variables[self.wname])

                    if self.temp_name is not None and self.salt_name is not None:
                        # Create local temp and salt vars
                        temp = self.local.createVariable('temp', 'f', dimensions, zlib=False, fill_value=fill)
                        salt = self.local.createVariable('salt', 'f', dimensions, zlib=False, fill_value=fill)
                        temp.coordinates = coordinates
                        salt.coordinates = coordinates
                        localvars.append(temp)
                        localvars.append(salt)
                        remotevars.append(self.remote.variables[self.temp_name])
                        remotevars.append(self.remote.variables[self.salt_name])

                    # Create local lat/lon coordinate variables
                    if self.remote.variables[self.xname].ndim == 2:
                        lon = self.local.createVariable('lon', 'f', ("y", "x"), zlib=False)
                        lon[:] = self.remote.variables[self.xname][:, :]
                        lat = self.local.createVariable('lat', 'f', ("y", "x"), zlib=False)
                        lat[:] = self.remote.variables[self.yname][:, :]
                    elif self.remote.variables[self.xname].ndim == 1:
                        lon = self.local.createVariable('lon', 'f', ("x",), zlib=False)
                        lon[:] = self.remote.variables[self.xname][:]
                        lat = self.local.createVariable('lat', 'f', ("y",), zlib=False)
                        lat[:] = self.remote.variables[self.yname][:]

                    # Create local z variable
                    if self.zname is not None:
                        if self.remote.variables[self.zname].ndim == 4:
                            # A time varying z is cached alongside the data.
                            z = self.local.createVariable('z', 'f', ("time", "level", "y", "x"), zlib=False)
                            remotez = self.remote.variables[self.zname]
                            localvars.append(z)
                            remotevars.append(remotez)
                        elif self.remote.variables[self.zname].ndim == 3:
                            z = self.local.createVariable('z', 'f', ("level", "y", "x"), zlib=False)
                            z[:] = self.remote.variables[self.zname][:, :, :]
                        elif self.remote.variables[self.zname].ndim == 1:
                            z = self.local.createVariable('z', 'f', ("level",), zlib=False)
                            z[:] = self.remote.variables[self.zname][:]

                    # Create local time variable
                    time = self.local.createVariable('time', 'f8', ("time",), zlib=False)
                    if self.tname is not None:
                        time[:] = self.remote.variables[self.tname][self.inds]

                    # Time window to fetch, clipped to the last needed index.
                    if self.point_get.value[0] + self.time_size > np.max(self.inds):
                        current_inds = np.arange(self.point_get.value[0], np.max(self.inds) + 1)
                    else:
                        current_inds = np.arange(self.point_get.value[0], self.point_get.value[0] + self.time_size)

                    # Get data from remote dataset and add
                    # to local cache.
                    # Try 20 times on the first attempt
                    current_attempt = 1
                    max_attempts = 20
                    while True:
                        # Raised explicitly (rather than with an assert
                        # statement) so the limit still applies when Python
                        # runs with assertions disabled.
                        if current_attempt > max_attempts:
                            raise AssertionError("CachingDataController ran out of attempts to get remote data")
                        try:
                            self.get_remote_data(localvars, remotevars, current_inds, shape)
                        except AssertionError:
                            raise
                        except Exception:
                            logger.warn(
                                "CachingDataController failed to get remote data. Trying again in 20 seconds. %s attempts left."
                                % str(max_attempts - current_attempt))
                            logger.exception("Data Access Error")
                            timer.sleep(20)
                            current_attempt += 1
                        else:
                            break

                    c += 1
                except Exception:
                    logger.error(
                        "CachingDataController failed to get data (first request)"
                    )
                    raise
                finally:
                    try:
                        self.local.sync()
                        self.local.close()
                    finally:
                        # Always hand the locks back, even when the cache
                        # file could not be synced or closed.
                        self.has_write_lock.value = -1
                        self.write_lock.release()
                        self.get_data.value = False
                        self.read_lock.release()
                        logger.debug(
                            "Done updating cache file, closing file, and releasing locks"
                        )
            else:
                logger.debug("Updating cache file")
                try:
                    # Open local cache dataset for appending
                    self.local = netCDF4.Dataset(self.cache_path, 'a')

                    # Create local and remote variable objects
                    # for the variables of interest
                    u = self.local.variables['u']
                    v = self.local.variables['v']
                    time = self.local.variables['time']
                    remoteu = self.remote.variables[self.uname]
                    remotev = self.remote.variables[self.vname]

                    # Create lists of variable objects for
                    # the data updater
                    localvars = [u, v]
                    remotevars = [remoteu, remotev]

                    if self.salt_name is not None and self.temp_name is not None:
                        salt = self.local.variables['salt']
                        temp = self.local.variables['temp']
                        remotesalt = self.remote.variables[self.salt_name]
                        remotetemp = self.remote.variables[self.temp_name]
                        localvars.append(salt)
                        localvars.append(temp)
                        remotevars.append(remotesalt)
                        remotevars.append(remotetemp)

                    if self.wname is not None:
                        w = self.local.variables['w']
                        remotew = self.remote.variables[self.wname]
                        localvars.append(w)
                        remotevars.append(remotew)

                    if self.zname is not None:
                        remotez = self.remote.variables[self.zname]
                        if remotez.ndim == 4:
                            z = self.local.variables['z']
                            localvars.append(z)
                            remotevars.append(remotez)

                    if self.tname is not None:
                        # Index the remote TIME variable; the variables
                        # mapping itself can not be indexed with self.inds.
                        remotetime = self.remote.variables[self.tname]
                        time[self.inds] = remotetime[self.inds]

                    # Time window to fetch, clipped to the last needed index.
                    if self.point_get.value[0] + self.time_size > np.max(self.inds):
                        current_inds = np.arange(self.point_get.value[0], np.max(self.inds) + 1)
                    else:
                        current_inds = np.arange(self.point_get.value[0], self.point_get.value[0] + self.time_size)

                    # Get data from remote dataset and add
                    # to local cache.  Keeps retrying until it works.
                    while True:
                        try:
                            self.get_remote_data(localvars, remotevars, current_inds, shape)
                        except Exception:
                            logger.warn(
                                "CachingDataController failed to get remote data. Trying again in 30 seconds"
                            )
                            timer.sleep(30)
                        else:
                            break

                    c += 1
                except Exception:
                    logger.error(
                        "CachingDataController failed to get data (not first request)"
                    )
                    raise
                finally:
                    try:
                        self.local.sync()
                        self.local.close()
                    finally:
                        # Always hand the locks back, even when the cache
                        # file could not be synced or closed.
                        self.has_write_lock.value = -1
                        self.write_lock.release()
                        self.get_data.value = False
                        self.read_lock.release()
                        logger.debug(
                            "Done updating cache file, closing file, and releasing locks"
                        )
        else:
            logger.debug(
                "Particles are still running, waiting for them to request data..."
            )
            timer.sleep(2)

    self.dataset.closenc()

    return "CachingDataController"
def __call__(self, proc, active):
    """
    Serve cache-file update requests until the particles have finished.

    Opens the remote dataset, works out which time indices the run needs,
    and then polls every two seconds while ``self.n_run.value`` is greater
    than 1.  Whenever ``self.get_data.value`` is set the local cache file
    is created (first request) or appended to (later requests) around the
    point stored in ``self.point_get``.

    Parameters:
        proc: stored on the instance as ``self.proc``.
        active: not used by this method.

    Returns:
        The string "DataController" once the loop has ended and the remote
        dataset has been closed.

    Raises:
        Any exception raised while building or updating the cache (other
        than failures of ``get_remote_data``, which are retried forever) is
        logged and re-raised.
    """
    c = 0

    self.dataset = CommonDataset.open(self.url)
    self.proc = proc
    self.remote = self.dataset.nc
    cachepath = self.cache_path

    # Calculate the datetimes of the model timesteps like
    # the particle objects do, so we can figure out unique
    # time indices
    _, newtimes = AsaTransport.get_time_objects_from_model_timesteps(self.times, start=self.start_time)

    timevar = self.dataset.gettimevar(self.uname)

    # Don't need to grab the last datetime, as it is not needed for forcing, only
    # for setting the time of the final particle forcing
    time_indexs = timevar.nearest_index(newtimes[0:-1], select='before')

    # Have to make sure that we get the plus 1 for the
    # linear interpolation of u,v,w,temp,salt
    self.inds = np.unique(time_indexs)
    self.inds = np.append(self.inds, self.inds.max() + 1)

    # While there is at least 1 particle still running,
    # stay alive, if not break
    while self.n_run.value > 1:
        logger.debug("Particles are still running, waiting for them to request data...")
        timer.sleep(2)

        # If particle asks for data, do the following
        if self.get_data.value:
            logger.debug("Particle asked for data!")

            # Wait for particles to get out
            while True:
                self.read_lock.acquire()

                logger.debug("Read count: %d" % self.read_count.value)
                if self.read_count.value > 0:
                    logger.debug("Waiting for write lock on cache file (particles must stop reading)...")
                    self.read_lock.release()
                    timer.sleep(4)
                else:
                    break

            # Get write lock on the file.  Already have read lock.
            self.write_lock.acquire()
            self.has_write_lock.value = os.getpid()

            if c == 0:
                logger.debug("Creating cache file")
                try:
                    # Open local cache for writing, overwrites
                    # existing file with same name
                    self.local = netCDF4.Dataset(cachepath, 'w')

                    indices = self.dataset.get_indices(self.uname, timeinds=[np.asarray([0])], point=self.start)
                    self.point_get.value = [self.inds[0], indices[-2], indices[-1]]

                    # Create dimensions for u and v variables
                    self.local.createDimension('time', None)
                    self.local.createDimension('level', None)
                    self.local.createDimension('x', None)
                    self.local.createDimension('y', None)

                    # Create 3d or 4d u and v variables
                    if self.remote.variables[self.uname].ndim == 4:
                        self.ndim = 4
                        dimensions = ('time', 'level', 'y', 'x')
                        coordinates = "time z lon lat"
                    elif self.remote.variables[self.uname].ndim == 3:
                        self.ndim = 3
                        dimensions = ('time', 'y', 'x')
                        coordinates = "time lon lat"
                    # Kept in a local so that later update requests (handled
                    # in the other branch of this loop) can reuse it.
                    shape = self.remote.variables[self.uname].shape

                    # If there is no FillValue defined in the dataset, use np.nan.
                    # Sometimes it will work out correctly and other times we will
                    # have a huge cache file.
                    try:
                        fill = self.remote.variables[self.uname].missing_value
                    except Exception:
                        fill = np.nan

                    # Create domain variable that specifies
                    # where there is data geographically/by time
                    # and where there is not data,
                    #   Used for testing if particle needs to
                    #   ask cache to update
                    domain = self.local.createVariable('domain', 'i', dimensions, zlib=False, fill_value=0)
                    domain.coordinates = coordinates

                    # Create local u and v variables
                    u = self.local.createVariable('u', 'f', dimensions, zlib=False, fill_value=fill)
                    v = self.local.createVariable('v', 'f', dimensions, zlib=False, fill_value=fill)

                    v.coordinates = coordinates
                    u.coordinates = coordinates

                    # localvars and remotevars are parallel lists: entry i of
                    # one is filled from entry i of the other.
                    localvars = [u, v]
                    remotevars = [self.remote.variables[self.uname],
                                  self.remote.variables[self.vname]]

                    # Create local w variable
                    if self.wname is not None:
                        w = self.local.createVariable('w', 'f', dimensions, zlib=False, fill_value=fill)
                        w.coordinates = coordinates
                        localvars.append(w)
                        remotevars.append(self.remote.variables[self.wname])

                    if self.temp_name is not None and self.salt_name is not None:
                        # Create local temp and salt vars
                        temp = self.local.createVariable('temp', 'f', dimensions, zlib=False, fill_value=fill)
                        salt = self.local.createVariable('salt', 'f', dimensions, zlib=False, fill_value=fill)
                        temp.coordinates = coordinates
                        salt.coordinates = coordinates
                        localvars.append(temp)
                        localvars.append(salt)
                        remotevars.append(self.remote.variables[self.temp_name])
                        remotevars.append(self.remote.variables[self.salt_name])

                    # Create local lat/lon coordinate variables
                    if self.remote.variables[self.xname].ndim == 2:
                        lon = self.local.createVariable('lon', 'f', ("y", "x"), zlib=False)
                        lon[:] = self.remote.variables[self.xname][:, :]
                        lat = self.local.createVariable('lat', 'f', ("y", "x"), zlib=False)
                        lat[:] = self.remote.variables[self.yname][:, :]
                    elif self.remote.variables[self.xname].ndim == 1:
                        lon = self.local.createVariable('lon', 'f', ("x",), zlib=False)
                        lon[:] = self.remote.variables[self.xname][:]
                        lat = self.local.createVariable('lat', 'f', ("y",), zlib=False)
                        lat[:] = self.remote.variables[self.yname][:]

                    # Create local z variable
                    if self.zname is not None:
                        if self.remote.variables[self.zname].ndim == 4:
                            # A time varying z is cached alongside the data.
                            z = self.local.createVariable('z', 'f', ("time", "level", "y", "x"), zlib=False)
                            remotez = self.remote.variables[self.zname]
                            localvars.append(z)
                            remotevars.append(remotez)
                        elif self.remote.variables[self.zname].ndim == 3:
                            z = self.local.createVariable('z', 'f', ("level", "y", "x"), zlib=False)
                            z[:] = self.remote.variables[self.zname][:, :, :]
                        elif self.remote.variables[self.zname].ndim == 1:
                            z = self.local.createVariable('z', 'f', ("level",), zlib=False)
                            z[:] = self.remote.variables[self.zname][:]

                    # Create local time variable
                    time = self.local.createVariable('time', 'f8', ("time",), zlib=False)
                    if self.tname is not None:
                        time[:] = self.remote.variables[self.tname][self.inds]

                    # Time window to fetch, clipped to the last needed index.
                    if self.point_get.value[0] + self.time_size > np.max(self.inds):
                        current_inds = np.arange(self.point_get.value[0], np.max(self.inds) + 1)
                    else:
                        current_inds = np.arange(self.point_get.value[0], self.point_get.value[0] + self.time_size)

                    # Get data from remote dataset and add
                    # to local cache.  Keeps retrying until it works.
                    while True:
                        try:
                            self.get_remote_data(localvars, remotevars, current_inds, shape)
                        except Exception:
                            logger.warn("DataController failed to get remote data. Trying again in 30 seconds")
                            timer.sleep(30)
                        else:
                            break

                    c += 1
                except Exception:
                    logger.error("DataController failed to get data (first request)")
                    raise
                finally:
                    try:
                        self.local.sync()
                        self.local.close()
                    finally:
                        # Always hand the locks back, even when the cache
                        # file could not be synced or closed.
                        self.has_write_lock.value = -1
                        self.write_lock.release()
                        self.get_data.value = False
                        self.read_lock.release()
                        logger.debug("Done updating cache file, closing file, and releasing locks")
            else:
                logger.debug("Updating cache file")
                try:
                    # Open local cache dataset for appending
                    self.local = netCDF4.Dataset(cachepath, 'a')

                    # Create local and remote variable objects
                    # for the variables of interest
                    u = self.local.variables['u']
                    v = self.local.variables['v']
                    time = self.local.variables['time']
                    remoteu = self.remote.variables[self.uname]
                    remotev = self.remote.variables[self.vname]

                    # Create lists of variable objects for
                    # the data updater
                    localvars = [u, v]
                    remotevars = [remoteu, remotev]

                    if self.salt_name is not None and self.temp_name is not None:
                        salt = self.local.variables['salt']
                        temp = self.local.variables['temp']
                        remotesalt = self.remote.variables[self.salt_name]
                        remotetemp = self.remote.variables[self.temp_name]
                        localvars.append(salt)
                        localvars.append(temp)
                        remotevars.append(remotesalt)
                        remotevars.append(remotetemp)

                    if self.wname is not None:
                        w = self.local.variables['w']
                        remotew = self.remote.variables[self.wname]
                        localvars.append(w)
                        remotevars.append(remotew)

                    if self.zname is not None:
                        remotez = self.remote.variables[self.zname]
                        if remotez.ndim == 4:
                            z = self.local.variables['z']
                            localvars.append(z)
                            remotevars.append(remotez)

                    if self.tname is not None:
                        # Index the remote TIME variable; the variables
                        # mapping itself can not be indexed with self.inds.
                        remotetime = self.remote.variables[self.tname]
                        time[self.inds] = remotetime[self.inds]

                    # Time window to fetch, clipped to the last needed index.
                    if self.point_get.value[0] + self.time_size > np.max(self.inds):
                        current_inds = np.arange(self.point_get.value[0], np.max(self.inds) + 1)
                    else:
                        current_inds = np.arange(self.point_get.value[0], self.point_get.value[0] + self.time_size)

                    # Get data from remote dataset and add
                    # to local cache.  Keeps retrying until it works.
                    while True:
                        try:
                            self.get_remote_data(localvars, remotevars, current_inds, shape)
                        except Exception:
                            logger.warn("DataController failed to get remote data. Trying again in 30 seconds")
                            timer.sleep(30)
                        else:
                            break

                    c += 1
                except Exception:
                    logger.error("DataController failed to get data (not first request)")
                    raise
                finally:
                    try:
                        self.local.sync()
                        self.local.close()
                    finally:
                        # Always hand the locks back, even when the cache
                        # file could not be synced or closed.
                        self.has_write_lock.value = -1
                        self.write_lock.release()
                        self.get_data.value = False
                        self.read_lock.release()
                        logger.debug("Done updating cache file, closing file, and releasing locks")

    self.dataset.closenc()

    return "DataController"