Python AsaTransport.get_time_objects_from_model_timestepsの例

プログラミング言語: Python

名前空間/パッケージ名: paegan.transport.utils.asatransport

クラス/型: AsaTransport

メソッド/関数: get_time_objects_from_model_timesteps

hotexamples.comのコード掲載数: 10

Python AsaTransport.get_time_objects_from_model_timesteps - 10件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのpaegan.transport.utils.asatransport.AsaTransport.get_time_objects_from_model_timestepsの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示非表示

get_time_objects_from_model_timesteps(4)

distance_from_location_using_u_v_w(3)

fill_polygon_with_points(2)

コード例 #1

ファイルを表示

    def __call__(self, active):
        c = 0

        self.dataset = CommonDataset.open(self.hydrodataset)
        self.remote = self.dataset.nc

        # Calculate the datetimes of the model timesteps like
        # the particle objects do, so we can figure out unique
        # time indices
        modelTimestep, newtimes = AsaTransport.get_time_objects_from_model_timesteps(
            self.times, start=self.start_time)

        timevar = self.dataset.gettimevar(self.uname)

        # Don't need to grab the last datetime, as it is not needed for forcing, only
        # for setting the time of the final particle forcing
        time_indexs = timevar.nearest_index(newtimes[0:-1], select='before')

        # Have to make sure that we get the plus 1 for the
        # linear interpolation of u,v,w,temp,salt
        self.inds = np.unique(time_indexs)
        self.inds = np.append(self.inds, self.inds.max() + 1)

        # While there is at least 1 particle still running,
        # stay alive, if not break
        while self.n_run.value > 1:

            if self.caching is False:
                logger.debug(
                    "Caching is False, not doing much.  Just hanging out until all of the particles finish."
                )
                timer.sleep(10)
                continue

            # If particle asks for data, do the following
            if self.get_data.value is True:
                logger.debug("Particle asked for data!")

                # Wait for particles to get out
                while True:
                    self.read_lock.acquire()

                    logger.debug("Read count: %d" % self.read_count.value)
                    if self.read_count.value > 0:
                        logger.debug(
                            "Waiting for write lock on cache file (particles must stop reading)..."
                        )
                        self.read_lock.release()
                        timer.sleep(2)
                    else:
                        break

                # Get write lock on the file.  Already have read lock.
                self.write_lock.acquire()
                self.has_write_lock.value = os.getpid()

                if c == 0:
                    logger.debug("Creating cache file")
                    try:
                        # Open local cache for writing, overwrites
                        # existing file with same name
                        self.local = netCDF4.Dataset(self.cache_path, 'w')

                        indices = self.dataset.get_indices(
                            self.uname,
                            timeinds=[np.asarray([0])],
                            point=self.start)
                        self.point_get.value = [
                            self.inds[0], indices[-2], indices[-1]
                        ]

                        # Create dimensions for u and v variables
                        self.local.createDimension('time', None)
                        self.local.createDimension('level', None)
                        self.local.createDimension('x', None)
                        self.local.createDimension('y', None)

                        # Create 3d or 4d u and v variables
                        if self.remote.variables[self.uname].ndim == 4:
                            self.ndim = 4
                            dimensions = ('time', 'level', 'y', 'x')
                            coordinates = "time z lon lat"
                        elif self.remote.variables[self.uname].ndim == 3:
                            self.ndim = 3
                            dimensions = ('time', 'y', 'x')
                            coordinates = "time lon lat"
                        shape = self.remote.variables[self.uname].shape

                        # If there is no FillValue defined in the dataset, use np.nan.
                        # Sometimes it will work out correctly and other times we will
                        # have a huge cache file.
                        try:
                            fill = self.remote.variables[
                                self.uname].missing_value
                        except Exception:
                            fill = np.nan

                        # Create domain variable that specifies
                        # where there is data geographically/by time
                        # and where there is not data,
                        #   Used for testing if particle needs to
                        #   ask cache to update
                        domain = self.local.createVariable('domain',
                                                           'i',
                                                           dimensions,
                                                           zlib=False,
                                                           fill_value=0)
                        domain.coordinates = coordinates

                        # Create local u and v variables
                        u = self.local.createVariable('u',
                                                      'f',
                                                      dimensions,
                                                      zlib=False,
                                                      fill_value=fill)
                        v = self.local.createVariable('v',
                                                      'f',
                                                      dimensions,
                                                      zlib=False,
                                                      fill_value=fill)

                        v.coordinates = coordinates
                        u.coordinates = coordinates

                        localvars = [
                            u,
                            v,
                        ]
                        remotevars = [
                            self.remote.variables[self.uname],
                            self.remote.variables[self.vname]
                        ]

                        # Create local w variable
                        if self.wname is not None:
                            w = self.local.createVariable('w',
                                                          'f',
                                                          dimensions,
                                                          zlib=False,
                                                          fill_value=fill)
                            w.coordinates = coordinates
                            localvars.append(w)
                            remotevars.append(
                                self.remote.variables[self.wname])

                        if self.temp_name is not None and self.salt_name is not None:
                            # Create local temp and salt vars
                            temp = self.local.createVariable('temp',
                                                             'f',
                                                             dimensions,
                                                             zlib=False,
                                                             fill_value=fill)
                            salt = self.local.createVariable('salt',
                                                             'f',
                                                             dimensions,
                                                             zlib=False,
                                                             fill_value=fill)
                            temp.coordinates = coordinates
                            salt.coordinates = coordinates
                            localvars.append(temp)
                            localvars.append(salt)
                            remotevars.append(
                                self.remote.variables[self.temp_name])
                            remotevars.append(
                                self.remote.variables[self.salt_name])

                        # Create local lat/lon coordinate variables
                        if self.remote.variables[self.xname].ndim == 2:
                            lon = self.local.createVariable('lon',
                                                            'f', ("y", "x"),
                                                            zlib=False)
                            lon[:] = self.remote.variables[self.xname][:, :]
                            lat = self.local.createVariable('lat',
                                                            'f', ("y", "x"),
                                                            zlib=False)
                            lat[:] = self.remote.variables[self.yname][:, :]
                        if self.remote.variables[self.xname].ndim == 1:
                            lon = self.local.createVariable('lon',
                                                            'f', ("x"),
                                                            zlib=False)
                            lon[:] = self.remote.variables[self.xname][:]
                            lat = self.local.createVariable('lat',
                                                            'f', ("y"),
                                                            zlib=False)
                            lat[:] = self.remote.variables[self.yname][:]

                        # Create local z variable
                        if self.zname is not None:
                            if self.remote.variables[self.zname].ndim == 4:
                                z = self.local.createVariable(
                                    'z',
                                    'f', ("time", "level", "y", "x"),
                                    zlib=False)
                                remotez = self.remote.variables[self.zname]
                                localvars.append(z)
                                remotevars.append(remotez)
                            elif self.remote.variables[self.zname].ndim == 3:
                                z = self.local.createVariable(
                                    'z', 'f', ("level", "y", "x"), zlib=False)
                                z[:] = self.remote.variables[
                                    self.zname][:, :, :]
                            elif self.remote.variables[self.zname].ndim == 1:
                                z = self.local.createVariable('z',
                                                              'f', ("level", ),
                                                              zlib=False)
                                z[:] = self.remote.variables[self.zname][:]

                        # Create local time variable
                        time = self.local.createVariable('time',
                                                         'f8', ("time", ),
                                                         zlib=False)
                        if self.tname is not None:
                            time[:] = self.remote.variables[self.tname][
                                self.inds]

                        if self.point_get.value[0] + self.time_size > np.max(
                                self.inds):
                            current_inds = np.arange(self.point_get.value[0],
                                                     np.max(self.inds) + 1)
                        else:
                            current_inds = np.arange(
                                self.point_get.value[0],
                                self.point_get.value[0] + self.time_size)

                        # Get data from remote dataset and add
                        # to local cache.
                        # Try 20 times on the first attempt
                        current_attempt = 1
                        max_attempts = 20
                        while True:
                            try:
                                assert current_attempt <= max_attempts
                                self.get_remote_data(localvars, remotevars,
                                                     current_inds, shape)
                            except AssertionError:
                                raise
                            except:
                                logger.warn(
                                    "CachingDataController failed to get remote data.  Trying again in 20 seconds. %s attempts left."
                                    % str(max_attempts - current_attempt))
                                logger.exception("Data Access Error")
                                timer.sleep(20)
                                current_attempt += 1
                            else:
                                break

                        c += 1
                    except (Exception, AssertionError):
                        logger.error(
                            "CachingDataController failed to get data (first request)"
                        )
                        raise
                    finally:
                        self.local.sync()
                        self.local.close()
                        self.has_write_lock.value = -1
                        self.write_lock.release()
                        self.get_data.value = False
                        self.read_lock.release()
                        logger.debug(
                            "Done updating cache file, closing file, and releasing locks"
                        )
                else:
                    logger.debug("Updating cache file")
                    try:
                        # Open local cache dataset for appending
                        self.local = netCDF4.Dataset(self.cache_path, 'a')

                        # Create local and remote variable objects
                        # for the variables of interest
                        u = self.local.variables['u']
                        v = self.local.variables['v']
                        time = self.local.variables['time']
                        remoteu = self.remote.variables[self.uname]
                        remotev = self.remote.variables[self.vname]

                        # Create lists of variable objects for
                        # the data updater
                        localvars = [
                            u,
                            v,
                        ]
                        remotevars = [
                            remoteu,
                            remotev,
                        ]
                        if self.salt_name is not None and self.temp_name is not None:
                            salt = self.local.variables['salt']
                            temp = self.local.variables['temp']
                            remotesalt = self.remote.variables[self.salt_name]
                            remotetemp = self.remote.variables[self.temp_name]
                            localvars.append(salt)
                            localvars.append(temp)
                            remotevars.append(remotesalt)
                            remotevars.append(remotetemp)
                        if self.wname is not None:
                            w = self.local.variables['w']
                            remotew = self.remote.variables[self.wname]
                            localvars.append(w)
                            remotevars.append(remotew)
                        if self.zname is not None:
                            remotez = self.remote.variables[self.zname]
                            if remotez.ndim == 4:
                                z = self.local.variables['z']
                                localvars.append(z)
                                remotevars.append(remotez)
                        if self.tname is not None:
                            # remotetime = self.remote.variables[self.tname]
                            time[self.inds] = self.remote.variables[self.inds]

                        if self.point_get.value[0] + self.time_size > np.max(
                                self.inds):
                            current_inds = np.arange(self.point_get.value[0],
                                                     np.max(self.inds) + 1)
                        else:
                            current_inds = np.arange(
                                self.point_get.value[0],
                                self.point_get.value[0] + self.time_size)

                        # Get data from remote dataset and add
                        # to local cache
                        while True:
                            try:
                                self.get_remote_data(localvars, remotevars,
                                                     current_inds, shape)
                            except:
                                logger.warn(
                                    "CachingDataController failed to get remote data.  Trying again in 30 seconds"
                                )
                                timer.sleep(30)
                            else:
                                break

                        c += 1
                    except Exception:
                        logger.error(
                            "CachingDataController failed to get data (not first request)"
                        )
                        raise
                    finally:
                        self.local.sync()
                        self.local.close()
                        self.has_write_lock.value = -1
                        self.write_lock.release()
                        self.get_data.value = False
                        self.read_lock.release()
                        logger.debug(
                            "Done updating cache file, closing file, and releasing locks"
                        )
            else:
                logger.debug(
                    "Particles are still running, waiting for them to request data..."
                )
                timer.sleep(2)

        self.dataset.closenc()

        return "CachingDataController"

コード例 #2

ファイルを表示

ファイル: model_controller.py プロジェクト: blazetopher/paegan-transport

    def run(self, hydrodataset, **kwargs):

        # Add ModelController description to logfile
        logger.info(self)

        # Add the model descriptions to logfile
        for m in self._models:
            logger.info(m)

        # Calculate the model timesteps
        # We need times = len(self._nstep) + 1 since data is stored one timestep
        # after a particle is forced with the final timestep's data.
        times = range(0, (self._step * self._nstep) + 1, self._step)
        # Calculate a datetime object for each model timestep
        # This method is duplicated in DataController and ForceParticle
        # using the 'times' variables above.  Will be useful in those other
        # locations for particles released at different times
        # i.e. released over a few days
        modelTimestep, self.datetimes = AsaTransport.get_time_objects_from_model_timesteps(
            times, start=self.start)

        time_chunk = self._time_chunk
        horiz_chunk = self._horiz_chunk
        low_memory = kwargs.get("low_memory", False)

        # Should we remove the cache file at the end of the run?
        remove_cache = kwargs.get("remove_cache", True)

        self.bathy_path = kwargs.get("bathy", None)

        self.cache_path = kwargs.get("cache", None)
        if self.cache_path is None:
            # Generate temp filename for dataset cache
            default_cache_dir = os.path.join(os.path.dirname(__file__),
                                             "_cache")
            temp_name = AsaRandom.filename(prefix=str(
                datetime.now().microsecond),
                                           suffix=".nc")
            self.cache_path = os.path.join(default_cache_dir, temp_name)

        logger.progress((1, "Setting up particle start locations"))
        point_locations = []
        if isinstance(self.geometry, Point):
            point_locations = [self.reference_location] * self._npart
        elif isinstance(self.geometry, Polygon) or isinstance(
                self.geometry, MultiPolygon):
            point_locations = [
                Location4D(latitude=loc.y,
                           longitude=loc.x,
                           depth=self._depth,
                           time=self.start)
                for loc in AsaTransport.fill_polygon_with_points(
                    goal=self._npart, polygon=self.geometry)
            ]

        # Initialize the particles
        logger.progress((2, "Initializing particles"))
        for x in xrange(0, self._npart):
            p = LarvaParticle(id=x)
            p.location = point_locations[x]
            # We don't need to fill the location gaps here for environment variables
            # because the first data collected actually relates to this original
            # position.
            # We do need to fill in fields such as settled, halted, etc.
            p.fill_status_gap()
            # Set the inital note
            p.note = p.outputstring()
            p.notes.append(p.note)
            self.particles.append(p)

        # This is where it makes sense to implement the multiprocessing
        # looping for particles and models. Can handle each particle in
        # parallel probably.
        #
        # Get the number of cores (may take some tuning) and create that
        # many workers then pass particles into the queue for the workers
        mgr = multiprocessing.Manager()
        nproc = multiprocessing.cpu_count() - 1
        if nproc <= 0:
            raise ValueError(
                "Model does not run using less than two CPU cores")

        # Each particle is a task, plus the DataController
        number_of_tasks = len(self.particles) + 1

        # We need a process for each particle and one for the data controller
        nproc = min(number_of_tasks, nproc)

        # When a particle requests data
        data_request_lock = mgr.Lock()
        # PID of process with lock
        has_data_request_lock = mgr.Value('int', -1)

        nproc_lock = mgr.Lock()

        # Create the task queue for all of the particles and the DataController
        tasks = multiprocessing.JoinableQueue(number_of_tasks)
        # Create the result queue for all of the particles and the DataController
        results = mgr.Queue(number_of_tasks)

        # Create the shared state objects
        get_data = mgr.Value('bool', True)
        # Number of tasks
        n_run = mgr.Value('int', number_of_tasks)
        updating = mgr.Value('bool', False)

        # When something is reading from cache file
        read_lock = mgr.Lock()
        # list of PIDs that are reading
        has_read_lock = mgr.list()
        read_count = mgr.Value('int', 0)

        # When something is writing to the cache file
        write_lock = mgr.Lock()
        # PID of process with lock
        has_write_lock = mgr.Value('int', -1)

        point_get = mgr.Value('list', [0, 0, 0])
        active = mgr.Value('bool', True)

        logger.progress((3, "Initializing and caching hydro model's grid"))
        try:
            ds = CommonDataset.open(hydrodataset)
            # Query the dataset for common variable names
            # and the time variable.
            logger.debug("Retrieving variable information from dataset")
            common_variables = self.get_common_variables_from_dataset(ds)

            logger.debug("Pickling time variable to disk for particles")
            timevar = ds.gettimevar(common_variables.get("u"))
            f, timevar_pickle_path = tempfile.mkstemp()
            os.close(f)
            f = open(timevar_pickle_path, "wb")
            pickle.dump(timevar, f)
            f.close()
            ds.closenc()
        except:
            logger.warn("Failed to access remote dataset %s" % hydrodataset)
            raise DataControllerError("Inaccessible DAP endpoint: %s" %
                                      hydrodataset)

        # Add data controller to the queue first so that it
        # can get the initial data and is not blocked

        logger.debug('Starting DataController')
        logger.progress((4, "Starting processes"))
        data_controller = parallel.DataController(hydrodataset,
                                                  common_variables,
                                                  n_run,
                                                  get_data,
                                                  write_lock,
                                                  has_write_lock,
                                                  read_lock,
                                                  read_count,
                                                  time_chunk,
                                                  horiz_chunk,
                                                  times,
                                                  self.start,
                                                  point_get,
                                                  self.reference_location,
                                                  low_memory=low_memory,
                                                  cache=self.cache_path)
        tasks.put(data_controller)
        # Create DataController worker
        data_controller_process = parallel.Consumer(tasks,
                                                    results,
                                                    n_run,
                                                    nproc_lock,
                                                    active,
                                                    get_data,
                                                    name="DataController")
        data_controller_process.start()

        logger.debug('Adding %i particles as tasks' % len(self.particles))
        for part in self.particles:
            forcing = parallel.ForceParticle(
                part,
                hydrodataset,
                common_variables,
                timevar_pickle_path,
                times,
                self.start,
                self._models,
                self.reference_location.point,
                self._use_bathymetry,
                self._use_shoreline,
                self._use_seasurface,
                get_data,
                n_run,
                read_lock,
                has_read_lock,
                read_count,
                point_get,
                data_request_lock,
                has_data_request_lock,
                reverse_distance=self.reverse_distance,
                bathy=self.bathy_path,
                shoreline_path=self.shoreline_path,
                cache=self.cache_path,
                time_method=self.time_method)
            tasks.put(forcing)

        # Create workers for the particles.
        procs = [
            parallel.Consumer(tasks,
                              results,
                              n_run,
                              nproc_lock,
                              active,
                              get_data,
                              name="ForceParticle-%d" % i)
            for i in xrange(nproc - 1)
        ]
        for w in procs:
            w.start()
            logger.debug('Started %s' % w.name)

        # Get results back from queue, test for failed particles
        return_particles = []
        retrieved = 0.
        error_code = 0

        logger.info("Waiting for %i particle results" % len(self.particles))
        logger.progress((5, "Running model"))
        while retrieved < number_of_tasks:
            try:
                # Returns a tuple of code, result
                code, tempres = results.get(timeout=240)
            except Queue.Empty:
                # Poll the active processes to make sure they are all alive and then continue with loop
                if not data_controller_process.is_alive(
                ) and data_controller_process.exitcode != 0:
                    # Data controller is zombied, kill off other processes.
                    get_data.value == False
                    results.put((-2, "DataController"))

                new_procs = []
                old_procs = []
                for p in procs:
                    if not p.is_alive() and p.exitcode != 0:
                        # Do what the Consumer would do if something finished.
                        # Add something to results queue
                        results.put((-3, "ZombieParticle"))
                        # Decrement nproc (DataController exits when this is 0)
                        with nproc_lock:
                            n_run.value = n_run.value - 1

                        # Remove task from queue (so they can be joined later on)
                        tasks.task_done()

                        # Start a new Consumer.  It will exit if there are no tasks available.
                        np = parallel.Consumer(tasks,
                                               results,
                                               n_run,
                                               nproc_lock,
                                               active,
                                               get_data,
                                               name=p.name)
                        new_procs.append(np)
                        old_procs.append(p)

                        # Release any locks the PID had
                        if p.pid in has_read_lock:
                            with read_lock:
                                read_count.value -= 1
                                has_read_lock.remove(p.pid)

                        if has_data_request_lock.value == p.pid:
                            has_data_request_lock.value = -1
                            try:
                                data_request_lock.release()
                            except:
                                pass

                        if has_write_lock.value == p.pid:
                            has_write_lock.value = -1
                            try:
                                write_lock.release()
                            except:
                                pass

                for p in old_procs:
                    try:
                        procs.remove(p)
                    except ValueError:
                        logger.warn(
                            "Did not find %s in the list of processes.  Continuing on."
                            % p.name)

                for p in new_procs:
                    procs.append(p)
                    logger.warn(
                        "Started a new consumer (%s) to replace a zombie consumer"
                        % p.name)
                    p.start()

            else:
                # We got one.
                retrieved += 1
                if code == None:
                    logger.warn("Got an unrecognized response from a task.")
                elif code == -1:
                    logger.warn("Particle %s has FAILED!!" % tempres.uid)
                elif code == -2:
                    error_code = code
                    logger.warn(
                        "DataController has FAILED!!  Removing cache file so the particles fail."
                    )
                    try:
                        os.remove(self.cache_path)
                    except OSError:
                        logger.debug(
                            "Could not remove cache file, it probably never existed"
                        )
                        pass
                elif code == -3:
                    error_code = code
                    logger.info(
                        "A zombie process was caught and task was removed from queue"
                    )
                elif isinstance(tempres, Particle):
                    logger.info("Particle %d finished" % tempres.uid)
                    return_particles.append(tempres)
                    # We mulitply by 95 here to save 5% for the exporting
                    logger.progress(
                        (round((retrieved / number_of_tasks) * 90.,
                               1), "Particle %d finished" % tempres.uid))
                elif tempres == "DataController":
                    logger.info("DataController finished")
                    logger.progress((round((retrieved / number_of_tasks) * 90.,
                                           1), "DataController finished"))
                else:
                    logger.info("Got a strange result on results queue")
                    logger.info(str(tempres))

                logger.info("Retrieved %i/%i results" %
                            (int(retrieved), number_of_tasks))

        if len(return_particles) != len(self.particles):
            logger.warn(
                "Some particles failed and are not included in the output")

        # The results queue should be empty at this point
        assert results.empty() is True

        # Should be good to join on the tasks now that the queue is empty
        logger.info("Joining the task queue")
        tasks.join()

        # Join all processes
        logger.info("Joining the processes")
        for w in procs + [data_controller_process]:
            # Wait 10 seconds
            w.join(10.)
            if w.is_alive():
                # Process is hanging, kill it.
                logger.info(
                    "Terminating %s forcefully.  This should have exited itself."
                    % w.name)
                w.terminate()

        logger.info('Workers complete')

        self.particles = return_particles

        # Remove Manager so it shuts down
        del mgr

        # Remove pickled timevar
        os.remove(timevar_pickle_path)

        # Remove the cache file
        if remove_cache is True:
            try:
                os.remove(self.cache_path)
            except OSError:
                logger.debug(
                    "Could not remove cache file, it probably never existed")

        logger.progress((96, "Exporting results"))

        if len(self.particles) > 0:
            # If output_formats and path specified,
            # output particle run data to disk when completed
            if "output_formats" in kwargs:
                # Make sure output_path is also included
                if kwargs.get("output_path", None) != None:
                    formats = kwargs.get("output_formats")
                    output_path = kwargs.get("output_path")
                    if isinstance(formats, list):
                        for format in formats:
                            logger.info("Exporting to: %s" % format)
                            try:
                                self.export(output_path, format=format)
                            except:
                                logger.error("Failed to export to: %s" %
                                             format)
                    else:
                        logger.warn(
                            'The output_formats parameter should be a list, not saving any output!'
                        )
                else:
                    logger.warn(
                        'No output path defined, not saving any output!')
            else:
                logger.warn('No output format defined, not saving any output!')
        else:
            logger.warn("Model didn't actually do anything, check the log.")
            if error_code == -2:
                raise DataControllerError("Error in the DataController")
            else:
                raise ModelError("Error in the model")

        logger.progress((99, "Model Run Complete"))
        return

コード例 #3

ファイルを表示

ファイル: forcers.py プロジェクト: axiom-data-science/paegan-transport

    def run(self):

        self.load_initial_dataset()

        redis_connection = None
        if self.redis_url is not None and self.redis_results_channel is not None:
            import redis
            redis_connection = redis.from_url(self.redis_url)

        # Setup shoreline
        self._shoreline = None
        if self.useshore is True:
            self._shoreline = Shoreline(path=self.shoreline_path, feature_name=self.shoreline_feature, point=self.release_location_centroid, spatialbuffer=self.shoreline_index_buffer)
            # Make sure we are not starting on land.  Raises exception if we are.
            self._shoreline.intersect(start_point=self.release_location_centroid, end_point=self.release_location_centroid)

        # Setup Bathymetry
        if self.usebathy is True:
            try:
                self._bathymetry = Bathymetry(file=self.bathy_path)
            except Exception:
                logger.exception("Could not load Bathymetry file: %s, using no Bathymetry for this run!" % self.bathy_path)
                self.usebathy = False

        # Calculate datetime at every timestep
        modelTimestep, newtimes = AsaTransport.get_time_objects_from_model_timesteps(self.times, start=self.start_time)

        if self.time_method == 'interp':
            time_indexs = self.timevar.nearest_index(newtimes, select='before')
        elif self.time_method == 'nearest':
            time_indexs = self.timevar.nearest_index(newtimes)
        else:
            logger.warn("Method for computing u,v,w,temp,salt not supported!")
        try:
            assert len(newtimes) == len(time_indexs)
        except AssertionError:
            logger.exception("Time indexes are messed up. Need to have equal datetime and time indexes")
            raise

        # Keep track of how much time we spend in each area.
        tot_boundary_time = 0.
        tot_model_time    = {}
        tot_read_data     = 0.
        for m in self.models:
            tot_model_time[m.name] = 0.

        # Set the base conditions
        # If using Redis, send the results
        if redis_connection is not None:
            redis_connection.publish(self.redis_results_channel, json.dumps(self.particle.timestep_dump()))

        # loop over timesteps
        # We don't loop over the last time_index because
        # we need to query in the time_index and set the particle's
        # location as the 'newtime' object.
        for loop_i, i in enumerate(time_indexs[0:-1]):

            if self.active and self.active.value is False:
                raise ValueError("Particle exiting due to Failure.")

            newloc = None

            st = time.clock()
            # Get the variable data required by the models
            if self.time_method == 'nearest':
                u, v, w, temp, salt = self.get_nearest_data(i)
            elif self.time_method == 'interp':
                u, v, w, temp, salt = self.get_linterp_data(i, newtimes[loop_i])
            else:
                logger.warn("Method for computing u,v,w,temp,salt is unknown. Only 'nearest' and 'interp' are supported.")
            tot_read_data += (time.clock() - st)

            # Get the bathy value at the particles location
            if self.usebathy is True:
                bathymetry_value = self._bathymetry.get_depth(self.particle.location)
            else:
                bathymetry_value = -999999999999999

            # Age the particle by the modelTimestep (seconds)
            # 'Age' meaning the amount of time it has been forced.
            self.particle.age(seconds=modelTimestep[loop_i])

            # loop over models - sort these in the order you want them to run
            for model in self.models:
                st = time.clock()
                movement = model.move(self.particle, u, v, w, modelTimestep[loop_i], temperature=temp, salinity=salt, bathymetry_value=bathymetry_value)
                newloc = Location4D(latitude=movement['latitude'], longitude=movement['longitude'], depth=movement['depth'], time=newtimes[loop_i+1])
                tot_model_time[m.name] += (time.clock() - st)
                if logger.isEnabledFor(logging.DEBUG):
                    logger.debug("%s - moved %.3f meters (horizontally) and %.3f meters (vertically) by %s with data from %s" % (self.particle.logstring(), movement['distance'], movement['vertical_distance'], model.__class__.__name__, newtimes[loop_i].isoformat()))
                if newloc:
                    st = time.clock()
                    self.boundary_interaction(particle=self.particle, starting=self.particle.location, ending=newloc,
                                              distance=movement['distance'], angle=movement['angle'],
                                              azimuth=movement['azimuth'], reverse_azimuth=movement['reverse_azimuth'],
                                              vertical_distance=movement['vertical_distance'], vertical_angle=movement['vertical_angle'])
                    tot_boundary_time += (time.clock() - st)
                if logger.isEnabledFor(logging.DEBUG):
                    logger.debug("%s - was forced by %s and is now at %s" % (self.particle.logstring(), model.__class__.__name__, self.particle.location.logstring()))

            self.particle.note = self.particle.outputstring()
            # Each timestep, save the particles status and environmental variables.
            # This keep fields such as temp, salt, halted, settled, and dead matched up with the number of timesteps
            self.particle.save()

            # If using Redis, send the results
            if redis_connection is not None:
                redis_connection.publish(self.redis_results_channel, json.dumps(self.particle.timestep_dump()))

        self.dataset.closenc()

        # We won't pull data for the last entry in locations, but we need to populate it with fill data.
        self.particle.fill_gap()

        if self.usebathy is True:
            self._bathymetry.close()

        if self.useshore is True:
            self._shoreline.close()

        logger.info(textwrap.dedent('''Particle %i Stats:
                          Data read: %f seconds
                          Model forcing: %s seconds
                          Boundary intersection: %f seconds''' % (self.particle.uid, tot_read_data, { s: '{:g} seconds'.format(f) for s, f in list(tot_model_time.items()) }, tot_boundary_time)))

        return self.particle

コード例 #4

ファイルを表示

ファイル: base.py プロジェクト: ocefpaf/paegan-transport

    def setup_run(self, hydrodataset, **kwargs):

        self.hydrodataset = hydrodataset

        logger.setLevel(logging.PROGRESS)

        # Relax.
        time.sleep(0.5)

        # Add ModelController description to logfile
        logger.info(str(self))

        # Add the model descriptions to logfile
        for m in self._models:
            logger.info(str(m))

        # Calculate the model timesteps
        # We need times = len(self._nstep) + 1 since data is stored one timestep
        # after a particle is forced with the final timestep's data.
        self.times = list(range(0, (self._step * self._nstep) + 1, self._step))
        # Calculate a datetime object for each model timestep
        # This method is duplicated in CachingDataController and CachingForcer
        # using the 'times' variables above.  Will be useful in those other
        # locations for particles released at different times
        # i.e. released over a few days
        self.modelTimestep, self.datetimes = AsaTransport.get_time_objects_from_model_timesteps(
            self.times, start=self.start)

        logger.progress((1, "Setting up particle start locations"))
        point_locations = []
        if isinstance(self.geometry, Point):
            point_locations = [self.reference_location] * self._npart
        elif isinstance(self.geometry, Polygon) or isinstance(
                self.geometry, MultiPolygon):
            point_locations = [
                Location4D(latitude=loc.y,
                           longitude=loc.x,
                           depth=self._depth,
                           time=self.start)
                for loc in AsaTransport.fill_polygon_with_points(
                    goal=self._npart, polygon=self.geometry)
            ]

        # Initialize the particles
        logger.progress((2, "Initializing particles"))
        for x in range(0, self._npart):
            p = LarvaParticle(id=x)
            p.location = point_locations[x]
            # We don't need to fill the location gaps here for environment variables
            # because the first data collected actually relates to this original
            # position.
            # We do need to fill in fields such as settled, halted, etc.
            p.fill_status_gap()
            # Set the inital note
            p.note = p.outputstring()
            p.notes.append(p.note)
            self.particles.append(p)

        logger.progress((3, "Initializing and caching hydro model's grid %s" %
                         self.hydrodataset))
        try:
            ds = CommonDataset.open(self.hydrodataset)
            # Query the dataset for common variable names
            # and the time variable.
            logger.debug("Retrieving variable information from dataset")
            self.common_variables = self.get_common_variables_from_dataset(ds)
        except Exception:
            logger.exception("Failed to access dataset %s" % self.hydrodataset)
            raise BaseDataControllerError("Inaccessible Dataset: %s" %
                                          self.hydrodataset)

        self.timevar = None
        try:
            assert self.common_variables.get("u") in ds._current_variables
            assert self.common_variables.get("v") in ds._current_variables
            assert self.common_variables.get("x") in ds._current_variables
            assert self.common_variables.get("y") in ds._current_variables

            self.timevar = ds.gettimevar(self.common_variables.get("u"))
            model_start = self.timevar.get_dates()[0]
            model_end = self.timevar.get_dates()[-1]
        except AssertionError:
            logger.exception(
                "Could not locate variables needed to run model: %s" %
                str(self.common_variables))
            raise BaseDataControllerError(
                "A required data variable was not found in %s" %
                self.hydrodataset)
        finally:
            ds.closenc()

        try:
            assert self.start > model_start
            assert self.start < model_end
        except AssertionError:
            raise BaseDataControllerError(
                "Start time for model (%s) is not available in source dataset (%s/%s)"
                % (self.datetimes[0], model_start, model_end))

        try:
            assert self.datetimes[-1] > model_start
            assert self.datetimes[-1] < model_end
        except AssertionError:
            raise BaseDataControllerError(
                "End time for model (%s) is not available in source dataset (%s/%s)"
                % (self.datetimes[-1], model_start, model_end))

コード例 #5

ファイルを表示

ファイル: model_controller.py プロジェクト: kwilcox/paegan-transport

    def setup_run(self, **kwargs):

        logger.setLevel(logging.PROGRESS)

        self.redis_url             = None
        self.redis_log_channel     = None
        self.redis_results_channel = None
        if "redis" in kwargs.get("output_formats", []):
            from paegan.logger.redis_handler import RedisHandler
            self.redis_url             = kwargs.get("redis_url")
            self.redis_log_channel     = kwargs.get("redis_log_channel")
            self.redis_results_channel = kwargs.get("redis_results_channel")
            rhandler = RedisHandler(self.redis_log_channel, self.redis_url)
            rhandler.setLevel(logging.PROGRESS)
            logger.addHandler(rhandler)

        # Relax.
        time.sleep(0.5)

        # Add ModelController description to logfile
        logger.info(unicode(self))

        # Add the model descriptions to logfile
        for m in self._models:
            logger.info(unicode(m))

        # Calculate the model timesteps
        # We need times = len(self._nstep) + 1 since data is stored one timestep
        # after a particle is forced with the final timestep's data.
        self.times = range(0, (self._step*self._nstep)+1, self._step)
        # Calculate a datetime object for each model timestep
        # This method is duplicated in CachingDataController and CachingForcer
        # using the 'times' variables above.  Will be useful in those other
        # locations for particles released at different times
        # i.e. released over a few days
        self.modelTimestep, self.datetimes = AsaTransport.get_time_objects_from_model_timesteps(self.times, start=self.start)

        logger.progress((1, "Setting up particle start locations"))
        point_locations = []
        if isinstance(self.geometry, Point):
            point_locations = [self.reference_location] * self._npart
        elif isinstance(self.geometry, Polygon) or isinstance(self.geometry, MultiPolygon):
            point_locations = [Location4D(latitude=loc.y, longitude=loc.x, depth=self._depth, time=self.start) for loc in AsaTransport.fill_polygon_with_points(goal=self._npart, polygon=self.geometry)]

        # Initialize the particles
        logger.progress((2, "Initializing particles"))
        for x in xrange(0, self._npart):
            p = LarvaParticle(id=x)
            p.location = point_locations[x]
            # We don't need to fill the location gaps here for environment variables
            # because the first data collected actually relates to this original
            # position.
            # We do need to fill in fields such as settled, halted, etc.
            p.fill_status_gap()
            # Set the inital note
            p.note = p.outputstring()
            p.notes.append(p.note)
            self.particles.append(p)

        if kwargs.get("manager", True):
            # Get the number of cores (may take some tuning) and create that
            # many workers then pass particles into the queue for the workers
            self.mgr = multiprocessing.Manager()

            # This tracks if the system is 'alive'.  Most looping whiles will check this
            # and break out if it is False.  This is True until something goes very wrong.
            self.active = self.mgr.Value('bool', True)

            # Each particle is a task, plus the CachingDataController
            self.number_of_tasks = self.get_number_of_tasks()

            # Either spin up the number of cores, or the number of tasks
            self.nproc = min(multiprocessing.cpu_count() - 1, self.number_of_tasks)

            # Number of tasks that we need to run.  This is decremented everytime something exits.
            self.n_run = self.mgr.Value('int', self.number_of_tasks)
            # The lock that controls access to the 'n_run' variable
            self.nproc_lock = self.mgr.Lock()

            # Create the task queue for all of the particles and the CachingDataController
            self.tasks = multiprocessing.JoinableQueue(self.number_of_tasks)
            # Create the result queue for all of the particles and the CachingDataController
            self.results = self.mgr.Queue(self.number_of_tasks)

        logger.progress((3, "Initializing and caching hydro model's grid"))
        try:
            ds = CommonDataset.open(self.hydrodataset)
        except Exception:
            logger.exception("Failed to access dataset %s" % self.hydrodataset)
            raise BaseDataControllerError("Inaccessible Dataset: %s" % self.hydrodataset)
        # Query the dataset for common variable names
        # and the time variable.
        logger.debug("Retrieving variable information from dataset")
        self.common_variables = self.get_common_variables_from_dataset(ds)

        self.timevar = None
        try:
            assert self.common_variables.get("u") in ds._current_variables
            assert self.common_variables.get("v") in ds._current_variables
            assert self.common_variables.get("x") in ds._current_variables
            assert self.common_variables.get("y") in ds._current_variables

            self.timevar = ds.gettimevar(self.common_variables.get("u"))
        except AssertionError:
            logger.exception("Could not locate variables needed to run model: %s" % unicode(self.common_variables))
            raise BaseDataControllerError("A required data variable was not found in %s" % self.hydrodataset)

        model_start = self.timevar.get_dates()[0]
        model_end   = self.timevar.get_dates()[-1]

        try:
            assert self.start > model_start
            assert self.start < model_end
        except AssertionError:
            raise BaseDataControllerError("Start time for model (%s) is not available in source dataset (%s/%s)" % (self.datetimes[0], model_start, model_end))

        try:
            assert self.datetimes[-1] > model_start
            assert self.datetimes[-1] < model_end
        except AssertionError:
            raise BaseDataControllerError("End time for model (%s) is not available in source dataset (%s/%s)" % (self.datetimes[-1], model_start, model_end))

        ds.closenc()

コード例 #6

ファイルを表示

ファイル: parallel_manager.py プロジェクト: blazetopher/paegan-transport

    def __call__(self, proc, active):

        self.active = active

        if self.usebathy == True:
            self._bathymetry = Bathymetry(file=self.bathy)
        
        self._shoreline = None  
        if self.useshore == True:
            self._shoreline = Shoreline(file=self.shoreline_path, point=self.release_location_centroid, spatialbuffer=0.25)
            # Make sure we are not starting on land.  Raises exception if we are.
            self._shoreline.intersect(start_point=self.release_location_centroid, end_point=self.release_location_centroid)
            
        self.proc = proc
        part = self.part
        
        if self.active.value == True:
            while self.get_data.value == True:
                logger.debug("Waiting for DataController to start...")
                timer.sleep(10)
                pass

        # Initialize commondataset of local cache, then
        # close the related netcdf file
        try:
            with self.read_lock:
                self.read_count.value += 1
                self.has_read_lock.append(os.getpid())
            self.dataset = CommonDataset.open(self.localpath)
            self.dataset.closenc()
        except StandardError:
            logger.warn("No cache file: %s.  Particle exiting" % self.localpath)
            raise
        finally:
            with self.read_lock:
                self.read_count.value -= 1
                self.has_read_lock.remove(os.getpid())

        # Calculate datetime at every timestep
        modelTimestep, newtimes = AsaTransport.get_time_objects_from_model_timesteps(self.times, start=self.start_time)

        # Load Timevar from pickle serialization
        f = open(self.timevar_pickle_path,"rb")
        timevar = pickle.load(f)
        f.close()

        if self.time_method == 'interp':
            time_indexs = timevar.nearest_index(newtimes, select='before')
        elif self.time_method == 'nearest':
            time_indexs = timevar.nearest_index(newtimes)
        else:
            logger.warn("Method for computing u,v,w,temp,salt not supported!")
        try:
            assert len(newtimes) == len(time_indexs)
        except AssertionError:
            logger.error("Time indexes are messed up. Need to have equal datetime and time indexes")
            raise

        # loop over timesteps
        # We don't loop over the last time_index because
        # we need to query in the time_index and set the particle's
        # location as the 'newtime' object.
        for loop_i, i in enumerate(time_indexs[0:-1]):

            if self.active.value == False:
                raise ValueError("Particle exiting due to Failure.")

            newloc = None

            # if need a time that is outside of what we have
            #if self.active.value == True:
            #    while self.get_data.value == True:
            #        logger.info("Waiting for DataController to get out...")
            #        timer.sleep(4)
            #        pass
                
            # Get the variable data required by the models
            if self.time_method == 'nearest':
                u, v, w, temp, salt = self.data_nearest(i, newtimes[loop_i])
            elif self.time_method == 'interp': 
                u, v, w, temp, salt = self.data_interp(i, timevar, newtimes[loop_i])
            else:
                logger.warn("Method for computing u,v,w,temp,salt not supported!")

            #logger.info("U: %.4f, V: %.4f, W: %.4f" % (u,v,w))
            #logger.info("Temp: %.4f, Salt: %.4f" % (temp,salt))

            # Get the bathy value at the particles location
            if self.usebathy == True:
                bathymetry_value = self._bathymetry.get_depth(part.location)
            else:
                bathymetry_value = -999999999999999

            # Age the particle by the modelTimestep (seconds)
            # 'Age' meaning the amount of time it has been forced.
            part.age(seconds=modelTimestep[loop_i])

            # loop over models - sort these in the order you want them to run
            for model in self.models:
                movement = model.move(part, u, v, w, modelTimestep[loop_i], temperature=temp, salinity=salt, bathymetry_value=bathymetry_value)
                newloc = Location4D(latitude=movement['latitude'], longitude=movement['longitude'], depth=movement['depth'], time=newtimes[loop_i+1])
                logger.debug("%s - moved %.3f meters (horizontally) and %.3f meters (vertically) by %s with data from %s" % (part.logstring(), movement['distance'], movement['vertical_distance'], model.__class__.__name__, newtimes[loop_i].isoformat()))
                if newloc:
                    self.boundary_interaction(particle=part, starting=part.location, ending=newloc,
                        distance=movement['distance'], angle=movement['angle'], 
                        azimuth=movement['azimuth'], reverse_azimuth=movement['reverse_azimuth'], 
                        vertical_distance=movement['vertical_distance'], vertical_angle=movement['vertical_angle'])
                logger.debug("%s - was forced by %s and is now at %s" % (part.logstring(), model.__class__.__name__, part.location.logstring()))

            part.note = part.outputstring()
            # Each timestep, save the particles status and environmental variables.
            # This keep fields such as temp, salt, halted, settled, and dead matched up with the number of timesteps
            part.save()

        # We won't pull data for the last entry in locations, but we need to populate it with fill data.
        part.fill_environment_gap()

        if self.usebathy == True:
            self._bathymetry.close()

        if self.useshore == True:
            self._shoreline.close()

        return part

コード例 #7

ファイルを表示

ファイル: parallel_manager.py プロジェクト: blazetopher/paegan-transport

    def __call__(self, proc, active):
        c = 0
        
        self.dataset = CommonDataset.open(self.url)
        self.proc = proc
        self.remote = self.dataset.nc
        cachepath = self.cache_path
        
        # Calculate the datetimes of the model timesteps like
        # the particle objects do, so we can figure out unique
        # time indices
        modelTimestep, newtimes = AsaTransport.get_time_objects_from_model_timesteps(self.times, start=self.start_time)

        timevar = self.dataset.gettimevar(self.uname)

        # Don't need to grab the last datetime, as it is not needed for forcing, only
        # for setting the time of the final particle forcing
        time_indexs = timevar.nearest_index(newtimes[0:-1], select='before')
        
        # Have to make sure that we get the plus 1 for the
        # linear interpolation of u,v,w,temp,salt
        self.inds = np.unique(time_indexs)
        self.inds = np.append(self.inds, self.inds.max()+1)
        
        # While there is at least 1 particle still running, 
        # stay alive, if not break
        while self.n_run.value > 1:
            logger.debug("Particles are still running, waiting for them to request data...")
            timer.sleep(2)
            # If particle asks for data, do the following
            if self.get_data.value == True:
                logger.debug("Particle asked for data!")

                # Wait for particles to get out
                while True:
                    self.read_lock.acquire()

                    logger.debug("Read count: %d" % self.read_count.value)
                    if self.read_count.value > 0:
                        logger.debug("Waiting for write lock on cache file (particles must stop reading)...")
                        self.read_lock.release()
                        timer.sleep(4)
                    else:
                        break
                    
                # Get write lock on the file.  Already have read lock.
                self.write_lock.acquire()
                self.has_write_lock.value = os.getpid()

                if c == 0:
                    logger.debug("Creating cache file")
                    try:
                        # Open local cache for writing, overwrites
                        # existing file with same name
                        self.local = netCDF4.Dataset(cachepath, 'w')

                        indices = self.dataset.get_indices(self.uname, timeinds=[np.asarray([0])], point=self.start)
                        self.point_get.value = [self.inds[0], indices[-2], indices[-1]]
                        
                        # Create dimensions for u and v variables
                        self.local.createDimension('time', None)
                        self.local.createDimension('level', None)
                        self.local.createDimension('x', None)
                        self.local.createDimension('y', None)
                        
                        # Create 3d or 4d u and v variables
                        if self.remote.variables[self.uname].ndim == 4:
                            self.ndim = 4
                            dimensions = ('time', 'level', 'y', 'x')
                            coordinates = "time z lon lat"
                        elif self.remote.variables[self.uname].ndim == 3:
                            self.ndim = 3
                            dimensions = ('time', 'y', 'x')
                            coordinates = "time lon lat"
                        shape = self.remote.variables[self.uname].shape

                        # If there is no FillValue defined in the dataset, use np.nan. 
                        # Sometimes it will work out correctly and other times we will
                        # have a huge cache file.
                        try:
                            fill = self.remote.variables[self.uname].missing_value
                        except Exception:
                            fill = np.nan
                        
                        # Create domain variable that specifies
                        # where there is data geographically/by time
                        # and where there is not data,
                        #   Used for testing if particle needs to 
                        #   ask cache to update
                        domain = self.local.createVariable('domain', 'i', dimensions, zlib=False, fill_value=0)
                        domain.coordinates = coordinates
                                
                        # Create local u and v variables
                        u = self.local.createVariable('u', 'f', dimensions, zlib=False, fill_value=fill)
                        v = self.local.createVariable('v', 'f', dimensions, zlib=False, fill_value=fill)
                        
                        v.coordinates = coordinates
                        u.coordinates = coordinates

                        localvars = [u, v,]
                        remotevars = [self.remote.variables[self.uname], self.remote.variables[self.vname]]
                        
                        # Create local w variable
                        if self.wname != None:
                            w = self.local.createVariable('w', 'f', dimensions, zlib=False, fill_value=fill)
                            w.coordinates = coordinates
                            localvars.append(w)
                            remotevars.append(self.remote.variables[self.wname])

                        if self.temp_name != None and self.salt_name != None: 
                            # Create local temp and salt vars       
                            temp = self.local.createVariable('temp', 'f', dimensions, zlib=False, fill_value=fill)
                            salt = self.local.createVariable('salt', 'f', dimensions, zlib=False, fill_value=fill)
                            temp.coordinates = coordinates
                            salt.coordinates = coordinates
                            localvars.append(temp)
                            localvars.append(salt)
                            remotevars.append(self.remote.variables[self.temp_name])
                            remotevars.append(self.remote.variables[self.salt_name])
                        
                        # Create local lat/lon coordinate variables
                        if self.remote.variables[self.xname].ndim == 2:
                            lon = self.local.createVariable('lon', 'f', ("y", "x"), zlib=False)
                            lon[:] = self.remote.variables[self.xname][:, :]
                            lat = self.local.createVariable('lat', 'f', ("y", "x"), zlib=False)
                            lat[:] = self.remote.variables[self.yname][:, :]
                        if self.remote.variables[self.xname].ndim == 1:
                            lon = self.local.createVariable('lon', 'f', ("x"), zlib=False)
                            lon[:] = self.remote.variables[self.xname][:]
                            lat = self.local.createVariable('lat', 'f', ("y"), zlib=False)
                            lat[:] = self.remote.variables[self.yname][:]                           
                            
                        # Create local z variable
                        if self.zname != None:            
                            if self.remote.variables[self.zname].ndim == 4:
                                z = self.local.createVariable('z', 'f', ("time","level","y","x"), zlib=False)  
                                remotez = self.remote.variables[self.zname]
                                localvars.append(z)
                                remotevars.append(remotez)
                            elif self.remote.variables[self.zname].ndim == 3:
                                z = self.local.createVariable('z', 'f', ("level","y","x"), zlib=False)
                                z[:] = self.remote.variables[self.zname][:, :, :]
                            elif self.remote.variables[self.zname].ndim ==1:
                                z = self.local.createVariable('z', 'f', ("level",), zlib=False)
                                z[:] = self.remote.variables[self.zname][:]
                                
                        # Create local time variable
                        time = self.local.createVariable('time', 'f8', ("time",), zlib=False)
                        if self.tname != None:
                            time[:] = self.remote.variables[self.tname][self.inds]
                        
                        if self.point_get.value[0]+self.time_size > np.max(self.inds):
                            current_inds = np.arange(self.point_get.value[0], np.max(self.inds)+1)
                        else:
                            current_inds = np.arange(self.point_get.value[0],self.point_get.value[0] + self.time_size)
                        
                        # Get data from remote dataset and add
                        # to local cache  
                        while True:
                            try:
                                self.get_remote_data(localvars, remotevars, current_inds, shape)
                            except:
                                logger.warn("DataController failed to get remote data.  Trying again in 30 seconds")
                                timer.sleep(30)
                            else:
                                break
                        
                        c += 1
                    except StandardError:
                        logger.error("DataController failed to get data (first request)")
                        raise
                    finally:
                        self.local.sync()
                        self.local.close()
                        self.has_write_lock.value = -1
                        self.write_lock.release()
                        self.get_data.value = False
                        self.read_lock.release()
                        logger.debug("Done updating cache file, closing file, and releasing locks")
                else:
                    logger.debug("Updating cache file")
                    try:
                        # Open local cache dataset for appending
                        self.local = netCDF4.Dataset(cachepath, 'a')
                        
                        # Create local and remote variable objects
                        # for the variables of interest  
                        u = self.local.variables['u']
                        v = self.local.variables['v']
                        time = self.local.variables['time']
                        remoteu = self.remote.variables[self.uname]
                        remotev = self.remote.variables[self.vname]
                        
                        # Create lists of variable objects for
                        # the data updater
                        localvars = [u, v, ]
                        remotevars = [remoteu, remotev, ]
                        if self.salt_name != None and self.temp_name != None:
                            salt = self.local.variables['salt']
                            temp = self.local.variables['temp']
                            remotesalt = self.remote.variables[self.salt_name]
                            remotetemp = self.remote.variables[self.temp_name]
                            localvars.append(salt)
                            localvars.append(temp)
                            remotevars.append(remotesalt)
                            remotevars.append(remotetemp)
                        if self.wname != None:
                            w = self.local.variables['w']
                            remotew = self.remote.variables[self.wname]
                            localvars.append(w)
                            remotevars.append(remotew)
                        if self.zname != None:
                            remotez = self.remote.variables[self.zname]
                            if remotez.ndim == 4:
                                z = self.local.variables['z']
                                localvars.append(z)
                                remotevars.append(remotez)
                        if self.tname != None:
                            remotetime = self.remote.variables[self.tname]
                            time[self.inds] = self.remote.variables[self.inds]
                        
                        if self.point_get.value[0]+self.time_size > np.max(self.inds):
                            current_inds = np.arange(self.point_get.value[0], np.max(self.inds)+1)
                        else:
                            current_inds = np.arange(self.point_get.value[0],self.point_get.value[0] + self.time_size)
                        
                        # Get data from remote dataset and add
                        # to local cache
                        while True:
                            try:
                                self.get_remote_data(localvars, remotevars, current_inds, shape)
                            except:
                                logger.warn("DataController failed to get remote data.  Trying again in 30 seconds")
                                timer.sleep(30)
                            else:
                                break
                        
                        c += 1
                    except StandardError:
                        logger.error("DataController failed to get data (not first request)")
                        raise
                    finally:
                        self.local.sync()
                        self.local.close()
                        self.has_write_lock.value = -1
                        self.write_lock.release()
                        self.get_data.value = False
                        self.read_lock.release()
                        logger.debug("Done updating cache file, closing file, and releasing locks")
            else:
                pass        

        self.dataset.closenc()

        return "DataController"

コード例 #8

ファイルを表示

ファイル: model_controller.py プロジェクト: acrosby/paegan-transport

    def run(self, hydrodataset, **kwargs):

        # Add ModelController description to logfile
        logger.info(self)

        # Add the model descriptions to logfile
        for m in self._models:
            logger.info(m)

        # Calculate the model timesteps
        # We need times = len(self._nstep) + 1 since data is stored one timestep
        # after a particle is forced with the final timestep's data.
        times = range(0,(self._step*self._nstep)+1,self._step)
        # Calculate a datetime object for each model timestep
        # This method is duplicated in DataController and ForceParticle
        # using the 'times' variables above.  Will be useful in those other
        # locations for particles released at different times
        # i.e. released over a few days
        modelTimestep, self.datetimes = AsaTransport.get_time_objects_from_model_timesteps(times, start=self.start)

        time_chunk = self._time_chunk
        horiz_chunk = self._horiz_chunk
        low_memory = kwargs.get("low_memory", False)

        # Should we remove the cache file at the end of the run?
        remove_cache = kwargs.get("remove_cache", True)

        self.bathy_path = kwargs.get("bathy", None)

        self.cache_path = kwargs.get("cache", None)
        if self.cache_path is None:
            # Generate temp filename for dataset cache
            default_cache_dir = os.path.join(os.path.dirname(__file__), "_cache")
            temp_name = AsaRandom.filename(prefix=str(datetime.now().microsecond), suffix=".nc")
            self.cache_path = os.path.join(default_cache_dir, temp_name)
        
        logger.progress((1, "Setting up particle start locations"))
        point_locations = []
        if isinstance(self.geometry, Point):
            point_locations = [self.reference_location] * self._npart
        elif isinstance(self.geometry, Polygon) or isinstance(self.geometry, MultiPolygon):
            point_locations = [Location4D(latitude=loc.y, longitude=loc.x, depth=self._depth, time=self.start) for loc in AsaTransport.fill_polygon_with_points(goal=self._npart, polygon=self.geometry)]

        # Initialize the particles
        logger.progress((2, "Initializing particles"))
        for x in xrange(0, self._npart):
            p = LarvaParticle(id=x)
            p.location = point_locations[x]
            # We don't need to fill the location gaps here for environment variables
            # because the first data collected actually relates to this original
            # position.
            # We do need to fill in fields such as settled, halted, etc.
            p.fill_status_gap()
            # Set the inital note
            p.note = p.outputstring()
            p.notes.append(p.note)
            self.particles.append(p)

        # This is where it makes sense to implement the multiprocessing
        # looping for particles and models. Can handle each particle in 
        # parallel probably.
        #
        # Get the number of cores (may take some tuning) and create that
        # many workers then pass particles into the queue for the workers
        mgr = multiprocessing.Manager()
        nproc = multiprocessing.cpu_count() - 1
        if nproc <= 0:
            raise ValueError("Model does not run using less than two CPU cores")

        # Each particle is a task, plus the DataController
        number_of_tasks = len(self.particles) + 1

        # We need a process for each particle and one for the data controller
        nproc = min(number_of_tasks, nproc)

        # When a particle requests data
        data_request_lock = mgr.Lock()
        # PID of process with lock
        has_data_request_lock = mgr.Value('int',-1)

        nproc_lock = mgr.Lock()
        
        # Create the task queue for all of the particles and the DataController
        tasks = multiprocessing.JoinableQueue(number_of_tasks)
        # Create the result queue for all of the particles and the DataController
        results = mgr.Queue(number_of_tasks)
        
        # Create the shared state objects
        get_data = mgr.Value('bool', True)
        # Number of tasks
        n_run = mgr.Value('int', number_of_tasks)
        updating = mgr.Value('bool', False)

        # When something is reading from cache file
        read_lock = mgr.Lock()
        # list of PIDs that are reading
        has_read_lock = mgr.list()
        read_count = mgr.Value('int', 0)

        # When something is writing to the cache file
        write_lock = mgr.Lock()
        # PID of process with lock
        has_write_lock = mgr.Value('int',-1)

        point_get = mgr.Value('list', [0, 0, 0])
        active = mgr.Value('bool', True)
        
        logger.progress((3, "Initializing and caching hydro model's grid"))
        try:
            ds = CommonDataset.open(hydrodataset)
            # Query the dataset for common variable names
            # and the time variable.
            logger.debug("Retrieving variable information from dataset")
            common_variables = self.get_common_variables_from_dataset(ds)

            logger.debug("Pickling time variable to disk for particles")
            timevar = ds.gettimevar(common_variables.get("u"))
            f, timevar_pickle_path = tempfile.mkstemp()
            os.close(f)
            f = open(timevar_pickle_path, "wb")
            pickle.dump(timevar, f)
            f.close()
            ds.closenc()
        except:
            logger.warn("Failed to access remote dataset %s" % hydrodataset)
            raise DataControllerError("Inaccessible DAP endpoint: %s" % hydrodataset)


        # Add data controller to the queue first so that it 
        # can get the initial data and is not blocked
        
        logger.debug('Starting DataController')
        logger.progress((4, "Starting processes"))
        data_controller = parallel.DataController(hydrodataset, common_variables, n_run, get_data, write_lock, has_write_lock, read_lock, read_count,
                                                  time_chunk, horiz_chunk, times,
                                                  self.start, point_get, self.reference_location,
                                                  low_memory=low_memory,
                                                  cache=self.cache_path)
        tasks.put(data_controller)
        # Create DataController worker
        data_controller_process = parallel.Consumer(tasks, results, n_run, nproc_lock, active, get_data, name="DataController")
        data_controller_process.start()
        
        logger.debug('Adding %i particles as tasks' % len(self.particles))
        for part in self.particles:
            forcing = parallel.ForceParticle(part,
                                        hydrodataset,
                                        common_variables,
                                        timevar_pickle_path,
                                        times,
                                        self.start,
                                        self._models,
                                        self.reference_location.point,
                                        self._use_bathymetry,
                                        self._use_shoreline,
                                        self._use_seasurface,
                                        get_data,
                                        n_run,
                                        read_lock,
                                        has_read_lock,
                                        read_count,
                                        point_get,
                                        data_request_lock,
                                        has_data_request_lock,
                                        reverse_distance=self.reverse_distance,
                                        bathy=self.bathy_path,
                                        shoreline_path=self.shoreline_path,
                                        shoreline_feature=self.shoreline_feature,
                                        cache=self.cache_path,
                                        time_method=self.time_method)
            tasks.put(forcing)

        # Create workers for the particles.
        procs = [ parallel.Consumer(tasks, results, n_run, nproc_lock, active, get_data, name="ForceParticle-%d"%i)
                  for i in xrange(nproc - 1) ]
        for w in procs:
            w.start()
            logger.debug('Started %s' % w.name)

        # Get results back from queue, test for failed particles
        return_particles = []
        retrieved = 0.
        error_code = 0

        logger.info("Waiting for %i particle results" % len(self.particles))
        logger.progress((5, "Running model"))
        while retrieved < number_of_tasks:
            try:
                # Returns a tuple of code, result
                code, tempres = results.get(timeout=240)
            except Queue.Empty:
                # Poll the active processes to make sure they are all alive and then continue with loop
                if not data_controller_process.is_alive() and data_controller_process.exitcode != 0:
                    # Data controller is zombied, kill off other processes.
                    get_data.value == False
                    results.put((-2, "DataController"))

                new_procs = []
                old_procs = []
                for p in procs:
                    if not p.is_alive() and p.exitcode != 0:
                        # Do what the Consumer would do if something finished.
                        # Add something to results queue
                        results.put((-3, "ZombieParticle"))
                        # Decrement nproc (DataController exits when this is 0)
                        with nproc_lock:
                            n_run.value = n_run.value - 1

                        # Remove task from queue (so they can be joined later on)
                        tasks.task_done()

                        # Start a new Consumer.  It will exit if there are no tasks available.
                        np = parallel.Consumer(tasks, results, n_run, nproc_lock, active, get_data, name=p.name)
                        new_procs.append(np)
                        old_procs.append(p)
                        
                        # Release any locks the PID had
                        if p.pid in has_read_lock:
                            with read_lock:
                                read_count.value -= 1
                                has_read_lock.remove(p.pid)

                        if has_data_request_lock.value == p.pid:
                            has_data_request_lock.value = -1
                            try:
                                data_request_lock.release()
                            except:
                                pass
                            
                        if has_write_lock.value == p.pid:
                            has_write_lock.value = -1
                            try:
                                write_lock.release()
                            except:
                                pass
                            

                for p in old_procs:
                    try:
                        procs.remove(p)
                    except ValueError:
                        logger.warn("Did not find %s in the list of processes.  Continuing on." % p.name)

                for p in new_procs:
                    procs.append(p)
                    logger.warn("Started a new consumer (%s) to replace a zombie consumer" % p.name)
                    p.start()
                
            else:
                # We got one.
                retrieved += 1
                if code == None:
                    logger.warn("Got an unrecognized response from a task.")
                elif code == -1:
                    logger.warn("Particle %s has FAILED!!" % tempres.uid)
                elif code == -2:
                    error_code = code
                    logger.warn("DataController has FAILED!!  Removing cache file so the particles fail.")
                    try:
                        os.remove(self.cache_path)
                    except OSError:
                        logger.debug("Could not remove cache file, it probably never existed")
                        pass
                elif code == -3:
                    error_code = code
                    logger.info("A zombie process was caught and task was removed from queue")
                elif isinstance(tempres, Particle):
                    logger.info("Particle %d finished" % tempres.uid)
                    return_particles.append(tempres)
                    # We mulitply by 95 here to save 5% for the exporting
                    logger.progress((round((retrieved / number_of_tasks) * 90.,1), "Particle %d finished" % tempres.uid))
                elif tempres == "DataController":
                    logger.info("DataController finished")
                    logger.progress((round((retrieved / number_of_tasks) * 90.,1), "DataController finished"))
                else:
                    logger.info("Got a strange result on results queue")
                    logger.info(str(tempres))

                logger.info("Retrieved %i/%i results" % (int(retrieved),number_of_tasks))
        
        if len(return_particles) != len(self.particles):
            logger.warn("Some particles failed and are not included in the output")

        # The results queue should be empty at this point
        assert results.empty() is True

        # Should be good to join on the tasks now that the queue is empty
        logger.info("Joining the task queue")
        tasks.join()

        # Join all processes
        logger.info("Joining the processes")
        for w in procs + [data_controller_process]:
                # Wait 10 seconds
                w.join(10.)
                if w.is_alive():
                    # Process is hanging, kill it.
                    logger.info("Terminating %s forcefully.  This should have exited itself." % w.name)
                    w.terminate()
                    
        logger.info('Workers complete')

        self.particles = return_particles

        # Remove Manager so it shuts down
        del mgr

        # Remove pickled timevar
        os.remove(timevar_pickle_path)

        # Remove the cache file
        if remove_cache is True:
            try:
                os.remove(self.cache_path)
            except OSError:
                logger.debug("Could not remove cache file, it probably never existed")

        logger.progress((96, "Exporting results"))

        if len(self.particles) > 0:
            # If output_formats and path specified,
            # output particle run data to disk when completed
            if "output_formats" in kwargs:
                # Make sure output_path is also included
                if kwargs.get("output_path", None) != None:
                    formats = kwargs.get("output_formats")
                    output_path = kwargs.get("output_path")
                    if isinstance(formats, list):
                        for format in formats:
                            logger.info("Exporting to: %s" % format)
                            try:
                                self.export(output_path, format=format)
                            except:
                                logger.error("Failed to export to: %s" % format)
                    else:
                        logger.warn('The output_formats parameter should be a list, not saving any output!')  
                else:
                    logger.warn('No output path defined, not saving any output!')  
            else:
                logger.warn('No output format defined, not saving any output!')
        else:
            logger.warn("Model didn't actually do anything, check the log.")
            if error_code == -2:
                raise DataControllerError("Error in the DataController")
            else:
                raise ModelError("Error in the model")

        logger.progress((99, "Model Run Complete"))
        return

コード例 #9

ファイルを表示

ファイル: base.py プロジェクト: axiom-data-science/paegan-transport

    def setup_run(self, hydrodataset, **kwargs):

        self.hydrodataset = hydrodataset

        logger.setLevel(logging.PROGRESS)

        # Relax.
        time.sleep(0.5)

        # Add ModelController description to logfile
        logger.info(str(self))

        # Add the model descriptions to logfile
        for m in self._models:
            logger.info(str(m))

        # Calculate the model timesteps
        # We need times = len(self._nstep) + 1 since data is stored one timestep
        # after a particle is forced with the final timestep's data.
        self.times = list(range(0, (self._step*self._nstep)+1, self._step))
        # Calculate a datetime object for each model timestep
        # This method is duplicated in CachingDataController and CachingForcer
        # using the 'times' variables above.  Will be useful in those other
        # locations for particles released at different times
        # i.e. released over a few days
        self.modelTimestep, self.datetimes = AsaTransport.get_time_objects_from_model_timesteps(self.times, start=self.start)

        logger.progress((1, "Setting up particle start locations"))
        point_locations = []
        if isinstance(self.geometry, Point):
            point_locations = [self.reference_location] * self._npart
        elif isinstance(self.geometry, Polygon) or isinstance(self.geometry, MultiPolygon):
            point_locations = [Location4D(latitude=loc.y, longitude=loc.x, depth=self._depth, time=self.start) for loc in AsaTransport.fill_polygon_with_points(goal=self._npart, polygon=self.geometry)]

        # Initialize the particles
        logger.progress((2, "Initializing particles"))
        for x in range(0, self._npart):
            p = LarvaParticle(id=x)
            p.location = point_locations[x]
            # We don't need to fill the location gaps here for environment variables
            # because the first data collected actually relates to this original
            # position.
            # We do need to fill in fields such as settled, halted, etc.
            p.fill_status_gap()
            # Set the inital note
            p.note = p.outputstring()
            p.notes.append(p.note)
            self.particles.append(p)

        logger.progress((3, "Initializing and caching hydro model's grid %s" % self.hydrodataset))
        try:
            ds = CommonDataset.open(self.hydrodataset)
            # Query the dataset for common variable names
            # and the time variable.
            logger.debug("Retrieving variable information from dataset")
            self.common_variables = self.get_common_variables_from_dataset(ds)
        except Exception:
            logger.exception("Failed to access dataset %s" % self.hydrodataset)
            raise BaseDataControllerError("Inaccessible Dataset: %s" % self.hydrodataset)

        self.timevar = None
        try:
            assert self.common_variables.get("u") in ds._current_variables
            assert self.common_variables.get("v") in ds._current_variables
            assert self.common_variables.get("x") in ds._current_variables
            assert self.common_variables.get("y") in ds._current_variables

            self.timevar = ds.gettimevar(self.common_variables.get("u"))
            model_start = self.timevar.get_dates()[0]
            model_end = self.timevar.get_dates()[-1]
        except AssertionError:
            logger.exception("Could not locate variables needed to run model: %s" % str(self.common_variables))
            raise BaseDataControllerError("A required data variable was not found in %s" % self.hydrodataset)
        finally:
            ds.closenc()

        try:
            assert self.start > model_start
            assert self.start < model_end
        except AssertionError:
            raise BaseDataControllerError("Start time for model (%s) is not available in source dataset (%s/%s)" % (self.datetimes[0], model_start, model_end))

        try:
            assert self.datetimes[-1] > model_start
            assert self.datetimes[-1] < model_end
        except AssertionError:
            raise BaseDataControllerError("End time for model (%s) is not available in source dataset (%s/%s)" % (self.datetimes[-1], model_start, model_end))

コード例 #10

ファイルを表示

ファイル: forcers.py プロジェクト: ocefpaf/paegan-transport

    def run(self):

        self.load_initial_dataset()

        redis_connection = None
        if self.redis_url is not None and self.redis_results_channel is not None:
            import redis
            redis_connection = redis.from_url(self.redis_url)

        # Setup shoreline
        self._shoreline = None
        if self.useshore is True:
            self._shoreline = Shoreline(
                path=self.shoreline_path,
                feature_name=self.shoreline_feature,
                point=self.release_location_centroid,
                spatialbuffer=self.shoreline_index_buffer)
            # Make sure we are not starting on land.  Raises exception if we are.
            self._shoreline.intersect(
                start_point=self.release_location_centroid,
                end_point=self.release_location_centroid)

        # Setup Bathymetry
        if self.usebathy is True:
            try:
                self._bathymetry = Bathymetry(file=self.bathy_path)
            except Exception:
                logger.exception(
                    "Could not load Bathymetry file: %s, using no Bathymetry for this run!"
                    % self.bathy_path)
                self.usebathy = False

        # Calculate datetime at every timestep
        modelTimestep, newtimes = AsaTransport.get_time_objects_from_model_timesteps(
            self.times, start=self.start_time)

        if self.time_method == 'interp':
            time_indexs = self.timevar.nearest_index(newtimes, select='before')
        elif self.time_method == 'nearest':
            time_indexs = self.timevar.nearest_index(newtimes)
        else:
            logger.warn("Method for computing u,v,w,temp,salt not supported!")
        try:
            assert len(newtimes) == len(time_indexs)
        except AssertionError:
            logger.exception(
                "Time indexes are messed up. Need to have equal datetime and time indexes"
            )
            raise

        # Keep track of how much time we spend in each area.
        tot_boundary_time = 0.
        tot_model_time = {}
        tot_read_data = 0.
        for m in self.models:
            tot_model_time[m.name] = 0.

        # Set the base conditions
        # If using Redis, send the results
        if redis_connection is not None:
            redis_connection.publish(self.redis_results_channel,
                                     json.dumps(self.particle.timestep_dump()))

        # loop over timesteps
        # We don't loop over the last time_index because
        # we need to query in the time_index and set the particle's
        # location as the 'newtime' object.
        for loop_i, i in enumerate(time_indexs[0:-1]):

            if self.active and self.active.value is False:
                raise ValueError("Particle exiting due to Failure.")

            newloc = None

            st = time.clock()
            # Get the variable data required by the models
            if self.time_method == 'nearest':
                u, v, w, temp, salt = self.get_nearest_data(i)
            elif self.time_method == 'interp':
                u, v, w, temp, salt = self.get_linterp_data(
                    i, newtimes[loop_i])
            else:
                logger.warn(
                    "Method for computing u,v,w,temp,salt is unknown. Only 'nearest' and 'interp' are supported."
                )
            tot_read_data += (time.clock() - st)

            # Get the bathy value at the particles location
            if self.usebathy is True:
                bathymetry_value = self._bathymetry.get_depth(
                    self.particle.location)
            else:
                bathymetry_value = -999999999999999

            # Age the particle by the modelTimestep (seconds)
            # 'Age' meaning the amount of time it has been forced.
            self.particle.age(seconds=modelTimestep[loop_i])

            # loop over models - sort these in the order you want them to run
            for model in self.models:
                st = time.clock()
                movement = model.move(self.particle,
                                      u,
                                      v,
                                      w,
                                      modelTimestep[loop_i],
                                      temperature=temp,
                                      salinity=salt,
                                      bathymetry_value=bathymetry_value)
                newloc = Location4D(latitude=movement['latitude'],
                                    longitude=movement['longitude'],
                                    depth=movement['depth'],
                                    time=newtimes[loop_i + 1])
                tot_model_time[m.name] += (time.clock() - st)
                if logger.isEnabledFor(logging.DEBUG):
                    logger.debug(
                        "%s - moved %.3f meters (horizontally) and %.3f meters (vertically) by %s with data from %s"
                        % (self.particle.logstring(), movement['distance'],
                           movement['vertical_distance'],
                           model.__class__.__name__,
                           newtimes[loop_i].isoformat()))
                if newloc:
                    st = time.clock()
                    self.boundary_interaction(
                        particle=self.particle,
                        starting=self.particle.location,
                        ending=newloc,
                        distance=movement['distance'],
                        angle=movement['angle'],
                        azimuth=movement['azimuth'],
                        reverse_azimuth=movement['reverse_azimuth'],
                        vertical_distance=movement['vertical_distance'],
                        vertical_angle=movement['vertical_angle'])
                    tot_boundary_time += (time.clock() - st)
                if logger.isEnabledFor(logging.DEBUG):
                    logger.debug(
                        "%s - was forced by %s and is now at %s" %
                        (self.particle.logstring(), model.__class__.__name__,
                         self.particle.location.logstring()))

            self.particle.note = self.particle.outputstring()
            # Each timestep, save the particles status and environmental variables.
            # This keep fields such as temp, salt, halted, settled, and dead matched up with the number of timesteps
            self.particle.save()

            # If using Redis, send the results
            if redis_connection is not None:
                redis_connection.publish(
                    self.redis_results_channel,
                    json.dumps(self.particle.timestep_dump()))

        self.dataset.closenc()

        # We won't pull data for the last entry in locations, but we need to populate it with fill data.
        self.particle.fill_gap()

        if self.usebathy is True:
            self._bathymetry.close()

        if self.useshore is True:
            self._shoreline.close()

        logger.info(
            textwrap.dedent('''Particle %i Stats:
                          Data read: %f seconds
                          Model forcing: %s seconds
                          Boundary intersection: %f seconds''' %
                            (self.particle.uid, tot_read_data, {
                                s: '{:g} seconds'.format(f)
                                for s, f in list(tot_model_time.items())
                            }, tot_boundary_time)))

        return self.particle