Esempio n. 1
0
    def setUp(self):
        data = open(
            os.path.normpath(
                os.path.join(
                    os.path.dirname(__file__),
                    "./resources/files/lifestage_single.json"))).read()
        self.lifestage = LifeStage(json=data)

        start_lat = 38
        start_lon = -76
        start_depth = -5
        temp_time = datetime.utcnow()
        self.start_time = datetime(temp_time.year, temp_time.month,
                                   temp_time.day,
                                   temp_time.hour).replace(tzinfo=pytz.utc)
        self.loc = Location4D(latitude=start_lat,
                              longitude=start_lon,
                              depth=start_depth,
                              time=self.start_time)

        self.particles = []
        # Create particles
        for i in range(0, 3):
            p = LarvaParticle()
            p.location = self.loc
            self.particles.append(p)

        # 48 timesteps at an hour each = 2 days of running
        self.times = list(range(0, 172800, 3600))  # in seconds
        self.temps = []
        self.salts = []
        for w in range(0, 48):
            self.temps.append(random.randint(20, 40))
            self.salts.append(random.randint(10, 30))
    def setUp(self):
        data = open(os.path.normpath(os.path.join(os.path.dirname(__file__), "./resources/files/lifestage_single.json"))).read()
        self.lifestage = LifeStage(json=data)

        start_lat = 38
        start_lon = -76
        start_depth = -5
        temp_time = datetime.utcnow()
        self.start_time = datetime(temp_time.year, temp_time.month, temp_time.day, temp_time.hour).replace(tzinfo=pytz.utc)
        self.loc = Location4D(latitude=start_lat, longitude=start_lon, depth=start_depth, time=self.start_time)

        self.particles = []
        # Create particles
        for i in xrange(0, 3):
            p = LarvaParticle()
            p.location = self.loc
            self.particles.append(p)

        # 48 timesteps at an hour each = 2 days of running
        self.times = range(0, 172800, 3600)  # in seconds
        self.temps = []
        self.salts = []
        for w in xrange(0, 48):
            self.temps.append(random.randint(20, 40))
            self.salts.append(random.randint(10, 30))
Esempio n. 3
0
class LarvaParticleTest(unittest.TestCase):
    def setUp(self):
        self.p = LarvaParticle()

    def test_growth(self):
        assert self.p.lifestage_index == 0
        self.p.grow(0.5)
        assert self.p.lifestage_index == 0
        self.p.grow(0.5)
        assert self.p.lifestage_index == 1
        self.p.grow(0.5)
        assert self.p.lifestage_index == 1
        assert self.p.lifestage_progress == 1.5

    def test_over_growth(self):
        self.p.grow(1.5)
        assert self.p.lifestage_index == 1
        assert self.p.lifestage_progress == 1.5
        self.p.grow(1.6)
        assert self.p.lifestage_progress == 3.1
        assert self.p.lifestage_index == 3
class LarvaParticleTest(unittest.TestCase):

    def setUp(self):
        self.p = LarvaParticle()

    def test_growth(self):
        assert self.p.lifestage_index == 0
        self.p.grow(0.5)
        assert self.p.lifestage_index == 0
        self.p.grow(0.5)
        assert self.p.lifestage_index == 1
        self.p.grow(0.5)
        assert self.p.lifestage_index == 1
        assert self.p.lifestage_progress == 1.5

    def test_over_growth(self):
        self.p.grow(1.5)
        assert self.p.lifestage_index == 1
        assert self.p.lifestage_progress == 1.5
        self.p.grow(1.6)
        assert self.p.lifestage_progress == 3.1
        assert self.p.lifestage_index == 3
    def test_attempts(self):
        start_lat = 38
        start_lon = -76

        settle = Settlement(json='{"upper": 100.0, "lower": 200.0, "type": "benthic"}')
        # Particle above the upper bound
        particle = LarvaParticle()
        particle.location = Location4D(latitude=start_lat, longitude=start_lon, depth=-50, time=self.start_time)
        # Set bathymetry BETWEEN lower (200) and upper (100)
        settle.attempt(particle, -150)
        # We should have moved vertically to the bottom and settled
        assert particle.location.depth == -150
        assert particle.location.latitude == particle.locations[-2].latitude
        assert particle.location.longitude == particle.locations[-2].longitude
        assert particle.location.time == particle.locations[-2].time
        assert particle.settled


        settle = Settlement(json='{"upper": 100.0, "lower": 200.0, "type": "benthic"}')
        # Particle above the upper bound
        particle = LarvaParticle()
        particle.location = Location4D(latitude=start_lat, longitude=start_lon, depth=-250, time=self.start_time)
        # Set bathymetry BELOW lower (200) and upper (100)
        settle.attempt(particle, -400)
        # We should not have moved
        assert len(particle.locations) == 1
        assert not particle.settled


        settle = Settlement(json='{"upper": 100.0, "lower": 200.0, "type": "pelagic"}')
        # Particle above the upper bound
        particle = LarvaParticle()
        particle.location = Location4D(latitude=start_lat, longitude=start_lon, depth=-50, time=self.start_time)
        # Set bathymetry BELOW lower (200) and upper (100)
        settle.attempt(particle, -400)
        # We should not have moved
        assert len(particle.locations) == 1
        assert not particle.settled


        settle = Settlement(json='{"upper": 100.0, "lower": 200.0, "type": "pelagic"}')
        # Particle is between the upper and lower bounds
        particle = LarvaParticle()
        particle.location = Location4D(latitude=start_lat, longitude=start_lon, depth=-150, time=self.start_time)
        # Set bathymetry BELOW lower (200) and upper (100)
        settle.attempt(particle, -400)
        # We should have settled, but not moved anywhere
        assert len(particle.locations) == 1
        assert particle.settled    
Esempio n. 6
0
    def test_attempts(self):
        start_lat = 38
        start_lon = -76

        settle = Settlement(
            json='{"upper": 100.0, "lower": 200.0, "type": "benthic"}')
        # Particle above the upper bound
        particle = LarvaParticle()
        particle.location = Location4D(latitude=start_lat,
                                       longitude=start_lon,
                                       depth=-50,
                                       time=self.start_time)
        # Set bathymetry BETWEEN lower (200) and upper (100)
        settle.attempt(particle, -150)
        # We should have moved vertically to the bottom and settled
        assert particle.location.depth == -150
        assert particle.location.latitude == particle.locations[-2].latitude
        assert particle.location.longitude == particle.locations[-2].longitude
        assert particle.location.time == particle.locations[-2].time
        assert particle.settled

        settle = Settlement(
            json='{"upper": 100.0, "lower": 200.0, "type": "benthic"}')
        # Particle above the upper bound
        particle = LarvaParticle()
        particle.location = Location4D(latitude=start_lat,
                                       longitude=start_lon,
                                       depth=-250,
                                       time=self.start_time)
        # Set bathymetry BELOW lower (200) and upper (100)
        settle.attempt(particle, -400)
        # We should not have moved
        assert len(particle.locations) == 1
        assert not particle.settled

        settle = Settlement(
            json='{"upper": 100.0, "lower": 200.0, "type": "pelagic"}')
        # Particle above the upper bound
        particle = LarvaParticle()
        particle.location = Location4D(latitude=start_lat,
                                       longitude=start_lon,
                                       depth=-50,
                                       time=self.start_time)
        # Set bathymetry BELOW lower (200) and upper (100)
        settle.attempt(particle, -400)
        # We should not have moved
        assert len(particle.locations) == 1
        assert not particle.settled

        settle = Settlement(
            json='{"upper": 100.0, "lower": 200.0, "type": "pelagic"}')
        # Particle is between the upper and lower bounds
        particle = LarvaParticle()
        particle.location = Location4D(latitude=start_lat,
                                       longitude=start_lon,
                                       depth=-150,
                                       time=self.start_time)
        # Set bathymetry BELOW lower (200) and upper (100)
        settle.attempt(particle, -400)
        # We should have settled, but not moved anywhere
        assert len(particle.locations) == 1
        assert particle.settled
    def run(self, hydrodataset, **kwargs):

        # Add ModelController description to logfile
        logger.info(self)

        # Add the model descriptions to logfile
        for m in self._models:
            logger.info(m)

        # Calculate the model timesteps
        # We need times = len(self._nstep) + 1 since data is stored one timestep
        # after a particle is forced with the final timestep's data.
        times = range(0, (self._step * self._nstep) + 1, self._step)
        # Calculate a datetime object for each model timestep
        # This method is duplicated in DataController and ForceParticle
        # using the 'times' variables above.  Will be useful in those other
        # locations for particles released at different times
        # i.e. released over a few days
        modelTimestep, self.datetimes = AsaTransport.get_time_objects_from_model_timesteps(
            times, start=self.start)

        time_chunk = self._time_chunk
        horiz_chunk = self._horiz_chunk
        low_memory = kwargs.get("low_memory", False)

        # Should we remove the cache file at the end of the run?
        remove_cache = kwargs.get("remove_cache", True)

        self.bathy_path = kwargs.get("bathy", None)

        self.cache_path = kwargs.get("cache", None)
        if self.cache_path is None:
            # Generate temp filename for dataset cache
            default_cache_dir = os.path.join(os.path.dirname(__file__),
                                             "_cache")
            temp_name = AsaRandom.filename(prefix=str(
                datetime.now().microsecond),
                                           suffix=".nc")
            self.cache_path = os.path.join(default_cache_dir, temp_name)

        logger.progress((1, "Setting up particle start locations"))
        point_locations = []
        if isinstance(self.geometry, Point):
            point_locations = [self.reference_location] * self._npart
        elif isinstance(self.geometry, Polygon) or isinstance(
                self.geometry, MultiPolygon):
            point_locations = [
                Location4D(latitude=loc.y,
                           longitude=loc.x,
                           depth=self._depth,
                           time=self.start)
                for loc in AsaTransport.fill_polygon_with_points(
                    goal=self._npart, polygon=self.geometry)
            ]

        # Initialize the particles
        logger.progress((2, "Initializing particles"))
        for x in xrange(0, self._npart):
            p = LarvaParticle(id=x)
            p.location = point_locations[x]
            # We don't need to fill the location gaps here for environment variables
            # because the first data collected actually relates to this original
            # position.
            # We do need to fill in fields such as settled, halted, etc.
            p.fill_status_gap()
            # Set the inital note
            p.note = p.outputstring()
            p.notes.append(p.note)
            self.particles.append(p)

        # This is where it makes sense to implement the multiprocessing
        # looping for particles and models. Can handle each particle in
        # parallel probably.
        #
        # Get the number of cores (may take some tuning) and create that
        # many workers then pass particles into the queue for the workers
        mgr = multiprocessing.Manager()
        nproc = multiprocessing.cpu_count() - 1
        if nproc <= 0:
            raise ValueError(
                "Model does not run using less than two CPU cores")

        # Each particle is a task, plus the DataController
        number_of_tasks = len(self.particles) + 1

        # We need a process for each particle and one for the data controller
        nproc = min(number_of_tasks, nproc)

        # When a particle requests data
        data_request_lock = mgr.Lock()
        # PID of process with lock
        has_data_request_lock = mgr.Value('int', -1)

        nproc_lock = mgr.Lock()

        # Create the task queue for all of the particles and the DataController
        tasks = multiprocessing.JoinableQueue(number_of_tasks)
        # Create the result queue for all of the particles and the DataController
        results = mgr.Queue(number_of_tasks)

        # Create the shared state objects
        get_data = mgr.Value('bool', True)
        # Number of tasks
        n_run = mgr.Value('int', number_of_tasks)
        updating = mgr.Value('bool', False)

        # When something is reading from cache file
        read_lock = mgr.Lock()
        # list of PIDs that are reading
        has_read_lock = mgr.list()
        read_count = mgr.Value('int', 0)

        # When something is writing to the cache file
        write_lock = mgr.Lock()
        # PID of process with lock
        has_write_lock = mgr.Value('int', -1)

        point_get = mgr.Value('list', [0, 0, 0])
        active = mgr.Value('bool', True)

        logger.progress((3, "Initializing and caching hydro model's grid"))
        try:
            ds = CommonDataset.open(hydrodataset)
            # Query the dataset for common variable names
            # and the time variable.
            logger.debug("Retrieving variable information from dataset")
            common_variables = self.get_common_variables_from_dataset(ds)

            logger.debug("Pickling time variable to disk for particles")
            timevar = ds.gettimevar(common_variables.get("u"))
            f, timevar_pickle_path = tempfile.mkstemp()
            os.close(f)
            f = open(timevar_pickle_path, "wb")
            pickle.dump(timevar, f)
            f.close()
            ds.closenc()
        except:
            logger.warn("Failed to access remote dataset %s" % hydrodataset)
            raise DataControllerError("Inaccessible DAP endpoint: %s" %
                                      hydrodataset)

        # Add data controller to the queue first so that it
        # can get the initial data and is not blocked

        logger.debug('Starting DataController')
        logger.progress((4, "Starting processes"))
        data_controller = parallel.DataController(hydrodataset,
                                                  common_variables,
                                                  n_run,
                                                  get_data,
                                                  write_lock,
                                                  has_write_lock,
                                                  read_lock,
                                                  read_count,
                                                  time_chunk,
                                                  horiz_chunk,
                                                  times,
                                                  self.start,
                                                  point_get,
                                                  self.reference_location,
                                                  low_memory=low_memory,
                                                  cache=self.cache_path)
        tasks.put(data_controller)
        # Create DataController worker
        data_controller_process = parallel.Consumer(tasks,
                                                    results,
                                                    n_run,
                                                    nproc_lock,
                                                    active,
                                                    get_data,
                                                    name="DataController")
        data_controller_process.start()

        logger.debug('Adding %i particles as tasks' % len(self.particles))
        for part in self.particles:
            forcing = parallel.ForceParticle(
                part,
                hydrodataset,
                common_variables,
                timevar_pickle_path,
                times,
                self.start,
                self._models,
                self.reference_location.point,
                self._use_bathymetry,
                self._use_shoreline,
                self._use_seasurface,
                get_data,
                n_run,
                read_lock,
                has_read_lock,
                read_count,
                point_get,
                data_request_lock,
                has_data_request_lock,
                reverse_distance=self.reverse_distance,
                bathy=self.bathy_path,
                shoreline_path=self.shoreline_path,
                cache=self.cache_path,
                time_method=self.time_method)
            tasks.put(forcing)

        # Create workers for the particles.
        procs = [
            parallel.Consumer(tasks,
                              results,
                              n_run,
                              nproc_lock,
                              active,
                              get_data,
                              name="ForceParticle-%d" % i)
            for i in xrange(nproc - 1)
        ]
        for w in procs:
            w.start()
            logger.debug('Started %s' % w.name)

        # Get results back from queue, test for failed particles
        return_particles = []
        retrieved = 0.
        error_code = 0

        logger.info("Waiting for %i particle results" % len(self.particles))
        logger.progress((5, "Running model"))
        while retrieved < number_of_tasks:
            try:
                # Returns a tuple of code, result
                code, tempres = results.get(timeout=240)
            except Queue.Empty:
                # Poll the active processes to make sure they are all alive and then continue with loop
                if not data_controller_process.is_alive(
                ) and data_controller_process.exitcode != 0:
                    # Data controller is zombied, kill off other processes.
                    get_data.value == False
                    results.put((-2, "DataController"))

                new_procs = []
                old_procs = []
                for p in procs:
                    if not p.is_alive() and p.exitcode != 0:
                        # Do what the Consumer would do if something finished.
                        # Add something to results queue
                        results.put((-3, "ZombieParticle"))
                        # Decrement nproc (DataController exits when this is 0)
                        with nproc_lock:
                            n_run.value = n_run.value - 1

                        # Remove task from queue (so they can be joined later on)
                        tasks.task_done()

                        # Start a new Consumer.  It will exit if there are no tasks available.
                        np = parallel.Consumer(tasks,
                                               results,
                                               n_run,
                                               nproc_lock,
                                               active,
                                               get_data,
                                               name=p.name)
                        new_procs.append(np)
                        old_procs.append(p)

                        # Release any locks the PID had
                        if p.pid in has_read_lock:
                            with read_lock:
                                read_count.value -= 1
                                has_read_lock.remove(p.pid)

                        if has_data_request_lock.value == p.pid:
                            has_data_request_lock.value = -1
                            try:
                                data_request_lock.release()
                            except:
                                pass

                        if has_write_lock.value == p.pid:
                            has_write_lock.value = -1
                            try:
                                write_lock.release()
                            except:
                                pass

                for p in old_procs:
                    try:
                        procs.remove(p)
                    except ValueError:
                        logger.warn(
                            "Did not find %s in the list of processes.  Continuing on."
                            % p.name)

                for p in new_procs:
                    procs.append(p)
                    logger.warn(
                        "Started a new consumer (%s) to replace a zombie consumer"
                        % p.name)
                    p.start()

            else:
                # We got one.
                retrieved += 1
                if code == None:
                    logger.warn("Got an unrecognized response from a task.")
                elif code == -1:
                    logger.warn("Particle %s has FAILED!!" % tempres.uid)
                elif code == -2:
                    error_code = code
                    logger.warn(
                        "DataController has FAILED!!  Removing cache file so the particles fail."
                    )
                    try:
                        os.remove(self.cache_path)
                    except OSError:
                        logger.debug(
                            "Could not remove cache file, it probably never existed"
                        )
                        pass
                elif code == -3:
                    error_code = code
                    logger.info(
                        "A zombie process was caught and task was removed from queue"
                    )
                elif isinstance(tempres, Particle):
                    logger.info("Particle %d finished" % tempres.uid)
                    return_particles.append(tempres)
                    # We mulitply by 95 here to save 5% for the exporting
                    logger.progress(
                        (round((retrieved / number_of_tasks) * 90.,
                               1), "Particle %d finished" % tempres.uid))
                elif tempres == "DataController":
                    logger.info("DataController finished")
                    logger.progress((round((retrieved / number_of_tasks) * 90.,
                                           1), "DataController finished"))
                else:
                    logger.info("Got a strange result on results queue")
                    logger.info(str(tempres))

                logger.info("Retrieved %i/%i results" %
                            (int(retrieved), number_of_tasks))

        if len(return_particles) != len(self.particles):
            logger.warn(
                "Some particles failed and are not included in the output")

        # The results queue should be empty at this point
        assert results.empty() is True

        # Should be good to join on the tasks now that the queue is empty
        logger.info("Joining the task queue")
        tasks.join()

        # Join all processes
        logger.info("Joining the processes")
        for w in procs + [data_controller_process]:
            # Wait 10 seconds
            w.join(10.)
            if w.is_alive():
                # Process is hanging, kill it.
                logger.info(
                    "Terminating %s forcefully.  This should have exited itself."
                    % w.name)
                w.terminate()

        logger.info('Workers complete')

        self.particles = return_particles

        # Remove Manager so it shuts down
        del mgr

        # Remove pickled timevar
        os.remove(timevar_pickle_path)

        # Remove the cache file
        if remove_cache is True:
            try:
                os.remove(self.cache_path)
            except OSError:
                logger.debug(
                    "Could not remove cache file, it probably never existed")

        logger.progress((96, "Exporting results"))

        if len(self.particles) > 0:
            # If output_formats and path specified,
            # output particle run data to disk when completed
            if "output_formats" in kwargs:
                # Make sure output_path is also included
                if kwargs.get("output_path", None) != None:
                    formats = kwargs.get("output_formats")
                    output_path = kwargs.get("output_path")
                    if isinstance(formats, list):
                        for format in formats:
                            logger.info("Exporting to: %s" % format)
                            try:
                                self.export(output_path, format=format)
                            except:
                                logger.error("Failed to export to: %s" %
                                             format)
                    else:
                        logger.warn(
                            'The output_formats parameter should be a list, not saving any output!'
                        )
                else:
                    logger.warn(
                        'No output path defined, not saving any output!')
            else:
                logger.warn('No output format defined, not saving any output!')
        else:
            logger.warn("Model didn't actually do anything, check the log.")
            if error_code == -2:
                raise DataControllerError("Error in the DataController")
            else:
                raise ModelError("Error in the model")

        logger.progress((99, "Model Run Complete"))
        return
    def setup_run(self, **kwargs):

        logger.setLevel(logging.PROGRESS)

        self.redis_url             = None
        self.redis_log_channel     = None
        self.redis_results_channel = None
        if "redis" in kwargs.get("output_formats", []):
            from paegan.logger.redis_handler import RedisHandler
            self.redis_url             = kwargs.get("redis_url")
            self.redis_log_channel     = kwargs.get("redis_log_channel")
            self.redis_results_channel = kwargs.get("redis_results_channel")
            rhandler = RedisHandler(self.redis_log_channel, self.redis_url)
            rhandler.setLevel(logging.PROGRESS)
            logger.addHandler(rhandler)

        # Relax.
        time.sleep(0.5)

        # Add ModelController description to logfile
        logger.info(unicode(self))

        # Add the model descriptions to logfile
        for m in self._models:
            logger.info(unicode(m))

        # Calculate the model timesteps
        # We need times = len(self._nstep) + 1 since data is stored one timestep
        # after a particle is forced with the final timestep's data.
        self.times = range(0, (self._step*self._nstep)+1, self._step)
        # Calculate a datetime object for each model timestep
        # This method is duplicated in CachingDataController and CachingForcer
        # using the 'times' variables above.  Will be useful in those other
        # locations for particles released at different times
        # i.e. released over a few days
        self.modelTimestep, self.datetimes = AsaTransport.get_time_objects_from_model_timesteps(self.times, start=self.start)

        logger.progress((1, "Setting up particle start locations"))
        point_locations = []
        if isinstance(self.geometry, Point):
            point_locations = [self.reference_location] * self._npart
        elif isinstance(self.geometry, Polygon) or isinstance(self.geometry, MultiPolygon):
            point_locations = [Location4D(latitude=loc.y, longitude=loc.x, depth=self._depth, time=self.start) for loc in AsaTransport.fill_polygon_with_points(goal=self._npart, polygon=self.geometry)]

        # Initialize the particles
        logger.progress((2, "Initializing particles"))
        for x in xrange(0, self._npart):
            p = LarvaParticle(id=x)
            p.location = point_locations[x]
            # We don't need to fill the location gaps here for environment variables
            # because the first data collected actually relates to this original
            # position.
            # We do need to fill in fields such as settled, halted, etc.
            p.fill_status_gap()
            # Set the inital note
            p.note = p.outputstring()
            p.notes.append(p.note)
            self.particles.append(p)

        if kwargs.get("manager", True):
            # Get the number of cores (may take some tuning) and create that
            # many workers then pass particles into the queue for the workers
            self.mgr = multiprocessing.Manager()

            # This tracks if the system is 'alive'.  Most looping whiles will check this
            # and break out if it is False.  This is True until something goes very wrong.
            self.active = self.mgr.Value('bool', True)

            # Each particle is a task, plus the CachingDataController
            self.number_of_tasks = self.get_number_of_tasks()

            # Either spin up the number of cores, or the number of tasks
            self.nproc = min(multiprocessing.cpu_count() - 1, self.number_of_tasks)

            # Number of tasks that we need to run.  This is decremented everytime something exits.
            self.n_run = self.mgr.Value('int', self.number_of_tasks)
            # The lock that controls access to the 'n_run' variable
            self.nproc_lock = self.mgr.Lock()

            # Create the task queue for all of the particles and the CachingDataController
            self.tasks = multiprocessing.JoinableQueue(self.number_of_tasks)
            # Create the result queue for all of the particles and the CachingDataController
            self.results = self.mgr.Queue(self.number_of_tasks)

        logger.progress((3, "Initializing and caching hydro model's grid"))
        try:
            ds = CommonDataset.open(self.hydrodataset)
        except Exception:
            logger.exception("Failed to access dataset %s" % self.hydrodataset)
            raise BaseDataControllerError("Inaccessible Dataset: %s" % self.hydrodataset)
        # Query the dataset for common variable names
        # and the time variable.
        logger.debug("Retrieving variable information from dataset")
        self.common_variables = self.get_common_variables_from_dataset(ds)

        self.timevar = None
        try:
            assert self.common_variables.get("u") in ds._current_variables
            assert self.common_variables.get("v") in ds._current_variables
            assert self.common_variables.get("x") in ds._current_variables
            assert self.common_variables.get("y") in ds._current_variables

            self.timevar = ds.gettimevar(self.common_variables.get("u"))
        except AssertionError:
            logger.exception("Could not locate variables needed to run model: %s" % unicode(self.common_variables))
            raise BaseDataControllerError("A required data variable was not found in %s" % self.hydrodataset)

        model_start = self.timevar.get_dates()[0]
        model_end   = self.timevar.get_dates()[-1]

        try:
            assert self.start > model_start
            assert self.start < model_end
        except AssertionError:
            raise BaseDataControllerError("Start time for model (%s) is not available in source dataset (%s/%s)" % (self.datetimes[0], model_start, model_end))

        try:
            assert self.datetimes[-1] > model_start
            assert self.datetimes[-1] < model_end
        except AssertionError:
            raise BaseDataControllerError("End time for model (%s) is not available in source dataset (%s/%s)" % (self.datetimes[-1], model_start, model_end))

        ds.closenc()
Esempio n. 9
0
    def setup_run(self, hydrodataset, **kwargs):

        self.hydrodataset = hydrodataset

        logger.setLevel(logging.PROGRESS)

        # Relax.
        time.sleep(0.5)

        # Add ModelController description to logfile
        logger.info(str(self))

        # Add the model descriptions to logfile
        for m in self._models:
            logger.info(str(m))

        # Calculate the model timesteps
        # We need times = len(self._nstep) + 1 since data is stored one timestep
        # after a particle is forced with the final timestep's data.
        self.times = list(range(0, (self._step * self._nstep) + 1, self._step))
        # Calculate a datetime object for each model timestep
        # This method is duplicated in CachingDataController and CachingForcer
        # using the 'times' variables above.  Will be useful in those other
        # locations for particles released at different times
        # i.e. released over a few days
        self.modelTimestep, self.datetimes = AsaTransport.get_time_objects_from_model_timesteps(
            self.times, start=self.start)

        logger.progress((1, "Setting up particle start locations"))
        point_locations = []
        if isinstance(self.geometry, Point):
            point_locations = [self.reference_location] * self._npart
        elif isinstance(self.geometry, Polygon) or isinstance(
                self.geometry, MultiPolygon):
            point_locations = [
                Location4D(latitude=loc.y,
                           longitude=loc.x,
                           depth=self._depth,
                           time=self.start)
                for loc in AsaTransport.fill_polygon_with_points(
                    goal=self._npart, polygon=self.geometry)
            ]

        # Initialize the particles
        logger.progress((2, "Initializing particles"))
        for x in range(0, self._npart):
            p = LarvaParticle(id=x)
            p.location = point_locations[x]
            # We don't need to fill the location gaps here for environment variables
            # because the first data collected actually relates to this original
            # position.
            # We do need to fill in fields such as settled, halted, etc.
            p.fill_status_gap()
            # Set the inital note
            p.note = p.outputstring()
            p.notes.append(p.note)
            self.particles.append(p)

        logger.progress((3, "Initializing and caching hydro model's grid %s" %
                         self.hydrodataset))
        try:
            ds = CommonDataset.open(self.hydrodataset)
            # Query the dataset for common variable names
            # and the time variable.
            logger.debug("Retrieving variable information from dataset")
            self.common_variables = self.get_common_variables_from_dataset(ds)
        except Exception:
            logger.exception("Failed to access dataset %s" % self.hydrodataset)
            raise BaseDataControllerError("Inaccessible Dataset: %s" %
                                          self.hydrodataset)

        self.timevar = None
        try:
            assert self.common_variables.get("u") in ds._current_variables
            assert self.common_variables.get("v") in ds._current_variables
            assert self.common_variables.get("x") in ds._current_variables
            assert self.common_variables.get("y") in ds._current_variables

            self.timevar = ds.gettimevar(self.common_variables.get("u"))
            model_start = self.timevar.get_dates()[0]
            model_end = self.timevar.get_dates()[-1]
        except AssertionError:
            logger.exception(
                "Could not locate variables needed to run model: %s" %
                str(self.common_variables))
            raise BaseDataControllerError(
                "A required data variable was not found in %s" %
                self.hydrodataset)
        finally:
            ds.closenc()

        try:
            assert self.start > model_start
            assert self.start < model_end
        except AssertionError:
            raise BaseDataControllerError(
                "Start time for model (%s) is not available in source dataset (%s/%s)"
                % (self.datetimes[0], model_start, model_end))

        try:
            assert self.datetimes[-1] > model_start
            assert self.datetimes[-1] < model_end
        except AssertionError:
            raise BaseDataControllerError(
                "End time for model (%s) is not available in source dataset (%s/%s)"
                % (self.datetimes[-1], model_start, model_end))
Esempio n. 10
0
    def run(self, hydrodataset, **kwargs):

        # Add ModelController description to logfile
        logger.info(self)

        # Add the model descriptions to logfile
        for m in self._models:
            logger.info(m)

        # Calculate the model timesteps
        # We need times = len(self._nstep) + 1 since data is stored one timestep
        # after a particle is forced with the final timestep's data.
        times = range(0,(self._step*self._nstep)+1,self._step)
        # Calculate a datetime object for each model timestep
        # This method is duplicated in DataController and ForceParticle
        # using the 'times' variables above.  Will be useful in those other
        # locations for particles released at different times
        # i.e. released over a few days
        modelTimestep, self.datetimes = AsaTransport.get_time_objects_from_model_timesteps(times, start=self.start)

        time_chunk = self._time_chunk
        horiz_chunk = self._horiz_chunk
        low_memory = kwargs.get("low_memory", False)

        # Should we remove the cache file at the end of the run?
        remove_cache = kwargs.get("remove_cache", True)

        self.bathy_path = kwargs.get("bathy", None)

        self.cache_path = kwargs.get("cache", None)
        if self.cache_path is None:
            # Generate temp filename for dataset cache
            default_cache_dir = os.path.join(os.path.dirname(__file__), "_cache")
            temp_name = AsaRandom.filename(prefix=str(datetime.now().microsecond), suffix=".nc")
            self.cache_path = os.path.join(default_cache_dir, temp_name)
        
        logger.progress((1, "Setting up particle start locations"))
        point_locations = []
        if isinstance(self.geometry, Point):
            point_locations = [self.reference_location] * self._npart
        elif isinstance(self.geometry, Polygon) or isinstance(self.geometry, MultiPolygon):
            point_locations = [Location4D(latitude=loc.y, longitude=loc.x, depth=self._depth, time=self.start) for loc in AsaTransport.fill_polygon_with_points(goal=self._npart, polygon=self.geometry)]

        # Initialize the particles
        logger.progress((2, "Initializing particles"))
        for x in xrange(0, self._npart):
            p = LarvaParticle(id=x)
            p.location = point_locations[x]
            # We don't need to fill the location gaps here for environment variables
            # because the first data collected actually relates to this original
            # position.
            # We do need to fill in fields such as settled, halted, etc.
            p.fill_status_gap()
            # Set the inital note
            p.note = p.outputstring()
            p.notes.append(p.note)
            self.particles.append(p)

        # This is where it makes sense to implement the multiprocessing
        # looping for particles and models. Can handle each particle in 
        # parallel probably.
        #
        # Get the number of cores (may take some tuning) and create that
        # many workers then pass particles into the queue for the workers
        mgr = multiprocessing.Manager()
        nproc = multiprocessing.cpu_count() - 1
        if nproc <= 0:
            raise ValueError("Model does not run using less than two CPU cores")

        # Each particle is a task, plus the DataController
        number_of_tasks = len(self.particles) + 1

        # We need a process for each particle and one for the data controller
        nproc = min(number_of_tasks, nproc)

        # When a particle requests data
        data_request_lock = mgr.Lock()
        # PID of process with lock
        has_data_request_lock = mgr.Value('int',-1)

        nproc_lock = mgr.Lock()
        
        # Create the task queue for all of the particles and the DataController
        tasks = multiprocessing.JoinableQueue(number_of_tasks)
        # Create the result queue for all of the particles and the DataController
        results = mgr.Queue(number_of_tasks)
        
        # Create the shared state objects
        get_data = mgr.Value('bool', True)
        # Number of tasks
        n_run = mgr.Value('int', number_of_tasks)
        updating = mgr.Value('bool', False)

        # When something is reading from cache file
        read_lock = mgr.Lock()
        # list of PIDs that are reading
        has_read_lock = mgr.list()
        read_count = mgr.Value('int', 0)

        # When something is writing to the cache file
        write_lock = mgr.Lock()
        # PID of process with lock
        has_write_lock = mgr.Value('int',-1)

        point_get = mgr.Value('list', [0, 0, 0])
        active = mgr.Value('bool', True)
        
        logger.progress((3, "Initializing and caching hydro model's grid"))
        try:
            ds = CommonDataset.open(hydrodataset)
            # Query the dataset for common variable names
            # and the time variable.
            logger.debug("Retrieving variable information from dataset")
            common_variables = self.get_common_variables_from_dataset(ds)

            logger.debug("Pickling time variable to disk for particles")
            timevar = ds.gettimevar(common_variables.get("u"))
            f, timevar_pickle_path = tempfile.mkstemp()
            os.close(f)
            f = open(timevar_pickle_path, "wb")
            pickle.dump(timevar, f)
            f.close()
            ds.closenc()
        except:
            logger.warn("Failed to access remote dataset %s" % hydrodataset)
            raise DataControllerError("Inaccessible DAP endpoint: %s" % hydrodataset)


        # Add data controller to the queue first so that it 
        # can get the initial data and is not blocked
        
        logger.debug('Starting DataController')
        logger.progress((4, "Starting processes"))
        data_controller = parallel.DataController(hydrodataset, common_variables, n_run, get_data, write_lock, has_write_lock, read_lock, read_count,
                                                  time_chunk, horiz_chunk, times,
                                                  self.start, point_get, self.reference_location,
                                                  low_memory=low_memory,
                                                  cache=self.cache_path)
        tasks.put(data_controller)
        # Create DataController worker
        data_controller_process = parallel.Consumer(tasks, results, n_run, nproc_lock, active, get_data, name="DataController")
        data_controller_process.start()
        
        logger.debug('Adding %i particles as tasks' % len(self.particles))
        for part in self.particles:
            forcing = parallel.ForceParticle(part,
                                        hydrodataset,
                                        common_variables,
                                        timevar_pickle_path,
                                        times,
                                        self.start,
                                        self._models,
                                        self.reference_location.point,
                                        self._use_bathymetry,
                                        self._use_shoreline,
                                        self._use_seasurface,
                                        get_data,
                                        n_run,
                                        read_lock,
                                        has_read_lock,
                                        read_count,
                                        point_get,
                                        data_request_lock,
                                        has_data_request_lock,
                                        reverse_distance=self.reverse_distance,
                                        bathy=self.bathy_path,
                                        shoreline_path=self.shoreline_path,
                                        shoreline_feature=self.shoreline_feature,
                                        cache=self.cache_path,
                                        time_method=self.time_method)
            tasks.put(forcing)

        # Create workers for the particles.
        procs = [ parallel.Consumer(tasks, results, n_run, nproc_lock, active, get_data, name="ForceParticle-%d"%i)
                  for i in xrange(nproc - 1) ]
        for w in procs:
            w.start()
            logger.debug('Started %s' % w.name)

        # Get results back from queue, test for failed particles
        return_particles = []
        retrieved = 0.
        error_code = 0

        logger.info("Waiting for %i particle results" % len(self.particles))
        logger.progress((5, "Running model"))
        while retrieved < number_of_tasks:
            try:
                # Returns a tuple of code, result
                code, tempres = results.get(timeout=240)
            except Queue.Empty:
                # Poll the active processes to make sure they are all alive and then continue with loop
                if not data_controller_process.is_alive() and data_controller_process.exitcode != 0:
                    # Data controller is zombied, kill off other processes.
                    get_data.value == False
                    results.put((-2, "DataController"))

                new_procs = []
                old_procs = []
                for p in procs:
                    if not p.is_alive() and p.exitcode != 0:
                        # Do what the Consumer would do if something finished.
                        # Add something to results queue
                        results.put((-3, "ZombieParticle"))
                        # Decrement nproc (DataController exits when this is 0)
                        with nproc_lock:
                            n_run.value = n_run.value - 1

                        # Remove task from queue (so they can be joined later on)
                        tasks.task_done()

                        # Start a new Consumer.  It will exit if there are no tasks available.
                        np = parallel.Consumer(tasks, results, n_run, nproc_lock, active, get_data, name=p.name)
                        new_procs.append(np)
                        old_procs.append(p)
                        
                        # Release any locks the PID had
                        if p.pid in has_read_lock:
                            with read_lock:
                                read_count.value -= 1
                                has_read_lock.remove(p.pid)

                        if has_data_request_lock.value == p.pid:
                            has_data_request_lock.value = -1
                            try:
                                data_request_lock.release()
                            except:
                                pass
                            
                        if has_write_lock.value == p.pid:
                            has_write_lock.value = -1
                            try:
                                write_lock.release()
                            except:
                                pass
                            

                for p in old_procs:
                    try:
                        procs.remove(p)
                    except ValueError:
                        logger.warn("Did not find %s in the list of processes.  Continuing on." % p.name)

                for p in new_procs:
                    procs.append(p)
                    logger.warn("Started a new consumer (%s) to replace a zombie consumer" % p.name)
                    p.start()
                
            else:
                # We got one.
                retrieved += 1
                if code == None:
                    logger.warn("Got an unrecognized response from a task.")
                elif code == -1:
                    logger.warn("Particle %s has FAILED!!" % tempres.uid)
                elif code == -2:
                    error_code = code
                    logger.warn("DataController has FAILED!!  Removing cache file so the particles fail.")
                    try:
                        os.remove(self.cache_path)
                    except OSError:
                        logger.debug("Could not remove cache file, it probably never existed")
                        pass
                elif code == -3:
                    error_code = code
                    logger.info("A zombie process was caught and task was removed from queue")
                elif isinstance(tempres, Particle):
                    logger.info("Particle %d finished" % tempres.uid)
                    return_particles.append(tempres)
                    # We mulitply by 95 here to save 5% for the exporting
                    logger.progress((round((retrieved / number_of_tasks) * 90.,1), "Particle %d finished" % tempres.uid))
                elif tempres == "DataController":
                    logger.info("DataController finished")
                    logger.progress((round((retrieved / number_of_tasks) * 90.,1), "DataController finished"))
                else:
                    logger.info("Got a strange result on results queue")
                    logger.info(str(tempres))

                logger.info("Retrieved %i/%i results" % (int(retrieved),number_of_tasks))
        
        if len(return_particles) != len(self.particles):
            logger.warn("Some particles failed and are not included in the output")

        # The results queue should be empty at this point
        assert results.empty() is True

        # Should be good to join on the tasks now that the queue is empty
        logger.info("Joining the task queue")
        tasks.join()

        # Join all processes
        logger.info("Joining the processes")
        for w in procs + [data_controller_process]:
                # Wait 10 seconds
                w.join(10.)
                if w.is_alive():
                    # Process is hanging, kill it.
                    logger.info("Terminating %s forcefully.  This should have exited itself." % w.name)
                    w.terminate()
                    
        logger.info('Workers complete')

        self.particles = return_particles

        # Remove Manager so it shuts down
        del mgr

        # Remove pickled timevar
        os.remove(timevar_pickle_path)

        # Remove the cache file
        if remove_cache is True:
            try:
                os.remove(self.cache_path)
            except OSError:
                logger.debug("Could not remove cache file, it probably never existed")

        logger.progress((96, "Exporting results"))

        if len(self.particles) > 0:
            # If output_formats and path specified,
            # output particle run data to disk when completed
            if "output_formats" in kwargs:
                # Make sure output_path is also included
                if kwargs.get("output_path", None) != None:
                    formats = kwargs.get("output_formats")
                    output_path = kwargs.get("output_path")
                    if isinstance(formats, list):
                        for format in formats:
                            logger.info("Exporting to: %s" % format)
                            try:
                                self.export(output_path, format=format)
                            except:
                                logger.error("Failed to export to: %s" % format)
                    else:
                        logger.warn('The output_formats parameter should be a list, not saving any output!')  
                else:
                    logger.warn('No output path defined, not saving any output!')  
            else:
                logger.warn('No output format defined, not saving any output!')
        else:
            logger.warn("Model didn't actually do anything, check the log.")
            if error_code == -2:
                raise DataControllerError("Error in the DataController")
            else:
                raise ModelError("Error in the model")

        logger.progress((99, "Model Run Complete"))
        return
Esempio n. 11
0
 def setUp(self):
     self.p = LarvaParticle()
Esempio n. 12
0
    def setup_run(self, hydrodataset, **kwargs):

        self.hydrodataset = hydrodataset

        logger.setLevel(logging.PROGRESS)

        # Relax.
        time.sleep(0.5)

        # Add ModelController description to logfile
        logger.info(str(self))

        # Add the model descriptions to logfile
        for m in self._models:
            logger.info(str(m))

        # Calculate the model timesteps
        # We need times = len(self._nstep) + 1 since data is stored one timestep
        # after a particle is forced with the final timestep's data.
        self.times = list(range(0, (self._step*self._nstep)+1, self._step))
        # Calculate a datetime object for each model timestep
        # This method is duplicated in CachingDataController and CachingForcer
        # using the 'times' variables above.  Will be useful in those other
        # locations for particles released at different times
        # i.e. released over a few days
        self.modelTimestep, self.datetimes = AsaTransport.get_time_objects_from_model_timesteps(self.times, start=self.start)

        logger.progress((1, "Setting up particle start locations"))
        point_locations = []
        if isinstance(self.geometry, Point):
            point_locations = [self.reference_location] * self._npart
        elif isinstance(self.geometry, Polygon) or isinstance(self.geometry, MultiPolygon):
            point_locations = [Location4D(latitude=loc.y, longitude=loc.x, depth=self._depth, time=self.start) for loc in AsaTransport.fill_polygon_with_points(goal=self._npart, polygon=self.geometry)]

        # Initialize the particles
        logger.progress((2, "Initializing particles"))
        for x in range(0, self._npart):
            p = LarvaParticle(id=x)
            p.location = point_locations[x]
            # We don't need to fill the location gaps here for environment variables
            # because the first data collected actually relates to this original
            # position.
            # We do need to fill in fields such as settled, halted, etc.
            p.fill_status_gap()
            # Set the inital note
            p.note = p.outputstring()
            p.notes.append(p.note)
            self.particles.append(p)

        logger.progress((3, "Initializing and caching hydro model's grid %s" % self.hydrodataset))
        try:
            ds = CommonDataset.open(self.hydrodataset)
            # Query the dataset for common variable names
            # and the time variable.
            logger.debug("Retrieving variable information from dataset")
            self.common_variables = self.get_common_variables_from_dataset(ds)
        except Exception:
            logger.exception("Failed to access dataset %s" % self.hydrodataset)
            raise BaseDataControllerError("Inaccessible Dataset: %s" % self.hydrodataset)

        self.timevar = None
        try:
            assert self.common_variables.get("u") in ds._current_variables
            assert self.common_variables.get("v") in ds._current_variables
            assert self.common_variables.get("x") in ds._current_variables
            assert self.common_variables.get("y") in ds._current_variables

            self.timevar = ds.gettimevar(self.common_variables.get("u"))
            model_start = self.timevar.get_dates()[0]
            model_end = self.timevar.get_dates()[-1]
        except AssertionError:
            logger.exception("Could not locate variables needed to run model: %s" % str(self.common_variables))
            raise BaseDataControllerError("A required data variable was not found in %s" % self.hydrodataset)
        finally:
            ds.closenc()

        try:
            assert self.start > model_start
            assert self.start < model_end
        except AssertionError:
            raise BaseDataControllerError("Start time for model (%s) is not available in source dataset (%s/%s)" % (self.datetimes[0], model_start, model_end))

        try:
            assert self.datetimes[-1] > model_start
            assert self.datetimes[-1] < model_end
        except AssertionError:
            raise BaseDataControllerError("End time for model (%s) is not available in source dataset (%s/%s)" % (self.datetimes[-1], model_start, model_end))
Esempio n. 13
0
 def setUp(self):
     self.p = LarvaParticle()