def __reverse(self, **kwargs):
        """
            Reverse particle just off of the shore in the direction that it came in.
            Adds a slight random factor to the distance and angle it is reversed in.
        """

        #st = time.clock()

        start_point = kwargs.pop('start_point')
        hit_point = kwargs.pop('hit_point')
        reverse_azimuth = kwargs.pop('reverse_azimuth')
        reverse_distance = kwargs.get('reverse_distance', None)
        if reverse_distance is None:
            reverse_distance = 100

        # Randomize the reverse angle slightly (+/- 5 degrees)
        random_azimuth = reverse_azimuth + AsaRandom.random() * 5

        count = 0
        nudge_distance = 0.01
        nudge_point = AsaGreatCircle.great_circle(distance=nudge_distance, azimuth=reverse_azimuth, start_point=hit_point)
        nudge_loc = Location4D(latitude=nudge_point['latitude'], longitude=nudge_point['longitude'], depth=start_point.depth)

        # Find a point just offshore to do the testing with, doubling the nudge distance up to 16 times (~650m).
        # This makes sure the start_point is in the water for the next call to intersect (next while loop).
        while self.intersect(single_point=nudge_loc.point) and count < 16:
            nudge_distance *= 2
            nudge_point = AsaGreatCircle.great_circle(distance=nudge_distance, azimuth=reverse_azimuth, start_point=hit_point)
            nudge_loc = Location4D(latitude=nudge_point['latitude'], longitude=nudge_point['longitude'], depth=start_point.depth)
            count += 1

        # We tried 16 times and couldn't find a point.  This should totally never happen.
        if count == 16:
            logger.warn("LOOK: Could not find location in water to do shoreline calculation with.  Assuming particle did not move from original location")
            return start_point

        # Keep trying to throw the particle back, halving the distance each time until it is in water.
        # Only halve it 6 times before giving up and returning the point which the particle came from.
        count = 0
        # Distance amount to half each iteration
        changing_distance = reverse_distance
        new_point = AsaGreatCircle.great_circle(distance=reverse_distance, azimuth=random_azimuth, start_point=hit_point)
        new_loc = Location4D(latitude=new_point['latitude'], longitude=new_point['longitude'], depth=start_point.depth)

        # We don't want to reverse further than the current spatial buffer, because we reindex the
        # source file every time we reverse, which would slow down the calculations considerably.
        while (not self._spatial_query_object.contains(new_loc.point) or self.intersect(start_point=nudge_loc.point, end_point=new_loc.point)) and count < 6:
            changing_distance /= 2
            new_point = AsaGreatCircle.great_circle(distance=changing_distance, azimuth=random_azimuth, start_point=hit_point)
            new_loc = Location4D(latitude=new_point['latitude'], longitude=new_point['longitude'], depth=start_point.depth)
            count += 1

        # We tried 6 times and the particle was still on shore; return the point the particle started from.
        # No randomization.
        if count == 6:
            logger.warn("LOOK: Could not react particle with shoreline.  Assuming particle did not move from original location")
            return start_point

        #logger.info("Reaction time: %f" % (time.clock() - st))
        return new_loc
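
A rough planar sketch of the distance-halving reversal above, with shapely standing in for AsaGreatCircle/Location4D; the land polygon, hit point, bearing, and distances are made up purely for illustration.

# Toy reversal: step back along the reversed bearing, halving the step until the
# candidate point is off the hypothetical land polygon; give up after 6 tries.
import math
import random

from shapely.geometry import Point, Polygon

land = Polygon([(0, 0), (10, 0), (10, 10), (0, 10)])    # hypothetical land mass
hit = Point(0.0, 5.0)                                   # where the particle struck shore
bearing = math.radians(180 + random.uniform(-5, 5))     # reversed heading, +/- 5 degrees

distance = 2.0                                          # full reverse distance (toy units)
new_point = hit                                         # fallback (the real code returns start_point)
for _ in range(6):                                      # halve at most 6 times, like the loop above
    candidate = Point(hit.x + distance * math.cos(bearing),
                      hit.y + distance * math.sin(bearing))
    if not land.contains(candidate):                    # stands in for the self.intersect() checks
        new_point = candidate
        break
    distance /= 2.0

print(new_point.wkt)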
Example #2
    def index(self, **kwargs):
        """
            This queries the shapefile around a buffer of a point
            The results of this spatial query are used for shoreline detection.

            Using the entire shapefile without the spatial query takes over
            30 times as long with world land polygons.

        """

        point = kwargs.pop("point", None)
        spatialbuffer = kwargs.pop("spatialbuffer", self._spatialbuffer)

        self._layer.SetSpatialFilter(None)
        self._spatial_query_object = None
        
        if point:
            self._spatial_query_object = point.buffer(spatialbuffer)
            poly = ogr.CreateGeometryFromWkt(self._spatial_query_object.wkt)
            self._layer.SetSpatialFilter(poly)
            poly.Destroy()

        self._geoms = []
        # The _geoms should be only Polygons, not MultiPolygons
        for element in self._layer:
            try:
                geom = wkb.loads(element.GetGeometryRef().ExportToWkb())
                if isinstance(geom, Polygon):
                    self._geoms.append(geom)
                elif isinstance(geom, MultiPolygon):
                    for poly in geom:
                        self._geoms.append(poly)
            except:
                logger.warn("Could not find valid geometry in shoreline element.  Point: %s, Buffer: %s" % (str(point), str(spatialbuffer)))
    def index(self, point=None, spatialbuffer=None):
        """
            This queries the shapefile around a buffer of a point
            The results of this spatial query are used for shoreline detection.

            Using the entire shapefile without the spatial query takes over
            30 times as long with world land polygons.

        """
        spatialbuffer = spatialbuffer or self._spatialbuffer
        self._spatial_query_object = None
        geoms                      = []

        if point:
            self._spatial_query_object = point.buffer(spatialbuffer)
            geoms = self.get_geoms_for_bounds(self._spatial_query_object.envelope.bounds)

        self._geoms = []

        # The _geoms should be only Polygons, not MultiPolygons
        for geom in geoms:
            try:
                if isinstance(geom, Polygon):
                    self._geoms.append(geom)
                elif isinstance(geom, MultiPolygon):
                    for poly in geom:
                        self._geoms.append(poly)
            except:
                logger.warn("Could not find valid geometry in shoreline element.  Point: %s, Buffer: %s" % (str(point), str(spatialbuffer)))
Example #4
    def index(self, point=None, spatialbuffer=None):
        """
            This queries the shapefile around a buffer of a point
            The results of this spatial query are used for shoreline detection.

            Using the entire shapefile without the spatial query takes over
            30 times as long with world land polygons.

        """
        spatialbuffer = spatialbuffer or self._spatialbuffer
        self._spatial_query_object = None
        geoms = []

        if point:
            self._spatial_query_object = point.buffer(spatialbuffer)
            geoms = self.get_geoms_for_bounds(
                self._spatial_query_object.envelope.bounds)

        self._geoms = []

        # The _geoms should be only Polygons, not MultiPolygons
        for geom in geoms:
            try:
                if isinstance(geom, Polygon):
                    self._geoms.append(geom)
                elif isinstance(geom, MultiPolygon):
                    for poly in geom:
                        self._geoms.append(poly)
            except:
                logger.warn(
                    "Could not find valid geometry in shoreline element.  Point: %s, Buffer: %s"
                    % (str(point), str(spatialbuffer)))
    def run(self):

        while True:

            try:
                next_task = self.task_queue.get(True, 10)
            except queue.Empty:
                logger.info("No tasks left to complete, closing %s" % self.name)
                break
            else:
                answer = (None, None)
                try:
                    answer = (1, next_task(self.active))
                except Exception:
                    logger.exception("Disabling Error")
                    if isinstance(next_task, CachingDataController):
                        answer = (-2, "CachingDataController")
                        # Tell the particles that the CachingDataController is releasing file
                        self.get_data.value = False
                        # The data controller has died, so don't process any more tasks
                        self.active.value = False
                    elif isinstance(next_task, BaseForcer):
                        answer = (-1, next_task.particle)
                    else:
                        logger.warn("Strange task raised an exception: %s" % str(next_task.__class__))
                        answer = (None, None)
                finally:
                    self.result_queue.put(answer)

                    self.nproc_lock.acquire()
                    self.n_run.value = self.n_run.value - 1
                    self.nproc_lock.release()

                    self.task_queue.task_done()
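
The same producer/consumer wiring, reduced to a standard-library sketch: pull tasks off a joinable queue with a timeout, record an answer, and always mark the task done so join() can return. The task callables and the one-second timeout are made up; the real code uses multiprocessing queues and a ten-second timeout.

import queue
import threading

task_queue = queue.Queue()
result_queue = queue.Queue()

def consumer(name):
    while True:
        try:
            next_task = task_queue.get(True, 1)      # block for at most 1 second
        except queue.Empty:
            print("No tasks left to complete, closing %s" % name)
            break
        try:
            answer = (1, next_task())
        except Exception:
            answer = (-1, None)                      # task failed; report it and keep going
        finally:
            result_queue.put(answer)
            task_queue.task_done()                   # lets task_queue.join() return

for i in range(3):
    task_queue.put(lambda i=i: i * i)

worker = threading.Thread(target=consumer, args=("Consumer-1",))
worker.start()
task_queue.join()                                    # blocks until every task_done() call
worker.join()
print([result_queue.get() for _ in range(3)])        # -> [(1, 0), (1, 1), (1, 4)]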
Example #6
 def load_initial_dataset(self):
     """
     Initialize self.dataset, then close it
     A cacher will have to wrap this in locks, while a straight runner will not.
     """
     try:
         self.dataset = CommonDataset.open(self.hydrodataset)
         if self.timevar is None:
             self.timevar = self.dataset.gettimevar(self.common_variables.get("u"))
     except Exception:
         logger.warn("No source dataset: %s.  Particle exiting" % self.hydrodataset)
         raise
Example #7
 def load_initial_dataset(self):
     """
     Initialize self.dataset, then close it
     A cacher will have to wrap this in locks, while a straight runner will not.
     """
     try:
         self.dataset = CommonDataset.open(self.hydrodataset)
         if self.timevar is None:
             self.timevar = self.dataset.gettimevar(
                 self.common_variables.get("u"))
     except Exception:
         logger.warn("No source dataset: %s.  Particle exiting" %
                     self.hydrodataset)
         raise
Example #8
    def attempt(self, particle, depth):

        # We may want to have settlement affect the u/v/w in the future
        u = 0
        v = 0
        w = 0

        # If the particle is settled, don't move it anywhere
        if particle.settled:
            return (0, 0, 0)

        # A particle is negative down from the sea surface, so "-3" is 3 meters below the surface.
        # We are assuming here that the bathymetry is also negative down.

        if self.type.lower() == "benthic":
            # Is the sea floor within the upper and lower bounds?
            if self.upper >= depth >= self.lower:
                # Move the particle to the sea floor.
                # TODO: Should the particle just swim downwards?
                newloc = Location4D(location=particle.location)
                newloc.depth = depth
                particle.location = newloc
                particle.settle()
                logger.info("Particle %d settled in %s mode" %
                            (particle.uid, self.type))
        elif self.type.lower() == "pelagic":
            # Are we in enough water to settle?
            # Ignore this bathymetry test since we would need a high resolution
            # dataset for this to work.
            #if self.upper >= depth:

            # Is the particle within the range?
            if self.upper >= particle.location.depth >= self.lower:
                # Just settle the particle
                particle.settle()
                logger.info("Particle %d settled in %s mode" %
                            (particle.uid, self.type))
            else:
                logger.debug(
                    "Particle did NOT settle.  Depth conditions not met.  Upper limit: %d - Lower limit: %d - Particle: %d"
                    % (self.upper, self.lower, particle.location.depth))

            #else:
            #    logger.info("Particle did NOT settle.  Water not deep enough.  Upper limit: %d - Bathymetry: %d" % (self.upper, depth))
        else:
            logger.warn(
                "Settlement type %s not recognized, not trying to settle Particle %d."
                % (self.type, particle.uid))

        return (u, v, w)
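
The settlement test itself is just a bounds check under the negative-down depth convention described in the comments above; a minimal sketch with made-up numbers:

def within_settlement_range(depth, upper, lower):
    """upper and lower are negative-down metres, e.g. upper=-10.0, lower=-30.0."""
    return upper >= depth >= lower

print(within_settlement_range(-20.0, upper=-10.0, lower=-30.0))  # True: inside the band
print(within_settlement_range(-5.0,  upper=-10.0, lower=-30.0))  # False: too shallow
print(within_settlement_range(-40.0, upper=-10.0, lower=-30.0))  # False: too deep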
 def react(self, **kwargs):
     """
         Bounce off of a shoreline
         feature = Linestring of two points, being the line segment the particle hit.
         angle = decimal degrees from 0 (x-axis), counter-clockwise (math style)
     """
     if self._type == "bounce":
         logger.warn("This shoreline type is NOT SUPPORTED and is broken")
         return self.__bounce(**kwargs)
     elif self._type == "reverse":
         return self.__reverse(**kwargs)
     else:
         logger.warn("Not reacting to shoreline (sticky with inifinite concentration)")
         return kwargs.get('hit_point')
Example #10
 def react(self, **kwargs):
     """
         Bounce off of a shoreline
         feature = Linestring of two points, being the line segment the particle hit.
         angle = decimal degrees from 0 (x-axis), counter-clockwise (math style)
     """
     if self._type == "bounce":
         logger.warn("This shoreline type is NOT SUPPORTED and is broken")
         return self.__bounce(**kwargs)
     elif self._type == "reverse":
         return self.__reverse(**kwargs)
     else:
         logger.warn(
             "Not reacting to shoreline (sticky with inifinite concentration)"
         )
         return kwargs.get('hit_point')
Example #11
 def load_initial_dataset(self):
     """
     Initialize self.dataset, then close it
     A cacher will have to wrap this in locks, while a straight runner will not.
     """
     try:
         with self.read_lock:
             self.read_count.value += 1
             self.has_read_lock.append(os.getpid())
         self.dataset = CommonDataset.open(self.hydrodataset)
         self.dataset.closenc()
     except Exception:
         logger.warn("No source dataset: %s.  Particle exiting" % self.hydrodataset)
         raise
     finally:
         with self.read_lock:
             self.read_count.value -= 1
             self.has_read_lock.remove(os.getpid())
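
The read_lock / read_count / has_read_lock trio is shared bookkeeping so the cache writer can tell when readers still have the file open. A standalone sketch of that pattern (scaffolding only, no dataset involved):

import os
from multiprocessing import Manager

if __name__ == "__main__":
    manager = Manager()
    read_lock = manager.Lock()
    read_count = manager.Value('i', 0)
    has_read_lock = manager.list()

    # Reader enters: bump the counter and record our PID under the lock.
    with read_lock:
        read_count.value += 1
        has_read_lock.append(os.getpid())
    try:
        pass  # read from the shared dataset here
    finally:
        # Reader leaves: decrement the counter and drop our PID, again under the lock.
        with read_lock:
            read_count.value -= 1
            has_read_lock.remove(os.getpid())

    print(read_count.value, len(has_read_lock))  # -> 0 0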
Example #12
    def attempt(self, particle, depth):

        # We may want to have settlement affect the u/v/w in the future
        u = 0
        v = 0
        w = 0

        # If the particle is settled, don't move it anywhere
        if particle.settled:
            return (0,0,0)

        # A particle is negative down from the sea surface, so "-3" is 3 meters below the surface.
        # We are assuming here that the bathymetry is also negative down.

        if self.type.lower() == "benthic":
            # Is the sea floor within the upper and lower bounds?
            if self.upper >= depth >= self.lower:
                # Move the particle to the sea floor.
                # TODO: Should the particle just swim downwards?
                newloc = Location4D(location=particle.location)
                newloc.depth = depth
                particle.location = newloc
                particle.settle()
                logger.info("Particle %d settled in %s mode" % (particle.uid, self.type))
        elif self.type.lower() == "pelagic":
            # Are we in enough water to settle?
            # Ignore this bathymetry test since we would need a high resolution 
            # dataset for this to work.
            #if self.upper >= depth:

            # Is the particle within the range?
            if self.upper >= particle.location.depth >= self.lower:
                # Just settle the particle
                particle.settle()
                logger.info("Particle %d settled in %s mode" % (particle.uid, self.type))
            else:
                logger.debug("Particle did NOT settle.  Depth conditions not met.  Upper limit: %d - Lower limit: %d - Particle: %d" % (self.upper, self.lower, particle.location.depth))
                
            #else:
            #    logger.info("Particle did NOT settle.  Water not deep enough.  Upper limit: %d - Bathymetry: %d" % (self.upper, depth))
        else:
            logger.warn("Settlement type %s not recognized, not trying to settle Particle %d." % (self.type, particle.uid))

        return (u,v,w)
Example #13
 def load_initial_dataset(self):
     """
     Initialize self.dataset, then close it
     A cacher will have to wrap this in locks, while a straight runner will not.
     """
     try:
         with self.read_lock:
             self.read_count.value += 1
             self.has_read_lock.append(os.getpid())
         self.dataset = CommonDataset.open(self.hydrodataset)
         self.dataset.closenc()
     except Exception:
         logger.warn("No source dataset: %s.  Particle exiting" %
                     self.hydrodataset)
         raise
     finally:
         with self.read_lock:
             self.read_count.value -= 1
             self.has_read_lock.remove(os.getpid())
Example #14
    def run(self, hydrodataset, **kwargs):

        self.hydrodataset = hydrodataset

        self.setup_run(**kwargs)

        logger.progress((4, "Starting tasks"))
        self.result = self.start_tasks()
        if self.result is None:
            raise BaseDataControllerError("Not all tasks started! Exiting.")

        # This blocks until the tasks are all done.
        self.particles = self.listen_for_results()

        logger.info('Consumers are all finished!')

        logger.info('Cleaning up')
        self.cleanup()

        if len(self.particles) > 0:
            # If output_formats and path specified,
            # output particle run data to disk when completed
            if "output_formats" in kwargs:

                logger.progress((96, "Exporting results"))

                # Make sure output_path is also included
                if kwargs.get("output_path", None) is not None:
                    formats = kwargs.get("output_formats")
                    output_path = kwargs.get("output_path")
                    if isinstance(formats, list):
                        for format in formats:
                            logger.info("Exporting to: %s" % format)
                            try:
                                self.export(output_path, format=format)
                            except:
                                logger.exception("Failed to export to: %s" % format)
                    else:
                        logger.warn('The output_formats parameter should be a list, not saving any output!')
                else:
                    logger.warn('No output path defined, not saving any output!')
            else:
                logger.warn('No output format defined, not saving any output!')
        else:
            logger.warn("Model didn't actually do anything, check the log.")
            if self.error_code == -2:
                raise BaseDataControllerError("Error in the BaseDataController")
            else:
                raise ModelError("Error in the model")

        logger.progress((97, "Model Run Complete"))
        return self.particles
Example #15
    def __reverse(self, **kwargs):
        """
            Reverse particle just off of the shore in the direction that it came in.
            Adds a slight random factor to the distance and angle it is reversed in.
        """
        start_point = kwargs.pop('start_point')
        hit_point = kwargs.pop('hit_point')
        distance = kwargs.pop('distance')
        azimuth = kwargs.pop('azimuth')
        reverse_azimuth = kwargs.pop('reverse_azimuth')
        reverse_distance = kwargs.get('reverse_distance', None)
        if reverse_distance is None:
            reverse_distance = 100

        # Randomize the reverse angle slightly (+/- 5 degrees)
        random_azimuth = reverse_azimuth + AsaRandom.random() * 5

        # Nudge the hit point off of the shore by a tiny bit to test the shoreline intersection in the while loop.
        nudged_hit_point = AsaGreatCircle.great_circle(distance=0.01, azimuth=random_azimuth, start_point=hit_point)
        nudged_hit_location = Location4D(latitude=nudged_hit_point['latitude'], longitude=nudged_hit_point['longitude'], depth=start_point.depth)

        new_point = AsaGreatCircle.great_circle(distance=reverse_distance, azimuth=random_azimuth, start_point=hit_point)
        new_loc = Location4D(latitude=new_point['latitude'], longitude=new_point['longitude'], depth=start_point.depth)

        # Keep trying to throw the particle back, halving the distance each time until it is in water.
        # Only halve it 10 times before giving up and returning the point which the particle came from.
        count = 0
        # Distance amount to half each iteration
        changing_distance = reverse_distance
        while self.intersect(start_point=nudged_hit_location.point, end_point=new_loc.point) and count < 10:
            changing_distance /= 2
            new_point = AsaGreatCircle.great_circle(distance=changing_distance, azimuth=random_azimuth, start_point=hit_point)
            new_loc = Location4D(latitude=new_point['latitude'], longitude=new_point['longitude'], depth=start_point.depth)
            count += 1

        # We tried 10 times and the particle was still on shore, return the point the particle started from.
        # No randomization.
        if count == 10:
            logger.warn("Could not react particle with shoreline.  Assuming particle did not move from original location")
            new_loc = start_point

        return new_loc
    def index(self, point=None, spatialbuffer=None):
        spatialbuffer              = spatialbuffer or self._spatialbuffer
        self._spatial_query_object = None
        geoms                      = []

        if point:
            self._spatial_query_object = point.buffer(spatialbuffer)
            bounds                     = point.buffer(spatialbuffer).envelope.wkt
            geoms                      = self.get_geoms_for_bounds(bounds)

        self._geoms = []

        for geom in geoms:
            try:
                if isinstance(geom, Polygon):
                    self._geoms.append(geom)
                elif isinstance(geom, MultiPolygon):
                    for poly in geom:
                        self._geoms.append(poly)
            except:
                logger.warn("Could not find valid geometry in shoreline element.  Point: %s, Buffer: %s" % (str(point), str(spatialbuffer)))
Example #17
    def run(self, **kwargs):

        logger.progress((4, "Starting tasks"))
        self.result = self.start_tasks(**kwargs)
        if self.result is None:
            raise BaseDataControllerError("Not all tasks started! Exiting.")

        # Store results in hdf5 file for processing later
        output_h5_file = None
        if kwargs.get('output_path') is not None:
            output_h5_file = os.path.join(kwargs.get('output_path'),
                                          'results.h5')

        if self.thread_result_listener is True:
            rl = threading.Thread(name="ResultListener",
                                  target=self.listen_for_results,
                                  args=(output_h5_file,
                                        self.total_particle_count()))
            rl.daemon = True
            rl.start()
            rl.join()  # This blocks until the tasks are all done.
        else:
            self.listen_for_results(output_h5_file, self.total_particle_count(
            ))  # This blocks until the tasks are all done.

        logger.info('Tasks are all finished... Cleaning up!!')
        self.cleanup()

        # If output_formats and path specified,
        # output particle run data to disk when completed
        if "output_formats" in kwargs:

            logger.progress((96, "Exporting results"))

            # Make sure output_path is also included
            if kwargs.get("output_path", None) is not None:
                formats = kwargs.get("output_formats")
                output_path = kwargs.get("output_path")
                if isinstance(formats, list):
                    for fmt in formats:
                        logger.info("Exporting to: %s" % fmt)
                        try:
                            # Calls the export function
                            fmt.export(output_path, output_h5_file)
                        except:
                            logger.exception("Failed to export to: %s" % fmt)
                else:
                    logger.warn(
                        'The output_formats parameter should be a list, not saving any output!'
                    )
            else:
                logger.warn('No output path defined, not saving any output!')
        else:
            logger.warn(
                'No output_formats parameter was defined, not saving any output!'
            )

        logger.progress((97, "Model Run Complete"))

        return
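
A hedged sketch of the optional threaded result listener used above: run the listener in a daemon thread and immediately join it, which behaves like the inline call while keeping the listener isolated on its own thread. The listener body below is a stand-in.

import threading
import time

def listen_for_results(output_h5_file, expected):
    for _ in range(expected):
        time.sleep(0.01)                  # stand-in for pulling a result off a queue
    print("wrote %d results to %s" % (expected, output_h5_file))

rl = threading.Thread(name="ResultListener",
                      target=listen_for_results,
                      args=("results.h5", 3))
rl.daemon = True
rl.start()
rl.join()                                 # blocks until the listener finishes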
Example #18
    def index(self, point=None, spatialbuffer=None):
        spatialbuffer = spatialbuffer or self._spatialbuffer
        self._spatial_query_object = None
        geoms = []

        if point:
            self._spatial_query_object = point.buffer(spatialbuffer)
            bounds = point.buffer(spatialbuffer).envelope.wkt
            geoms = self.get_geoms_for_bounds(bounds)

        self._geoms = []

        for geom in geoms:
            try:
                if isinstance(geom, Polygon):
                    self._geoms.append(geom)
                elif isinstance(geom, MultiPolygon):
                    for poly in geom:
                        self._geoms.append(poly)
            except:
                logger.warn(
                    "Could not find valid geometry in shoreline element.  Point: %s, Buffer: %s"
                    % (str(point), str(spatialbuffer)))
    def run(self):

        while True:

            try:
                next_task = self.task_queue.get(True, 10)
            except Queue.Empty:
                logger.info("No tasks left to complete, closing %s" % self.name)
                break
            else:
                answer = (None, None)
                try:
                    answer = (1, next_task(self.name, self.active))
                except Exception as detail:
                    exc_type, exc_value, exc_traceback = sys.exc_info()
                    logger.error("Disabling Error: " +\
                                 repr(traceback.format_exception(exc_type, exc_value,
                                              exc_traceback)))
                    if isinstance(next_task, DataController):
                        answer = (-2, "DataController")
                        # Tell the particles that the DataController is releasing file
                        self.get_data.value = False
                        # The data controller has died, so don't process any more tasks
                        self.active.value = False
                    elif isinstance(next_task, ForceParticle):
                        answer = (-1, next_task.part)
                    else:
                        logger.warn("Strange task raised an exception: %s" % str(next_task.__class__))
                        answer = (None, None)
                finally:
                    self.result_queue.put(answer)

                    self.nproc_lock.acquire()
                    self.n_run.value = self.n_run.value - 1
                    self.nproc_lock.release()

                    self.task_queue.task_done()
Example #20
    def run(self):

        while True:

            try:
                next_task = self.task_queue.get(True, 10)
            except queue.Empty:
                logger.info("No tasks left to complete, closing %s" %
                            self.name)
                break
            else:
                answer = (None, None)
                try:
                    answer = (1, next_task(self.active))
                except Exception:
                    logger.exception("Disabling Error")
                    if isinstance(next_task, CachingDataController):
                        answer = (-2, "CachingDataController")
                        # Tell the particles that the CachingDataController is releasing file
                        self.get_data.value = False
                        # The data controller has died, so don't process any more tasks
                        self.active.value = False
                    elif isinstance(next_task, BaseForcer):
                        answer = (-1, next_task.particle)
                    else:
                        logger.warn("Strange task raised an exception: %s" %
                                    str(next_task.__class__))
                        answer = (None, None)
                finally:
                    self.result_queue.put(answer)

                    self.nproc_lock.acquire()
                    self.n_run.value = self.n_run.value - 1
                    self.nproc_lock.release()

                    self.task_queue.task_done()
Example #21
    def index(self, **kwargs):
        """
            This queries the shapefile around a buffer of a point
            The results of this spatial query are used for shoreline detection.

            Using the entire shapefile without the spatial query takes over
            30 times as long with world land polygons.

        """

        point = kwargs.pop("point", None)
        spatialbuffer = kwargs.pop("spatialbuffer", self._spatialbuffer)

        self._layer.SetSpatialFilter(None)
        self._spatial_query_object = None

        if point:
            self._spatial_query_object = point.buffer(spatialbuffer)
            poly = ogr.CreateGeometryFromWkt(self._spatial_query_object.wkt)
            self._layer.SetSpatialFilter(poly)
            poly.Destroy()

        self._geoms = []
        # The _geoms should be only Polygons, not MultiPolygons
        for element in self._layer:
            try:
                geom = wkb.loads(element.GetGeometryRef().ExportToWkb())
                if isinstance(geom, Polygon):
                    self._geoms.append(geom)
                elif isinstance(geom, MultiPolygon):
                    for poly in geom:
                        self._geoms.append(poly)
            except:
                logger.warn(
                    "Could not find valid geometry in shoreline element.  Point: %s, Buffer: %s"
                    % (str(point), str(spatialbuffer)))
Example #22
    def run(self, **kwargs):

        logger.progress((4, "Starting tasks"))
        self.result = self.start_tasks(**kwargs)
        if self.result is None:
            raise BaseDataControllerError("Not all tasks started! Exiting.")

        # Store results in hdf5 file for processing later
        output_h5_file = None
        if kwargs.get('output_path') is not None:
            output_h5_file = os.path.join(kwargs.get('output_path'), 'results.h5')

        if self.thread_result_listener is True:
            rl = threading.Thread(name="ResultListener", target=self.listen_for_results, args=(output_h5_file, self.total_particle_count()))
            rl.daemon = True
            rl.start()
            rl.join()  # This blocks until the tasks are all done.
        else:
            self.listen_for_results(output_h5_file, self.total_particle_count())    # This blocks until the tasks are all done.

        logger.info('Tasks are all finished... Cleaning up!!')
        self.cleanup()

        # If output_formats and path specified,
        # output particle run data to disk when completed
        if "output_formats" in kwargs:

            logger.progress((96, "Exporting results"))

            # Make sure output_path is also included
            if kwargs.get("output_path", None) is not None:
                formats = kwargs.get("output_formats")
                output_path = kwargs.get("output_path")
                if isinstance(formats, list):
                    for fmt in formats:
                        logger.info("Exporting to: %s" % fmt)
                        try:
                            # Calls the export function
                            fmt.export(output_path, output_h5_file)
                        except:
                            logger.exception("Failed to export to: %s" % fmt)
                else:
                    logger.warn('The output_formats parameter should be a list, not saving any output!')
            else:
                logger.warn('No output path defined, not saving any output!')
        else:
            logger.warn('No output_formats parameter was defined, not saving any output!')

        logger.progress((97, "Model Run Complete"))

        return
Example #23
    def __reverse(self, **kwargs):
        """
            Reverse particle just off of the shore in the direction that it came in.
            Adds a slight random factor to the distance and angle it is reversed in.
        """

        #st = time.clock()

        start_point = kwargs.pop('start_point')
        hit_point = kwargs.pop('hit_point')
        reverse_azimuth = kwargs.pop('reverse_azimuth')
        reverse_distance = kwargs.get('reverse_distance', None)
        if reverse_distance is None:
            reverse_distance = 100

        # Randomize the reverse angle slightly (+/- 5 degrees)
        random_azimuth = reverse_azimuth + AsaRandom.random() * 5

        count = 0
        nudge_distance = 0.01
        nudge_point = AsaGreatCircle.great_circle(distance=nudge_distance,
                                                  azimuth=reverse_azimuth,
                                                  start_point=hit_point)
        nudge_loc = Location4D(latitude=nudge_point['latitude'],
                               longitude=nudge_point['longitude'],
                               depth=start_point.depth)

        # Find a point just offshore to do the testing with, doubling the nudge distance up to 16 times (~650m).
        # This makes sure the start_point is in the water for the next call to intersect (next while loop).
        while self.intersect(single_point=nudge_loc.point) and count < 16:
            nudge_distance *= 2
            nudge_point = AsaGreatCircle.great_circle(distance=nudge_distance,
                                                      azimuth=reverse_azimuth,
                                                      start_point=hit_point)
            nudge_loc = Location4D(latitude=nudge_point['latitude'],
                                   longitude=nudge_point['longitude'],
                                   depth=start_point.depth)
            count += 1

        # We tried 16 times and couldn't find a point.  This should totally never happen.
        if count == 16:
            logger.warn(
                "LOOK: Could not find location in water to do shoreline calculation with.  Assuming particle did not move from original location"
            )
            return start_point

        # Keep trying to throw the particle back, halving the distance each time until it is in water.
        # Only halve it 6 times before giving up and returning the point which the particle came from.
        count = 0
        # Distance amount to half each iteration
        changing_distance = reverse_distance
        new_point = AsaGreatCircle.great_circle(distance=reverse_distance,
                                                azimuth=random_azimuth,
                                                start_point=hit_point)
        new_loc = Location4D(latitude=new_point['latitude'],
                             longitude=new_point['longitude'],
                             depth=start_point.depth)

        # We don't want to reverse further than the current spatial buffer, because we reindex the
        # source file every time we reverse, which would slow down the calculations considerably.
        while (not self._spatial_query_object.contains(new_loc.point)
               or self.intersect(start_point=nudge_loc.point,
                                 end_point=new_loc.point)) and count < 6:
            changing_distance /= 2
            new_point = AsaGreatCircle.great_circle(distance=changing_distance,
                                                    azimuth=random_azimuth,
                                                    start_point=hit_point)
            new_loc = Location4D(latitude=new_point['latitude'],
                                 longitude=new_point['longitude'],
                                 depth=start_point.depth)
            count += 1

        # We tried 6 times and the particle was still on shore; return the point the particle started from.
        # No randomization.
        if count == 6:
            logger.warn(
                "LOOK: Could not react particle with shoreline.  Assuming particle did not move from original location"
            )
            return start_point

        #logger.info("Reaction time: %f" % (time.clock() - st))
        return new_loc
Example #24
    def fill_cache_with_linterp_data(self, i, currenttime):
        """
            Method to streamline requests for data from the cache.
            Uses linear interpolation between timesteps to
            get u, v, w, temp, salt.
        """
        if self.active.value is True:
            while self.get_data.value is True:
                logger.debug("Waiting for DataController to release cache file so I can read from it...")
                timer.sleep(2)
                pass

        if self.need_data(i+1):
            # Acquire lock for asking for data
            self.data_request_lock.acquire()
            self.has_data_request_lock.value = os.getpid()
            try:
                # Do I still need data?
                if self.need_data(i+1):

                    # Tell the DataController that we are going to be reading from the file
                    with self.read_lock:
                        self.read_count.value += 1
                        self.has_read_lock.append(os.getpid())

                    # Open netcdf file on disk from commondataset
                    self.dataset.opennc()
                    # Get the indices for the current particle location
                    indices = self.dataset.get_indices('u', timeinds=[np.asarray([i-1])], point=self.particle.location )
                    self.dataset.closenc()

                    with self.read_lock:
                        self.read_count.value -= 1
                        self.has_read_lock.remove(os.getpid())

                    # Override the time
                    # get the current time index data
                    self.point_get.value = [indices[0] + 1, indices[-2], indices[-1]]
                    # Request that the data controller update the cache
                    self.get_data.value = True
                    # Wait until the data controller is done
                    if self.active.value is True:
                        while self.get_data.value is True:
                            logger.debug("Waiting for DataController to update cache with the CURRENT time index")
                            timer.sleep(2)
                            pass

                    # Do we still need to get the next timestep?
                    if self.need_data(i+1):
                        # get the next time index data
                        self.point_get.value = [indices[0] + 2, indices[-2], indices[-1]]
                        # Request that the data controller update the cache
                        self.get_data.value = True
                        # Wait until the data controller is done
                        if self.active.value is True:
                            while self.get_data.value is True:
                                logger.debug("Waiting for DataController to update cache with the NEXT time index")
                                timer.sleep(2)
                                pass
            except Exception:
                logger.warn("Particle failed to request data correctly")
                raise
            finally:
                # Release lock for asking for data
                self.has_data_request_lock.value = -1
                self.data_request_lock.release()
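
Once both time indices are in the cache, the "linterp" part is ordinary linear interpolation between the two bracketing timesteps. A minimal sketch with an invented helper and made-up values:

def linterp(value_before, value_after, t_before, t_after, t):
    # Fraction of the way from t_before to t_after, then blend the two values.
    frac = (t - t_before) / float(t_after - t_before)
    return value_before + (value_after - value_before) * frac

# u is 0.25 m/s at t=0 s and 0.75 m/s at t=3600 s; the particle time is t=900 s.
print(linterp(0.25, 0.75, t_before=0.0, t_after=3600.0, t=900.0))  # -> 0.375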
Example #25
    def run(self):

        self.load_initial_dataset()

        redis_connection = None
        if self.redis_url is not None and self.redis_results_channel is not None:
            import redis
            redis_connection = redis.from_url(self.redis_url)

        # Setup shoreline
        self._shoreline = None
        if self.useshore is True:
            self._shoreline = Shoreline(
                path=self.shoreline_path,
                feature_name=self.shoreline_feature,
                point=self.release_location_centroid,
                spatialbuffer=self.shoreline_index_buffer)
            # Make sure we are not starting on land.  Raises exception if we are.
            self._shoreline.intersect(
                start_point=self.release_location_centroid,
                end_point=self.release_location_centroid)

        # Setup Bathymetry
        if self.usebathy is True:
            try:
                self._bathymetry = Bathymetry(file=self.bathy_path)
            except Exception:
                logger.exception(
                    "Could not load Bathymetry file: %s, using no Bathymetry for this run!"
                    % self.bathy_path)
                self.usebathy = False

        # Calculate datetime at every timestep
        modelTimestep, newtimes = AsaTransport.get_time_objects_from_model_timesteps(
            self.times, start=self.start_time)

        if self.time_method == 'interp':
            time_indexs = self.timevar.nearest_index(newtimes, select='before')
        elif self.time_method == 'nearest':
            time_indexs = self.timevar.nearest_index(newtimes)
        else:
            logger.warn("Method for computing u,v,w,temp,salt not supported!")
        try:
            assert len(newtimes) == len(time_indexs)
        except AssertionError:
            logger.exception(
                "Time indexes are messed up. Need to have equal datetime and time indexes"
            )
            raise

        # Keep track of how much time we spend in each area.
        tot_boundary_time = 0.
        tot_model_time = {}
        tot_read_data = 0.
        for m in self.models:
            tot_model_time[m.name] = 0.

        # Set the base conditions
        # If using Redis, send the results
        if redis_connection is not None:
            redis_connection.publish(self.redis_results_channel,
                                     json.dumps(self.particle.timestep_dump()))

        # loop over timesteps
        # We don't loop over the last time_index because
        # we need to query in the time_index and set the particle's
        # location as the 'newtime' object.
        for loop_i, i in enumerate(time_indexs[0:-1]):

            if self.active and self.active.value is False:
                raise ValueError("Particle exiting due to Failure.")

            newloc = None

            st = time.clock()
            # Get the variable data required by the models
            if self.time_method == 'nearest':
                u, v, w, temp, salt = self.get_nearest_data(i)
            elif self.time_method == 'interp':
                u, v, w, temp, salt = self.get_linterp_data(
                    i, newtimes[loop_i])
            else:
                logger.warn(
                    "Method for computing u,v,w,temp,salt is unknown. Only 'nearest' and 'interp' are supported."
                )
            tot_read_data += (time.clock() - st)

            # Get the bathy value at the particle's location
            if self.usebathy is True:
                bathymetry_value = self._bathymetry.get_depth(
                    self.particle.location)
            else:
                bathymetry_value = -999999999999999

            # Age the particle by the modelTimestep (seconds)
            # 'Age' meaning the amount of time it has been forced.
            self.particle.age(seconds=modelTimestep[loop_i])

            # loop over models - sort these in the order you want them to run
            for model in self.models:
                st = time.clock()
                movement = model.move(self.particle,
                                      u,
                                      v,
                                      w,
                                      modelTimestep[loop_i],
                                      temperature=temp,
                                      salinity=salt,
                                      bathymetry_value=bathymetry_value)
                newloc = Location4D(latitude=movement['latitude'],
                                    longitude=movement['longitude'],
                                    depth=movement['depth'],
                                    time=newtimes[loop_i + 1])
                tot_model_time[model.name] += (time.clock() - st)
                if logger.isEnabledFor(logging.DEBUG):
                    logger.debug(
                        "%s - moved %.3f meters (horizontally) and %.3f meters (vertically) by %s with data from %s"
                        % (self.particle.logstring(), movement['distance'],
                           movement['vertical_distance'],
                           model.__class__.__name__,
                           newtimes[loop_i].isoformat()))
                if newloc:
                    st = time.clock()
                    self.boundary_interaction(
                        particle=self.particle,
                        starting=self.particle.location,
                        ending=newloc,
                        distance=movement['distance'],
                        angle=movement['angle'],
                        azimuth=movement['azimuth'],
                        reverse_azimuth=movement['reverse_azimuth'],
                        vertical_distance=movement['vertical_distance'],
                        vertical_angle=movement['vertical_angle'])
                    tot_boundary_time += (time.clock() - st)
                if logger.isEnabledFor(logging.DEBUG):
                    logger.debug(
                        "%s - was forced by %s and is now at %s" %
                        (self.particle.logstring(), model.__class__.__name__,
                         self.particle.location.logstring()))

            self.particle.note = self.particle.outputstring()
            # Each timestep, save the particles status and environmental variables.
            # This keeps fields such as temp, salt, halted, settled, and dead matched up with the number of timesteps
            self.particle.save()

            # If using Redis, send the results
            if redis_connection is not None:
                redis_connection.publish(
                    self.redis_results_channel,
                    json.dumps(self.particle.timestep_dump()))

        self.dataset.closenc()

        # We won't pull data for the last entry in locations, but we need to populate it with fill data.
        self.particle.fill_gap()

        if self.usebathy is True:
            self._bathymetry.close()

        if self.useshore is True:
            self._shoreline.close()

        logger.info(
            textwrap.dedent('''Particle %i Stats:
                          Data read: %f seconds
                          Model forcing: %s seconds
                          Boundary intersection: %f seconds''' %
                            (self.particle.uid, tot_read_data, {
                                s: '{:g} seconds'.format(f)
                                for s, f in list(tot_model_time.items())
                            }, tot_boundary_time)))

        return self.particle
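
A hedged sketch of what the timevar.nearest_index() calls above are selecting for the two time_method options: the closest timestep for 'nearest', or the last timestep at or before the requested time for 'interp' (select='before'). The real timevar works on datetime objects; the seconds below are a numeric stand-in.

import bisect

model_times = [0.0, 3600.0, 7200.0, 10800.0]   # hypothetical model timesteps (seconds)

def nearest_index(times, t, select=None):
    i = bisect.bisect_right(times, t)          # first index strictly after t
    if select == 'before':
        return max(i - 1, 0)                   # timestep at or before t
    before, after = max(i - 1, 0), min(i, len(times) - 1)
    return before if abs(t - times[before]) <= abs(times[after] - t) else after

print(nearest_index(model_times, 5000.0))                   # -> 1 (3600 is closest)
print(nearest_index(model_times, 5000.0, select='before'))  # -> 1
print(nearest_index(model_times, 7000.0))                   # -> 2 (7200 is closest)
print(nearest_index(model_times, 7000.0, select='before'))  # -> 1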
Example #26
    def run(self):

        self.load_initial_dataset()

        redis_connection = None
        if self.redis_url is not None and self.redis_results_channel is not None:
            import redis
            redis_connection = redis.from_url(self.redis_url)

        # Setup shoreline
        self._shoreline = None
        if self.useshore is True:
            self._shoreline = Shoreline(path=self.shoreline_path, feature_name=self.shoreline_feature, point=self.release_location_centroid, spatialbuffer=self.shoreline_index_buffer)
            # Make sure we are not starting on land.  Raises exception if we are.
            self._shoreline.intersect(start_point=self.release_location_centroid, end_point=self.release_location_centroid)

        # Setup Bathymetry
        if self.usebathy is True:
            try:
                self._bathymetry = Bathymetry(file=self.bathy_path)
            except Exception:
                logger.exception("Could not load Bathymetry file: %s, using no Bathymetry for this run!" % self.bathy_path)
                self.usebathy = False

        # Calculate datetime at every timestep
        modelTimestep, newtimes = AsaTransport.get_time_objects_from_model_timesteps(self.times, start=self.start_time)

        if self.time_method == 'interp':
            time_indexs = self.timevar.nearest_index(newtimes, select='before')
        elif self.time_method == 'nearest':
            time_indexs = self.timevar.nearest_index(newtimes)
        else:
            logger.warn("Method for computing u,v,w,temp,salt not supported!")
        try:
            assert len(newtimes) == len(time_indexs)
        except AssertionError:
            logger.exception("Time indexes are messed up. Need to have equal datetime and time indexes")
            raise

        # Keep track of how much time we spend in each area.
        tot_boundary_time = 0.
        tot_model_time    = {}
        tot_read_data     = 0.
        for m in self.models:
            tot_model_time[m.name] = 0.

        # Set the base conditions
        # If using Redis, send the results
        if redis_connection is not None:
            redis_connection.publish(self.redis_results_channel, json.dumps(self.particle.timestep_dump()))

        # loop over timesteps
        # We don't loop over the last time_index because
        # we need to query in the time_index and set the particle's
        # location as the 'newtime' object.
        for loop_i, i in enumerate(time_indexs[0:-1]):

            if self.active and self.active.value is False:
                raise ValueError("Particle exiting due to Failure.")

            newloc = None

            st = time.clock()
            # Get the variable data required by the models
            if self.time_method == 'nearest':
                u, v, w, temp, salt = self.get_nearest_data(i)
            elif self.time_method == 'interp':
                u, v, w, temp, salt = self.get_linterp_data(i, newtimes[loop_i])
            else:
                logger.warn("Method for computing u,v,w,temp,salt is unknown. Only 'nearest' and 'interp' are supported.")
            tot_read_data += (time.clock() - st)

            # Get the bathy value at the particle's location
            if self.usebathy is True:
                bathymetry_value = self._bathymetry.get_depth(self.particle.location)
            else:
                bathymetry_value = -999999999999999

            # Age the particle by the modelTimestep (seconds)
            # 'Age' meaning the amount of time it has been forced.
            self.particle.age(seconds=modelTimestep[loop_i])

            # loop over models - sort these in the order you want them to run
            for model in self.models:
                st = time.clock()
                movement = model.move(self.particle, u, v, w, modelTimestep[loop_i], temperature=temp, salinity=salt, bathymetry_value=bathymetry_value)
                newloc = Location4D(latitude=movement['latitude'], longitude=movement['longitude'], depth=movement['depth'], time=newtimes[loop_i+1])
                tot_model_time[model.name] += (time.clock() - st)
                if logger.isEnabledFor(logging.DEBUG):
                    logger.debug("%s - moved %.3f meters (horizontally) and %.3f meters (vertically) by %s with data from %s" % (self.particle.logstring(), movement['distance'], movement['vertical_distance'], model.__class__.__name__, newtimes[loop_i].isoformat()))
                if newloc:
                    st = time.clock()
                    self.boundary_interaction(particle=self.particle, starting=self.particle.location, ending=newloc,
                                              distance=movement['distance'], angle=movement['angle'],
                                              azimuth=movement['azimuth'], reverse_azimuth=movement['reverse_azimuth'],
                                              vertical_distance=movement['vertical_distance'], vertical_angle=movement['vertical_angle'])
                    tot_boundary_time += (time.clock() - st)
                if logger.isEnabledFor(logging.DEBUG):
                    logger.debug("%s - was forced by %s and is now at %s" % (self.particle.logstring(), model.__class__.__name__, self.particle.location.logstring()))

            self.particle.note = self.particle.outputstring()
            # Each timestep, save the particles status and environmental variables.
            # This keeps fields such as temp, salt, halted, settled, and dead matched up with the number of timesteps
            self.particle.save()

            # If using Redis, send the results
            if redis_connection is not None:
                redis_connection.publish(self.redis_results_channel, json.dumps(self.particle.timestep_dump()))

        self.dataset.closenc()

        # We won't pull data for the last entry in locations, but we need to populate it with fill data.
        self.particle.fill_gap()

        if self.usebathy is True:
            self._bathymetry.close()

        if self.useshore is True:
            self._shoreline.close()

        logger.info(textwrap.dedent('''Particle %i Stats:
                          Data read: %f seconds
                          Model forcing: %s seconds
                          Boundary intersection: %f seconds''' % (self.particle.uid, tot_read_data, { s: '{:g} seconds'.format(f) for s, f in list(tot_model_time.items()) }, tot_boundary_time)))

        return self.particle
    def run(self, hydrodataset, **kwargs):

        # Add ModelController description to logfile
        logger.info(self)

        # Add the model descriptions to logfile
        for m in self._models:
            logger.info(m)

        # Calculate the model timesteps
        # We need len(times) = self._nstep + 1 since data is stored one timestep
        # after a particle is forced with the final timestep's data.
        times = range(0, (self._step * self._nstep) + 1, self._step)
        # Calculate a datetime object for each model timestep
        # This method is duplicated in DataController and ForceParticle
        # using the 'times' variables above.  Will be useful in those other
        # locations for particles released at different times
        # i.e. released over a few days
        modelTimestep, self.datetimes = AsaTransport.get_time_objects_from_model_timesteps(
            times, start=self.start)

        time_chunk = self._time_chunk
        horiz_chunk = self._horiz_chunk
        low_memory = kwargs.get("low_memory", False)

        # Should we remove the cache file at the end of the run?
        remove_cache = kwargs.get("remove_cache", True)

        self.bathy_path = kwargs.get("bathy", None)

        self.cache_path = kwargs.get("cache", None)
        if self.cache_path is None:
            # Generate temp filename for dataset cache
            default_cache_dir = os.path.join(os.path.dirname(__file__),
                                             "_cache")
            temp_name = AsaRandom.filename(prefix=str(
                datetime.now().microsecond),
                                           suffix=".nc")
            self.cache_path = os.path.join(default_cache_dir, temp_name)

        logger.progress((1, "Setting up particle start locations"))
        point_locations = []
        if isinstance(self.geometry, Point):
            point_locations = [self.reference_location] * self._npart
        elif isinstance(self.geometry, Polygon) or isinstance(
                self.geometry, MultiPolygon):
            point_locations = [
                Location4D(latitude=loc.y,
                           longitude=loc.x,
                           depth=self._depth,
                           time=self.start)
                for loc in AsaTransport.fill_polygon_with_points(
                    goal=self._npart, polygon=self.geometry)
            ]

        # Initialize the particles
        logger.progress((2, "Initializing particles"))
        for x in xrange(0, self._npart):
            p = LarvaParticle(id=x)
            p.location = point_locations[x]
            # We don't need to fill the location gaps here for environment variables
            # because the first data collected actually relates to this original
            # position.
            # We do need to fill in fields such as settled, halted, etc.
            p.fill_status_gap()
            # Set the initial note
            p.note = p.outputstring()
            p.notes.append(p.note)
            self.particles.append(p)

        # This is where it makes sense to implement the multiprocessing
        # looping for particles and models. Can handle each particle in
        # parallel probably.
        #
        # Get the number of cores (may take some tuning) and create that
        # many workers then pass particles into the queue for the workers
        mgr = multiprocessing.Manager()
        nproc = multiprocessing.cpu_count() - 1
        if nproc <= 0:
            raise ValueError(
                "Model requires at least two CPU cores to run")

        # Each particle is a task, plus the DataController
        number_of_tasks = len(self.particles) + 1

        # We need a process for each particle and one for the data controller
        nproc = min(number_of_tasks, nproc)

        # When a particle requests data
        data_request_lock = mgr.Lock()
        # PID of process with lock
        has_data_request_lock = mgr.Value('int', -1)

        nproc_lock = mgr.Lock()

        # Create the task queue for all of the particles and the DataController
        tasks = multiprocessing.JoinableQueue(number_of_tasks)
        # Create the result queue for all of the particles and the DataController
        results = mgr.Queue(number_of_tasks)

        # Create the shared state objects
        get_data = mgr.Value('bool', True)
        # Number of tasks
        n_run = mgr.Value('int', number_of_tasks)
        updating = mgr.Value('bool', False)

        # When something is reading from cache file
        read_lock = mgr.Lock()
        # list of PIDs that are reading
        has_read_lock = mgr.list()
        read_count = mgr.Value('int', 0)

        # When something is writing to the cache file
        write_lock = mgr.Lock()
        # PID of process with lock
        has_write_lock = mgr.Value('int', -1)
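        # Note on the shared-state protocol used below (as implemented in this module):
        # read_lock/read_count/has_read_lock form a simple readers counter for the cache
        # file, write_lock/has_write_lock give the DataController exclusive write access,
        # and data_request_lock serializes which particle may ask for a cache update.
        # The has_*_lock values record the owning PID so that the monitoring loop further
        # down can release locks held by zombie processes.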

        point_get = mgr.Value('list', [0, 0, 0])
        active = mgr.Value('bool', True)

        logger.progress((3, "Initializing and caching hydro model's grid"))
        try:
            ds = CommonDataset.open(hydrodataset)
            # Query the dataset for common variable names
            # and the time variable.
            logger.debug("Retrieving variable information from dataset")
            common_variables = self.get_common_variables_from_dataset(ds)

            logger.debug("Pickling time variable to disk for particles")
            timevar = ds.gettimevar(common_variables.get("u"))
            f, timevar_pickle_path = tempfile.mkstemp()
            os.close(f)
            f = open(timevar_pickle_path, "wb")
            pickle.dump(timevar, f)
            f.close()
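            # The time variable is pickled to disk so each ForceParticle worker can
            # unpickle it locally rather than re-opening the (possibly remote) dataset,
            # which is presumably why the dataset is closed immediately afterwards.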
            ds.closenc()
        except Exception:
            logger.warn("Failed to access remote dataset %s" % hydrodataset)
            raise DataControllerError("Inaccessible DAP endpoint: %s" %
                                      hydrodataset)

        # Add data controller to the queue first so that it
        # can get the initial data and is not blocked

        logger.debug('Starting DataController')
        logger.progress((4, "Starting processes"))
        data_controller = parallel.DataController(hydrodataset,
                                                  common_variables,
                                                  n_run,
                                                  get_data,
                                                  write_lock,
                                                  has_write_lock,
                                                  read_lock,
                                                  read_count,
                                                  time_chunk,
                                                  horiz_chunk,
                                                  times,
                                                  self.start,
                                                  point_get,
                                                  self.reference_location,
                                                  low_memory=low_memory,
                                                  cache=self.cache_path)
        tasks.put(data_controller)
        # Create DataController worker
        data_controller_process = parallel.Consumer(tasks,
                                                    results,
                                                    n_run,
                                                    nproc_lock,
                                                    active,
                                                    get_data,
                                                    name="DataController")
        data_controller_process.start()

        logger.debug('Adding %i particles as tasks' % len(self.particles))
        for part in self.particles:
            forcing = parallel.ForceParticle(
                part,
                hydrodataset,
                common_variables,
                timevar_pickle_path,
                times,
                self.start,
                self._models,
                self.reference_location.point,
                self._use_bathymetry,
                self._use_shoreline,
                self._use_seasurface,
                get_data,
                n_run,
                read_lock,
                has_read_lock,
                read_count,
                point_get,
                data_request_lock,
                has_data_request_lock,
                reverse_distance=self.reverse_distance,
                bathy=self.bathy_path,
                shoreline_path=self.shoreline_path,
                cache=self.cache_path,
                time_method=self.time_method)
            tasks.put(forcing)

        # Create workers for the particles.
        procs = [
            parallel.Consumer(tasks,
                              results,
                              n_run,
                              nproc_lock,
                              active,
                              get_data,
                              name="ForceParticle-%d" % i)
            for i in xrange(nproc - 1)
        ]
        for w in procs:
            w.start()
            logger.debug('Started %s' % w.name)

        # Get results back from queue, test for failed particles
        return_particles = []
        retrieved = 0.
        error_code = 0
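        # Result tuples placed on the queue follow a small code protocol (see below):
        #   -1 : a particle failed, -2 : the DataController failed,
        #   -3 : a zombie Consumer was detected and replaced,
        #   otherwise the payload is a finished Particle or the "DataController" string.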

        logger.info("Waiting for %i particle results" % len(self.particles))
        logger.progress((5, "Running model"))
        while retrieved < number_of_tasks:
            try:
                # Returns a tuple of code, result
                code, tempres = results.get(timeout=240)
            except Queue.Empty:
                # Poll the active processes to make sure they are all alive and then continue with loop
                if not data_controller_process.is_alive() and data_controller_process.exitcode != 0:
                    # Data controller is zombied, kill off other processes.
                    get_data.value = False
                    results.put((-2, "DataController"))

                new_procs = []
                old_procs = []
                for p in procs:
                    if not p.is_alive() and p.exitcode != 0:
                        # Do what the Consumer would do if something finished.
                        # Add something to results queue
                        results.put((-3, "ZombieParticle"))
                        # Decrement nproc (DataController exits when this is 0)
                        with nproc_lock:
                            n_run.value = n_run.value - 1

                        # Remove task from queue (so they can be joined later on)
                        tasks.task_done()

                        # Start a new Consumer.  It will exit if there are no tasks available.
                        np = parallel.Consumer(tasks,
                                               results,
                                               n_run,
                                               nproc_lock,
                                               active,
                                               get_data,
                                               name=p.name)
                        new_procs.append(np)
                        old_procs.append(p)

                        # Release any locks the PID had
                        if p.pid in has_read_lock:
                            with read_lock:
                                read_count.value -= 1
                                has_read_lock.remove(p.pid)

                        if has_data_request_lock.value == p.pid:
                            has_data_request_lock.value = -1
                            try:
                                data_request_lock.release()
                            except:
                                pass

                        if has_write_lock.value == p.pid:
                            has_write_lock.value = -1
                            try:
                                write_lock.release()
                            except:
                                pass

                for p in old_procs:
                    try:
                        procs.remove(p)
                    except ValueError:
                        logger.warn(
                            "Did not find %s in the list of processes.  Continuing on."
                            % p.name)

                for p in new_procs:
                    procs.append(p)
                    logger.warn(
                        "Started a new consumer (%s) to replace a zombie consumer"
                        % p.name)
                    p.start()

            else:
                # We got one.
                retrieved += 1
                if code is None:
                    logger.warn("Got an unrecognized response from a task.")
                elif code == -1:
                    logger.warn("Particle %s has FAILED!!" % tempres.uid)
                elif code == -2:
                    error_code = code
                    logger.warn(
                        "DataController has FAILED!!  Removing cache file so the particles fail."
                    )
                    try:
                        os.remove(self.cache_path)
                    except OSError:
                        logger.debug(
                            "Could not remove cache file, it probably never existed"
                        )
                        pass
                elif code == -3:
                    error_code = code
                    logger.info(
                        "A zombie process was caught and task was removed from queue"
                    )
                elif isinstance(tempres, Particle):
                    logger.info("Particle %d finished" % tempres.uid)
                    return_particles.append(tempres)
                    # We multiply by 90 here to reserve the remaining progress for exporting
                    logger.progress(
                        (round((retrieved / number_of_tasks) * 90.,
                               1), "Particle %d finished" % tempres.uid))
                elif tempres == "DataController":
                    logger.info("DataController finished")
                    logger.progress((round((retrieved / number_of_tasks) * 90.,
                                           1), "DataController finished"))
                else:
                    logger.info("Got a strange result on results queue")
                    logger.info(str(tempres))

                logger.info("Retrieved %i/%i results" %
                            (int(retrieved), number_of_tasks))

        if len(return_particles) != len(self.particles):
            logger.warn(
                "Some particles failed and are not included in the output")

        # The results queue should be empty at this point
        assert results.empty() is True

        # Should be good to join on the tasks now that the queue is empty
        logger.info("Joining the task queue")
        tasks.join()

        # Join all processes
        logger.info("Joining the processes")
        for w in procs + [data_controller_process]:
            # Wait 10 seconds
            w.join(10.)
            if w.is_alive():
                # Process is hanging, kill it.
                logger.info(
                    "Terminating %s forcefully.  This should have exited itself."
                    % w.name)
                w.terminate()

        logger.info('Workers complete')

        self.particles = return_particles

        # Remove Manager so it shuts down
        del mgr

        # Remove pickled timevar
        os.remove(timevar_pickle_path)

        # Remove the cache file
        if remove_cache is True:
            try:
                os.remove(self.cache_path)
            except OSError:
                logger.debug(
                    "Could not remove cache file, it probably never existed")

        logger.progress((96, "Exporting results"))

        if len(self.particles) > 0:
            # If output_formats and path specified,
            # output particle run data to disk when completed
            if "output_formats" in kwargs:
                # Make sure output_path is also included
                if kwargs.get("output_path", None) != None:
                    formats = kwargs.get("output_formats")
                    output_path = kwargs.get("output_path")
                    if isinstance(formats, list):
                        for format in formats:
                            logger.info("Exporting to: %s" % format)
                            try:
                                self.export(output_path, format=format)
                            except Exception:
                                logger.error("Failed to export to: %s" %
                                             format)
                    else:
                        logger.warn(
                            'The output_formats parameter should be a list, not saving any output!'
                        )
                else:
                    logger.warn(
                        'No output path defined, not saving any output!')
            else:
                logger.warn('No output format defined, not saving any output!')
        else:
            logger.warn("Model didn't actually do anything, check the log.")
            if error_code == -2:
                raise DataControllerError("Error in the DataController")
            else:
                raise ModelError("Error in the model")

        logger.progress((99, "Model Run Complete"))
        return
Example #28
    def listen_for_results(self):
        try:
            # Get results back from queue, test for failed particles
            return_particles = []
            retrieved = 0.
            self.error_code = 0

            logger.info("Waiting for %i particle results" % len(self.particles))
            logger.progress((5, "Running model"))
            while retrieved < self.number_of_tasks:
                try:
                    # Returns a tuple of code, result
                    code, tempres = self.results.get(timeout=240)
                except Queue.Empty:
                    # Poll the active processes to make sure they are all alive and then continue with loop
                    if not self.data_controller_process.is_alive() and self.data_controller_process.exitcode != 0:
                        # Data controller is zombied, kill off other processes.
                        self.get_data.value = False
                        self.results.put((-2, "CachingDataController"))

                    new_procs = []
                    old_procs = []
                    for p in self.procs:
                        if not p.is_alive() and p.exitcode != 0:
                            # Do what the Consumer would do if something finished.
                            # Add something to results queue
                            self.results.put((-3, "ZombieParticle"))
                            # Decrement nproc (CachingDataController exits when this is 0)
                            with self.nproc_lock:
                                self.n_run.value = self.n_run.value - 1

                            # Remove task from queue (so they can be joined later on)
                            self.tasks.task_done()

                            # Start a new Consumer.  It will exit if there are no tasks available.
                            np = Consumer(self.tasks, self.results, self.n_run, self.nproc_lock, self.active, self.get_data, name=p.name)
                            new_procs.append(np)
                            old_procs.append(p)

                            # Release any locks the PID had
                            if p.pid in self.has_read_lock:
                                with self.read_lock:
                                    self.read_count.value -= 1
                                    self.has_read_lock.remove(p.pid)

                            if self.has_data_request_lock.value == p.pid:
                                self.has_data_request_lock.value = -1
                                try:
                                    self.data_request_lock.release()
                                except:
                                    pass

                            if self.has_write_lock.value == p.pid:
                                self.has_write_lock.value = -1
                                try:
                                    self.write_lock.release()
                                except:
                                    pass

                    for p in old_procs:
                        try:
                            self.procs.remove(p)
                        except ValueError:
                            logger.warn("Did not find %s in the list of processes.  Continuing on." % p.name)

                    for p in new_procs:
                        self.procs.append(p)
                        logger.warn("Started a new consumer (%s) to replace a zombie consumer" % p.name)
                        p.start()

                else:
                    # We got one.
                    retrieved += 1
                    if code is None:
                        logger.warn("Got an unrecognized response from a task.")
                    elif code == -1:
                        logger.warn("Particle %s has FAILED!!" % tempres.uid)
                    elif code == -2:
                        self.error_code = code
                        logger.warn("CachingDataController has FAILED!!  Removing cache file so the particles fail.")
                        try:
                            os.remove(self.cache_path)
                        except OSError:
                            logger.debug("Could not remove cache file, it probably never existed")
                            pass
                    elif code == -3:
                        self.error_code = code
                        logger.info("A zombie process was caught and task was removed from queue")
                    elif isinstance(tempres, Particle):
                        logger.info("Particle %d finished" % tempres.uid)
                        return_particles.append(tempres)
                        # We multiply by 90 here to reserve the remaining progress for exporting
                        logger.progress((round((retrieved / self.number_of_tasks) * 90., 1), "Particle %d finished" % tempres.uid))
                    elif tempres == "CachingDataController":
                        logger.info("CachingDataController finished")
                        logger.progress((round((retrieved / self.number_of_tasks) * 90., 1), "CachingDataController finished"))
                    else:
                        logger.info("Got a strange result on results queue")
                        logger.info(str(tempres))

                    logger.info("Retrieved %i/%i results" % (int(retrieved), self.number_of_tasks))

            if len(return_particles) != len(self.particles):
                logger.warn("Some particles failed and are not included in the output")

            # The results queue should be empty at this point
            assert self.results.empty() is True

            # Should be good to join on the tasks now that the queue is empty
            logger.info("Joining the task queue")
            self.tasks.join()

            self.particles = return_particles

        finally:
            # Join all processes
            logger.info("Joining the processes")
            for w in self.procs + [self.data_controller_process]:
                # Wait 20 seconds
                w.join(20.)
                if w.is_alive():
                    # Process is hanging, kill it.
                    logger.info("Terminating %s forcefully.  This should have exited itself." % w.name)
                    w.terminate()
Example #29
def manager(run_id):

    with app.app_context():

        job = get_current_job()

        output_path = os.path.join(current_app.config['OUTPUT_PATH'], run_id)
        shutil.rmtree(output_path, ignore_errors=True)
        os.makedirs(output_path)

        cache_path = os.path.join(current_app.config['CACHE_PATH'], run_id)
        shutil.rmtree(cache_path, ignore_errors=True)
        os.makedirs(cache_path)

        f, log_file = tempfile.mkstemp(dir=cache_path, prefix=run_id, suffix=".log")
        os.close(f)
        os.chmod(log_file, 0644)

        # Set up Logger
        logger = logging.getLogger(run_id)
        handler = FileHandler(log_file)
        handler.setLevel(logging.INFO)
        formatter = logging.Formatter('[%(asctime)s] - %(levelname)s - %(name)s - %(processName)s - %(message)s')
        handler.setFormatter(formatter)
        logger.addHandler(handler)

        res = urlparse(current_app.config.get("RESULTS_REDIS_URI"))
        redis_pool = redis.ConnectionPool(host=res.hostname, port=res.port, db=res.path[1:])
        r = redis.Redis(connection_pool=redis_pool)

        run = db.Run.find_one( { '_id' : ObjectId(run_id) } )
        if run is None:
            return "Failed to locate run %s. May have been deleted while task was in the queue?" % run_id

        def listen_for_logs():
            pubsub = r.pubsub()
            pubsub.subscribe("%s:log" % run_id)
            for msg in pubsub.listen():

                if msg['type'] != "message":
                    continue

                if msg["data"] == "FINISHED":
                    break

                try:
                    prog = json.loads(msg["data"])
                    if prog is not None:
                        if prog.get("level", "").lower() == "progress":
                            job.meta["progress"] = float(prog.get("value", job.meta.get("progress", None)))
                            job.meta["message"]  = prog.get("message", job.meta.get("message", ""))
                            job.meta["updated"]  = prog.get("time", datetime.utcnow().replace(tzinfo=pytz.utc).astimezone(pytz.utc))
                            job.save()
                            logger.info("PROGRESS: %(value).2f - %(message)s" % prog)
                        else:
                            getattr(logger, prog["level"].lower())(prog.get("message"))
                except Exception:
                    logger.info("Got strange result: %s" % msg["data"])
                    pass

            pubsub.close()
            sys.exit()

        def listen_for_results(output_h5_file, total_particles):
            # Create output file (hdf5)
            particles_finished = 0
            results = ResultsPyTable(output_h5_file)
            pubsub = r.pubsub()
            pubsub.subscribe("%s:results" % run_id)
            for msg in pubsub.listen():

                if msg['type'] != "message":
                    continue

                if msg["data"] == "FINISHED":
                    break

                try:
                    json_msg = json.loads(msg["data"])
                    if json_msg.get("status", None):
                        #  "COMPLETED" or "FAILED" when a particle finishes
                        particles_finished += 1
                        percent_complete = 90. * (float(particles_finished) / float(total_particles)) + 5  # Add the 5 progress that was used prior to the particles starting (controller)
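                        # Worked example (hypothetical numbers): with 10 particles total and
                        # 5 finished, percent_complete = 90 * (5 / 10) + 5 = 50.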
                        r.publish("%s:log" % run_id, json.dumps({"time" : datetime.utcnow().isoformat(), "level" : "progress", "value" : percent_complete, "message" : "Particle #%s %s!" % (particles_finished, json_msg.get("status"))}))
                        if particles_finished == total_particles:
                            break
                    else:
                        # Write to HDF file
                        results.write(json_msg)
                except Exception:
                    logger.info("Got strange result: %s" % msg["data"])
                    pass

            pubsub.close()
            results.compute()
            results.close()
            sys.exit()

        pl = threading.Thread(name="LogListener", target=listen_for_logs)
        pl.daemon = True
        pl.start()

        output_h5_file = os.path.join(output_path, "results.h5")
        rl = threading.Thread(name="ResultListener", target=listen_for_results, args=(output_h5_file, run['particles']))
        rl.daemon = True
        rl.start()

        # Wait for PubSub listening to begin
        time.sleep(1)

        model = None
        try:

            r.publish("%s:log" % run_id, json.dumps({"time" : datetime.utcnow().isoformat(), "level" : "progress", "value" : 0, "message" : "Setting up model"}))

            hydropath      = run['hydro_path']
            geometry       = loads(run['geometry'])
            start_depth    = run['release_depth']
            num_particles  = run['particles']
            time_step      = run['timestep']
            num_steps      = int(math.ceil((run['duration'] * 24 * 60 * 60) / time_step))
            start_time     = run['start'].replace(tzinfo = pytz.utc)
            shoreline_path = run['shoreline_path'] or app.config.get("SHORE_PATH")
            shoreline_feat = run['shoreline_feature']
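            # Worked example (hypothetical values): a 2 day run with a 3600 second
            # timestep gives num_steps = ceil((2 * 24 * 60 * 60) / 3600) = 48.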

            # Setup Models
            models = []
            if run['cached_behavior'] is not None and run['cached_behavior'].get('results', None) is not None:
                behavior_data = run['cached_behavior']['results'][0]
                l = LarvaBehavior(data=behavior_data)
                models.append(l)
            models.append(Transport(horizDisp=run['horiz_dispersion'], vertDisp=run['vert_dispersion']))

            model = DistributedModelController(geometry=geometry,
                                               depth=start_depth,
                                               start=start_time,
                                               step=time_step,
                                               nstep=num_steps,
                                               npart=num_particles,
                                               models=models,
                                               use_bathymetry=True,
                                               bathy_path=current_app.config['BATHY_PATH'],
                                               use_shoreline=True,
                                               time_method=run['time_method'],
                                               shoreline_path=shoreline_path,
                                               shoreline_feature=shoreline_feat,
                                               shoreline_index_buffer=0.05)

            model.setup_run(hydropath, output_formats=["redis"], redis_url=current_app.config.get("RESULTS_REDIS_URI"), redis_results_channel="%s:results" % run_id, redis_log_channel="%s:log" % run_id)

        except Exception as exception:
            logger.warn("Run failed to initialize, cleaning up.")
            logger.warn(exception.message)
            job.meta["outcome"] = "failed"
            job.save()
            raise

        try:
            r.publish("%s:log" % run_id, json.dumps({"time" : datetime.utcnow().isoformat(), "level" : "progress", "value" : 4, "message" : "Adding particles to queue"}))
            for part in model.particles:
                particle_queue.enqueue_call(func=particle, args=(hydropath, part, model,))

        except Exception as exception:
            logger.warn("Failed to start particles, cleaning up.")
            logger.warn(exception.message)
            r.publish("%s:results" % run_id, "FINISHED")
            job.meta["outcome"] = "failed"
            job.save()
            raise

        finally:
Example #30
    def fill_cache_with_linterp_data(self, i, currenttime):
        """
            Method to streamline requests for data from the cache.
            Uses linear interpolation between timesteps to
            get u, v, w, temp, and salt.
        """
        if self.active.value is True:
            while self.get_data.value is True:
                logger.debug(
                    "Waiting for DataController to release cache file so I can read from it..."
                )
                timer.sleep(2)
                pass

        if self.need_data(i + 1):
            # Acquire lock for asking for data
            self.data_request_lock.acquire()
            self.has_data_request_lock.value = os.getpid()
            try:
                # Do I still need data?
                if self.need_data(i + 1):

                    # Tell the DataController that we are going to be reading from the file
                    with self.read_lock:
                        self.read_count.value += 1
                        self.has_read_lock.append(os.getpid())

                    # Open netcdf file on disk from commondataset
                    self.dataset.opennc()
                    # Get the indices for the current particle location
                    indices = self.dataset.get_indices(
                        'u',
                        timeinds=[np.asarray([i - 1])],
                        point=self.particle.location)
                    self.dataset.closenc()

                    with self.read_lock:
                        self.read_count.value -= 1
                        self.has_read_lock.remove(os.getpid())

                    # Override the time
                    # get the current time index data
                    self.point_get.value = [
                        indices[0] + 1, indices[-2], indices[-1]
                    ]
                    # Request that the data controller update the cache
                    self.get_data.value = True
                    # Wait until the data controller is done
                    if self.active.value is True:
                        while self.get_data.value is True:
                            logger.debug(
                                "Waiting for DataController to update cache with the CURRENT time index"
                            )
                            timer.sleep(2)
                            pass

                    # Do we still need to get the next timestep?
                    if self.need_data(i + 1):
                        # get the next time index data
                        self.point_get.value = [
                            indices[0] + 2, indices[-2], indices[-1]
                        ]
                        # Request that the data controller update the cache
                        self.get_data.value = True
                        # Wait until the data controller is done
                        if self.active.value is True:
                            while self.get_data.value is True:
                                logger.debug(
                                    "Waiting for DataController to update cache with the NEXT time index"
                                )
                                timer.sleep(2)
                                pass
            except Exception:
                logger.warn("Particle failed to request data correctly")
                raise
            finally:
                # Release lock for asking for data
                self.has_data_request_lock.value = -1
                self.data_request_lock.release()
    def __call__(self, proc, active):

        self.active = active

        if self.usebathy == True:
            self._bathymetry = Bathymetry(file=self.bathy)
        
        self._shoreline = None  
        if self.useshore == True:
            self._shoreline = Shoreline(file=self.shoreline_path, point=self.release_location_centroid, spatialbuffer=0.25)
            # Make sure we are not starting on land.  Raises exception if we are.
            self._shoreline.intersect(start_point=self.release_location_centroid, end_point=self.release_location_centroid)
            
        self.proc = proc
        part = self.part
        
        if self.active.value == True:
            while self.get_data.value == True:
                logger.debug("Waiting for DataController to start...")
                timer.sleep(10)
                pass

        # Initialize commondataset of local cache, then
        # close the related netcdf file
        try:
            with self.read_lock:
                self.read_count.value += 1
                self.has_read_lock.append(os.getpid())
            self.dataset = CommonDataset.open(self.localpath)
            self.dataset.closenc()
        except StandardError:
            logger.warn("No cache file: %s.  Particle exiting" % self.localpath)
            raise
        finally:
            with self.read_lock:
                self.read_count.value -= 1
                self.has_read_lock.remove(os.getpid())

        # Calculate datetime at every timestep
        modelTimestep, newtimes = AsaTransport.get_time_objects_from_model_timesteps(self.times, start=self.start_time)

        # Load Timevar from pickle serialization
        f = open(self.timevar_pickle_path,"rb")
        timevar = pickle.load(f)
        f.close()

        if self.time_method == 'interp':
            time_indexs = timevar.nearest_index(newtimes, select='before')
        elif self.time_method == 'nearest':
            time_indexs = timevar.nearest_index(newtimes)
        else:
            raise ValueError("Unsupported time_method '%s' for computing u,v,w,temp,salt" % self.time_method)
        try:
            assert len(newtimes) == len(time_indexs)
        except AssertionError:
            logger.error("Time indexes are messed up. Need to have equal datetime and time indexes")
            raise

        # Loop over timesteps.
        # We don't loop over the last time index because each iteration reads data
        # at time index i and then sets the particle's new location using the
        # following entry of 'newtimes'.
        for loop_i, i in enumerate(time_indexs[0:-1]):

            if self.active.value == False:
                raise ValueError("Particle exiting due to Failure.")

            newloc = None

            # if need a time that is outside of what we have
            #if self.active.value == True:
            #    while self.get_data.value == True:
            #        logger.info("Waiting for DataController to get out...")
            #        timer.sleep(4)
            #        pass
                
            # Get the variable data required by the models
            if self.time_method == 'nearest':
                u, v, w, temp, salt = self.data_nearest(i, newtimes[loop_i])
            elif self.time_method == 'interp': 
                u, v, w, temp, salt = self.data_interp(i, timevar, newtimes[loop_i])
            else:
                raise ValueError("Unsupported time_method '%s' for computing u,v,w,temp,salt" % self.time_method)

            #logger.info("U: %.4f, V: %.4f, W: %.4f" % (u,v,w))
            #logger.info("Temp: %.4f, Salt: %.4f" % (temp,salt))

            # Get the bathy value at the particles location
            if self.usebathy == True:
                bathymetry_value = self._bathymetry.get_depth(part.location)
            else:
                bathymetry_value = -999999999999999

            # Age the particle by the modelTimestep (seconds)
            # 'Age' meaning the amount of time it has been forced.
            part.age(seconds=modelTimestep[loop_i])
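            # Each model's move() is expected (based on its usage below) to return a dict
            # with at least 'latitude', 'longitude', 'depth', 'distance', 'angle',
            # 'azimuth', 'reverse_azimuth', 'vertical_distance' and 'vertical_angle'.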

            # loop over models - sort these in the order you want them to run
            for model in self.models:
                movement = model.move(part, u, v, w, modelTimestep[loop_i], temperature=temp, salinity=salt, bathymetry_value=bathymetry_value)
                newloc = Location4D(latitude=movement['latitude'], longitude=movement['longitude'], depth=movement['depth'], time=newtimes[loop_i+1])
                logger.debug("%s - moved %.3f meters (horizontally) and %.3f meters (vertically) by %s with data from %s" % (part.logstring(), movement['distance'], movement['vertical_distance'], model.__class__.__name__, newtimes[loop_i].isoformat()))
                if newloc:
                    self.boundary_interaction(particle=part, starting=part.location, ending=newloc,
                        distance=movement['distance'], angle=movement['angle'], 
                        azimuth=movement['azimuth'], reverse_azimuth=movement['reverse_azimuth'], 
                        vertical_distance=movement['vertical_distance'], vertical_angle=movement['vertical_angle'])
                logger.debug("%s - was forced by %s and is now at %s" % (part.logstring(), model.__class__.__name__, part.location.logstring()))

            part.note = part.outputstring()
            # Each timestep, save the particles status and environmental variables.
            # This keep fields such as temp, salt, halted, settled, and dead matched up with the number of timesteps
            part.save()

        # We won't pull data for the last entry in locations, but we need to populate it with fill data.
        part.fill_environment_gap()

        if self.usebathy == True:
            self._bathymetry.close()

        if self.useshore == True:
            self._shoreline.close()

        return part
    def data_interp(self, i, timevar, currenttime):
        """
            Method to streamline requests for data from the cache.
            Uses linear interpolation between timesteps to
            get u, v, w, temp, and salt.
        """
        if self.active.value == True:
            while self.get_data.value == True:
                logger.debug("Waiting for DataController to release cache file so I can read from it...")
                timer.sleep(4)
                pass

        if self.need_data(i+1):
            # Acquire lock for asking for data
            self.data_request_lock.acquire()
            self.has_data_request_lock.value = os.getpid()
            try:
                # Do I still need data?
                if self.need_data(i+1):

                    # Tell the DataController that we are going to be reading from the file
                    with self.read_lock:
                        self.read_count.value += 1
                        self.has_read_lock.append(os.getpid())

                    # Open netcdf file on disk from commondataset
                    self.dataset.opennc()
                    # Get the indices for the current particle location
                    indices = self.dataset.get_indices('u', timeinds=[np.asarray([i-1])], point=self.part.location )
                    self.dataset.closenc()

                    with self.read_lock:
                        self.read_count.value -= 1
                        self.has_read_lock.remove(os.getpid())
                    
                    # Override the time
                    # get the current time index data
                    self.point_get.value = [indices[0] + 1, indices[-2], indices[-1]]
                    # Request that the data controller update the cache
                    self.get_data.value = True
                    # Wait until the data controller is done
                    if self.active.value == True:
                        while self.get_data.value == True:
                            logger.debug("Waiting for DataController to update cache with the CURRENT time index")
                            timer.sleep(4)
                            pass 

                    # get the next time index data
                    self.point_get.value = [indices[0] + 2, indices[-2], indices[-1]]
                    # Request that the data controller update the cache
                    self.get_data.value = True
                    # Wait until the data controller is done
                    if self.active.value == True:
                        while self.get_data.value == True:
                            logger.debug("Waiting for DataController to update cache with the NEXT time index")
                            timer.sleep(4)
                            pass
            except StandardError:
                logger.warn("Particle failed to request data correctly")
                raise
            finally:
                # Release lock for asking for data
                self.has_data_request_lock.value = -1
                self.data_request_lock.release()
                

        # Tell the DataController that we are going to be reading from the file
        with self.read_lock:
            self.read_count.value += 1
            self.has_read_lock.append(os.getpid())

        try:
            # Open netcdf file on disk from commondataset
            self.dataset.opennc()

            # Grab data at time index closest to particle location
            u = [np.mean(np.mean(self.dataset.get_values('u', timeinds=[np.asarray([i])], point=self.part.location ))),
                 np.mean(np.mean(self.dataset.get_values('u', timeinds=[np.asarray([i+1])], point=self.part.location )))]
            v = [np.mean(np.mean(self.dataset.get_values('v', timeinds=[np.asarray([i])], point=self.part.location ))),
                 np.mean(np.mean(self.dataset.get_values('v', timeinds=[np.asarray([i+1])], point=self.part.location )))]
            # If there is vertical velocity in the dataset, get it
            if 'w' in self.dataset.nc.variables:
                w = [np.mean(np.mean(self.dataset.get_values('w', timeinds=[np.asarray([i])], point=self.part.location ))),
                    np.mean(np.mean(self.dataset.get_values('w', timeinds=[np.asarray([i+1])], point=self.part.location )))]
            else:
                w = [0.0, 0.0]
            # If there is salt and temp in the dataset, get it
            if self.temp_name != None and self.salt_name != None:
                temp = [np.mean(np.mean(self.dataset.get_values('temp', timeinds=[np.asarray([i])], point=self.part.location ))),
                        np.mean(np.mean(self.dataset.get_values('temp', timeinds=[np.asarray([i+1])], point=self.part.location )))]
                salt = [np.mean(np.mean(self.dataset.get_values('salt', timeinds=[np.asarray([i])], point=self.part.location ))),
                        np.mean(np.mean(self.dataset.get_values('salt', timeinds=[np.asarray([i+1])], point=self.part.location )))]
            
            # Check for nans that occur in the ocean (happens because
            # of model and coastline resolution mismatches)
            if np.isnan(u).any() or np.isnan(v).any() or np.isnan(w).any():
                # Take the mean of the closest 4 points.
                # If those points also include NaN, the result will still be NaN.
                uarray1 = self.dataset.get_values('u', timeinds=[np.asarray([i])], point=self.part.location, num=2)
                varray1 = self.dataset.get_values('v', timeinds=[np.asarray([i])], point=self.part.location, num=2)
                uarray2 = self.dataset.get_values('u', timeinds=[np.asarray([i+1])], point=self.part.location, num=2)
                varray2 = self.dataset.get_values('v', timeinds=[np.asarray([i+1])], point=self.part.location, num=2)
                if 'w' in self.dataset.nc.variables:
                    warray1 = self.dataset.get_values('w', timeinds=[np.asarray([i])], point=self.part.location, num=2)
                    warray2 = self.dataset.get_values('w', timeinds=[np.asarray([i+1])], point=self.part.location, num=2)
                    w = [warray1.mean(), warray2.mean()]
                else:
                    w = [0.0, 0.0]
                    
                if self.temp_name != None and self.salt_name != None:
                    temparray1 = self.dataset.get_values('temp', timeinds=[np.asarray([i])], point=self.part.location, num=2)
                    saltarray1 = self.dataset.get_values('salt', timeinds=[np.asarray([i])], point=self.part.location, num=2)
                    temparray2 = self.dataset.get_values('temp', timeinds=[np.asarray([i+1])], point=self.part.location, num=2)
                    saltarray2 = self.dataset.get_values('salt', timeinds=[np.asarray([i+1])], point=self.part.location, num=2)
                    temp = [temparray1.mean(), temparray2.mean()]
                    salt = [saltarray1.mean(), saltarray2.mean()]
                u = [uarray1.mean(), uarray2.mean()]
                v = [varray1.mean(), varray2.mean()]             
            
            # Linear interp of data between timesteps
            currenttime = date2num(currenttime)
            timevar = timevar.datenum
            u = self.linterp(timevar[i:i+2], u, currenttime)
            v = self.linterp(timevar[i:i+2], v, currenttime)
            w = self.linterp(timevar[i:i+2], w, currenttime)
            if self.temp_name != None and self.salt_name != None:
                temp = self.linterp(timevar[i:i+2], temp, currenttime)
                salt = self.linterp(timevar[i:i+2], salt, currenttime)
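            # linterp is assumed to do standard linear interpolation between the two
            # bracketing timesteps, i.e. roughly:
            #   value = v[0] + (v[1] - v[0]) * (t - t0) / (t1 - t0)
            # where t0, t1 = timevar[i], timevar[i+1] and t = currenttime.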
            
            if self.temp_name is None:
                temp = np.nan
            if self.salt_name is None:
                salt = np.nan

            #logger.info(self.dataset.get_xyind_from_point('u', self.part.location, num=1))

        except StandardError:
            logger.error("Error in data_interp method on ForceParticle")
            raise
        finally:
            self.dataset.closenc()
            with self.read_lock:
                self.read_count.value -= 1
                self.has_read_lock.remove(os.getpid())

        return u, v, w, temp, salt
    def __call__(self, proc, active):
        c = 0
        
        self.dataset = CommonDataset.open(self.url)
        self.proc = proc
        self.remote = self.dataset.nc
        cachepath = self.cache_path
        
        # Calculate the datetimes of the model timesteps like
        # the particle objects do, so we can figure out unique
        # time indices
        modelTimestep, newtimes = AsaTransport.get_time_objects_from_model_timesteps(self.times, start=self.start_time)

        timevar = self.dataset.gettimevar(self.uname)

        # Don't need to grab the last datetime, as it is not needed for forcing, only
        # for setting the time of the final particle forcing
        time_indexs = timevar.nearest_index(newtimes[0:-1], select='before')
        
        # Have to make sure that we get the plus 1 for the
        # linear interpolation of u,v,w,temp,salt
        self.inds = np.unique(time_indexs)
        self.inds = np.append(self.inds, self.inds.max()+1)
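        # Illustrative example (hypothetical indices): if time_indexs is [0, 0, 1, 1, 2],
        # np.unique gives [0, 1, 2] and appending max+1 yields inds = [0, 1, 2, 3],
        # providing the extra index needed for linear interpolation.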
        
        # Stay alive while at least one particle is still running;
        # if not, break out of the loop.
        while self.n_run.value > 1:
            logger.debug("Particles are still running, waiting for them to request data...")
            timer.sleep(2)
            # If particle asks for data, do the following
            if self.get_data.value == True:
                logger.debug("Particle asked for data!")

                # Wait for all reading particles to release the cache file
                while True:
                    self.read_lock.acquire()

                    logger.debug("Read count: %d" % self.read_count.value)
                    if self.read_count.value > 0:
                        logger.debug("Waiting for write lock on cache file (particles must stop reading)...")
                        self.read_lock.release()
                        timer.sleep(4)
                    else:
                        break
                    
                # Get write lock on the file.  Already have read lock.
                self.write_lock.acquire()
                self.has_write_lock.value = os.getpid()

                if c == 0:
                    logger.debug("Creating cache file")
                    try:
                        # Open local cache for writing, overwrites
                        # existing file with same name
                        self.local = netCDF4.Dataset(cachepath, 'w')

                        indices = self.dataset.get_indices(self.uname, timeinds=[np.asarray([0])], point=self.start)
                        self.point_get.value = [self.inds[0], indices[-2], indices[-1]]
                        
                        # Create dimensions for u and v variables
                        self.local.createDimension('time', None)
                        self.local.createDimension('level', None)
                        self.local.createDimension('x', None)
                        self.local.createDimension('y', None)
                        
                        # Create 3d or 4d u and v variables
                        if self.remote.variables[self.uname].ndim == 4:
                            self.ndim = 4
                            dimensions = ('time', 'level', 'y', 'x')
                            coordinates = "time z lon lat"
                        elif self.remote.variables[self.uname].ndim == 3:
                            self.ndim = 3
                            dimensions = ('time', 'y', 'x')
                            coordinates = "time lon lat"
                        shape = self.remote.variables[self.uname].shape

                        # If there is no FillValue defined in the dataset, use np.nan. 
                        # Sometimes it will work out correctly and other times we will
                        # have a huge cache file.
                        try:
                            fill = self.remote.variables[self.uname].missing_value
                        except Exception:
                            fill = np.nan
                        
                        # Create domain variable that specifies
                        # where there is data geographically/by time
                        # and where there is not data,
                        #   Used for testing if particle needs to 
                        #   ask cache to update
                        domain = self.local.createVariable('domain', 'i', dimensions, zlib=False, fill_value=0)
                        domain.coordinates = coordinates
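                        # The domain variable is presumably filled in by get_remote_data
                        # (not shown here) with non-zero values wherever data has been
                        # cached, so particles can test whether a region/time is present.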
                                
                        # Create local u and v variables
                        u = self.local.createVariable('u', 'f', dimensions, zlib=False, fill_value=fill)
                        v = self.local.createVariable('v', 'f', dimensions, zlib=False, fill_value=fill)
                        
                        v.coordinates = coordinates
                        u.coordinates = coordinates

                        localvars = [u, v,]
                        remotevars = [self.remote.variables[self.uname], self.remote.variables[self.vname]]
                        
                        # Create local w variable
                        if self.wname != None:
                            w = self.local.createVariable('w', 'f', dimensions, zlib=False, fill_value=fill)
                            w.coordinates = coordinates
                            localvars.append(w)
                            remotevars.append(self.remote.variables[self.wname])

                        if self.temp_name != None and self.salt_name != None: 
                            # Create local temp and salt vars       
                            temp = self.local.createVariable('temp', 'f', dimensions, zlib=False, fill_value=fill)
                            salt = self.local.createVariable('salt', 'f', dimensions, zlib=False, fill_value=fill)
                            temp.coordinates = coordinates
                            salt.coordinates = coordinates
                            localvars.append(temp)
                            localvars.append(salt)
                            remotevars.append(self.remote.variables[self.temp_name])
                            remotevars.append(self.remote.variables[self.salt_name])
                        
                        # Create local lat/lon coordinate variables
                        if self.remote.variables[self.xname].ndim == 2:
                            lon = self.local.createVariable('lon', 'f', ("y", "x"), zlib=False)
                            lon[:] = self.remote.variables[self.xname][:, :]
                            lat = self.local.createVariable('lat', 'f', ("y", "x"), zlib=False)
                            lat[:] = self.remote.variables[self.yname][:, :]
                        if self.remote.variables[self.xname].ndim == 1:
                            lon = self.local.createVariable('lon', 'f', ("x"), zlib=False)
                            lon[:] = self.remote.variables[self.xname][:]
                            lat = self.local.createVariable('lat', 'f', ("y"), zlib=False)
                            lat[:] = self.remote.variables[self.yname][:]                           
                            
                        # Create local z variable
                        if self.zname != None:            
                            if self.remote.variables[self.zname].ndim == 4:
                                z = self.local.createVariable('z', 'f', ("time","level","y","x"), zlib=False)  
                                remotez = self.remote.variables[self.zname]
                                localvars.append(z)
                                remotevars.append(remotez)
                            elif self.remote.variables[self.zname].ndim == 3:
                                z = self.local.createVariable('z', 'f', ("level","y","x"), zlib=False)
                                z[:] = self.remote.variables[self.zname][:, :, :]
                            elif self.remote.variables[self.zname].ndim ==1:
                                z = self.local.createVariable('z', 'f', ("level",), zlib=False)
                                z[:] = self.remote.variables[self.zname][:]
                                
                        # Create local time variable
                        time = self.local.createVariable('time', 'f8', ("time",), zlib=False)
                        if self.tname != None:
                            time[:] = self.remote.variables[self.tname][self.inds]
                        
                        if self.point_get.value[0]+self.time_size > np.max(self.inds):
                            current_inds = np.arange(self.point_get.value[0], np.max(self.inds)+1)
                        else:
                            current_inds = np.arange(self.point_get.value[0],self.point_get.value[0] + self.time_size)
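                        # Example with made-up numbers: if point_get starts at index 0,
                        # self.time_size is 4 and max(self.inds) is 10, current_inds is
                        # [0, 1, 2, 3]; near the end of the record it only runs up to
                        # max(self.inds).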
                        
                        # Get data from remote dataset and add
                        # to local cache  
                        while True:
                            try:
                                self.get_remote_data(localvars, remotevars, current_inds, shape)
                            except Exception:
                                logger.warn("DataController failed to get remote data.  Trying again in 30 seconds")
                                timer.sleep(30)
                            else:
                                break
                        
                        c += 1
                    except StandardError:
                        logger.error("DataController failed to get data (first request)")
                        raise
                    finally:
                        self.local.sync()
                        self.local.close()
                        self.has_write_lock.value = -1
                        self.write_lock.release()
                        self.get_data.value = False
                        self.read_lock.release()
                        logger.debug("Done updating cache file, closing file, and releasing locks")
                else:
                    logger.debug("Updating cache file")
                    try:
                        # Open local cache dataset for appending
                        self.local = netCDF4.Dataset(cachepath, 'a')
                        
                        # Create local and remote variable objects
                        # for the variables of interest  
                        u = self.local.variables['u']
                        v = self.local.variables['v']
                        time = self.local.variables['time']
                        remoteu = self.remote.variables[self.uname]
                        remotev = self.remote.variables[self.vname]
                        
                        # Create lists of variable objects for
                        # the data updater
                        localvars = [u, v, ]
                        remotevars = [remoteu, remotev, ]
                        if self.salt_name is not None and self.temp_name is not None:
                            salt = self.local.variables['salt']
                            temp = self.local.variables['temp']
                            remotesalt = self.remote.variables[self.salt_name]
                            remotetemp = self.remote.variables[self.temp_name]
                            localvars.append(salt)
                            localvars.append(temp)
                            remotevars.append(remotesalt)
                            remotevars.append(remotetemp)
                        if self.wname is not None:
                            w = self.local.variables['w']
                            remotew = self.remote.variables[self.wname]
                            localvars.append(w)
                            remotevars.append(remotew)
                        if self.zname is not None:
                            remotez = self.remote.variables[self.zname]
                            if remotez.ndim == 4:
                                z = self.local.variables['z']
                                localvars.append(z)
                                remotevars.append(remotez)
                        if self.tname is not None:
                            remotetime = self.remote.variables[self.tname]
                            time[self.inds] = remotetime[self.inds]
                        
                        if self.point_get.value[0]+self.time_size > np.max(self.inds):
                            current_inds = np.arange(self.point_get.value[0], np.max(self.inds)+1)
                        else:
                            current_inds = np.arange(self.point_get.value[0],self.point_get.value[0] + self.time_size)
                        
                        # Get data from remote dataset and add
                        # to local cache
                        while True:
                            try:
                                self.get_remote_data(localvars, remotevars, current_inds, shape)
                            except Exception:
                                logger.warn("DataController failed to get remote data.  Trying again in 30 seconds")
                                timer.sleep(30)
                            else:
                                break
                        
                        c += 1
                    except StandardError:
                        logger.error("DataController failed to get data (not first request)")
                        raise
                    finally:
                        self.local.sync()
                        self.local.close()
                        self.has_write_lock.value = -1
                        self.write_lock.release()
                        self.get_data.value = False
                        self.read_lock.release()
                        logger.debug("Done updating cache file, closing file, and releasing locks")
            else:
                pass        

        self.dataset.closenc()

        return "DataController"
Ejemplo n.º 34
    def run(self, hydrodataset, **kwargs):

        # Add ModelController description to logfile
        logger.info(self)

        # Add the model descriptions to logfile
        for m in self._models:
            logger.info(m)

        # Calculate the model timesteps
        # We need times = len(self._nstep) + 1 since data is stored one timestep
        # after a particle is forced with the final timestep's data.
        times = range(0,(self._step*self._nstep)+1,self._step)
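        # For example, with self._step == 3600 (seconds) and self._nstep == 24 this gives
        # times = [0, 3600, ..., 86400], i.e. nstep + 1 values.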
        # Calculate a datetime object for each model timestep
        # This method is duplicated in DataController and ForceParticle
        # using the 'times' variables above.  Will be useful in those other
        # locations for particles released at different times
        # i.e. released over a few days
        modelTimestep, self.datetimes = AsaTransport.get_time_objects_from_model_timesteps(times, start=self.start)

        time_chunk = self._time_chunk
        horiz_chunk = self._horiz_chunk
        low_memory = kwargs.get("low_memory", False)

        # Should we remove the cache file at the end of the run?
        remove_cache = kwargs.get("remove_cache", True)

        self.bathy_path = kwargs.get("bathy", None)

        self.cache_path = kwargs.get("cache", None)
        if self.cache_path is None:
            # Generate temp filename for dataset cache
            default_cache_dir = os.path.join(os.path.dirname(__file__), "_cache")
            temp_name = AsaRandom.filename(prefix=str(datetime.now().microsecond), suffix=".nc")
            self.cache_path = os.path.join(default_cache_dir, temp_name)
        
        logger.progress((1, "Setting up particle start locations"))
        point_locations = []
        if isinstance(self.geometry, Point):
            point_locations = [self.reference_location] * self._npart
        elif isinstance(self.geometry, Polygon) or isinstance(self.geometry, MultiPolygon):
            point_locations = [Location4D(latitude=loc.y, longitude=loc.x, depth=self._depth, time=self.start) for loc in AsaTransport.fill_polygon_with_points(goal=self._npart, polygon=self.geometry)]

        # Initialize the particles
        logger.progress((2, "Initializing particles"))
        for x in xrange(0, self._npart):
            p = LarvaParticle(id=x)
            p.location = point_locations[x]
            # We don't need to fill the location gaps here for environment variables
            # because the first data collected actually relates to this original
            # position.
            # We do need to fill in fields such as settled, halted, etc.
            p.fill_status_gap()
            # Set the inital note
            p.note = p.outputstring()
            p.notes.append(p.note)
            self.particles.append(p)

        # This is where it makes sense to implement the multiprocessing
        # looping for particles and models. Can handle each particle in 
        # parallel probably.
        #
        # Get the number of cores (may take some tuning) and create that
        # many workers then pass particles into the queue for the workers
        mgr = multiprocessing.Manager()
        nproc = multiprocessing.cpu_count() - 1
        if nproc <= 0:
            raise ValueError("Model does not run using less than two CPU cores")

        # Each particle is a task, plus the DataController
        number_of_tasks = len(self.particles) + 1

        # We need a process for each particle and one for the data controller
        nproc = min(number_of_tasks, nproc)
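        # For example, on an 8-core machine with 100 particles nproc starts at 7 and
        # number_of_tasks is 101, so nproc stays at 7: one Consumer is started for the
        # DataController below and the remaining nproc - 1 Consumers force particles.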

        # When a particle requests data
        data_request_lock = mgr.Lock()
        # PID of process with lock
        has_data_request_lock = mgr.Value('int',-1)
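        # -1 is a sentinel meaning no process currently holds the data-request lock; the
        # holder's PID is stored here so the parent can clear the lock if a worker dies
        # while holding it.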

        nproc_lock = mgr.Lock()
        
        # Create the task queue for all of the particles and the DataController
        tasks = multiprocessing.JoinableQueue(number_of_tasks)
        # Create the result queue for all of the particles and the DataController
        results = mgr.Queue(number_of_tasks)
        
        # Create the shared state objects
        get_data = mgr.Value('bool', True)
        # Number of tasks
        n_run = mgr.Value('int', number_of_tasks)
        updating = mgr.Value('bool', False)

        # When something is reading from cache file
        read_lock = mgr.Lock()
        # list of PIDs that are reading
        has_read_lock = mgr.list()
        read_count = mgr.Value('int', 0)

        # When something is writing to the cache file
        write_lock = mgr.Lock()
        # PID of process with lock
        has_write_lock = mgr.Value('int',-1)

        point_get = mgr.Value('list', [0, 0, 0])
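        # Shared index of the next data chunk to pull, presumably [time index, y index, x index]
        # (the DataController seeds it from the grid indices of the start point; see the
        # cache-creation code in a later example).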
        active = mgr.Value('bool', True)
        
        logger.progress((3, "Initializing and caching hydro model's grid"))
        try:
            ds = CommonDataset.open(hydrodataset)
            # Query the dataset for common variable names
            # and the time variable.
            logger.debug("Retrieving variable information from dataset")
            common_variables = self.get_common_variables_from_dataset(ds)

            logger.debug("Pickling time variable to disk for particles")
            timevar = ds.gettimevar(common_variables.get("u"))
            f, timevar_pickle_path = tempfile.mkstemp()
            os.close(f)
            f = open(timevar_pickle_path, "wb")
            pickle.dump(timevar, f)
            f.close()
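            # The pickled time variable is handed to every ForceParticle worker (via
            # timevar_pickle_path below), presumably so each worker can load it from disk
            # instead of re-querying the remote dataset.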
            ds.closenc()
        except:
            logger.warn("Failed to access remote dataset %s" % hydrodataset)
            raise DataControllerError("Inaccessible DAP endpoint: %s" % hydrodataset)


        # Add data controller to the queue first so that it 
        # can get the initial data and is not blocked
        
        logger.debug('Starting DataController')
        logger.progress((4, "Starting processes"))
        data_controller = parallel.DataController(hydrodataset, common_variables, n_run, get_data, write_lock, has_write_lock, read_lock, read_count,
                                                  time_chunk, horiz_chunk, times,
                                                  self.start, point_get, self.reference_location,
                                                  low_memory=low_memory,
                                                  cache=self.cache_path)
        tasks.put(data_controller)
        # Create DataController worker
        data_controller_process = parallel.Consumer(tasks, results, n_run, nproc_lock, active, get_data, name="DataController")
        data_controller_process.start()
        
        logger.debug('Adding %i particles as tasks' % len(self.particles))
        for part in self.particles:
            forcing = parallel.ForceParticle(part,
                                        hydrodataset,
                                        common_variables,
                                        timevar_pickle_path,
                                        times,
                                        self.start,
                                        self._models,
                                        self.reference_location.point,
                                        self._use_bathymetry,
                                        self._use_shoreline,
                                        self._use_seasurface,
                                        get_data,
                                        n_run,
                                        read_lock,
                                        has_read_lock,
                                        read_count,
                                        point_get,
                                        data_request_lock,
                                        has_data_request_lock,
                                        reverse_distance=self.reverse_distance,
                                        bathy=self.bathy_path,
                                        shoreline_path=self.shoreline_path,
                                        shoreline_feature=self.shoreline_feature,
                                        cache=self.cache_path,
                                        time_method=self.time_method)
            tasks.put(forcing)

        # Create workers for the particles.
        procs = [ parallel.Consumer(tasks, results, n_run, nproc_lock, active, get_data, name="ForceParticle-%d"%i)
                  for i in xrange(nproc - 1) ]
        for w in procs:
            w.start()
            logger.debug('Started %s' % w.name)

        # Get results back from queue, test for failed particles
        return_particles = []
        retrieved = 0.
        error_code = 0

        logger.info("Waiting for %i particle results" % len(self.particles))
        logger.progress((5, "Running model"))
        while retrieved < number_of_tasks:
            try:
                # Returns a tuple of code, result
                code, tempres = results.get(timeout=240)
            except Queue.Empty:
                # Poll the active processes to make sure they are all alive and then continue with loop
                if not data_controller_process.is_alive() and data_controller_process.exitcode != 0:
                    # Data controller is zombied, kill off other processes.
                    get_data.value = False
                    results.put((-2, "DataController"))

                new_procs = []
                old_procs = []
                for p in procs:
                    if not p.is_alive() and p.exitcode != 0:
                        # Do what the Consumer would do if something finished.
                        # Add something to results queue
                        results.put((-3, "ZombieParticle"))
                        # Decrement nproc (DataController exits when this is 0)
                        with nproc_lock:
                            n_run.value = n_run.value - 1

                        # Remove task from queue (so they can be joined later on)
                        tasks.task_done()

                        # Start a new Consumer.  It will exit if there are no tasks available.
                        np = parallel.Consumer(tasks, results, n_run, nproc_lock, active, get_data, name=p.name)
                        new_procs.append(np)
                        old_procs.append(p)
                        
                        # Release any locks the PID had
                        if p.pid in has_read_lock:
                            with read_lock:
                                read_count.value -= 1
                                has_read_lock.remove(p.pid)

                        if has_data_request_lock.value == p.pid:
                            has_data_request_lock.value = -1
                            try:
                                data_request_lock.release()
                            except:
                                pass
                            
                        if has_write_lock.value == p.pid:
                            has_write_lock.value = -1
                            try:
                                write_lock.release()
                            except:
                                pass
                            

                for p in old_procs:
                    try:
                        procs.remove(p)
                    except ValueError:
                        logger.warn("Did not find %s in the list of processes.  Continuing on." % p.name)

                for p in new_procs:
                    procs.append(p)
                    logger.warn("Started a new consumer (%s) to replace a zombie consumer" % p.name)
                    p.start()
                
            else:
                # We got one.
                retrieved += 1
                if code is None:
                    logger.warn("Got an unrecognized response from a task.")
                elif code == -1:
                    logger.warn("Particle %s has FAILED!!" % tempres.uid)
                elif code == -2:
                    error_code = code
                    logger.warn("DataController has FAILED!!  Removing cache file so the particles fail.")
                    try:
                        os.remove(self.cache_path)
                    except OSError:
                        logger.debug("Could not remove cache file, it probably never existed")
                        pass
                elif code == -3:
                    error_code = code
                    logger.info("A zombie process was caught and task was removed from queue")
                elif isinstance(tempres, Particle):
                    logger.info("Particle %d finished" % tempres.uid)
                    return_particles.append(tempres)
                    # Multiply by 90 here to save the last bit of the progress bar for exporting
                    logger.progress((round((retrieved / number_of_tasks) * 90.,1), "Particle %d finished" % tempres.uid))
                elif tempres == "DataController":
                    logger.info("DataController finished")
                    logger.progress((round((retrieved / number_of_tasks) * 90.,1), "DataController finished"))
                else:
                    logger.info("Got a strange result on results queue")
                    logger.info(str(tempres))

                logger.info("Retrieved %i/%i results" % (int(retrieved),number_of_tasks))
        
        if len(return_particles) != len(self.particles):
            logger.warn("Some particles failed and are not included in the output")

        # The results queue should be empty at this point
        assert results.empty() is True

        # Should be good to join on the tasks now that the queue is empty
        logger.info("Joining the task queue")
        tasks.join()

        # Join all processes
        logger.info("Joining the processes")
        for w in procs + [data_controller_process]:
            # Wait 10 seconds
            w.join(10.)
            if w.is_alive():
                # Process is hanging, kill it.
                logger.info("Terminating %s forcefully.  This should have exited itself." % w.name)
                w.terminate()
                    
        logger.info('Workers complete')

        self.particles = return_particles

        # Remove Manager so it shuts down
        del mgr

        # Remove pickled timevar
        os.remove(timevar_pickle_path)

        # Remove the cache file
        if remove_cache is True:
            try:
                os.remove(self.cache_path)
            except OSError:
                logger.debug("Could not remove cache file, it probably never existed")

        logger.progress((96, "Exporting results"))

        if len(self.particles) > 0:
            # If output_formats and path specified,
            # output particle run data to disk when completed
            if "output_formats" in kwargs:
                # Make sure output_path is also included
                if kwargs.get("output_path", None) is not None:
                    formats = kwargs.get("output_formats")
                    output_path = kwargs.get("output_path")
                    if isinstance(formats, list):
                        for format in formats:
                            logger.info("Exporting to: %s" % format)
                            try:
                                self.export(output_path, format=format)
                            except Exception:
                                logger.error("Failed to export to: %s" % format)
                    else:
                        logger.warn('The output_formats parameter should be a list, not saving any output!')  
                else:
                    logger.warn('No output path defined, not saving any output!')  
            else:
                logger.warn('No output format defined, not saving any output!')
        else:
            logger.warn("Model didn't actually do anything, check the log.")
            if error_code == -2:
                raise DataControllerError("Error in the DataController")
            else:
                raise ModelError("Error in the model")

        logger.progress((99, "Model Run Complete"))
        return
Ejemplo n.º 35
def run(run_id):

    # Sleep to give the Run object enough time to save
    time.sleep(10)

    with app.app_context():
        from paegan.logger import logger

        job = get_current_job()

        output_path = os.path.join(current_app.config['OUTPUT_PATH'], run_id)
        shutil.rmtree(output_path, ignore_errors=True)
        os.makedirs(output_path)

        cache_path = os.path.join(current_app.config['CACHE_PATH'], run_id)
        shutil.rmtree(cache_path, ignore_errors=True)
        os.makedirs(cache_path)

        temp_animation_path = os.path.join(current_app.config['OUTPUT_PATH'], "temp_images_" + run_id)
        shutil.rmtree(temp_animation_path, ignore_errors=True)
        os.makedirs(temp_animation_path)

        # Set up Logger
        queue = multiprocessing.Queue(-1)

        f, log_file = tempfile.mkstemp(dir=cache_path, prefix=run_id, suffix=".log")
        os.close(f)

        # Close any existing handlers
        for hand in logger.handlers:
            hand.close()
        # Remove any existing handlers
        logger.handlers = []
        logger.setLevel(logging.PROGRESS)
        handler = MultiProcessingLogHandler(log_file, queue)
        handler.setLevel(logging.PROGRESS)
        formatter = logging.Formatter('[%(asctime)s] - %(levelname)s - %(name)s - %(processName)s - %(message)s')
        handler.setFormatter(formatter)
        logger.addHandler(handler)

        # Progress stuff.  Hokey!
        progress_deque = collections.deque(maxlen=1)
        progress_handler = ProgressHandler(progress_deque)
        progress_handler.setLevel(logging.PROGRESS)
        logger.addHandler(progress_handler)
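        # progress_deque has maxlen=1, so it only ever holds the most recent progress
        # record; the save_progress thread below polls it every 5 seconds and copies it
        # into the job metadata.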

        e = threading.Event()

        def save_progress():
            while e.wait(5) is not True:
                try:
                    record = progress_deque.pop()
                    if record == StopIteration:
                        break

                    job.meta["updated"] = record[0]
                    if record is not None and record[1] >= 0:
                        job.meta["progress"] = record[1]
                    if isinstance(record[2], (unicode, str)):
                        job.meta["message"] = record[2]

                    job.save()
                except IndexError:
                    pass
                except Exception:
                    raise
            return

        t = threading.Thread(name="ProgressUpdater", target=save_progress)
        t.daemon = True
        t.start()

        model = None

        try:

            logger.progress((0, "Configuring model"))

            run = db.Run.find_one( { '_id' : ObjectId(run_id) } )
            if run is None:
                return "Failed to locate run %s. May have been deleted while task was in the queue?" % run_id

            geometry       = loads(run['geometry'])
            start_depth    = run['release_depth']
            num_particles  = run['particles']
            time_step      = run['timestep']
            num_steps      = int(math.ceil((run['duration'] * 24 * 60 * 60) / time_step))
            start_time     = run['start'].replace(tzinfo = pytz.utc)
            shoreline_path = run['shoreline_path'] or app.config.get("SHORE_PATH")
            shoreline_feat = run['shoreline_feature']
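            # For example, a 5-day run with a 3600-second timestep gives
            # num_steps = ceil(5 * 24 * 60 * 60 / 3600) = 120.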

            # Set up output directory/bucket for run
            output_formats = ['Shapefile', 'NetCDF', 'Trackline']

            # Setup Models
            models = []
            if run['cached_behavior'] is not None and run['cached_behavior'].get('results', None) is not None:
                behavior_data = run['cached_behavior']['results'][0]
                l = LarvaBehavior(data=behavior_data)
                models.append(l)
            models.append(Transport(horizDisp=run['horiz_dispersion'], vertDisp=run['vert_dispersion']))

            # Setup ModelController
            model = ModelController(geometry=geometry, depth=start_depth, start=start_time, step=time_step, nstep=num_steps, npart=num_particles, models=models, use_bathymetry=True, use_shoreline=True,
                                    time_chunk=run['time_chunk'], horiz_chunk=run['horiz_chunk'], time_method=run['time_method'], shoreline_path=shoreline_path, shoreline_feature=shoreline_feat, reverse_distance=1500)

            # Run the model
            cache_file = os.path.join(cache_path, run_id + ".nc.cache")
            bathy_file = current_app.config['BATHY_PATH']

            model.run(run['hydro_path'], output_path=output_path, bathy=bathy_file, output_formats=output_formats, cache=cache_file, remove_cache=False, caching=run['caching'])

            # Skip creating the movie output for now
            """
            from paegan.viz.trajectory import CFTrajectory

            logger.info("Creating animation...")
            for filename in os.listdir(output_path):
                if os.path.splitext(filename)[1][1:] == "nc":
                    # Found netCDF file
                    netcdf_file = os.path.join(output_path,filename)
                    traj = CFTrajectory(netcdf_file)
                    success = traj.plot_animate(os.path.join(output_path,'animation.avi'), temp_folder=temp_animation_path, bathy=app.config['BATHY_PATH'])
                    if not success:
                        logger.info("Could not create animation")
                    else:
                        logger.info("Animation saved")
            """
            job.meta["outcome"] = "success"
            job.save()
            return "Successfully ran %s" % run_id

        except Exception as exception:
            logger.warn("Run FAILED, cleaning up and uploading log.")
            logger.warn(str(exception))
            job.meta["outcome"] = "failed"
            job.save()
            raise

        finally:

            logger.progress((99, "Processing output files"))
            # Close the handler so we can upload the log file without a file lock
            for hand in logger.handlers:
                hand.close()
            queue.put(StopIteration)
            # Break out of the progress loop
            e.set()
            t.join()

            # Move logfile to output directory
            shutil.move(log_file, os.path.join(output_path, 'model.log'))

            # Move cachefile to output directory if we made one
            if run['caching']:
                shutil.move(cache_file, output_path)

            output_files = []
            for filename in os.listdir(output_path):
                outfile = os.path.join(output_path, filename)
                output_files.append(outfile)

            result_files = []
            base_access_url = current_app.config.get('NON_S3_OUTPUT_URL', None)
            # Handle results and cleanup
            if current_app.config['USE_S3'] is True:
                base_access_url = urljoin("http://%s.s3.amazonaws.com/output/" % current_app.config['S3_BUCKET'], run_id)
                # Upload results to S3 and remove the local copies
                conn = S3Connection()
                bucket = conn.get_bucket(current_app.config['S3_BUCKET'])

                for outfile in output_files:
                    # Don't upload the cache file
                    if os.path.basename(outfile) == os.path.basename(cache_file):
                        continue

                    # Upload the outfile with the same as the run name
                    _, ext = os.path.splitext(outfile)
                    new_filename = slugify(unicode(run['name'])) + ext

                    k = Key(bucket)
                    k.key = "output/%s/%s" % (run_id, new_filename)
                    k.set_contents_from_filename(outfile)
                    k.set_acl('public-read')
                    result_files.append(base_access_url + "/" + new_filename)
                    os.remove(outfile)

                shutil.rmtree(output_path, ignore_errors=True)

            else:
                for outfile in output_files:
                    result_files.append(urljoin(base_access_url, run_id) + "/" + os.path.basename(outfile))

            shutil.rmtree(temp_animation_path, ignore_errors=True)

            # Set output fields
            run.output = result_files
            run.ended = datetime.utcnow()
            run.compute()
            run.save()

            # Cleanup
            logger.removeHandler(handler)
            del formatter
            del handler
            del logger
            del model
            queue.close()

            job.meta["message"] = "Complete"
            job.save()
Ejemplo n.º 36
    def __call__(self, active):
        c = 0

        self.dataset = CommonDataset.open(self.hydrodataset)
        self.remote = self.dataset.nc

        # Calculate the datetimes of the model timesteps like
        # the particle objects do, so we can figure out unique
        # time indices
        modelTimestep, newtimes = AsaTransport.get_time_objects_from_model_timesteps(
            self.times, start=self.start_time)

        timevar = self.dataset.gettimevar(self.uname)

        # Don't need to grab the last datetime, as it is not needed for forcing, only
        # for setting the time of the final particle forcing
        time_indexs = timevar.nearest_index(newtimes[0:-1], select='before')

        # Have to make sure that we get the plus 1 for the
        # linear interpolation of u,v,w,temp,salt
        self.inds = np.unique(time_indexs)
        self.inds = np.append(self.inds, self.inds.max() + 1)
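        # For example, if time_indexs is [0, 0, 1, 1, 2], np.unique gives [0, 1, 2] and the
        # appended index makes it [0, 1, 2, 3], so the bracketing timestep needed for the
        # linear interpolation is always cached.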

        # While there is at least 1 particle still running,
        # stay alive, if not break
        while self.n_run.value > 1:

            if self.caching is False:
                logger.debug(
                    "Caching is False, not doing much.  Just hanging out until all of the particles finish."
                )
                timer.sleep(10)
                continue

            # If particle asks for data, do the following
            if self.get_data.value is True:
                logger.debug("Particle asked for data!")

                # Wait for particles to get out
                while True:
                    self.read_lock.acquire()

                    logger.debug("Read count: %d" % self.read_count.value)
                    if self.read_count.value > 0:
                        logger.debug(
                            "Waiting for write lock on cache file (particles must stop reading)..."
                        )
                        self.read_lock.release()
                        timer.sleep(2)
                    else:
                        break

                # Get write lock on the file.  Already have read lock.
                self.write_lock.acquire()
                self.has_write_lock.value = os.getpid()
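                # Record this process's PID so the parent can release the write lock if
                # this process dies while still holding it.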

                if c == 0:
                    logger.debug("Creating cache file")
                    try:
                        # Open local cache for writing, overwrites
                        # existing file with same name
                        self.local = netCDF4.Dataset(self.cache_path, 'w')

                        indices = self.dataset.get_indices(
                            self.uname,
                            timeinds=[np.asarray([0])],
                            point=self.start)
                        self.point_get.value = [
                            self.inds[0], indices[-2], indices[-1]
                        ]
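                        # point_get is seeded with the first cached time index plus what are
                        # presumably the (y, x) grid indices of the release point returned by
                        # get_indices.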

                        # Create dimensions for u and v variables
                        self.local.createDimension('time', None)
                        self.local.createDimension('level', None)
                        self.local.createDimension('x', None)
                        self.local.createDimension('y', None)

                        # Create 3d or 4d u and v variables
                        if self.remote.variables[self.uname].ndim == 4:
                            self.ndim = 4
                            dimensions = ('time', 'level', 'y', 'x')
                            coordinates = "time z lon lat"
                        elif self.remote.variables[self.uname].ndim == 3:
                            self.ndim = 3
                            dimensions = ('time', 'y', 'x')
                            coordinates = "time lon lat"
                        shape = self.remote.variables[self.uname].shape

                        # If the remote variable defines no missing_value, fall back to
                        # np.nan.  Sometimes this works out correctly and other times we
                        # will end up with a huge cache file.
                        try:
                            fill = self.remote.variables[
                                self.uname].missing_value
                        except Exception:
                            fill = np.nan

                        # Create domain variable that specifies
                        # where there is data geographically/by time
                        # and where there is not data,
                        #   Used for testing if particle needs to
                        #   ask cache to update
                        domain = self.local.createVariable('domain',
                                                           'i',
                                                           dimensions,
                                                           zlib=False,
                                                           fill_value=0)
                        domain.coordinates = coordinates

                        # Create local u and v variables
                        u = self.local.createVariable('u',
                                                      'f',
                                                      dimensions,
                                                      zlib=False,
                                                      fill_value=fill)
                        v = self.local.createVariable('v',
                                                      'f',
                                                      dimensions,
                                                      zlib=False,
                                                      fill_value=fill)

                        v.coordinates = coordinates
                        u.coordinates = coordinates

                        localvars = [
                            u,
                            v,
                        ]
                        remotevars = [
                            self.remote.variables[self.uname],
                            self.remote.variables[self.vname]
                        ]

                        # Create local w variable
                        if self.wname is not None:
                            w = self.local.createVariable('w',
                                                          'f',
                                                          dimensions,
                                                          zlib=False,
                                                          fill_value=fill)
                            w.coordinates = coordinates
                            localvars.append(w)
                            remotevars.append(
                                self.remote.variables[self.wname])

                        if self.temp_name is not None and self.salt_name is not None:
                            # Create local temp and salt vars
                            temp = self.local.createVariable('temp',
                                                             'f',
                                                             dimensions,
                                                             zlib=False,
                                                             fill_value=fill)
                            salt = self.local.createVariable('salt',
                                                             'f',
                                                             dimensions,
                                                             zlib=False,
                                                             fill_value=fill)
                            temp.coordinates = coordinates
                            salt.coordinates = coordinates
                            localvars.append(temp)
                            localvars.append(salt)
                            remotevars.append(
                                self.remote.variables[self.temp_name])
                            remotevars.append(
                                self.remote.variables[self.salt_name])

                        # Create local lat/lon coordinate variables
                        if self.remote.variables[self.xname].ndim == 2:
                            lon = self.local.createVariable('lon',
                                                            'f', ("y", "x"),
                                                            zlib=False)
                            lon[:] = self.remote.variables[self.xname][:, :]
                            lat = self.local.createVariable('lat',
                                                            'f', ("y", "x"),
                                                            zlib=False)
                            lat[:] = self.remote.variables[self.yname][:, :]
                        if self.remote.variables[self.xname].ndim == 1:
                            lon = self.local.createVariable('lon',
                                                            'f', ("x"),
                                                            zlib=False)
                            lon[:] = self.remote.variables[self.xname][:]
                            lat = self.local.createVariable('lat',
                                                            'f', ("y"),
                                                            zlib=False)
                            lat[:] = self.remote.variables[self.yname][:]

                        # Create local z variable
                        if self.zname is not None:
                            if self.remote.variables[self.zname].ndim == 4:
                                z = self.local.createVariable(
                                    'z',
                                    'f', ("time", "level", "y", "x"),
                                    zlib=False)
                                remotez = self.remote.variables[self.zname]
                                localvars.append(z)
                                remotevars.append(remotez)
                            elif self.remote.variables[self.zname].ndim == 3:
                                z = self.local.createVariable(
                                    'z', 'f', ("level", "y", "x"), zlib=False)
                                z[:] = self.remote.variables[
                                    self.zname][:, :, :]
                            elif self.remote.variables[self.zname].ndim == 1:
                                z = self.local.createVariable('z',
                                                              'f', ("level", ),
                                                              zlib=False)
                                z[:] = self.remote.variables[self.zname][:]

                        # Create local time variable
                        time = self.local.createVariable('time',
                                                         'f8', ("time", ),
                                                         zlib=False)
                        if self.tname is not None:
                            time[:] = self.remote.variables[self.tname][
                                self.inds]

                        if self.point_get.value[0] + self.time_size > np.max(
                                self.inds):
                            current_inds = np.arange(self.point_get.value[0],
                                                     np.max(self.inds) + 1)
                        else:
                            current_inds = np.arange(
                                self.point_get.value[0],
                                self.point_get.value[0] + self.time_size)

                        # Get data from remote dataset and add
                        # to local cache.
                        # Try 20 times on the first attempt
                        current_attempt = 1
                        max_attempts = 20
                        while True:
                            try:
                                assert current_attempt <= max_attempts
                                self.get_remote_data(localvars, remotevars,
                                                     current_inds, shape)
                            except AssertionError:
                                raise
                            except Exception:
                                logger.warn(
                                    "CachingDataController failed to get remote data.  Trying again in 20 seconds. %s attempts left."
                                    % str(max_attempts - current_attempt))
                                logger.exception("Data Access Error")
                                timer.sleep(20)
                                current_attempt += 1
                            else:
                                break

                        c += 1
                    except (Exception, AssertionError):
                        logger.error(
                            "CachingDataController failed to get data (first request)"
                        )
                        raise
                    finally:
                        self.local.sync()
                        self.local.close()
                        self.has_write_lock.value = -1
                        self.write_lock.release()
                        self.get_data.value = False
                        self.read_lock.release()
                        logger.debug(
                            "Done updating cache file, closing file, and releasing locks"
                        )
                else:
                    logger.debug("Updating cache file")
                    try:
                        # Open local cache dataset for appending
                        self.local = netCDF4.Dataset(self.cache_path, 'a')

                        # Create local and remote variable objects
                        # for the variables of interest
                        u = self.local.variables['u']
                        v = self.local.variables['v']
                        time = self.local.variables['time']
                        remoteu = self.remote.variables[self.uname]
                        remotev = self.remote.variables[self.vname]

                        # Create lists of variable objects for
                        # the data updater
                        localvars = [
                            u,
                            v,
                        ]
                        remotevars = [
                            remoteu,
                            remotev,
                        ]
                        if self.salt_name is not None and self.temp_name is not None:
                            salt = self.local.variables['salt']
                            temp = self.local.variables['temp']
                            remotesalt = self.remote.variables[self.salt_name]
                            remotetemp = self.remote.variables[self.temp_name]
                            localvars.append(salt)
                            localvars.append(temp)
                            remotevars.append(remotesalt)
                            remotevars.append(remotetemp)
                        if self.wname is not None:
                            w = self.local.variables['w']
                            remotew = self.remote.variables[self.wname]
                            localvars.append(w)
                            remotevars.append(remotew)
                        if self.zname is not None:
                            remotez = self.remote.variables[self.zname]
                            if remotez.ndim == 4:
                                z = self.local.variables['z']
                                localvars.append(z)
                                remotevars.append(remotez)
                        if self.tname is not None:
                            remotetime = self.remote.variables[self.tname]
                            time[self.inds] = remotetime[self.inds]

                        if self.point_get.value[0] + self.time_size > np.max(
                                self.inds):
                            current_inds = np.arange(self.point_get.value[0],
                                                     np.max(self.inds) + 1)
                        else:
                            current_inds = np.arange(
                                self.point_get.value[0],
                                self.point_get.value[0] + self.time_size)

                        # Get data from remote dataset and add
                        # to local cache
                        while True:
                            try:
                                self.get_remote_data(localvars, remotevars,
                                                     current_inds, shape)
                            except Exception:
                                logger.warn(
                                    "CachingDataController failed to get remote data.  Trying again in 30 seconds"
                                )
                                timer.sleep(30)
                            else:
                                break

                        c += 1
                    except Exception:
                        logger.error(
                            "CachingDataController failed to get data (not first request)"
                        )
                        raise
                    finally:
                        self.local.sync()
                        self.local.close()
                        self.has_write_lock.value = -1
                        self.write_lock.release()
                        self.get_data.value = False
                        self.read_lock.release()
                        logger.debug(
                            "Done updating cache file, closing file, and releasing locks"
                        )
            else:
                logger.debug(
                    "Particles are still running, waiting for them to request data..."
                )
                timer.sleep(2)

        self.dataset.closenc()

        return "CachingDataController"
Ejemplo n.º 37
    def listen_for_results(self):
        try:
            # Get results back from queue, test for failed particles
            return_particles = []
            retrieved = 0.
            self.error_code = 0

            logger.info("Waiting for %i particle results" % len(self.particles))
            logger.progress((5, "Running model"))
            while retrieved < self.number_of_tasks:
                try:
                    # Returns a tuple of code, result
                    code, tempres = self.results.get(timeout=240)
                except Queue.Empty:

                    new_procs = []
                    old_procs = []
                    for p in self.procs:
                        if not p.is_alive() and p.exitcode != 0:
                            # Do what the Consumer would do if something finished.
                            # Add something to results queue
                            self.results.put((-3, "Zombie"))
                            # Decrement nproc (Consumer exits when this is 0)
                            with self.nproc_lock:
                                self.n_run.value = self.n_run.value - 1

                            # Remove task from queue (so they can be joined later on)
                            self.tasks.task_done()

                            # Start a new Consumer.  It will exit if there are no tasks available.
                            np = Consumer(self.tasks, self.results, self.n_run, self.nproc_lock, self.active, None, name=p.name)
                            new_procs.append(np)
                            old_procs.append(p)

                    for p in old_procs:
                        try:
                            self.procs.remove(p)
                        except ValueError:
                            logger.warn("Did not find %s in the list of processes.  Continuing on." % p.name)

                    for p in new_procs:
                        self.procs.append(p)
                        logger.warn("Started a new consumer (%s) to replace a zombie consumer" % p.name)
                        p.start()

                else:
                    # We got one.
                    retrieved += 1
                    if code is None:
                        logger.warn("Got an unrecognized response from a task.")
                    elif code == -1:
                        logger.warn("Particle %s has FAILED!!" % tempres.uid)
                    elif code == -3:
                        self.error_code = code
                        logger.info("A zombie process was caught and task was removed from queue")
                    elif isinstance(tempres, Particle):
                        logger.info("Particle %d finished" % tempres.uid)
                        return_particles.append(tempres)
                        # Multiply by 90 here to save the last bit of the progress bar for exporting
                        logger.progress((round((retrieved / self.number_of_tasks) * 90., 1), "Particle %d finished" % tempres.uid))
                    else:
                        logger.info("Got a strange result on results queue: %s" % str(tempres))

                    logger.info("Retrieved %i/%i results" % (int(retrieved), self.number_of_tasks))

            if len(return_particles) != len(self.particles):
                logger.warn("Some particles failed and are not included in the output")

            # The results queue should be empty at this point
            assert self.results.empty() is True

            # Should be good to join on the tasks now that the queue is empty
            logger.info("Joining the task queue")
            self.tasks.join()

            self.particles = return_particles

        finally:
            # Join all processes
            logger.info("Joining the processes")
            for w in self.procs:
                # Wait 20 seconds
                w.join(20.)
                if w.is_alive():
                    # Process is hanging, kill it.
                    logger.info("Terminating %s forcefully.  This should have exited itself." % w.name)
                    w.terminate()
Ejemplo n.º 38
    def listen_for_results(self, output_h5_file, total_particles):
        try:
            # Get results back from queue, test for failed particles
            return_particles = []
            retrieved = 0.
            self.error_code = 0

            logger.info("Waiting for %i particle results" % total_particles)
            while retrieved < self.total_task_count():  # One for the CachingDataController

                logger.info("looping in listen_for_results")

                try:
                    # Returns a tuple of code, result
                    code, tempres = self.results.get(timeout=240)
                except queue.Empty:
                    # Poll the active processes to make sure they are all alive and then continue with loop
                    if not self.data_controller_process.is_alive() and self.data_controller_process.exitcode != 0:
                        # Data controller is zombied, kill off other processes.
                        self.get_data.value = False
                        self.results.put((-2, "CachingDataController"))

                    new_procs = []
                    old_procs = []
                    for p in self.procs:
                        if not p.is_alive() and p.exitcode != 0:
                            # Do what the Consumer would do if something finished.
                            # Add something to results queue
                            self.results.put((-3, "ZombieParticle"))
                            # Decrement nproc (CachingDataController exits when this is 0)
                            with self.nproc_lock:
                                self.n_run.value = self.n_run.value - 1

                            # Remove task from queue (so they can be joined later on)
                            self.tasks.task_done()

                            # Start a new Consumer.  It will exit if there are no tasks available.
                            np = Consumer(self.tasks,
                                          self.results,
                                          self.n_run,
                                          self.nproc_lock,
                                          self.active,
                                          self.get_data,
                                          name=p.name)
                            new_procs.append(np)
                            old_procs.append(p)

                            # Release any locks the PID had
                            if p.pid in self.has_read_lock:
                                with self.read_lock:
                                    self.read_count.value -= 1
                                    self.has_read_lock.remove(p.pid)

                            if self.has_data_request_lock.value == p.pid:
                                self.has_data_request_lock.value = -1
                                try:
                                    self.data_request_lock.release()
                                except:
                                    pass

                            if self.has_write_lock.value == p.pid:
                                self.has_write_lock.value = -1
                                try:
                                    self.write_lock.release()
                                except:
                                    pass

                    for p in old_procs:
                        try:
                            self.procs.remove(p)
                        except ValueError:
                            logger.warn(
                                "Did not find %s in the list of processes.  Continuing on."
                                % p.name)

                    for p in new_procs:
                        self.procs.append(p)
                        logger.warn(
                            "Started a new consumer (%s) to replace a zombie consumer"
                            % p.name)
                        p.start()

                else:
                    # We got one.
                    retrieved += 1
                    if code is None:
                        logger.warn(
                            "Got an unrecognized response from a task.")
                    elif code == -1:
                        logger.warn("Particle %s has FAILED!!" % tempres.uid)
                    elif code == -2:
                        self.error_code = code
                        logger.warn(
                            "CachingDataController has FAILED!!  Removing cache file so the particles fail."
                        )
                        try:
                            os.remove(self.cache_path)
                        except OSError:
                            logger.debug(
                                "Could not remove cache file, it probably never existed"
                            )
                            pass
                    elif code == -3:
                        self.error_code = code
                        logger.info(
                            "A zombie process was caught and task was removed from queue"
                        )
                    elif isinstance(tempres, Particle):
                        logger.info("Particle %d finished" % tempres.uid)
                        return_particles.append(tempres)
                        # Multiply by 90 here to save the last bit of the progress bar for exporting
                        logger.progress(
                            (round((retrieved / self.total_task_count()) * 90.,
                                   1), "Particle %d finished" % tempres.uid))
                    elif tempres == "CachingDataController":
                        logger.info("CachingDataController finished")
                        logger.progress(
                            (round((retrieved / self.total_task_count()) * 90.,
                                   1), "CachingDataController finished"))
                    else:
                        logger.info("Got a strange result on results queue")
                        logger.info(str(tempres))

                    logger.info("Retrieved %i/%i results" %
                                (int(retrieved), self.total_task_count()))

                # Relax
                time.sleep(1)

            if len(return_particles) != total_particles:
                logger.warn(
                    "Some particles failed and are not included in the output")

            # The results queue should be empty at this point
            assert self.results.empty() is True

            # Should be good to join on the tasks now that the queue is empty
            logger.info("Joining the task queue")
            self.tasks.join()
            self.tasks.close()
            self.tasks.join_thread()
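            # close() and join_thread() presumably make sure the task queue's feeder
            # thread has flushed everything before the worker processes are joined below.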

        finally:
            # Join all processes
            logger.info("Joining the processes")
            for w in self.procs + [self.data_controller_process]:
                # Wait 20 seconds
                w.join(20.)
                if w.is_alive():
                    # Process is hanging, kill it.
                    logger.info(
                        "Terminating %s forcefully.  This should have exited itself."
                        % w.name)
                    w.terminate()

        if self.error_code == -2:
            raise ValueError(
                "Error in the BaseDataController (error_code was -2)")

        results = ex.ResultsPyTable(output_h5_file)
        for p in return_particles:
            for x in range(len(p.locations)):
                results.write(p.timestep_index_dump(x))
        results.compute()
        results.close()

        return