Python progress Examples

Programming Language: Python

Namespace/Package Name: paegan.logger.logger

Method/Function: progress

Examples at hotexamples.com: 20

Python progress - 20 examples found. These are the top rated real world Python examples of paegan.logger.logger.progress extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: base.py Project: axiom-data-science/paegan-transport

    def listen_for_results(self, output_h5_file, total_particles):
        logger.info("Waiting for %i particle results" % total_particles)

        particles = []
        retrieved = 0
        timeout = 200

        while retrieved < total_particles:
            try:
                # self.result is an iterator that can timeout on next()
                particle = self.result.next(timeout)
                retrieved += 1
                particles.append(particle)
            except StopIteration:
                assert retrieved >= total_particles
                break
            except:
                logger.exception("Particle has FAILED!!")
                continue

            # We multiply by 90 here to save 10% for the exporting
            logger.progress((round((float(retrieved) / total_particles) * 90., 1), "%s Particle(s) complete" % retrieved))

        logger.info(particles)
        results = ex.ResultsPyTable(output_h5_file)
        for p in particles:
            for x in range(len(p.locations)):
                results.write(p.timestep_index_dump(x))
        results.compute()
        results.close()

        return

Example #2

Show file

File: ipython.py Project: ocefpaf/paegan-transport

    def start_tasks(self, **kwargs):

        try:
            logger.info('Adding %i particles as tasks' % self.total_particle_count())
            tasks = []

            for part in self.particles:
                forcer = BaseForcer(self.hydrodataset,
                                    particle=part,
                                    common_variables=self.common_variables,
                                    timevar=self.timevar,
                                    times=self.times,
                                    start_time=self.start,
                                    models=self._models,
                                    release_location_centroid=self.reference_location.point,
                                    usebathy=self._use_bathymetry,
                                    useshore=self._use_shoreline,
                                    usesurface=self._use_seasurface,
                                    reverse_distance=self.reverse_distance,
                                    bathy_path=self.bathy_path,
                                    shoreline_path=self.shoreline_path,
                                    shoreline_feature=self.shoreline_feature,
                                    time_method=self.time_method,
                                    shoreline_index_buffer=self.shoreline_index_buffer)
                tasks.append(forcer)

            logger.progress((5, 'Running model'))
            return self.pool.map_async(Runner(), tasks)

        except Exception:
            logger.exception("Something didn't start correctly!")
            raise

Example #3

Show file

File: base.py Project: ocefpaf/paegan-transport

    def listen_for_results(self, output_h5_file, total_particles):
        logger.info("Waiting for %i particle results" % total_particles)

        particles = []
        retrieved = 0
        timeout = 200

        while retrieved < total_particles:
            try:
                # self.result is an iterator that can timeout on next()
                particle = self.result.next(timeout)
                retrieved += 1
                particles.append(particle)
            except StopIteration:
                assert retrieved >= total_particles
                break
            except:
                logger.exception("Particle has FAILED!!")
                continue

            # We multiply by 90 here to save 10% for the exporting
            logger.progress((round((float(retrieved) / total_particles) * 90.,
                                   1), "%s Particle(s) complete" % retrieved))

        logger.info(particles)
        results = ex.ResultsPyTable(output_h5_file)
        for p in particles:
            for x in range(len(p.locations)):
                results.write(p.timestep_index_dump(x))
        results.compute()
        results.close()

        return

Example #4

Show file

File: cached.py Project: axiom-data-science/paegan-transport

    def start_tasks(self, **kwargs):
        try:
            logger.info('Starting CachingDataController')

            # Add data controller to the queue first so that it
            # can get the initial data and is not blocked
            data_controller = CachingDataController(self.hydrodataset, self.common_variables, self.n_run, self.get_data, self.write_lock, self.has_write_lock, self.read_lock, self.read_count,
                                                    self.time_chunk, self.horiz_chunk, self.times, self.start, self.point_get, self.reference_location, cache_path=self.cache_path)
            self.tasks.put(data_controller)
            # Create CachingDataController worker
            self.data_controller_process = Consumer(self.tasks, self.results, self.n_run, self.nproc_lock, self.active, self.get_data, name="CachingDataController")
            self.data_controller_process.start()

            logger.info('Adding %i particles as tasks' % self.total_particle_count())

            for part in self.particles:
                forcer = CachingForcer(self.cache_path,
                                       particle=part,
                                       common_variables=self.common_variables,
                                       timevar=self.timevar,
                                       times=self.times,
                                       start_time=self.start,
                                       models=self._models,
                                       release_location_centroid=self.reference_location.point,
                                       usebathy=self._use_bathymetry,
                                       useshore=self._use_shoreline,
                                       usesurface=self._use_seasurface,
                                       reverse_distance=self.reverse_distance,
                                       bathy_path=self.bathy_path,
                                       shoreline_path=self.shoreline_path,
                                       shoreline_feature=self.shoreline_feature,
                                       time_method=self.time_method,
                                       shoreline_index_buffer=self.shoreline_index_buffer,
                                       get_data=self.get_data,
                                       read_lock=self.read_lock,
                                       has_read_lock=self.has_read_lock,
                                       read_count=self.read_count,
                                       point_get=self.point_get,
                                       data_request_lock=self.data_request_lock,
                                       has_data_request_lock=self.has_data_request_lock
                                      )
                self.tasks.put(forcer)

            # Create workers for the particles.
            self.procs = [Consumer(self.tasks, self.results, self.n_run, self.nproc_lock, self.active, self.get_data, name="CachingForcer-%d" % i)
                          for i in range(self.nproc - 1) ]
            logger.progress((5, 'Running model'))
            for w in self.procs:
                w.start()
                logger.info('Started %s' % w.name)

            return True

        except Exception:
            logger.exception("Something didn't start correctly!")
            return False

Example #5

Show file

File: distributed.py Project: ocefpaf/paegan-transport

    def start_tasks(self, **kwargs):
        logger.progress((5, 'Running model'))
        rc = redis.from_url(self.redis_url)
        if kwargs.get('task_queue_call'):
            for p in self.particles:
                try:
                    kwargs.get('task_queue_call')(func=particle_runner,
                                                  args=(
                                                      p,
                                                      self,
                                                  ))
                except Exception:
                    logger.exception(traceback.format_exc())
                    rc.publish(self.redis_results_channel,
                               json.dumps({
                                   "status": "FAILED",
                                   "uid": p.uid
                               }))
            return True
        else:
            tasks = []
            for p in self.particles:
                f = BaseForcer(
                    self.hydrodataset,
                    particle=p,
                    common_variables=self.common_variables,
                    times=self.times,
                    start_time=self.start,
                    models=self._models,
                    release_location_centroid=self.reference_location.point,
                    usebathy=self._use_bathymetry,
                    useshore=self._use_shoreline,
                    usesurface=self._use_seasurface,
                    reverse_distance=self.reverse_distance,
                    bathy_path=self.bathy_path,
                    shoreline_path=self.shoreline_path,
                    shoreline_feature=self.shoreline_feature,
                    time_method=self.time_method,
                    redis_url=self.redis_url,
                    redis_results_channel=self.redis_results_channel,
                    shoreline_index_buffer=self.shoreline_index_buffer)
                tasks.append(f)
            self.pool = multiprocessing.Pool()
            return self.pool.imap(Runner(), tasks)

        return False

Example #6

Show file

File: model_controller.py Project: daf/paegan-transport

    def listen_for_results(self):
        logger.info("Waiting for %i particle results" % len(self.particles))
        logger.progress((5, "Running model"))

        particles = []
        retrieved = 0
        timeout = 200

        while retrieved < len(self.particles):
            try:
                # @TODO: better mechanism than switching on type
                if isinstance(self.pool, multiprocessing.pool.Pool):
                    # self.result is an iterator that can timeout on next()
                    particle = self.result.next(timeout)
                    retrieved += 1
                    particles.append(particle)
                else:
                    # IPython parallel View
                    # self.result is an AsyncMapResult
                    from IPython.parallel import TimeoutError
                    try:
                        new_particles = self.result.get(timeout=1)
                    except TimeoutError:
                        pass    # this is fine, get incremental progress below
                    else:
                        particles = new_particles

                    # progress is absolute, not incremental
                    retrieved = self.result.progress

            except StopIteration:
                assert retrieved >= len(self.particles)
                break
            except:
                logger.exception("Particle has FAILED!!")
                #logger.warn("Particle %s has FAILED!!" % particle.uid)
                continue

            # We multiply by 95 here to save 5% for the exporting
            logger.progress((round((float(retrieved) / self.number_of_tasks) * 90., 1), "%s Particle(s) complete" % retrieved))

        return particles

Example #7

Show file

File: base.py Project: ocefpaf/paegan-transport

    def run(self, **kwargs):

        logger.progress((4, "Starting tasks"))
        self.result = self.start_tasks(**kwargs)
        if self.result is None:
            raise BaseDataControllerError("Not all tasks started! Exiting.")

        # Store results in hdf5 file for processing later
        output_h5_file = None
        if kwargs.get('output_path') is not None:
            output_h5_file = os.path.join(kwargs.get('output_path'),
                                          'results.h5')

        if self.thread_result_listener is True:
            rl = threading.Thread(name="ResultListener",
                                  target=self.listen_for_results,
                                  args=(output_h5_file,
                                        self.total_particle_count()))
            rl.daemon = True
            rl.start()
            rl.join()  # This blocks until the tasks are all done.
        else:
            self.listen_for_results(output_h5_file, self.total_particle_count(
            ))  # This blocks until the tasks are all done.

        logger.info('Tasks are all finished... Cleaning up!!')
        self.cleanup()

        # If output_formats and path specified,
        # output particle run data to disk when completed
        if "output_formats" in kwargs:

            logger.progress((96, "Exporting results"))

            # Make sure output_path is also included
            if kwargs.get("output_path", None) is not None:
                formats = kwargs.get("output_formats")
                output_path = kwargs.get("output_path")
                if isinstance(formats, list):
                    for fmt in formats:
                        logger.info("Exporting to: %s" % fmt)
                        try:
                            # Calls the export function
                            fmt.export(output_path, output_h5_file)
                        except:
                            logger.exception("Failed to export to: %s" % fmt)
                else:
                    logger.warn(
                        'The output_formats parameter should be a list, not saving any output!'
                    )
            else:
                logger.warn('No output path defined, not saving any output!')
        else:
            logger.warn(
                'No output_formats parameter was defined, not saving any output!'
            )

        logger.progress((97, "Model Run Complete"))

        return

Example #8

Show file

File: ipython.py Project: ocefpaf/paegan-transport

    def listen_for_results(self, output_h5_file, total_particles):
        logger.info("Waiting for %i particle results" % total_particles)

        particles = []
        retrieved = 0

        while retrieved < total_particles:
            try:
                # IPython parallel View
                # self.result is an AsyncMapResult
                from IPython.parallel import TimeoutError
                try:
                    new_particles = self.result.get(timeout=1)
                except TimeoutError:
                    pass    # this is fine, get incremental progress below
                else:
                    particles = new_particles

                # progress is absolute, not incremental
                retrieved = self.result.progress
            except StopIteration:
                assert retrieved >= total_particles
                break
            except:
                logger.exception("Particle has FAILED!!")
                continue

            # We multiply by 90 here to save 10% for the exporting
            logger.progress((round((float(retrieved) / total_particles) * 90., 1), "%s Particle(s) complete" % retrieved))

        results = ex.ResultsPyTable(output_h5_file)
        for p in particles:
            for x in range(len(p.locations)):
                results.write(p.timestep_index_dump(x))
        results.compute()
        results.close()

        return

Example #9

Show file

File: model_controller.py Project: daf/paegan-transport

    def run(self, hydrodataset, **kwargs):

        self.hydrodataset = hydrodataset

        self.setup_run(**kwargs)

        logger.progress((4, "Starting tasks"))
        self.result = self.start_tasks()
        if self.result is None:
            raise BaseDataControllerError("Not all tasks started! Exiting.")

        # This blocks until the tasks are all done.
        self.particles = self.listen_for_results()

        logger.info('Consumers are all finished!')

        logger.info('Cleaning up')
        self.cleanup()

        if len(self.particles) > 0:
            # If output_formats and path specified,
            # output particle run data to disk when completed
            if "output_formats" in kwargs:

                logger.progress((96, "Exporting results"))

                # Make sure output_path is also included
                if kwargs.get("output_path", None) is not None:
                    formats = kwargs.get("output_formats")
                    output_path = kwargs.get("output_path")
                    if isinstance(formats, list):
                        for format in formats:
                            logger.info("Exporting to: %s" % format)
                            try:
                                self.export(output_path, format=format)
                            except:
                                logger.exception("Failed to export to: %s" % format)
                    else:
                        logger.warn('The output_formats parameter should be a list, not saving any output!')
                else:
                    logger.warn('No output path defined, not saving any output!')
            else:
                logger.warn('No output format defined, not saving any output!')
        else:
            logger.warn("Model didn't actually do anything, check the log.")
            if self.error_code == -2:
                raise BaseDataControllerError("Error in the BaseDataController")
            else:
                raise ModelError("Error in the model")

        logger.progress((97, "Model Run Complete"))
        return self.particles

Example #10

Show file

File: base.py Project: axiom-data-science/paegan-transport

    def run(self, **kwargs):

        logger.progress((4, "Starting tasks"))
        self.result = self.start_tasks(**kwargs)
        if self.result is None:
            raise BaseDataControllerError("Not all tasks started! Exiting.")

        # Store results in hdf5 file for processing later
        output_h5_file = None
        if kwargs.get('output_path') is not None:
            output_h5_file = os.path.join(kwargs.get('output_path'), 'results.h5')

        if self.thread_result_listener is True:
            rl = threading.Thread(name="ResultListener", target=self.listen_for_results, args=(output_h5_file, self.total_particle_count()))
            rl.daemon = True
            rl.start()
            rl.join()  # This blocks until the tasks are all done.
        else:
            self.listen_for_results(output_h5_file, self.total_particle_count())    # This blocks until the tasks are all done.

        logger.info('Tasks are all finished... Cleaning up!!')
        self.cleanup()

        # If output_formats and path specified,
        # output particle run data to disk when completed
        if "output_formats" in kwargs:

            logger.progress((96, "Exporting results"))

            # Make sure output_path is also included
            if kwargs.get("output_path", None) is not None:
                formats = kwargs.get("output_formats")
                output_path = kwargs.get("output_path")
                if isinstance(formats, list):
                    for fmt in formats:
                        logger.info("Exporting to: %s" % fmt)
                        try:
                            # Calls the export function
                            fmt.export(output_path, output_h5_file)
                        except:
                            logger.exception("Failed to export to: %s" % fmt)
                else:
                    logger.warn('The output_formats parameter should be a list, not saving any output!')
            else:
                logger.warn('No output path defined, not saving any output!')
        else:
            logger.warn('No output_formats parameter was defined, not saving any output!')

        logger.progress((97, "Model Run Complete"))

        return

Example #11

Show file

File: larva.py Project: asascience-open/larva-service

def run(run_id):

    # Sleep to give the Run object enough time to save
    time.sleep(10)

    with app.app_context():
        from paegan.logger import logger

        job = get_current_job()

        output_path = os.path.join(current_app.config['OUTPUT_PATH'], run_id)
        shutil.rmtree(output_path, ignore_errors=True)
        os.makedirs(output_path)

        cache_path = os.path.join(current_app.config['CACHE_PATH'], run_id)
        shutil.rmtree(cache_path, ignore_errors=True)
        os.makedirs(cache_path)

        temp_animation_path = os.path.join(current_app.config['OUTPUT_PATH'], "temp_images_" + run_id)
        shutil.rmtree(temp_animation_path, ignore_errors=True)
        os.makedirs(temp_animation_path)

        # Set up Logger
        queue = multiprocessing.Queue(-1)

        f, log_file = tempfile.mkstemp(dir=cache_path, prefix=run_id, suffix=".log")
        os.close(f)

        # Close any existing handlers
        (hand.close() for hand in logger.handlers)
        # Remove any existing handlers
        logger.handlers = []
        logger.setLevel(logging.PROGRESS)
        handler = MultiProcessingLogHandler(log_file, queue)
        handler.setLevel(logging.PROGRESS)
        formatter = logging.Formatter('[%(asctime)s] - %(levelname)s - %(name)s - %(processName)s - %(message)s')
        handler.setFormatter(formatter)
        logger.addHandler(handler)

        # Progress stuff.  Hokey!
        progress_deque = collections.deque(maxlen=1)
        progress_handler = ProgressHandler(progress_deque)
        progress_handler.setLevel(logging.PROGRESS)
        logger.addHandler(progress_handler)

        e = threading.Event()

        def save_progress():
            while e.wait(5) is not True:
                try:
                    record = progress_deque.pop()
                    if record == StopIteration:
                        break

                    job.meta["updated"] = record[0]
                    if record is not None and record[1] >= 0:
                        job.meta["progress"] = record[1]
                    if isinstance(record[2], unicode) or isinstance(record[2], str):
                        job.meta["message"] = record[2]

                    job.save()
                except IndexError:
                    pass
                except Exception:
                    raise
            return

        t = threading.Thread(name="ProgressUpdater", target=save_progress)
        t.daemon = True
        t.start()

        model = None

        try:

            logger.progress((0, "Configuring model"))

            run = db.Run.find_one( { '_id' : ObjectId(run_id) } )
            if run is None:
                return "Failed to locate run %s. May have been deleted while task was in the queue?" % run_id

            geometry       = loads(run['geometry'])
            start_depth    = run['release_depth']
            num_particles  = run['particles']
            time_step      = run['timestep']
            num_steps      = int(math.ceil((run['duration'] * 24 * 60 * 60) / time_step))
            start_time     = run['start'].replace(tzinfo = pytz.utc)
            shoreline_path = run['shoreline_path'] or app.config.get("SHORE_PATH")
            shoreline_feat = run['shoreline_feature']

            # Set up output directory/bucket for run
            output_formats = ['Shapefile', 'NetCDF', 'Trackline']

            # Setup Models
            models = []
            if run['cached_behavior'] is not None and run['cached_behavior'].get('results', None) is not None:
                behavior_data = run['cached_behavior']['results'][0]
                l = LarvaBehavior(data=behavior_data)
                models.append(l)
            models.append(Transport(horizDisp=run['horiz_dispersion'], vertDisp=run['vert_dispersion']))

            # Setup ModelController
            model = ModelController(geometry=geometry, depth=start_depth, start=start_time, step=time_step, nstep=num_steps, npart=num_particles, models=models, use_bathymetry=True, use_shoreline=True,
                                    time_chunk=run['time_chunk'], horiz_chunk=run['horiz_chunk'], time_method=run['time_method'], shoreline_path=shoreline_path, shoreline_feature=shoreline_feat, reverse_distance=1500)

            # Run the model
            cache_file = os.path.join(cache_path, run_id + ".nc.cache")
            bathy_file = current_app.config['BATHY_PATH']

            model.run(run['hydro_path'], output_path=output_path, bathy=bathy_file, output_formats=output_formats, cache=cache_file, remove_cache=False, caching=run['caching'])

            # Skip creating movie output_path
            """
            from paegan.viz.trajectory import CFTrajectory

            logger.info("Creating animation...")
            for filename in os.listdir(output_path):
                if os.path.splitext(filename)[1][1:] == "nc":
                    # Found netCDF file
                    netcdf_file = os.path.join(output_path,filename)
                    traj = CFTrajectory(netcdf_file)
                    success = traj.plot_animate(os.path.join(output_path,'animation.avi'), temp_folder=temp_animation_path, bathy=app.config['BATHY_PATH'])
                    if not success:
                        logger.info("Could not create animation")
                    else:
                        logger.info("Animation saved")
            """
            job.meta["outcome"] = "success"
            job.save()
            return "Successfully ran %s" % run_id

        except Exception as exception:
            logger.warn("Run FAILED, cleaning up and uploading log.")
            logger.warn(exception.message)
            job.meta["outcome"] = "failed"
            job.save()
            raise

        finally:

            logger.progress((99, "Processing output files"))
            # Close the handler so we can upload the log file without a file lock
            (hand.close() for hand in logger.handlers)
            queue.put(StopIteration)
            # Break out of the progress loop
            e.set()
            t.join()

            # Move logfile to output directory
            shutil.move(log_file, os.path.join(output_path, 'model.log'))

            # Move cachefile to output directory if we made one
            if run['caching']:
                shutil.move(cache_file, output_path)

            output_files = []
            for filename in os.listdir(output_path):
                outfile = os.path.join(output_path, filename)
                output_files.append(outfile)

            result_files = []
            base_access_url = current_app.config.get('NON_S3_OUTPUT_URL', None)
            # Handle results and cleanup
            if current_app.config['USE_S3'] is True:
                base_access_url = urljoin("http://%s.s3.amazonaws.com/output/" % current_app.config['S3_BUCKET'], run_id)
                # Upload results to S3 and remove the local copies
                conn = S3Connection()
                bucket = conn.get_bucket(current_app.config['S3_BUCKET'])

                for outfile in output_files:
                    # Don't upload the cache file
                    if os.path.basename(outfile) == os.path.basename(cache_file):
                        continue

                    # Upload the outfile with the same as the run name
                    _, ext = os.path.splitext(outfile)
                    new_filename = slugify(unicode(run['name'])) + ext

                    k = Key(bucket)
                    k.key = "output/%s/%s" % (run_id, new_filename)
                    k.set_contents_from_filename(outfile)
                    k.set_acl('public-read')
                    result_files.append(base_access_url + "/" + new_filename)
                    os.remove(outfile)

                shutil.rmtree(output_path, ignore_errors=True)

            else:
                for outfile in output_files:
                    result_files.append(urljoin(base_access_url, run_id) + "/" + os.path.basename(outfile))

            shutil.rmtree(temp_animation_path, ignore_errors=True)

            # Set output fields
            run.output = result_files
            run.ended = datetime.utcnow()
            run.compute()
            run.save()

            # Cleanup
            logger.removeHandler(handler)
            del formatter
            del handler
            del logger
            del model
            queue.close()

            job.meta["message"] = "Complete"
            job.save()

Example #12

Show file

File: model_controller.py Project: blazetopher/paegan-transport

    def run(self, hydrodataset, **kwargs):

        # Add ModelController description to logfile
        logger.info(self)

        # Add the model descriptions to logfile
        for m in self._models:
            logger.info(m)

        # Calculate the model timesteps
        # We need times = len(self._nstep) + 1 since data is stored one timestep
        # after a particle is forced with the final timestep's data.
        times = range(0, (self._step * self._nstep) + 1, self._step)
        # Calculate a datetime object for each model timestep
        # This method is duplicated in DataController and ForceParticle
        # using the 'times' variables above.  Will be useful in those other
        # locations for particles released at different times
        # i.e. released over a few days
        modelTimestep, self.datetimes = AsaTransport.get_time_objects_from_model_timesteps(
            times, start=self.start)

        time_chunk = self._time_chunk
        horiz_chunk = self._horiz_chunk
        low_memory = kwargs.get("low_memory", False)

        # Should we remove the cache file at the end of the run?
        remove_cache = kwargs.get("remove_cache", True)

        self.bathy_path = kwargs.get("bathy", None)

        self.cache_path = kwargs.get("cache", None)
        if self.cache_path is None:
            # Generate temp filename for dataset cache
            default_cache_dir = os.path.join(os.path.dirname(__file__),
                                             "_cache")
            temp_name = AsaRandom.filename(prefix=str(
                datetime.now().microsecond),
                                           suffix=".nc")
            self.cache_path = os.path.join(default_cache_dir, temp_name)

        logger.progress((1, "Setting up particle start locations"))
        point_locations = []
        if isinstance(self.geometry, Point):
            point_locations = [self.reference_location] * self._npart
        elif isinstance(self.geometry, Polygon) or isinstance(
                self.geometry, MultiPolygon):
            point_locations = [
                Location4D(latitude=loc.y,
                           longitude=loc.x,
                           depth=self._depth,
                           time=self.start)
                for loc in AsaTransport.fill_polygon_with_points(
                    goal=self._npart, polygon=self.geometry)
            ]

        # Initialize the particles
        logger.progress((2, "Initializing particles"))
        for x in xrange(0, self._npart):
            p = LarvaParticle(id=x)
            p.location = point_locations[x]
            # We don't need to fill the location gaps here for environment variables
            # because the first data collected actually relates to this original
            # position.
            # We do need to fill in fields such as settled, halted, etc.
            p.fill_status_gap()
            # Set the inital note
            p.note = p.outputstring()
            p.notes.append(p.note)
            self.particles.append(p)

        # This is where it makes sense to implement the multiprocessing
        # looping for particles and models. Can handle each particle in
        # parallel probably.
        #
        # Get the number of cores (may take some tuning) and create that
        # many workers then pass particles into the queue for the workers
        mgr = multiprocessing.Manager()
        nproc = multiprocessing.cpu_count() - 1
        if nproc <= 0:
            raise ValueError(
                "Model does not run using less than two CPU cores")

        # Each particle is a task, plus the DataController
        number_of_tasks = len(self.particles) + 1

        # We need a process for each particle and one for the data controller
        nproc = min(number_of_tasks, nproc)

        # When a particle requests data
        data_request_lock = mgr.Lock()
        # PID of process with lock
        has_data_request_lock = mgr.Value('int', -1)

        nproc_lock = mgr.Lock()

        # Create the task queue for all of the particles and the DataController
        tasks = multiprocessing.JoinableQueue(number_of_tasks)
        # Create the result queue for all of the particles and the DataController
        results = mgr.Queue(number_of_tasks)

        # Create the shared state objects
        get_data = mgr.Value('bool', True)
        # Number of tasks
        n_run = mgr.Value('int', number_of_tasks)
        updating = mgr.Value('bool', False)

        # When something is reading from cache file
        read_lock = mgr.Lock()
        # list of PIDs that are reading
        has_read_lock = mgr.list()
        read_count = mgr.Value('int', 0)

        # When something is writing to the cache file
        write_lock = mgr.Lock()
        # PID of process with lock
        has_write_lock = mgr.Value('int', -1)

        point_get = mgr.Value('list', [0, 0, 0])
        active = mgr.Value('bool', True)

        logger.progress((3, "Initializing and caching hydro model's grid"))
        try:
            ds = CommonDataset.open(hydrodataset)
            # Query the dataset for common variable names
            # and the time variable.
            logger.debug("Retrieving variable information from dataset")
            common_variables = self.get_common_variables_from_dataset(ds)

            logger.debug("Pickling time variable to disk for particles")
            timevar = ds.gettimevar(common_variables.get("u"))
            f, timevar_pickle_path = tempfile.mkstemp()
            os.close(f)
            f = open(timevar_pickle_path, "wb")
            pickle.dump(timevar, f)
            f.close()
            ds.closenc()
        except:
            logger.warn("Failed to access remote dataset %s" % hydrodataset)
            raise DataControllerError("Inaccessible DAP endpoint: %s" %
                                      hydrodataset)

        # Add data controller to the queue first so that it
        # can get the initial data and is not blocked

        logger.debug('Starting DataController')
        logger.progress((4, "Starting processes"))
        data_controller = parallel.DataController(hydrodataset,
                                                  common_variables,
                                                  n_run,
                                                  get_data,
                                                  write_lock,
                                                  has_write_lock,
                                                  read_lock,
                                                  read_count,
                                                  time_chunk,
                                                  horiz_chunk,
                                                  times,
                                                  self.start,
                                                  point_get,
                                                  self.reference_location,
                                                  low_memory=low_memory,
                                                  cache=self.cache_path)
        tasks.put(data_controller)
        # Create DataController worker
        data_controller_process = parallel.Consumer(tasks,
                                                    results,
                                                    n_run,
                                                    nproc_lock,
                                                    active,
                                                    get_data,
                                                    name="DataController")
        data_controller_process.start()

        logger.debug('Adding %i particles as tasks' % len(self.particles))
        for part in self.particles:
            forcing = parallel.ForceParticle(
                part,
                hydrodataset,
                common_variables,
                timevar_pickle_path,
                times,
                self.start,
                self._models,
                self.reference_location.point,
                self._use_bathymetry,
                self._use_shoreline,
                self._use_seasurface,
                get_data,
                n_run,
                read_lock,
                has_read_lock,
                read_count,
                point_get,
                data_request_lock,
                has_data_request_lock,
                reverse_distance=self.reverse_distance,
                bathy=self.bathy_path,
                shoreline_path=self.shoreline_path,
                cache=self.cache_path,
                time_method=self.time_method)
            tasks.put(forcing)

        # Create workers for the particles.
        procs = [
            parallel.Consumer(tasks,
                              results,
                              n_run,
                              nproc_lock,
                              active,
                              get_data,
                              name="ForceParticle-%d" % i)
            for i in xrange(nproc - 1)
        ]
        for w in procs:
            w.start()
            logger.debug('Started %s' % w.name)

        # Get results back from queue, test for failed particles
        return_particles = []
        retrieved = 0.
        error_code = 0

        logger.info("Waiting for %i particle results" % len(self.particles))
        logger.progress((5, "Running model"))
        while retrieved < number_of_tasks:
            try:
                # Returns a tuple of code, result
                code, tempres = results.get(timeout=240)
            except Queue.Empty:
                # Poll the active processes to make sure they are all alive and then continue with loop
                if not data_controller_process.is_alive(
                ) and data_controller_process.exitcode != 0:
                    # Data controller is zombied, kill off other processes.
                    get_data.value == False
                    results.put((-2, "DataController"))

                new_procs = []
                old_procs = []
                for p in procs:
                    if not p.is_alive() and p.exitcode != 0:
                        # Do what the Consumer would do if something finished.
                        # Add something to results queue
                        results.put((-3, "ZombieParticle"))
                        # Decrement nproc (DataController exits when this is 0)
                        with nproc_lock:
                            n_run.value = n_run.value - 1

                        # Remove task from queue (so they can be joined later on)
                        tasks.task_done()

                        # Start a new Consumer.  It will exit if there are no tasks available.
                        np = parallel.Consumer(tasks,
                                               results,
                                               n_run,
                                               nproc_lock,
                                               active,
                                               get_data,
                                               name=p.name)
                        new_procs.append(np)
                        old_procs.append(p)

                        # Release any locks the PID had
                        if p.pid in has_read_lock:
                            with read_lock:
                                read_count.value -= 1
                                has_read_lock.remove(p.pid)

                        if has_data_request_lock.value == p.pid:
                            has_data_request_lock.value = -1
                            try:
                                data_request_lock.release()
                            except:
                                pass

                        if has_write_lock.value == p.pid:
                            has_write_lock.value = -1
                            try:
                                write_lock.release()
                            except:
                                pass

                for p in old_procs:
                    try:
                        procs.remove(p)
                    except ValueError:
                        logger.warn(
                            "Did not find %s in the list of processes.  Continuing on."
                            % p.name)

                for p in new_procs:
                    procs.append(p)
                    logger.warn(
                        "Started a new consumer (%s) to replace a zombie consumer"
                        % p.name)
                    p.start()

            else:
                # We got one.
                retrieved += 1
                if code == None:
                    logger.warn("Got an unrecognized response from a task.")
                elif code == -1:
                    logger.warn("Particle %s has FAILED!!" % tempres.uid)
                elif code == -2:
                    error_code = code
                    logger.warn(
                        "DataController has FAILED!!  Removing cache file so the particles fail."
                    )
                    try:
                        os.remove(self.cache_path)
                    except OSError:
                        logger.debug(
                            "Could not remove cache file, it probably never existed"
                        )
                        pass
                elif code == -3:
                    error_code = code
                    logger.info(
                        "A zombie process was caught and task was removed from queue"
                    )
                elif isinstance(tempres, Particle):
                    logger.info("Particle %d finished" % tempres.uid)
                    return_particles.append(tempres)
                    # We mulitply by 95 here to save 5% for the exporting
                    logger.progress(
                        (round((retrieved / number_of_tasks) * 90.,
                               1), "Particle %d finished" % tempres.uid))
                elif tempres == "DataController":
                    logger.info("DataController finished")
                    logger.progress((round((retrieved / number_of_tasks) * 90.,
                                           1), "DataController finished"))
                else:
                    logger.info("Got a strange result on results queue")
                    logger.info(str(tempres))

                logger.info("Retrieved %i/%i results" %
                            (int(retrieved), number_of_tasks))

        if len(return_particles) != len(self.particles):
            logger.warn(
                "Some particles failed and are not included in the output")

        # The results queue should be empty at this point
        assert results.empty() is True

        # Should be good to join on the tasks now that the queue is empty
        logger.info("Joining the task queue")
        tasks.join()

        # Join all processes
        logger.info("Joining the processes")
        for w in procs + [data_controller_process]:
            # Wait 10 seconds
            w.join(10.)
            if w.is_alive():
                # Process is hanging, kill it.
                logger.info(
                    "Terminating %s forcefully.  This should have exited itself."
                    % w.name)
                w.terminate()

        logger.info('Workers complete')

        self.particles = return_particles

        # Remove Manager so it shuts down
        del mgr

        # Remove pickled timevar
        os.remove(timevar_pickle_path)

        # Remove the cache file
        if remove_cache is True:
            try:
                os.remove(self.cache_path)
            except OSError:
                logger.debug(
                    "Could not remove cache file, it probably never existed")

        logger.progress((96, "Exporting results"))

        if len(self.particles) > 0:
            # If output_formats and path specified,
            # output particle run data to disk when completed
            if "output_formats" in kwargs:
                # Make sure output_path is also included
                if kwargs.get("output_path", None) != None:
                    formats = kwargs.get("output_formats")
                    output_path = kwargs.get("output_path")
                    if isinstance(formats, list):
                        for format in formats:
                            logger.info("Exporting to: %s" % format)
                            try:
                                self.export(output_path, format=format)
                            except:
                                logger.error("Failed to export to: %s" %
                                             format)
                    else:
                        logger.warn(
                            'The output_formats parameter should be a list, not saving any output!'
                        )
                else:
                    logger.warn(
                        'No output path defined, not saving any output!')
            else:
                logger.warn('No output format defined, not saving any output!')
        else:
            logger.warn("Model didn't actually do anything, check the log.")
            if error_code == -2:
                raise DataControllerError("Error in the DataController")
            else:
                raise ModelError("Error in the model")

        logger.progress((99, "Model Run Complete"))
        return

Example #13

Show file

File: model_controller.py Project: kwilcox/paegan-transport

    def listen_for_results(self):
        try:
            # Get results back from queue, test for failed particles
            return_particles = []
            retrieved = 0.
            self.error_code = 0

            logger.info("Waiting for %i particle results" % len(self.particles))
            logger.progress((5, "Running model"))
            while retrieved < self.number_of_tasks:
                try:
                    # Returns a tuple of code, result
                    code, tempres = self.results.get(timeout=240)
                except Queue.Empty:
                    # Poll the active processes to make sure they are all alive and then continue with loop
                    if not self.data_controller_process.is_alive() and self.data_controller_process.exitcode != 0:
                        # Data controller is zombied, kill off other processes.
                        self.get_data.value is False
                        self.results.put((-2, "CachingDataController"))

                    new_procs = []
                    old_procs = []
                    for p in self.procs:
                        if not p.is_alive() and p.exitcode != 0:
                            # Do what the Consumer would do if something finished.
                            # Add something to results queue
                            self.results.put((-3, "ZombieParticle"))
                            # Decrement nproc (CachingDataController exits when this is 0)
                            with self.nproc_lock:
                                self.n_run.value = self.n_run.value - 1

                            # Remove task from queue (so they can be joined later on)
                            self.tasks.task_done()

                            # Start a new Consumer.  It will exit if there are no tasks available.
                            np = Consumer(self.tasks, self.results, self.n_run, self.nproc_lock, self.active, self.get_data, name=p.name)
                            new_procs.append(np)
                            old_procs.append(p)

                            # Release any locks the PID had
                            if p.pid in self.has_read_lock:
                                with self.read_lock:
                                    self.read_count.value -= 1
                                    self.has_read_lock.remove(p.pid)

                            if self.has_data_request_lock.value == p.pid:
                                self.has_data_request_lock.value = -1
                                try:
                                    self.data_request_lock.release()
                                except:
                                    pass

                            if self.has_write_lock.value == p.pid:
                                self.has_write_lock.value = -1
                                try:
                                    self.write_lock.release()
                                except:
                                    pass

                    for p in old_procs:
                        try:
                            self.procs.remove(p)
                        except ValueError:
                            logger.warn("Did not find %s in the list of processes.  Continuing on." % p.name)

                    for p in new_procs:
                        self.procs.append(p)
                        logger.warn("Started a new consumer (%s) to replace a zombie consumer" % p.name)
                        p.start()

                else:
                    # We got one.
                    retrieved += 1
                    if code is None:
                        logger.warn("Got an unrecognized response from a task.")
                    elif code == -1:
                        logger.warn("Particle %s has FAILED!!" % tempres.uid)
                    elif code == -2:
                        self.error_code = code
                        logger.warn("CachingDataController has FAILED!!  Removing cache file so the particles fail.")
                        try:
                            os.remove(self.cache_path)
                        except OSError:
                            logger.debug("Could not remove cache file, it probably never existed")
                            pass
                    elif code == -3:
                        self.error_code = code
                        logger.info("A zombie process was caught and task was removed from queue")
                    elif isinstance(tempres, Particle):
                        logger.info("Particle %d finished" % tempres.uid)
                        return_particles.append(tempres)
                        # We mulitply by 95 here to save 5% for the exporting
                        logger.progress((round((retrieved / self.number_of_tasks) * 90., 1), "Particle %d finished" % tempres.uid))
                    elif tempres == "CachingDataController":
                        logger.info("CachingDataController finished")
                        logger.progress((round((retrieved / self.number_of_tasks) * 90., 1), "CachingDataController finished"))
                    else:
                        logger.info("Got a strange result on results queue")
                        logger.info(str(tempres))

                    logger.info("Retrieved %i/%i results" % (int(retrieved), self.number_of_tasks))

            if len(return_particles) != len(self.particles):
                logger.warn("Some particles failed and are not included in the output")

            # The results queue should be empty at this point
            assert self.results.empty() is True

            # Should be good to join on the tasks now that the queue is empty
            logger.info("Joining the task queue")
            self.tasks.join()

            self.particles = return_particles

        finally:
            # Join all processes
            logger.info("Joining the processes")
            for w in self.procs + [self.data_controller_process]:
                    # Wait 20 seconds
                    w.join(20.)
                    if w.is_alive():
                        # Process is hanging, kill it.
                        logger.info("Terminating %s forcefully.  This should have exited itself." % w.name)
                        w.terminate()

Example #14

Show file

File: model_controller.py Project: kwilcox/paegan-transport

    def setup_run(self, **kwargs):

        logger.setLevel(logging.PROGRESS)

        self.redis_url             = None
        self.redis_log_channel     = None
        self.redis_results_channel = None
        if "redis" in kwargs.get("output_formats", []):
            from paegan.logger.redis_handler import RedisHandler
            self.redis_url             = kwargs.get("redis_url")
            self.redis_log_channel     = kwargs.get("redis_log_channel")
            self.redis_results_channel = kwargs.get("redis_results_channel")
            rhandler = RedisHandler(self.redis_log_channel, self.redis_url)
            rhandler.setLevel(logging.PROGRESS)
            logger.addHandler(rhandler)

        # Relax.
        time.sleep(0.5)

        # Add ModelController description to logfile
        logger.info(unicode(self))

        # Add the model descriptions to logfile
        for m in self._models:
            logger.info(unicode(m))

        # Calculate the model timesteps
        # We need times = len(self._nstep) + 1 since data is stored one timestep
        # after a particle is forced with the final timestep's data.
        self.times = range(0, (self._step*self._nstep)+1, self._step)
        # Calculate a datetime object for each model timestep
        # This method is duplicated in CachingDataController and CachingForcer
        # using the 'times' variables above.  Will be useful in those other
        # locations for particles released at different times
        # i.e. released over a few days
        self.modelTimestep, self.datetimes = AsaTransport.get_time_objects_from_model_timesteps(self.times, start=self.start)

        logger.progress((1, "Setting up particle start locations"))
        point_locations = []
        if isinstance(self.geometry, Point):
            point_locations = [self.reference_location] * self._npart
        elif isinstance(self.geometry, Polygon) or isinstance(self.geometry, MultiPolygon):
            point_locations = [Location4D(latitude=loc.y, longitude=loc.x, depth=self._depth, time=self.start) for loc in AsaTransport.fill_polygon_with_points(goal=self._npart, polygon=self.geometry)]

        # Initialize the particles
        logger.progress((2, "Initializing particles"))
        for x in xrange(0, self._npart):
            p = LarvaParticle(id=x)
            p.location = point_locations[x]
            # We don't need to fill the location gaps here for environment variables
            # because the first data collected actually relates to this original
            # position.
            # We do need to fill in fields such as settled, halted, etc.
            p.fill_status_gap()
            # Set the inital note
            p.note = p.outputstring()
            p.notes.append(p.note)
            self.particles.append(p)

        if kwargs.get("manager", True):
            # Get the number of cores (may take some tuning) and create that
            # many workers then pass particles into the queue for the workers
            self.mgr = multiprocessing.Manager()

            # This tracks if the system is 'alive'.  Most looping whiles will check this
            # and break out if it is False.  This is True until something goes very wrong.
            self.active = self.mgr.Value('bool', True)

            # Each particle is a task, plus the CachingDataController
            self.number_of_tasks = self.get_number_of_tasks()

            # Either spin up the number of cores, or the number of tasks
            self.nproc = min(multiprocessing.cpu_count() - 1, self.number_of_tasks)

            # Number of tasks that we need to run.  This is decremented everytime something exits.
            self.n_run = self.mgr.Value('int', self.number_of_tasks)
            # The lock that controls access to the 'n_run' variable
            self.nproc_lock = self.mgr.Lock()

            # Create the task queue for all of the particles and the CachingDataController
            self.tasks = multiprocessing.JoinableQueue(self.number_of_tasks)
            # Create the result queue for all of the particles and the CachingDataController
            self.results = self.mgr.Queue(self.number_of_tasks)

        logger.progress((3, "Initializing and caching hydro model's grid"))
        try:
            ds = CommonDataset.open(self.hydrodataset)
        except Exception:
            logger.exception("Failed to access dataset %s" % self.hydrodataset)
            raise BaseDataControllerError("Inaccessible Dataset: %s" % self.hydrodataset)
        # Query the dataset for common variable names
        # and the time variable.
        logger.debug("Retrieving variable information from dataset")
        self.common_variables = self.get_common_variables_from_dataset(ds)

        self.timevar = None
        try:
            assert self.common_variables.get("u") in ds._current_variables
            assert self.common_variables.get("v") in ds._current_variables
            assert self.common_variables.get("x") in ds._current_variables
            assert self.common_variables.get("y") in ds._current_variables

            self.timevar = ds.gettimevar(self.common_variables.get("u"))
        except AssertionError:
            logger.exception("Could not locate variables needed to run model: %s" % unicode(self.common_variables))
            raise BaseDataControllerError("A required data variable was not found in %s" % self.hydrodataset)

        model_start = self.timevar.get_dates()[0]
        model_end   = self.timevar.get_dates()[-1]

        try:
            assert self.start > model_start
            assert self.start < model_end
        except AssertionError:
            raise BaseDataControllerError("Start time for model (%s) is not available in source dataset (%s/%s)" % (self.datetimes[0], model_start, model_end))

        try:
            assert self.datetimes[-1] > model_start
            assert self.datetimes[-1] < model_end
        except AssertionError:
            raise BaseDataControllerError("End time for model (%s) is not available in source dataset (%s/%s)" % (self.datetimes[-1], model_start, model_end))

        ds.closenc()

Example #15

Show file

File: model_controller.py Project: kwilcox/paegan-transport

    def listen_for_results(self):
        try:
            # Get results back from queue, test for failed particles
            return_particles = []
            retrieved = 0.
            self.error_code = 0

            logger.info("Waiting for %i particle results" % len(self.particles))
            logger.progress((5, "Running model"))
            while retrieved < self.number_of_tasks:
                try:
                    # Returns a tuple of code, result
                    code, tempres = self.results.get(timeout=240)
                except Queue.Empty:

                    new_procs = []
                    old_procs = []
                    for p in self.procs:
                        if not p.is_alive() and p.exitcode != 0:
                            # Do what the Consumer would do if something finished.
                            # Add something to results queue
                            self.results.put((-3, "Zombie"))
                            # Decrement nproc (Consumer exits when this is 0)
                            with self.nproc_lock:
                                self.n_run.value = self.n_run.value - 1

                            # Remove task from queue (so they can be joined later on)
                            self.tasks.task_done()

                            # Start a new Consumer.  It will exit if there are no tasks available.
                            np = Consumer(self.tasks, self.results, self.n_run, self.nproc_lock, self.active, None, name=p.name)
                            new_procs.append(np)
                            old_procs.append(p)

                    for p in old_procs:
                        try:
                            self.procs.remove(p)
                        except ValueError:
                            logger.warn("Did not find %s in the list of processes.  Continuing on." % p.name)

                    for p in new_procs:
                        self.procs.append(p)
                        logger.warn("Started a new consumer (%s) to replace a zombie consumer" % p.name)
                        p.start()

                else:
                    # We got one.
                    retrieved += 1
                    if code is None:
                        logger.warn("Got an unrecognized response from a task.")
                    elif code == -1:
                        logger.warn("Particle %s has FAILED!!" % tempres.uid)
                    elif code == -3:
                        self.error_code = code
                        logger.info("A zombie process was caught and task was removed from queue")
                    elif isinstance(tempres, Particle):
                        logger.info("Particle %d finished" % tempres.uid)
                        return_particles.append(tempres)
                        # We mulitply by 95 here to save 5% for the exporting
                        logger.progress((round((retrieved / self.number_of_tasks) * 90., 1), "Particle %d finished" % tempres.uid))
                    else:
                        logger.info("Got a strange result on results queue: %s" % str(tempres))

                    logger.info("Retrieved %i/%i results" % (int(retrieved), self.number_of_tasks))

            if len(return_particles) != len(self.particles):
                logger.warn("Some particles failed and are not included in the output")

            # The results queue should be empty at this point
            assert self.results.empty() is True

            # Should be good to join on the tasks now that the queue is empty
            logger.info("Joining the task queue")
            self.tasks.join()

            self.particles = return_particles

        finally:
            # Join all processes
            logger.info("Joining the processes")
            for w in self.procs:
                    # Wait 20 seconds
                    w.join(20.)
                    if w.is_alive():
                        # Process is hanging, kill it.
                        logger.info("Terminating %s forcefully.  This should have exited itself." % w.name)
                        w.terminate()

Example #16

Show file

File: base.py Project: ocefpaf/paegan-transport

    def setup_run(self, hydrodataset, **kwargs):

        self.hydrodataset = hydrodataset

        logger.setLevel(logging.PROGRESS)

        # Relax.
        time.sleep(0.5)

        # Add ModelController description to logfile
        logger.info(str(self))

        # Add the model descriptions to logfile
        for m in self._models:
            logger.info(str(m))

        # Calculate the model timesteps
        # We need times = len(self._nstep) + 1 since data is stored one timestep
        # after a particle is forced with the final timestep's data.
        self.times = list(range(0, (self._step * self._nstep) + 1, self._step))
        # Calculate a datetime object for each model timestep
        # This method is duplicated in CachingDataController and CachingForcer
        # using the 'times' variables above.  Will be useful in those other
        # locations for particles released at different times
        # i.e. released over a few days
        self.modelTimestep, self.datetimes = AsaTransport.get_time_objects_from_model_timesteps(
            self.times, start=self.start)

        logger.progress((1, "Setting up particle start locations"))
        point_locations = []
        if isinstance(self.geometry, Point):
            point_locations = [self.reference_location] * self._npart
        elif isinstance(self.geometry, Polygon) or isinstance(
                self.geometry, MultiPolygon):
            point_locations = [
                Location4D(latitude=loc.y,
                           longitude=loc.x,
                           depth=self._depth,
                           time=self.start)
                for loc in AsaTransport.fill_polygon_with_points(
                    goal=self._npart, polygon=self.geometry)
            ]

        # Initialize the particles
        logger.progress((2, "Initializing particles"))
        for x in range(0, self._npart):
            p = LarvaParticle(id=x)
            p.location = point_locations[x]
            # We don't need to fill the location gaps here for environment variables
            # because the first data collected actually relates to this original
            # position.
            # We do need to fill in fields such as settled, halted, etc.
            p.fill_status_gap()
            # Set the inital note
            p.note = p.outputstring()
            p.notes.append(p.note)
            self.particles.append(p)

        logger.progress((3, "Initializing and caching hydro model's grid %s" %
                         self.hydrodataset))
        try:
            ds = CommonDataset.open(self.hydrodataset)
            # Query the dataset for common variable names
            # and the time variable.
            logger.debug("Retrieving variable information from dataset")
            self.common_variables = self.get_common_variables_from_dataset(ds)
        except Exception:
            logger.exception("Failed to access dataset %s" % self.hydrodataset)
            raise BaseDataControllerError("Inaccessible Dataset: %s" %
                                          self.hydrodataset)

        self.timevar = None
        try:
            assert self.common_variables.get("u") in ds._current_variables
            assert self.common_variables.get("v") in ds._current_variables
            assert self.common_variables.get("x") in ds._current_variables
            assert self.common_variables.get("y") in ds._current_variables

            self.timevar = ds.gettimevar(self.common_variables.get("u"))
            model_start = self.timevar.get_dates()[0]
            model_end = self.timevar.get_dates()[-1]
        except AssertionError:
            logger.exception(
                "Could not locate variables needed to run model: %s" %
                str(self.common_variables))
            raise BaseDataControllerError(
                "A required data variable was not found in %s" %
                self.hydrodataset)
        finally:
            ds.closenc()

        try:
            assert self.start > model_start
            assert self.start < model_end
        except AssertionError:
            raise BaseDataControllerError(
                "Start time for model (%s) is not available in source dataset (%s/%s)"
                % (self.datetimes[0], model_start, model_end))

        try:
            assert self.datetimes[-1] > model_start
            assert self.datetimes[-1] < model_end
        except AssertionError:
            raise BaseDataControllerError(
                "End time for model (%s) is not available in source dataset (%s/%s)"
                % (self.datetimes[-1], model_start, model_end))

Example #17

Show file

File: base.py Project: axiom-data-science/paegan-transport

    def setup_run(self, hydrodataset, **kwargs):

        self.hydrodataset = hydrodataset

        logger.setLevel(logging.PROGRESS)

        # Relax.
        time.sleep(0.5)

        # Add ModelController description to logfile
        logger.info(str(self))

        # Add the model descriptions to logfile
        for m in self._models:
            logger.info(str(m))

        # Calculate the model timesteps
        # We need times = len(self._nstep) + 1 since data is stored one timestep
        # after a particle is forced with the final timestep's data.
        self.times = list(range(0, (self._step*self._nstep)+1, self._step))
        # Calculate a datetime object for each model timestep
        # This method is duplicated in CachingDataController and CachingForcer
        # using the 'times' variables above.  Will be useful in those other
        # locations for particles released at different times
        # i.e. released over a few days
        self.modelTimestep, self.datetimes = AsaTransport.get_time_objects_from_model_timesteps(self.times, start=self.start)

        logger.progress((1, "Setting up particle start locations"))
        point_locations = []
        if isinstance(self.geometry, Point):
            point_locations = [self.reference_location] * self._npart
        elif isinstance(self.geometry, Polygon) or isinstance(self.geometry, MultiPolygon):
            point_locations = [Location4D(latitude=loc.y, longitude=loc.x, depth=self._depth, time=self.start) for loc in AsaTransport.fill_polygon_with_points(goal=self._npart, polygon=self.geometry)]

        # Initialize the particles
        logger.progress((2, "Initializing particles"))
        for x in range(0, self._npart):
            p = LarvaParticle(id=x)
            p.location = point_locations[x]
            # We don't need to fill the location gaps here for environment variables
            # because the first data collected actually relates to this original
            # position.
            # We do need to fill in fields such as settled, halted, etc.
            p.fill_status_gap()
            # Set the inital note
            p.note = p.outputstring()
            p.notes.append(p.note)
            self.particles.append(p)

        logger.progress((3, "Initializing and caching hydro model's grid %s" % self.hydrodataset))
        try:
            ds = CommonDataset.open(self.hydrodataset)
            # Query the dataset for common variable names
            # and the time variable.
            logger.debug("Retrieving variable information from dataset")
            self.common_variables = self.get_common_variables_from_dataset(ds)
        except Exception:
            logger.exception("Failed to access dataset %s" % self.hydrodataset)
            raise BaseDataControllerError("Inaccessible Dataset: %s" % self.hydrodataset)

        self.timevar = None
        try:
            assert self.common_variables.get("u") in ds._current_variables
            assert self.common_variables.get("v") in ds._current_variables
            assert self.common_variables.get("x") in ds._current_variables
            assert self.common_variables.get("y") in ds._current_variables

            self.timevar = ds.gettimevar(self.common_variables.get("u"))
            model_start = self.timevar.get_dates()[0]
            model_end = self.timevar.get_dates()[-1]
        except AssertionError:
            logger.exception("Could not locate variables needed to run model: %s" % str(self.common_variables))
            raise BaseDataControllerError("A required data variable was not found in %s" % self.hydrodataset)
        finally:
            ds.closenc()

        try:
            assert self.start > model_start
            assert self.start < model_end
        except AssertionError:
            raise BaseDataControllerError("Start time for model (%s) is not available in source dataset (%s/%s)" % (self.datetimes[0], model_start, model_end))

        try:
            assert self.datetimes[-1] > model_start
            assert self.datetimes[-1] < model_end
        except AssertionError:
            raise BaseDataControllerError("End time for model (%s) is not available in source dataset (%s/%s)" % (self.datetimes[-1], model_start, model_end))

Example #18

Show file

File: model_controller.py Project: acrosby/paegan-transport

    def run(self, hydrodataset, **kwargs):

        # Add ModelController description to logfile
        logger.info(self)

        # Add the model descriptions to logfile
        for m in self._models:
            logger.info(m)

        # Calculate the model timesteps
        # We need times = len(self._nstep) + 1 since data is stored one timestep
        # after a particle is forced with the final timestep's data.
        times = range(0,(self._step*self._nstep)+1,self._step)
        # Calculate a datetime object for each model timestep
        # This method is duplicated in DataController and ForceParticle
        # using the 'times' variables above.  Will be useful in those other
        # locations for particles released at different times
        # i.e. released over a few days
        modelTimestep, self.datetimes = AsaTransport.get_time_objects_from_model_timesteps(times, start=self.start)

        time_chunk = self._time_chunk
        horiz_chunk = self._horiz_chunk
        low_memory = kwargs.get("low_memory", False)

        # Should we remove the cache file at the end of the run?
        remove_cache = kwargs.get("remove_cache", True)

        self.bathy_path = kwargs.get("bathy", None)

        self.cache_path = kwargs.get("cache", None)
        if self.cache_path is None:
            # Generate temp filename for dataset cache
            default_cache_dir = os.path.join(os.path.dirname(__file__), "_cache")
            temp_name = AsaRandom.filename(prefix=str(datetime.now().microsecond), suffix=".nc")
            self.cache_path = os.path.join(default_cache_dir, temp_name)
        
        logger.progress((1, "Setting up particle start locations"))
        point_locations = []
        if isinstance(self.geometry, Point):
            point_locations = [self.reference_location] * self._npart
        elif isinstance(self.geometry, Polygon) or isinstance(self.geometry, MultiPolygon):
            point_locations = [Location4D(latitude=loc.y, longitude=loc.x, depth=self._depth, time=self.start) for loc in AsaTransport.fill_polygon_with_points(goal=self._npart, polygon=self.geometry)]

        # Initialize the particles
        logger.progress((2, "Initializing particles"))
        for x in xrange(0, self._npart):
            p = LarvaParticle(id=x)
            p.location = point_locations[x]
            # We don't need to fill the location gaps here for environment variables
            # because the first data collected actually relates to this original
            # position.
            # We do need to fill in fields such as settled, halted, etc.
            p.fill_status_gap()
            # Set the inital note
            p.note = p.outputstring()
            p.notes.append(p.note)
            self.particles.append(p)

        # This is where it makes sense to implement the multiprocessing
        # looping for particles and models. Can handle each particle in 
        # parallel probably.
        #
        # Get the number of cores (may take some tuning) and create that
        # many workers then pass particles into the queue for the workers
        mgr = multiprocessing.Manager()
        nproc = multiprocessing.cpu_count() - 1
        if nproc <= 0:
            raise ValueError("Model does not run using less than two CPU cores")

        # Each particle is a task, plus the DataController
        number_of_tasks = len(self.particles) + 1

        # We need a process for each particle and one for the data controller
        nproc = min(number_of_tasks, nproc)

        # When a particle requests data
        data_request_lock = mgr.Lock()
        # PID of process with lock
        has_data_request_lock = mgr.Value('int',-1)

        nproc_lock = mgr.Lock()
        
        # Create the task queue for all of the particles and the DataController
        tasks = multiprocessing.JoinableQueue(number_of_tasks)
        # Create the result queue for all of the particles and the DataController
        results = mgr.Queue(number_of_tasks)
        
        # Create the shared state objects
        get_data = mgr.Value('bool', True)
        # Number of tasks
        n_run = mgr.Value('int', number_of_tasks)
        updating = mgr.Value('bool', False)

        # When something is reading from cache file
        read_lock = mgr.Lock()
        # list of PIDs that are reading
        has_read_lock = mgr.list()
        read_count = mgr.Value('int', 0)

        # When something is writing to the cache file
        write_lock = mgr.Lock()
        # PID of process with lock
        has_write_lock = mgr.Value('int',-1)

        point_get = mgr.Value('list', [0, 0, 0])
        active = mgr.Value('bool', True)
        
        logger.progress((3, "Initializing and caching hydro model's grid"))
        try:
            ds = CommonDataset.open(hydrodataset)
            # Query the dataset for common variable names
            # and the time variable.
            logger.debug("Retrieving variable information from dataset")
            common_variables = self.get_common_variables_from_dataset(ds)

            logger.debug("Pickling time variable to disk for particles")
            timevar = ds.gettimevar(common_variables.get("u"))
            f, timevar_pickle_path = tempfile.mkstemp()
            os.close(f)
            f = open(timevar_pickle_path, "wb")
            pickle.dump(timevar, f)
            f.close()
            ds.closenc()
        except:
            logger.warn("Failed to access remote dataset %s" % hydrodataset)
            raise DataControllerError("Inaccessible DAP endpoint: %s" % hydrodataset)


        # Add data controller to the queue first so that it 
        # can get the initial data and is not blocked
        
        logger.debug('Starting DataController')
        logger.progress((4, "Starting processes"))
        data_controller = parallel.DataController(hydrodataset, common_variables, n_run, get_data, write_lock, has_write_lock, read_lock, read_count,
                                                  time_chunk, horiz_chunk, times,
                                                  self.start, point_get, self.reference_location,
                                                  low_memory=low_memory,
                                                  cache=self.cache_path)
        tasks.put(data_controller)
        # Create DataController worker
        data_controller_process = parallel.Consumer(tasks, results, n_run, nproc_lock, active, get_data, name="DataController")
        data_controller_process.start()
        
        logger.debug('Adding %i particles as tasks' % len(self.particles))
        for part in self.particles:
            forcing = parallel.ForceParticle(part,
                                        hydrodataset,
                                        common_variables,
                                        timevar_pickle_path,
                                        times,
                                        self.start,
                                        self._models,
                                        self.reference_location.point,
                                        self._use_bathymetry,
                                        self._use_shoreline,
                                        self._use_seasurface,
                                        get_data,
                                        n_run,
                                        read_lock,
                                        has_read_lock,
                                        read_count,
                                        point_get,
                                        data_request_lock,
                                        has_data_request_lock,
                                        reverse_distance=self.reverse_distance,
                                        bathy=self.bathy_path,
                                        shoreline_path=self.shoreline_path,
                                        shoreline_feature=self.shoreline_feature,
                                        cache=self.cache_path,
                                        time_method=self.time_method)
            tasks.put(forcing)

        # Create workers for the particles.
        procs = [ parallel.Consumer(tasks, results, n_run, nproc_lock, active, get_data, name="ForceParticle-%d"%i)
                  for i in xrange(nproc - 1) ]
        for w in procs:
            w.start()
            logger.debug('Started %s' % w.name)

        # Get results back from queue, test for failed particles
        return_particles = []
        retrieved = 0.
        error_code = 0

        logger.info("Waiting for %i particle results" % len(self.particles))
        logger.progress((5, "Running model"))
        while retrieved < number_of_tasks:
            try:
                # Returns a tuple of code, result
                code, tempres = results.get(timeout=240)
            except Queue.Empty:
                # Poll the active processes to make sure they are all alive and then continue with loop
                if not data_controller_process.is_alive() and data_controller_process.exitcode != 0:
                    # Data controller is zombied, kill off other processes.
                    get_data.value == False
                    results.put((-2, "DataController"))

                new_procs = []
                old_procs = []
                for p in procs:
                    if not p.is_alive() and p.exitcode != 0:
                        # Do what the Consumer would do if something finished.
                        # Add something to results queue
                        results.put((-3, "ZombieParticle"))
                        # Decrement nproc (DataController exits when this is 0)
                        with nproc_lock:
                            n_run.value = n_run.value - 1

                        # Remove task from queue (so they can be joined later on)
                        tasks.task_done()

                        # Start a new Consumer.  It will exit if there are no tasks available.
                        np = parallel.Consumer(tasks, results, n_run, nproc_lock, active, get_data, name=p.name)
                        new_procs.append(np)
                        old_procs.append(p)
                        
                        # Release any locks the PID had
                        if p.pid in has_read_lock:
                            with read_lock:
                                read_count.value -= 1
                                has_read_lock.remove(p.pid)

                        if has_data_request_lock.value == p.pid:
                            has_data_request_lock.value = -1
                            try:
                                data_request_lock.release()
                            except:
                                pass
                            
                        if has_write_lock.value == p.pid:
                            has_write_lock.value = -1
                            try:
                                write_lock.release()
                            except:
                                pass
                            

                for p in old_procs:
                    try:
                        procs.remove(p)
                    except ValueError:
                        logger.warn("Did not find %s in the list of processes.  Continuing on." % p.name)

                for p in new_procs:
                    procs.append(p)
                    logger.warn("Started a new consumer (%s) to replace a zombie consumer" % p.name)
                    p.start()
                
            else:
                # We got one.
                retrieved += 1
                if code == None:
                    logger.warn("Got an unrecognized response from a task.")
                elif code == -1:
                    logger.warn("Particle %s has FAILED!!" % tempres.uid)
                elif code == -2:
                    error_code = code
                    logger.warn("DataController has FAILED!!  Removing cache file so the particles fail.")
                    try:
                        os.remove(self.cache_path)
                    except OSError:
                        logger.debug("Could not remove cache file, it probably never existed")
                        pass
                elif code == -3:
                    error_code = code
                    logger.info("A zombie process was caught and task was removed from queue")
                elif isinstance(tempres, Particle):
                    logger.info("Particle %d finished" % tempres.uid)
                    return_particles.append(tempres)
                    # We mulitply by 95 here to save 5% for the exporting
                    logger.progress((round((retrieved / number_of_tasks) * 90.,1), "Particle %d finished" % tempres.uid))
                elif tempres == "DataController":
                    logger.info("DataController finished")
                    logger.progress((round((retrieved / number_of_tasks) * 90.,1), "DataController finished"))
                else:
                    logger.info("Got a strange result on results queue")
                    logger.info(str(tempres))

                logger.info("Retrieved %i/%i results" % (int(retrieved),number_of_tasks))
        
        if len(return_particles) != len(self.particles):
            logger.warn("Some particles failed and are not included in the output")

        # The results queue should be empty at this point
        assert results.empty() is True

        # Should be good to join on the tasks now that the queue is empty
        logger.info("Joining the task queue")
        tasks.join()

        # Join all processes
        logger.info("Joining the processes")
        for w in procs + [data_controller_process]:
                # Wait 10 seconds
                w.join(10.)
                if w.is_alive():
                    # Process is hanging, kill it.
                    logger.info("Terminating %s forcefully.  This should have exited itself." % w.name)
                    w.terminate()
                    
        logger.info('Workers complete')

        self.particles = return_particles

        # Remove Manager so it shuts down
        del mgr

        # Remove pickled timevar
        os.remove(timevar_pickle_path)

        # Remove the cache file
        if remove_cache is True:
            try:
                os.remove(self.cache_path)
            except OSError:
                logger.debug("Could not remove cache file, it probably never existed")

        logger.progress((96, "Exporting results"))

        if len(self.particles) > 0:
            # If output_formats and path specified,
            # output particle run data to disk when completed
            if "output_formats" in kwargs:
                # Make sure output_path is also included
                if kwargs.get("output_path", None) != None:
                    formats = kwargs.get("output_formats")
                    output_path = kwargs.get("output_path")
                    if isinstance(formats, list):
                        for format in formats:
                            logger.info("Exporting to: %s" % format)
                            try:
                                self.export(output_path, format=format)
                            except:
                                logger.error("Failed to export to: %s" % format)
                    else:
                        logger.warn('The output_formats parameter should be a list, not saving any output!')  
                else:
                    logger.warn('No output path defined, not saving any output!')  
            else:
                logger.warn('No output format defined, not saving any output!')
        else:
            logger.warn("Model didn't actually do anything, check the log.")
            if error_code == -2:
                raise DataControllerError("Error in the DataController")
            else:
                raise ModelError("Error in the model")

        logger.progress((99, "Model Run Complete"))
        return

Example #19

Show file

File: cached.py Project: ocefpaf/paegan-transport

    def start_tasks(self, **kwargs):
        try:
            logger.info('Starting CachingDataController')

            # Add data controller to the queue first so that it
            # can get the initial data and is not blocked
            data_controller = CachingDataController(self.hydrodataset,
                                                    self.common_variables,
                                                    self.n_run,
                                                    self.get_data,
                                                    self.write_lock,
                                                    self.has_write_lock,
                                                    self.read_lock,
                                                    self.read_count,
                                                    self.time_chunk,
                                                    self.horiz_chunk,
                                                    self.times,
                                                    self.start,
                                                    self.point_get,
                                                    self.reference_location,
                                                    cache_path=self.cache_path)
            self.tasks.put(data_controller)
            # Create CachingDataController worker
            self.data_controller_process = Consumer(
                self.tasks,
                self.results,
                self.n_run,
                self.nproc_lock,
                self.active,
                self.get_data,
                name="CachingDataController")
            self.data_controller_process.start()

            logger.info('Adding %i particles as tasks' %
                        self.total_particle_count())

            for part in self.particles:
                forcer = CachingForcer(
                    self.cache_path,
                    particle=part,
                    common_variables=self.common_variables,
                    timevar=self.timevar,
                    times=self.times,
                    start_time=self.start,
                    models=self._models,
                    release_location_centroid=self.reference_location.point,
                    usebathy=self._use_bathymetry,
                    useshore=self._use_shoreline,
                    usesurface=self._use_seasurface,
                    reverse_distance=self.reverse_distance,
                    bathy_path=self.bathy_path,
                    shoreline_path=self.shoreline_path,
                    shoreline_feature=self.shoreline_feature,
                    time_method=self.time_method,
                    shoreline_index_buffer=self.shoreline_index_buffer,
                    get_data=self.get_data,
                    read_lock=self.read_lock,
                    has_read_lock=self.has_read_lock,
                    read_count=self.read_count,
                    point_get=self.point_get,
                    data_request_lock=self.data_request_lock,
                    has_data_request_lock=self.has_data_request_lock)
                self.tasks.put(forcer)

            # Create workers for the particles.
            self.procs = [
                Consumer(self.tasks,
                         self.results,
                         self.n_run,
                         self.nproc_lock,
                         self.active,
                         self.get_data,
                         name="CachingForcer-%d" % i)
                for i in range(self.nproc - 1)
            ]
            logger.progress((5, 'Running model'))
            for w in self.procs:
                w.start()
                logger.info('Started %s' % w.name)

            return True

        except Exception:
            logger.exception("Something didn't start correctly!")
            return False

Example #20

Show file

File: cached.py Project: ocefpaf/paegan-transport

    def listen_for_results(self, output_h5_file, total_particles):
        try:
            # Get results back from queue, test for failed particles
            return_particles = []
            retrieved = 0.
            self.error_code = 0

            logger.info("Waiting for %i particle results" % total_particles)
            while retrieved < self.total_task_count(
            ):  # One for the CachingDataController

                logger.info("looping in listen_for_results")

                try:
                    # Returns a tuple of code, result
                    code, tempres = self.results.get(timeout=240)
                except queue.Empty:
                    # Poll the active processes to make sure they are all alive and then continue with loop
                    if not self.data_controller_process.is_alive(
                    ) and self.data_controller_process.exitcode != 0:
                        # Data controller is zombied, kill off other processes.
                        self.get_data.value is False
                        self.results.put((-2, "CachingDataController"))

                    new_procs = []
                    old_procs = []
                    for p in self.procs:
                        if not p.is_alive() and p.exitcode != 0:
                            # Do what the Consumer would do if something finished.
                            # Add something to results queue
                            self.results.put((-3, "ZombieParticle"))
                            # Decrement nproc (CachingDataController exits when this is 0)
                            with self.nproc_lock:
                                self.n_run.value = self.n_run.value - 1

                            # Remove task from queue (so they can be joined later on)
                            self.tasks.task_done()

                            # Start a new Consumer.  It will exit if there are no tasks available.
                            np = Consumer(self.tasks,
                                          self.results,
                                          self.n_run,
                                          self.nproc_lock,
                                          self.active,
                                          self.get_data,
                                          name=p.name)
                            new_procs.append(np)
                            old_procs.append(p)

                            # Release any locks the PID had
                            if p.pid in self.has_read_lock:
                                with self.read_lock:
                                    self.read_count.value -= 1
                                    self.has_read_lock.remove(p.pid)

                            if self.has_data_request_lock.value == p.pid:
                                self.has_data_request_lock.value = -1
                                try:
                                    self.data_request_lock.release()
                                except:
                                    pass

                            if self.has_write_lock.value == p.pid:
                                self.has_write_lock.value = -1
                                try:
                                    self.write_lock.release()
                                except:
                                    pass

                    for p in old_procs:
                        try:
                            self.procs.remove(p)
                        except ValueError:
                            logger.warn(
                                "Did not find %s in the list of processes.  Continuing on."
                                % p.name)

                    for p in new_procs:
                        self.procs.append(p)
                        logger.warn(
                            "Started a new consumer (%s) to replace a zombie consumer"
                            % p.name)
                        p.start()

                else:
                    # We got one.
                    retrieved += 1
                    if code is None:
                        logger.warn(
                            "Got an unrecognized response from a task.")
                    elif code == -1:
                        logger.warn("Particle %s has FAILED!!" % tempres.uid)
                    elif code == -2:
                        self.error_code = code
                        logger.warn(
                            "CachingDataController has FAILED!!  Removing cache file so the particles fail."
                        )
                        try:
                            os.remove(self.cache_path)
                        except OSError:
                            logger.debug(
                                "Could not remove cache file, it probably never existed"
                            )
                            pass
                    elif code == -3:
                        self.error_code = code
                        logger.info(
                            "A zombie process was caught and task was removed from queue"
                        )
                    elif isinstance(tempres, Particle):
                        logger.info("Particle %d finished" % tempres.uid)
                        return_particles.append(tempres)
                        # We mulitply by 95 here to save 5% for the exporting
                        logger.progress(
                            (round((retrieved / self.total_task_count()) * 90.,
                                   1), "Particle %d finished" % tempres.uid))
                    elif tempres == "CachingDataController":
                        logger.info("CachingDataController finished")
                        logger.progress(
                            (round((retrieved / self.total_task_count()) * 90.,
                                   1), "CachingDataController finished"))
                    else:
                        logger.info("Got a strange result on results queue")
                        logger.info(str(tempres))

                    logger.info("Retrieved %i/%i results" %
                                (int(retrieved), self.total_task_count()))

                # Relax
                time.sleep(1)

            if len(return_particles) != total_particles:
                logger.warn(
                    "Some particles failed and are not included in the output")

            # The results queue should be empty at this point
            assert self.results.empty() is True

            # Should be good to join on the tasks now that the queue is empty
            logger.info("Joining the task queue")
            self.tasks.join()
            self.tasks.close()
            self.tasks.join_thread()

        finally:
            # Join all processes
            logger.info("Joining the processes")
            for w in self.procs + [self.data_controller_process]:
                # Wait 20 seconds
                w.join(20.)
                if w.is_alive():
                    # Process is hanging, kill it.
                    logger.info(
                        "Terminating %s forcefully.  This should have exited itself."
                        % w.name)
                    w.terminate()

        if self.error_code == -2:
            raise ValueError(
                "Error in the BaseDataController (error_code was -2)")

        results = ex.ResultsPyTable(output_h5_file)
        for p in return_particles:
            for x in range(len(p.locations)):
                results.write(p.timestep_index_dump(x))
        results.compute()
        results.close()

        return