Example #1
    def getTimeSeriesStatsForBoxSingleDataSet(self,
                                              min_lat,
                                              max_lat,
                                              min_lon,
                                              max_lon,
                                              ds,
                                              start_time=0,
                                              end_time=-1,
                                              applySeasonalFilter=True,
                                              applyLowPass=True,
                                              fill=-9999.,
                                              spark_master="local[1]",
                                              spark_nexecs=1,
                                              spark_nparts=1):
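        # Compute daily mean/min/max statistics over the bounding box with Spark,
        # then apply the optional seasonal-cycle and low-pass filters.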

        daysinrange = self._tile_service.find_days_in_range_asc(
            min_lat, max_lat, min_lon, max_lon, ds, start_time, end_time)

        ndays = len(daysinrange)
        if ndays == 0:
            raise NoDataException(
                reason="No data found for selected timeframe")

        self.log.debug('Found {0} days in range'.format(ndays))
        for i, d in enumerate(daysinrange):
            self.log.debug('{0}, {1}'.format(i, datetime.utcfromtimestamp(d)))
        spark_nparts_needed = min(spark_nparts, ndays)
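        # One work item per Spark partition: the bounding box, dataset name,
        # that partition's share of the days in range, and the fill value.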
        nexus_tiles_spark = [(min_lat, max_lat, min_lon, max_lon, ds,
                              list(daysinrange_part), fill)
                             for daysinrange_part in np.array_split(
                                 daysinrange, spark_nparts_needed)]

        # Launch Spark computations
        rdd = self._sc.parallelize(nexus_tiles_spark, spark_nparts_needed)
        results = rdd.map(TimeSeriesCalculator.calc_average_on_day).collect()
        # Flatten the per-partition result lists and sort them by time.
        results = list(itertools.chain.from_iterable(results))
        results = sorted(results, key=lambda entry: entry["time"])

        filt.applyAllFiltersOnField(results,
                                    'mean',
                                    applySeasonal=applySeasonalFilter,
                                    applyLowPass=applyLowPass)
        filt.applyAllFiltersOnField(results,
                                    'max',
                                    applySeasonal=applySeasonalFilter,
                                    applyLowPass=applyLowPass)
        filt.applyAllFiltersOnField(results,
                                    'min',
                                    applySeasonal=applySeasonalFilter,
                                    applyLowPass=applyLowPass)

        self._create_nc_file_time1d(np.array(results),
                                    'ts.nc',
                                    'mean',
                                    fill=-9999.)
        return results, {}
Example #2
    def getTimeSeriesStatsForBoxSingleDataSet(self, min_lat, max_lat, min_lon, max_lon, ds, start_time=0, end_time=-1,
                                              applySeasonalFilter=True, applyLowPass=True):

        daysinrange = self._tile_service.find_days_in_range_asc(min_lat, max_lat, min_lon, max_lon, ds, start_time,
                                                                end_time)

        if len(daysinrange) == 0:
            raise NoDataException(reason="No data found for selected timeframe")

        maxprocesses = int(self.algorithm_config.get("multiprocessing", "maxprocesses"))

        results = []
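        # With a single process configured, compute each day serially;
        # otherwise fan the days out over a multiprocessing Pool.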
        if maxprocesses == 1:
            calculator = TimeSeriesCalculator()
            for dayinseconds in daysinrange:
                result = calculator.calc_average_on_day(min_lat, max_lat, min_lon, max_lon, ds, dayinseconds)
                results.append(result)
        else:
            # Create a task to calculate the average for each day
            manager = Manager()
            work_queue = manager.Queue()
            done_queue = manager.Queue()
            for dayinseconds in daysinrange:
                work_queue.put(
                    ('calc_average_on_day', min_lat, max_lat, min_lon, max_lon, ds, dayinseconds))
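            # One SENTINEL per worker signals the end of the work queue.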
            [work_queue.put(SENTINEL) for _ in xrange(0, maxprocesses)]

            # Start new processes to handle the work
            pool = Pool(maxprocesses)
            [pool.apply_async(pool_worker, (work_queue, done_queue)) for _ in xrange(0, maxprocesses)]
            pool.close()

            # Collect the per-day result dicts from the done queue, raising if a worker reported an error
            for i in xrange(0, len(daysinrange)):
                result = done_queue.get()
                try:
                    error_str = result['error']
                    self.log.error(error_str)
                    raise NexusProcessingException(reason="Error calculating average by day.")
                except KeyError:
                    pass

                results.append(result)

            pool.terminate()
            manager.shutdown()

        results = sorted(results, key=lambda entry: entry["time"])

        filt.applyAllFiltersOnField(results, 'mean', applySeasonal=applySeasonalFilter, applyLowPass=applyLowPass)
        filt.applyAllFiltersOnField(results, 'max', applySeasonal=applySeasonalFilter, applyLowPass=applyLowPass)
        filt.applyAllFiltersOnField(results, 'min', applySeasonal=applySeasonalFilter, applyLowPass=applyLowPass)

        return results, {}
Example #3
    def getTimeSeriesStatsForBoxSingleDataSet(self, bounding_polygon, ds, start_seconds_from_epoch,
                                              end_seconds_from_epoch,
                                              apply_seasonal_cycle_filter=True, apply_low_pass_filter=True):

        the_time = datetime.now()
        daysinrange = self._get_tile_service().find_days_in_range_asc(
            bounding_polygon.bounds[1],
            bounding_polygon.bounds[3],
            bounding_polygon.bounds[0],
            bounding_polygon.bounds[2],
            ds,
            start_seconds_from_epoch,
            end_seconds_from_epoch)
        logger.info("Finding days in range took %s for dataset %s" % (str(datetime.now() - the_time), ds))

        if len(daysinrange) == 0:
            raise NoDataException(reason="No data found for selected timeframe")

        the_time = datetime.now()
        maxprocesses = int(self.algorithm_config.get("multiprocessing", "maxprocesses"))

        results = []
        if maxprocesses == 1:
            calculator = TimeSeriesCalculator()
            for dayinseconds in daysinrange:
                result = calculator.calc_average_on_day(bounding_polygon.wkt, ds, dayinseconds)
                results += [result] if result else []
        else:
            # Create a task to calculate the average for each day
            manager = Manager()
            work_queue = manager.Queue()
            done_queue = manager.Queue()
            for dayinseconds in daysinrange:
                work_queue.put(
                    ('calc_average_on_day', bounding_polygon.wkt, ds, dayinseconds))
            [work_queue.put(SENTINEL) for _ in range(0, maxprocesses)]

            # Start new processes to handle the work
            pool = Pool(maxprocesses)
            [pool.apply_async(pool_worker, (work_queue, done_queue)) for _ in range(0, maxprocesses)]
            pool.close()

            # Collect the per-day result dicts from the done queue, raising if a worker reported an error
            for i in range(0, len(daysinrange)):
                result = done_queue.get()
                try:
                    error_str = result['error']
                    logger.error(error_str)
                    raise NexusProcessingException(reason="Error calculating average by day.")
                except KeyError:
                    pass

                results += [result] if result else []

            pool.terminate()
            manager.shutdown()

        results = sorted(results, key=lambda entry: entry["time"])
        logger.info("Time series calculation took %s for dataset %s" % (str(datetime.now() - the_time), ds))

        if apply_seasonal_cycle_filter:
            the_time = datetime.now()
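            # Subtract each month's climatological mean/min/max (from
            # calculate_monthly_average) to produce the de-seasonalized fields.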
            for result in results:
                month = datetime.utcfromtimestamp(result['time']).month
                month_mean, month_max, month_min = self.calculate_monthly_average(month, bounding_polygon.wkt, ds)
                seasonal_mean = result['mean'] - month_mean
                seasonal_min = result['min'] - month_min
                seasonal_max = result['max'] - month_max
                result['meanSeasonal'] = seasonal_mean
                result['minSeasonal'] = seasonal_min
                result['maxSeasonal'] = seasonal_max
            logger.info(
                "Seasonal calculation took %s for dataset %s" % (str(datetime.now() - the_time), ds))

        the_time = datetime.now()
        filtering.applyAllFiltersOnField(results, 'mean', applySeasonal=False, applyLowPass=apply_low_pass_filter)
        filtering.applyAllFiltersOnField(results, 'max', applySeasonal=False, applyLowPass=apply_low_pass_filter)
        filtering.applyAllFiltersOnField(results, 'min', applySeasonal=False, applyLowPass=apply_low_pass_filter)

        if apply_seasonal_cycle_filter and apply_low_pass_filter:
            try:
                filtering.applyFiltersOnField(results, 'meanSeasonal', applySeasonal=False, applyLowPass=True,
                                              append="LowPass")
                filtering.applyFiltersOnField(results, 'minSeasonal', applySeasonal=False, applyLowPass=True,
                                              append="LowPass")
                filtering.applyFiltersOnField(results, 'maxSeasonal', applySeasonal=False, applyLowPass=True,
                                              append="LowPass")
            except Exception as e:
                # If it doesn't work log the error but ignore it
                tb = traceback.format_exc()
                logger.warn("Error calculating SeasonalLowPass filter:\n%s" % tb)

        logger.info(
            "LowPass filter calculation took %s for dataset %s" % (str(datetime.now() - the_time), ds))

        return results, {}
Example #4
    def calc(self, request, **args):
        """

        :param request: StatsComputeOptions
        :param args: dict
        :return:
        """
        start_time = datetime.now()
        ds, bounding_polygon, start_seconds_from_epoch, end_seconds_from_epoch, apply_seasonal_cycle_filter, apply_low_pass_filter, nparts_requested, normalize_dates = self.parse_arguments(
            request)
        metrics_record = self._create_metrics_record()

        resultsRaw = []

        for shortName in ds:

            the_time = datetime.now()
            daysinrange = self._get_tile_service().find_days_in_range_asc(
                bounding_polygon.bounds[1],
                bounding_polygon.bounds[3],
                bounding_polygon.bounds[0],
                bounding_polygon.bounds[2],
                shortName,
                start_seconds_from_epoch,
                end_seconds_from_epoch,
                metrics_callback=metrics_record.record_metrics)
            self.log.info("Finding days in range took %s for dataset %s" %
                          (str(datetime.now() - the_time), shortName))

            ndays = len(daysinrange)
            if ndays == 0:
                raise NoDataException(
                    reason="No data found for selected timeframe")

            self.log.debug('Found {0} days in range'.format(ndays))
            for i, d in enumerate(daysinrange):
                self.log.debug('{0}, {1}'.format(i,
                                                 datetime.utcfromtimestamp(d)))
            spark_nparts = self._spark_nparts(nparts_requested)
            self.log.info('Using {} partitions'.format(spark_nparts))
            results, meta = spark_driver(daysinrange,
                                         bounding_polygon,
                                         shortName,
                                         self._tile_service_factory,
                                         metrics_record.record_metrics,
                                         normalize_dates,
                                         spark_nparts=spark_nparts,
                                         sc=self._sc)

            if apply_seasonal_cycle_filter:
                the_time = datetime.now()
                # get time series for _clim dataset
                shortName_clim = shortName + "_clim"
                daysinrange_clim = self._get_tile_service(
                ).find_days_in_range_asc(
                    bounding_polygon.bounds[1],
                    bounding_polygon.bounds[3],
                    bounding_polygon.bounds[0],
                    bounding_polygon.bounds[2],
                    shortName_clim,
                    0,
                    SECONDS_IN_ONE_YEAR,
                    metrics_callback=metrics_record.record_metrics)
                if len(daysinrange_clim) == 0:
                    raise NexusProcessingException(
                        reason=
                        "There is no climatology data present for dataset " +
                        shortName + ".")
                results_clim, _ = spark_driver(daysinrange_clim,
                                               bounding_polygon,
                                               shortName_clim,
                                               self._tile_service_factory,
                                               metrics_record.record_metrics,
                                               normalize_dates=False,
                                               spark_nparts=spark_nparts,
                                               sc=self._sc)
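                # Index the climatology results by calendar month; all 12 months must be present.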
                clim_indexed_by_month = {
                    datetime.utcfromtimestamp(result['time']).month: result
                    for result in results_clim
                }
                if len(clim_indexed_by_month) < 12:
                    raise NexusProcessingException(
                        reason="There are only " + str(len(clim_indexed_by_month)) +
                        " months of climatology data for dataset " +
                        shortName +
                        ". A full year of climatology data is required for computing deseasoned timeseries."
                    )

                for result in results:
                    month = datetime.utcfromtimestamp(result['time']).month

                    result['meanSeasonal'] = result[
                        'mean'] - clim_indexed_by_month[month]['mean']
                    result['minSeasonal'] = result[
                        'min'] - clim_indexed_by_month[month]['min']
                    result['maxSeasonal'] = result[
                        'max'] - clim_indexed_by_month[month]['max']
                self.log.info("Seasonal calculation took %s for dataset %s" %
                              (str(datetime.now() - the_time), shortName))

            the_time = datetime.now()
            filtering.applyAllFiltersOnField(
                results,
                'mean',
                applySeasonal=False,
                applyLowPass=apply_low_pass_filter)
            filtering.applyAllFiltersOnField(
                results,
                'max',
                applySeasonal=False,
                applyLowPass=apply_low_pass_filter)
            filtering.applyAllFiltersOnField(
                results,
                'min',
                applySeasonal=False,
                applyLowPass=apply_low_pass_filter)

            if apply_seasonal_cycle_filter and apply_low_pass_filter:
                try:
                    filtering.applyFiltersOnField(results,
                                                  'meanSeasonal',
                                                  applySeasonal=False,
                                                  applyLowPass=True,
                                                  append="LowPass")
                    filtering.applyFiltersOnField(results,
                                                  'minSeasonal',
                                                  applySeasonal=False,
                                                  applyLowPass=True,
                                                  append="LowPass")
                    filtering.applyFiltersOnField(results,
                                                  'maxSeasonal',
                                                  applySeasonal=False,
                                                  applyLowPass=True,
                                                  append="LowPass")
                except Exception as e:
                    # If it doesn't work log the error but ignore it
                    tb = traceback.format_exc()
                    self.log.warn(
                        "Error calculating SeasonalLowPass filter:\n%s" % tb)

            resultsRaw.append([results, meta])
            self.log.info("LowPass filter calculation took %s for dataset %s" %
                          (str(datetime.now() - the_time), shortName))

            the_time = datetime.now()
            self._create_nc_file_time1d(np.array(results),
                                        'ts.nc',
                                        'mean',
                                        fill=-9999.)
            self.log.info("NetCDF generation took %s for dataset %s" %
                          (str(datetime.now() - the_time), shortName))

        the_time = datetime.now()
        results = self._mergeResults(resultsRaw)

        if len(ds) == 2:
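            # Exactly two datasets were requested: compute comparison statistics between them.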
            try:
                stats = TimeSeriesSparkHandlerImpl.calculate_comparison_stats(
                    results)
            except Exception:
                stats = {}
                tb = traceback.format_exc()
                self.log.warn("Error when calculating comparison stats:\n%s" %
                              tb)
        else:
            stats = {}

        meta = []
        for singleRes in resultsRaw:
            meta.append(singleRes[1])

        res = TimeSeriesResults(results=results,
                                meta=meta,
                                stats=stats,
                                computeOptions=None,
                                minLat=bounding_polygon.bounds[1],
                                maxLat=bounding_polygon.bounds[3],
                                minLon=bounding_polygon.bounds[0],
                                maxLon=bounding_polygon.bounds[2],
                                ds=ds,
                                startTime=start_seconds_from_epoch,
                                endTime=end_seconds_from_epoch)

        total_duration = (datetime.now() - start_time).total_seconds()
        metrics_record.record_metrics(actual_time=total_duration)
        metrics_record.print_metrics(logger)

        self.log.info("Merging results and calculating comparisons took %s" %
                      (str(datetime.now() - the_time)))
        return res
Example #5
    def calc(self, request, **args):
        """
    
        :param request: StatsComputeOptions
        :param args: dict
        :return:
        """

        ds, bounding_polygon, start_seconds_from_epoch, end_seconds_from_epoch, apply_seasonal_cycle_filter, apply_low_pass_filter, nparts_requested = self.parse_arguments(
            request)

        resultsRaw = []

        for shortName in ds:

            the_time = datetime.now()
            daysinrange = self._tile_service.find_days_in_range_asc(
                bounding_polygon.bounds[1], bounding_polygon.bounds[3],
                bounding_polygon.bounds[0], bounding_polygon.bounds[2],
                shortName, start_seconds_from_epoch, end_seconds_from_epoch)
            self.log.info("Finding days in range took %s for dataset %s" %
                          (str(datetime.now() - the_time), shortName))

            ndays = len(daysinrange)
            if ndays == 0:
                raise NoDataException(
                    reason="No data found for selected timeframe")

            self.log.debug('Found {0} days in range'.format(ndays))
            for i, d in enumerate(daysinrange):
                self.log.debug('{0}, {1}'.format(i,
                                                 datetime.utcfromtimestamp(d)))
            spark_nparts = self._spark_nparts(nparts_requested)
            self.log.info('Using {} partitions'.format(spark_nparts))
            the_time = datetime.now()
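            # Run the per-day averaging on Spark across spark_nparts partitions.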
            results, meta = spark_driver(daysinrange,
                                         bounding_polygon,
                                         shortName,
                                         spark_nparts=spark_nparts,
                                         sc=self._sc)
            self.log.info("Time series calculation took %s for dataset %s" %
                          (str(datetime.now() - the_time), shortName))

            if apply_seasonal_cycle_filter:
                the_time = datetime.now()
                for result in results:
                    month = datetime.utcfromtimestamp(result['time']).month
                    month_mean, month_max, month_min = self.calculate_monthly_average(
                        month, bounding_polygon.wkt, shortName)
                    seasonal_mean = result['mean'] - month_mean
                    seasonal_min = result['min'] - month_min
                    seasonal_max = result['max'] - month_max
                    result['meanSeasonal'] = seasonal_mean
                    result['minSeasonal'] = seasonal_min
                    result['maxSeasonal'] = seasonal_max
                self.log.info("Seasonal calculation took %s for dataset %s" %
                              (str(datetime.now() - the_time), shortName))

            the_time = datetime.now()
            filtering.applyAllFiltersOnField(
                results,
                'mean',
                applySeasonal=False,
                applyLowPass=apply_low_pass_filter)
            filtering.applyAllFiltersOnField(
                results,
                'max',
                applySeasonal=False,
                applyLowPass=apply_low_pass_filter)
            filtering.applyAllFiltersOnField(
                results,
                'min',
                applySeasonal=False,
                applyLowPass=apply_low_pass_filter)

            if apply_seasonal_cycle_filter and apply_low_pass_filter:
                try:
                    filtering.applyFiltersOnField(results,
                                                  'meanSeasonal',
                                                  applySeasonal=False,
                                                  applyLowPass=True,
                                                  append="LowPass")
                    filtering.applyFiltersOnField(results,
                                                  'minSeasonal',
                                                  applySeasonal=False,
                                                  applyLowPass=True,
                                                  append="LowPass")
                    filtering.applyFiltersOnField(results,
                                                  'maxSeasonal',
                                                  applySeasonal=False,
                                                  applyLowPass=True,
                                                  append="LowPass")
                except Exception as e:
                    # If it doesn't work log the error but ignore it
                    tb = traceback.format_exc()
                    self.log.warn(
                        "Error calculating SeasonalLowPass filter:\n%s" % tb)

            resultsRaw.append([results, meta])
            self.log.info("LowPass filter calculation took %s for dataset %s" %
                          (str(datetime.now() - the_time), shortName))

            the_time = datetime.now()
            self._create_nc_file_time1d(np.array(results),
                                        'ts.nc',
                                        'mean',
                                        fill=-9999.)
            self.log.info("NetCDF generation took %s for dataset %s" %
                          (str(datetime.now() - the_time), shortName))

        the_time = datetime.now()
        results = self._mergeResults(resultsRaw)

        if len(ds) == 2:
            try:
                stats = TimeSeriesHandlerImpl.calculate_comparison_stats(
                    results)
            except Exception:
                stats = {}
                tb = traceback.format_exc()
                self.log.warn("Error when calculating comparison stats:\n%s" %
                              tb)
        else:
            stats = {}

        meta = []
        for singleRes in resultsRaw:
            meta.append(singleRes[1])

        res = TimeSeriesResults(results=results,
                                meta=meta,
                                stats=stats,
                                computeOptions=None,
                                minLat=bounding_polygon.bounds[1],
                                maxLat=bounding_polygon.bounds[3],
                                minLon=bounding_polygon.bounds[0],
                                maxLon=bounding_polygon.bounds[2],
                                ds=ds,
                                startTime=start_seconds_from_epoch,
                                endTime=end_seconds_from_epoch)

        self.log.info("Merging results and calculating comparisons took %s" %
                      (str(datetime.now() - the_time)))
        return res