def calc(self, compute_options, **args):
    ds, bbox, start_time, end_time = self.parse_arguments(compute_options)

    min_lon, min_lat, max_lon, max_lat = bbox.bounds

    nexus_tiles_spark = [(self._latlon, tile.tile_id, x, min_lat, max_lat, min_lon, max_lon) for x, tile in
                         enumerate(self._tile_service.find_tiles_in_box(min_lat, max_lat, min_lon, max_lon,
                                                                        ds, start_time, end_time,
                                                                        fetch_data=False))]

    print("Got {} tiles".format(len(nexus_tiles_spark)))
    if len(nexus_tiles_spark) == 0:
        raise NoDataException(reason="No data found for selected timeframe")

    results = spark_driver(self._sc, self._latlon, nexus_tiles_spark)
    results = filter(None, results)
    results = sorted(results, key=lambda entry: entry["time"])
    for i in range(len(results)):
        results[i]['lons'] = sorted(results[i]['lons'], key=lambda entry: entry['longitude'])

    # Deseason disabled. See SDAP-148
    # results = self.applyDeseasonToHofMoeller(results, pivot="lons")

    result = HoffMoellerResults(results=results, compute_options=None, type=HoffMoellerResults.LONGITUDE,
                                minLat=min_lat, maxLat=max_lat, minLon=min_lon, maxLon=max_lon,
                                ds=ds, startTime=start_time, endTime=end_time)
    return result
def calc(self, compute_options, **args):
    ds, bbox, start_time, end_time, normalize_dates = self.parse_arguments(compute_options)

    metrics_record = self._create_metrics_record()
    calculation_start = datetime.now()

    min_lon, min_lat, max_lon, max_lat = bbox.bounds

    nexus_tiles_spark = [(self._latlon, tile.tile_id, x, min_lat, max_lat, min_lon, max_lon) for x, tile in
                         enumerate(self._get_tile_service().find_tiles_in_box(min_lat, max_lat, min_lon, max_lon,
                                                                              ds, start_time, end_time,
                                                                              metrics_callback=metrics_record.record_metrics,
                                                                              fetch_data=False))]

    print("Got {} tiles".format(len(nexus_tiles_spark)))
    if len(nexus_tiles_spark) == 0:
        raise NoDataException(reason="No data found for selected timeframe")

    results = spark_driver(self._sc, self._latlon, self._tile_service_factory, nexus_tiles_spark,
                           metrics_record.record_metrics, normalize_dates)
    results = [_f for _f in results if _f]
    results = sorted(results, key=lambda entry: entry["time"])
    for i in range(len(results)):
        results[i]['lons'] = sorted(results[i]['lons'], key=lambda entry: entry['longitude'])

    # Deseason disabled. See SDAP-148
    # results = self.applyDeseasonToHofMoeller(results, pivot="lons")

    result = HoffMoellerResults(results=results, compute_options=None, type=HoffMoellerResults.LONGITUDE,
                                minLat=min_lat, maxLat=max_lat, minLon=min_lon, maxLon=max_lon,
                                ds=ds, startTime=start_time, endTime=end_time)

    duration = (datetime.now() - calculation_start).total_seconds()
    metrics_record.record_metrics(actual_time=duration)
    metrics_record.print_metrics(self.log)

    return result
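Both calc() variants above delegate the per-tile Hofmoeller computation to spark_driver. As a rough illustration only (spark_driver_sketch, per_tile_stats, and the tuple layout below are hypothetical stand-ins, not the SDAP implementation), the driver pattern is: parallelize the prepared tile tuples, map a per-tile statistics function over them, and collect.

# Hypothetical sketch of the spark_driver(...) pattern assumed above.
def spark_driver_sketch(sc, tile_tuples, nparts=8):
    def per_tile_stats(tile_tuple):
        # The real map function would fetch the tile's data and compute
        # per-longitude statistics; here we only echo the tile id.
        tile_id = tile_tuple[1]
        return {'time': 0, 'tile_id': tile_id, 'lons': []}

    # One partition per tile at most; collect the per-tile dicts.
    rdd = sc.parallelize(tile_tuples, min(nparts, max(len(tile_tuples), 1)))
    return rdd.map(per_tile_stats).collect()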
def getTimeSeriesStatsForBoxSingleDataSet(self, min_lat, max_lat, min_lon, max_lon, ds, start_time=0,
                                          end_time=-1, applySeasonalFilter=True, applyLowPass=True,
                                          fill=-9999., spark_master="local[1]", spark_nexecs=1,
                                          spark_nparts=1):
    daysinrange = self._tile_service.find_days_in_range_asc(min_lat, max_lat, min_lon, max_lon, ds,
                                                            start_time, end_time)

    ndays = len(daysinrange)
    if ndays == 0:
        raise NoDataException(reason="No data found for selected timeframe")

    self.log.debug('Found {0} days in range'.format(ndays))
    for i, d in enumerate(daysinrange):
        self.log.debug('{0}, {1}'.format(i, datetime.utcfromtimestamp(d)))

    spark_nparts_needed = min(spark_nparts, ndays)

    nexus_tiles_spark = [(min_lat, max_lat, min_lon, max_lon, ds, list(daysinrange_part), fill)
                         for daysinrange_part in np.array_split(daysinrange, spark_nparts_needed)]

    # Launch Spark computations
    rdd = self._sc.parallelize(nexus_tiles_spark, spark_nparts_needed)
    results = rdd.map(TimeSeriesCalculator.calc_average_on_day).collect()
    # results = list(itertools.chain.from_iterable(results))
    results = sorted(results, key=lambda entry: entry["time"])

    filt.applyAllFiltersOnField(results, 'mean', applySeasonal=applySeasonalFilter, applyLowPass=applyLowPass)
    filt.applyAllFiltersOnField(results, 'max', applySeasonal=applySeasonalFilter, applyLowPass=applyLowPass)
    filt.applyAllFiltersOnField(results, 'min', applySeasonal=applySeasonalFilter, applyLowPass=applyLowPass)

    self._create_nc_file_time1d(np.array(results), 'ts.nc', 'mean', fill=-9999.)

    return results, {}
def getTimeSeriesStatsForBoxSingleDataSet(self, min_lat, max_lat, min_lon, max_lon, ds, start_time=0,
                                          end_time=-1, applySeasonalFilter=True, applyLowPass=True):
    daysinrange = self._tile_service.find_days_in_range_asc(min_lat, max_lat, min_lon, max_lon, ds,
                                                            start_time, end_time)

    if len(daysinrange) == 0:
        raise NoDataException(reason="No data found for selected timeframe")

    maxprocesses = int(self.algorithm_config.get("multiprocessing", "maxprocesses"))

    results = []
    if maxprocesses == 1:
        calculator = TimeSeriesCalculator()
        for dayinseconds in daysinrange:
            result = calculator.calc_average_on_day(min_lat, max_lat, min_lon, max_lon, ds, dayinseconds)
            results.append(result)
    else:
        # Create a task to calc average difference for each day
        manager = Manager()
        work_queue = manager.Queue()
        done_queue = manager.Queue()
        for dayinseconds in daysinrange:
            work_queue.put(('calc_average_on_day', min_lat, max_lat, min_lon, max_lon, ds, dayinseconds))
        [work_queue.put(SENTINEL) for _ in xrange(0, maxprocesses)]

        # Start new processes to handle the work
        pool = Pool(maxprocesses)
        [pool.apply_async(pool_worker, (work_queue, done_queue)) for _ in xrange(0, maxprocesses)]
        pool.close()

        # Collect the results as [(day (in ms), average difference for that day)]
        for i in xrange(0, len(daysinrange)):
            result = done_queue.get()
            try:
                error_str = result['error']
                self.log.error(error_str)
                raise NexusProcessingException(reason="Error calculating average by day.")
            except KeyError:
                pass

            results.append(result)

        pool.terminate()
        manager.shutdown()

    results = sorted(results, key=lambda entry: entry["time"])

    filt.applyAllFiltersOnField(results, 'mean', applySeasonal=applySeasonalFilter, applyLowPass=applyLowPass)
    filt.applyAllFiltersOnField(results, 'max', applySeasonal=applySeasonalFilter, applyLowPass=applyLowPass)
    filt.applyAllFiltersOnField(results, 'min', applySeasonal=applySeasonalFilter, applyLowPass=applyLowPass)

    return results, {}
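The SENTINEL/work-queue pattern above is standard multiprocessing fan-out: one sentinel per worker tells each worker to stop draining the queue. A self-contained sketch of the same pattern with illustrative names (demo_pool_worker and DEMO_SENTINEL are not part of the codebase):

# Minimal sketch of the sentinel-terminated worker pool used above.
from multiprocessing import Manager, Pool

DEMO_SENTINEL = 'STOP'

def demo_pool_worker(work_queue, done_queue):
    # Keep pulling tasks until the sentinel appears.
    for task in iter(work_queue.get, DEMO_SENTINEL):
        day = task[-1]
        done_queue.put({'time': day, 'mean': float(day) * 0.5})

if __name__ == '__main__':
    manager = Manager()
    work, done = manager.Queue(), manager.Queue()
    days = [86400 * i for i in range(4)]
    for d in days:
        work.put(('calc_average_on_day', d))
    nprocs = 2
    for _ in range(nprocs):
        work.put(DEMO_SENTINEL)   # one sentinel per worker
    pool = Pool(nprocs)
    for _ in range(nprocs):
        pool.apply_async(demo_pool_worker, (work, done))
    pool.close()
    results = [done.get() for _ in days]  # one result per queued day
    pool.terminate()
    manager.shutdown()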
def calc(self, computeOptions, **args):
    """
    :param computeOptions: StatsComputeOptions
    :param args: dict
    :return:
    """
    self._minLat = float(computeOptions.get_min_lat())
    self._maxLat = float(computeOptions.get_max_lat())
    self._minLon = float(computeOptions.get_min_lon())
    self._maxLon = float(computeOptions.get_max_lon())
    self._ds = computeOptions.get_dataset()[0]
    self._startTime = computeOptions.get_start_time()
    self._endTime = computeOptions.get_end_time()

    self._find_native_resolution()
    print 'Using Native resolution: lat_res=%f, lon_res=%f' % (self._latRes, self._lonRes)
    self._minLatCent = self._minLat + self._latRes / 2
    self._minLonCent = self._minLon + self._lonRes / 2
    nlats = int((self._maxLat - self._minLatCent) / self._latRes) + 1
    nlons = int((self._maxLon - self._minLonCent) / self._lonRes) + 1
    self._maxLatCent = self._minLatCent + (nlats - 1) * self._latRes
    self._maxLonCent = self._minLonCent + (nlons - 1) * self._lonRes
    print 'nlats=', nlats, 'nlons=', nlons
    print 'center lat range = %f to %f' % (self._minLatCent, self._maxLatCent)
    print 'center lon range = %f to %f' % (self._minLonCent, self._maxLonCent)
    sys.stdout.flush()
    a = np.zeros((nlats, nlons), dtype=np.float64, order='C')

    nexus_tiles = self._find_global_tile_set()
    # print 'tiles:'
    # for tile in nexus_tiles:
    #     print tile.granule
    #     print tile.section_spec
    #     print 'lat:', tile.latitudes
    #     print 'lon:', tile.longitudes

    if len(nexus_tiles) == 0:
        raise NoDataException(reason="No data found for selected timeframe")

    print 'Initially found %d tiles' % len(nexus_tiles)
    sys.stdout.flush()
    self._prune_tiles(nexus_tiles)
    print 'Pruned to %d tiles' % len(nexus_tiles)
    sys.stdout.flush()
    # for tile in nexus_tiles:
    #     print 'lats: ', tile.latitudes.compressed()
    #     print 'lons: ', tile.longitudes.compressed()

    avg_tiles = map(self._map, nexus_tiles)
    print 'shape a = ', a.shape
    sys.stdout.flush()
    # The tiles below are NOT Nexus objects. They are tuples
    # with the time avg map data and lat-lon bounding box.
    for tile in avg_tiles:
        if tile is not None:
            (tile_data, tile_min_lat, tile_max_lat,
             tile_min_lon, tile_max_lon) = tile
            print 'shape tile_data = ', tile_data.shape
            print 'tile data mask = ', tile_data.mask
            sys.stdout.flush()
            # Compute the map indices for this tile's bounding box up
            # front so they are defined for both branches below.
            y0 = self._lat2ind(tile_min_lat)
            y1 = self._lat2ind(tile_max_lat)
            x0 = self._lon2ind(tile_min_lon)
            x1 = self._lon2ind(tile_max_lon)
            if np.any(np.logical_not(tile_data.mask)):
                print 'writing tile lat %f-%f, lon %f-%f, map y %d-%d, map x %d-%d' % \
                    (tile_min_lat, tile_max_lat, tile_min_lon, tile_max_lon, y0, y1, x0, x1)
                sys.stdout.flush()
                a[y0:y1 + 1, x0:x1 + 1] = tile_data
            else:
                print 'All pixels masked in tile lat %f-%f, lon %f-%f, map y %d-%d, map x %d-%d' % \
                    (tile_min_lat, tile_max_lat, tile_min_lon, tile_max_lon, y0, y1, x0, x1)
                sys.stdout.flush()
    self._create_nc_file(a)

    return TimeAvgMapResults(results={}, meta={}, computeOptions=computeOptions)
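The _lat2ind/_lon2ind helpers used above map latitudes and longitudes onto grid indices. A plausible sketch of that convention (hypothetical; the real helpers live on the handler class and may round differently): cell k is centered at min_cent + k * res, so the inverse is a rounded division.

# Hypothetical sketch of the center-based index convention assumed in calc().
def ind2lat_sketch(y, min_lat_cent, lat_res):
    # Cell y is centered at min_lat_cent + y * lat_res.
    return min_lat_cent + y * lat_res

def lat2ind_sketch(lat, min_lat_cent, lat_res):
    # Invert the mapping above by rounded division.
    return int(round((lat - min_lat_cent) / lat_res))

# Round-trip check on a 1-degree grid whose first center is at -89.5.
assert lat2ind_sketch(ind2lat_sketch(5, -89.5, 1.0), -89.5, 1.0) == 5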
def calc(self, computeOptions, **args):
    minLat = computeOptions.get_min_lat()
    maxLat = computeOptions.get_max_lat()
    minLon = computeOptions.get_min_lon()
    maxLon = computeOptions.get_max_lon()
    ds = computeOptions.get_dataset()[0]
    startTime = computeOptions.get_start_time()
    endTime = computeOptions.get_end_time()
    maskLimitType = computeOptions.get_mask_type()

    chunks, meta = self.getChunksForBox(minLat, maxLat, minLon, maxLon, ds, startTime=startTime, endTime=endTime)

    if len(chunks) == 0:
        raise NoDataException(reason="No data found for selected timeframe")

    masker = LandMaskChecker(self._landmask, maskLimitType)
    a = self._allocateArray(int(math.ceil(maxLat - minLat)), int(math.ceil(maxLon - minLon)))

    lat = minLat
    y = 0
    x = 0
    while lat < maxLat:
        lon = minLon
        x = 0
        while lon < maxLon:
            values = []
            for n in chunks:
                chunk = chunks[n]
                value = chunk.getValueForLatLon(lat, lon)
                lm = chunk.getLandmaskForLatLon(lat, lon)
                if lm == 1.0 and value != 32767.0 and not masker.isLatLonMasked(lat, lon):
                    values.append(value)

            if len(values) > 0:
                avg = np.average(values)
                min = np.min(values)
                max = np.max(values)
                std = np.std(values)
                cnt = len(values)
                xi = range(0, len(values))
                slope, intercept, r_value, p_value, std_err = stats.linregress(xi, values)
            else:
                avg, min, max, std, cnt = (0, 0, 0, 0, 0)
                slope, intercept, r_value, p_value, std_err = (0, 0, 0, 0, 0)

            # Replace any non-finite statistics with 0.0 before building the response.
            avg = 0.0 if not self._validNumber(float(avg)) else float(avg)
            min = 0.0 if not self._validNumber(float(min)) else float(min)
            max = 0.0 if not self._validNumber(float(max)) else float(max)
            std = 0.0 if not self._validNumber(float(std)) else float(std)
            cnt = 0.0 if not self._validNumber(float(cnt)) else float(cnt)
            slope = 0.0 if not self._validNumber(float(slope)) else float(slope)
            intercept = 0.0 if not self._validNumber(float(intercept)) else float(intercept)
            r_value = 0.0 if not self._validNumber(float(r_value)) else float(r_value)
            p_value = 0.0 if not self._validNumber(float(p_value)) else float(p_value)
            std_err = 0.0 if not self._validNumber(float(std_err)) else float(std_err)

            a[y][x] = {
                'avg': avg,
                'min': min,
                'max': max,
                'std': std,
                'cnt': cnt,
                'slope': slope,
                'intercept': intercept,
                'r': r_value,
                'p': p_value,
                'stderr': std_err,
                'lat': float(lat),
                'lon': float(lon)
            }
            lon = lon + 1
            x = x + 1
        lat = lat + 1
        y = y + 1

    return LongitudeLatitudeMapResults(results=a, meta=meta, computeOptions=computeOptions)
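Each grid cell above fits a least-squares trend through its time-ordered values with scipy's linregress, regressing the values against their sample index. A minimal standalone example of that call:

# Self-contained example of the per-cell trend fit used above.
import numpy as np
from scipy import stats

values = np.array([10.0, 10.4, 10.2, 10.9, 11.1])
xi = list(range(len(values)))  # sample index stands in for time
slope, intercept, r_value, p_value, std_err = stats.linregress(xi, values)
print(slope, r_value)  # positive slope, r close to 1 for a rising series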
def getTimeSeriesStatsForBoxSingleDataSet(self, bounding_polygon, ds, start_seconds_from_epoch,
                                          end_seconds_from_epoch,
                                          apply_seasonal_cycle_filter=True, apply_low_pass_filter=True):

    the_time = datetime.now()
    daysinrange = self._get_tile_service().find_days_in_range_asc(bounding_polygon.bounds[1],
                                                                  bounding_polygon.bounds[3],
                                                                  bounding_polygon.bounds[0],
                                                                  bounding_polygon.bounds[2],
                                                                  ds,
                                                                  start_seconds_from_epoch,
                                                                  end_seconds_from_epoch)
    logger.info("Finding days in range took %s for dataset %s" % (str(datetime.now() - the_time), ds))

    if len(daysinrange) == 0:
        raise NoDataException(reason="No data found for selected timeframe")

    the_time = datetime.now()
    maxprocesses = int(self.algorithm_config.get("multiprocessing", "maxprocesses"))

    results = []
    if maxprocesses == 1:
        calculator = TimeSeriesCalculator()
        for dayinseconds in daysinrange:
            result = calculator.calc_average_on_day(bounding_polygon.wkt, ds, dayinseconds)
            results += [result] if result else []
    else:
        # Create a task to calc average difference for each day
        manager = Manager()
        work_queue = manager.Queue()
        done_queue = manager.Queue()
        for dayinseconds in daysinrange:
            work_queue.put(('calc_average_on_day', bounding_polygon.wkt, ds, dayinseconds))
        [work_queue.put(SENTINEL) for _ in range(0, maxprocesses)]

        # Start new processes to handle the work
        pool = Pool(maxprocesses)
        [pool.apply_async(pool_worker, (work_queue, done_queue)) for _ in range(0, maxprocesses)]
        pool.close()

        # Collect the results as [(day (in ms), average difference for that day)]
        for i in range(0, len(daysinrange)):
            result = done_queue.get()
            try:
                error_str = result['error']
                logger.error(error_str)
                raise NexusProcessingException(reason="Error calculating average by day.")
            except KeyError:
                pass

            results += [result] if result else []

        pool.terminate()
        manager.shutdown()

    results = sorted(results, key=lambda entry: entry["time"])
    logger.info("Time series calculation took %s for dataset %s" % (str(datetime.now() - the_time), ds))

    if apply_seasonal_cycle_filter:
        the_time = datetime.now()
        for result in results:
            month = datetime.utcfromtimestamp(result['time']).month
            month_mean, month_max, month_min = self.calculate_monthly_average(month, bounding_polygon.wkt, ds)
            seasonal_mean = result['mean'] - month_mean
            seasonal_min = result['min'] - month_min
            seasonal_max = result['max'] - month_max
            result['meanSeasonal'] = seasonal_mean
            result['minSeasonal'] = seasonal_min
            result['maxSeasonal'] = seasonal_max
        logger.info("Seasonal calculation took %s for dataset %s" % (str(datetime.now() - the_time), ds))

    the_time = datetime.now()
    filtering.applyAllFiltersOnField(results, 'mean', applySeasonal=False, applyLowPass=apply_low_pass_filter)
    filtering.applyAllFiltersOnField(results, 'max', applySeasonal=False, applyLowPass=apply_low_pass_filter)
    filtering.applyAllFiltersOnField(results, 'min', applySeasonal=False, applyLowPass=apply_low_pass_filter)

    if apply_seasonal_cycle_filter and apply_low_pass_filter:
        try:
            filtering.applyFiltersOnField(results, 'meanSeasonal', applySeasonal=False, applyLowPass=True,
                                          append="LowPass")
            filtering.applyFiltersOnField(results, 'minSeasonal', applySeasonal=False, applyLowPass=True,
                                          append="LowPass")
            filtering.applyFiltersOnField(results, 'maxSeasonal', applySeasonal=False, applyLowPass=True,
                                          append="LowPass")
        except Exception as e:
            # If it doesn't work, log the error but ignore it
            tb = traceback.format_exc()
            logger.warn("Error calculating SeasonalLowPass filter:\n%s" % tb)

    logger.info("LowPass filter calculation took %s for dataset %s" % (str(datetime.now() - the_time), ds))

    return results, {}
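The seasonal block above subtracts the per-month climatological mean from each daily statistic to produce the deseasoned fields. A minimal sketch of the same idea with illustrative names (deseason and monthly_means are stand-ins, not codebase functions):

# Sketch of the seasonal-cycle removal used above.
from datetime import datetime

def deseason(results, monthly_means):
    # monthly_means maps month number (1-12) to the climatological mean.
    for result in results:
        month = datetime.utcfromtimestamp(result['time']).month
        result['meanSeasonal'] = result['mean'] - monthly_means[month]
    return results

demo = deseason([{'time': 1500000000, 'mean': 21.0}], {m: 20.0 for m in range(1, 13)})
print(demo[0]['meanSeasonal'])  # 1.0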
def calc(self, computeOptions, **args):
    spark_master, spark_nexecs, spark_nparts = computeOptions.get_spark_cfg()
    self._setQueryParams(computeOptions.get_dataset(),
                         (float(computeOptions.get_min_lat()),
                          float(computeOptions.get_max_lat()),
                          float(computeOptions.get_min_lon()),
                          float(computeOptions.get_max_lon())),
                         computeOptions.get_start_time(),
                         computeOptions.get_end_time(),
                         spark_master=spark_master,
                         spark_nexecs=spark_nexecs,
                         spark_nparts=spark_nparts)

    self.log.debug('ds = {0}'.format(self._ds))
    if not len(self._ds) == 2:
        raise NexusProcessingException(
            reason="Requires two datasets for comparison. Specify request parameter ds=Dataset_1,Dataset_2",
            code=400)
    if next(iter([clim for clim in self._ds if 'CLIM' in clim]), False):
        raise NexusProcessingException(reason="Cannot compute correlation on a climatology", code=400)

    nexus_tiles = self._find_global_tile_set()
    # print 'tiles:'
    # for tile in nexus_tiles:
    #     print tile.granule
    #     print tile.section_spec
    #     print 'lat:', tile.latitudes
    #     print 'lon:', tile.longitudes

    if len(nexus_tiles) == 0:
        raise NoDataException(reason="No data found for selected timeframe")

    self.log.debug('Found {0} tiles'.format(len(nexus_tiles)))
    self.log.debug('Using Native resolution: lat_res={0}, lon_res={1}'.format(self._latRes, self._lonRes))
    nlats = int((self._maxLat - self._minLatCent) / self._latRes) + 1
    nlons = int((self._maxLon - self._minLonCent) / self._lonRes) + 1
    self.log.debug('nlats={0}, nlons={1}'.format(nlats, nlons))

    # Create array of tuples to pass to Spark map function
    nexus_tiles_spark = [[self._find_tile_bounds(t),
                          self._startTime,
                          self._endTime,
                          self._ds] for t in nexus_tiles]

    # Remove empty tiles (should have bounds set to None)
    bad_tile_inds = np.where([t[0] is None for t in nexus_tiles_spark])[0]
    for i in np.flipud(bad_tile_inds):
        del nexus_tiles_spark[i]

    # Expand Spark map tuple array by duplicating each entry N times,
    # where N is the number of ways we want the time dimension carved up.
    num_time_parts = 72
    # num_time_parts = 2
    # num_time_parts = 1
    nexus_tiles_spark = np.repeat(nexus_tiles_spark, num_time_parts, axis=0)
    self.log.debug('repeated len(nexus_tiles_spark) = {0}'.format(len(nexus_tiles_spark)))

    # Set the time boundaries for each of the Spark map tuples.
    # Every Nth element in the array gets the same time bounds.
    spark_part_times = np.linspace(self._startTime, self._endTime + 1,
                                   num_time_parts + 1, dtype=np.int64)
    spark_part_time_ranges = \
        np.repeat([[[spark_part_times[i], spark_part_times[i + 1] - 1]
                    for i in range(num_time_parts)]],
                  len(nexus_tiles_spark) / num_time_parts,
                  axis=0).reshape((len(nexus_tiles_spark), 2))
    self.log.debug('spark_part_time_ranges={0}'.format(spark_part_time_ranges))
    nexus_tiles_spark[:, 1:3] = spark_part_time_ranges
    # print 'nexus_tiles_spark final = '
    # for i in range(len(nexus_tiles_spark)):
    #     print nexus_tiles_spark[i]

    # Launch Spark computations
    # print 'nexus_tiles_spark=', nexus_tiles_spark
    rdd = self._sc.parallelize(nexus_tiles_spark, self._spark_nparts)
    sum_tiles_part = rdd.map(self._map)
    # print "sum_tiles_part = ", sum_tiles_part.collect()
    sum_tiles = \
        sum_tiles_part.combineByKey(lambda val: val,
                                    lambda x, val: (x[0] + val[0], x[1] + val[1],
                                                    x[2] + val[2], x[3] + val[3],
                                                    x[4] + val[4], x[5] + val[5]),
                                    lambda x, y: (x[0] + y[0], x[1] + y[1],
                                                  x[2] + y[2], x[3] + y[3],
                                                  x[4] + y[4], x[5] + y[5]))
    # Convert the N (pixel-wise count) array for each tile to be a
    # NumPy masked array.  That is the last array in the tuple of
    # intermediate summation arrays.  Set mask to True if count is 0.
    sum_tiles = \
        sum_tiles.map(lambda (bounds, (sum_x, sum_y, sum_xx, sum_yy, sum_xy, n)):
                      (bounds, (sum_x, sum_y, sum_xx, sum_yy, sum_xy,
                                np.ma.array(n, mask=~(n.astype(bool))))))
    # print 'sum_tiles = ', sum_tiles.collect()

    # For each pixel in each tile compute an array of Pearson
    # correlation coefficients.  The map function is called once
    # per tile.  The result of this map operation is a list of 3-tuples of
    # (bounds, r, n) for each tile (r=Pearson correlation coefficient
    # and n=number of input values that went into each pixel with
    # any masked values not included).
    corr_tiles = \
        sum_tiles.map(lambda (bounds, (sum_x, sum_y, sum_xx, sum_yy, sum_xy, n)):
                      (bounds,
                       np.ma.array(((sum_xy - sum_x * sum_y / n) /
                                    np.sqrt((sum_xx - sum_x * sum_x / n) *
                                            (sum_yy - sum_y * sum_y / n))),
                                   mask=~(n.astype(bool))),
                       n)).collect()

    r = np.zeros((nlats, nlons), dtype=np.float64, order='C')
    n = np.zeros((nlats, nlons), dtype=np.uint32, order='C')

    # The tiles below are NOT Nexus objects.  They are tuples
    # with the following for each correlation map subset:
    # (1) lat-lon bounding box, (2) array of correlation r values,
    # and (3) array of count n values.
    for tile in corr_tiles:
        ((tile_min_lat, tile_max_lat, tile_min_lon, tile_max_lon),
         tile_data, tile_cnt) = tile
        y0 = self._lat2ind(tile_min_lat)
        y1 = self._lat2ind(tile_max_lat)
        x0 = self._lon2ind(tile_min_lon)
        x1 = self._lon2ind(tile_max_lon)
        self.log.debug('writing tile lat {0}-{1}, lon {2}-{3}, map y {4}-{5}, map x {6}-{7}'
                       .format(tile_min_lat, tile_max_lat, tile_min_lon, tile_max_lon,
                               y0, y1, x0, x1))
        r[y0:y1 + 1, x0:x1 + 1] = tile_data
        n[y0:y1 + 1, x0:x1 + 1] = tile_cnt

    # Store global map in a NetCDF file.
    self._create_nc_file(r, 'corrmap.nc', 'r')

    # Create dict for JSON response
    results = [[{'r': r[y, x], 'cnt': int(n[y, x]),
                 'lat': self._ind2lat(y), 'lon': self._ind2lon(x)}
                for x in range(r.shape[1])] for y in range(r.shape[0])]

    return CorrelationResults(results)
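The pipeline above accumulates the six sums (Sx, Sy, Sxx, Syy, Sxy, n) in the reduce step and then evaluates the Pearson formula pixel-wise. A quick numeric check that the sum-based formula matches numpy's correlation on toy data:

# Verify the sum-based Pearson formula used in the map step above:
# r = (Sxy - Sx*Sy/n) / sqrt((Sxx - Sx^2/n) * (Syy - Sy^2/n))
import numpy as np

x = np.array([1.0, 2.0, 3.0, 4.0])
y = np.array([1.1, 1.9, 3.2, 3.9])
n = len(x)
sum_x, sum_y = x.sum(), y.sum()
sum_xx, sum_yy, sum_xy = (x * x).sum(), (y * y).sum(), (x * y).sum()
r = (sum_xy - sum_x * sum_y / n) / np.sqrt((sum_xx - sum_x ** 2 / n) *
                                           (sum_yy - sum_y ** 2 / n))
assert np.isclose(r, np.corrcoef(x, y)[0, 1])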
def calc(self, computeOptions, **args):
    """
    :param computeOptions: StatsComputeOptions
    :param args: dict
    :return:
    """
    spark_master, spark_nexecs, spark_nparts = computeOptions.get_spark_cfg()
    self._setQueryParams(computeOptions.get_dataset()[0],
                         (float(computeOptions.get_min_lat()),
                          float(computeOptions.get_max_lat()),
                          float(computeOptions.get_min_lon()),
                          float(computeOptions.get_max_lon())),
                         computeOptions.get_start_time(),
                         computeOptions.get_end_time(),
                         spark_master=spark_master,
                         spark_nexecs=spark_nexecs,
                         spark_nparts=spark_nparts)

    if 'CLIM' in self._ds:
        raise NexusProcessingException(
            reason="Cannot compute Latitude/Longitude Time Average plot on a climatology",
            code=400)

    nexus_tiles = self._find_global_tile_set()
    # print 'tiles:'
    # for tile in nexus_tiles:
    #     print tile.granule
    #     print tile.section_spec
    #     print 'lat:', tile.latitudes
    #     print 'lon:', tile.longitudes

    if len(nexus_tiles) == 0:
        raise NoDataException(reason="No data found for selected timeframe")

    self.log.debug('Found {0} tiles'.format(len(nexus_tiles)))
    self.log.debug('Using Native resolution: lat_res={0}, lon_res={1}'.format(self._latRes, self._lonRes))
    nlats = int((self._maxLat - self._minLatCent) / self._latRes) + 1
    nlons = int((self._maxLon - self._minLonCent) / self._lonRes) + 1
    self.log.debug('nlats={0}, nlons={1}'.format(nlats, nlons))
    self.log.debug('center lat range = {0} to {1}'.format(self._minLatCent, self._maxLatCent))
    self.log.debug('center lon range = {0} to {1}'.format(self._minLonCent, self._maxLonCent))
    # for tile in nexus_tiles:
    #     print 'lats: ', tile.latitudes.compressed()
    #     print 'lons: ', tile.longitudes.compressed()

    # Create array of tuples to pass to Spark map function
    nexus_tiles_spark = [[self._find_tile_bounds(t),
                          self._startTime,
                          self._endTime,
                          self._ds] for t in nexus_tiles]
    # print 'nexus_tiles_spark = ', nexus_tiles_spark

    # Remove empty tiles (should have bounds set to None)
    bad_tile_inds = np.where([t[0] is None for t in nexus_tiles_spark])[0]
    for i in np.flipud(bad_tile_inds):
        del nexus_tiles_spark[i]

    # Expand Spark map tuple array by duplicating each entry N times,
    # where N is the number of ways we want the time dimension carved up.
    num_time_parts = 72
    # num_time_parts = 1
    nexus_tiles_spark = np.repeat(nexus_tiles_spark, num_time_parts, axis=0)
    self.log.debug('repeated len(nexus_tiles_spark) = {0}'.format(len(nexus_tiles_spark)))

    # Set the time boundaries for each of the Spark map tuples.
    # Every Nth element in the array gets the same time bounds.
    spark_part_times = np.linspace(self._startTime, self._endTime,
                                   num_time_parts + 1, dtype=np.int64)
    spark_part_time_ranges = \
        np.repeat([[[spark_part_times[i], spark_part_times[i + 1]]
                    for i in range(num_time_parts)]],
                  len(nexus_tiles_spark) / num_time_parts,
                  axis=0).reshape((len(nexus_tiles_spark), 2))
    self.log.debug('spark_part_time_ranges={0}'.format(spark_part_time_ranges))
    nexus_tiles_spark[:, 1:3] = spark_part_time_ranges
    # print 'nexus_tiles_spark final = '
    # for i in range(len(nexus_tiles_spark)):
    #     print nexus_tiles_spark[i]

    # Launch Spark computations
    rdd = self._sc.parallelize(nexus_tiles_spark, self._spark_nparts)
    sum_count_part = rdd.map(self._map)
    sum_count = \
        sum_count_part.combineByKey(lambda val: val,
                                    lambda x, val: (x[0] + val[0], x[1] + val[1]),
                                    lambda x, y: (x[0] + y[0], x[1] + y[1]))
    fill = self._fill
    avg_tiles = \
        sum_count.map(lambda (bounds, (sum_tile, cnt_tile)):
                      (bounds, [[{'avg': (sum_tile[y, x] / cnt_tile[y, x])
                                  if (cnt_tile[y, x] > 0) else fill,
                                  'cnt': cnt_tile[y, x]}
                                 for x in range(sum_tile.shape[1])]
                                for y in range(sum_tile.shape[0])])).collect()

    # Combine subset results to produce global map.
    #
    # The tiles below are NOT Nexus objects.  They are tuples
    # with the time avg map data and lat-lon bounding box.
    a = np.zeros((nlats, nlons), dtype=np.float64, order='C')
    n = np.zeros((nlats, nlons), dtype=np.uint32, order='C')
    for tile in avg_tiles:
        if tile is not None:
            ((tile_min_lat, tile_max_lat, tile_min_lon, tile_max_lon), tile_stats) = tile
            tile_data = np.ma.array(
                [[tile_stats[y][x]['avg'] for x in range(len(tile_stats[0]))]
                 for y in range(len(tile_stats))])
            tile_cnt = np.array(
                [[tile_stats[y][x]['cnt'] for x in range(len(tile_stats[0]))]
                 for y in range(len(tile_stats))])
            tile_data.mask = ~(tile_cnt.astype(bool))
            y0 = self._lat2ind(tile_min_lat)
            y1 = y0 + tile_data.shape[0] - 1
            x0 = self._lon2ind(tile_min_lon)
            x1 = x0 + tile_data.shape[1] - 1
            if np.any(np.logical_not(tile_data.mask)):
                self.log.debug('writing tile lat {0}-{1}, lon {2}-{3}, map y {4}-{5}, map x {6}-{7}'
                               .format(tile_min_lat, tile_max_lat, tile_min_lon, tile_max_lon,
                                       y0, y1, x0, x1))
                a[y0:y1 + 1, x0:x1 + 1] = tile_data
                n[y0:y1 + 1, x0:x1 + 1] = tile_cnt
            else:
                self.log.debug('All pixels masked in tile lat {0}-{1}, lon {2}-{3}, map y {4}-{5}, map x {6}-{7}'
                               .format(tile_min_lat, tile_max_lat, tile_min_lon, tile_max_lon,
                                       y0, y1, x0, x1))

    # Store global map in a NetCDF file.
    self._create_nc_file(a, 'tam.nc', 'val', fill=self._fill)

    # Create dict for JSON response
    results = [[{'avg': a[y, x], 'cnt': int(n[y, x]),
                 'lat': self._ind2lat(y), 'lon': self._ind2lon(x)}
                for x in range(a.shape[1])] for y in range(a.shape[0])]

    return TimeAvgMapSparkResults(results=results, meta={}, computeOptions=computeOptions)
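The combineByKey step above reduces each tile key to a (sum, count) pair, and the final map turns that into a per-pixel mean with a fill value wherever the count is zero. A small numpy demo of that last step:

# Demo of the sum/count-to-mean step above, with a fill for empty pixels.
import numpy as np

sum_tile = np.array([[6.0, 0.0], [9.0, 3.0]])
cnt_tile = np.array([[3, 0], [3, 1]])
fill = -9999.
# np.maximum avoids a divide-by-zero warning; np.where picks the fill.
avg = np.where(cnt_tile > 0, sum_tile / np.maximum(cnt_tile, 1), fill)
print(avg)  # [[2., -9999.], [3., 3.]]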
def calc(self, compute_options, **args):
    """
    :param compute_options: StatsComputeOptions
    :param args: dict
    :return:
    """
    request_start_time = datetime.now()
    metrics_record = self._create_metrics_record()

    ds, bbox, start_time, end_time, nparts_requested = self.parse_arguments(compute_options)
    self._setQueryParams(ds,
                         (float(bbox.bounds[1]),
                          float(bbox.bounds[3]),
                          float(bbox.bounds[0]),
                          float(bbox.bounds[2])),
                         start_time,
                         end_time)

    nexus_tiles = self._find_global_tile_set(metrics_callback=metrics_record.record_metrics)

    if len(nexus_tiles) == 0:
        raise NoDataException(reason="No data found for selected timeframe")

    self.log.debug('Found {0} tiles'.format(len(nexus_tiles)))
    print('Found {} tiles'.format(len(nexus_tiles)))

    daysinrange = self._get_tile_service().find_days_in_range_asc(bbox.bounds[1], bbox.bounds[3],
                                                                  bbox.bounds[0], bbox.bounds[2],
                                                                  ds, start_time, end_time,
                                                                  metrics_callback=metrics_record.record_metrics)
    ndays = len(daysinrange)
    if ndays == 0:
        raise NoDataException(reason="No data found for selected timeframe")
    self.log.debug('Found {0} days in range'.format(ndays))
    for i, d in enumerate(daysinrange):
        self.log.debug('{0}, {1}'.format(i, datetime.utcfromtimestamp(d)))
    self.log.debug('Using Native resolution: lat_res={0}, lon_res={1}'.format(self._latRes, self._lonRes))
    self.log.debug('nlats={0}, nlons={1}'.format(self._nlats, self._nlons))
    self.log.debug('center lat range = {0} to {1}'.format(self._minLatCent, self._maxLatCent))
    self.log.debug('center lon range = {0} to {1}'.format(self._minLonCent, self._maxLonCent))

    # Create array of tuples to pass to Spark map function
    nexus_tiles_spark = [[self._find_tile_bounds(t),
                          self._startTime,
                          self._endTime,
                          self._ds] for t in nexus_tiles]

    # Remove empty tiles (should have bounds set to None)
    bad_tile_inds = np.where([t[0] is None for t in nexus_tiles_spark])[0]
    for i in np.flipud(bad_tile_inds):
        del nexus_tiles_spark[i]

    # Expand Spark map tuple array by duplicating each entry N times,
    # where N is the number of ways we want the time dimension carved up.
    # Set the time boundaries for each of the Spark map tuples so that
    # every Nth element in the array gets the same time bounds.
    max_time_parts = 72
    num_time_parts = min(max_time_parts, ndays)

    spark_part_time_ranges = np.tile(
        np.array([a[[0, -1]] for a in np.array_split(np.array(daysinrange), num_time_parts)]),
        (len(nexus_tiles_spark), 1))
    nexus_tiles_spark = np.repeat(nexus_tiles_spark, num_time_parts, axis=0)
    nexus_tiles_spark[:, 1:3] = spark_part_time_ranges

    # Launch Spark computations
    spark_nparts = self._spark_nparts(nparts_requested)
    self.log.info('Using {} partitions'.format(spark_nparts))

    rdd = self._sc.parallelize(nexus_tiles_spark, spark_nparts)
    metrics_record.record_metrics(partitions=rdd.getNumPartitions())
    sum_count_part = rdd.map(partial(self._map, self._tile_service_factory, metrics_record.record_metrics))

    reduce_duration = 0
    reduce_start = datetime.now()
    sum_count = sum_count_part.combineByKey(lambda val: val,
                                            lambda x, val: (x[0] + val[0], x[1] + val[1]),
                                            lambda x, y: (x[0] + y[0], x[1] + y[1]))
    reduce_duration += (datetime.now() - reduce_start).total_seconds()

    avg_tiles = sum_count.map(partial(calculate_means, metrics_record.record_metrics, self._fill)).collect()

    reduce_start = datetime.now()
    # Combine subset results to produce global map.
    #
    # The tiles below are NOT Nexus objects.  They are tuples
    # with the time avg map data and lat-lon bounding box.
    a = np.zeros((self._nlats, self._nlons), dtype=np.float64, order='C')
    n = np.zeros((self._nlats, self._nlons), dtype=np.uint32, order='C')
    for tile in avg_tiles:
        if tile is not None:
            ((tile_min_lat, tile_max_lat, tile_min_lon, tile_max_lon), tile_stats) = tile
            tile_data = np.ma.array(
                [[tile_stats[y][x]['avg'] for x in range(len(tile_stats[0]))]
                 for y in range(len(tile_stats))])
            tile_cnt = np.array(
                [[tile_stats[y][x]['cnt'] for x in range(len(tile_stats[0]))]
                 for y in range(len(tile_stats))])
            tile_data.mask = ~(tile_cnt.astype(bool))
            y0 = self._lat2ind(tile_min_lat)
            y1 = y0 + tile_data.shape[0] - 1
            x0 = self._lon2ind(tile_min_lon)
            x1 = x0 + tile_data.shape[1] - 1
            if np.any(np.logical_not(tile_data.mask)):
                self.log.debug('writing tile lat {0}-{1}, lon {2}-{3}, map y {4}-{5}, map x {6}-{7}'
                               .format(tile_min_lat, tile_max_lat, tile_min_lon, tile_max_lon,
                                       y0, y1, x0, x1))
                a[y0:y1 + 1, x0:x1 + 1] = tile_data
                n[y0:y1 + 1, x0:x1 + 1] = tile_cnt
            else:
                self.log.debug('All pixels masked in tile lat {0}-{1}, lon {2}-{3}, map y {4}-{5}, map x {6}-{7}'
                               .format(tile_min_lat, tile_max_lat, tile_min_lon, tile_max_lon,
                                       y0, y1, x0, x1))

    # Store the global map in a NetCDF file for debugging purposes.
    # If activated, this line is not thread safe and may cause errors
    # under concurrent access.
    # self._create_nc_file(a, 'tam.nc', 'val', fill=self._fill)

    # Create dict for JSON response
    results = [[{'mean': a[y, x], 'cnt': int(n[y, x]),
                 'lat': self._ind2lat(y), 'lon': self._ind2lon(x)}
                for x in range(a.shape[1])] for y in range(a.shape[0])]

    total_duration = (datetime.now() - request_start_time).total_seconds()
    metrics_record.record_metrics(actual_time=total_duration, reduce=reduce_duration)
    metrics_record.print_metrics(self.log)

    return NexusResults(results=results, meta={}, stats=None, computeOptions=None,
                        minLat=bbox.bounds[1], maxLat=bbox.bounds[3],
                        minLon=bbox.bounds[0], maxLon=bbox.bounds[2],
                        ds=ds, startTime=start_time, endTime=end_time)
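The time-partitioning above splits the in-range days into at most 72 chunks and keeps each chunk's first and last epoch as the [start, end] bounds written into columns 1:3 of the Spark tuples. A small demo of how those bounds are derived:

# Demo of the time-partition bounds used above.
import numpy as np

daysinrange = np.arange(10) * 86400   # ten daily epochs
num_time_parts = 3
# np.array_split tolerates uneven splits; a[[0, -1]] keeps each
# chunk's first and last epoch as its [start, end] bounds.
part_ranges = np.array([a[[0, -1]] for a in np.array_split(daysinrange, num_time_parts)])
print(part_ranges)  # one [start, end] pair per time partition
# np.tile then replicates these rows once per tile tuple.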
def calc(self, request, **args):
    """
    :param request: StatsComputeOptions
    :param args: dict
    :return:
    """
    start_time = datetime.now()
    ds, bounding_polygon, start_seconds_from_epoch, end_seconds_from_epoch, apply_seasonal_cycle_filter, \
        apply_low_pass_filter, nparts_requested, normalize_dates = self.parse_arguments(request)
    metrics_record = self._create_metrics_record()

    resultsRaw = []

    for shortName in ds:
        the_time = datetime.now()
        daysinrange = self._get_tile_service().find_days_in_range_asc(bounding_polygon.bounds[1],
                                                                      bounding_polygon.bounds[3],
                                                                      bounding_polygon.bounds[0],
                                                                      bounding_polygon.bounds[2],
                                                                      shortName,
                                                                      start_seconds_from_epoch,
                                                                      end_seconds_from_epoch,
                                                                      metrics_callback=metrics_record.record_metrics)
        self.log.info("Finding days in range took %s for dataset %s" % (str(datetime.now() - the_time), shortName))

        ndays = len(daysinrange)
        if ndays == 0:
            raise NoDataException(reason="No data found for selected timeframe")

        self.log.debug('Found {0} days in range'.format(ndays))
        for i, d in enumerate(daysinrange):
            self.log.debug('{0}, {1}'.format(i, datetime.utcfromtimestamp(d)))
        spark_nparts = self._spark_nparts(nparts_requested)
        self.log.info('Using {} partitions'.format(spark_nparts))

        results, meta = spark_driver(daysinrange, bounding_polygon, shortName,
                                     self._tile_service_factory,
                                     metrics_record.record_metrics, normalize_dates,
                                     spark_nparts=spark_nparts, sc=self._sc)

        if apply_seasonal_cycle_filter:
            the_time = datetime.now()
            # Get a time series for the corresponding climatology dataset.
            shortName_clim = shortName + "_clim"
            daysinrange_clim = self._get_tile_service().find_days_in_range_asc(bounding_polygon.bounds[1],
                                                                               bounding_polygon.bounds[3],
                                                                               bounding_polygon.bounds[0],
                                                                               bounding_polygon.bounds[2],
                                                                               shortName_clim,
                                                                               0,
                                                                               SECONDS_IN_ONE_YEAR,
                                                                               metrics_callback=metrics_record.record_metrics)
            if len(daysinrange_clim) == 0:
                raise NexusProcessingException(
                    reason="There is no climatology data present for dataset " + shortName + ".")
            results_clim, _ = spark_driver(daysinrange_clim, bounding_polygon, shortName_clim,
                                           self._tile_service_factory,
                                           metrics_record.record_metrics,
                                           normalize_dates=False,
                                           spark_nparts=spark_nparts, sc=self._sc)
            clim_indexed_by_month = {datetime.utcfromtimestamp(result['time']).month: result
                                     for result in results_clim}
            if len(clim_indexed_by_month) < 12:
                raise NexusProcessingException(
                    reason="There are only {0} months of climatology data for dataset {1}. "
                           "A full year of climatology data is required for computing "
                           "deseasoned timeseries.".format(len(clim_indexed_by_month), shortName))

            for result in results:
                month = datetime.utcfromtimestamp(result['time']).month
                result['meanSeasonal'] = result['mean'] - clim_indexed_by_month[month]['mean']
                result['minSeasonal'] = result['min'] - clim_indexed_by_month[month]['min']
                result['maxSeasonal'] = result['max'] - clim_indexed_by_month[month]['max']
            self.log.info("Seasonal calculation took %s for dataset %s" % (str(datetime.now() - the_time), shortName))

        the_time = datetime.now()
        filtering.applyAllFiltersOnField(results, 'mean', applySeasonal=False,
                                         applyLowPass=apply_low_pass_filter)
        filtering.applyAllFiltersOnField(results, 'max', applySeasonal=False,
                                         applyLowPass=apply_low_pass_filter)
        filtering.applyAllFiltersOnField(results, 'min', applySeasonal=False,
                                         applyLowPass=apply_low_pass_filter)

        if apply_seasonal_cycle_filter and apply_low_pass_filter:
            try:
                filtering.applyFiltersOnField(results, 'meanSeasonal', applySeasonal=False,
                                              applyLowPass=True, append="LowPass")
                filtering.applyFiltersOnField(results, 'minSeasonal', applySeasonal=False,
                                              applyLowPass=True, append="LowPass")
                filtering.applyFiltersOnField(results, 'maxSeasonal', applySeasonal=False,
                                              applyLowPass=True, append="LowPass")
            except Exception as e:
                # If it doesn't work, log the error but ignore it
                tb = traceback.format_exc()
                self.log.warn("Error calculating SeasonalLowPass filter:\n%s" % tb)

        resultsRaw.append([results, meta])
        self.log.info("LowPass filter calculation took %s for dataset %s" % (str(datetime.now() - the_time), shortName))

        the_time = datetime.now()
        self._create_nc_file_time1d(np.array(results), 'ts.nc', 'mean', fill=-9999.)
        self.log.info("NetCDF generation took %s for dataset %s" % (str(datetime.now() - the_time), shortName))

    the_time = datetime.now()
    results = self._mergeResults(resultsRaw)

    if len(ds) == 2:
        try:
            stats = TimeSeriesSparkHandlerImpl.calculate_comparison_stats(results)
        except Exception:
            stats = {}
            tb = traceback.format_exc()
            self.log.warn("Error when calculating comparison stats:\n%s" % tb)
    else:
        stats = {}

    meta = []
    for singleRes in resultsRaw:
        meta.append(singleRes[1])

    res = TimeSeriesResults(results=results, meta=meta, stats=stats, computeOptions=None,
                            minLat=bounding_polygon.bounds[1], maxLat=bounding_polygon.bounds[3],
                            minLon=bounding_polygon.bounds[0], maxLon=bounding_polygon.bounds[2],
                            ds=ds, startTime=start_seconds_from_epoch, endTime=end_seconds_from_epoch)

    total_duration = (datetime.now() - start_time).total_seconds()
    metrics_record.record_metrics(actual_time=total_duration)
    metrics_record.print_metrics(logger)

    self.log.info("Merging results and calculating comparisons took %s" % (str(datetime.now() - the_time)))
    return res
def calc(self, compute_options, **args):
    """
    :param compute_options: StatsComputeOptions
    :param args: dict
    :return:
    """
    ds, bbox, start_time, end_time, nparts_requested = self.parse_arguments(compute_options)
    self._setQueryParams(ds,
                         (float(bbox.bounds[1]),
                          float(bbox.bounds[3]),
                          float(bbox.bounds[0]),
                          float(bbox.bounds[2])),
                         start_time,
                         end_time)

    nexus_tiles = self._find_global_tile_set()

    if len(nexus_tiles) == 0:
        raise NoDataException(reason="No data found for selected timeframe")

    self.log.debug('Found {0} tiles'.format(len(nexus_tiles)))
    print('Found {} tiles'.format(len(nexus_tiles)))

    daysinrange = self._tile_service.find_days_in_range_asc(bbox.bounds[1], bbox.bounds[3],
                                                            bbox.bounds[0], bbox.bounds[2],
                                                            ds, start_time, end_time)
    ndays = len(daysinrange)
    if ndays == 0:
        raise NoDataException(reason="No data found for selected timeframe")
    self.log.debug('Found {0} days in range'.format(ndays))
    for i, d in enumerate(daysinrange):
        self.log.debug('{0}, {1}'.format(i, datetime.utcfromtimestamp(d)))
    self.log.debug('Using Native resolution: lat_res={0}, lon_res={1}'.format(self._latRes, self._lonRes))
    self.log.debug('nlats={0}, nlons={1}'.format(self._nlats, self._nlons))
    self.log.debug('center lat range = {0} to {1}'.format(self._minLatCent, self._maxLatCent))
    self.log.debug('center lon range = {0} to {1}'.format(self._minLonCent, self._maxLonCent))

    # Create array of tuples to pass to Spark map function
    nexus_tiles_spark = [[self._find_tile_bounds(t),
                          self._startTime,
                          self._endTime,
                          self._ds] for t in nexus_tiles]

    # Remove empty tiles (should have bounds set to None)
    bad_tile_inds = np.where([t[0] is None for t in nexus_tiles_spark])[0]
    for i in np.flipud(bad_tile_inds):
        del nexus_tiles_spark[i]

    # Expand Spark map tuple array by duplicating each entry N times,
    # where N is the number of ways we want the time dimension carved up.
    # Set the time boundaries for each of the Spark map tuples so that
    # every Nth element in the array gets the same time bounds.
    max_time_parts = 72
    num_time_parts = min(max_time_parts, ndays)

    spark_part_time_ranges = np.tile(
        np.array([a[[0, -1]] for a in np.array_split(np.array(daysinrange), num_time_parts)]),
        (len(nexus_tiles_spark), 1))
    nexus_tiles_spark = np.repeat(nexus_tiles_spark, num_time_parts, axis=0)
    nexus_tiles_spark[:, 1:3] = spark_part_time_ranges

    # Launch Spark computations to calculate x_bar
    spark_nparts = self._spark_nparts(nparts_requested)
    self.log.info('Using {} partitions'.format(spark_nparts))
    rdd = self._sc.parallelize(nexus_tiles_spark, spark_nparts)
    sum_count_part = rdd.map(self._map)
    sum_count = \
        sum_count_part.combineByKey(lambda val: val,
                                    lambda x, val: (x[0] + val[0], x[1] + val[1]),
                                    lambda x, y: (x[0] + y[0], x[1] + y[1]))
    fill = self._fill
    avg_tiles = \
        sum_count.map(lambda (bounds, (sum_tile, cnt_tile)):
                      (bounds, [[(sum_tile[y, x] / cnt_tile[y, x])
                                 if (cnt_tile[y, x] > 0) else fill
                                 for x in range(sum_tile.shape[1])]
                                for y in range(sum_tile.shape[0])])).collect()

    #
    # Launch a second parallel computation to calculate variance from x_bar
    #
    # Create array of tuples to pass to the Spark map function. The first
    # param is the tile bounds that were in the results and the last param
    # is the data from the results (x_bar).
    nexus_tiles_spark = [[t[0], self._startTime, self._endTime, self._ds, t[1]] for t in avg_tiles]

    self.log.info('Using {} partitions'.format(spark_nparts))
    rdd = self._sc.parallelize(nexus_tiles_spark, spark_nparts)
    anomaly_squared_part = rdd.map(self._calc_variance)
    anomaly_squared = \
        anomaly_squared_part.combineByKey(lambda val: val,
                                          lambda x, val: (x[0] + val[0], x[1] + val[1]),
                                          lambda x, y: (x[0] + y[0], x[1] + y[1]))
    variance_tiles = \
        anomaly_squared.map(lambda (bounds, (anomaly_squared_tile, cnt_tile)):
                            (bounds, [[{'variance': (anomaly_squared_tile[y, x] / cnt_tile[y, x])
                                        if (cnt_tile[y, x] > 0) else fill,
                                        'cnt': cnt_tile[y, x]}
                                       for x in range(anomaly_squared_tile.shape[1])]
                                      for y in range(anomaly_squared_tile.shape[0])])).collect()

    # Combine subset results to produce global map.
    #
    # The tiles below are NOT Nexus objects.  They are tuples
    # with the time avg map data and lat-lon bounding box.
    a = np.zeros((self._nlats, self._nlons), dtype=np.float64, order='C')
    n = np.zeros((self._nlats, self._nlons), dtype=np.uint32, order='C')
    for tile in variance_tiles:
        if tile is not None:
            ((tile_min_lat, tile_max_lat, tile_min_lon, tile_max_lon), tile_stats) = tile
            tile_data = np.ma.array(
                [[tile_stats[y][x]['variance'] for x in range(len(tile_stats[0]))]
                 for y in range(len(tile_stats))])
            tile_cnt = np.array(
                [[tile_stats[y][x]['cnt'] for x in range(len(tile_stats[0]))]
                 for y in range(len(tile_stats))])
            tile_data.mask = ~(tile_cnt.astype(bool))
            y0 = self._lat2ind(tile_min_lat)
            y1 = y0 + tile_data.shape[0] - 1
            x0 = self._lon2ind(tile_min_lon)
            x1 = x0 + tile_data.shape[1] - 1
            if np.any(np.logical_not(tile_data.mask)):
                self.log.debug('writing tile lat {0}-{1}, lon {2}-{3}, map y {4}-{5}, map x {6}-{7}'
                               .format(tile_min_lat, tile_max_lat, tile_min_lon, tile_max_lon,
                                       y0, y1, x0, x1))
                a[y0:y1 + 1, x0:x1 + 1] = tile_data
                n[y0:y1 + 1, x0:x1 + 1] = tile_cnt
            else:
                self.log.debug('All pixels masked in tile lat {0}-{1}, lon {2}-{3}, map y {4}-{5}, map x {6}-{7}'
                               .format(tile_min_lat, tile_max_lat, tile_min_lon, tile_max_lon,
                                       y0, y1, x0, x1))

    # Store global map in a NetCDF file.
    self._create_nc_file(a, 'tam.nc', 'val', fill=self._fill)

    # Create dict for JSON response
    results = [[{'variance': a[y, x], 'cnt': int(n[y, x]),
                 'lat': self._ind2lat(y), 'lon': self._ind2lon(x)}
                for x in range(a.shape[1])] for y in range(a.shape[0])]

    return NexusResults(results=results, meta={}, stats=None, computeOptions=None,
                        minLat=bbox.bounds[1], maxLat=bbox.bounds[3],
                        minLon=bbox.bounds[0], maxLon=bbox.bounds[2],
                        ds=ds, startTime=start_time, endTime=end_time)
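The two Spark passes above implement a textbook two-pass variance: the first pass yields the per-pixel mean (x_bar) from sums and counts, the second averages the squared anomalies about that mean. A quick standalone check of the arithmetic:

# Check of the two-pass variance computed above; matches np.var.
import numpy as np

x = np.array([2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0])
x_bar = x.sum() / len(x)                       # first pass: sum / count
variance = ((x - x_bar) ** 2).sum() / len(x)   # second pass: mean squared anomaly
assert np.isclose(variance, np.var(x))
print(variance)  # 4.0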
def getTimeSeriesStatsForBoxSingleDataSet(self, min_lat, max_lat, min_lon, max_lon, ds, start_time=0,
                                          end_time=-1, applySeasonalFilter=False, applyLowPass=False):
    daysinrange = self._tile_service.find_days_in_range_asc(min_lat, max_lat, min_lon, max_lon, ds,
                                                            start_time, end_time)

    if len(daysinrange) == 0:
        raise NoDataException(reason="No data found for selected timeframe")

    print 'Found %d days in range' % len(daysinrange)

    cwd = os.getcwd()

    # Configure Spark
    sp_conf = SparkConf()
    sp_conf.setAppName("Spark Time Avg Map")
    sp_conf.set("spark.executorEnv.HOME", os.path.join(os.getenv('HOME'), 'spark_exec_home'))
    sp_conf.set("spark.executorEnv.PYTHONPATH", cwd)
    # sp_conf.set("spark.yarn.executor.memoryOverhead", "4000")
    sp_conf.set("spark.executor.memory", "4g")

    # num_parts = 1
    # num_parts = 16
    # num_parts = 32
    # num_parts = 64
    num_parts = 128
    # num_execs = 1
    # num_execs = 16
    # num_execs = 32
    num_execs = 64
    cores_per_exec = 1
    sp_conf.setMaster("yarn-client")
    # sp_conf.setMaster("local[16]")
    # sp_conf.setMaster("local[1]")
    sp_conf.set("spark.executor.instances", num_execs)
    sp_conf.set("spark.executor.cores", cores_per_exec)

    # print sp_conf.getAll()
    sc = SparkContext(conf=sp_conf)

    nexus_tiles_spark = [(min_lat, max_lat, min_lon, max_lon, ds, list(daysinrange_part), cwd)
                         for daysinrange_part in np.array_split(daysinrange, num_parts)]
    # for tile in nexus_tiles_spark:
    #     print tile

    # Launch Spark computations
    rdd = sc.parallelize(nexus_tiles_spark, num_parts)
    results = rdd.map(TimeSeriesCalculator.calc_average_on_day).collect()
    # results = list(itertools.chain.from_iterable(results))
    results = sorted(results, key=lambda entry: entry["time"])

    # filt.applyAllFiltersOnField(results, 'mean', applySeasonal=applySeasonalFilter, applyLowPass=applyLowPass)
    # filt.applyAllFiltersOnField(results, 'max', applySeasonal=applySeasonalFilter, applyLowPass=applyLowPass)
    # filt.applyAllFiltersOnField(results, 'min', applySeasonal=applySeasonalFilter, applyLowPass=applyLowPass)

    self._create_nc_file_time1d(np.array(results), 'ts.nc', 'mean')

    return results, {}
def calc(self, request, **args):
    """
    :param request: StatsComputeOptions
    :param args: dict
    :return:
    """
    ds, bounding_polygon, start_seconds_from_epoch, end_seconds_from_epoch, apply_seasonal_cycle_filter, \
        apply_low_pass_filter, nparts_requested = self.parse_arguments(request)

    resultsRaw = []

    for shortName in ds:
        the_time = datetime.now()
        daysinrange = self._tile_service.find_days_in_range_asc(bounding_polygon.bounds[1],
                                                                bounding_polygon.bounds[3],
                                                                bounding_polygon.bounds[0],
                                                                bounding_polygon.bounds[2],
                                                                shortName,
                                                                start_seconds_from_epoch,
                                                                end_seconds_from_epoch)
        self.log.info("Finding days in range took %s for dataset %s" % (str(datetime.now() - the_time), shortName))

        ndays = len(daysinrange)
        if ndays == 0:
            raise NoDataException(reason="No data found for selected timeframe")

        self.log.debug('Found {0} days in range'.format(ndays))
        for i, d in enumerate(daysinrange):
            self.log.debug('{0}, {1}'.format(i, datetime.utcfromtimestamp(d)))

        spark_nparts = self._spark_nparts(nparts_requested)
        self.log.info('Using {} partitions'.format(spark_nparts))

        the_time = datetime.now()
        results, meta = spark_driver(daysinrange, bounding_polygon, shortName,
                                     spark_nparts=spark_nparts, sc=self._sc)
        self.log.info("Time series calculation took %s for dataset %s" % (str(datetime.now() - the_time), shortName))

        if apply_seasonal_cycle_filter:
            the_time = datetime.now()
            for result in results:
                month = datetime.utcfromtimestamp(result['time']).month
                month_mean, month_max, month_min = self.calculate_monthly_average(month,
                                                                                  bounding_polygon.wkt,
                                                                                  shortName)
                seasonal_mean = result['mean'] - month_mean
                seasonal_min = result['min'] - month_min
                seasonal_max = result['max'] - month_max
                result['meanSeasonal'] = seasonal_mean
                result['minSeasonal'] = seasonal_min
                result['maxSeasonal'] = seasonal_max
            self.log.info("Seasonal calculation took %s for dataset %s" % (str(datetime.now() - the_time), shortName))

        the_time = datetime.now()
        filtering.applyAllFiltersOnField(results, 'mean', applySeasonal=False,
                                         applyLowPass=apply_low_pass_filter)
        filtering.applyAllFiltersOnField(results, 'max', applySeasonal=False,
                                         applyLowPass=apply_low_pass_filter)
        filtering.applyAllFiltersOnField(results, 'min', applySeasonal=False,
                                         applyLowPass=apply_low_pass_filter)

        if apply_seasonal_cycle_filter and apply_low_pass_filter:
            try:
                filtering.applyFiltersOnField(results, 'meanSeasonal', applySeasonal=False,
                                              applyLowPass=True, append="LowPass")
                filtering.applyFiltersOnField(results, 'minSeasonal', applySeasonal=False,
                                              applyLowPass=True, append="LowPass")
                filtering.applyFiltersOnField(results, 'maxSeasonal', applySeasonal=False,
                                              applyLowPass=True, append="LowPass")
            except Exception as e:
                # If it doesn't work, log the error but ignore it
                tb = traceback.format_exc()
                self.log.warn("Error calculating SeasonalLowPass filter:\n%s" % tb)

        resultsRaw.append([results, meta])
        self.log.info("LowPass filter calculation took %s for dataset %s" % (str(datetime.now() - the_time), shortName))

        the_time = datetime.now()
        self._create_nc_file_time1d(np.array(results), 'ts.nc', 'mean', fill=-9999.)
        self.log.info("NetCDF generation took %s for dataset %s" % (str(datetime.now() - the_time), shortName))

    the_time = datetime.now()
    results = self._mergeResults(resultsRaw)

    if len(ds) == 2:
        try:
            stats = TimeSeriesHandlerImpl.calculate_comparison_stats(results)
        except Exception:
            stats = {}
            tb = traceback.format_exc()
            self.log.warn("Error when calculating comparison stats:\n%s" % tb)
    else:
        stats = {}

    meta = []
    for singleRes in resultsRaw:
        meta.append(singleRes[1])

    res = TimeSeriesResults(results=results, meta=meta, stats=stats, computeOptions=None,
                            minLat=bounding_polygon.bounds[1], maxLat=bounding_polygon.bounds[3],
                            minLon=bounding_polygon.bounds[0], maxLon=bounding_polygon.bounds[2],
                            ds=ds, startTime=start_seconds_from_epoch, endTime=end_seconds_from_epoch)

    self.log.info("Merging results and calculating comparisons took %s" % (str(datetime.now() - the_time)))
    return res