Example #1
    def parse_arguments(self, request):
        # Parse input arguments
        self.log.debug("Parsing arguments")
        try:
            ds = request.get_dataset()[0]
        except:
            raise NexusProcessingException(reason="'ds' argument is required", code=400)

        try:
            longitude = float(request.get_decimal_arg("longitude", default=None))
        except:
            raise NexusProcessingException(reason="'longitude' argument is required", code=400)

        try:
            latitude = float(request.get_decimal_arg("latitude", default=None))
        except:
            raise NexusProcessingException(reason="'latitude' argument is required", code=400)

        search_datetime = request.get_datetime_arg('date', default=None)
        day_of_year = request.get_int_arg('day', default=None)
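        # Exactly one of 'date' or 'day' may be supplied; reject both-present and both-missing.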
        if (search_datetime is not None and day_of_year is not None) \
                or (search_datetime is None and day_of_year is None):
            raise NexusProcessingException(
                reason="At least one of 'day' or 'date' arguments are required but not both.",
                code=400)

        if search_datetime is not None:
            day_of_year = search_datetime.timetuple().tm_yday

        return_all = request.get_boolean_arg("allInTile", default=True)

        return ds, longitude, latitude, day_of_year, return_all
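Note: the try/except-and-raise pattern above is repeated for every required argument. As an illustration only, a hypothetical helper (not part of the NEXUS codebase; the NexusProcessingException import path is assumed) could centralize that check:

    from webservice.webmodel import NexusProcessingException  # assumed import path


    def require_arg(getter, reason):
        # Run a zero-argument getter and convert any failure, or a missing/None
        # value, into a 400 NexusProcessingException.
        try:
            value = getter()
        except Exception:
            value = None
        if value is None:
            raise NexusProcessingException(reason=reason, code=400)
        return value

    # Hypothetical usage mirroring the longitude check above:
    #   longitude = float(require_arg(lambda: request.get_decimal_arg("longitude", default=None),
    #                                 "'longitude' argument is required"))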
Example #2
    def parse_arguments(self, request):
        # Parse input arguments
        self.log.debug("Parsing arguments")
        try:
            ds = request.get_dataset()[0]
        except:
            raise NexusProcessingException(reason="'ds' argument is required", code=400)

        try:
            bounding_polygon = box(request.get_min_lon(), request.get_min_lat(), request.get_max_lon(),
                                   request.get_max_lat())
        except:
            raise NexusProcessingException(
                reason="'minLon', 'minLat', 'maxLon', and 'maxLat' arguments are required.",
                code=400)

        try:
            start_time = request.get_start_datetime()
        except:
            raise NexusProcessingException(
                reason="'startTime' argument is required. Can be int value milliseconds from epoch or string format YYYY-MM-DDTHH:mm:ssZ",
                code=400)
        try:
            end_time = request.get_end_datetime()
        except:
            raise NexusProcessingException(
                reason="'endTime' argument is required. Can be int value milliseconds from epoch or string format YYYY-MM-DDTHH:mm:ssZ",
                code=400)

        start_seconds_from_epoch = long((start_time - EPOCH).total_seconds())
        end_seconds_from_epoch = long((end_time - EPOCH).total_seconds())

        return ds, bounding_polygon, start_seconds_from_epoch, end_seconds_from_epoch
Example #3
    def parse_arguments(self, request):
        # Parse input arguments
        self.log.debug("Parsing arguments")
        try:
            bounding_polygon = request.get_bounding_polygon()
        except:
            try:
                minLat = request.get_min_lat()
                maxLat = request.get_max_lat()
                minLon = request.get_min_lon()
                maxLon = request.get_max_lon()
                bounding_polygon = Polygon([
                    (minLon, minLat),  # (west, south)
                    (maxLon, minLat),  # (east, south)
                    (maxLon, maxLat),  # (east, north)
                    (minLon, maxLat),  # (west, north)
                    (minLon, minLat)   # (west, south) repeated to close the ring
                ])
            except:
                raise NexusProcessingException(
                    reason=
                    "'b' argument or 'minLon', 'minLat', 'maxLon', and 'maxLat' arguments are required. If 'b' is used, it must be comma-delimited float formatted as Minimum (Western) Longitude, Minimum (Southern) Latitude, Maximum (Eastern) Longitude, Maximum (Northern) Latitude",
                    code=400)
        dataset = request.get_argument('dataset', None)
        if dataset is None:
            dataset = request.get_argument('ds1', None)
        if dataset is None:
            raise NexusProcessingException(
                reason="'dataset' or 'ds1' argument is required", code=400)
        climatology = request.get_argument('climatology', None)
        if climatology is None:
            climatology = request.get_argument('ds2', None)
        if climatology is None:
            raise NexusProcessingException(
                reason="'climatology' or 'ds2' argument is required", code=400)

        try:
            start_time = request.get_start_datetime()
        except:
            raise NexusProcessingException(
                reason=
                "'startTime' argument is required. Can be int value seconds from epoch or string format YYYY-MM-DDTHH:mm:ssZ",
                code=400)
        try:
            end_time = request.get_end_datetime()
        except:
            raise NexusProcessingException(
                reason=
                "'endTime' argument is required. Can be int value seconds from epoch or string format YYYY-MM-DDTHH:mm:ssZ",
                code=400)

        start_seconds_from_epoch = long((start_time - EPOCH).total_seconds())
        end_seconds_from_epoch = long((end_time - EPOCH).total_seconds())

        plot = request.get_boolean_arg("plot", default=False)

        return bounding_polygon, dataset, climatology, start_time, start_seconds_from_epoch, end_time, end_seconds_from_epoch, plot
Example #4
    def calc(self, computeOptions, **args):
        tiles = self._get_tile_service().get_tiles_bounded_by_box(computeOptions.get_min_lat(), computeOptions.get_max_lat(),
                                                            computeOptions.get_min_lon(), computeOptions.get_max_lon(),
                                                            computeOptions.get_dataset()[0],
                                                            computeOptions.get_start_time(),
                                                            computeOptions.get_end_time())

        if len(tiles) == 0:
            raise NexusProcessingException.NoDataException(reason="No data found for selected timeframe")

        maxprocesses = int(self.algorithm_config.get("multiprocessing", "maxprocesses"))

        results = []
        if maxprocesses == 1:
            calculator = LongitudeHofMoellerCalculator()
            for x, tile in enumerate(tiles):
                result = calculator.longitude_time_hofmoeller_stats(tile, x)
                results.append(result)
        else:
            manager = Manager()
            work_queue = manager.Queue()
            done_queue = manager.Queue()
            for x, tile in enumerate(tiles):
                work_queue.put(
                    ('longitude_time_hofmoeller_stats', tile, x))
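            # One sentinel per worker process so each worker knows when to stop.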
            [work_queue.put(SENTINEL) for _ in range(0, maxprocesses)]

            # Start new processes to handle the work
            pool = Pool(maxprocesses)
            [pool.apply_async(pool_worker, (LONGITUDE, work_queue, done_queue)) for _ in range(0, maxprocesses)]
            pool.close()

            # Collect the results
            for x, tile in enumerate(tiles):
                result = done_queue.get()
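                # A result dict containing an 'error' key signals a failure in a worker process.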
                try:
                    error_str = result['error']
                    logger.error(error_str)
                    raise NexusProcessingException(reason="Error calculating longitude_time_hofmoeller_stats.")
                except KeyError:
                    pass

                results.append(result)

            pool.terminate()
            manager.shutdown()

        results = sorted(results, key=lambda entry: entry["time"])

        results = self.applyDeseasonToHofMoeller(results, pivot="lons")

        result = HoffMoellerResults(results=results, computeOptions=computeOptions, type=HoffMoellerResults.LONGITUDE)
        return result
    def do_get(self, request):
        instance = self.__clazz.instance(algorithm_config=self.__algorithm_config, sc=self.__sc)

        results = instance.calc(request)

        try:
            self.set_status(results.status_code)
        except AttributeError:
            pass

        if request.get_content_type() == ContentTypes.JSON:
            self.set_header("Content-Type", "application/json")
            try:
                self.write(results.toJson())
            except AttributeError:
                traceback.print_exc(file=sys.stdout)
                self.write(json.dumps(results, indent=4))
        elif request.get_content_type() == ContentTypes.PNG:
            self.set_header("Content-Type", "image/png")
            try:
                self.write(results.toImage())
            except AttributeError:
                traceback.print_exc(file=sys.stdout)
                raise NexusProcessingException(reason="Unable to convert results to an Image.")
        elif request.get_content_type() == ContentTypes.CSV:
            self.set_header("Content-Type", "text/csv")
            self.set_header("Content-Disposition", "filename=\"%s\"" % request.get_argument('filename', "download.csv"))
            try:
                self.write(results.toCSV())
            except:
                traceback.print_exc(file=sys.stdout)
                raise NexusProcessingException(reason="Unable to convert results to CSV.")
        elif request.get_content_type() == ContentTypes.NETCDF:
            self.set_header("Content-Type", "application/x-netcdf")
            self.set_header("Content-Disposition", "filename=\"%s\"" % request.get_argument('filename', "download.nc"))
            try:
                self.write(results.toNetCDF())
            except:
                traceback.print_exc(file=sys.stdout)
                raise NexusProcessingException(reason="Unable to convert results to NetCDF.")
        elif request.get_content_type() == ContentTypes.ZIP:
            self.set_header("Content-Type", "application/zip")
            self.set_header("Content-Disposition", "filename=\"%s\"" % request.get_argument('filename', "download.zip"))
            try:
                self.write(results.toZip())
            except:
                traceback.print_exc(file=sys.stdout)
                raise NexusProcessingException(reason="Unable to convert results to Zip.")

        return results
    def parse_arguments(self, request):
        # Parse input arguments
        self.log.debug("Parsing arguments")

        source_name = request.get_argument('source', None)
        if source_name is None or source_name.strip() == '':
            raise NexusProcessingException(reason="'source' argument is required", code=400)

        parameter_s = request.get_argument('parameter', None)
        if parameter_s not in ['sst', 'sss', 'wind', None]:
            raise NexusProcessingException(
                reason="Parameter %s not supported. Must be one of 'sst', 'sss', 'wind'." % parameter_s, code=400)

        try:
            start_time = request.get_start_datetime()
            start_time = start_time.strftime("%Y-%m-%dT%H:%M:%SZ")
        except:
            raise NexusProcessingException(
                reason="'startTime' argument is required. Can be int value seconds from epoch or string format YYYY-MM-DDTHH:mm:ssZ",
                code=400)
        try:
            end_time = request.get_end_datetime()
            end_time = end_time.strftime("%Y-%m-%dT%H:%M:%SZ")
        except:
            raise NexusProcessingException(
                reason="'endTime' argument is required. Can be int value seconds from epoch or string format YYYY-MM-DDTHH:mm:ssZ",
                code=400)

        if start_time > end_time:
            raise NexusProcessingException(
                reason="The starting time must be before the ending time. Received startTime: %s, endTime: %s" % (
                    request.get_start_datetime().strftime(ISO_8601), request.get_end_datetime().strftime(ISO_8601)),
                code=400)

        try:
            bounding_polygon = request.get_bounding_polygon()
        except:
            raise NexusProcessingException(
                reason="'b' argument is required. Must be comma-delimited float formatted as Minimum (Western) Longitude, Minimum (Southern) Latitude, Maximum (Eastern) Longitude, Maximum (Northern) Latitude",
                code=400)

        depth_min = request.get_decimal_arg('depthMin', default=None)
        depth_max = request.get_decimal_arg('depthMax', default=None)

        if depth_min is not None and depth_max is not None and depth_min >= depth_max:
            raise NexusProcessingException(
                reason="Depth Min should be less than Depth Max", code=400)

        platforms = request.get_argument('platforms', None)
        if platforms is not None:
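            # Parse only to validate; the original comma-delimited string is returned unchanged.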
            try:
                p_validation = platforms.split(',')
                p_validation = [int(p) for p in p_validation]
                del p_validation
            except:
                raise NexusProcessingException(reason="platforms must be a comma-delimited list of integers", code=400)

        return source_name, parameter_s, start_time, end_time, bounding_polygon, depth_min, depth_max, platforms
Example #7
    def calc(self, computeOptions, **args):
        nexus_tiles_spark = [(tile.tile_id, x, computeOptions.get_min_lat(),
                              computeOptions.get_max_lat(),
                              computeOptions.get_min_lon(),
                              computeOptions.get_max_lon())
                             for x, tile in enumerate(
                                 self._tile_service.find_tiles_in_box(
                                     computeOptions.get_min_lat(),
                                     computeOptions.get_max_lat(),
                                     computeOptions.get_min_lon(),
                                     computeOptions.get_max_lon(),
                                     computeOptions.get_dataset()[0],
                                     computeOptions.get_start_time(),
                                     computeOptions.get_end_time(),
                                     fetch_data=False))]

        if len(nexus_tiles_spark) == 0:
            raise NexusProcessingException.NoDataException(
                reason="No data found for selected timeframe")

        # Parallelize list of tile ids
        rdd = self._sc.parallelize(
            nexus_tiles_spark, determine_parllelism(len(nexus_tiles_spark)))
        results = rdd.map(LongitudeHofMoellerCalculator.
                          longitude_time_hofmoeller_stats).collect()

        results = filter(None, results)
        results = sorted(results, key=lambda entry: entry["time"])

        results = self.applyDeseasonToHofMoeller(results, pivot="lons")

        result = HoffMoellerResults(results=results,
                                    computeOptions=computeOptions,
                                    type=HoffMoellerResults.LONGITUDE)
        return result
 def render(self, tornado_handler, result):
     tornado_handler.set_header("Content-Type", "application/x-netcdf")
     tornado_handler.set_header("Content-Disposition", "filename=\"%s\"" % self._request.get_argument('filename', "download.nc"))
     try:
         tornado_handler.write(result.toNetCDF())
         tornado_handler.finish()
     except:
         traceback.print_exc(file=sys.stdout)
         raise NexusProcessingException(reason="Unable to convert results to NetCDF.")
Example #9
    def calc(self, request, **args):
        bounding_polygon, dataset, climatology, start_time, start_seconds_from_epoch, end_time, end_seconds_from_epoch, plot = self.parse_arguments(
            request)

        self.log.debug("Querying for tiles in search domain")
        # Get tile ids in box
        tile_ids = [
            tile.tile_id
            for tile in self._get_tile_service().find_tiles_in_polygon(
                bounding_polygon,
                dataset,
                start_seconds_from_epoch,
                end_seconds_from_epoch,
                fetch_data=False,
                fl='id',
                sort=[
                    'tile_min_time_dt asc', 'tile_min_lon asc',
                    'tile_min_lat asc'
                ],
                rows=5000)
        ]

        # Call spark_matchup
        try:
            spark_result = spark_anomalies_driver(self._tile_service_factory,
                                                  tile_ids,
                                                  wkt.dumps(bounding_polygon),
                                                  dataset,
                                                  climatology,
                                                  sc=self._sc)
        except Exception as e:
            self.log.exception(e)
            raise NexusProcessingException(
                reason=
                "An unknown error occurred while computing average differences",
                code=500)

        average_and_std_by_day = spark_result

        min_lon, min_lat, max_lon, max_lat = bounding_polygon.bounds
        result = DDAResult(results=[[{
            'time': dayms,
            'mean': avg_std[0],
            'std': avg_std[1],
            'ds': 0
        }] for dayms, avg_std in average_and_std_by_day],
                           stats={},
                           meta=self.get_meta(dataset),
                           computeOptions=None,
                           minLat=min_lat,
                           maxLat=max_lat,
                           minLon=min_lon,
                           maxLon=max_lon,
                           ds=dataset,
                           startTime=start_seconds_from_epoch,
                           endTime=end_seconds_from_epoch)
        result.meta()['climatology'] = climatology
        return result
Example #10
 def render(self, tornado_handler, result):
     tornado_handler.set_header("Content-Type", "image/png")
     try:
         tornado_handler.write(result.toImage())
         tornado_handler.finish()
     except AttributeError:
         traceback.print_exc(file=sys.stdout)
         raise NexusProcessingException(
             reason="Unable to convert results to an Image.")
Example #11
    def parse_arguments(self, request):
        # Parse input arguments
        self.log.debug("Parsing arguments")

        try:
            ds = request.get_dataset()
            if type(ds) == list or type(ds) == tuple:
                ds = next(iter(ds))
        except:
            raise NexusProcessingException(
                reason="'ds' argument is required. Must be a string", code=400)

        # Do not allow time series on Climatology. At this point ds is a single dataset
        # short name (string), so check the name directly.
        if 'CLIM' in ds:
            raise NexusProcessingException(
                reason=
                "Cannot compute Latitude/Longitude Time Average plot on a climatology",
                code=400)

        west, south, east, north = request.get_bounding_box()
        bounding_polygon = shapely.geometry.Polygon([(west, south),
                                                     (east, south),
                                                     (east, north),
                                                     (west, north),
                                                     (west, south)])

        start_time = request.get_start_datetime()
        end_time = request.get_end_datetime()

        if start_time > end_time:
            raise NexusProcessingException(
                reason=
                "The starting time must be before the ending time. Received startTime: %s, endTime: %s"
                % (request.get_start_datetime().strftime(ISO_8601),
                   request.get_end_datetime().strftime(ISO_8601)),
                code=400)

        nparts_requested = request.get_nparts()

        start_seconds_from_epoch = long((start_time - EPOCH).total_seconds())
        end_seconds_from_epoch = long((end_time - EPOCH).total_seconds())

        return ds, bounding_polygon, start_seconds_from_epoch, end_seconds_from_epoch, nparts_requested
 def wrapped(*args, **kwargs1):
     try:
         with SparkHandler.SparkJobContext(self.spark_job_stack) as job_context:
             # TODO Pool and Job are forced to a 1-to-1 relationship
             calc_func.im_self._sc.setLocalProperty("spark.scheduler.pool", job_context.job_name)
             calc_func.im_self._sc.setJobGroup(job_context.job_name, "a spark job")
             return calc_func(*args, **kwargs1)
     except SparkHandler.SparkJobContext.MaxConcurrentJobsReached:
         raise NexusProcessingException(code=503,
                                        reason="Max concurrent requests reached. Please try again later.")
Example #13
    def getTimeSeriesStatsForBoxSingleDataSet(self, min_lat, max_lat, min_lon, max_lon, ds, start_time=0, end_time=-1,
                                              applySeasonalFilter=True, applyLowPass=True):

        daysinrange = self._tile_service.find_days_in_range_asc(min_lat, max_lat, min_lon, max_lon, ds, start_time,
                                                                end_time)

        if len(daysinrange) == 0:
            raise NoDataException(reason="No data found for selected timeframe")

        maxprocesses = int(self.algorithm_config.get("multiprocessing", "maxprocesses"))

        results = []
        if maxprocesses == 1:
            calculator = TimeSeriesCalculator()
            for dayinseconds in daysinrange:
                result = calculator.calc_average_on_day(min_lat, max_lat, min_lon, max_lon, ds, dayinseconds)
                results.append(result)
        else:
            # Create a task to calc average difference for each day
            manager = Manager()
            work_queue = manager.Queue()
            done_queue = manager.Queue()
            for dayinseconds in daysinrange:
                work_queue.put(
                    ('calc_average_on_day', min_lat, max_lat, min_lon, max_lon, ds, dayinseconds))
            [work_queue.put(SENTINEL) for _ in xrange(0, maxprocesses)]

            # Start new processes to handle the work
            pool = Pool(maxprocesses)
            [pool.apply_async(pool_worker, (work_queue, done_queue)) for _ in xrange(0, maxprocesses)]
            pool.close()

            # Collect the results as [(day (in ms), average difference for that day)]
            for i in xrange(0, len(daysinrange)):
                result = done_queue.get()
                try:
                    error_str = result['error']
                    self.log.error(error_str)
                    raise NexusProcessingException(reason="Error calculating average by day.")
                except KeyError:
                    pass

                results.append(result)

            pool.terminate()
            manager.shutdown()

        results = sorted(results, key=lambda entry: entry["time"])

        filt.applyAllFiltersOnField(results, 'mean', applySeasonal=applySeasonalFilter, applyLowPass=applyLowPass)
        filt.applyAllFiltersOnField(results, 'max', applySeasonal=applySeasonalFilter, applyLowPass=applyLowPass)
        filt.applyAllFiltersOnField(results, 'min', applySeasonal=applySeasonalFilter, applyLowPass=applyLowPass)

        return results, {}
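Note: the pool_worker function used by these multiprocessing examples is not included in this listing. The sketch below is illustrative only; it assumes each queued task is a tuple of (method name, arguments) dispatched to a TimeSeriesCalculator, with SENTINEL marking the end of work and failures reported as {'error': ...} dicts, which is what the consumer loops above expect:

    def pool_worker(work_queue, done_queue):
        # Illustrative sketch, not the actual implementation.
        calculator = TimeSeriesCalculator()
        while True:
            task = work_queue.get()
            if task == SENTINEL:
                break
            method_name, args = task[0], task[1:]
            try:
                result = getattr(calculator, method_name)(*args)
            except Exception as e:
                result = {'error': repr(e)}
            done_queue.put(result)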
Example #14
 def render(self, tornado_handler, result):
     tornado_handler.set_header("Content-Type", "text/csv")
     tornado_handler.set_header(
         "Content-Disposition", "filename=\"%s\"" %
         self._request.get_argument('filename', "download.csv"))
     try:
         tornado_handler.write(result.toCSV())
         tornado_handler.finish()
     except:
         traceback.print_exc(file=sys.stdout)
         raise NexusProcessingException(
             reason="Unable to convert results to CSV.")
    def get_daily_difference_average_for_box(self, min_lat, max_lat, min_lon,
                                             max_lon, dataset1, dataset2,
                                             start_time, end_time):

        daysinrange = self._tile_service.find_days_in_range_asc(
            min_lat, max_lat, min_lon, max_lon, dataset1, start_time, end_time)

        maxprocesses = int(
            self.algorithm_config.get("multiprocessing", "maxprocesses"))

        if maxprocesses == 1:
            calculator = DailyDifferenceAverageCalculator()
            averagebyday = []
            for dayinseconds in daysinrange:
                result = calculator.calc_average_diff_on_day(
                    min_lat, max_lat, min_lon, max_lon, dataset1, dataset2,
                    dayinseconds)
                averagebyday.append((result[0], result[1]))
        else:
            # Create a task to calc average difference for each day
            manager = Manager()
            work_queue = manager.Queue()
            done_queue = manager.Queue()
            for dayinseconds in daysinrange:
                work_queue.put(
                    ('calc_average_diff_on_day', min_lat, max_lat, min_lon,
                     max_lon, dataset1, dataset2, dayinseconds))
            [work_queue.put(SENTINEL) for _ in xrange(0, maxprocesses)]

            # Start new processes to handle the work
            pool = Pool(maxprocesses)
            [
                pool.apply_async(pool_worker, (work_queue, done_queue))
                for _ in xrange(0, maxprocesses)
            ]
            pool.close()

            # Collect the results as [(day (in ms), average difference for that day)]
            averagebyday = []
            for i in xrange(0, len(daysinrange)):
                result = done_queue.get()
                if result[0] == 'error':
                    print >> sys.stderr, result[1]
                    raise NexusProcessingException(
                        reason="Error calculating average by day.")
                rdata = result
                averagebyday.append((rdata[0], rdata[1]))

            pool.terminate()
            manager.shutdown()

        return averagebyday
    def parse_arguments(self, request):
        # Parse input arguments
        self.log.debug("Parsing arguments")

        try:
            ds = request.get_dataset()[0]
        except:
            raise NexusProcessingException(reason="'ds' argument is required",
                                           code=400)

        parameter_s = request.get_argument('parameter', None)
        if parameter_s not in ['sst', 'sss', 'wind', None]:
            raise NexusProcessingException(
                reason=
                "Parameter %s not supported. Must be one of 'sst', 'sss', 'wind'."
                % parameter_s,
                code=400)

        try:
            start_time = request.get_start_datetime()
            start_time = long((start_time - EPOCH).total_seconds())
        except:
            raise NexusProcessingException(
                reason=
                "'startTime' argument is required. Can be int value seconds from epoch or string format YYYY-MM-DDTHH:mm:ssZ",
                code=400)
        try:
            end_time = request.get_end_datetime()
            end_time = long((end_time - EPOCH).total_seconds())
        except:
            raise NexusProcessingException(
                reason=
                "'endTime' argument is required. Can be int value seconds from epoch or string format YYYY-MM-DDTHH:mm:ssZ",
                code=400)

        if start_time > end_time:
            raise NexusProcessingException(
                reason=
                "The starting time must be before the ending time. Received startTime: %s, endTime: %s"
                % (request.get_start_datetime().strftime(ISO_8601),
                   request.get_end_datetime().strftime(ISO_8601)),
                code=400)

        bounding_polygon = metadata_filter = None
        try:
            bounding_polygon = request.get_bounding_polygon()
        except:
            metadata_filter = request.get_metadata_filter()
            if 0 == len(metadata_filter):
                raise NexusProcessingException(
                    reason=
                    "'b' or 'metadataFilter' argument is required. 'b' must be comma-delimited float formatted "
                    "as Minimum (Western) Longitude, Minimum (Southern) Latitude, Maximum (Eastern) Longitude, "
                    "Maximum (Northern) Latitude. 'metadataFilter' must be in the form key:value",
                    code=400)

        return ds, parameter_s, start_time, end_time, bounding_polygon, metadata_filter
Example #17
def __doQuery(endpoint, startTime, endTime, bbox, depth_min=None, depth_max=None, itemsPerPage=10, startIndex=0, platforms=None,
              pageCallback=None):
    params = {"startTime": startTime, "endTime": endTime, "bbox": bbox, "itemsPerPage": itemsPerPage,
              "startIndex": startIndex, "stats": "true"}

    if depth_min is not None:
        params['minDepth'] = depth_min
    if depth_max is not None:
        params['maxDepth'] = depth_max

    if platforms is not None:
        params["platform"] = platforms.split(",")

    resultsRaw = __fetchJson(endpoint["url"], params)
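    # Bounds start inverted so that the first coordinate tested establishes the initial extent.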
    boundsConstrainer = geo.BoundsConstrainer(north=-90, south=90, west=180, east=-180)

    if resultsRaw["totalResults"] == 0 or len(resultsRaw["results"]) == 0:  # Double-sanity check
        return [], resultsRaw["totalResults"], startIndex, itemsPerPage, boundsConstrainer

    try:
        results = []
        for resultdict in resultsRaw["results"]:
            result = __resultRawToUsable(resultdict)
            result["source"] = endpoint["name"]
            boundsConstrainer.testCoords(north=result["y"], south=result["y"], west=result["x"], east=result["x"])
            results.append(result)

        if "stats_fields" in resultsRaw and len(resultsRaw["results"]) == 0:
            stats = resultsRaw["stats_fields"]
            if "lat" in stats and "lon" in stats:
                boundsConstrainer.testCoords(north=stats['lat']['max'], south=stats['lat']['min'],
                                             west=stats['lon']['min'], east=stats['lon']['max'])

        if pageCallback is not None:
            pageCallback(results)

        '''
            If pageCallback was supplied, we assume this call is asynchronous; otherwise, combine all the result data and return it.
        '''
        if pageCallback is None:
            return results, int(resultsRaw["totalResults"]), int(resultsRaw["startIndex"]), int(
                resultsRaw["itemsPerPage"]), boundsConstrainer
        else:
            return [], int(resultsRaw["totalResults"]), int(resultsRaw["startIndex"]), int(
                resultsRaw["itemsPerPage"]), boundsConstrainer
    except:
        print "Invalid or missing JSON in response."
        traceback.print_exc()
        raise NexusProcessingException(reason="Invalid or missing JSON in response.")
    def calc(self, computeOptions, **args):
        execution_id = computeOptions.get_argument("id", None)

        try:
            execution_id = uuid.UUID(execution_id)
        except:
            raise NexusProcessingException(reason="'id' argument must be a valid uuid", code=400)

        simple_results = computeOptions.get_boolean_arg("simpleResults", default=False)

        with ResultsStorage.ResultsRetrieval() as storage:
            params, stats, data = storage.retrieveResults(execution_id, trim_data=simple_results)

        return BaseDomsHandler.DomsQueryResults(results=data, args=params, details=stats, bounds=None, count=None,
                                                computeOptions=None, executionId=execution_id)
Example #19
    def calc(self, computeOptions, **args):
        tiles = self._tile_service.get_tiles_bounded_by_box(computeOptions.get_min_lat(), computeOptions.get_max_lat(),
                                                            computeOptions.get_min_lon(), computeOptions.get_max_lon(),
                                                            computeOptions.get_dataset()[0],
                                                            computeOptions.get_start_time(),
                                                            computeOptions.get_end_time())

        if len(tiles) == 0:
            raise NexusProcessingException.NoDataException(reason="No data found for selected timeframe")

        maxprocesses = int(self.algorithm_config.get("multiprocessing", "maxprocesses"))
        pool = ThreadPool(processes=maxprocesses)
        results = [pool.apply_async(longitude_time_hofmoeller_stats, args=(tile, x)) for x, tile in enumerate(tiles)]
        pool.close()
        pool.join()

        results = [p.get() for p in results]
        results = sorted(results, key=lambda entry: entry["time"])

        results = self.applyDeseasonToHofMoeller(results, pivot="lons")

        result = HoffMoellerResults(results=results, compute_options=computeOptions, type=HoffMoellerResults.LONGITUDE)
        return result
    def calc(self, computeOptions, **args):
        minLat = computeOptions.get_min_lat()
        maxLat = computeOptions.get_max_lat()
        minLon = computeOptions.get_min_lon()
        maxLon = computeOptions.get_max_lon()
        ds = computeOptions.get_dataset()
        startTime = computeOptions.get_start_time()
        endTime = computeOptions.get_end_time()
        resolution = computeOptions.get_decimal_arg("res", default=1.0)

        if not len(ds) == 2:
            raise Exception("Requires two datasets for comparison. Specify request parameter ds=Dataset_1,Dataset_2")

        ds1tiles = self._tile_service.find_tiles_in_polygon(box(minLon, minLat, maxLon, maxLat), ds[0], startTime,
                                                            endTime)
        ds2tiles = self._tile_service.find_tiles_in_polygon(box(minLon, minLat, maxLon, maxLat), ds[1], startTime,
                                                            endTime)

        matches = self._match_tiles(ds1tiles, ds2tiles)

        if len(matches) == 0:
            raise NexusProcessingException(reason="Could not find any data temporally co-located")

        results = [[{
            'cnt': 0,
            'slope': 0,
            'intercept': 0,
            'r': 0,
            'p': 0,
            'stderr': 0,
            'lat': float(lat),
            'lon': float(lon)
        } for lon in np.arange(minLon, maxLon, resolution)] for lat in
            np.arange(minLat, maxLat, resolution)]

        for stats in results:
            for stat in stats:
                values_x = []
                values_y = []
                for tile_matches in matches:

                    tile_1_list = tile_matches[0]
                    value_1 = get_approximate_value_for_lat_lon(tile_1_list, stat["lat"], stat["lon"])

                    tile_2_list = tile_matches[1]
                    value_2 = get_approximate_value_for_lat_lon(tile_2_list, stat["lat"], stat["lon"])

                    if not (math.isnan(value_1) or math.isnan(value_2)):
                        values_x.append(value_1)
                        values_y.append(value_2)

                if len(values_x) > 2 and len(values_y) > 2:
                    stats = linregress(values_x, values_y)

                    stat["slope"] = stats[0] if not math.isnan(stats[0]) and not math.isinf(stats[0]) else str(stats[0])
                    stat["intercept"] = stats[1] if not math.isnan(stats[1]) and not math.isinf(stats[1]) else str(
                        stats[1])
                    stat["r"] = stats[2] if not math.isnan(stats[2]) and not math.isinf(stats[2]) else str(stats[2])
                    stat["p"] = stats[3] if not math.isnan(stats[3]) and not math.isinf(stats[3]) else str(stats[3])
                    stat["stderr"] = stats[4] if not math.isnan(stats[4]) and not math.isinf(stats[4]) else str(
                        stats[4])
                    stat["cnt"] = len(values_x)

        return CorrelationResults(results)
Example #21
    def calc(self, computeOptions, **args):

        self._setQueryParams(computeOptions.get_dataset(),
                             (float(computeOptions.get_min_lat()),
                              float(computeOptions.get_max_lat()),
                              float(computeOptions.get_min_lon()),
                              float(computeOptions.get_max_lon())),
                             computeOptions.get_start_time(),
                             computeOptions.get_end_time())

        print 'ds = ',self._ds
        if not len(self._ds) == 2:
            raise Exception("Requires two datasets for comparison. Specify request parameter ds=Dataset_1,Dataset_2")

        self._find_native_resolution()
        print 'Using Native resolution: lat_res=%f, lon_res=%f' % (self._latRes, self._lonRes)
        self._minLatCent = self._minLat + self._latRes / 2
        self._minLonCent = self._minLon + self._lonRes / 2
        nlats = int((self._maxLat-self._minLatCent)/self._latRes)+1
        nlons = int((self._maxLon-self._minLonCent)/self._lonRes)+1
        self._maxLatCent = self._minLatCent + (nlats-1) * self._latRes
        self._maxLonCent = self._minLonCent + (nlons-1) * self._lonRes
        print 'nlats=',nlats,'nlons=',nlons
        sys.stdout.flush()

        nexus_tiles = self._find_global_tile_set()
        # print 'tiles:'
        # for tile in nexus_tiles:
        #     print tile.granule
        #     print tile.section_spec
        #     print 'lat:', tile.latitudes
        #     print 'lon:', tile.longitudes

        #                                                          nexus_tiles)
        if len(nexus_tiles) == 0:
            raise NexusProcessingException.NoDataException(reason="No data found for selected timeframe")

        print 'Initially found %d tiles' % len(nexus_tiles)
        sys.stdout.flush()
        self._prune_tiles(nexus_tiles)
        print 'Pruned to %d tiles' % len(nexus_tiles)
        sys.stdout.flush()
        # Create array of tuples to pass to Spark map function
        nexus_tile_specs = [[self._find_tile_bounds(t), 
                             self._startTime, self._endTime, 
                             self._ds] for t in nexus_tiles]

        # Remove empty tiles (should have bounds set to None)
        bad_tile_inds = np.where([t[0] is None for t in nexus_tile_specs])[0]
        for i in np.flipud(bad_tile_inds):
            del nexus_tile_specs[i]

        # Configure Spark
        sp_conf = SparkConf()
        sp_conf.setAppName("Spark Correlation Map")
        sp_conf.set("spark.executorEnv.HOME",
                    os.path.join(os.getenv('HOME'), 'spark_exec_home'))
        sp_conf.set("spark.executorEnv.PYTHONPATH", os.getcwd())
        sp_conf.set("spark.executor.memoryOverhead", "4g")

        #num_parts = 1
        num_parts = 16
        #num_parts = 64
        #num_parts = 128
        #num_execs = 1
        #num_execs = 8
        num_execs = 16
        #num_execs = 64
        cores_per_exec = 1
        sp_conf.setMaster("yarn-client")
        #sp_conf.setMaster("local[16]")
        #sp_conf.setMaster("local[1]")
        sp_conf.set("spark.executor.instances", num_execs)
        sp_conf.set("spark.executor.cores", cores_per_exec)

        #print sp_conf.getAll()
        sc = SparkContext(conf=sp_conf)
        
        # Launch Spark computations
        rdd = sc.parallelize(nexus_tile_specs,num_parts)
        corr_tiles = rdd.map(self._map).collect()

        r = np.zeros((nlats, nlons),dtype=np.float64,order='C')

        # The tiles below are NOT Nexus objects.  They are tuples
        # with the correlation map subset lat-lon bounding box.
        for tile in corr_tiles:
            (tile_stats, tile_min_lat, tile_max_lat, 
             tile_min_lon, tile_max_lon) = tile
            tile_data = np.ma.array([[tile_stats[y][x]['r'] for x in range(len(tile_stats[0]))] for y in range(len(tile_stats))])
            tile_cnt = np.array([[tile_stats[y][x]['cnt'] for x in range(len(tile_stats[0]))] for y in range(len(tile_stats))])
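            # Mask out grid cells with a zero observation count.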
            tile_data.mask = ~(tile_cnt.astype(bool))
            y0 = self._lat2ind(tile_min_lat)
            y1 = self._lat2ind(tile_max_lat)
            x0 = self._lon2ind(tile_min_lon)
            x1 = self._lon2ind(tile_max_lon)
            if np.any(np.logical_not(tile_data.mask)):
                print 'writing tile lat %f-%f, lon %f-%f, map y %d-%d, map x %d-%d' % \
                    (tile_min_lat, tile_max_lat, 
                     tile_min_lon, tile_max_lon, y0, y1, x0, x1)
                sys.stdout.flush()
                r[y0:y1+1,x0:x1+1] = tile_data
            else:
                print 'All pixels masked in tile lat %f-%f, lon %f-%f, map y %d-%d, map x %d-%d' % \
                    (tile_min_lat, tile_max_lat, 
                     tile_min_lon, tile_max_lon, y0, y1, x0, x1)
                sys.stdout.flush()
                    
        # Store global map in a NetCDF file.
        self._create_nc_file(r, 'corrmap.nc', 'r')

        return [[]], None, None
Example #22
    def parse_arguments(self, request):
        # Parse input arguments
        self.log.debug("Parsing arguments")
        try:
            bounding_polygon = request.get_bounding_polygon()
        except:
            raise NexusProcessingException(
                reason=
                "'b' argument is required. Must be comma-delimited float formatted as Minimum (Western) Longitude, Minimum (Southern) Latitude, Maximum (Eastern) Longitude, Maximum (Northern) Latitude",
                code=400)
        primary_ds_name = request.get_argument('primary', None)
        if primary_ds_name is None:
            raise NexusProcessingException(
                reason="'primary' argument is required", code=400)
        matchup_ds_names = request.get_argument('matchup', None)
        if matchup_ds_names is None:
            raise NexusProcessingException(
                reason="'matchup' argument is required", code=400)

        parameter_s = request.get_argument('parameter', 'sst')
        if parameter_s not in ['sst', 'sss', 'wind']:
            raise NexusProcessingException(
                reason=
                "Parameter %s not supported. Must be one of 'sst', 'sss', 'wind'."
                % parameter_s,
                code=400)

        try:
            start_time = request.get_start_datetime()
        except:
            raise NexusProcessingException(
                reason=
                "'startTime' argument is required. Can be int value seconds from epoch or string format YYYY-MM-DDTHH:mm:ssZ",
                code=400)
        try:
            end_time = request.get_end_datetime()
        except:
            raise NexusProcessingException(
                reason=
                "'endTime' argument is required. Can be int value seconds from epoch or string format YYYY-MM-DDTHH:mm:ssZ",
                code=400)

        if start_time > end_time:
            raise NexusProcessingException(
                reason=
                "The starting time must be before the ending time. Received startTime: %s, endTime: %s"
                % (request.get_start_datetime().strftime(ISO_8601),
                   request.get_end_datetime().strftime(ISO_8601)),
                code=400)

        depth_min = request.get_decimal_arg('depthMin', default=None)
        depth_max = request.get_decimal_arg('depthMax', default=None)

        if depth_min is not None and depth_max is not None and depth_min >= depth_max:
            raise NexusProcessingException(
                reason="Depth Min should be less than Depth Max", code=400)

        time_tolerance = request.get_int_arg('tt', default=86400)
        radius_tolerance = request.get_decimal_arg('rt', default=1000.0)
        platforms = request.get_argument('platforms', None)
        if platforms is None:
            raise NexusProcessingException(
                reason="'platforms' argument is required", code=400)
        try:
            p_validation = platforms.split(',')
            p_validation = [int(p) for p in p_validation]
            del p_validation
        except:
            raise NexusProcessingException(
                reason="platforms must be a comma-delimited list of integers",
                code=400)

        match_once = request.get_boolean_arg("matchOnce", default=False)

        result_size_limit = request.get_int_arg("resultSizeLimit", default=500)

        start_seconds_from_epoch = long((start_time - EPOCH).total_seconds())
        end_seconds_from_epoch = long((end_time - EPOCH).total_seconds())

        return bounding_polygon, primary_ds_name, matchup_ds_names, parameter_s, \
               start_time, start_seconds_from_epoch, end_time, end_seconds_from_epoch, \
               depth_min, depth_max, time_tolerance, radius_tolerance, \
               platforms, match_once, result_size_limit
Example #23
    def parse_arguments(self, request):
        # Parse input arguments
        self.log.debug("Parsing arguments")

        try:
            ds = request.get_dataset()
            if type(ds) == list or type(ds) == tuple:
                ds = next(iter(ds))
        except:
            raise NexusProcessingException(
                reason="'ds' argument is required. Must be a string", code=400)

        # Do not allow time series on Climatology. At this point ds is a single dataset
        # short name (string), so check the name directly.
        if 'CLIM' in ds:
            raise NexusProcessingException(
                reason=
                "Cannot compute Latitude/Longitude Time Average plot on a climatology",
                code=400)

        try:
            bounding_polygon = request.get_bounding_polygon()
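            # Expose the polygon's bounds through the standard min/max getters for downstream code.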
            request.get_min_lon = lambda: bounding_polygon.bounds[0]
            request.get_min_lat = lambda: bounding_polygon.bounds[1]
            request.get_max_lon = lambda: bounding_polygon.bounds[2]
            request.get_max_lat = lambda: bounding_polygon.bounds[3]
        except:
            try:
                west, south, east, north = request.get_min_lon(), request.get_min_lat(), \
                    request.get_max_lon(), request.get_max_lat()
                bounding_polygon = shapely.geometry.Polygon([(west, south),
                                                             (east, south),
                                                             (east, north),
                                                             (west, north),
                                                             (west, south)])
            except:
                raise NexusProcessingException(
                    reason=
                    "'b' argument is required. Must be comma-delimited float formatted as "
                    "Minimum (Western) Longitude, Minimum (Southern) Latitude, "
                    "Maximum (Eastern) Longitude, Maximum (Northern) Latitude",
                    code=400)

        try:
            start_time = request.get_start_datetime()
        except:
            raise NexusProcessingException(
                reason=
                "'startTime' argument is required. Can be int value seconds from epoch or "
                "string format YYYY-MM-DDTHH:mm:ssZ",
                code=400)
        try:
            end_time = request.get_end_datetime()
        except:
            raise NexusProcessingException(
                reason=
                "'endTime' argument is required. Can be int value seconds from epoch or "
                "string format YYYY-MM-DDTHH:mm:ssZ",
                code=400)

        if start_time > end_time:
            raise NexusProcessingException(
                reason=
                "The starting time must be before the ending time. Received startTime: %s, endTime: %s"
                % (request.get_start_datetime().strftime(ISO_8601),
                   request.get_end_datetime().strftime(ISO_8601)),
                code=400)

        start_seconds_from_epoch = int((start_time - EPOCH).total_seconds())
        end_seconds_from_epoch = int((end_time - EPOCH).total_seconds())
        normalize_dates = request.get_normalize_dates()

        return ds, bounding_polygon, start_seconds_from_epoch, end_seconds_from_epoch, normalize_dates
Example #24
    def calc(self, request, **args):
        start = datetime.utcnow()
        # TODO Assuming Satellite primary
        bounding_polygon, primary_ds_name, matchup_ds_names, parameter_s, \
        start_time, start_seconds_from_epoch, end_time, end_seconds_from_epoch, \
        depth_min, depth_max, time_tolerance, radius_tolerance, \
        platforms, match_once, result_size_limit = self.parse_arguments(request)

        with ResultsStorage() as resultsStorage:

            execution_id = str(
                resultsStorage.insertExecution(None, start, None, None))

        self.log.debug("Querying for tiles in search domain")
        # Get tile ids in box
        tile_ids = [
            tile.tile_id for tile in self._tile_service.find_tiles_in_polygon(
                bounding_polygon,
                primary_ds_name,
                start_seconds_from_epoch,
                end_seconds_from_epoch,
                fetch_data=False,
                fl='id',
                sort=[
                    'tile_min_time_dt asc', 'tile_min_lon asc',
                    'tile_min_lat asc'
                ],
                rows=5000)
        ]

        # Call spark_matchup
        self.log.debug("Calling Spark Driver")
        try:
            spark_result = spark_matchup_driver(tile_ids,
                                                wkt.dumps(bounding_polygon),
                                                primary_ds_name,
                                                matchup_ds_names,
                                                parameter_s,
                                                depth_min,
                                                depth_max,
                                                time_tolerance,
                                                radius_tolerance,
                                                platforms,
                                                match_once,
                                                sc=self._sc)
        except Exception as e:
            self.log.exception(e)
            raise NexusProcessingException(
                reason="An unknown error occurred while computing matches",
                code=500)

        end = datetime.utcnow()

        self.log.debug("Building and saving results")
        args = {
            "primary": primary_ds_name,
            "matchup": matchup_ds_names,
            "startTime": start_time,
            "endTime": end_time,
            "bbox": request.get_argument('b'),
            "timeTolerance": time_tolerance,
            "radiusTolerance": float(radius_tolerance),
            "platforms": platforms,
            "parameter": parameter_s
        }

        if depth_min is not None:
            args["depthMin"] = float(depth_min)

        if depth_max is not None:
            args["depthMax"] = float(depth_max)

        total_keys = len(spark_result.keys())
        total_values = sum(len(v) for v in spark_result.itervalues())
        details = {
            "timeToComplete": int((end - start).total_seconds()),
            "numInSituRecords": 0,
            "numInSituMatched": total_values,
            "numGriddedChecked": 0,
            "numGriddedMatched": total_keys
        }

        matches = Matchup.convert_to_matches(spark_result)

        def do_result_insert():
            with ResultsStorage() as storage:
                storage.insertResults(results=matches,
                                      params=args,
                                      stats=details,
                                      startTime=start,
                                      completeTime=end,
                                      userEmail="",
                                      execution_id=execution_id)

        threading.Thread(target=do_result_insert).start()

        if 0 < result_size_limit < len(matches):
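            # Too many matches to return inline; respond 202 and let the client fetch the stored results by execution id later.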
            result = DomsQueryResults(results=None,
                                      args=args,
                                      details=details,
                                      bounds=None,
                                      count=None,
                                      computeOptions=None,
                                      executionId=execution_id,
                                      status_code=202)
        else:
            result = DomsQueryResults(results=matches,
                                      args=args,
                                      details=details,
                                      bounds=None,
                                      count=None,
                                      computeOptions=None,
                                      executionId=execution_id)

        return result
Example #25
    def getTimeSeriesStatsForBoxSingleDataSet(self, bounding_polygon, ds, start_seconds_from_epoch,
                                              end_seconds_from_epoch,
                                              apply_seasonal_cycle_filter=True, apply_low_pass_filter=True):

        the_time = datetime.now()
        daysinrange = self._get_tile_service().find_days_in_range_asc(bounding_polygon.bounds[1],
                                                                bounding_polygon.bounds[3],
                                                                bounding_polygon.bounds[0],
                                                                bounding_polygon.bounds[2],
                                                                ds,
                                                                start_seconds_from_epoch,
                                                                end_seconds_from_epoch)
        logger.info("Finding days in range took %s for dataset %s" % (str(datetime.now() - the_time), ds))

        if len(daysinrange) == 0:
            raise NoDataException(reason="No data found for selected timeframe")

        the_time = datetime.now()
        maxprocesses = int(self.algorithm_config.get("multiprocessing", "maxprocesses"))

        results = []
        if maxprocesses == 1:
            calculator = TimeSeriesCalculator()
            for dayinseconds in daysinrange:
                result = calculator.calc_average_on_day(bounding_polygon.wkt, ds, dayinseconds)
                results += [result] if result else []
        else:
            # Create a task to calc average difference for each day
            manager = Manager()
            work_queue = manager.Queue()
            done_queue = manager.Queue()
            for dayinseconds in daysinrange:
                work_queue.put(
                    ('calc_average_on_day', bounding_polygon.wkt, ds, dayinseconds))
            [work_queue.put(SENTINEL) for _ in range(0, maxprocesses)]

            # Start new processes to handle the work
            pool = Pool(maxprocesses)
            [pool.apply_async(pool_worker, (work_queue, done_queue)) for _ in range(0, maxprocesses)]
            pool.close()

            # Collect the results as [(day (in ms), average difference for that day)]
            for i in range(0, len(daysinrange)):
                result = done_queue.get()
                try:
                    error_str = result['error']
                    logger.error(error_str)
                    raise NexusProcessingException(reason="Error calculating average by day.")
                except KeyError:
                    pass

                results += [result] if result else []

            pool.terminate()
            manager.shutdown()

        results = sorted(results, key=lambda entry: entry["time"])
        logger.info("Time series calculation took %s for dataset %s" % (str(datetime.now() - the_time), ds))

        if apply_seasonal_cycle_filter:
            the_time = datetime.now()
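            # Subtract each month's climatological mean/min/max to produce deseasonalized values.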
            for result in results:
                month = datetime.utcfromtimestamp(result['time']).month
                month_mean, month_max, month_min = self.calculate_monthly_average(month, bounding_polygon.wkt, ds)
                seasonal_mean = result['mean'] - month_mean
                seasonal_min = result['min'] - month_min
                seasonal_max = result['max'] - month_max
                result['meanSeasonal'] = seasonal_mean
                result['minSeasonal'] = seasonal_min
                result['maxSeasonal'] = seasonal_max
            logger.info(
                "Seasonal calculation took %s for dataset %s" % (str(datetime.now() - the_time), ds))

        the_time = datetime.now()
        filtering.applyAllFiltersOnField(results, 'mean', applySeasonal=False, applyLowPass=apply_low_pass_filter)
        filtering.applyAllFiltersOnField(results, 'max', applySeasonal=False, applyLowPass=apply_low_pass_filter)
        filtering.applyAllFiltersOnField(results, 'min', applySeasonal=False, applyLowPass=apply_low_pass_filter)

        if apply_seasonal_cycle_filter and apply_low_pass_filter:
            try:
                filtering.applyFiltersOnField(results, 'meanSeasonal', applySeasonal=False, applyLowPass=True,
                                              append="LowPass")
                filtering.applyFiltersOnField(results, 'minSeasonal', applySeasonal=False, applyLowPass=True,
                                              append="LowPass")
                filtering.applyFiltersOnField(results, 'maxSeasonal', applySeasonal=False, applyLowPass=True,
                                              append="LowPass")
            except Exception as e:
                # If it doesn't work log the error but ignore it
                tb = traceback.format_exc()
                logger.warn("Error calculating SeasonalLowPass filter:\n%s" % tb)

        logger.info(
            "LowPass filter calculation took %s for dataset %s" % (str(datetime.now() - the_time), ds))

        return results, {}

    def calc(self, request, **args):
        """

        :param request: StatsComputeOptions
        :param args: dict
        :return:
        """
        start_time = datetime.now()
        ds, bounding_polygon, start_seconds_from_epoch, end_seconds_from_epoch, apply_seasonal_cycle_filter, apply_low_pass_filter, nparts_requested, normalize_dates = self.parse_arguments(
            request)
        metrics_record = self._create_metrics_record()

        resultsRaw = []

        for shortName in ds:

            the_time = datetime.now()
            daysinrange = self._get_tile_service().find_days_in_range_asc(
                bounding_polygon.bounds[1],
                bounding_polygon.bounds[3],
                bounding_polygon.bounds[0],
                bounding_polygon.bounds[2],
                shortName,
                start_seconds_from_epoch,
                end_seconds_from_epoch,
                metrics_callback=metrics_record.record_metrics)
            self.log.info("Finding days in range took %s for dataset %s" %
                          (str(datetime.now() - the_time), shortName))

            ndays = len(daysinrange)
            if ndays == 0:
                raise NoDataException(
                    reason="No data found for selected timeframe")

            self.log.debug('Found {0} days in range'.format(ndays))
            for i, d in enumerate(daysinrange):
                self.log.debug('{0}, {1}'.format(i,
                                                 datetime.utcfromtimestamp(d)))
            spark_nparts = self._spark_nparts(nparts_requested)
            self.log.info('Using {} partitions'.format(spark_nparts))
            results, meta = spark_driver(daysinrange,
                                         bounding_polygon,
                                         shortName,
                                         self._tile_service_factory,
                                         metrics_record.record_metrics,
                                         normalize_dates,
                                         spark_nparts=spark_nparts,
                                         sc=self._sc)

            if apply_seasonal_cycle_filter:
                the_time = datetime.now()
                # get time series for _clim dataset
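                # The climatology is expected to be published as a companion
                # dataset named "<shortName>_clim" spanning a single year
                # (queried below from time 0 to SECONDS_IN_ONE_YEAR).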
                shortName_clim = shortName + "_clim"
                daysinrange_clim = self._get_tile_service(
                ).find_days_in_range_asc(
                    bounding_polygon.bounds[1],
                    bounding_polygon.bounds[3],
                    bounding_polygon.bounds[0],
                    bounding_polygon.bounds[2],
                    shortName_clim,
                    0,
                    SECONDS_IN_ONE_YEAR,
                    metrics_callback=metrics_record.record_metrics)
                if len(daysinrange_clim) == 0:
                    raise NexusProcessingException(
                        reason=
                        "There is no climatology data present for dataset " +
                        shortName + ".")
                results_clim, _ = spark_driver(daysinrange_clim,
                                               bounding_polygon,
                                               shortName_clim,
                                               self._tile_service_factory,
                                               metrics_record.record_metrics,
                                               normalize_dates=False,
                                               spark_nparts=spark_nparts,
                                               sc=self._sc)
                clim_indexed_by_month = {
                    datetime.utcfromtimestamp(result['time']).month: result
                    for result in results_clim
                }
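                # Index the climatology results by calendar month (1-12) so each
                # daily result can be matched to its month's climatological stats.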
                if len(clim_indexed_by_month) < 12:
                    raise NexusProcessingException(
                        reason="There are only " + str(len(clim_indexed_by_month)) +
                               " months of climatology data for dataset " + shortName +
                               ". A full year of climatology data is required for computing deseasoned time series.")

                for result in results:
                    month = datetime.utcfromtimestamp(result['time']).month

                    result['meanSeasonal'] = result['mean'] - clim_indexed_by_month[month]['mean']
                    result['minSeasonal'] = result['min'] - clim_indexed_by_month[month]['min']
                    result['maxSeasonal'] = result['max'] - clim_indexed_by_month[month]['max']
                self.log.info("Seasonal calculation took %s for dataset %s" %
                              (str(datetime.now() - the_time), shortName))

            the_time = datetime.now()
            filtering.applyAllFiltersOnField(
                results,
                'mean',
                applySeasonal=False,
                applyLowPass=apply_low_pass_filter)
            filtering.applyAllFiltersOnField(
                results,
                'max',
                applySeasonal=False,
                applyLowPass=apply_low_pass_filter)
            filtering.applyAllFiltersOnField(
                results,
                'min',
                applySeasonal=False,
                applyLowPass=apply_low_pass_filter)

            if apply_seasonal_cycle_filter and apply_low_pass_filter:
                try:
                    filtering.applyFiltersOnField(results,
                                                  'meanSeasonal',
                                                  applySeasonal=False,
                                                  applyLowPass=True,
                                                  append="LowPass")
                    filtering.applyFiltersOnField(results,
                                                  'minSeasonal',
                                                  applySeasonal=False,
                                                  applyLowPass=True,
                                                  append="LowPass")
                    filtering.applyFiltersOnField(results,
                                                  'maxSeasonal',
                                                  applySeasonal=False,
                                                  applyLowPass=True,
                                                  append="LowPass")
                except Exception as e:
                    # If it doesn't work log the error but ignore it
                    tb = traceback.format_exc()
                    self.log.warn(
                        "Error calculating SeasonalLowPass filter:\n%s" % tb)

            resultsRaw.append([results, meta])
            self.log.info("LowPass filter calculation took %s for dataset %s" %
                          (str(datetime.now() - the_time), shortName))

            the_time = datetime.now()
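            # Also write the daily mean time series to a NetCDF file (ts.nc),
            # using -9999. as the missing-value fill.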
            self._create_nc_file_time1d(np.array(results),
                                        'ts.nc',
                                        'mean',
                                        fill=-9999.)
            self.log.info("NetCDF generation took %s for dataset %s" %
                          (str(datetime.now() - the_time), shortName))

        the_time = datetime.now()
        results = self._mergeResults(resultsRaw)

        if len(ds) == 2:
            try:
                stats = TimeSeriesSparkHandlerImpl.calculate_comparison_stats(
                    results)
            except Exception:
                stats = {}
                tb = traceback.format_exc()
                self.log.warn("Error when calculating comparison stats:\n%s" %
                              tb)
        else:
            stats = {}

        meta = []
        for singleRes in resultsRaw:
            meta.append(singleRes[1])

        res = TimeSeriesResults(results=results,
                                meta=meta,
                                stats=stats,
                                computeOptions=None,
                                minLat=bounding_polygon.bounds[1],
                                maxLat=bounding_polygon.bounds[3],
                                minLon=bounding_polygon.bounds[0],
                                maxLon=bounding_polygon.bounds[2],
                                ds=ds,
                                startTime=start_seconds_from_epoch,
                                endTime=end_seconds_from_epoch)

        total_duration = (datetime.now() - start_time).total_seconds()
        metrics_record.record_metrics(actual_time=total_duration)
        metrics_record.print_metrics(logger)

        self.log.info("Merging results and calculating comparisons took %s" %
                      (str(datetime.now() - the_time)))
        return res
Example #27
0
    def calc(self, computeOptions, **args):

        spark_master, spark_nexecs, spark_nparts = computeOptions.get_spark_cfg(
        )
        self._setQueryParams(computeOptions.get_dataset(),
                             (float(computeOptions.get_min_lat()),
                              float(computeOptions.get_max_lat()),
                              float(computeOptions.get_min_lon()),
                              float(computeOptions.get_max_lon())),
                             computeOptions.get_start_time(),
                             computeOptions.get_end_time(),
                             spark_master=spark_master,
                             spark_nexecs=spark_nexecs,
                             spark_nparts=spark_nparts)

        self.log.debug('ds = {0}'.format(self._ds))
        if len(self._ds) != 2:
            raise NexusProcessingException(
                reason=
                "Requires two datasets for comparison. Specify request parameter ds=Dataset_1,Dataset_2",
                code=400)
        if any('CLIM' in dataset for dataset in self._ds):
            raise NexusProcessingException(
                reason="Cannot compute correlation on a climatology", code=400)

        nexus_tiles = self._find_global_tile_set()
        # print 'tiles:'
        # for tile in nexus_tiles:
        #     print tile.granule
        #     print tile.section_spec
        #     print 'lat:', tile.latitudes
        #     print 'lon:', tile.longitudes

        if len(nexus_tiles) == 0:
            raise NoDataException(
                reason="No data found for selected timeframe")

        self.log.debug('Found {0} tiles'.format(len(nexus_tiles)))
        self.log.debug(
            'Using Native resolution: lat_res={0}, lon_res={1}'.format(
                self._latRes, self._lonRes))
        nlats = int((self._maxLat - self._minLatCent) / self._latRes) + 1
        nlons = int((self._maxLon - self._minLonCent) / self._lonRes) + 1
        self.log.debug('nlats={0}, nlons={1}'.format(nlats, nlons))

        # Create array of tuples to pass to Spark map function
        nexus_tiles_spark = [[
            self._find_tile_bounds(t), self._startTime, self._endTime, self._ds
        ] for t in nexus_tiles]

        # Remove empty tiles (should have bounds set to None)
        bad_tile_inds = np.where([t[0] is None for t in nexus_tiles_spark])[0]
        for i in np.flipud(bad_tile_inds):
            del nexus_tiles_spark[i]

        # Expand Spark map tuple array by duplicating each entry N times,
        # where N is the number of ways we want the time dimension carved up.
        num_time_parts = 72
        # num_time_parts = 2
        # num_time_parts = 1
        nexus_tiles_spark = np.repeat(nexus_tiles_spark,
                                      num_time_parts,
                                      axis=0)
        self.log.debug('repeated len(nexus_tiles_spark) = {0}'.format(
            len(nexus_tiles_spark)))

        # Set the time boundaries for each of the Spark map tuples.
        # Every Nth element in the array gets the same time bounds.
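        # For example, with tiles [A, B] and num_time_parts = 3 the repeated
        # array is [A, A, A, B, B, B], and the ranges assigned below give each
        # tile's copies the same sequence of three consecutive time windows.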
        spark_part_times = np.linspace(self._startTime,
                                       self._endTime + 1,
                                       num_time_parts + 1,
                                       dtype=np.int64)

        spark_part_time_ranges = \
            np.repeat([[[spark_part_times[i],
                         spark_part_times[i + 1] - 1] for i in range(num_time_parts)]],
                      len(nexus_tiles_spark) / num_time_parts, axis=0).reshape((len(nexus_tiles_spark), 2))
        self.log.debug(
            'spark_part_time_ranges={0}'.format(spark_part_time_ranges))
        nexus_tiles_spark[:, 1:3] = spark_part_time_ranges
        # print 'nexus_tiles_spark final = '
        # for i in range(len(nexus_tiles_spark)):
        #    print nexus_tiles_spark[i]

        # Launch Spark computations
        # print 'nexus_tiles_spark=',nexus_tiles_spark
        rdd = self._sc.parallelize(nexus_tiles_spark, self._spark_nparts)
        sum_tiles_part = rdd.map(self._map)
        # print "sum_tiles_part = ",sum_tiles_part.collect()
        sum_tiles = \
            sum_tiles_part.combineByKey(lambda val: val,
                                        lambda x, val: (x[0] + val[0],
                                                        x[1] + val[1],
                                                        x[2] + val[2],
                                                        x[3] + val[3],
                                                        x[4] + val[4],
                                                        x[5] + val[5]),
                                        lambda x, y: (x[0] + y[0],
                                                      x[1] + y[1],
                                                      x[2] + y[2],
                                                      x[3] + y[3],
                                                      x[4] + y[4],
                                                      x[5] + y[5]))
        # Convert the N (pixel-wise count) array for each tile to be a
        # NumPy masked array.  That is the last array in the tuple of
        # intermediate summation arrays.  Set mask to True if count is 0.
        sum_tiles = \
            sum_tiles.map(lambda (bounds, (sum_x, sum_y, sum_xx,
            sum_yy, sum_xy, n)):
                          (bounds, (sum_x, sum_y, sum_xx, sum_yy, sum_xy,
                                    np.ma.array(n,
                                                mask=~(n.astype(bool))))))

        # print 'sum_tiles = ',sum_tiles.collect()

        # For each pixel in each tile compute an array of Pearson
        # correlation coefficients.  The map function is called once
        # per tile.  The result of this map operation is a list of 3-tuples of
        # (bounds, r, n) for each tile (r=Pearson correlation coefficient
        # and n=number of input values that went into each pixel with
        # any masked values not included).
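        # This is the one-pass formulation
        #   r = (sum_xy - sum_x*sum_y/n) /
        #       sqrt((sum_xx - sum_x**2/n) * (sum_yy - sum_y**2/n))
        # which is algebraically equivalent to Pearson's r on centered values.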
        corr_tiles = \
            sum_tiles.map(lambda (bounds, (sum_x, sum_y, sum_xx, sum_yy,
            sum_xy, n)):
                          (bounds,
                           np.ma.array(((sum_xy - sum_x * sum_y / n) /
                                        np.sqrt((sum_xx - sum_x * sum_x / n) *
                                                (sum_yy - sum_y * sum_y / n))),
                                       mask=~(n.astype(bool))),
                           n)).collect()

        r = np.zeros((nlats, nlons), dtype=np.float64, order='C')
        n = np.zeros((nlats, nlons), dtype=np.uint32, order='C')

        # The tiles below are NOT Nexus objects.  They are tuples
        # with the following for each correlation map subset:
        # (1) lat-lon bounding box, (2) array of correlation r values,
        # and (3) array of count n values.
        for tile in corr_tiles:
            ((tile_min_lat, tile_max_lat, tile_min_lon, tile_max_lon),
             tile_data, tile_cnt) = tile
            y0 = self._lat2ind(tile_min_lat)
            y1 = self._lat2ind(tile_max_lat)
            x0 = self._lon2ind(tile_min_lon)
            x1 = self._lon2ind(tile_max_lon)
            self.log.debug(
                'writing tile lat {0}-{1}, lon {2}-{3}, map y {4}-{5}, map x {6}-{7}'
                .format(tile_min_lat, tile_max_lat, tile_min_lon, tile_max_lon,
                        y0, y1, x0, x1))
            r[y0:y1 + 1, x0:x1 + 1] = tile_data
            n[y0:y1 + 1, x0:x1 + 1] = tile_cnt

        # Store global map in a NetCDF file.
        self._create_nc_file(r, 'corrmap.nc', 'r')

        # Create dict for JSON response
        results = [[{
            'r': r[y, x],
            'cnt': int(n[y, x]),
            'lat': self._ind2lat(y),
            'lon': self._ind2lon(x)
        } for x in range(r.shape[1])] for y in range(r.shape[0])]

        return CorrelationResults(results)
Example #28
0
    def calc(self, computeOptions, **args):
        """

        :param computeOptions: StatsComputeOptions
        :param args: dict
        :return:
        """

        self._setQueryParams(computeOptions.get_dataset()[0],
                             (float(computeOptions.get_min_lat()),
                              float(computeOptions.get_max_lat()),
                              float(computeOptions.get_min_lon()),
                              float(computeOptions.get_max_lon())),
                             computeOptions.get_start_time(),
                             computeOptions.get_end_time())

        self._find_native_resolution()
        print 'Using Native resolution: lat_res=%f, lon_res=%f' % (
            self._latRes, self._lonRes)
        self._minLatCent = self._minLat + self._latRes / 2
        self._minLonCent = self._minLon + self._lonRes / 2
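        # Work with grid-cell centers: offset the bounding-box edges by half a
        # cell, then count whole cells between the min and max centers.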
        nlats = int((self._maxLat - self._minLatCent) / self._latRes) + 1
        nlons = int((self._maxLon - self._minLonCent) / self._lonRes) + 1
        self._maxLatCent = self._minLatCent + (nlats - 1) * self._latRes
        self._maxLonCent = self._minLonCent + (nlons - 1) * self._lonRes
        print 'nlats=', nlats, 'nlons=', nlons
        print 'center lat range = %f to %f' % (self._minLatCent,
                                               self._maxLatCent)
        print 'center lon range = %f to %f' % (self._minLonCent,
                                               self._maxLonCent)
        sys.stdout.flush()
        a = np.zeros((nlats, nlons), dtype=np.float64, order='C')
        n = np.zeros((nlats, nlons), dtype=np.float64, order='C')

        nexus_tiles = self._find_global_tile_set()
        # print 'tiles:'
        # for tile in nexus_tiles:
        #     print tile.granule
        #     print tile.section_spec
        #     print 'lat:', tile.latitudes
        #     print 'lon:', tile.longitudes

        if len(nexus_tiles) == 0:
            raise NexusProcessingException.NoDataException(
                reason="No data found for selected timeframe")

        print 'Initially found %d tiles' % len(nexus_tiles)
        sys.stdout.flush()
        self._prune_tiles(nexus_tiles)
        print 'Pruned to %d tiles' % len(nexus_tiles)
        sys.stdout.flush()
        #for tile in nexus_tiles:
        #    print 'lats: ', tile.latitudes.compressed()
        #    print 'lons: ', tile.longitudes.compressed()
        # Create array of tuples to pass to Spark map function
        cwd = os.getcwd()
        nexus_tiles_spark = [[
            self._find_tile_bounds(t), self._startTime, self._endTime,
            self._ds, cwd
        ] for t in nexus_tiles]
        #print 'nexus_tiles_spark = ', nexus_tiles_spark
        # Remove empty tiles (should have bounds set to None)
        bad_tile_inds = np.where([t[0] is None for t in nexus_tiles_spark])[0]
        for i in np.flipud(bad_tile_inds):
            del nexus_tiles_spark[i]

        # Expand Spark map tuple array by duplicating each entry N times,
        # where N is the number of ways we want the time dimension carved up.
        num_time_parts = 18
        #nexus_tiles_spark = list(itertools.chain.from_iterable(itertools.repeat(t, num_time_parts) for t in nexus_tiles_spark))
        nexus_tiles_spark = np.repeat(nexus_tiles_spark,
                                      num_time_parts,
                                      axis=0)
        print 'repeated len(nexus_tiles_spark) = ', len(nexus_tiles_spark)

        # Set the time boundaries for each of the Spark map tuples.
        # Every Nth element in the array gets the same time bounds.
        spark_part_times = np.linspace(self._startTime,
                                       self._endTime,
                                       num_time_parts + 1,
                                       dtype=np.int64)

        spark_part_time_ranges = \
            np.repeat([[[spark_part_times[i],
                         spark_part_times[i+1]] for i in range(num_time_parts)]],
                      len(nexus_tiles_spark) / num_time_parts, axis=0).reshape((len(nexus_tiles_spark), 2))
        print 'spark_part_time_ranges=', spark_part_time_ranges
        nexus_tiles_spark[:, 1:3] = spark_part_time_ranges
        print 'nexus_tiles_spark final = '
        for i in range(len(nexus_tiles_spark)):
            print nexus_tiles_spark[i]

        # Configure Spark
        sp_conf = SparkConf()
        sp_conf.setAppName("Spark Time Avg Map")
        sp_conf.set("spark.executorEnv.HOME",
                    os.path.join(os.getenv('HOME'), 'spark_exec_home'))
        sp_conf.set("spark.executorEnv.PYTHONPATH", cwd)
        #sp_conf.set("spark.yarn.executor.memoryOverhead", "4000")
        sp_conf.set("spark.executor.memory", "4g")

        #num_parts = 1
        num_parts = 16
        #num_parts = 64
        #num_parts = 128
        #num_execs = 1
        num_execs = 16
        #num_execs = 64
        cores_per_exec = 1
        sp_conf.setMaster("yarn-client")
        #sp_conf.setMaster("local[16]")
        #sp_conf.setMaster("local[1]")
        sp_conf.set("spark.executor.instances", num_execs)
        sp_conf.set("spark.executor.cores", cores_per_exec)

        #print sp_conf.getAll()
        sc = SparkContext(conf=sp_conf)

        # Launch Spark computations
        rdd = sc.parallelize(nexus_tiles_spark, num_parts)
        sum_count_part = rdd.map(self._map)
        sum_count = \
            sum_count_part.combineByKey(lambda val: val,
                                        lambda x,val: (x[0]+val[0],
                                                       x[1]+val[1]),
                                        lambda x,y: (x[0]+y[0], x[1]+y[1]))
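        # Per-pixel mean: sum/count where count > 0, otherwise 0.  The result is
        # a nested list of {'avg', 'cnt'} dicts per tile, keyed by tile bounds.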
        avg_tiles = \
            sum_count.map(lambda (bounds, (sum_tile, cnt_tile)):
                              (bounds, [[{'avg': (sum_tile[y,x]/cnt_tile[y,x])
                                          if (cnt_tile[y,x] > 0) else 0.,
                                          'cnt': cnt_tile[y,x]}
                                         for x in
                                         range(sum_tile.shape[1])]
                                        for y in
                                        range(sum_tile.shape[0])])).collect()

        #avg_tiles = map(self._map, nexus_tiles)

        # Combine subset results to produce global map.
        #
        # The tiles below are NOT Nexus objects.  They are tuples
        # with the time avg map data and lat-lon bounding box.
        for tile in avg_tiles:
            if tile is not None:
                ((tile_min_lat, tile_max_lat, tile_min_lon, tile_max_lon),
                 tile_stats) = tile
                tile_data = np.ma.array([[
                    tile_stats[y][x]['avg'] for x in range(len(tile_stats[0]))
                ] for y in range(len(tile_stats))])
                tile_cnt = np.array([[
                    tile_stats[y][x]['cnt'] for x in range(len(tile_stats[0]))
                ] for y in range(len(tile_stats))])
                tile_data.mask = ~(tile_cnt.astype(bool))
                y0 = self._lat2ind(tile_min_lat)
                y1 = y0 + tile_data.shape[0] - 1
                x0 = self._lon2ind(tile_min_lon)
                x1 = x0 + tile_data.shape[1] - 1
                if np.any(np.logical_not(tile_data.mask)):
                    print 'writing tile lat %f-%f, lon %f-%f, map y %d-%d, map x %d-%d' % \
                        (tile_min_lat, tile_max_lat,
                         tile_min_lon, tile_max_lon, y0, y1, x0, x1)
                    sys.stdout.flush()
                    a[y0:y1 + 1, x0:x1 + 1] = tile_data
                    n[y0:y1 + 1, x0:x1 + 1] = tile_cnt
                else:
                    print 'All pixels masked in tile lat %f-%f, lon %f-%f, map y %d-%d, map x %d-%d' % \
                        (tile_min_lat, tile_max_lat,
                         tile_min_lon, tile_max_lon, y0, y1, x0, x1)
                    sys.stdout.flush()

        # Store global map in a NetCDF file.
        self._create_nc_file(a, 'tam.nc', 'val')

        # Create dict for JSON response
        # Build the response as results[lat_index][lon_index] to match the
        # (nlats, nlons) orientation of the global arrays.
        results = [[{
            'avg': a[y, x],
            'cnt': n[y, x]
        } for x in range(a.shape[1])] for y in range(a.shape[0])]
        return TimeAvgMapSparkResults(results=results,
                                      meta={},
                                      computeOptions=computeOptions)
Example #29
0
    def calc(self, computeOptions, **args):
        """

        :param computeOptions: StatsComputeOptions
        :param args: dict
        :return:
        """

        spark_master, spark_nexecs, spark_nparts = computeOptions.get_spark_cfg(
        )
        self._setQueryParams(computeOptions.get_dataset()[0],
                             (float(computeOptions.get_min_lat()),
                              float(computeOptions.get_max_lat()),
                              float(computeOptions.get_min_lon()),
                              float(computeOptions.get_max_lon())),
                             computeOptions.get_start_time(),
                             computeOptions.get_end_time(),
                             spark_master=spark_master,
                             spark_nexecs=spark_nexecs,
                             spark_nparts=spark_nparts)

        if 'CLIM' in self._ds:
            raise NexusProcessingException(
                reason=
                "Cannot compute Latitude/Longitude Time Average plot on a climatology",
                code=400)

        nexus_tiles = self._find_global_tile_set()
        # print 'tiles:'
        # for tile in nexus_tiles:
        #     print tile.granule
        #     print tile.section_spec
        #     print 'lat:', tile.latitudes
        #     print 'lon:', tile.longitudes

        if len(nexus_tiles) == 0:
            raise NoDataException(
                reason="No data found for selected timeframe")

        self.log.debug('Found {0} tiles'.format(len(nexus_tiles)))

        self.log.debug(
            'Using Native resolution: lat_res={0}, lon_res={1}'.format(
                self._latRes, self._lonRes))
        nlats = int((self._maxLat - self._minLatCent) / self._latRes) + 1
        nlons = int((self._maxLon - self._minLonCent) / self._lonRes) + 1
        self.log.debug('nlats={0}, nlons={1}'.format(nlats, nlons))
        self.log.debug('center lat range = {0} to {1}'.format(
            self._minLatCent, self._maxLatCent))
        self.log.debug('center lon range = {0} to {1}'.format(
            self._minLonCent, self._maxLonCent))

        # for tile in nexus_tiles:
        #    print 'lats: ', tile.latitudes.compressed()
        #    print 'lons: ', tile.longitudes.compressed()
        # Create array of tuples to pass to Spark map function
        nexus_tiles_spark = [[
            self._find_tile_bounds(t), self._startTime, self._endTime, self._ds
        ] for t in nexus_tiles]
        # print 'nexus_tiles_spark = ', nexus_tiles_spark
        # Remove empty tiles (should have bounds set to None)
        bad_tile_inds = np.where([t[0] is None for t in nexus_tiles_spark])[0]
        for i in np.flipud(bad_tile_inds):
            del nexus_tiles_spark[i]

        # Expand Spark map tuple array by duplicating each entry N times,
        # where N is the number of ways we want the time dimension carved up.
        num_time_parts = 72
        # num_time_parts = 1
        nexus_tiles_spark = np.repeat(nexus_tiles_spark,
                                      num_time_parts,
                                      axis=0)
        self.log.debug('repeated len(nexus_tiles_spark) = {0}'.format(
            len(nexus_tiles_spark)))

        # Set the time boundaries for each of the Spark map tuples.
        # Every Nth element in the array gets the same time bounds.
        spark_part_times = np.linspace(self._startTime,
                                       self._endTime,
                                       num_time_parts + 1,
                                       dtype=np.int64)

        spark_part_time_ranges = \
            np.repeat([[[spark_part_times[i],
                         spark_part_times[i + 1]] for i in range(num_time_parts)]],
                      len(nexus_tiles_spark) / num_time_parts, axis=0).reshape((len(nexus_tiles_spark), 2))
        self.log.debug(
            'spark_part_time_ranges={0}'.format(spark_part_time_ranges))
        nexus_tiles_spark[:, 1:3] = spark_part_time_ranges
        # print 'nexus_tiles_spark final = '
        # for i in range(len(nexus_tiles_spark)):
        #    print nexus_tiles_spark[i]

        # Launch Spark computations
        rdd = self._sc.parallelize(nexus_tiles_spark, self._spark_nparts)
        sum_count_part = rdd.map(self._map)
        sum_count = \
            sum_count_part.combineByKey(lambda val: val,
                                        lambda x, val: (x[0] + val[0],
                                                        x[1] + val[1]),
                                        lambda x, y: (x[0] + y[0], x[1] + y[1]))
        fill = self._fill
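        # Per-pixel mean: sum/count where count > 0, otherwise the dataset fill
        # value.  Produces a nested list of {'avg', 'cnt'} dicts per tile.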
        avg_tiles = \
            sum_count.map(lambda (bounds, (sum_tile, cnt_tile)):
                          (bounds, [[{'avg': (sum_tile[y, x] / cnt_tile[y, x])
                          if (cnt_tile[y, x] > 0)
                          else fill,
                                      'cnt': cnt_tile[y, x]}
                                     for x in
                                     range(sum_tile.shape[1])]
                                    for y in
                                    range(sum_tile.shape[0])])).collect()

        # Combine subset results to produce global map.
        #
        # The tiles below are NOT Nexus objects.  They are tuples
        # with the time avg map data and lat-lon bounding box.
        a = np.zeros((nlats, nlons), dtype=np.float64, order='C')
        n = np.zeros((nlats, nlons), dtype=np.uint32, order='C')
        for tile in avg_tiles:
            if tile is not None:
                ((tile_min_lat, tile_max_lat, tile_min_lon, tile_max_lon),
                 tile_stats) = tile
                tile_data = np.ma.array([[
                    tile_stats[y][x]['avg'] for x in range(len(tile_stats[0]))
                ] for y in range(len(tile_stats))])
                tile_cnt = np.array([[
                    tile_stats[y][x]['cnt'] for x in range(len(tile_stats[0]))
                ] for y in range(len(tile_stats))])
                tile_data.mask = ~(tile_cnt.astype(bool))
                y0 = self._lat2ind(tile_min_lat)
                y1 = y0 + tile_data.shape[0] - 1
                x0 = self._lon2ind(tile_min_lon)
                x1 = x0 + tile_data.shape[1] - 1
                if np.any(np.logical_not(tile_data.mask)):
                    self.log.debug(
                        'writing tile lat {0}-{1}, lon {2}-{3}, map y {4}-{5}, map x {6}-{7}'
                        .format(tile_min_lat, tile_max_lat, tile_min_lon,
                                tile_max_lon, y0, y1, x0, x1))
                    a[y0:y1 + 1, x0:x1 + 1] = tile_data
                    n[y0:y1 + 1, x0:x1 + 1] = tile_cnt
                else:
                    self.log.debug(
                        'All pixels masked in tile lat {0}-{1}, lon {2}-{3}, map y {4}-{5}, map x {6}-{7}'
                        .format(tile_min_lat, tile_max_lat, tile_min_lon,
                                tile_max_lon, y0, y1, x0, x1))

        # Store global map in a NetCDF file.
        self._create_nc_file(a, 'tam.nc', 'val', fill=self._fill)

        # Create dict for JSON response
        results = [[{
            'avg': a[y, x],
            'cnt': int(n[y, x]),
            'lat': self._ind2lat(y),
            'lon': self._ind2lon(x)
        } for x in range(a.shape[1])] for y in range(a.shape[0])]

        return TimeAvgMapSparkResults(results=results,
                                      meta={},
                                      computeOptions=computeOptions)
Example #30
0
    def calc(self, computeOptions, **args):
        """

        :param computeOptions: StatsComputeOptions
        :param args: dict
        :return:
        """

        ds = computeOptions.get_dataset()

        if not isinstance(ds, (list, tuple)):
            ds = (ds, )

        if any('CLIM' in dataset for dataset in ds):
            raise NexusProcessingException(
                reason="Cannot compute time series on a climatology", code=400)

        resultsRaw = []

        spark_master, spark_nexecs, spark_nparts = computeOptions.get_spark_cfg(
        )
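        # Compute the time series for each requested dataset independently and
        # merge the per-dataset results afterwards for comparison statistics.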
        for shortName in ds:
            results, meta = self.getTimeSeriesStatsForBoxSingleDataSet(
                computeOptions.get_min_lat(),
                computeOptions.get_max_lat(),
                computeOptions.get_min_lon(),
                computeOptions.get_max_lon(),
                shortName,
                computeOptions.get_start_time(),
                computeOptions.get_end_time(),
                computeOptions.get_apply_seasonal_cycle_filter(),
                computeOptions.get_apply_low_pass_filter(),
                spark_master=spark_master,
                spark_nexecs=spark_nexecs,
                spark_nparts=spark_nparts)
            resultsRaw.append([results, meta])

        results = self._mergeResults(resultsRaw)

        if len(ds) == 2:
            try:
                stats = self.calculateComparisonStats(results, suffix="")
            except Exception:
                stats = {}
                tb = traceback.format_exc()
                self.log.warn("Error when calculating comparison stats:\n%s" %
                              tb)
            if computeOptions.get_apply_seasonal_cycle_filter():
                try:
                    s = self.calculateComparisonStats(results,
                                                      suffix="Seasonal")
                    stats = self._mergeDicts(stats, s)
                except Exception:
                    tb = traceback.format_exc()
                    self.log.warn(
                        "Error when calculating Seasonal comparison stats:\n%s"
                        % tb)
            if computeOptions.get_apply_low_pass_filter():
                try:
                    s = self.calculateComparisonStats(results,
                                                      suffix="LowPass")
                    stats = self._mergeDicts(stats, s)
                except Exception:
                    tb = traceback.format_exc()
                    self.log.warn(
                        "Error when calculating LowPass comparison stats:\n%s"
                        % tb)
            if computeOptions.get_apply_seasonal_cycle_filter(
            ) and computeOptions.get_apply_low_pass_filter():
                try:
                    s = self.calculateComparisonStats(results,
                                                      suffix="SeasonalLowPass")
                    stats = self._mergeDicts(stats, s)
                except Exception:
                    tb = traceback.format_exc()
                    self.log.warn(
                        "Error when calculating SeasonalLowPass comparison stats:\n%s"
                        % tb)
        else:
            stats = {}

        meta = []
        for singleRes in resultsRaw:
            meta.append(singleRes[1])

        res = TimeSeriesResults(results=results,
                                meta=meta,
                                stats=stats,
                                computeOptions=computeOptions)
        return res