def calc(self, request, **args):
    min_lat, max_lat, min_lon, max_lon = (request.get_min_lat(), request.get_max_lat(),
                                          request.get_min_lon(), request.get_max_lon())
    dataset1 = request.get_argument("ds1", None)
    dataset2 = request.get_argument("ds2", None)
    start_time = request.get_start_time()
    end_time = request.get_end_time()
    simple = request.get_argument("simple", None) is not None

    averagebyday = self.get_daily_difference_average_for_box(min_lat, max_lat, min_lon, max_lon,
                                                             dataset1, dataset2, start_time, end_time)
    averagebyday = sorted(averagebyday, key=lambda dayavg: dayavg[0])

    if simple:
        import matplotlib.pyplot as plt
        from matplotlib.dates import date2num

        times = [date2num(self.date_from_ms(dayavg[0])) for dayavg in averagebyday]
        means = [dayavg[1] for dayavg in averagebyday]
        plt.plot_date(times, means, ls='solid')
        plt.xlabel('Date')
        plt.xticks(rotation=70)
        plt.ylabel(u'Difference from 5-Day mean (\u00B0C)')
        plt.title('Sea Surface Temperature (SST) Anomalies')
        plt.grid(True)
        plt.tight_layout()
        plt.savefig("test.png")
        return averagebyday, None, None
    else:
        result = NexusResults(results=[[{'time': dayms, 'mean': avg, 'ds': 0}]
                                       for dayms, avg in averagebyday],
                              stats={}, meta=self.get_meta())
        result.extendMeta(min_lat, max_lat, min_lon, max_lon, "", start_time, end_time)
        result.meta()['label'] = u'Difference from 5-Day mean (\u00B0C)'
        return result
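# --- Illustrative only: a minimal, standalone sketch of the plotting branch
# above, fed with synthetic (epoch-millisecond, anomaly) pairs in place of
# get_daily_difference_average_for_box() output. The Agg backend and the
# sample values are assumptions, not part of the handler.
def _plot_anomalies_sketch():
    from datetime import datetime
    import matplotlib
    matplotlib.use('Agg')  # render off-screen; the handler writes a PNG too
    import matplotlib.pyplot as plt
    from matplotlib.dates import date2num

    averagebyday = [(1496275200000, 0.12), (1496361600000, -0.05), (1496448000000, 0.31)]
    times = [date2num(datetime.utcfromtimestamp(ms / 1000.0)) for ms, _ in averagebyday]
    means = [avg for _, avg in averagebyday]
    plt.plot_date(times, means, ls='solid')
    plt.xlabel('Date')
    plt.ylabel(u'Difference from 5-Day mean (\u00B0C)')
    plt.savefig('anomalies.png')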
def calc(self, computeOptions, **args):
    minLat = computeOptions.get_min_lat()
    maxLat = computeOptions.get_max_lat()
    minLon = computeOptions.get_min_lon()
    maxLon = computeOptions.get_max_lon()
    ds = computeOptions.get_dataset()[0]
    startTime = computeOptions.get_start_time()
    endTime = computeOptions.get_end_time()

    res = self._tile_service.find_tiles_in_box(minLat, maxLat, minLon, maxLon, ds, startTime, endTime,
                                               fetch_data=False)

    res = NexusResults(results=res)
    res.extendMeta(minLat, maxLat, minLon, maxLon, ds, startTime, endTime)
    return res
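# --- Illustrative only: a hypothetical stub standing in for the real
# StatsComputeOptions, showing exactly which accessors the handler above
# reads. The values are made up; in production the webservice builds this
# object from the HTTP request.
class _StubComputeOptions(object):
    def get_min_lat(self): return -10.0
    def get_max_lat(self): return 10.0
    def get_min_lon(self): return -20.0
    def get_max_lon(self): return 20.0
    def get_dataset(self): return ['EXAMPLE_DATASET']
    def get_start_time(self): return 1496275200
    def get_end_time(self): return 1498867200

# handler.calc(_StubComputeOptions()) would then return a NexusResults
# wrapping the (data-free, fetch_data=False) tiles found in that box and
# time range.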
def calc(self, compute_options, **args):
    min_lat = compute_options.get_min_lat()
    max_lat = compute_options.get_max_lat()
    min_lon = compute_options.get_min_lon()
    max_lon = compute_options.get_max_lon()
    ds = compute_options.get_dataset()[0]
    start_time = compute_options.get_start_time()
    end_time = compute_options.get_end_time()
    includemeta = compute_options.get_include_meta()

    tiles = self._tile_service.get_tiles_bounded_by_box(min_lat, max_lat, min_lon, max_lon,
                                                        ds, start_time, end_time)

    data = []
    for tile in tiles:
        for nexus_point in tile.nexus_point_generator():
            data.append({
                'latitude': nexus_point.latitude,
                'longitude': nexus_point.longitude,
                'time': nexus_point.time,
                'data': [{'id': tile.tile_id, 'value': nexus_point.data_val}]
            })

    if includemeta and len(tiles) > 0:
        meta = [tile.get_summary() for tile in tiles]
    else:
        meta = None

    result = NexusResults(results=data, stats={}, meta=meta)
    result.extendMeta(min_lat, max_lat, min_lon, max_lon, "", start_time, end_time)
    return result
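# --- Illustrative only: the per-point flattening above, demonstrated with
# hypothetical stand-ins for the tile and point objects (the real ones come
# from self._tile_service and carry more fields).
from collections import namedtuple

_Point = namedtuple('_Point', ['latitude', 'longitude', 'time', 'data_val'])
_Tile = namedtuple('_Tile', ['tile_id', 'points'])

def _flatten_sketch(tiles):
    data = []
    for tile in tiles:
        for p in tile.points:  # stands in for tile.nexus_point_generator()
            data.append({'latitude': p.latitude, 'longitude': p.longitude, 'time': p.time,
                         'data': [{'id': tile.tile_id, 'value': p.data_val}]})
    return data

# _flatten_sketch([_Tile('tile-1', [_Point(-9.9, 12.6, 1496275200, 287.4)])])
# -> [{'latitude': -9.9, 'longitude': 12.6, 'time': 1496275200,
#      'data': [{'id': 'tile-1', 'value': 287.4}]}]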
def calc(self, computeOptions, **args):
    minLat = computeOptions.get_min_lat()
    maxLat = computeOptions.get_max_lat()
    minLon = computeOptions.get_min_lon()
    maxLon = computeOptions.get_max_lon()
    ds = computeOptions.get_dataset()[0]
    startTime = computeOptions.get_start_time()
    endTime = computeOptions.get_end_time()

    # TODO: update to expect tile objects back
    res = [tile.get_summary() for tile in
           self._tile_service.find_tiles_in_box(minLat, maxLat, minLon, maxLon, ds, startTime, endTime,
                                                fetch_data=False)]

    res = NexusResults(results=res)
    res.extendMeta(minLat, maxLat, minLon, maxLon, ds, startTime, endTime)
    return res
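# --- Illustrative only: this handler differs from the tile-listing handler
# above solely in mapping each tile through get_summary() before wrapping
# the list in NexusResults, so the same _StubComputeOptions sketch exercises
# it; each result element is then a per-tile metadata dict rather than a
# tile object. (The exact shape of get_summary() output is defined by the
# tile model and is not shown here.)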
def calc(self, compute_options, **args):
    """
    :param compute_options: StatsComputeOptions
    :param args: dict
    :return:
    """
    request_start_time = datetime.now()
    metrics_record = self._create_metrics_record()

    ds, bbox, start_time, end_time, nparts_requested = self.parse_arguments(compute_options)
    self._setQueryParams(ds,
                         (float(bbox.bounds[1]), float(bbox.bounds[3]),
                          float(bbox.bounds[0]), float(bbox.bounds[2])),
                         start_time, end_time)

    nexus_tiles = self._find_global_tile_set(metrics_callback=metrics_record.record_metrics)

    if len(nexus_tiles) == 0:
        raise NoDataException(reason="No data found for selected timeframe")

    self.log.debug('Found {0} tiles'.format(len(nexus_tiles)))

    daysinrange = self._get_tile_service().find_days_in_range_asc(bbox.bounds[1], bbox.bounds[3],
                                                                  bbox.bounds[0], bbox.bounds[2],
                                                                  ds, start_time, end_time,
                                                                  metrics_callback=metrics_record.record_metrics)
    ndays = len(daysinrange)
    if ndays == 0:
        raise NoDataException(reason="No data found for selected timeframe")
    self.log.debug('Found {0} days in range'.format(ndays))
    for i, d in enumerate(daysinrange):
        self.log.debug('{0}, {1}'.format(i, datetime.utcfromtimestamp(d)))
    self.log.debug('Using Native resolution: lat_res={0}, lon_res={1}'.format(self._latRes, self._lonRes))
    self.log.debug('nlats={0}, nlons={1}'.format(self._nlats, self._nlons))
    self.log.debug('center lat range = {0} to {1}'.format(self._minLatCent, self._maxLatCent))
    self.log.debug('center lon range = {0} to {1}'.format(self._minLonCent, self._maxLonCent))

    # Create array of tuples to pass to Spark map function
    nexus_tiles_spark = [[self._find_tile_bounds(t), self._startTime, self._endTime, self._ds]
                         for t in nexus_tiles]

    # Remove empty tiles (should have bounds set to None)
    bad_tile_inds = np.where([t[0] is None for t in nexus_tiles_spark])[0]
    for i in np.flipud(bad_tile_inds):
        del nexus_tiles_spark[i]

    # Expand Spark map tuple array by duplicating each entry N times,
    # where N is the number of ways we want the time dimension carved up.
    # Set the time boundaries for each of the Spark map tuples so that
    # every Nth element in the array gets the same time bounds.
    max_time_parts = 72
    num_time_parts = min(max_time_parts, ndays)

    spark_part_time_ranges = np.tile(
        np.array([a[[0, -1]] for a in np.array_split(np.array(daysinrange), num_time_parts)]),
        (len(nexus_tiles_spark), 1))
    nexus_tiles_spark = np.repeat(nexus_tiles_spark, num_time_parts, axis=0)
    nexus_tiles_spark[:, 1:3] = spark_part_time_ranges

    # Launch Spark computations
    spark_nparts = self._spark_nparts(nparts_requested)
    self.log.info('Using {} partitions'.format(spark_nparts))

    rdd = self._sc.parallelize(nexus_tiles_spark, spark_nparts)
    metrics_record.record_metrics(partitions=rdd.getNumPartitions())
    sum_count_part = rdd.map(partial(self._map, self._tile_service_factory, metrics_record.record_metrics))

    reduce_duration = 0
    reduce_start = datetime.now()
    sum_count = sum_count_part.combineByKey(lambda val: val,
                                            lambda x, val: (x[0] + val[0], x[1] + val[1]),
                                            lambda x, y: (x[0] + y[0], x[1] + y[1]))
    reduce_duration += (datetime.now() - reduce_start).total_seconds()

    avg_tiles = sum_count.map(partial(calculate_means, metrics_record.record_metrics, self._fill)).collect()

    reduce_start = datetime.now()
    # Combine subset results to produce global map.
    #
    # The tiles below are NOT Nexus objects. They are tuples
    # with the time avg map data and lat-lon bounding box.
    a = np.zeros((self._nlats, self._nlons), dtype=np.float64, order='C')
    n = np.zeros((self._nlats, self._nlons), dtype=np.uint32, order='C')

    for tile in avg_tiles:
        if tile is not None:
            ((tile_min_lat, tile_max_lat, tile_min_lon, tile_max_lon), tile_stats) = tile
            tile_data = np.ma.array(
                [[tile_stats[y][x]['avg'] for x in range(len(tile_stats[0]))] for y in range(len(tile_stats))])
            tile_cnt = np.array(
                [[tile_stats[y][x]['cnt'] for x in range(len(tile_stats[0]))] for y in range(len(tile_stats))])
            tile_data.mask = ~(tile_cnt.astype(bool))
            y0 = self._lat2ind(tile_min_lat)
            y1 = y0 + tile_data.shape[0] - 1
            x0 = self._lon2ind(tile_min_lon)
            x1 = x0 + tile_data.shape[1] - 1
            if np.any(np.logical_not(tile_data.mask)):
                self.log.debug('writing tile lat {0}-{1}, lon {2}-{3}, map y {4}-{5}, map x {6}-{7}'.format(
                    tile_min_lat, tile_max_lat, tile_min_lon, tile_max_lon, y0, y1, x0, x1))
                a[y0:y1 + 1, x0:x1 + 1] = tile_data
                n[y0:y1 + 1, x0:x1 + 1] = tile_cnt
            else:
                self.log.debug('All pixels masked in tile lat {0}-{1}, lon {2}-{3}, map y {4}-{5}, map x {6}-{7}'.format(
                    tile_min_lat, tile_max_lat, tile_min_lon, tile_max_lon, y0, y1, x0, x1))
    reduce_duration += (datetime.now() - reduce_start).total_seconds()

    # Store global map in a NetCDF file for debugging purposes.
    # If activated, this line is not thread safe and may cause errors under concurrent access.
    # self._create_nc_file(a, 'tam.nc', 'val', fill=self._fill)

    # Create dict for JSON response
    results = [[{'mean': a[y, x], 'cnt': int(n[y, x]),
                 'lat': self._ind2lat(y), 'lon': self._ind2lon(x)}
                for x in range(a.shape[1])] for y in range(a.shape[0])]

    total_duration = (datetime.now() - request_start_time).total_seconds()
    metrics_record.record_metrics(actual_time=total_duration, reduce=reduce_duration)
    metrics_record.print_metrics(self.log)

    return NexusResults(results=results, meta={}, stats=None, computeOptions=None,
                        minLat=bbox.bounds[1], maxLat=bbox.bounds[3],
                        minLon=bbox.bounds[0], maxLon=bbox.bounds[2],
                        ds=ds, startTime=start_time, endTime=end_time)
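# --- Illustrative only: a Spark-free sketch of the (sum, count) -> mean
# reduction performed via combineByKey above. The merge function is the
# same; the RDD machinery is replaced by a plain dict of keyed values, and
# the fill value stands in wherever the count is zero.
import numpy as np

def _merge(x, y):
    return (x[0] + y[0], x[1] + y[1])

_fill = -9999.0
_parts = {'bounds-a': [(np.array([[4.0, 0.0]]), np.array([[2, 0]])),
                       (np.array([[2.0, 0.0]]), np.array([[1, 0]]))]}
for _key, _vals in _parts.items():
    _sum, _cnt = _vals[0]
    for _v in _vals[1:]:
        _sum, _cnt = _merge((_sum, _cnt), _v)
    _mean = np.where(_cnt > 0, _sum / np.maximum(_cnt, 1), _fill)
    # _mean -> [[2.0, -9999.0]]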
def calc(self, compute_options, **args):
    """
    :param compute_options: StatsComputeOptions
    :param args: dict
    :return:
    """

    ds, bbox, start_time, end_time, spark_master, spark_nexecs, spark_nparts = self.parse_arguments(compute_options)
    compute_options.get_spark_cfg()

    self._setQueryParams(ds,
                         (float(bbox.bounds[1]), float(bbox.bounds[3]),
                          float(bbox.bounds[0]), float(bbox.bounds[2])),
                         start_time, end_time,
                         spark_master=spark_master, spark_nexecs=spark_nexecs, spark_nparts=spark_nparts)

    nexus_tiles = self._find_global_tile_set()

    if len(nexus_tiles) == 0:
        raise NoDataException(reason="No data found for selected timeframe")

    self.log.debug('Found {0} tiles'.format(len(nexus_tiles)))
    self.log.debug('Using Native resolution: lat_res={0}, lon_res={1}'.format(self._latRes, self._lonRes))
    self.log.debug('nlats={0}, nlons={1}'.format(self._nlats, self._nlons))
    self.log.debug('center lat range = {0} to {1}'.format(self._minLatCent, self._maxLatCent))
    self.log.debug('center lon range = {0} to {1}'.format(self._minLonCent, self._maxLonCent))

    # Create array of tuples to pass to Spark map function
    nexus_tiles_spark = [[self._find_tile_bounds(t), self._startTime, self._endTime, self._ds]
                         for t in nexus_tiles]

    # Remove empty tiles (should have bounds set to None)
    bad_tile_inds = np.where([t[0] is None for t in nexus_tiles_spark])[0]
    for i in np.flipud(bad_tile_inds):
        del nexus_tiles_spark[i]

    # Expand Spark map tuple array by duplicating each entry N times,
    # where N is the number of ways we want the time dimension carved up.
    num_time_parts = 72
    nexus_tiles_spark = np.repeat(nexus_tiles_spark, num_time_parts, axis=0)
    self.log.debug('repeated len(nexus_tiles_spark) = {0}'.format(len(nexus_tiles_spark)))

    # Set the time boundaries for each of the Spark map tuples.
    # Every Nth element in the array gets the same time bounds.
    spark_part_times = np.linspace(self._startTime, self._endTime, num_time_parts + 1, dtype=np.int64)
    spark_part_time_ranges = np.repeat(
        [[[spark_part_times[i], spark_part_times[i + 1]] for i in range(num_time_parts)]],
        len(nexus_tiles_spark) // num_time_parts,
        axis=0).reshape((len(nexus_tiles_spark), 2))
    self.log.debug('spark_part_time_ranges={0}'.format(spark_part_time_ranges))
    nexus_tiles_spark[:, 1:3] = spark_part_time_ranges

    # Launch Spark computations
    rdd = self._sc.parallelize(nexus_tiles_spark, self._spark_nparts)
    sum_count_part = rdd.map(self._map)
    sum_count = sum_count_part.combineByKey(lambda val: val,
                                            lambda x, val: (x[0] + val[0], x[1] + val[1]),
                                            lambda x, y: (x[0] + y[0], x[1] + y[1]))
    fill = self._fill

    def _to_avg(item):
        # Convert per-cell (sum, count) pairs into means, using the fill
        # value wherever the count is zero.
        bounds, (sum_tile, cnt_tile) = item
        return (bounds,
                [[{'avg': (sum_tile[y, x] / cnt_tile[y, x]) if cnt_tile[y, x] > 0 else fill,
                   'cnt': cnt_tile[y, x]}
                  for x in range(sum_tile.shape[1])]
                 for y in range(sum_tile.shape[0])])

    avg_tiles = sum_count.map(_to_avg).collect()

    # Combine subset results to produce global map.
    #
    # The tiles below are NOT Nexus objects. They are tuples
    # with the time avg map data and lat-lon bounding box.
    a = np.zeros((self._nlats, self._nlons), dtype=np.float64, order='C')
    n = np.zeros((self._nlats, self._nlons), dtype=np.uint32, order='C')

    for tile in avg_tiles:
        if tile is not None:
            ((tile_min_lat, tile_max_lat, tile_min_lon, tile_max_lon), tile_stats) = tile
            tile_data = np.ma.array(
                [[tile_stats[y][x]['avg'] for x in range(len(tile_stats[0]))] for y in range(len(tile_stats))])
            tile_cnt = np.array(
                [[tile_stats[y][x]['cnt'] for x in range(len(tile_stats[0]))] for y in range(len(tile_stats))])
            tile_data.mask = ~(tile_cnt.astype(bool))
            y0 = self._lat2ind(tile_min_lat)
            y1 = y0 + tile_data.shape[0] - 1
            x0 = self._lon2ind(tile_min_lon)
            x1 = x0 + tile_data.shape[1] - 1
            if np.any(np.logical_not(tile_data.mask)):
                self.log.debug('writing tile lat {0}-{1}, lon {2}-{3}, map y {4}-{5}, map x {6}-{7}'.format(
                    tile_min_lat, tile_max_lat, tile_min_lon, tile_max_lon, y0, y1, x0, x1))
                a[y0:y1 + 1, x0:x1 + 1] = tile_data
                n[y0:y1 + 1, x0:x1 + 1] = tile_cnt
            else:
                self.log.debug('All pixels masked in tile lat {0}-{1}, lon {2}-{3}, map y {4}-{5}, map x {6}-{7}'.format(
                    tile_min_lat, tile_max_lat, tile_min_lon, tile_max_lon, y0, y1, x0, x1))

    # Store global map in a NetCDF file.
    self._create_nc_file(a, 'tam.nc', 'val', fill=self._fill)

    # Create dict for JSON response
    results = [[{'mean': a[y, x], 'cnt': int(n[y, x]),
                 'lat': self._ind2lat(y), 'lon': self._ind2lon(x)}
                for x in range(a.shape[1])] for y in range(a.shape[0])]

    return NexusResults(results=results, meta={}, stats=None, computeOptions=None,
                        minLat=bbox.bounds[1], maxLat=bbox.bounds[3],
                        minLon=bbox.bounds[0], maxLon=bbox.bounds[2],
                        ds=ds, startTime=start_time, endTime=end_time)
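# --- Illustrative only: the mosaicking step above in miniature. A masked
# 2x2 tile is pasted into a 4x4 global accumulator at indices like those
# _lat2ind/_lon2ind would produce; masked cells carry a count of zero.
import numpy as np

_a = np.zeros((4, 4))
_n = np.zeros((4, 4), dtype=np.uint32)
_tile_cnt = np.array([[1, 0], [2, 3]])
_tile_data = np.ma.array([[1.5, 0.0], [2.5, 3.5]], mask=~_tile_cnt.astype(bool))
_y0, _x0 = 1, 2  # hypothetical _lat2ind/_lon2ind results for this tile
_y1 = _y0 + _tile_data.shape[0] - 1
_x1 = _x0 + _tile_data.shape[1] - 1
_a[_y0:_y1 + 1, _x0:_x1 + 1] = _tile_data
_n[_y0:_y1 + 1, _x0:_x1 + 1] = _tile_cnt
# _a rows 1-2, cols 2-3 now hold the tile values; everything else stays 0.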
def calc(self, compute_options, **args):
    """
    :param compute_options: StatsComputeOptions
    :param args: dict
    :return:
    """

    ds, bbox, start_time, end_time, nparts_requested = self.parse_arguments(compute_options)
    self._setQueryParams(ds,
                         (float(bbox.bounds[1]), float(bbox.bounds[3]),
                          float(bbox.bounds[0]), float(bbox.bounds[2])),
                         start_time, end_time)

    nexus_tiles = self._find_global_tile_set()

    if len(nexus_tiles) == 0:
        raise NoDataException(reason="No data found for selected timeframe")

    self.log.debug('Found {0} tiles'.format(len(nexus_tiles)))

    daysinrange = self._tile_service.find_days_in_range_asc(bbox.bounds[1], bbox.bounds[3],
                                                            bbox.bounds[0], bbox.bounds[2],
                                                            ds, start_time, end_time)
    ndays = len(daysinrange)
    if ndays == 0:
        raise NoDataException(reason="No data found for selected timeframe")
    self.log.debug('Found {0} days in range'.format(ndays))
    for i, d in enumerate(daysinrange):
        self.log.debug('{0}, {1}'.format(i, datetime.utcfromtimestamp(d)))
    self.log.debug('Using Native resolution: lat_res={0}, lon_res={1}'.format(self._latRes, self._lonRes))
    self.log.debug('nlats={0}, nlons={1}'.format(self._nlats, self._nlons))
    self.log.debug('center lat range = {0} to {1}'.format(self._minLatCent, self._maxLatCent))
    self.log.debug('center lon range = {0} to {1}'.format(self._minLonCent, self._maxLonCent))

    # Create array of tuples to pass to Spark map function
    nexus_tiles_spark = [[self._find_tile_bounds(t), self._startTime, self._endTime, self._ds]
                         for t in nexus_tiles]

    # Remove empty tiles (should have bounds set to None)
    bad_tile_inds = np.where([t[0] is None for t in nexus_tiles_spark])[0]
    for i in np.flipud(bad_tile_inds):
        del nexus_tiles_spark[i]

    # Expand Spark map tuple array by duplicating each entry N times,
    # where N is the number of ways we want the time dimension carved up.
    # Set the time boundaries for each of the Spark map tuples so that
    # every Nth element in the array gets the same time bounds.
    max_time_parts = 72
    num_time_parts = min(max_time_parts, ndays)

    spark_part_time_ranges = np.tile(
        np.array([a[[0, -1]] for a in np.array_split(np.array(daysinrange), num_time_parts)]),
        (len(nexus_tiles_spark), 1))
    nexus_tiles_spark = np.repeat(nexus_tiles_spark, num_time_parts, axis=0)
    nexus_tiles_spark[:, 1:3] = spark_part_time_ranges

    # Launch Spark computations to calculate x_bar
    spark_nparts = self._spark_nparts(nparts_requested)
    self.log.info('Using {} partitions'.format(spark_nparts))

    rdd = self._sc.parallelize(nexus_tiles_spark, spark_nparts)
    sum_count_part = rdd.map(self._map)
    sum_count = sum_count_part.combineByKey(lambda val: val,
                                            lambda x, val: (x[0] + val[0], x[1] + val[1]),
                                            lambda x, y: (x[0] + y[0], x[1] + y[1]))
    fill = self._fill

    def _to_means(item):
        # Per-cell mean (x_bar), with the fill value wherever the count is zero.
        bounds, (sum_tile, cnt_tile) = item
        return (bounds,
                [[(sum_tile[y, x] / cnt_tile[y, x]) if cnt_tile[y, x] > 0 else fill
                  for x in range(sum_tile.shape[1])]
                 for y in range(sum_tile.shape[0])])

    avg_tiles = sum_count.map(_to_means).collect()

    #
    # Launch a second parallel computation to calculate variance from x_bar
    #

    # Create array of tuples to pass to Spark map function - the first param is the tile bounds
    # from the first-pass results and the last param is the result data (x_bar)
    nexus_tiles_spark = [[t[0], self._startTime, self._endTime, self._ds, t[1]] for t in avg_tiles]

    self.log.info('Using {} partitions'.format(spark_nparts))
    rdd = self._sc.parallelize(nexus_tiles_spark, spark_nparts)
    anomaly_squared_part = rdd.map(self._calc_variance)
    anomaly_squared = anomaly_squared_part.combineByKey(lambda val: val,
                                                        lambda x, val: (x[0] + val[0], x[1] + val[1]),
                                                        lambda x, y: (x[0] + y[0], x[1] + y[1]))

    def _to_variance(item):
        # Per-cell variance: mean of squared anomalies about x_bar, with the
        # fill value wherever the count is zero.
        bounds, (anomaly_squared_tile, cnt_tile) = item
        return (bounds,
                [[{'variance': (anomaly_squared_tile[y, x] / cnt_tile[y, x]) if cnt_tile[y, x] > 0 else fill,
                   'cnt': cnt_tile[y, x]}
                  for x in range(anomaly_squared_tile.shape[1])]
                 for y in range(anomaly_squared_tile.shape[0])])

    variance_tiles = anomaly_squared.map(_to_variance).collect()

    # Combine subset results to produce global map.
    #
    # The tiles below are NOT Nexus objects. They are tuples
    # with the variance map data and lat-lon bounding box.
    a = np.zeros((self._nlats, self._nlons), dtype=np.float64, order='C')
    n = np.zeros((self._nlats, self._nlons), dtype=np.uint32, order='C')

    for tile in variance_tiles:
        if tile is not None:
            ((tile_min_lat, tile_max_lat, tile_min_lon, tile_max_lon), tile_stats) = tile
            tile_data = np.ma.array(
                [[tile_stats[y][x]['variance'] for x in range(len(tile_stats[0]))] for y in range(len(tile_stats))])
            tile_cnt = np.array(
                [[tile_stats[y][x]['cnt'] for x in range(len(tile_stats[0]))] for y in range(len(tile_stats))])
            tile_data.mask = ~(tile_cnt.astype(bool))
            y0 = self._lat2ind(tile_min_lat)
            y1 = y0 + tile_data.shape[0] - 1
            x0 = self._lon2ind(tile_min_lon)
            x1 = x0 + tile_data.shape[1] - 1
            if np.any(np.logical_not(tile_data.mask)):
                self.log.debug('writing tile lat {0}-{1}, lon {2}-{3}, map y {4}-{5}, map x {6}-{7}'.format(
                    tile_min_lat, tile_max_lat, tile_min_lon, tile_max_lon, y0, y1, x0, x1))
                a[y0:y1 + 1, x0:x1 + 1] = tile_data
                n[y0:y1 + 1, x0:x1 + 1] = tile_cnt
            else:
                self.log.debug('All pixels masked in tile lat {0}-{1}, lon {2}-{3}, map y {4}-{5}, map x {6}-{7}'.format(
                    tile_min_lat, tile_max_lat, tile_min_lon, tile_max_lon, y0, y1, x0, x1))

    # Store global map in a NetCDF file.
    self._create_nc_file(a, 'tam.nc', 'val', fill=self._fill)

    # Create dict for JSON response
    results = [[{'variance': a[y, x], 'cnt': int(n[y, x]),
                 'lat': self._ind2lat(y), 'lon': self._ind2lon(x)}
                for x in range(a.shape[1])] for y in range(a.shape[0])]

    return NexusResults(results=results, meta={}, stats=None, computeOptions=None,
                        minLat=bbox.bounds[1], maxLat=bbox.bounds[3],
                        minLon=bbox.bounds[0], maxLon=bbox.bounds[2],
                        ds=ds, startTime=start_time, endTime=end_time)
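# --- Illustrative only: a tiny numpy check of the two-pass statistic this
# handler computes: first the mean (x_bar), then the mean of squared
# anomalies about it (a population variance).
import numpy as np

_x = np.array([2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0])
_x_bar = _x.sum() / _x.size                       # first pass: 5.0
_variance = ((_x - _x_bar) ** 2).sum() / _x.size  # second pass: 4.0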