def latitude_time_hofmoeller_stats(tile_in_spark):
    """Compute per-latitude statistics for one tile.

    ``tile_in_spark`` is the 6-tuple ``(tile_id, index, min_lat, max_lat,
    min_lon, max_lon)``.  The tile is looked up by id, masked to the
    bounding box and its points are grouped by latitude.

    Returns a dict with keys ``sequence`` (the given index), ``time`` (the
    minimum of the tile's times) and ``lats`` (one dict per distinct
    latitude with ``latitude``, ``cnt``, ``avg``, ``max``, ``min``,
    ``std``).  Returns ``None`` when either lookup yields an empty result.
    """
    tile_id, index, min_lat, max_lat, min_lon, max_lon = tile_in_spark

    service = NexusTileService()
    try:
        fetched = service.find_tile_by_id(tile_id)[0]
        tile = service.mask_tiles_to_bbox(min_lat, max_lat, min_lon, max_lon,
                                          [fetched])[0]
    except IndexError:
        return None

    earliest = np.ma.min(tile.times)

    def latitude_of(point):
        return point.latitude

    # groupby only merges adjacent items, so order by the grouping key first.
    ordered = sorted(list(tile.nexus_point_generator()), key=latitude_of)

    per_latitude = []
    for lat, group in itertools.groupby(ordered, key=latitude_of):
        values = np.array([pt.data_val for pt in group])
        per_latitude.append({
            'latitude': float(lat),
            'cnt': len(values),
            'avg': np.mean(values).item(),
            'max': np.max(values).item(),
            'min': np.min(values).item(),
            'std': np.std(values).item()
        })

    return {'sequence': index, 'time': earliest, 'lats': per_latitude}
def _map(tile_in_spark):
    """Accumulate per-cell sums and valid-sample counts for a spatial tile.

    ``tile_in_spark`` is indexed as ``(tile_bounds, start_time, end_time,
    ds)`` where ``tile_bounds`` is ``(min_lat, max_lat, min_lon, max_lon,
    min_y, max_y, min_x, max_x)``.  Tiles are requested in windows of 30
    days; NaNs in each tile's raw data are replaced in place before the
    ``[min_y..max_y, min_x..max_x]`` window of the first time slice is
    added to the running sum, and the inverted mask is added to the count.

    Returns ``((min_lat, max_lat, min_lon, max_lon), (sum_tile, cnt_tile))``.
    """
    bounds = tile_in_spark[0]
    range_start = tile_in_spark[1]
    range_end = tile_in_spark[2]
    ds = tile_in_spark[3]
    min_lat, max_lat, min_lon, max_lon, min_y, max_y, min_x, max_x = bounds

    service = NexusTileService()

    rows = slice(min_y, max_y + 1)
    cols = slice(min_x, max_x + 1)
    window_shape = (max_y - min_y + 1, max_x - min_x + 1)

    # Limit how many tiles are pulled per query.
    seconds_per_query = 86400 * 30

    sum_tile = np.array(np.zeros(window_shape, dtype=np.float64))
    cnt_tile = np.array(np.zeros(window_shape, dtype=np.uint32))

    query_start = range_start
    while query_start <= range_end:
        query_end = min(query_start + seconds_per_query, range_end)
        batch = service.get_tiles_bounded_by_box(min_lat, max_lat,
                                                 min_lon, max_lon,
                                                 ds=ds,
                                                 start_time=query_start,
                                                 end_time=query_end)
        for tile in batch:
            tile.data.data[:, :] = np.nan_to_num(tile.data.data)
            sum_tile += tile.data.data[0, rows, cols]
            cnt_tile += (~tile.data.mask[0, rows, cols]).astype(np.uint8)
        query_start = query_end + 1

    return (min_lat, max_lat, min_lon, max_lon), (sum_tile, cnt_tile)
def hofmoeller_stats(tile_in_spark):
    """Compute Hofmoeller statistics for one tile along latitude or longitude.

    ``tile_in_spark`` is the 7-tuple ``(latlon, tile_id, index, min_lat,
    max_lat, min_lon, max_lon)``.  With ``latlon == 0`` points are grouped
    by latitude and summed unweighted; otherwise they are grouped by
    longitude and weighted by cos(latitude).

    Returns a list of ``((t, coord), (t, index, coord, count, weighted_sum,
    sum_of_weights, max, min, var))`` pairs, or ``[]`` when the tile lookup
    or the masking step yields nothing.
    """
    latlon, tile_id, index, min_lat, max_lat, min_lon, max_lon = tile_in_spark

    service = NexusTileService()
    try:
        fetched = service.find_tile_by_id(tile_id)[0]
        tile = service.mask_tiles_to_bbox(min_lat, max_lat, min_lon, max_lon,
                                          [fetched])[0]
    except IndexError:
        return []

    t = np.ma.min(tile.times)
    points = list(tile.nexus_point_generator())

    along_latitude = latlon == 0
    if along_latitude:
        # Latitude-Time map (average over longitudes)
        def coord_of(point):
            return point.latitude
    else:
        # Longitude-Time map (average over latitudes)
        def coord_of(point):
            return point.longitude

    # groupby needs its input ordered by the grouping key.
    ordered = sorted(points, key=coord_of)

    stats = []
    for coord, group in itertools.groupby(ordered, key=coord_of):
        pairs = np.array([[pt.data_val, np.cos(np.radians(pt.latitude))]
                          for pt in group])
        vals = np.nan_to_num(pairs[:, 0])
        weights = pairs[:, 1]
        coord_cnt = len(pairs)

        if along_latitude:
            # No cos(lat) weighting: every point in the group shares a latitude.
            weighted_sum = np.sum(vals).item()
            sum_of_weights = coord_cnt
        else:
            weighted_sum = np.dot(vals, weights)
            sum_of_weights = np.sum(weights).item()

        key = (t, float(coord))
        record = (t, index, float(coord), coord_cnt, weighted_sum,
                  sum_of_weights,
                  np.max(vals).item(), np.min(vals).item(),
                  np.var(vals).item())
        stats.append((key, record))

    return stats
def _calc_variance(tile_in_spark):
    """Accumulate squared anomalies and valid counts for one spatial tile.

    ``tile_in_spark`` is a spatial tile that corresponds to nexus tiles of
    the same area, indexed as ``(tile_bounds, start_time, end_time, ds,
    x_bar)``.  ``tile_bounds`` is ``(min_lat, max_lat, min_lon, max_lon,
    min_y, max_y, min_x, max_x)`` and ``x_bar`` is the per-cell mean with the
    same shape as the ``[min_y..max_y, min_x..max_x]`` window.

    For every tile in the time range, ``(value - x_bar) ** 2`` is added to
    the running total for cells that are NOT masked, and the count of those
    cells is incremented.  Masked cells contribute to neither array, so the
    two results stay consistent when the caller divides one by the other.

    Returns ``((min_lat, max_lat, min_lon, max_lon),
    (data_anomaly_squared_tile, cnt_tile))``.
    """
    tile_bounds = tile_in_spark[0]
    (min_lat, max_lat, min_lon, max_lon,
     min_y, max_y, min_x, max_x) = tile_bounds
    startTime = tile_in_spark[1]
    endTime = tile_in_spark[2]
    ds = tile_in_spark[3]
    x_bar = tile_in_spark[4]

    tile_service = NexusTileService()

    rows = slice(min_y, max_y + 1)
    cols = slice(min_x, max_x + 1)
    tile_inbounds_shape = (max_y - min_y + 1, max_x - min_x + 1)

    # Hardcoded - limits the amount of nexus tiles pulled at a time.
    days_at_a_time = 30
    t_incr = 86400 * days_at_a_time

    data_anomaly_squared_tile = np.zeros(tile_inbounds_shape, dtype=np.float64)
    cnt_tile = np.zeros(tile_inbounds_shape, dtype=np.uint32)

    # nan_to_num returns a new array, so the caller's x_bar is left untouched.
    x_bar = np.nan_to_num(np.asarray(x_bar))

    t_start = startTime
    while t_start <= endTime:
        t_end = min(t_start + t_incr, endTime)
        nexus_tiles = tile_service.get_tiles_bounded_by_box(
            min_lat, max_lat, min_lon, max_lon,
            ds=ds, start_time=t_start, end_time=t_end)

        for tile in nexus_tiles:
            # Replace NaNs in the raw data with 0 so arithmetic stays finite.
            tile.data.data[:, :] = np.nan_to_num(tile.data.data)

            # True where the cell holds a real (unmasked) measurement.
            valid = ~tile.data.mask[0, rows, cols]

            data_anomaly_tile = tile.data.data[0, rows, cols] - x_bar
            # A masked cell was zero-filled above; without this guard it
            # would add (0 - x_bar)^2 to the sum while being left out of the
            # count, inflating the variance.
            data_anomaly_squared_tile += np.where(
                valid, data_anomaly_tile * data_anomaly_tile, 0.0)

            cnt_tile += valid.astype(np.uint8)
        t_start = t_end + 1

    return (min_lat, max_lat, min_lon, max_lon), (data_anomaly_squared_tile, cnt_tile)
class TimeSeriesCalculator(object):
    """Daily statistics over a bounding box, combining index stats and tile data."""

    def __init__(self):
        self.__tile_service = NexusTileService()

    def calc_average_on_day(self, min_lat, max_lat, min_lon, max_lon, dataset,
                            timeinseconds):
        """Return min/max/mean/count for ``dataset`` in the box at one time.

        Precomputed per-tile stats from ``get_stats_within_box_at_time`` are
        combined with values computed from the data of the tiles returned by
        ``get_boundary_tiles_at_time``.  ``std`` is always reported as 0.
        """
        service = self.__tile_service

        # Stats that come straight from the index (no tile data loaded).
        within = service.get_stats_within_box_at_time(
            min_lat, max_lat, min_lon, max_lon, dataset, timeinseconds)
        within_min = min(entry["tile_min_val_d"] for entry in within)
        within_max = max(entry["tile_max_val_d"] for entry in within)
        within_sum = sum(entry["product(tile_avg_val_d, tile_count_i)"]
                         for entry in within)
        within_count = sum(entry["tile_count_i"] for entry in within)

        # Boundary tiles: load the data and compute the stats directly.
        boundary_tiles = service.get_boundary_tiles_at_time(
            min_lat, max_lat, min_lon, max_lon, dataset, timeinseconds)
        boundary = np.ma.array([tile.data for tile in boundary_tiles])
        boundary_min = np.ma.min(boundary)
        boundary_max = np.ma.max(boundary)
        boundary_sum = np.ma.sum(boundary)
        boundary_count = np.ma.count(boundary).item()

        total_count = within_count + boundary_count

        return {
            'min': min(within_min, boundary_min),
            'max': max(within_max, boundary_max),
            'mean': (within_sum + boundary_sum) / total_count,
            'cnt': total_count,
            'std': 0,
            'time': int(timeinseconds)
        }
def setUp(self):
    """Create ``self.tile_service`` from an inline test configuration."""
    # INI text must carry one section header or option per line; with the
    # entries run together on a single line the parser sees no options.
    config = StringIO("""[cassandra]
host=127.0.0.1
keyspace=nexustiles
local_datacenter=datacenter1
protocol_version=3
port=32769

[solr]
host=localhost:8986
core=nexustiles""")
    cp = ConfigParser.RawConfigParser()
    cp.readfp(config)

    self.tile_service = NexusTileService(config=cp)
class TestLongitudeLatitudeMap(unittest.TestCase):
    """Exercises LongitudeLatitudeMap against a live tile service and prints results."""

    def setUp(self):
        self.tile_service = NexusTileService()

    def test_lin_reg(self):
        """Print the first result of regression_on_tiles for one MUR tile box."""
        LongitudeLatitudeMap.tile_service = self.tile_service
        tile_box = (175.01, -42.68, 180.0, -40.2)
        search_wkt = box(-180, -90, 180, 90).wkt
        results = LongitudeLatitudeMap.regression_on_tiles(
            tile_box, search_wkt, 1, time.time(),
            "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1")
        print(next(results))

    def test_lat_lon_map_driver_mur(self):
        """Print the first item produced by lat_lon_map_driver for one MUR box."""
        # Unlike test_lin_reg, the module-level tile_service is not assigned here.
        search_area = box(-180, -90, 180, 90)
        driver_output = LongitudeLatitudeMap.lat_lon_map_driver(
            search_area, 1, time.time(),
            "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1",
            [(175.01, -42.68, 180.0, -40.2)])
        print(next(iter(driver_output)))

    def test_lat_lon_map_driver_ecco(self):
        """Run the driver over every distinct ECCO box in a region and print the dicts."""
        bounding = box(-148, 38, -129, 53)
        ds = "MXLDEPTH_ECCO_version4_release1"
        start_seconds_from_epoch = 1
        end_seconds_from_epoch = time.time()

        boxes = self.tile_service.get_distinct_bounding_boxes_in_polygon(
            bounding, ds, start_seconds_from_epoch, end_seconds_from_epoch)
        box_bounds = [a_box.bounds for a_box in boxes]

        to_dicts = LongitudeLatitudeMap.LongitudeLatitudeMapHandlerImpl.results_to_dicts
        print(to_dicts(
            LongitudeLatitudeMap.lat_lon_map_driver(
                bounding, start_seconds_from_epoch, end_seconds_from_epoch,
                ds, box_bounds)))
class TimeSeriesCalculator(object):
    """Daily statistics over a bounding box computed from tile data."""

    def __init__(self):
        self.__tile_service = NexusTileService()

    def calc_average_on_day(self, min_lat, max_lat, min_lon, max_lon, dataset,
                            timeinseconds):
        """Return min/max/mean/count/std of ``dataset`` in the box at one time.

        The data arrays of all tiles returned by
        ``get_tiles_bounded_by_box_at_time`` are stacked into one masked
        array and the statistics are taken over that array.
        """
        tiles = self.__tile_service.get_tiles_bounded_by_box_at_time(
            min_lat, max_lat, min_lon, max_lon, dataset, timeinseconds)
        stacked = np.ma.array([tile.data for tile in tiles])

        lowest = np.ma.min(stacked)
        highest = np.ma.max(stacked)
        mean = np.ma.mean(stacked).item()

        valid_count = np.ma.count(stacked)
        try:
            # Unwrap a numpy scalar; a plain int has no .item().
            valid_count = valid_count.item()
        except AttributeError:
            pass

        spread = np.ma.std(stacked)

        return {
            'min': lowest,
            'max': highest,
            'mean': mean,
            'cnt': valid_count,
            'std': spread,
            'time': int(timeinseconds)
        }
class TimeSeriesCalculator(object):
    """Daily statistics over a bounding polygon computed from tile data."""

    def __init__(self):
        self.__tile_service = NexusTileService()

    def calc_average_on_day(self, bounding_polygon_wkt, dataset, timeinseconds):
        """Return min/max/mean/count/std of ``dataset`` in the polygon at one time.

        ``bounding_polygon_wkt`` is parsed with ``shapely.wkt.loads``.  An
        empty dict is returned when the tile service yields no tiles.
        """
        polygon = shapely.wkt.loads(bounding_polygon_wkt)
        tiles = self.__tile_service.get_tiles_bounded_by_polygon_at_time(
            polygon, dataset, timeinseconds)

        # If all data ends up getting masked, the tile list will be empty.
        if len(tiles) == 0:
            return {}

        stacked = np.ma.array([tile.data for tile in tiles])

        lowest = np.ma.min(stacked)
        highest = np.ma.max(stacked)
        mean = np.ma.mean(stacked).item()

        valid_count = np.ma.count(stacked)
        try:
            # Unwrap a numpy scalar; a plain int has no .item().
            valid_count = valid_count.item()
        except AttributeError:
            pass

        spread = np.ma.std(stacked)

        return {
            'min': lowest,
            'max': highest,
            'mean': mean,
            'cnt': valid_count,
            'std': spread,
            'time': int(timeinseconds)
        }
def _map(tile_in_spark): tile_bounds = tile_in_spark[0] (min_lat, max_lat, min_lon, max_lon, min_y, max_y, min_x, max_x) = tile_bounds startTime = tile_in_spark[1] endTime = tile_in_spark[2] ds = tile_in_spark[3] cwd = tile_in_spark[4] os.chdir(cwd) tile_service = NexusTileService() print 'Started tile', tile_bounds sys.stdout.flush() tile_inbounds_shape = (max_y - min_y + 1, max_x - min_x + 1) days_at_a_time = 90 #days_at_a_time = 7 #days_at_a_time = 1 print 'days_at_a_time = ', days_at_a_time t_incr = 86400 * days_at_a_time sum_tile = np.array(np.zeros(tile_inbounds_shape, dtype=np.float64)) cnt_tile = np.array(np.zeros(tile_inbounds_shape, dtype=np.uint32)) t_start = startTime while t_start <= endTime: t_end = min(t_start + t_incr, endTime) t1 = time() print 'nexus call start at time %f' % t1 sys.stdout.flush() nexus_tiles = \ TimeAvgMapSparkHandlerImpl.query_by_parts(tile_service, min_lat, max_lat, min_lon, max_lon, ds, t_start, t_end, part_dim=2) t2 = time() print 'nexus call end at time %f' % t2 print 'secs in nexus call: ', t2 - t1 sys.stdout.flush() TimeAvgMapSparkHandlerImpl._prune_tiles(nexus_tiles) print 't %d to %d - Got %d tiles' % (t_start, t_end, len(nexus_tiles)) sys.stdout.flush() for tile in nexus_tiles: tile.data.data[:, :] = np.nan_to_num(tile.data.data) sum_tile += tile.data.data[0, min_y:max_y + 1, min_x:max_x + 1] cnt_tile += (~tile.data.mask[0, min_y:max_y + 1, min_x:max_x + 1]).astype(np.uint8) t_start = t_end + 1 #print 'cnt_tile = ', cnt_tile #cnt_tile.mask = ~(cnt_tile.data.astype(bool)) #sum_tile.mask = cnt_tile.mask #avg_tile = sum_tile / cnt_tile #stats_tile = [[{'avg': avg_tile.data[y,x], 'cnt': cnt_tile.data[y,x]} for x in range(tile_inbounds_shape[1])] for y in range(tile_inbounds_shape[0])] print 'Finished tile', tile_bounds #print 'Tile avg = ', avg_tile sys.stdout.flush() return ((min_lat, max_lat, min_lon, max_lon), (sum_tile, cnt_tile))
class TestService(unittest.TestCase):
    """Manual checks of NexusTileService queries; results are printed, not asserted."""

    def setUp(self):
        """Create ``self.tile_service`` from an inline test configuration."""
        # INI text must carry one section header or option per line; with the
        # entries run together on a single line the parser sees no options.
        config = StringIO("""[cassandra]
host=127.0.0.1
keyspace=nexustiles
local_datacenter=datacenter1
protocol_version=3
port=32769

[solr]
host=localhost:8986
core=nexustiles""")
        cp = ConfigParser.RawConfigParser()
        cp.readfp(config)

        self.tile_service = NexusTileService(config=cp)

    def test_get_distinct_bounding_boxes_in_polygon(self):
        """Print the bounds of every distinct ECCO tile box in the global polygon."""
        boxes = self.tile_service.get_distinct_bounding_boxes_in_polygon(
            box(-180, -90, 180, 90),
            "MXLDEPTH_ECCO_version4_release1",
            1, time.time())

        for b in boxes:
            print(b.bounds)

    def test_get_distinct_bounding_boxes_in_polygon_mur(self):
        """Print the bounds of every distinct MUR tile box in the global polygon."""
        boxes = self.tile_service.get_distinct_bounding_boxes_in_polygon(
            box(-180, -90, 180, 90),
            "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1",
            1, time.time())

        for b in boxes:
            print(b.bounds)

    def test_find_tiles_by_exact_bounds(self):
        """Print the summary of each MUR tile matching one exact bounds tuple."""
        tiles = self.tile_service.find_tiles_by_exact_bounds(
            (175.01, -42.68, 180.0, -40.2),
            "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1",
            1, time.time())

        for tile in tiles:
            print(tile.get_summary())

    def test_sorted_box(self):
        """Print min_time of each tile returned by get_tiles_bounded_by_box."""
        tiles = self.tile_service.get_tiles_bounded_by_box(
            -42.68, -40.2, 175.01, 180.0,
            "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1",
            1, time.time())

        for tile in tiles:
            print(tile.min_time)
def setUp(self):
    """Create ``self.tile_service`` from an inline test configuration."""
    # INI text must carry one section header or option per line; with the
    # entries run together on a single line the parser sees no options.
    config = StringIO("""[cassandra]
host=127.0.0.1
keyspace=nexustiles
local_datacenter=datacenter1
protocol_version=3
port=9042

[solr]
host=http://localhost:8983
core=nexustiles

[datastore]
store=cassandra""")
    cp = configparser.RawConfigParser()
    # readfp() was deprecated in Python 3.2 and removed in 3.12; prefer
    # read_file() and fall back only where it does not exist.
    load_config = getattr(cp, 'read_file', None) or cp.readfp
    load_config(config)

    self.tile_service = NexusTileService(config=cp)
def pool_initializer():
    """Set up the module-level ``tile_service`` for a worker process.

    Also registers a cassandra cqlengine connection named after the current
    process, using the host addresses of the existing session, and makes it
    the default connection.
    """
    global tile_service

    from nexustiles.nexustiles import NexusTileService

    tile_service = NexusTileService()

    # TODO This is a hack to make sure each sub-process uses its own
    # connection to cassandra. data-access needs to be updated
    from cassandra.cqlengine import connection
    from multiprocessing import current_process

    worker_name = current_process().name
    host_addresses = [host.address for host in connection.get_session().hosts]
    connection.register_connection(worker_name, host_addresses)
    connection.set_default_connection(current_process().name)
class DailyDifferenceAverageCalculator(object):
    """Average difference between a dataset and a second dataset for one day."""

    def __init__(self):
        self.__tile_service = NexusTileService()

    def calc_average_diff_on_day(self, min_lat, max_lat, min_lon, max_lon,
                                 dataset1, dataset2, timeinseconds):
        """Return ``(int(timeinseconds), mean(dataset1 - dataset2))`` for the box.

        For every ``dataset1`` tile in the box at the given time, the
        ``dataset2`` tile covering the same bounding box for the same day of
        year (UTC) is looked up and subtracted.
        """
        service = self.__tile_service
        day_of_year = datetime.fromtimestamp(timeinseconds,
                                             pytz.utc).timetuple().tm_yday

        primary_tiles = service.find_all_tiles_in_box_at_time(
            min_lat, max_lat, min_lon, max_lon, dataset1, timeinseconds)

        differences = []
        for primary in primary_tiles:
            try:
                footprint = box(primary.bbox.min_lon, primary.bbox.min_lat,
                                primary.bbox.max_lon, primary.bbox.max_lat)
                secondary = service.find_tile_by_polygon_and_most_recent_day_of_year(
                    footprint, dataset2, day_of_year)[0]
                diff = np.subtract(primary.data, secondary.data)
            except NexusTileServiceException:
                # This happens when there is data in the first tile but all
                # NaNs in the second, because the Solr query being used
                # filters out results where stats_count = 0.  Technically
                # this should never happen if dataset2 is a climatology
                # generated in part from dataset1, and it is probably a data
                # error.  For now, treat the second tile as fully masked,
                # which essentially discards the first tile's data.
                all_masked = np.ma.masked_all(primary.data.shape)
                diff = np.subtract(primary.data, all_masked)

            differences.append(np.ma.array(diff).ravel())

        average = np.ma.mean(differences).item()
        return int(timeinseconds), average
def _map(tile_in_spark):
    """Accumulate per-cell sums and valid counts for a spatial tile.

    ``tile_in_spark`` is a spatial tile that corresponds to nexus tiles of
    the same area, indexed as ``(tile_bounds, start_time, end_time, ds)``
    with ``tile_bounds = (min_lat, max_lat, min_lon, max_lon, min_y, max_y,
    min_x, max_x)``.  Both accumulators are printed before returning.

    Returns ``(tile_bounds, (sum_tile, cnt_tile))`` - note the key is the
    full 8-element bounds value.
    """
    tile_bounds = tile_in_spark[0]
    range_start = tile_in_spark[1]
    range_end = tile_in_spark[2]
    ds = tile_in_spark[3]
    min_lat, max_lat, min_lon, max_lon, min_y, max_y, min_x, max_x = tile_bounds

    service = NexusTileService()

    rows = slice(min_y, max_y + 1)
    cols = slice(min_x, max_x + 1)
    window_shape = (max_y - min_y + 1, max_x - min_x + 1)

    # Hardcoded - limits the amount of nexus tiles pulled at a time.
    seconds_per_query = 86400 * 30

    sum_tile = np.array(np.zeros(window_shape, dtype=np.float64))
    cnt_tile = np.array(np.zeros(window_shape, dtype=np.uint32))

    query_start = range_start
    while query_start <= range_end:
        query_end = min(query_start + seconds_per_query, range_end)
        batch = service.get_tiles_bounded_by_box(min_lat, max_lat,
                                                 min_lon, max_lon,
                                                 ds=ds,
                                                 start_time=query_start,
                                                 end_time=query_end)
        for tile in batch:
            # Replace NaNs in the raw data with 0 before summing.
            tile.data.data[:, :] = np.nan_to_num(tile.data.data)
            sum_tile += tile.data.data[0, rows, cols]
            # Inverted mask: adds 1 for an unmasked cell, 0 for a masked one.
            cnt_tile += (~tile.data.mask[0, rows, cols]).astype(np.uint8)
        query_start = query_end + 1

    print("sum tile", sum_tile)
    print("count tile", cnt_tile)
    return tile_bounds, (sum_tile, cnt_tile)
def test_ascatb_match(self):
    """Run the matchup driver for ASCATB against spurs and print every match."""
    from shapely.wkt import loads
    from nexustiles.nexustiles import NexusTileService

    polygon = loads(
        "POLYGON((-34.98 29.54, -30.1 29.54, -30.1 31.00, -34.98 31.00, -34.98 29.54))"
    )
    primary_ds = "ASCATB-L2-Coastal"
    matchup_ds = "spurs"
    parameter = "wind"
    start_time = 1351468800  # 2012-10-29T00:00:00Z
    end_time = 1351555200  # 2012-10-30T00:00:00Z
    time_tolerance = 86400
    depth_tolerance = 5.0
    radius_tolerance = 110000.0  # 110 km
    platforms = "1,2,3,4,5,6,7,8,9"

    tile_service = NexusTileService()
    found = tile_service.find_tiles_in_polygon(polygon, primary_ds,
                                               start_time, end_time,
                                               fetch_data=False, fl='id')
    tile_ids = [tile.tile_id for tile in found]

    result = spark_matchup_driver(tile_ids, wkt.dumps(polygon), primary_ds,
                                  matchup_ds, parameter, time_tolerance,
                                  depth_tolerance, radius_tolerance, platforms)

    point_format = "lon: %s, lat: %s, time: %s, wind u,v: %s,%s"

    def describe(point):
        return point_format % (point.longitude, point.latitude, point.time,
                               point.wind_u, point.wind_v)

    for primary, matches in result.iteritems():
        match_lines = '\n\t\t'.join([describe(match) for match in matches])
        print("primary: %s\n\tmatches:\n\t\t%s" % (describe(primary),
                                                  match_lines))
def lat_lon_map_driver(search_bounding_polygon, search_start, search_end, ds,
                       distinct_boxes):
    """Apply ``regression_on_tiles`` to every box and flatten the results.

    A fresh ``NexusTileService`` is stored in the module-level
    ``tile_service`` before the boxes are processed.  The search polygon is
    passed on as WKT.  Returns a single flat list of the items produced for
    all boxes.
    """
    global tile_service

    from functools import partial
    from nexustiles.nexustiles import NexusTileService

    # The work is done in this process; no worker pool is started.
    per_box = partial(regression_on_tiles,
                      search_bounding_polygon_wkt=search_bounding_polygon.wkt,
                      search_start=search_start,
                      search_end=search_end,
                      ds=ds)

    tile_service = NexusTileService()

    flattened = []
    for box_results in map(per_box, distinct_boxes):
        flattened.extend(box_results)
    return flattened
class TestService(unittest.TestCase):
    """Manual checks of NexusTileService queries; results are printed, not asserted."""

    def setUp(self):
        """Create ``self.tile_service`` from an inline test configuration."""
        # INI text must carry one section header or option per line; with the
        # entries run together on a single line the parser sees no options.
        config = StringIO("""[cassandra]
host=127.0.0.1
keyspace=nexustiles
local_datacenter=datacenter1
protocol_version=3
port=9042

[solr]
host=http://localhost:8983
core=nexustiles

[datastore]
store=cassandra""")
        cp = ConfigParser.RawConfigParser()
        cp.readfp(config)

        self.tile_service = NexusTileService(config=cp)

    def test_get_distinct_bounding_boxes_in_polygon(self):
        """Print the bounds of every distinct ECCO tile box in the global polygon."""
        boxes = self.tile_service.get_distinct_bounding_boxes_in_polygon(
            box(-180, -90, 180, 90),
            "MXLDEPTH_ECCO_version4_release1",
            1, time.time())

        for b in boxes:
            print(b.bounds)

    def test_get_distinct_bounding_boxes_in_polygon_mur(self):
        """Print the bounds of every distinct MUR tile box in the global polygon."""
        boxes = self.tile_service.get_distinct_bounding_boxes_in_polygon(
            box(-180, -90, 180, 90),
            "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1",
            1, time.time())

        for b in boxes:
            print(b.bounds)

    def test_find_tiles_by_exact_bounds(self):
        """Print the summary of each MUR tile matching one exact bounds tuple."""
        tiles = self.tile_service.find_tiles_by_exact_bounds(
            (175.01, -42.68, 180.0, -40.2),
            "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1",
            1, time.time())

        for tile in tiles:
            print(tile.get_summary())

    def test_sorted_box(self):
        """Print min_time of each tile returned by get_tiles_bounded_by_box."""
        tiles = self.tile_service.get_tiles_bounded_by_box(
            -42.68, -40.2, 175.01, 180.0,
            "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1",
            1, time.time())

        for tile in tiles:
            print(tile.min_time)

    def test_time_series_tile(self):
        """Print the summary of the tile(s) found for one fixed tile id."""
        tiles = self.tile_service.find_tile_by_id(
            "055c0b51-d0fb-3f39-b48a-4f762bf0c994")

        for tile in tiles:
            print(tile.get_summary())

    def test_get_tiles_by_metadata(self):
        """Print the summary of tiles selected by a metadata filter on id."""
        tiles = self.tile_service.get_tiles_by_metadata(
            ['id:60758e00-5721-3a6e-bf57-78448bb0aeeb'],
            "MUR-JPL-L4-GLOB-v4.1",
            1514764800, 1514764800)

        for tile in tiles:
            print(tile.get_summary())
def test_smap_match(self):
    """Run the matchup driver for SMAP against spurs and print every match."""
    from shapely.wkt import loads
    from nexustiles.nexustiles import NexusTileService

    polygon = loads(
        "POLYGON((-34.98 29.54, -30.1 29.54, -30.1 31.00, -34.98 31.00, -34.98 29.54))"
    )
    primary_ds = "SMAP_L2B_SSS"
    matchup_ds = "spurs"
    parameter = "sss"
    start_time = 1350259200  # 2012-10-15T00:00:00Z
    end_time = 1350345600  # 2012-10-16T00:00:00Z
    time_tolerance = 86400
    depth_tolerance = 5.0
    radius_tolerance = 1500.0
    platforms = "1,2,3,4,5,6,7,8,9"

    tile_service = NexusTileService()
    found = tile_service.find_tiles_in_polygon(polygon, primary_ds,
                                               start_time, end_time,
                                               fetch_data=False, fl='id')
    tile_ids = [tile.tile_id for tile in found]

    result = spark_matchup_driver(tile_ids, wkt.dumps(polygon), primary_ds,
                                  matchup_ds, parameter, time_tolerance,
                                  depth_tolerance, radius_tolerance, platforms)

    point_format = "lon: %s, lat: %s, time: %s, sst: %s"

    def describe(point):
        return point_format % (point.longitude, point.latitude, point.time,
                               point.sst)

    for primary, matches in result.iteritems():
        match_lines = '\n\t\t'.join([describe(match) for match in matches])
        print("primary: %s\n\tmatches:\n\t\t%s" % (describe(primary),
                                                  match_lines))
def calculate_diff(tile_ids, bounding_wkt, dataset, climatology):
    """Chain the difference generators for a partition of tile ids.

    ``bounding_wkt`` and ``climatology`` are read through ``.value``.  For
    each tile id the dataset tile is loaded within the bounding polygon, the
    climatology tile for the same bounding box and day of year is looked up,
    and ``generate_diff`` is queued for the pair.  Tiles for which either
    lookup raises ``NoDatasetTile`` / ``NoClimatologyTile`` are skipped.

    Returns ``[]`` for an empty partition, otherwise an ``itertools.chain``
    over all queued generators.
    """
    from itertools import chain

    tile_ids = list(tile_ids)
    if len(tile_ids) == 0:
        return []

    tile_service = NexusTileService()

    # Loop-invariant: parse the search polygon and read the climatology name
    # once instead of twice per tile.
    bounding_polygon = wkt.loads(bounding_wkt.value)
    climatology_name = climatology.value

    # One generator per tile pair; nothing is evaluated until the caller
    # iterates the returned chain.
    diff_generators = []
    for tile_id in tile_ids:
        try:
            dataset_tile = get_dataset_tile(tile_service, bounding_polygon,
                                            tile_id)
        except NoDatasetTile:
            # This should only happen if all measurements in a tile become
            # masked after applying the bounding polygon.
            continue

        tile_day_of_year = dataset_tile.min_time.timetuple().tm_yday
        tile_box = box(dataset_tile.bbox.min_lon, dataset_tile.bbox.min_lat,
                       dataset_tile.bbox.max_lon, dataset_tile.bbox.max_lat)

        try:
            climatology_tile = get_climatology_tile(tile_service,
                                                    bounding_polygon,
                                                    tile_box,
                                                    climatology_name,
                                                    tile_day_of_year)
        except NoClimatologyTile:
            continue

        diff_generators.append(generate_diff(dataset_tile, climatology_tile))

    return chain(*diff_generators)
"""
Copyright (c) 2016 Jet Propulsion Laboratory,
California Institute of Technology.  All rights reserved
"""
# Smoke check: importing this module constructs a NexusTileService and
# asserts that an instance came back.  The commented-out lines at the
# bottom are earlier ad-hoc experiments kept for reference.
import pyximport

# Installs the pyximport import hook before the nexustiles imports below.
# NOTE(review): presumably required so those imports can load .pyx modules -
# confirm before reordering.
pyximport.install()

from nexustiles.nexustiles import NexusTileService
from nexustiles.model.nexusmodel import get_approximate_value_for_lat_lon
import numpy as np

service = NexusTileService()

assert service is not None

# tiles = service.find_tiles_in_box(-90, 90, -180, 180, ds='AVHRR_OI_L4_GHRSST_NCEI')
#
# print '\n'.join([str(tile.data.shape) for tile in tiles])

# ASCATB
# tiles = service.find_tile_by_id('43c63dce-1f6e-3c09-a7b2-e0efeb3a72f2')
# MUR
# tiles = service.find_tile_by_id('d9b5afe3-bd7f-3824-ad8a-d8d3b364689c')
# SMAP
# tiles = service.find_tile_by_id('7eee40ef-4c6e-32d8-9a67-c83d4183f724')
# tile = tiles[0]
#
# print get_approximate_value_for_lat_lon([tile], np.min(tile.latitudes), np.min(tile.longitudes) + .005)
# print tile.latitudes
# print tile.longitudes
def match_satellite_to_insitu(tile_ids, primary_b, matchup_b, parameter_b,
                              tt_b, rt_b, platforms_b, bounding_wkt_b,
                              depth_min_b, depth_max_b):
    """Match a partition of satellite tiles against in-situ points from edge.

    Every ``*_b`` argument is read through ``.value``.  The spatial extent
    of the partition is grown by the radius tolerance (``rt_b``), clamped to
    the search polygon (``bounding_wkt_b``), and the temporal extent is grown
    by the time tolerance (``tt_b``).  Each comma-separated source in
    ``matchup_b`` is queried through ``query_edge``; the returned points are
    projected to an azimuthal-equidistant plane centred on the matchup box
    and indexed in a cKDTree.

    Returns ``[]`` when the partition is empty or edge returns no points,
    otherwise a chain of ``match_tile_to_point_generator`` generators, one
    per tile id.  Timing lines are printed for each phase.
    """
    the_time = datetime.now()
    tile_ids = list(tile_ids)
    if len(tile_ids) == 0:
        return []
    tile_service = NexusTileService()

    # Spatial and temporal extents of this partition of tiles.
    tiles_bbox = tile_service.get_bounding_box(tile_ids)
    tiles_min_time = tile_service.get_min_time(tile_ids)
    tiles_max_time = tile_service.get_max_time(tile_ids)

    # Grow the spatial extent by the radius tolerance on both corners.
    matchup_min_lon, matchup_min_lat = add_meters_to_lon_lat(
        tiles_bbox.bounds[0], tiles_bbox.bounds[1], -1 * rt_b.value)
    matchup_max_lon, matchup_max_lat = add_meters_to_lon_lat(
        tiles_bbox.bounds[2], tiles_bbox.bounds[3], rt_b.value)

    # Do not go outside of the search domain.
    search_min_x, search_min_y, search_max_x, search_max_y = wkt.loads(
        bounding_wkt_b.value).bounds
    matchup_min_lon = max(matchup_min_lon, search_min_x)
    matchup_min_lat = max(matchup_min_lat, search_min_y)
    matchup_max_lon = min(matchup_max_lon, search_max_x)
    matchup_max_lat = min(matchup_max_lat, search_max_y)

    # Centre the projection on the centroid of the matchup bounding box.
    center_lon, center_lat = box(matchup_min_lon, matchup_min_lat,
                                 matchup_max_lon,
                                 matchup_max_lat).centroid.coords[0]
    aeqd_proj = pyproj.Proj(proj='aeqd', lon_0=center_lon, lat_0=center_lat)
    lonlat_proj = pyproj.Proj(proj='lonlat')

    # Grow the temporal extent by the time tolerance.
    matchup_min_time = tiles_min_time - tt_b.value
    matchup_max_time = tiles_max_time + tt_b.value
    print("%s Time to determine spatial-temporal extents for partition %s to %s" % (
        str(datetime.now() - the_time), tile_ids[0], tile_ids[-1]))

    # Query edge for all points within the extents of this partition.
    the_time = datetime.now()
    edge_session = requests.Session()
    edge_results = []
    with edge_session:
        for source_name in matchup_b.value.split(','):
            bbox = ','.join(str(edge) for edge in (matchup_min_lon,
                                                   matchup_min_lat,
                                                   matchup_max_lon,
                                                   matchup_max_lat))
            edge_response = query_edge(source_name, parameter_b.value,
                                       matchup_min_time, matchup_max_time,
                                       bbox, platforms_b.value,
                                       depth_min_b.value, depth_max_b.value,
                                       session=edge_session)
            if edge_response['totalResults'] == 0:
                continue
            source_points = edge_response['results']
            for source_point in source_points:
                source_point['source'] = source_name
            edge_results.extend(source_points)
    print("%s Time to call edge for partition %s to %s" % (
        str(datetime.now() - the_time), tile_ids[0], tile_ids[-1]))
    if len(edge_results) == 0:
        return []

    # Project the edge points into the planar coordinate system.
    the_time = datetime.now()
    matchup_points = np.ndarray((len(edge_results), 2), dtype=np.float32)
    for n, edge_point in enumerate(edge_results):
        try:
            x, y = wkt.loads(edge_point['point']).coords[0]
        except ReadingError:
            # Not WKT: try "x y" first, then fall back to "y,x".
            try:
                x, y = Point(
                    *[float(c) for c in edge_point['point'].split(' ')]).coords[0]
            except ValueError:
                y, x = Point(
                    *[float(c) for c in edge_point['point'].split(',')]).coords[0]

        projected_x, projected_y = pyproj.transform(
            p1=lonlat_proj, p2=aeqd_proj, x=x, y=y)
        matchup_points[n][0] = projected_x
        matchup_points[n][1] = projected_y
    print("%s Time to convert match points for partition %s to %s" % (
        str(datetime.now() - the_time), tile_ids[0], tile_ids[-1]))

    # Build the kd-tree over the projected matchup points.
    the_time = datetime.now()
    m_tree = spatial.cKDTree(matchup_points, leafsize=30)
    print("%s Time to build matchup tree" % (str(datetime.now() - the_time)))

    # The actual matching happens inside the generators, so only one tile is
    # loaded into memory at a time.
    match_generators = []
    for tile_id in tile_ids:
        match_generators.append(
            match_tile_to_point_generator(tile_service, tile_id, m_tree,
                                          edge_results, bounding_wkt_b.value,
                                          parameter_b.value, rt_b.value,
                                          lonlat_proj, aeqd_proj))

    return chain(*match_generators)
def calculate_monthly_average(month=None, bounding_polygon_wkt=None, ds=None):
    """Compute count-weighted average, min and max of a calendar month.

    For each of the most recent years of ``ds`` (years more than 10 before
    the dataset's last year are skipped) the tiles intersecting the polygon
    during ``month`` are collected.  Tiles fully inside the polygon
    contribute their stored statistics; border tiles are loaded, masked to
    the polygon and their statistics recalculated.  Per-year values are
    count-weighted means over tiles, and the final values are count-weighted
    means over years.

    Returns ``(average, minimum, maximum)`` as Python floats.

    NOTE(review): the minimum and maximum are count-weighted means of the
    per-tile minima/maxima, exactly as accumulated in ``monthly_mins`` and
    ``monthly_maxes`` - confirm this is the intended definition rather than
    an absolute min/max.
    """
    EPOCH = pytz.timezone('UTC').localize(datetime(1970, 1, 1))
    tile_service = NexusTileService()
    min_date, max_date = get_min_max_date(tile_service, ds=ds)
    monthly_averages, monthly_counts = [], []
    monthly_mins, monthly_maxes = [], []
    bounding_polygon = shapely.wkt.loads(bounding_polygon_wkt)

    for year in range(min_date.year, max_date.year + 1):
        # Only the last ten years (plus the final year) are considered.
        if (max_date.year - year) > 10:
            continue

        beginning_of_month = datetime(year, month, 1)
        end_of_month = datetime(year, month,
                                calendar.monthrange(year, month)[1],
                                23, 59, 59)
        start = (pytz.UTC.localize(beginning_of_month) - EPOCH).total_seconds()
        end = (pytz.UTC.localize(end_of_month) - EPOCH).total_seconds()

        tile_stats = tile_service.find_tiles_in_polygon(
            bounding_polygon, ds, start, end,
            fl=('id,'
                'tile_avg_val_d,tile_count_i,'
                'tile_min_val_d,tile_max_val_d,'
                'tile_min_lat,tile_max_lat,'
                'tile_min_lon,tile_max_lon'),
            fetch_data=False)
        if len(tile_stats) == 0:
            continue

        print('calculate_monthly_average: Got {} tiles'.format(
            len(tile_stats)))

        # Split into tiles on the border of the polygon and tiles that lie
        # completely inside it.
        border_tiles, inner_tiles = [], []
        for tile in tile_stats:
            tile_box = shapely.geometry.box(tile.bbox.min_lon,
                                            tile.bbox.min_lat,
                                            tile.bbox.max_lon,
                                            tile.bbox.max_lat)
            if bounding_polygon.contains(tile_box):
                inner_tiles.append(tile)
            else:
                border_tiles.append(tile)

        # The stored stats of the inner tiles can be used directly.
        tile_means = [tile.tile_stats.mean for tile in inner_tiles]
        tile_mins = [tile.tile_stats.min for tile in inner_tiles]
        tile_maxes = [tile.tile_stats.max for tile in inner_tiles]
        tile_counts = [tile.tile_stats.count for tile in inner_tiles]

        # Border tiles need their data loaded, masked, and stats recalculated.
        border_tiles = list(tile_service.fetch_data_for_tiles(*border_tiles))
        border_tiles = tile_service.mask_tiles_to_polygon(bounding_polygon,
                                                          border_tiles)
        for tile in border_tiles:
            tile.update_stats()
            tile_means.append(tile.tile_stats.mean)
            tile_mins.append(tile.tile_stats.min)
            tile_maxes.append(tile.tile_stats.max)
            tile_counts.append(tile.tile_stats.count)

        tile_means = np.array(tile_means)
        tile_mins = np.array(tile_mins)
        tile_maxes = np.array(tile_maxes)
        tile_counts = np.array(tile_counts)

        sum_tile_counts = np.sum(tile_counts) * 1.0
        tile_weights = tile_counts / sum_tile_counts

        monthly_averages.append(
            np.average(tile_means, None, tile_weights).item())
        monthly_mins.append(
            np.average(tile_mins, None, tile_weights).item())
        monthly_maxes.append(
            np.average(tile_maxes, None, tile_weights).item())
        monthly_counts.append(sum_tile_counts)

    count_sum = np.sum(monthly_counts) * 1.0
    weights = np.array(monthly_counts) / count_sum

    # Each element of the result uses its own accumulator; previously the
    # average was returned three times and the min/max lists were unused.
    return np.average(monthly_averages, None, weights).item(), \
        np.average(monthly_mins, None, weights).item(), \
        np.average(monthly_maxes, None, weights).item()
def map(tile_in_spark):
    """Compute cos(latitude)-weighted statistics per time stamp.

    ``tile_in_spark`` is ``(bounding_wkt, dataset, time_range)``.  All tiles
    bounded by the polygon in ``time_range[0]..time_range[1]`` are fetched
    (``rows=5000``).  For every time stamp the data of the tiles containing
    it is aggregated with negative values additionally masked, and min, max,
    weighted mean, count and std are computed.

    Returns a list of stat dicts ordered by time; time stamps whose
    aggregate is empty or fully masked are skipped.  Returns ``[]`` (after
    printing a message) when no tiles are found.
    """
    from nexustiles.nexustiles import NexusTileService
    import shapely.wkt

    bounding_wkt, dataset, time_range = tile_in_spark
    tile_service = NexusTileService()

    tiles = tile_service.get_tiles_bounded_by_polygon(
        shapely.wkt.loads(bounding_wkt), dataset,
        time_range[0], time_range[1], rows=5000)
    if len(tiles) == 0:
        print('get_tiles_bounded_by_polygon returned 0 tiles for dataset {} in time {} - {} for bounds {}'.format(
            dataset, time_range[0], time_range[1], bounding_wkt))
        return []

    # Map each time stamp to a list of (tile index, time index within that
    # tile) pairs for every tile that contains it.
    slots_by_time = {}
    for tile_idx, tile in enumerate(tiles):
        for time_idx in range(len(tile.times)):
            stamp = tile.times[time_idx]
            slots_by_time.setdefault(stamp, []).append((tile_idx, time_idx))

    stats_arr = []
    for timeinseconds in sorted(slots_by_time.keys()):
        slots = slots_by_time[timeinseconds]
        if len(slots) == 0:
            continue

        # Extend each slice's mask to cover negative values.  The slices are
        # re-indexed on every access, as before, rather than held in a local.
        for i, j in slots:
            tiles[i].data[j].mask = tiles[i].data[j].mask | (
                tiles[i].data[j].data < 0.)

        # Aggregate all data with its mask, plus the latitude belonging to
        # each data element.
        flat_values = np.hstack(
            [tiles[i].data[j].data.flatten() for i, j in slots])
        flat_masks = np.hstack(
            [tiles[i].data[j].mask.flatten() for i, j in slots])
        tile_data_agg = np.ma.array(data=flat_values, mask=flat_masks)
        lats_agg = np.hstack([
            np.repeat(tiles[i].latitudes, len(tiles[i].longitudes))
            for i, j in slots
        ])

        if (len(tile_data_agg) == 0) or tile_data_agg.mask.all():
            continue

        # Statistics for this time stamp; the mean is weighted by cos(lat).
        stats_arr.append({
            'min': np.ma.min(tile_data_agg),
            'max': np.ma.max(tile_data_agg),
            'mean': np.ma.average(
                tile_data_agg,
                weights=np.cos(np.radians(lats_agg))).item(),
            'cnt': np.ma.count(tile_data_agg),
            'std': np.ma.std(tile_data_agg),
            'time': int(timeinseconds)
        })

    return stats_arr
def __init__(self, skipCassandra=False, skipSolr=False):
    """Initialise the handler and create its tile service.

    :param skipCassandra: passed as the first positional argument to
        ``NexusTileService``.  NOTE(review): presumably disables the
        Cassandra backend -- confirm against ``NexusTileService``.
    :param skipSolr: passed as the second positional argument to
        ``NexusTileService``.  NOTE(review): presumably disables the Solr
        backend -- confirm against ``NexusTileService``.
    """
    CalcHandler.__init__(self)
    # No algorithm configuration is attached at construction time.
    self.algorithm_config = None
    self._tile_service = NexusTileService(skipCassandra, skipSolr)
def _map(tile_in_spark):
    """Accumulate per-cell sums and valid-sample counts for one tile region.

    The time span is walked in windows of ``86400 * 30`` time units (30 days
    if the times are in seconds).  For every tile the service returns in a
    window, the cells inside the ``[min_y, max_y] x [min_x, max_x]`` index
    window of the tile's first slice along the leading axis are added to the
    running sum and count.

    Masked cells contribute 0 to the sum and are not counted, so
    ``sum / count`` is the mean of the unmasked samples.  NaN values are
    converted with ``np.nan_to_num`` before summing.

    :param tile_in_spark: sequence ``(tile_bounds, startTime, endTime, ds)``
        where ``tile_bounds`` is ``(min_lat, max_lat, min_lon, max_lon,
        min_y, max_y, min_x, max_x)``.
    :return: ``((min_lat, max_lat, min_lon, max_lon), (sum_tile, cnt_tile))``
        with ``sum_tile`` a float64 array and ``cnt_tile`` a uint32 array,
        both of shape ``(max_y - min_y + 1, max_x - min_x + 1)``.
    """
    tile_bounds = tile_in_spark[0]
    (min_lat, max_lat, min_lon, max_lon,
     min_y, max_y, min_x, max_x) = tile_bounds
    startTime = tile_in_spark[1]
    endTime = tile_in_spark[2]
    ds = tile_in_spark[3]
    tile_service = NexusTileService()

    tile_inbounds_shape = (max_y - min_y + 1, max_x - min_x + 1)
    # Index of the in-bounds window within a tile's data array.
    window = (0, slice(min_y, max_y + 1), slice(min_x, max_x + 1))

    # Tiles are requested in bounded time windows rather than all at once.
    days_at_a_time = 30
    t_incr = 86400 * days_at_a_time

    sum_tile = np.zeros(tile_inbounds_shape, dtype=np.float64)
    cnt_tile = np.zeros(tile_inbounds_shape, dtype=np.uint32)
    t_start = startTime
    while t_start <= endTime:
        t_end = min(t_start + t_incr, endTime)
        nexus_tiles = tile_service.get_tiles_bounded_by_box(
            min_lat, max_lat, min_lon, max_lon,
            ds=ds, start_time=t_start, end_time=t_end)

        for tile in nexus_tiles:
            # getmaskarray always returns a full boolean array, even when
            # the tile carries no explicit mask.
            valid = ~np.ma.getmaskarray(tile.data)[window]
            values = np.nan_to_num(np.ma.getdata(tile.data)[window])
            # Zero the masked cells so that a finite value hidden under the
            # mask cannot leak into the sum while being left out of the
            # count.
            sum_tile += values * valid
            cnt_tile += valid.astype(np.uint8)
        # Windows are inclusive on both ends, so resume one past the end.
        t_start = t_end + 1

    return ((min_lat, max_lat, min_lon, max_lon), (sum_tile, cnt_tile))
def _map(tile_in):
    """Accumulate the sums needed for a per-cell Pearson correlation.

    Tiles of the two datasets ``ds[0]`` and ``ds[1]`` are fetched in time
    windows and paired by the first time stamp of each tile.  For every
    pair with equal time stamps, the cells inside the
    ``[min_y, max_y] x [min_x, max_x]`` index window that are unmasked in
    BOTH tiles contribute to the running sums; all other cells contribute
    nothing.

    :param tile_in: sequence ``(tile_bounds, start_time, end_time, ds)``
        where ``tile_bounds`` is ``(min_lat, max_lat, min_lon, max_lon,
        min_y, max_y, min_x, max_x)`` and ``ds`` holds the two dataset
        identifiers.
    :return: ``((min_lat, max_lat, min_lon, max_lon), (sumx, sumy, sumxx,
        sumyy, sumxy, n))``; the sums are float64 arrays and ``n`` is a
        uint32 array, all of shape
        ``(max_y - min_y + 1, max_x - min_x + 1)``.
    :raises AssertionError: if the tiles of either dataset are not returned
        in non-decreasing time order.
    """
    # Unpack input
    tile_bounds, start_time, end_time, ds = tile_in
    (min_lat, max_lat, min_lon, max_lon,
     min_y, max_y, min_x, max_x) = tile_bounds

    # Create arrays to hold intermediate results during
    # correlation coefficient calculation.
    tile_inbounds_shape = (max_y - min_y + 1, max_x - min_x + 1)
    sumx_tile = np.zeros(tile_inbounds_shape, dtype=np.float64)
    sumy_tile = np.zeros(tile_inbounds_shape, dtype=np.float64)
    sumxx_tile = np.zeros(tile_inbounds_shape, dtype=np.float64)
    sumyy_tile = np.zeros(tile_inbounds_shape, dtype=np.float64)
    sumxy_tile = np.zeros(tile_inbounds_shape, dtype=np.float64)
    n_tile = np.zeros(tile_inbounds_shape, dtype=np.uint32)

    # Index of the in-bounds window within a tile's data array.
    window = (0, slice(min_y, max_y + 1), slice(min_x, max_x + 1))

    # Can only retrieve some number of days worth of data from Solr
    # at a time.  Set desired value here.
    days_at_a_time = 90
    t_incr = 86400 * days_at_a_time

    tile_service = NexusTileService()

    # Compute the intermediate summations needed for the Pearson
    # Correlation Coefficient.  We use a one-pass online algorithm
    # so that not all of the data needs to be kept in memory all at once.
    t_start = start_time
    while t_start <= end_time:
        t_end = min(t_start + t_incr, end_time)
        ds1tiles = tile_service.get_tiles_bounded_by_box(
            min_lat, max_lat, min_lon, max_lon, ds[0], t_start, t_end)
        ds2tiles = tile_service.get_tiles_bounded_by_box(
            min_lat, max_lat, min_lon, max_lon, ds[1], t_start, t_end)

        len1 = len(ds1tiles)
        len2 = len(ds2tiles)
        i1 = 0
        i2 = 0
        time1 = 0
        time2 = 0
        # Merge-join the two tile lists on time: advance whichever side is
        # behind until both point at tiles with the same time stamp.
        while i1 < len1 and i2 < len2:
            tile1 = ds1tiles[i1]
            tile2 = ds2tiles[i2]
            assert tile1.times[0] >= time1, 'DS1 time out of order!'
            assert tile2.times[0] >= time2, 'DS2 time out of order!'
            time1 = tile1.times[0]
            time2 = tile2.times[0]
            if time1 < time2:
                i1 += 1
                continue
            elif time2 < time1:
                i2 += 1
                continue
            assert (time1 == time2), \
                "Mismatched tile times %d and %d" % (time1, time2)

            # 1 where both tiles hold an unmasked value, 0 elsewhere.
            # getmaskarray yields a full boolean array even for tiles
            # without an explicit mask.
            joint_mask = (~(np.ma.getmaskarray(tile1.data)[window] |
                            np.ma.getmaskarray(tile2.data)[window])
                          ).astype(np.uint8)

            # Zero the excluded cells BEFORE forming any products.  A masked
            # +/-inf becomes a huge finite number under nan_to_num; squaring
            # it first could overflow to inf, and inf * 0 would then put NaN
            # into the accumulators.
            x = np.nan_to_num(np.ma.getdata(tile1.data)[window]) * joint_mask
            y = np.nan_to_num(np.ma.getdata(tile2.data)[window]) * joint_mask

            sumx_tile += x
            sumy_tile += y
            sumxx_tile += x * x
            sumyy_tile += y * y
            sumxy_tile += x * y
            n_tile += joint_mask
            i1 += 1
            i2 += 1
        # Windows are inclusive on both ends, so resume one past the end.
        t_start = t_end + 1

    return ((min_lat, max_lat, min_lon, max_lon),
            (sumx_tile, sumy_tile, sumxx_tile, sumyy_tile, sumxy_tile,
             n_tile))
def setUp(self):
    """Create a ``NexusTileService`` and store it on ``self.tile_service``.

    NOTE(review): the name suggests a unittest-style fixture that runs
    before each test -- confirm against the enclosing class.
    """
    self.tile_service = NexusTileService()
def __init__(self):
    """Create the ``NexusTileService`` used by this object.

    The service is stored in a double-underscore attribute, which Python
    name-mangles with the enclosing class name, so it is not reachable as
    ``obj.__tile_service`` from outside the class.
    """
    self.__tile_service = NexusTileService()
"""
Copyright (c) 2016 Jet Propulsion Laboratory,
California Institute of Technology.  All rights reserved
"""
from nexustiles.nexustiles import NexusTileService

# Construct the tile service with its default arguments and print the
# resulting object.  The single-argument print(...) form produces the same
# output on Python 2 and also parses on Python 3.
print(NexusTileService())