def latitude_time_hofmoeller_stats(tile_in_spark):
    """Compute per-latitude statistics for a single tile.

    Expects ``tile_in_spark`` to be the tuple
    ``(tile_id, index, min_lat, max_lat, min_lon, max_lon)``.

    Returns a dict ``{'sequence': index, 'time': <tile min time>,
    'lats': [...]}`` where each entry of ``'lats'`` holds the count, mean,
    max, min and standard deviation of the data values at one latitude.
    Returns ``None`` when the tile cannot be found or nothing remains
    after masking to the bounding box.
    """
    tile_id, index, min_lat, max_lat, min_lon, max_lon = tile_in_spark
    tile_service = NexusTileService()

    try:
        # Fetch the tile, then clip it to the requested search domain.
        tile = tile_service.find_tile_by_id(tile_id)[0]
        tile = tile_service.mask_tiles_to_bbox(min_lat, max_lat,
                                               min_lon, max_lon, [tile])[0]
    except IndexError:
        # Tile missing, or fully masked out by the bounding box.
        return None

    stat = {
        'sequence': index,
        'time': np.ma.min(tile.times),
        'lats': []
    }

    # groupby requires its input sorted by the grouping key.
    all_points = sorted(tile.nexus_point_generator(),
                        key=lambda p: p.latitude)
    for lat, group in itertools.groupby(all_points,
                                        key=lambda p: p.latitude):
        vals = np.array([p.data_val for p in group])
        # .item() converts numpy scalars to plain Python numbers.
        stat['lats'].append({
            'latitude': float(lat),
            'cnt': len(vals),
            'avg': np.mean(vals).item(),
            'max': np.max(vals).item(),
            'min': np.min(vals).item(),
            'std': np.std(vals).item()
        })

    return stat
class TestService(unittest.TestCase):
    # Integration tests for NexusTileService. They require live Cassandra
    # and Solr backends at the addresses configured in setUp, with the
    # referenced datasets ingested -- they are not self-contained unit tests.

    def setUp(self):
        # Build an in-memory config pointing the service at local backends.
        # NOTE: the config lines must start at column 0 -- Python 2
        # ConfigParser treats leading-whitespace lines as continuations.
        config = StringIO("""[cassandra]
host=127.0.0.1
keyspace=nexustiles
local_datacenter=datacenter1
protocol_version=3
port=9042
[solr]
host=http://localhost:8983
core=nexustiles
[datastore]
store=cassandra""")
        cp = ConfigParser.RawConfigParser()
        cp.readfp(config)
        self.tile_service = NexusTileService(config=cp)

    def test_get_distinct_bounding_boxes_in_polygon(self):
        # Whole-globe polygon query; prints each distinct box's bounds.
        boxes = self.tile_service.get_distinct_bounding_boxes_in_polygon(
            box(-180, -90, 180, 90),
            "MXLDEPTH_ECCO_version4_release1",
            1, time.time())
        for b in boxes:
            print b.bounds

    def test_get_distinct_bounding_boxes_in_polygon_mur(self):
        # Same whole-globe query against the MUR SST dataset.
        boxes = self.tile_service.get_distinct_bounding_boxes_in_polygon(
            box(-180, -90, 180, 90),
            "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1",
            1, time.time())
        for b in boxes:
            print b.bounds

    def test_find_tiles_by_exact_bounds(self):
        # Bounds tuple is (min_lon, min_lat, max_lon, max_lat).
        tiles = self.tile_service.find_tiles_by_exact_bounds(
            (175.01, -42.68, 180.0, -40.2),
            "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1",
            1, time.time())
        for tile in tiles:
            print tile.get_summary()

    def test_sorted_box(self):
        # Box query; argument order here is (min_lat, max_lat, min_lon,
        # max_lon). Prints each tile's min time to eyeball the sort order.
        tiles = self.tile_service.get_tiles_bounded_by_box(
            -42.68, -40.2, 175.01, 180.0,
            "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1",
            1, time.time())
        for tile in tiles:
            print tile.min_time

    def test_time_series_tile(self):
        # Look up a single known tile by its id and print its summary.
        tiles = self.tile_service.find_tile_by_id(
            "055c0b51-d0fb-3f39-b48a-4f762bf0c994")
        for tile in tiles:
            print tile.get_summary()

    def test_get_tiles_by_metadata(self):
        # Metadata-filtered query (id match) over a one-instant time range.
        tiles = self.tile_service.get_tiles_by_metadata(
            ['id:60758e00-5721-3a6e-bf57-78448bb0aeeb'],
            "MUR-JPL-L4-GLOB-v4.1",
            1514764800, 1514764800)
        for tile in tiles:
            print tile.get_summary()
def hofmoeller_stats(tile_in_spark):
    """Compute per-coordinate Hofmoeller statistics for a single tile.

    Expects ``tile_in_spark`` to be the tuple
    ``(latlon, tile_id, index, min_lat, max_lat, min_lon, max_lon)``.
    ``latlon == 0`` produces a latitude-time map (unweighted average over
    longitudes); any other value produces a longitude-time map (average
    over latitudes, weighted by cos(latitude)).

    Returns a list of ``((time, coord), (time, index, coord, cnt,
    weighted_sum, sum_of_weights, max, min, var))`` tuples, one per
    distinct coordinate, or an empty list when the tile cannot be found
    or nothing remains after masking to the bounding box.
    """
    (latlon, tile_id, index, min_lat, max_lat, min_lon, max_lon) = tile_in_spark
    tile_service = NexusTileService()
    try:
        # Load the dataset tile
        tile = tile_service.find_tile_by_id(tile_id)[0]
        # Mask it to the search domain
        tile = tile_service.mask_tiles_to_bbox(min_lat, max_lat,
                                               min_lon, max_lon, [tile])[0]
    except IndexError:
        # Tile missing or fully masked out: contribute nothing downstream.
        return []

    t = np.ma.min(tile.times)
    stats = []
    points = list(tile.nexus_point_generator())

    # groupby requires its input sorted by the grouping key.
    if latlon == 0:
        # Latitude-Time Map (Average over longitudes)
        points.sort(key=lambda p: p.latitude)
        points_by_coord = itertools.groupby(points, key=lambda p: p.latitude)
    else:
        # Longitude-Time Map (Average over latitudes)
        points.sort(key=lambda p: p.longitude)
        points_by_coord = itertools.groupby(points, key=lambda p: p.longitude)

    for coord, points_at_coord in points_by_coord:
        # Column 0: data value; column 1: cos(lat) weight.
        values_at_coord = np.array([[p.data_val,
                                     np.cos(np.radians(p.latitude))]
                                    for p in points_at_coord])
        vals = np.nan_to_num(values_at_coord[:, 0])
        weights = values_at_coord[:, 1]
        coord_cnt = len(values_at_coord)
        if latlon == 0:
            # Latitude-Time Map: no weighting by cos(lat).
            weighted_sum = np.sum(vals).item()
            sum_of_weights = coord_cnt
        else:
            # Longitude-Time Map: weight by cos(lat). .item() keeps the
            # value a plain Python float, consistent with the other branch
            # (the original returned a numpy scalar here).
            weighted_sum = np.dot(vals, weights).item()
            sum_of_weights = np.sum(weights).item()
        stats.append(((t, float(coord)),
                      (t, index, float(coord), coord_cnt,
                       weighted_sum, sum_of_weights,
                       np.max(vals).item(),
                       np.min(vals).item(),
                       np.var(vals).item())))
    return stats