Beispiel #1
0
    def _map(tile_in_spark):
        """Accumulate per-cell sums and valid-sample counts for one spatial tile.

        Args:
            tile_in_spark: Sequence ``(tile_bounds, start_time, end_time, ds)``.
                ``tile_bounds`` is ``(min_lat, max_lat, min_lon, max_lon,
                min_y, max_y, min_x, max_x)``; the last four are inclusive
                row/column indices into each tile's data array.

        Returns:
            ``((min_lat, max_lat, min_lon, max_lon), (sum_tile, cnt_tile))``
            where ``sum_tile`` (float64) is the per-cell sum of the unmasked
            values and ``cnt_tile`` (uint32) is the per-cell number of
            unmasked values that were added.
        """
        tile_bounds = tile_in_spark[0]
        (min_lat, max_lat, min_lon, max_lon,
         min_y, max_y, min_x, max_x) = tile_bounds
        startTime = tile_in_spark[1]
        endTime = tile_in_spark[2]
        ds = tile_in_spark[3]
        tile_service = NexusTileService()

        tile_inbounds_shape = (max_y - min_y + 1, max_x - min_x + 1)
        rows = slice(min_y, max_y + 1)
        cols = slice(min_x, max_x + 1)

        # Query the tile service in windows of this many days at a time.
        # NOTE(review): 86400 implies the times are in seconds - confirm.
        days_at_a_time = 30
        t_incr = 86400 * days_at_a_time

        sum_tile = np.zeros(tile_inbounds_shape, dtype=np.float64)
        cnt_tile = np.zeros(tile_inbounds_shape, dtype=np.uint32)
        t_start = startTime
        while t_start <= endTime:
            t_end = min(t_start + t_incr, endTime)

            nexus_tiles = \
                tile_service.get_tiles_bounded_by_box(min_lat, max_lat,
                                                      min_lon, max_lon,
                                                      ds=ds,
                                                      start_time=t_start,
                                                      end_time=t_end)

            for tile in nexus_tiles:
                # getmaskarray always yields a full boolean array, even when
                # the tile carries ``np.ma.nomask`` (a scalar that cannot be
                # indexed).
                valid = ~np.ma.getmaskarray(tile.data)[0, rows, cols]
                # Work on a copy of the window only; the tile is not modified.
                values = np.nan_to_num(tile.data.data[0, rows, cols])
                # Masked cells contribute nothing, whatever value sits
                # underneath the mask.
                sum_tile += np.where(valid, values, 0.0)
                cnt_tile += valid.astype(np.uint8)
            # The next window starts just after the end of this one.
            t_start = t_end + 1

        return (min_lat, max_lat, min_lon, max_lon), (sum_tile, cnt_tile)
class TestService(unittest.TestCase):
    def setUp(self):
        config = StringIO("""[cassandra]
host=127.0.0.1
keyspace=nexustiles
local_datacenter=datacenter1
protocol_version=3
port=9042

[solr]
host=http://localhost:8983
core=nexustiles

[datastore]
store=cassandra""")
        cp = ConfigParser.RawConfigParser()
        cp.readfp(config)

        self.tile_service = NexusTileService(config=cp)

    def test_get_distinct_bounding_boxes_in_polygon(self):
        boxes = self.tile_service.get_distinct_bounding_boxes_in_polygon(box(-180, -90, 180, 90),
                                                                         "MXLDEPTH_ECCO_version4_release1",
                                                                         1, time.time())
        for b in boxes:
            print b.bounds

    def test_get_distinct_bounding_boxes_in_polygon_mur(self):
        boxes = self.tile_service.get_distinct_bounding_boxes_in_polygon(box(-180, -90, 180, 90),
                                                                         "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1",
                                                                         1, time.time())
        for b in boxes:
            print b.bounds

    def test_find_tiles_by_exact_bounds(self):
        tiles = self.tile_service.find_tiles_by_exact_bounds((175.01, -42.68, 180.0, -40.2),
                                                             "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1",
                                                             1, time.time())
        for tile in tiles:
            print tile.get_summary()

    def test_sorted_box(self):

        tiles = self.tile_service.get_tiles_bounded_by_box(-42.68, -40.2, 175.01, 180.0,
                                                   "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1",
                                                   1, time.time())
        for tile in tiles:
            print tile.min_time

    def test_time_series_tile(self):
        tiles = self.tile_service.find_tile_by_id("055c0b51-d0fb-3f39-b48a-4f762bf0c994")
        for tile in tiles:
            print tile.get_summary()

    def test_get_tiles_by_metadata(self):
        tiles = self.tile_service.get_tiles_by_metadata(['id:60758e00-5721-3a6e-bf57-78448bb0aeeb'],
                                                        "MUR-JPL-L4-GLOB-v4.1", 1514764800, 1514764800)
        for tile in tiles:
            print tile.get_summary()
Beispiel #3
0
    def _calc_variance(tile_in_spark):
        """Accumulate squared anomalies and valid-sample counts for one tile.

        Args:
            tile_in_spark: Sequence ``(tile_bounds, start_time, end_time, ds,
                x_bar)``. ``tile_bounds`` is ``(min_lat, max_lat, min_lon,
                max_lon, min_y, max_y, min_x, max_x)``; the last four are
                inclusive row/column indices into each tile's data array.
                ``x_bar`` is the per-cell value subtracted from every sample;
                it must match the shape of the selected window.

        Returns:
            ``((min_lat, max_lat, min_lon, max_lon),
            (data_anomaly_squared_tile, cnt_tile))`` where the first array
            (float64) is the per-cell sum of ``(value - x_bar) ** 2`` over
            unmasked samples and ``cnt_tile`` (uint32) counts those samples.
        """
        # tile_in_spark is a spatial tile that corresponds to nexus tiles of
        # the same area
        tile_bounds = tile_in_spark[0]
        (min_lat, max_lat, min_lon, max_lon, min_y, max_y, min_x,
         max_x) = tile_bounds
        startTime = tile_in_spark[1]
        endTime = tile_in_spark[2]
        ds = tile_in_spark[3]
        x_bar = tile_in_spark[4]
        tile_service = NexusTileService()

        tile_inbounds_shape = (max_y - min_y + 1, max_x - min_x + 1)
        rows = slice(min_y, max_y + 1)
        cols = slice(min_x, max_x + 1)

        # Hard-coded limit on how many days of tiles are pulled per query.
        # NOTE(review): 86400 implies the times are in seconds - confirm.
        days_at_a_time = 30
        t_incr = 86400 * days_at_a_time

        data_anomaly_squared_tile = np.zeros(tile_inbounds_shape,
                                             dtype=np.float64)
        cnt_tile = np.zeros(tile_inbounds_shape, dtype=np.uint32)

        # nan_to_num returns a new array, so the caller's x_bar is left
        # untouched (and a read-only input no longer fails).
        x_bar = np.nan_to_num(np.asarray(x_bar))

        t_start = startTime
        while t_start <= endTime:
            t_end = min(t_start + t_incr, endTime)

            nexus_tiles = \
                tile_service.get_tiles_bounded_by_box(min_lat, max_lat,
                                                      min_lon, max_lon,
                                                      ds=ds,
                                                      start_time=t_start,
                                                      end_time=t_end)

            for tile in nexus_tiles:
                # getmaskarray always yields a full boolean array, even when
                # the tile carries ``np.ma.nomask``.
                valid = ~np.ma.getmaskarray(tile.data)[0, rows, cols]
                values = np.nan_to_num(tile.data.data[0, rows, cols])

                # Subtract x_bar from each valid value.  Masked samples must
                # contribute 0; zeroing the value first and then subtracting
                # would add x_bar ** 2 for every masked sample.
                data_anomaly_tile = np.where(valid, values - x_bar, 0.0)
                data_anomaly_squared_tile += data_anomaly_tile * data_anomaly_tile

                # Count only the samples that were actually accumulated.
                cnt_tile += valid.astype(np.uint8)
            # The next window starts just after the end of this one.
            t_start = t_end + 1

        return (min_lat, max_lat, min_lon,
                max_lon), (data_anomaly_squared_tile, cnt_tile)
Beispiel #4
0
class TestService(unittest.TestCase):
    def setUp(self):
        config = StringIO("""[cassandra]
host=127.0.0.1
keyspace=nexustiles
local_datacenter=datacenter1
protocol_version=3
port=32769

[solr]
host=localhost:8986
core=nexustiles""")
        cp = ConfigParser.RawConfigParser()
        cp.readfp(config)

        self.tile_service = NexusTileService(config=cp)

    def test_get_distinct_bounding_boxes_in_polygon(self):
        boxes = self.tile_service.get_distinct_bounding_boxes_in_polygon(box(-180, -90, 180, 90),
                                                                         "MXLDEPTH_ECCO_version4_release1",
                                                                         1, time.time())
        for b in boxes:
            print b.bounds

    def test_get_distinct_bounding_boxes_in_polygon_mur(self):
        boxes = self.tile_service.get_distinct_bounding_boxes_in_polygon(box(-180, -90, 180, 90),
                                                                         "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1",
                                                                         1, time.time())
        for b in boxes:
            print b.bounds

    def test_find_tiles_by_exact_bounds(self):
        tiles = self.tile_service.find_tiles_by_exact_bounds((175.01, -42.68, 180.0, -40.2),
                                                             "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1",
                                                             1, time.time())
        for tile in tiles:
            print tile.get_summary()

    def test_sorted_box(self):

        tiles = self.tile_service.get_tiles_bounded_by_box(-42.68, -40.2, 175.01, 180.0,
                                                   "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1",
                                                   1, time.time())
        for tile in tiles:
            print tile.min_time
Beispiel #5
0
    def _map(tile_in_spark):
        """Accumulate per-cell sums and valid-sample counts for one spatial tile.

        Args:
            tile_in_spark: Sequence ``(tile_bounds, start_time, end_time, ds)``.
                ``tile_bounds`` is ``(min_lat, max_lat, min_lon, max_lon,
                min_y, max_y, min_x, max_x)``; the last four are inclusive
                row/column indices into each tile's data array.

        Returns:
            ``(tile_bounds, (sum_tile, cnt_tile))``.  Note that the key is the
            full eight-element ``tile_bounds``.  ``sum_tile`` (float64) is the
            per-cell sum of the unmasked values and ``cnt_tile`` (uint32) is
            the per-cell number of unmasked values that were added.
        """
        # tile_in_spark is a spatial tile that corresponds to nexus tiles of
        # the same area
        tile_bounds = tile_in_spark[0]
        (min_lat, max_lat, min_lon, max_lon, min_y, max_y, min_x,
         max_x) = tile_bounds
        startTime = tile_in_spark[1]
        endTime = tile_in_spark[2]
        ds = tile_in_spark[3]
        tile_service = NexusTileService()

        tile_inbounds_shape = (max_y - min_y + 1, max_x - min_x + 1)
        window = (0, slice(min_y, max_y + 1), slice(min_x, max_x + 1))

        # Hard-coded limit on how many days of tiles are pulled per query.
        # NOTE(review): 86400 implies the times are in seconds - confirm.
        days_at_a_time = 30
        t_incr = 86400 * days_at_a_time

        sum_tile = np.zeros(tile_inbounds_shape, dtype=np.float64)
        cnt_tile = np.zeros(tile_inbounds_shape, dtype=np.uint32)
        t_start = startTime
        while t_start <= endTime:
            t_end = min(t_start + t_incr, endTime)

            nexus_tiles = \
                tile_service.get_tiles_bounded_by_box(min_lat, max_lat,
                                                      min_lon, max_lon,
                                                      ds=ds,
                                                      start_time=t_start,
                                                      end_time=t_end)

            for tile in nexus_tiles:
                # getmaskarray always yields a full boolean array, even when
                # the tile carries ``np.ma.nomask`` (which cannot be indexed).
                unmasked = ~np.ma.getmaskarray(tile.data)[window]
                # NaNs become 0 on a copy of the window; the tile itself is
                # left unmodified.
                window_values = np.nan_to_num(tile.data.data[window])
                # Masked cells add 0 to both the sum and the count, whatever
                # value sits underneath the mask.
                sum_tile += np.where(unmasked, window_values, 0.0)
                cnt_tile += unmasked.astype(np.uint8)
            # The next window starts just after the end of this one.
            t_start = t_end + 1

        print("sum tile", sum_tile)
        print("count tile", cnt_tile)
        return tile_bounds, (sum_tile, cnt_tile)
Beispiel #6
0
    def _map(tile_in):
        """Accumulate the running sums needed for a per-cell Pearson correlation.

        Args:
            tile_in: ``(tile_bounds, start_time, end_time, ds)`` where
                ``tile_bounds`` is ``(min_lat, max_lat, min_lon, max_lon,
                min_y, max_y, min_x, max_x)`` and ``ds[0]`` / ``ds[1]`` name
                the two datasets to compare.

        Returns:
            ``((min_lat, max_lat, min_lon, max_lon),
            (sumx, sumy, sumxx, sumyy, sumxy, n))`` - five float64 arrays of
            sums and one uint32 array counting the samples at which both
            datasets are unmasked.
        """
        bounds, first_time, last_time, ds = tile_in
        (min_lat, max_lat, min_lon, max_lon,
         min_y, max_y, min_x, max_x) = bounds

        # Selection applied to every tile array: first-axis index 0 plus the
        # inclusive row/column window.
        window = (0, slice(min_y, max_y + 1), slice(min_x, max_x + 1))

        # Accumulators for the one-pass correlation sums.
        shape = (max_y - min_y + 1, max_x - min_x + 1)
        sum_x, sum_y, sum_xx, sum_yy, sum_xy = (
            np.zeros(shape, dtype=np.float64) for _ in range(5))
        pair_count = np.zeros(shape, dtype=np.uint32)

        # Only this many days of data are requested per query.
        days_at_a_time = 90
        step = 86400 * days_at_a_time

        service = NexusTileService()

        chunk_start = first_time
        while chunk_start <= last_time:
            chunk_end = min(chunk_start + step, last_time)
            tiles_a = service.get_tiles_bounded_by_box(
                min_lat, max_lat, min_lon, max_lon, ds[0],
                chunk_start, chunk_end)
            tiles_b = service.get_tiles_bounded_by_box(
                min_lat, max_lat, min_lon, max_lon, ds[1],
                chunk_start, chunk_end)

            count_a = len(tiles_a)
            count_b = len(tiles_b)
            pos_a = pos_b = 0
            stamp_a = stamp_b = 0
            # Merge-walk both tile lists, pairing tiles with equal times.
            while pos_a < count_a and pos_b < count_b:
                tile_a = tiles_a[pos_a]
                tile_b = tiles_b[pos_b]
                # The walk relies on each list being ordered by time.
                assert tile_a.times[0] >= stamp_a, 'DS1 time out of order!'
                assert tile_b.times[0] >= stamp_b, 'DS2 time out of order!'
                stamp_a = tile_a.times[0]
                stamp_b = tile_b.times[0]
                if stamp_a < stamp_b:
                    pos_a += 1
                    continue
                if stamp_b < stamp_a:
                    pos_b += 1
                    continue
                assert (stamp_a == stamp_b), \
                    "Mismatched tile times %d and %d" % (stamp_a, stamp_b)

                values_a = np.nan_to_num(tile_a.data.data)[window]
                values_b = np.nan_to_num(tile_b.data.data)[window]
                # 1 where both tiles are unmasked, 0 elsewhere.
                both_valid = ((~tile_a.data.mask).astype(np.uint8) *
                              (~tile_b.data.mask).astype(np.uint8))[window]

                sum_x += values_a * both_valid
                sum_y += values_b * both_valid
                sum_xx += values_a * values_a * both_valid
                sum_yy += values_b * values_b * both_valid
                sum_xy += values_a * values_b * both_valid
                pair_count += both_valid

                pos_a += 1
                pos_b += 1
            chunk_start = chunk_end + 1

        return ((min_lat, max_lat, min_lon, max_lon),
                (sum_x, sum_y, sum_xx, sum_yy, sum_xy, pair_count))
Beispiel #7
0
    def _map(tile_in_spark):
        """Sum the unmasked values, and count them, for each cell of one tile.

        Args:
            tile_in_spark: Sequence ``(tile_bounds, start_time, end_time, ds)``.
                ``tile_bounds`` is ``(min_lat, max_lat, min_lon, max_lon,
                min_y, max_y, min_x, max_x)``; the last four are inclusive
                row/column indices into each tile's data array.

        Returns:
            ``((min_lat, max_lat, min_lon, max_lon), (sum_tile, cnt_tile))``
            with ``sum_tile`` a float64 array of per-cell sums and
            ``cnt_tile`` a uint32 array of per-cell sample counts.
        """
        tile_bounds = tile_in_spark[0]
        (min_lat, max_lat, min_lon, max_lon, min_y, max_y, min_x,
         max_x) = tile_bounds
        startTime = tile_in_spark[1]
        endTime = tile_in_spark[2]
        ds = tile_in_spark[3]
        tile_service = NexusTileService()

        tile_inbounds_shape = (max_y - min_y + 1, max_x - min_x + 1)
        y_window = slice(min_y, max_y + 1)
        x_window = slice(min_x, max_x + 1)

        # Number of days of tiles requested per query.
        # NOTE(review): 86400 implies the times are in seconds - confirm.
        days_at_a_time = 30
        t_incr = 86400 * days_at_a_time

        sum_tile = np.zeros(tile_inbounds_shape, dtype=np.float64)
        cnt_tile = np.zeros(tile_inbounds_shape, dtype=np.uint32)
        t_start = startTime
        while t_start <= endTime:
            t_end = min(t_start + t_incr, endTime)
            nexus_tiles = \
                tile_service.get_tiles_bounded_by_box(min_lat, max_lat,
                                                      min_lon, max_lon,
                                                      ds=ds,
                                                      start_time=t_start,
                                                      end_time=t_end)

            for tile in nexus_tiles:
                # A tile with no masked cells may expose ``np.ma.nomask``,
                # which cannot be indexed; getmaskarray always returns a
                # full-shape boolean array.
                is_valid = ~np.ma.getmaskarray(tile.data)[0, y_window, x_window]
                # Replace NaNs on a copy of the window rather than rewriting
                # the whole tile in place.
                cell_values = np.nan_to_num(
                    tile.data.data[0, y_window, x_window])
                # Masked cells are excluded from the sum as well as from the
                # count, whatever value sits underneath the mask.
                sum_tile += np.where(is_valid, cell_values, 0.0)
                cnt_tile += is_valid.astype(np.uint8)
            # The next window starts just after the end of this one.
            t_start = t_end + 1

        return ((min_lat, max_lat, min_lon, max_lon), (sum_tile, cnt_tile))
Beispiel #8
0
    def _map(tile_in):
        """Compute a per-cell Pearson correlation between two datasets.

        Args:
            tile_in: ``(tile_bounds, start_time, end_time, ds)`` where
                ``tile_bounds`` is ``(min_lat, max_lat, min_lon, max_lon,
                min_y, max_y, min_x, max_x)`` and ``ds[0]`` / ``ds[1]`` name
                the two datasets that are queried.

        Returns:
            ``(stats_tile, min_lat, max_lat, min_lon, max_lon)``.
            ``stats_tile`` is a nested list indexed ``[y][x]`` of dicts with
            keys ``'r'`` (correlation value) and ``'cnt'`` (number of samples
            at which both datasets were unmasked).

        Progress and timing information is printed to stdout while running.
        """
        # Unpack input
        tile_bounds, start_time, end_time, ds = tile_in
        (min_lat, max_lat, min_lon, max_lon, 
         min_y, max_y, min_x, max_x) = tile_bounds

        # Create arrays to hold intermediate results during the correlation
        # coefficient calculation.  The five sums are plain arrays; only
        # n_tile is a masked array (its mask is filled in at the end).
        tile_inbounds_shape = (max_y-min_y+1, max_x-min_x+1)
        sumx_tile = np.zeros(tile_inbounds_shape, dtype=np.float64)
        sumy_tile = np.zeros(tile_inbounds_shape, dtype=np.float64)
        sumxx_tile = np.zeros(tile_inbounds_shape, dtype=np.float64)
        sumyy_tile = np.zeros(tile_inbounds_shape, dtype=np.float64)
        sumxy_tile = np.zeros(tile_inbounds_shape, dtype=np.float64)
        n_tile = np.ma.array(np.zeros(tile_inbounds_shape, dtype=np.uint32))

        # Can only retrieve some number of days worth of data from Solr
        # at a time.  Set desired value here.
        days_at_a_time = 90
        #days_at_a_time = 30
        #days_at_a_time = 7
        #days_at_a_time = 1
        print 'days_at_a_time = ', days_at_a_time
        # Window length; 86400 implies the times are in seconds.
        t_incr = 86400 * days_at_a_time

        tile_service = NexusTileService()

        # Compute Pearson Correlation Coefficient.  We use an online algorithm
        # so that not all of the data needs to be kept in memory all at once.
        t_start = start_time
        while t_start <= end_time:
            # Clamp the window so it never runs past end_time.
            t_end = min(t_start+t_incr,end_time)
            t1 = time()
            print 'nexus call start at time %f' % t1
            sys.stdout.flush()
            ds1tiles = tile_service.get_tiles_bounded_by_box(min_lat, 
                                                             max_lat, 
                                                             min_lon, 
                                                             max_lon, 
                                                             ds[0], 
                                                             t_start,
                                                             t_end)
            ds2tiles = tile_service.get_tiles_bounded_by_box(min_lat, 
                                                             max_lat, 
                                                             min_lon, 
                                                             max_lon, 
                                                             ds[1], 
                                                             t_start,
                                                             t_end)
            t2 = time()
            print 'nexus call end at time %f' % t2
            print 'secs in nexus call: ', t2-t1
            sys.stdout.flush()
            # NOTE(review): the return value is discarded, so _prune_tiles
            # presumably modifies the lists in place - confirm against its
            # definition.
            CorrMapSparkHandlerImpl._prune_tiles(ds1tiles)
            CorrMapSparkHandlerImpl._prune_tiles(ds2tiles)
            
            len1 = len(ds1tiles)
            len2 = len(ds2tiles)
            print 't %d to %d - Got %d and %d tiles' % (t_start, t_end, 
                                                        len1, len2)
            sys.stdout.flush()
            # Walk both tile lists in step, pairing tiles whose first time
            # value is equal.
            # NOTE(review): this assumes both lists are ordered by time -
            # nothing here checks it.
            i1 = 0
            i2 = 0
            while i1 < len1 and i2 < len2:
                tile1 = ds1tiles[i1]
                tile2 = ds2tiles[i2]
                #print 'tile1.data = ',tile1.data
                #print 'tile2.data = ',tile2.data
                time1 = tile1.times[0]
                time2 = tile2.times[0]
                # Advance whichever list is behind until the times line up.
                if time1 < time2: 
                    i1 += 1
                    continue
                elif time2 < time1:
                    i2 += 1
                    continue
                assert (time1 == time2),\
                    "Mismatched tile times %d and %d" % (time1, time2)
                t1_data = tile1.data.data
                t1_mask = tile1.data.mask
                t2_data = tile2.data.data
                t2_mask = tile2.data.mask
                # Replace NaNs with numbers so they cannot poison the sums.
                t1_data = np.nan_to_num(t1_data)
                t2_data = np.nan_to_num(t2_data)
                # 1 where both tiles are unmasked, 0 elsewhere; multiplying by
                # it drops every sample that is masked in either dataset.
                joint_mask = ((~t1_mask).astype(np.uint8) *
                              (~t2_mask).astype(np.uint8))
                #print 'joint_mask=',joint_mask
                sumx_tile += (t1_data[0,min_y:max_y+1,min_x:max_x+1] *
                              joint_mask[0,min_y:max_y+1,min_x:max_x+1])
                #print 'sumx_tile=',sumx_tile
                sumy_tile += (t2_data[0,min_y:max_y+1,min_x:max_x+1] *
                              joint_mask[0,min_y:max_y+1,min_x:max_x+1])
                #print 'sumy_tile=',sumy_tile
                sumxx_tile += (t1_data[0,min_y:max_y+1,min_x:max_x+1] *
                               t1_data[0,min_y:max_y+1,min_x:max_x+1] *
                               joint_mask[0,min_y:max_y+1,min_x:max_x+1])
                #print 'sumxx_tile=',sumxx_tile
                sumyy_tile += (t2_data[0,min_y:max_y+1,min_x:max_x+1] *
                               t2_data[0,min_y:max_y+1,min_x:max_x+1] *
                               joint_mask[0,min_y:max_y+1,min_x:max_x+1])
                #print 'sumyy_tile=',sumyy_tile
                sumxy_tile += (t1_data[0,min_y:max_y+1,min_x:max_x+1] *
                               t2_data[0,min_y:max_y+1,min_x:max_x+1] *
                               joint_mask[0,min_y:max_y+1,min_x:max_x+1])
                #print 'sumxy_tile=',sumxy_tile
                n_tile.data[:,:] += joint_mask[0,min_y:max_y+1,min_x:max_x+1]
                #print 'n_tile=',n_tile
                i1 += 1
                i2 += 1
            # The next window starts just after the end of this one.
            t_start = t_end + 1

        # r = (Sxy - Sx*Sy/n) / sqrt((Sxx - Sx*Sx/n) * (Syy - Sy*Sy/n))
        r_tile = np.ma.array((sumxy_tile-sumx_tile*sumy_tile/n_tile) /
                             np.sqrt((sumxx_tile-sumx_tile*sumx_tile/n_tile)*
                                     (sumyy_tile - sumy_tile*sumy_tile/n_tile)))
        #print 'r_tile=',r_tile
        # Mask every cell that never received a joint sample (n == 0).
        n_tile.mask = ~(n_tile.data.astype(bool))
        r_tile.mask = n_tile.mask
        #print 'r_tile masked=',r_tile
        # NOTE(review): the values are read through .data, so the masks set
        # above are not reflected in stats_tile.
        stats_tile = [[{'r': r_tile.data[y,x], 'cnt': n_tile.data[y,x]} for x in range(tile_inbounds_shape[1])] for y in range(tile_inbounds_shape[0])]
        #print 'stats_tile = ', stats_tile
        print 'Finished tile', tile_bounds
        sys.stdout.flush()
        return (stats_tile,min_lat,max_lat,min_lon,max_lon)
Beispiel #9
0
    def calc_average_on_day(tile_in_spark):
        """Compute summary statistics for each requested time over a box.

        Args:
            tile_in_spark: ``(min_lat, max_lat, min_lon, max_lon, dataset,
                timestamps, cwd)``.  ``timestamps`` is the sequence of times to
                report on; its first and last entries bound the tile query.
                ``cwd`` is a directory the process changes into before doing
                anything else.

        Returns:
            A list with one dict per entry of ``timestamps`` (in the same
            order) holding ``'min'``, ``'max'``, ``'mean'``, ``'cnt'``,
            ``'std'`` and ``'time'``.  A time with no tiles, or with only
            masked data, is reported with all statistics set to zero.  An
            empty ``timestamps`` yields an empty list.
        """
        (min_lat, max_lat, min_lon, max_lon, dataset, timestamps,
         cwd) = tile_in_spark
        os.chdir(cwd)
        # Nothing to report on; indexing timestamps[0] below would raise.
        if len(timestamps) == 0:
            return []
        start_time = timestamps[0]
        end_time = timestamps[-1]
        tile_service = NexusTileService()
        ds1_nexus_tiles = \
            tile_service.get_tiles_bounded_by_box(min_lat, max_lat,
                                                  min_lon, max_lon,
                                                  dataset,
                                                  start_time,
                                                  end_time)

        # Group the tiles by their first time value once, instead of
        # rescanning every tile for every timestamp.
        tiles_by_time = {}
        for tile in ds1_nexus_tiles:
            tiles_by_time.setdefault(tile.times[0], []).append(tile)

        stats_arr = []
        for timeinseconds in timestamps:
            tile_data_agg = np.ma.array(
                [tile.data.flatten()
                 for tile in tiles_by_time.get(timeinseconds, [])])
            if (len(tile_data_agg) == 0) or tile_data_agg.mask.all():
                # No usable data for this time: report zeros.
                data_min = 0.
                data_max = 0.
                daily_mean = 0.
                data_count = 0
                data_std = 0.
            else:
                data_min = np.ma.min(tile_data_agg)
                data_max = np.ma.max(tile_data_agg)
                daily_mean = np.ma.mean(tile_data_agg).item()
                data_count = np.ma.count(tile_data_agg)
                data_std = np.ma.std(tile_data_agg)

            # Record the statistics for this time.
            stat = {
                'min': data_min,
                'max': data_max,
                'mean': daily_mean,
                'cnt': data_count,
                'std': data_std,
                'time': int(timeinseconds)
            }
            stats_arr.append(stat)
        return stats_arr
Beispiel #10
0
    def calc_average_on_day(tile_in_spark):
        """Compute summary statistics for each requested time over a box.

        Args:
            tile_in_spark: ``(min_lat, max_lat, min_lon, max_lon, dataset,
                timestamps, fill)``.  ``timestamps`` is the sequence of times
                to report on; its first and last entries bound the tile query.
                ``fill`` is accepted but not used here.

        Returns:
            A list of dicts with keys ``'min'``, ``'max'``, ``'mean'``,
            ``'cnt'``, ``'std'`` and ``'time'``, one per entry of
            ``timestamps`` that has at least one unmasked value.  Times with
            no tiles or only masked data are omitted.  ``'mean'`` is weighted
            by the cosine of the latitude; the other statistics are
            unweighted.
        """
        (min_lat, max_lat, min_lon, max_lon, dataset, timestamps,
         fill) = tile_in_spark
        if len(timestamps) == 0:
            return []
        tile_service = NexusTileService()
        ds1_nexus_tiles = \
            tile_service.get_tiles_bounded_by_box(min_lat, max_lat,
                                                  min_lon, max_lon,
                                                  dataset,
                                                  timestamps[0],
                                                  timestamps[-1],
                                                  rows=5000)

        # Bucket the tiles by their first time value.  Tiles whose time was
        # not requested are ignored rather than raising KeyError.
        tile_dict = {timeinseconds: [] for timeinseconds in timestamps}
        for tile in ds1_nexus_tiles:
            bucket = tile_dict.get(tile.times[0])
            if bucket is not None:
                bucket.append(tile)

        stats_arr = []
        for timeinseconds in timestamps:
            cur_tiles = tile_dict[timeinseconds]
            if not cur_tiles:
                continue
            # getmaskarray gives a full-size mask even for a tile whose mask
            # is ``np.ma.nomask``, so data and mask always have equal length.
            tile_data_agg = np.ma.array(
                data=np.hstack([tile.data.data.flatten()
                                for tile in cur_tiles]),
                mask=np.hstack([np.ma.getmaskarray(tile.data).flatten()
                                for tile in cur_tiles]))
            # One latitude per flattened data value: each latitude is
            # repeated once for every longitude of its tile.
            lats_agg = np.hstack([
                np.repeat(tile.latitudes, len(tile.longitudes))
                for tile in cur_tiles
            ])
            if (len(tile_data_agg) == 0) or tile_data_agg.mask.all():
                continue

            data_min = np.ma.min(tile_data_agg)
            data_max = np.ma.max(tile_data_agg)
            daily_mean = \
                np.ma.average(tile_data_agg,
                              weights=np.cos(np.radians(lats_agg))).item()
            data_count = np.ma.count(tile_data_agg)
            data_std = np.ma.std(tile_data_agg)

            # Record the statistics for this time.
            stat = {
                'min': data_min,
                'max': data_max,
                'mean': daily_mean,
                'cnt': data_count,
                'std': data_std,
                'time': int(timeinseconds)
            }
            stats_arr.append(stat)
        return stats_arr