# Ejemplo n.º 1 (Example no. 1)
# 0
def map(tile_in_spark):
    """Spark map task: compute per-timestamp, latitude-weighted statistics.

    Args:
        tile_in_spark: 3-tuple ``(bounding_wkt, dataset, time_range)`` where
            ``bounding_wkt`` is a WKT polygon string, ``dataset`` is the
            dataset short name, and ``time_range`` is a 2-sequence of
            (start, end) times in epoch seconds.

    Returns:
        A list of dicts, one per distinct timestamp found in the fetched
        tiles (sorted ascending), each with keys 'min', 'max', 'mean',
        'cnt', 'std', and 'time'.  Returns [] when no tiles match.
    """
    from nexustiles.nexustiles import NexusTileService
    import shapely.wkt

    (bounding_wkt, dataset, time_range) = tile_in_spark
    tile_service = NexusTileService()

    ds1_nexus_tiles = \
        tile_service.get_tiles_bounded_by_polygon(shapely.wkt.loads(bounding_wkt),
                                                  dataset,
                                                  time_range[0],
                                                  time_range[1],
                                                  rows=5000)
    if not ds1_nexus_tiles:
        # print() with a single argument is valid on both Python 2 and 3;
        # the original Python 2 print statement breaks under Python 3.
        print('get_tiles_bounded_by_polygon returned 0 tiles for dataset {} in time {} - {} for bounds {}'.format(
            dataset, time_range[0], time_range[1], bounding_wkt))
        return []

    # Map each time stamp to a list of (tile index, index of that time
    # stamp within the tile's own time axis) pairs.
    tile_dict = {}
    for i, tile in enumerate(ds1_nexus_tiles):
        for j, t in enumerate(tile.times):
            tile_dict.setdefault(t, []).append((i, j))

    # For each time stamp, build one aggregate masked array of all data
    # values plus a parallel array of latitudes, then compute statistics
    # weighting each data element by cos(latitude).
    stats_arr = []
    for timeinseconds in sorted(tile_dict):
        cur_tile_list = tile_dict[timeinseconds]

        # Extend each tile's mask to also hide negative values
        # (negative values are treated as fill/invalid here).
        for i, j in cur_tile_list:
            tile_data = ds1_nexus_tiles[i].data[j]
            tile_data.mask = tile_data.mask | (tile_data.data < 0.)

        tile_data_agg = \
            np.ma.array(data=np.hstack([ds1_nexus_tiles[i].data[j].data.flatten()
                                        for i, j in cur_tile_list]),
                        mask=np.hstack([ds1_nexus_tiles[i].data[j].mask.flatten()
                                        for i, j in cur_tile_list]))
        # Each tile's data is laid out latitude-major, so repeating each
        # latitude once per longitude lines lats up with the flattened data.
        lats_agg = np.hstack([
            np.repeat(ds1_nexus_tiles[i].latitudes,
                      len(ds1_nexus_tiles[i].longitudes))
            for i, j in cur_tile_list
        ])
        # Skip time stamps whose aggregate is empty or fully masked.
        if len(tile_data_agg) == 0 or tile_data_agg.mask.all():
            continue

        stat = {
            'min': np.ma.min(tile_data_agg),
            'max': np.ma.max(tile_data_agg),
            'mean': np.ma.average(tile_data_agg,
                                  weights=np.cos(np.radians(lats_agg))).item(),
            'cnt': np.ma.count(tile_data_agg),
            'std': np.ma.std(tile_data_agg),
            'time': int(timeinseconds)
        }
        stats_arr.append(stat)
    return stats_arr
# Ejemplo n.º 2 (Example no. 2)
# 0
def calc_average_on_day(tile_in_spark):
    """Spark task: compute daily latitude-weighted statistics per timestamp.

    Args:
        tile_in_spark: 4-tuple ``(bounding_wkt, dataset, timestamps, fill)``
            where ``bounding_wkt`` is a WKT polygon string, ``dataset`` is
            the dataset short name, ``timestamps`` is an ordered sequence of
            epoch-second times, and ``fill`` is unused here (kept for the
            caller's tuple shape).

    Returns:
        A list of dicts, one per timestamp that has unmasked data, with
        keys 'min', 'max', 'mean', 'cnt', 'std', 'time', and 'iso_time'
        (UTC ISO-8601 string).  Returns [] when ``timestamps`` is empty.
    """
    import shapely.wkt
    from datetime import datetime
    from pytz import timezone
    ISO_8601 = '%Y-%m-%dT%H:%M:%S%z'

    (bounding_wkt, dataset, timestamps, fill) = tile_in_spark
    if not timestamps:
        return []
    tile_service = NexusTileService()
    ds1_nexus_tiles = \
        tile_service.get_tiles_bounded_by_polygon(shapely.wkt.loads(bounding_wkt),
                                                  dataset,
                                                  timestamps[0],
                                                  timestamps[-1],
                                                  rows=5000)

    # Group tile indices by each tile's first timestamp.  Tiles whose first
    # timestamp is not in the requested list are skipped; the original
    # raised KeyError on such a tile.
    tile_dict = {t: [] for t in timestamps}
    for i, tile in enumerate(ds1_nexus_tiles):
        t = tile.times[0]
        if t in tile_dict:
            tile_dict[t].append(i)

    stats_arr = []
    for timeinseconds in timestamps:
        cur_tile_list = tile_dict[timeinseconds]
        if not cur_tile_list:
            continue
        # cur_tile_list already contains only tiles whose first timestamp
        # equals timeinseconds, so no per-element re-check is needed when
        # aggregating (the original re-filtered redundantly).
        tile_data_agg = \
            np.ma.array(data=np.hstack([ds1_nexus_tiles[i].data.data.flatten()
                                        for i in cur_tile_list]),
                        mask=np.hstack([ds1_nexus_tiles[i].data.mask.flatten()
                                        for i in cur_tile_list]))
        # Each tile's data is laid out latitude-major, so repeating each
        # latitude once per longitude lines lats up with the flattened data.
        lats_agg = np.hstack([
            np.repeat(ds1_nexus_tiles[i].latitudes,
                      len(ds1_nexus_tiles[i].longitudes))
            for i in cur_tile_list
        ])
        # Skip timestamps whose aggregate is empty or fully masked.
        if len(tile_data_agg) == 0 or tile_data_agg.mask.all():
            continue

        stat = {
            'min': np.ma.min(tile_data_agg),
            'max': np.ma.max(tile_data_agg),
            'mean': np.ma.average(tile_data_agg,
                                  weights=np.cos(np.radians(lats_agg))).item(),
            'cnt': np.ma.count(tile_data_agg),
            'std': np.ma.std(tile_data_agg),
            'time': int(timeinseconds),
            'iso_time': datetime.utcfromtimestamp(int(timeinseconds)).replace(
                tzinfo=timezone('UTC')).strftime(ISO_8601)
        }
        stats_arr.append(stat)
    return stats_arr