def map(tile_in_spark):
    """Compute per-timestamp, latitude-weighted statistics for one spatial partition.

    Intended to run as a Spark map task: imports are function-local so they are
    resolved on the worker.

    :param tile_in_spark: 3-tuple ``(bounding_wkt, dataset, time_range)`` where
        ``bounding_wkt`` is a WKT polygon string, ``dataset`` is the dataset
        short name, and ``time_range`` is a 2-element sequence of epoch seconds.
    :return: list of dicts, one per distinct timestamp, each with keys
        ``min``, ``max``, ``mean``, ``cnt``, ``std``, ``time``. Empty list when
        no tiles are found or all data is masked.

    NOTE: mutates the fetched tiles in place (extends their masks to cover
    negative values, which are presumed to be fill — TODO confirm fill
    convention against the datasets used).
    """
    from nexustiles.nexustiles import NexusTileService
    import shapely.wkt

    (bounding_wkt, dataset, time_range) = tile_in_spark
    tile_service = NexusTileService()
    ds1_nexus_tiles = \
        tile_service.get_tiles_bounded_by_polygon(shapely.wkt.loads(bounding_wkt),
                                                  dataset,
                                                  time_range[0],
                                                  time_range[1],
                                                  rows=5000)
    if len(ds1_nexus_tiles) == 0:
        # BUG FIX: was a Python-2-only "print" statement; the parenthesized
        # single-argument form below behaves identically on Python 2 and 3.
        print('get_tiles_bounded_by_polygon returned 0 tiles for dataset {} in time {} - {} for bounds {}'.format(
            dataset, time_range[0], time_range[1], bounding_wkt))
        return []

    # Map each time stamp to the list of (tile index, time index within tile)
    # pairs that contain it, so data for one instant can be aggregated across
    # tiles below.
    tile_dict = {}
    for i, tile in enumerate(ds1_nexus_tiles):
        for j, t in enumerate(tile.times):
            tile_dict.setdefault(t, []).append((i, j))

    # For each time stamp, build one aggregate masked array of all data plus a
    # parallel array of latitudes, then compute cos(latitude)-weighted stats.
    stats_arr = []
    for timeinseconds in sorted(tile_dict.keys()):
        cur_tile_list = tile_dict[timeinseconds]
        if len(cur_tile_list) == 0:
            continue
        # Extend each tile's mask to also hide negative values (treated as
        # fill/invalid). This mutates the tile objects in place.
        for i, j in cur_tile_list:
            ds1_nexus_tiles[i].data[j].mask = \
                ds1_nexus_tiles[i].data[j].mask | (ds1_nexus_tiles[i].data[j].data < 0.)
        tile_data_agg = np.ma.array(
            data=np.hstack([ds1_nexus_tiles[i].data[j].data.flatten()
                            for i, j in cur_tile_list]),
            mask=np.hstack([ds1_nexus_tiles[i].data[j].mask.flatten()
                            for i, j in cur_tile_list]))
        # One latitude value per data element: latitudes repeat across each
        # tile's longitude axis (assumes row-major lat x lon layout — TODO
        # confirm against tile schema).
        lats_agg = np.hstack([np.repeat(ds1_nexus_tiles[i].latitudes,
                                        len(ds1_nexus_tiles[i].longitudes))
                              for i, j in cur_tile_list])
        if (len(tile_data_agg) == 0) or tile_data_agg.mask.all():
            # Nothing usable at this time stamp; emit no record for it.
            continue
        data_min = np.ma.min(tile_data_agg)
        data_max = np.ma.max(tile_data_agg)
        # Area weighting: weight each element by cos(latitude).
        daily_mean = \
            np.ma.average(tile_data_agg,
                          weights=np.cos(np.radians(lats_agg))).item()
        data_count = np.ma.count(tile_data_agg)
        data_std = np.ma.std(tile_data_agg)
        # Return stats by day.
        stat = {
            'min': data_min,
            'max': data_max,
            'mean': daily_mean,
            'cnt': data_count,
            'std': data_std,
            'time': int(timeinseconds)
        }
        stats_arr.append(stat)
    return stats_arr
def calc_average_on_day(tile_in_spark):
    """Compute latitude-weighted daily statistics for one spatial partition.

    Intended to run as a Spark map task: imports are function-local so they are
    resolved on the worker.

    :param tile_in_spark: 4-tuple ``(bounding_wkt, dataset, timestamps, fill)``
        where ``bounding_wkt`` is a WKT polygon string, ``dataset`` is the
        dataset short name, ``timestamps`` is a sorted sequence of epoch
        seconds, and ``fill`` is unused here (kept for tuple-contract
        compatibility with callers).
    :return: list of dicts, one per requested timestamp that has unmasked data,
        each with keys ``min``, ``max``, ``mean``, ``cnt``, ``std``, ``time``,
        ``iso_time``. Empty list when ``timestamps`` is empty.
    """
    # BUG FIX: NexusTileService was used below but never imported in this
    # function (the sibling map() imports it locally for Spark workers),
    # causing a NameError at runtime.
    from nexustiles.nexustiles import NexusTileService
    import shapely.wkt
    from datetime import datetime
    from pytz import timezone
    ISO_8601 = '%Y-%m-%dT%H:%M:%S%z'

    (bounding_wkt, dataset, timestamps, fill) = tile_in_spark
    if len(timestamps) == 0:
        return []
    tile_service = NexusTileService()
    ds1_nexus_tiles = \
        tile_service.get_tiles_bounded_by_polygon(shapely.wkt.loads(bounding_wkt),
                                                  dataset,
                                                  timestamps[0],
                                                  timestamps[-1],
                                                  rows=5000)

    # Map each requested time stamp to the indices of tiles carrying that time.
    tile_dict = {timeinseconds: [] for timeinseconds in timestamps}
    for i, tile in enumerate(ds1_nexus_tiles):
        # ROBUSTNESS FIX: the bounded query is by time range, so it can return
        # tiles whose time is not one of the requested timestamps; previously
        # such a tile raised KeyError here. Skip them instead.
        if tile.times[0] in tile_dict:
            tile_dict[tile.times[0]].append(i)

    stats_arr = []
    for timeinseconds in timestamps:
        cur_tile_list = tile_dict[timeinseconds]
        if len(cur_tile_list) == 0:
            continue
        tile_data_agg = np.ma.array(
            data=np.hstack([ds1_nexus_tiles[i].data.data.flatten()
                            for i in cur_tile_list
                            if (ds1_nexus_tiles[i].times[0] == timeinseconds)]),
            mask=np.hstack([ds1_nexus_tiles[i].data.mask.flatten()
                            for i in cur_tile_list
                            if (ds1_nexus_tiles[i].times[0] == timeinseconds)]))
        # One latitude value per data element: latitudes repeat across each
        # tile's longitude axis (assumes row-major lat x lon layout — TODO
        # confirm against tile schema).
        lats_agg = np.hstack([np.repeat(ds1_nexus_tiles[i].latitudes,
                                        len(ds1_nexus_tiles[i].longitudes))
                              for i in cur_tile_list
                              if (ds1_nexus_tiles[i].times[0] == timeinseconds)])
        if (len(tile_data_agg) == 0) or tile_data_agg.mask.all():
            # Nothing usable at this time stamp; emit no record for it.
            continue
        data_min = np.ma.min(tile_data_agg)
        data_max = np.ma.max(tile_data_agg)
        # Area weighting: weight each element by cos(latitude).
        daily_mean = \
            np.ma.average(tile_data_agg,
                          weights=np.cos(np.radians(lats_agg))).item()
        data_count = np.ma.count(tile_data_agg)
        data_std = np.ma.std(tile_data_agg)
        # Return stats by day.
        stat = {
            'min': data_min,
            'max': data_max,
            'mean': daily_mean,
            'cnt': data_count,
            'std': data_std,
            'time': int(timeinseconds),
            'iso_time': datetime.utcfromtimestamp(int(timeinseconds)).replace(
                tzinfo=timezone('UTC')).strftime(ISO_8601)
        }
        stats_arr.append(stat)
    return stats_arr