def process_file(rap_file):
    provider_name = "RAP"
    srid = RAP_Spatial_Reference.epsg
    logger.info("Ingesting file %s" % rap_file)

    for variable in variables:
        logger.info("Processing variable %s" % variable)
        band_num = find_band_num(rap_file, filterr=variables[variable])
        if band_num is None:
            #raise Exception("Could not find band for %s" % variable)
            logger.error("Could not find band for %s" % variable)
        else:
            vars = ["GRIB_REF_TIME", "GRIB_VALID_TIME"]
            datevalues = get_band_metadata(rap_file, band_num, vars)
            startdate_utc_str = (datevalues["GRIB_REF_TIME"].split())[0]
            enddate_utc_str = (datevalues["GRIB_VALID_TIME"].split())[0]
            start_date = datetime.utcfromtimestamp(float(startdate_utc_str))
            #end_date = datetime.fromtimestamp(float(enddate_utc_str))
            end_date = start_date + timedelta(hours=1)

            block_size = (10, 10)
            ras = GDALRaster(rap_file, srid)
            ras.set_band_num(band_num)
            if variable == "RAP_REFL":
                ras.nodata_range = [-999, -9]

            level = int((variables[variable]["GRIB_SHORT_NAME"].split("-"))[0])
            granule_name = "%s_%s %s_%d" % (provider_name, variable,
                                            start_date.strftime("%Y%m%d %H:%M"), level)
            table_name = "%s_%s_%s_%d" % (provider_name, variable,
                                          start_date.strftime("%Y%m%d%H%M"), level)
            bbox = proj_helper.get_bbox(srid)

            base_ingestor.ingest(ras=ras,
                                 provider_name=provider_name,
                                 variable_name=variable,
                                 granule_name=granule_name,
                                 table_name=granule_name,
                                 srid=srid,
                                 level=level,
                                 block_size=block_size,
                                 dynamic=False,
                                 start_time=start_date,
                                 end_time=end_date,
                                 subset_bbox=bbox,
                                 overwrite=True,
                                 threshold=None)
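# Illustrative only: process_file above assumes a module-level "variables"
# mapping of variable name -> GRIB metadata filter, where GRIB_SHORT_NAME
# encodes the level as its first "-"-separated token. The entry below is a
# hypothetical placeholder, not the project's real configuration.
_example_variables = {
    "RAP_REFL": {"GRIB_SHORT_NAME": "0-EATM"},  # level parsed as int("0")
}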
def generate_mrms_image(mrms_granule, threshold):
    mrms_granule_id = mrms_granule[0]
    sql = """
        select st_astiff(st_colormap((st_union(st_reclass(rast, 1,
            '[-100-%f]:0, (%f-100):1', '8BUI', NULL))), 'fire', 'INTERPOLATE'))
        from rastertile where datagranule_id=%d
        """ % (threshold, threshold, mrms_granule_id)
    rows = pgdb_helper.query(sql)

    start_time = mrms_granule[1]
    filename = "./images/mrms_%s_%d.tif" % (
        start_time.strftime("%m-%d-%H-%M-%S"), mrms_granule_id)
    if os.path.exists(filename):
        os.remove(filename)

    with open(filename, mode='wb') as f:
        data = rows[0][0]
        f.write(data)
        logger.info("saved %s" % filename)
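# Usage sketch (hypothetical driver, not part of the original script): render
# one GeoTIFF per MRMS granule row of (id, starttime, endtime), reusing the
# 35 dBZ threshold that generate_storm_tracks defaults to further below.
def _example_render_all(mrms_granules, threshold=35):
    for granule in mrms_granules:
        generate_mrms_image(granule, threshold)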
def save_raster(lats, lons, t_start, t_end):
    x, y = proj_helper.latlon2xy1(lats, lons, RAP_Spatial_Reference.proj4)
    data = [1 for i in range(0, len(x))]

    array_raster = ArrayRaster(ds_name="",
                               data_array=None,
                               size=size,
                               ul=ul,
                               scale=scale,
                               skew=(0, 0),
                               srid=RAP_Spatial_Reference.epsg,
                               gdal_datatype=gdalconst.GDT_Int16,
                               nodata_value=999)
    array_raster.set_data_with_xy(x=x, y=y, data=data, stat="count")

    level = 0
    block_size = 50, 50  #array_raster.size # 100, 100
    variable_name = "CI_COUNT"
    provider_name = "MRMS"
    granule_name = "%s_%s_%s" % (provider_name, variable_name,
                                 t_start.strftime("%Y%d%m%H%M"))

    base_ingestor.ingest(ras=array_raster,
                         provider_name=provider_name,
                         variable_name=variable_name,
                         granule_name=granule_name,
                         table_name=granule_name,
                         srid=RAP_Spatial_Reference.epsg,
                         level=level,
                         block_size=block_size,
                         dynamic=False,
                         start_time=t_start,
                         end_time=t_end,
                         subset_bbox=bbox,
                         overwrite=True)

    logger.info("Inserted %s" % granule_name)
def save_storm_polys(mrms_granules, threshold, srid=4326):
    min_storm_area = 4e6
    max_storm_area = 1e8

    pgdb_helper.submit("drop table if exists roi_polys")
    sql = """
        create table roi_polys (
            id serial not null,
            lat double precision not null,
            lon double precision not null,
            starttime timestamp without time zone not null,
            endtime timestamp without time zone not null,
            geom geometry not null,
            mrms_granule_id integer not null
        )
        """
    pgdb_helper.submit(sql)

    for mrms_granule in mrms_granules:
        granule_id = mrms_granule[0]
        start_time = mrms_granule[1]
        end_time = mrms_granule[2]

        sql = """
            select st_astext(st_transform(geom, {srid})) poly,
                   st_x(st_transform(st_centroid(geom), {srid})) center_lon,
                   st_y(st_transform(st_centroid(geom), {srid})) center_lat
            from (
                select st_transform(((foo.gv).geom), {area_srid}) geom,
                       ((foo.gv).val) val
                from (
                    select st_dumpaspolygons(
                        st_union(
                            st_reclass(rast, 1, '[-100-{threshold}]:0, ({threshold}-100):1', '8BUI', NULL)
                        )
                    ) gv
                    from rastertile where datagranule_id={granule_id}
                ) as foo
            ) as bar
            where ST_Area(geom) > {min_area} and ST_Area(geom) < {max_area}
            """.format(**{
            "srid": srid,
            "area_srid": SRID_ALBERS,
            "threshold": threshold,
            "granule_id": granule_id,
            "min_area": min_storm_area,
            "max_area": max_storm_area
        })
        rows = pgdb_helper.query(sql)

        for row in rows:
            pgdb_helper.submit("""
                insert into roi_polys (lat, lon, starttime, endtime, geom, mrms_granule_id)
                values ('%f', '%f', '%s', '%s', ST_GeomFromText('%s', 4326), %d)
                """ % (row[2], row[1],
                       start_time.strftime("%Y-%m-%d %H:%M:%S"),
                       end_time.strftime("%Y-%m-%d %H:%M:%S"),
                       row[0], granule_id))

        logger.info("Inserted Storm Polys for granule %d, time %s" %
                    (granule_id, start_time.strftime("%Y-%m-%d %H:%M")))

    pgdb_helper.submit("create index on roi_polys using gist(geom)")
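# Sanity-check sketch (not in the original code): count the storm polygons
# written by save_storm_polys per MRMS granule, using the same pgdb_helper
# query interface as above.
def _example_count_roi_polys():
    rows = pgdb_helper.query(
        "select mrms_granule_id, count(*) from roi_polys group by mrms_granule_id")
    for granule_id, n in rows:
        logger.info("granule %d has %d storm polygons" % (granule_id, n))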
def generate_ci_events():
    #create table for roi geoms
    pgdb_helper.submit("""
        drop table if exists ci_events;
        """)
    pgdb_helper.submit("""
        create table ci_events (
            id serial not null,
            track_id varchar not null,
            starttime timestamp without time zone NOT NULL,
            endtime timestamp without time zone NOT NULL,
            geom geometry not null,
            center_lat float not null,
            center_lon float not null,
            type int not null,
            constraint ci_events_pkey primary key (id)
        );
        """)

    sql = """
        select id, starttime, endtime, st_astext(geom)
        from roi_tracks order by starttime asc
        """
    tracks = pgdb_helper.query(sql)

    for tr in tracks:
        g = ogr.CreateGeometryFromWkt(tr[3])
        if g.GetGeometryName() in ("LINESTRING", "POINT"):
            init_point = g.GetPoint(0)
            lon = init_point[0]
            lat = init_point[1]
        else:
            continue

        data = {
            "track_id": tr[0],
            "starttime": tr[1],
            "endtime": tr[2],
            "geom": "POINT(%f %f)" % (lon, lat),
            "center_lat": lat,
            "center_lon": lon,
            "type": 2
        }
        sql = """
            insert into ci_events (track_id, starttime, endtime, geom, center_lat, center_lon, type)
            values (%s, %s, %s, ST_GeomFromText('POINT(%s %s)', 4326), %s, %s, %s)
            """
        values = (data["track_id"], data["starttime"], data["endtime"],
                  data["center_lon"], data["center_lat"],
                  data["center_lat"], data["center_lon"], data["type"])
        pgdb_helper.insert(sql, values)
        logger.info("Inserted geoms for track %s" % tr[0])

    #create indexes on ci_events
    pgdb_helper.submit(
        "create index ci_events_geom_index on ci_events using gist(geom)")
    pgdb_helper.submit(
        "create index ci_events_time_index on ci_events (starttime, endtime)")
def generate_storm_tracks(mrms_granules, threshold=35, ci_lifetime_hours=4):
    active_roi_tracks = {}

    pgdb_helper.submit("drop table if exists roi_tracks")
    sql = """
        create table roi_tracks (
            id character varying not null,
            starttime timestamp without time zone not null,
            endtime timestamp without time zone not null,
            geom geometry not null
        )
        """
    pgdb_helper.submit(sql)

    for mrms_granule in mrms_granules:
        mrms_granule_id = mrms_granule[0]
        #storm_polys = get_storm_polys(granule_id=mrms_granule_id, threshold=threshold, srid=4326)
        storm_polys = pgdb_helper.query("""
            select id, lat, lon, starttime, endtime, st_astext(geom), mrms_granule_id
            from roi_polys where mrms_granule_id=%d order by starttime asc
            """ % mrms_granule_id)

        new_storms = []
        for rt in active_roi_tracks:
            active_roi_tracks[rt]["updated"] = False

        for row in storm_polys:
            poly = {
                "type": row[0],
                "lat": row[1],
                "lon": row[2],
                "starttime": row[3],
                "endtime": row[4],
                "geom": ogr.CreateGeometryFromWkt(row[5]),
                "granule_id": row[6]
            }
            is_new = True
            for rt in active_roi_tracks:
                if len(active_roi_tracks[rt]):
                    g1 = poly["geom"]
                    g2 = active_roi_tracks[rt]["track"][-1]["geom"]
                    intersection = g1.Intersection(g2)
                    if intersection.IsEmpty() or (intersection.GetGeometryName() != 'POLYGON'):
                        frac = 0
                    else:
                        frac = intersection.GetArea() / g1.GetArea()

                    if frac > 0.5:
                        active_roi_tracks[rt]["track"].append(poly)
                        active_roi_tracks[rt]["updated"] = True
                        is_new = False
                        #break so that each storm poly is added to one storm track only
                        break

            if is_new:
                new_storms.append(poly)

        rts2remove = []
        for rt in active_roi_tracks:
            if not active_roi_tracks[rt]["updated"]:
                rts2remove.append(rt)

        for rt in rts2remove:
            sql = """
                insert into roi_tracks (id, starttime, endtime, geom)
                values (%s, %s, %s, ST_GeomFromText(%s))
                """
            points = []
            start_time = datetime.max
            end_time = datetime.min
            for p in active_roi_tracks[rt]["track"]:
                if start_time > p["starttime"]:
                    start_time = p["starttime"]
                if end_time < p["endtime"]:
                    end_time = p["endtime"]
                points.append((p["lon"], p["lat"]))

            if len(points) > 1:
                str_geom = "LINESTRING(" + ",".join(
                    ["%s %s" % (x[0], x[1]) for x in points]) + ")"
                pgdb_helper.insert(sql, (rt, start_time, end_time, str_geom))
            elif len(points) == 1:
                str_geom = "POINT(%f %f)" % (points[0][0], points[0][1])
                pgdb_helper.insert(sql, (rt, start_time, end_time, str_geom))

            active_roi_tracks.pop(rt)

        for ns in new_storms:
            rt = str(uuid.uuid4())
            active_roi_tracks[rt] = {"track": [ns]}

        logger.info("Generated tracks for granule %d" % mrms_granule_id)
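# For clarity, the track-association rule used in generate_storm_tracks,
# isolated as a standalone helper (a sketch, not called by the code above):
# a storm polygon continues an active track when more than half of its area
# overlaps the track's most recent polygon.
def _overlap_fraction(g1, g2):
    intersection = g1.Intersection(g2)
    if intersection.IsEmpty() or intersection.GetGeometryName() != 'POLYGON':
        return 0.0
    return intersection.GetArea() / g1.GetArea()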
def get_granules(var_name, start_date, end_date):
    sql = """
        select datagranule.id, datagranule.starttime, datagranule.endtime
        from datagranule
        join provider on provider.id = datagranule.provider_id
        join variable on variable.id = datagranule.variable_id
        where provider.name like 'RAP' and variable.name like '%s'
        and (('%s', '%s') overlaps (datagranule.starttime, datagranule.endtime))
        order by datagranule.starttime asc
        """ % (var_name, start_date, end_date)
    rows = pgdb_helper.query(sql)
    return rows


if __name__ == '__main__':
    logger.info("Ingesting RAP Derived granules")

    with SqaAccess(engine=engine) as sqa_access:
        rap_provider = sqa_access.findOne(Provider, {'name': 'RAP'})
        ugrd_var = sqa_access.findOne(Variable, {'name': 'UGRD'})
        vgrd_var = sqa_access.findOne(Variable, {'name': 'VGRD'})
        windconv_var = sqa_access.findOne(Variable, {'name': 'WINDCONV'})

        ugrd_granules = sqa_access.session.query(DataGranule)\
            .filter(DataGranule.variable == ugrd_var)\
            .order_by(DataGranule.starttime).all()
        logger.info("%d ugrd granules" % len(ugrd_granules))

        vgrd_granules = sqa_access.session.query(DataGranule)\
            .filter(DataGranule.variable == vgrd_var)\
            .order_by(DataGranule.starttime).all()
        logger.info("%d vgrd granules" % len(vgrd_granules))
logger.info("Inserted %s" % granule_name) start_time = datetime(year=2014, month=7, day=23, hour=13, minute=0, second=0) end_time = datetime(year=2014, month=7, day=23, hour=18, minute=0, second=0) dtime = start_time tstep = timedelta(hours=1) pgdb_helper = PGDbHelper(conn_str=config.pgsql_conn_str(), echo=True) while dtime < end_time: sql = """ select id, track_id, starttime, endtime, center_lat, center_lon, type from ci_events where starttime >= %s and starttime < %s order by starttime """ values = (dtime, dtime + tstep) rows = pgdb_helper.query(sql, values) if len(rows): lats = [] lons = [] for row in rows: lats.append(row[4]) lons.append(row[5]) save_raster(lats, lons, dtime, dtime + tstep) else: logger.info("No CI Events for %s" % dtime) dtime = dtime + tstep
                         overwrite=True,
                         threshold=threshold)

    if remove_after_process:
        os.remove(mrms_file)


if __name__ == "__main__":
    df = config.datafiles["MRMS_MREFL"]
    if isinstance(df["wildcard"], list):
        files = []
        for wc in df["wildcard"]:
            files += base_ingestor.get_ingest_files(df["folder"], wc)
    else:
        files = base_ingestor.get_ingest_files(df["folder"], df["wildcard"])

    parallel = config.parallel
    if parallel:
        n_proc = config.nprocs
        pool_size = min(n_proc, len(files))
        logger.info("Using pool size %d" % pool_size)
        p = Pool(pool_size)
        p.map(process_file, files)
        p.close()
        p.join()
    else:
        for f in files:
            process_file(f)
def process_file(ahps_file):
    logger.info("Processing %s" % ahps_file)
    #ahps_file = get_ingest_file_path(r'AHPS_Precip_1day/nws_precip_conus_20140722.nc')

    vars = nc_get_1d_vars_as_list(
        ahps_file, ["timeofdata", "lat", "lon", "true_lat", "true_lon"])
    time_chars = vars["timeofdata"]
    lat = vars["lat"]
    lon = vars["lon"]
    true_lat = vars["true_lat"]
    true_lon = -1 * vars["true_lon"]

    #corners: bottom-left, bottom-right, top-right and top-left
    bottom_left = lat[0], -1 * lon[0]
    bottom_right = lat[1], -1 * lon[1]
    top_right = lat[2], -1 * lon[2]
    top_left = lat[3], -1 * lon[3]

    bottom_left_xy = proj_helper.latlon2xy(bottom_left[0], bottom_left[1], SRID_HRAP)
    bottom_right_xy = proj_helper.latlon2xy(bottom_right[0], bottom_right[1], SRID_HRAP)
    top_left_xy = proj_helper.latlon2xy(top_left[0], top_left[1], SRID_HRAP)
    top_right_xy = proj_helper.latlon2xy(top_right[0], top_right[1], SRID_HRAP)

    time_str = "".join([ch for ch in time_chars])
    dtime = datetime.strptime(time_str, "%Y%m%d%HZ")

    logger.info("write to postgis - %s" % ahps_file)
    block_size = (50, 50)
    level = 0

    ras = GDALRaster(ahps_file, SRID_HRAP)
    ras.set_band_num(1)
    ras.nodata_value = -1
    ras.nodata_range = (-1, 1)

    scale_x1 = (top_right_xy[0] - top_left_xy[0]) / ras.size[0]
    scale_x2 = (bottom_right_xy[0] - bottom_left_xy[0]) / ras.size[0]
    scale_y1 = (bottom_right_xy[1] - top_right_xy[1]) / ras.size[1]
    scale_y2 = (bottom_left_xy[1] - top_left_xy[1]) / ras.size[1]

    scale_x = scale_x1
    scale_y = scale_y1
    skew_y = 0
    skew_x = 0
    ul_x = top_left_xy[0]
    ul_y = top_left_xy[1]

    #explicitly set projection params since the netcdf file does not provide them
    ras.scale = (scale_x, scale_y)
    ras.ul = (ul_x, ul_y)
    ras.skew = (skew_x, skew_y)
    ras.geo_bounds = [
        ras.ul[0], ras.ul[0] + ras.size[0] * ras.scale[0],
        ras.ul[1], ras.ul[1] + ras.size[1] * ras.scale[1]
    ]

    granule_name = "%s_%s %s_%d" % (provider_name, variable_name,
                                    dtime.strftime("%Y%m%d %H:%M"), level)
    table_name = "%s_%s_%s_%d" % (provider_name, variable_name,
                                  dtime.strftime("%Y%m%d%H%M"), level)
    bbox = proj_helper.get_bbox(SRID_HRAP)
    #bbox = None
    start_time = dtime
    end_time = dtime + timedelta(days=1)

    base_ingestor.ingest(ras=ras,
                         provider_name=provider_name,
                         variable_name=variable_name,
                         granule_name=granule_name,
                         table_name=granule_name,
                         srid=SRID_HRAP,
                         level=level,
                         block_size=block_size,
                         dynamic=False,
                         subset_bbox=bbox,
                         start_time=start_time,
                         end_time=end_time,
                         overwrite=True)
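# Reference sketch (hypothetical helper, not part of the original module): how
# the (ul, scale, skew) values set on the raster above map a pixel (col, row)
# to projected coordinates, following the standard GDAL geotransform convention.
def _pixel_to_xy(col, row, ul, scale, skew=(0, 0)):
    x = ul[0] + col * scale[0] + row * skew[0]
    y = ul[1] + col * skew[1] + row * scale[1]
    return x, y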
def process_file(tf):
    fname = tf["file"]
    prev_time = tf["nt"]
    dtime = tf["dt"]
    logger.info("Processing file %s " % fname)

    ext_parts = os.path.splitext(fname)
    ext = ext_parts[1]
    remove_after_process = False
    if ext == ".gz":
        nc_file_name = ext_parts[0]
        nc_file_copy = os.path.join(os.path.dirname(fname), nc_file_name)
        with open(nc_file_copy, 'wb') as nc_file:
            gz_file = gzip.open(fname, 'rb')
            gz_bytes = gz_file.read()
            nc_file.write(gz_bytes)
            gz_file.close()
        data_file = nc_file_copy
        remove_after_process = True
    else:
        data_file = fname

    provider_name = "GOES"
    #variable_name = "CLOUDTYPE"

    cloud_mask = bin_reader(data_file, typechar='f', chunk_size=CHUNK_SIZE, recycle=False)
    cum_vals = []
    tcum_vals = []
    num_chunk = 0
    while True:
        try:
            val_chunk = next(cloud_mask)
            for k in range(0, len(val_chunk), 1):
                if (num_chunk * CHUNK_SIZE + k) in indexes:
                    #special masking for goes cm data, only include types 2 & 4
                    if val_chunk[k] == 2:  #cumulus clouds
                        cum_vals.append(1)
                    else:
                        cum_vals.append(0)
                    if val_chunk[k] == 4:  #towering cumulus clouds
                        tcum_vals.append(1)
                    else:
                        tcum_vals.append(0)
            num_chunk += 1
        except StopIteration:
            break

    level = 0
    block_size = 50, 50  #array_raster.size # 100, 100

    variable_name = "CUM_CLOUD"
    granule_name = "%s_%s_%s" % (provider_name, variable_name,
                                 dtime.strftime("%Y%d%m%H%M"))
    array_raster.set_data_with_xy(x=all_x, y=all_y, data=cum_vals)
    array_raster.dsname = granule_name
    base_ingestor.ingest(ras=array_raster,
                         provider_name=provider_name,
                         variable_name=variable_name,
                         granule_name=granule_name,
                         table_name=granule_name,
                         srid=ALBERS_Spatial_Reference.epsg,
                         level=level,
                         block_size=block_size,
                         dynamic=False,
                         start_time=prev_time,
                         end_time=dtime,
                         subset_bbox=bbox,
                         overwrite=True)

    variable_name = "TCUM_CLOUD"
    granule_name = "%s_%s_%s" % (provider_name, variable_name,
                                 dtime.strftime("%Y%d%m%H%M"))
    array_raster.set_data_with_xy(x=all_x, y=all_y, data=tcum_vals)
    array_raster.dsname = granule_name
    base_ingestor.ingest(ras=array_raster,
                         provider_name=provider_name,
                         variable_name=variable_name,
                         granule_name=granule_name,
                         table_name=granule_name,
                         srid=ALBERS_Spatial_Reference.epsg,
                         level=level,
                         block_size=block_size,
                         dynamic=False,
                         start_time=prev_time,
                         end_time=dtime,
                         subset_bbox=bbox,
                         overwrite=True)

    if remove_after_process:
        os.remove(data_file)
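# Equivalent masking expressed as a small helper (a sketch, not used above):
# GOES cloud-mask codes are turned into binary lists, with 2 = cumulus and
# 4 = towering cumulus as noted in the comments of process_file.
def _mask_cloud_types(values, cumulus_code=2, towering_code=4):
    cum = [1 if v == cumulus_code else 0 for v in values]
    tcum = [1 if v == towering_code else 0 for v in values]
    return cum, tcum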