Exemple #1
0
def process_file(rap_file):
    """Ingest every configured variable of one RAP file.

    For each key of the ``variables`` mapping (defined outside this
    function) the matching band is looked up with ``find_band_num``.  A
    variable whose band cannot be found is logged as an error and skipped.
    Each band that is found is handed to ``base_ingestor.ingest`` as a
    static (``dynamic=False``) granule tiled in 10x10 blocks.

    The granule starts at the band's ``GRIB_REF_TIME`` (first
    whitespace-separated token, parsed as epoch seconds in UTC) and always
    ends one hour later.

    Args:
        rap_file: path of the RAP file passed to ``find_band_num``,
            ``get_band_metadata`` and ``GDALRaster``.
    """
    provider_name = "RAP"
    srid = RAP_Spatial_Reference.epsg
    block_size = (10, 10)

    logger.info("Ingesting file %s" % rap_file)
    for variable in variables:
        logger.info("Processing variable %s" % variable)
        band_filter = variables[variable]
        band_num = find_band_num(rap_file, filterr=band_filter)

        if band_num is None:
            # Best effort: a missing band is logged instead of raised so
            # the remaining variables are still ingested.
            logger.error("Could not find band for %s" % variable)
            continue

        # GRIB_VALID_TIME is still requested, but the end time below is
        # derived from the reference time only.
        metadata_keys = ["GRIB_REF_TIME", "GRIB_VALID_TIME"]
        datevalues = get_band_metadata(rap_file, band_num, metadata_keys)
        ref_time_str = datevalues["GRIB_REF_TIME"].split()[0]

        start_date = datetime.utcfromtimestamp(float(ref_time_str))
        end_date = start_date + timedelta(hours=1)

        ras = GDALRaster(rap_file, srid)
        ras.set_band_num(band_num)
        if variable == "RAP_REFL":
            ras.nodata_range = [-999, -9]

        # The level is the integer in front of the first "-" of the
        # variable's GRIB short name.
        level = int(band_filter["GRIB_SHORT_NAME"].split("-")[0])
        granule_name = "%s_%s %s_%d" % (
            provider_name, variable, start_date.strftime("%Y%m%d %H:%M"),
            level)
        bbox = proj_helper.get_bbox(srid)

        # The granule name doubles as the table name.
        base_ingestor.ingest(ras=ras,
                             provider_name=provider_name,
                             variable_name=variable,
                             granule_name=granule_name,
                             table_name=granule_name,
                             srid=srid,
                             level=level,
                             block_size=block_size,
                             dynamic=False,
                             start_time=start_date,
                             end_time=end_date,
                             subset_bbox=bbox,
                             overwrite=True,
                             threshold=None)
Exemple #2
0
def ingest_gtopo_file(fmt):
    """Ingest the first GTOPO30 elevation file as raster tiles or vectors.

    The candidate files are resolved from the ``GTOPO30_ELEV`` entry of
    ``config.datafiles``; its ``wildcard`` may be a single pattern or a
    list of patterns.  Only the first file found is ingested.

    ``srid``, ``provider_name``, ``variable_name``, ``block_size`` and
    ``dtime`` are not defined in this function and are resolved from the
    enclosing scope.

    Args:
        fmt: ``'ras'`` ingests through ``base_ingestor.ingest`` (level 0)
            and then rebuilds the GiST index on ``rastertile``; ``'vec'``
            ingests through ``base_ingestor.ingest_vector`` (level 1).
            Any other value opens the raster but ingests nothing.

    Raises:
        IndexError: if no file matches the configured wildcard(s).
    """
    df = config.datafiles["GTOPO30_ELEV"]
    wildcards = df["wildcard"]
    if isinstance(wildcards, list):
        files = []
        for wc in wildcards:
            # Query each pattern on its own; the original passed the whole
            # list on every iteration and never used ``wc``.
            files += base_ingestor.get_ingest_files(df["folder"], wc)
    else:
        files = base_ingestor.get_ingest_files(df["folder"], wildcards)

    gtopo_file = files[0]

    ras = GDALRaster(gtopo_file, srid)
    ras.nodata_value = -9999
    bbox = proj_helper.get_bbox(srid)

    if fmt == 'ras':
        granule_name = "GTOPO30Elev_ras"
        level = 0
        base_ingestor.ingest(ras=ras,
                             provider_name=provider_name,
                             variable_name=variable_name,
                             granule_name=granule_name,
                             table_name=granule_name,
                             srid=srid,
                             level=level,
                             block_size=block_size,
                             dynamic=False,
                             start_time=dtime,
                             end_time=datetime.max,
                             subset_bbox=bbox,
                             overwrite=True)

        # PostgreSQL spells this DROP INDEX IF EXISTS <name>; the previous
        # "drop if exists index" form is a syntax error.
        pgdb_helper.submit("""
            drop index if exists rastertile_geom_gist_idx;
            create index rastertile_geom_gist_idx on rastertile using gist(st_convexhull(rast));
            """)
    elif fmt == 'vec':
        granule_name = "GTOPO30Elev_vec"
        level = 1
        base_ingestor.ingest_vector(ras=ras,
                                    provider_name=provider_name,
                                    variable_name=variable_name,
                                    granule_name=granule_name,
                                    table_name=granule_name,
                                    srid=srid,
                                    level=level,
                                    block_size=block_size,
                                    start_time=dtime,
                                    end_time=datetime.max,
                                    subset_bbox=bbox,
                                    overwrite=True)
Exemple #3
0
def ingest_gtopo_file():
    """Ingest the first GTOPO30 elevation file once per configured block size.

    The candidate files are resolved from the ``GTOPO30_ELEV`` entry of
    ``config.datafiles``; its ``wildcard`` may be a single pattern or a
    list of patterns.  Only the first file found is ingested.  For every
    entry of ``block_sizes`` (defined outside this function) the raster is
    reopened and ingested with SRID 4326, using the first component of the
    block size as the level and as the granule-name suffix.

    Raises:
        IndexError: if no file matches the configured wildcard(s).
    """
    provider_name = "GTOPO30"
    variable_name = "ELEV"
    srid = 4326
    # Fixed start time; the granule is open-ended (end_time=datetime.max).
    dtime = datetime(year=1979, month=1, day=1, hour=0, minute=0, second=0)

    df = config.datafiles["GTOPO30_ELEV"]
    wildcards = df["wildcard"]
    if isinstance(wildcards, list):
        files = []
        for wc in wildcards:
            # Query each pattern on its own; the original passed the whole
            # list on every iteration and never used ``wc``.
            files += base_ingestor.get_ingest_files(df["folder"], wc)
    else:
        files = base_ingestor.get_ingest_files(df["folder"], wildcards)

    gtopo_file = files[0]

    for block_size in block_sizes:
        level = block_size[0]
        granule_name = "GTOPO30Elev_%d" % level

        ras = GDALRaster(gtopo_file, srid)
        ras.nodata_value = -9999
        bbox = proj_helper.get_bbox(srid)

        base_ingestor.ingest(ras=ras,
                             provider_name=provider_name,
                             variable_name=variable_name,
                             granule_name=granule_name,
                             table_name=granule_name,
                             srid=srid,
                             level=level,
                             block_size=block_size,
                             dynamic=False,
                             start_time=dtime,
                             end_time=datetime.max,
                             subset_bbox=bbox,
                             overwrite=True)
Exemple #4
0
def save_raster(lats, lons, t_start, t_end):
    """Rasterize point locations as a count grid and ingest it.

    The lat/lon pairs are projected with ``proj_helper.latlon2xy1`` using
    the RAP proj4 definition, each point contributes the value 1, and the
    points are aggregated into an ``ArrayRaster`` with ``stat="count"``.
    The raster is then ingested as the MRMS ``CI_COUNT`` variable for the
    interval ``[t_start, t_end]``.

    ``size``, ``ul``, ``scale``, ``bbox`` and ``dtime`` are not defined in
    this function and are resolved from the enclosing scope.

    NOTE(review): the granule name is stamped with ``dtime`` rather than
    ``t_start``/``t_end`` and uses "%Y%d%m%H%M" (day before month) --
    confirm both are intended.

    Args:
        lats: latitudes of the points.
        lons: longitudes of the points.
        t_start: start time stored with the granule.
        t_end: end time stored with the granule.
    """
    provider_name = "MRMS"
    variable_name = "CI_COUNT"

    x, y = proj_helper.latlon2xy1(lats, lons, RAP_Spatial_Reference.proj4)
    # One unit weight per projected point.
    ones = [1] * len(x)

    raster_kwargs = dict(ds_name="",
                         data_array=None,
                         size=size,
                         ul=ul,
                         scale=scale,
                         skew=(0, 0),
                         srid=RAP_Spatial_Reference.epsg,
                         gdal_datatype=gdalconst.GDT_Int16,
                         nodata_value=999)
    array_raster = ArrayRaster(**raster_kwargs)
    array_raster.set_data_with_xy(x=x, y=y, data=ones, stat="count")

    timestamp = dtime.strftime("%Y%d%m%H%M")
    granule_name = "%s_%s_%s" % (provider_name, variable_name, timestamp)

    # The granule name doubles as the table name.
    ingest_kwargs = dict(ras=array_raster,
                         provider_name=provider_name,
                         variable_name=variable_name,
                         granule_name=granule_name,
                         table_name=granule_name,
                         srid=RAP_Spatial_Reference.epsg,
                         level=0,
                         block_size=(50, 50),
                         dynamic=False,
                         start_time=t_start,
                         end_time=t_end,
                         subset_bbox=bbox,
                         overwrite=True)
    base_ingestor.ingest(**ingest_kwargs)

    logger.info("Inserted %s" % granule_name)
Exemple #5
0
def process_mrms_file(mrms_file):
    """Ingest one MRMS reflectivity file once per configured block size.

    A ``.gz`` input is first decompressed into the current working
    directory.  If a decompressed copy with that name already exists it is
    reused and left in place; a copy created by this call is removed again
    when processing ends, whether or not ingest succeeded.

    The granule starts at the first entry of the file's ``time`` variable
    (interpreted as epoch seconds, UTC) and spans two minutes.  For every
    entry of ``block_sizes`` (defined outside this function) band 15 is
    ingested with SRID 4326 and ``threshold=34``, using the first
    component of the block size as the level.

    Args:
        mrms_file: path of the MRMS file, optionally gzip-compressed.
    """
    provider_name = "MRMS"
    variable_name = "REFL"
    srid = 4326

    base_name, ext = os.path.splitext(mrms_file)
    remove_after_process = False

    if ext == ".gz":
        nc_file_copy = os.path.join("./", os.path.basename(base_name))
        if not os.path.exists(nc_file_copy):
            try:
                with gzip.open(mrms_file, 'rb') as gz_file:
                    with open(nc_file_copy, 'wb') as nc_file:
                        nc_file.write(gz_file.read())
            except Exception:
                # Never leave a truncated copy behind: the existence check
                # above would make the next run reuse it as valid data.
                if os.path.exists(nc_file_copy):
                    os.remove(nc_file_copy)
                raise
            remove_after_process = True
        mrms_file = nc_file_copy

    try:
        # "Ht" is still requested although only "time" is used below.
        nc_vars = nc_get_1d_vars_as_list(mrms_file, ["Ht", "time"])
        times = nc_vars["time"]

        dtime = datetime.utcfromtimestamp(times[0])
        bbox = proj_helper.get_bbox(srid)
        start_time = dtime
        end_time = dtime + timedelta(minutes=2)

        # Zero-based index of the ingested band (GDAL band = index + 1).
        # NOTE(review): presumably an index into the "Ht" levels -- confirm.
        band_index = 14
        bottom_up_data = True

        for block_size in block_sizes:
            # Different tile sizes are stored as different levels.
            level = block_size[0]
            granule_name = "%s_%s %s_%d" % (
                provider_name, variable_name,
                dtime.strftime("%Y%m%d %H:%M"), level)

            ras = GDALRaster(mrms_file, srid, bottom_up_data)
            ras.set_band_num(band_index + 1)

            # Explicitly override the nodata value since the one in the
            # netCDF file is not correct.
            ras.nodata_value = -999
            ras.nodata_range = (-999, 0)

            # The granule name doubles as the table name.
            base_ingestor.ingest(ras=ras,
                                 provider_name=provider_name,
                                 variable_name=variable_name,
                                 granule_name=granule_name,
                                 table_name=granule_name,
                                 srid=srid,
                                 level=level,
                                 block_size=block_size,
                                 dynamic=False,
                                 subset_bbox=bbox,
                                 start_time=start_time,
                                 end_time=end_time,
                                 overwrite=True,
                                 threshold=34)
    finally:
        # Clean up the temporary copy even when ingest fails.
        if remove_after_process:
            os.remove(mrms_file)
Exemple #6
0
        granule_name = "%s_%s %s_%d" % (provider_name, variable_name,
                                        start_date.strftime("%Y%m%d"), level)
        table_name = "%s_%s_%s_%d" % (provider_name, variable_name,
                                      start_date.strftime("%Y%m%d"), level)

        ras = GDALRaster(sds, srid)
        #ras.nodata_range = [0.5, 256] #only 0 (water)

        bbox = proj_helper.get_bbox(srid)
        base_ingestor.ingest(ras=ras,
                             provider_name=provider_name,
                             variable_name=variable_name,
                             granule_name=granule_name,
                             table_name=granule_name,
                             srid=srid,
                             level=level,
                             block_size=block_size,
                             dynamic=False,
                             subset_bbox=bbox,
                             start_time=start_date,
                             end_time=end_date,
                             overwrite=True)

    #also ingest same data as land water mask
    for hdf_file in files:
        sds = get_sds(hdf_file, "Land_Cover_Type_2")
        provider_name = "MODIS"
        variable_name = "WATERBODY"

        vars = ["RANGEBEGINNINGDATE", "RANGEENDINGDATE"]
        datevalues = get_metadata(hdf_file, vars)
def process_file(ahps_file):
    """Ingest one AHPS precipitation netCDF file as a one-day granule.

    The file carries no usable projection parameters, so the raster's
    scale, upper-left corner, skew and geo bounds are derived from the
    corner coordinates stored in its ``lat``/``lon`` variables, projected
    into ``SRID_HRAP``.  The granule starts at the time parsed from
    ``timeofdata`` (format ``%Y%m%d%HZ``) and spans one day.

    ``provider_name`` and ``variable_name`` are not defined in this
    function and are resolved from the enclosing scope.

    Args:
        ahps_file: path of the AHPS netCDF file.
    """
    logger.info("Processing %s" % ahps_file)
    # "true_lat"/"true_lon" are still requested but not used below.
    nc_vars = nc_get_1d_vars_as_list(
        ahps_file, ["timeofdata", "lat", "lon", "true_lat", "true_lon"])

    time_chars = nc_vars["timeofdata"]
    lat = nc_vars["lat"]
    lon = nc_vars["lon"]

    # Corners are stored as bottom-left, bottom-right, top-right, top-left;
    # longitudes are negated before projecting.  Only the three corners
    # needed for the scale and origin are projected.
    bottom_right_xy = proj_helper.latlon2xy(lat[1], -1 * lon[1], SRID_HRAP)
    top_right_xy = proj_helper.latlon2xy(lat[2], -1 * lon[2], SRID_HRAP)
    top_left_xy = proj_helper.latlon2xy(lat[3], -1 * lon[3], SRID_HRAP)

    time_str = "".join(time_chars)
    dtime = datetime.strptime(time_str, "%Y%m%d%HZ")

    logger.info("write to postgis - %s" % ahps_file)
    block_size = (50, 50)
    level = 0

    ras = GDALRaster(ahps_file, SRID_HRAP)
    ras.set_band_num(1)
    ras.nodata_value = -1
    ras.nodata_range = (-1, 1)

    # Pixel size from the top edge (x) and the right edge (y).
    scale_x = (top_right_xy[0] - top_left_xy[0]) / ras.size[0]
    scale_y = (bottom_right_xy[1] - top_right_xy[1]) / ras.size[1]

    # Explicitly set the projection parameters since the netCDF file does
    # not have them.
    ras.scale = (scale_x, scale_y)
    ras.ul = (top_left_xy[0], top_left_xy[1])
    ras.skew = (0, 0)
    ras.geo_bounds = [
        ras.ul[0], ras.ul[0] + ras.size[0] * ras.scale[0], ras.ul[1],
        ras.ul[1] + ras.size[1] * ras.scale[1]
    ]

    granule_name = "%s_%s %s_%d" % (provider_name, variable_name,
                                    dtime.strftime("%Y%m%d %H:%M"), level)

    bbox = proj_helper.get_bbox(SRID_HRAP)

    start_time = dtime
    end_time = dtime + timedelta(days=1)

    # The granule name doubles as the table name.
    base_ingestor.ingest(ras=ras,
                         provider_name=provider_name,
                         variable_name=variable_name,
                         granule_name=granule_name,
                         table_name=granule_name,
                         srid=SRID_HRAP,
                         level=level,
                         block_size=block_size,
                         dynamic=False,
                         subset_bbox=bbox,
                         start_time=start_time,
                         end_time=end_time,
                         overwrite=True)
Exemple #8
0
def process_file(tf):
    """Ingest cumulus and towering-cumulus masks from one GOES cloud file.

    A ``.gz`` input is decompressed next to the original file and the
    decompressed copy is removed again when processing ends, whether or
    not ingest succeeded.  The file is read in chunks of ``CHUNK_SIZE``
    values with ``bin_reader``; for every flat position contained in
    ``indexes`` two 0/1 flags are recorded: value 2 (cumulus) and value 4
    (towering cumulus).  Each flag list is loaded into the shared
    ``array_raster`` and ingested as ``CUM_CLOUD`` and ``TCUM_CLOUD``.

    ``CHUNK_SIZE``, ``indexes``, ``array_raster``, ``all_x``, ``all_y``
    and ``bbox`` are not defined in this function and are resolved from
    the enclosing scope.

    Args:
        tf: mapping with the keys ``"file"`` (input path), ``"nt"`` (used
            as the granule start time) and ``"dt"`` (used as the granule
            end time and in the granule name).
    """
    fname = tf["file"]
    prev_time = tf["nt"]
    dtime = tf["dt"]

    logger.info("Processing file %s " % fname)
    base_name, ext = os.path.splitext(fname)
    remove_after_process = False

    if ext == ".gz":
        # splitext() keeps the directory part, so base_name already is the
        # full target path; joining it with dirname(fname) again would
        # duplicate the directory for relative paths.
        data_file = base_name
        with gzip.open(fname, 'rb') as gz_file:
            with open(data_file, 'wb') as nc_file:
                nc_file.write(gz_file.read())
        remove_after_process = True
    else:
        data_file = fname

    try:
        provider_name = "GOES"
        cloud_mask = bin_reader(data_file, typechar='f',
                                chunk_size=CHUNK_SIZE, recycle=False)
        cum_vals = []
        tcum_vals = []
        num_chunk = 0

        while True:
            try:
                # Builtin next() works on Python 2.6+ and Python 3.
                val_chunk = next(cloud_mask)
            except StopIteration:
                break

            offset = num_chunk * CHUNK_SIZE
            for k, val in enumerate(val_chunk):
                # NOTE(review): membership is tested once per value; if
                # ``indexes`` is a list this is linear per lookup.
                if (offset + k) in indexes:
                    # Special masking for GOES cloud-mask data: only
                    # types 2 and 4 are kept.
                    cum_vals.append(1 if val == 2 else 0)   # cumulus
                    tcum_vals.append(1 if val == 4 else 0)  # towering cumulus
            num_chunk += 1

        level = 0
        block_size = 50, 50
        masks = (("CUM_CLOUD", cum_vals), ("TCUM_CLOUD", tcum_vals))
        for variable_name, values in masks:
            granule_name = "%s_%s_%s" % (provider_name, variable_name,
                                         dtime.strftime("%Y%d%m%H%M"))
            array_raster.set_data_with_xy(x=all_x, y=all_y, data=values)
            array_raster.dsname = granule_name
            # The granule name doubles as the table name.
            base_ingestor.ingest(ras=array_raster,
                                 provider_name=provider_name,
                                 variable_name=variable_name,
                                 granule_name=granule_name,
                                 table_name=granule_name,
                                 srid=ALBERS_Spatial_Reference.epsg,
                                 level=level,
                                 block_size=block_size,
                                 dynamic=False,
                                 start_time=prev_time,
                                 end_time=dtime,
                                 subset_bbox=bbox,
                                 overwrite=True)
    finally:
        # Clean up the decompressed copy even when ingest fails.
        if remove_after_process:
            os.remove(data_file)