def download(dbname, dts, bbox=None):
    """Downloads SMAP soil moisture data for a set of dates *dts* and imports them
    into the PostGIS database *dbname*. Optionally uses a bounding box to limit
    the region with [minlon, minlat, maxlon, maxlat]."""
    res = 0.36
    url = "n5eil01u.ecs.nsidc.org"
    ftp = FTP(url)
    ftp.login()
    for dt in [dts[0] + timedelta(tt) for tt in range((dts[1] - dts[0]).days + 1)]:
        r = ftp.cwd("/pub/SAN/SMAP/SPL3SMP.002/{0}".format(dt.strftime("%Y.%m.%d")))
        if r.find("successful") > 0:
            outpath = tempfile.mkdtemp()
            fname = [f for f in ftp.nlst() if f.find("h5") > 0][0]
            with open("{0}/{1}".format(outpath, fname), 'wb') as f:
                ftp.retrbinary("RETR {0}".format(fname), f.write)
            f = h5py.File("{0}/{1}".format(outpath, fname))
            lat = f['Soil_Moisture_Retrieval_Data']['latitude'][:, 0]
            lon = f['Soil_Moisture_Retrieval_Data']['longitude'][0, :]
            lon[lon > 180] -= 360.0
            # FIXME: Need to add reprojection from EASE grid
            i1, i2, j1, j2 = datasets.spatialSubset(np.sort(lat)[::-1], np.sort(lon), res, bbox)
            lati = np.argsort(lat)[::-1][i1:i2]
            loni = np.argsort(lon)[j1:j2]
            sm = np.zeros((len(lati), len(loni)))
            for i in range(len(lati)):
                for j in range(len(loni)):
                    sm[i, j] = f['Soil_Moisture_Retrieval_Data']['soil_moisture'][lati[i], loni[j]]
            # FIXME: Use spatially variable observation error
            # sme = f['Soil_Moisture_Retrieval_Data']['soil_moisture_error'][i1:i2, j1:j2]
            lat = np.sort(lat)[::-1][i1:i2]
            lon = np.sort(lon)[j1:j2]
            filename = dbio.writeGeotif(lat, lon, res, sm)
            dbio.ingest(dbname, filename, dt, table, False)
        else:
            print("No SMAP data available for {0}.".format(dt.strftime("%Y-%m-%d")))
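# The `datasets.spatialSubset` helper called above (and throughout this listing)
# is not shown here. The sketch below is a guess at its behavior inferred from
# the call sites: it maps a [minlon, minlat, maxlon, maxlat] bounding box to
# index bounds (i1, i2, j1, j2) of a grid whose latitudes are sorted
# northwards-first and longitudes ascending. The name and signature follow the
# call sites; padding the subset by one cell (instead of using *res* directly)
# is an assumption.
import numpy as np

def spatialSubset(lat, lon, res, bbox):
    """Hypothetical sketch: index bounds of *bbox* within the *lat*/*lon* grids."""
    if bbox is None:
        return 0, len(lat), 0, len(lon)
    i1 = max(np.argmin(np.abs(lat - bbox[3])) - 1, 0)          # northern edge
    i2 = min(np.argmin(np.abs(lat - bbox[1])) + 1, len(lat))   # southern edge
    j1 = max(np.argmin(np.abs(lon - bbox[0])) - 1, 0)          # western edge
    j2 = min(np.argmin(np.abs(lon - bbox[2])) + 1, len(lon))   # eastern edge
    return i1, i2, j1, j2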
def _downloadVariable(varname, dbname, dts, bbox):
    """Download specific variable from the MERRA Reanalysis dataset."""
    # FIXME: Grid is not rectangular, but 0.5 x 0.625 degrees
    res = 0.5
    for ts in [dts[0] + timedelta(dti) for dti in range((dts[1] - dts[0]).days + 1)]:
        try:
            url = "http://goldsmr4.sci.gsfc.nasa.gov:80/opendap/MERRA2/M2T1NXSLV.5.12.4/{0}/{1:02d}/MERRA2_400.tavg1_2d_slv_Nx.{0:04d}{1:02d}{2:02d}.nc4".format(ts.year, ts.month, ts.day)
            ds = netcdf.Dataset(url)
            lat = ds.variables["lat"][:]
            lon = ds.variables["lon"][:]
            lon[lon > 180] -= 360.0
            if bbox is not None:
                i = np.where(np.logical_and(lat > bbox[1], lat < bbox[3]))[0]
                j = np.where(np.logical_and(lon > bbox[0], lon < bbox[2]))[0]
                lat = lat[i]
                lon = lon[j]
            else:
                i = range(len(lat))
                j = range(len(lon))
            data = np.zeros((len(i), len(j)))
            if varname == "tmax":
                hdata = ds.variables["T2M"][:, i, j]
                data = np.amax(hdata, axis=0) - 273.15
            elif varname == "tmin":
                hdata = ds.variables["T2M"][:, i, j]
                data = np.amin(hdata, axis=0) - 273.15
            elif varname in ["wind"]:
                hdata = np.sqrt(ds.variables["U10M"][:, i, j]**2 + ds.variables["V10M"][:, i, j]**2)
                data = np.mean(hdata, axis=0)
            filename = dbio.writeGeotif(lat, lon, res, data)
            dbio.ingest(dbname, filename, ts, "{0}.merra".format(varname))
            os.remove(filename)
        except Exception:
            print("Cannot import MERRA dataset for {0}!".format(ts.strftime("%Y-%m-%d")))
def _downloadVariable(varname, dbname, dts, bbox):
    """Downloads the PRISM data products for a specific variable and a set of
    dates *dts*. *varname* can be ppt, tmax or tmin."""
    log = logging.getLogger(__name__)
    url = "prism.oregonstate.edu"
    ftp = FTP(url)
    ftp.login()
    ftp.cwd("daily/{0}".format(varname))
    outpath = tempfile.mkdtemp()
    years = list(set([t.year for t in dts]))
    for yr in years:
        ftp.cwd("{0}".format(yr))
        filenames = [f for f in ftp.nlst() if datetime.strptime(f.split("_")[-2], "%Y%m%d") >= dts[0] and datetime.strptime(f.split("_")[-2], "%Y%m%d") <= dts[-1]]
        for fname in filenames:
            dt = datetime.strptime(fname.split("_")[-2], "%Y%m%d")
            with open("{0}/{1}".format(outpath, fname), 'wb') as f:
                ftp.retrbinary("RETR {0}".format(fname), f.write)
            if fname.endswith("zip"):
                fz = zipfile.ZipFile("{0}/{1}".format(outpath, fname))
                lfilename = [s for s in fz.namelist() if s.endswith("bil")][0]
                fz.extractall(outpath)
            else:
                lfilename = fname
            tfilename = lfilename.replace(".bil", ".tif")
            if bbox is not None:
                proc = subprocess.Popen(["gdal_translate", "-projwin", "{0}".format(bbox[0]), "{0}".format(bbox[3]), "{0}".format(bbox[2]), "{0}".format(bbox[1]), "{0}/{1}".format(outpath, lfilename), "{0}/{1}".format(outpath, tfilename)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                out, err = proc.communicate()
                log.debug(out)
                dbio.ingest(dbname, "{0}/{1}".format(outpath, tfilename), dt, table[varname], True)
            else:
                dbio.ingest(dbname, "{0}/{1}".format(outpath, lfilename), dt, table[varname], True)
        ftp.cwd("..")
def download(dbname, dts, bbox):
    """Downloads the GPM IMERG daily precipitation data for a set of dates *dts*
    and imports them into the PostGIS database *dbname*."""
    url = "jsimpson.pps.eosdis.nasa.gov"
    ftp = FTP(url)
    # FIXME: Change to RHEAS-specific password
    ftp.login('*****@*****.**', '*****@*****.**')
    ftp.cwd("data/imerg/gis")
    outpath = tempfile.mkdtemp()
    for dt in [dts[0] + timedelta(t) for t in range((dts[-1] - dts[0]).days + 1)]:
        try:
            ftp.cwd("/data/imerg/gis/{0}/{1:02d}".format(dt.year, dt.month))
            filenames = [f for f in ftp.nlst() if re.match(r"3B.*{0}.*S000000.*1day\.tif.*".format(dt.strftime("%Y%m%d")), f) is not None]
            if len(filenames) > 0:
                fname = filenames[0]
                with open("{0}/{1}".format(outpath, fname), 'wb') as f:
                    ftp.retrbinary("RETR {0}".format(fname), f.write)
                with open("{0}/{1}".format(outpath, fname.replace("tif", "tfw")), 'wb') as f:
                    ftp.retrbinary("RETR {0}".format(fname.replace("tif", "tfw")), f.write)
                tfname = fname.replace("tif", "tfw")
                fname = datasets.uncompress(fname, outpath)
                datasets.uncompress(tfname, outpath)
                subprocess.call(["gdalwarp", "-t_srs", "epsg:4326", "{0}/{1}".format(outpath, fname), "{0}/prec.tif".format(outpath)])
                if bbox is not None:
                    subprocess.call(["gdal_translate", "-a_srs", "epsg:4326", "-projwin", "{0}".format(bbox[0]), "{0}".format(bbox[3]), "{0}".format(bbox[2]), "{0}".format(bbox[1]), "{0}/prec.tif".format(outpath), "{0}/prec1.tif".format(outpath)])
                else:
                    subprocess.call(["gdal_translate", "-a_srs", "epsg:4326", "{0}/prec.tif".format(outpath), "{0}/prec1.tif".format(outpath)])
                # stored values are in tenths of mm; multiply by 0.1 to recover mm/day
                cmd = " ".join(["gdal_calc.py", "-A", "{0}/prec1.tif".format(outpath), "--outfile={0}/prec2.tif".format(outpath), "--calc=\"0.1*A\""])
                subprocess.call(cmd, shell=True)
                dbio.ingest(dbname, "{0}/prec2.tif".format(outpath), dt, table, False)
        except Exception:
            print("WARNING! No data were available to import into {0} for {1}.".format(table, dt.strftime("%Y-%m-%d")))
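# `datasets.uncompress` is used above but not shown in this listing. A minimal
# sketch is given below, assuming (from the call sites and the trailing ".*" in
# the filename regex) that it unpacks a gzip-compressed download sitting in
# *outpath* and returns the uncompressed file name, passing plain files through
# unchanged; the real helper's behavior may differ.
import gzip
import shutil

def uncompress(filename, outpath):
    """Hypothetical sketch: gunzip *filename* inside *outpath* if needed."""
    if filename.endswith(".gz"):
        target = filename[:-3]
        with gzip.open("{0}/{1}".format(outpath, filename), 'rb') as fin:
            with open("{0}/{1}".format(outpath, target), 'wb') as fout:
                shutil.copyfileobj(fin, fout)
        return target
    return filename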
def download(dbname, dts, bbox):
    """Downloads the GPM IMERG daily precipitation data for a set of dates *dts*
    and imports them into the PostGIS database *dbname*."""
    url = "jsimpson.pps.eosdis.nasa.gov"
    ftp = FTP(url)
    # FIXME: Change to RHEAS-specific password
    ftp.login('*****@*****.**', '*****@*****.**')
    ftp.cwd("data/imerg/gis")
    outpath = tempfile.mkdtemp()
    ts = list(set([(t.year, t.month) for t in [dts[0] + timedelta(dti) for dti in range((dts[1] - dts[0]).days + 1)]]))
    for t in ts:
        try:
            ftp.cwd("{0}/{1:02d}".format(t[0], t[1]))
            filenames = [f for f in ftp.nlst() if datetime.strptime(f.split(".")[-5].split("-")[0], "%Y%m%d") >= dts[0] and datetime.strptime(f.split(".")[-5].split("-")[0], "%Y%m%d") <= dts[1] and f.find("E.1day.tif") > 0]
            for fname in filenames:
                dt = datetime.strptime(fname.split(".")[-5].split("-")[0], "%Y%m%d")
                with open("{0}/{1}".format(outpath, fname), 'wb') as f:
                    ftp.retrbinary("RETR {0}".format(fname), f.write)
                with open("{0}/{1}".format(outpath, fname.replace(".tif", ".tfw")), 'wb') as f:
                    ftp.retrbinary("RETR {0}".format(fname.replace(".tif", ".tfw")), f.write)
                subprocess.call(["gdalwarp", "-t_srs", "epsg:4326", "{0}/{1}".format(outpath, fname), "{0}/prec.tif".format(outpath)])
                if bbox is not None:
                    subprocess.call(["gdal_translate", "-a_srs", "epsg:4326", "-projwin", "{0}".format(bbox[0]), "{0}".format(bbox[3]), "{0}".format(bbox[2]), "{0}".format(bbox[1]), "{0}/prec.tif".format(outpath), "{0}/prec1.tif".format(outpath)])
                else:
                    subprocess.call(["gdal_translate", "-a_srs", "epsg:4326", "{0}/prec.tif".format(outpath), "{0}/prec1.tif".format(outpath)])
                # stored values are in tenths of mm; multiply by 0.1 to recover mm/day
                cmd = " ".join(["gdal_calc.py", "-A", "{0}/prec1.tif".format(outpath), "--outfile={0}/prec2.tif".format(outpath), "--calc=\"0.1*A\""])
                subprocess.call(cmd, shell=True)
                dbio.ingest(dbname, "{0}/prec2.tif".format(outpath), dt, table, False)
            ftp.cwd("../..")
        except Exception:
            print("GPM data not available for {0}/{1}. Skipping download!".format(t[0], t[1]))
def download(dbname, dt, bbox=None):
    """Downloads SMOS soil moisture data for a set of dates *dt* and imports
    them into the PostGIS database *dbname*. Optionally uses a bounding box to
    limit the region with [minlon, minlat, maxlon, maxlat]."""
    res = 0.25
    url = "http://*****:*****@cp34-bec.cmima.csic.es/thredds/dodsC/NRTSM001D025A_ALL"
    f = netcdf.Dataset(url)
    lat = f.variables['lat'][:]
    lon = f.variables['lon'][:]
    if bbox is not None:
        i = np.where(np.logical_and(lat > bbox[1], lat < bbox[3]))[0]
        j = np.where(np.logical_and(lon > bbox[0], lon < bbox[2]))[0]
        lat = lat[i]
        lon = lon[j]
    else:
        i = range(len(lat))
        j = range(len(lon))
    t0 = datetime(2010, 1, 12)  # initial date of SMOS data
    t1 = (dt[0] - t0).days
    t2 = (dt[1] - t0).days + 1
    ti = range(t1, t2)
    sm = f.variables['SM'][ti, i, j]
    # FIXME: Use spatially variable observation error
    # smv = f.variables['VARIANCE_SM'][ti, i, j]
    for tj in range(sm.shape[0]):
        filename = dbio.writeGeotif(lat, lon, res, sm[tj, :, :])
        t = t0 + timedelta(ti[tj])
        dbio.ingest(dbname, filename, t, table, False)
        print("Imported SMOS {0}".format(tj))
        os.remove(filename)
def ingest(dbname, table, data, lat, lon, res, t):
    """Import data into RHEAS database."""
    for tj in range(data.shape[0]):
        filename = dbio.writeGeotif(lat, lon, res, data[tj, :, :])
        dbio.ingest(dbname, filename, t[tj], table)
        print("Imported {0} in {1}".format(t[tj].strftime("%Y-%m-%d"), table))
        os.remove(filename)
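# Illustrative call of the generic ingest helper above, assuming a
# (time, lat, lon) data stack; the database name, table name, grid and
# resolution are hypothetical values, not taken from this listing.
if __name__ == "__main__":
    import numpy as np
    from datetime import datetime, timedelta
    data = np.random.rand(3, 40, 60)       # stack of 3 daily rasters
    lat = np.linspace(39.0, 35.1, 40)      # northwards-first latitudes
    lon = np.linspace(-110.0, -104.1, 60)
    t = [datetime(2015, 1, 1) + timedelta(d) for d in range(3)]
    ingest("rheas", "precip.example", data, lat, lon, 0.1, t)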
def download(dbname, dts, bbox):
    """Downloads AMSR-E soil moisture data for a set of dates *dts* and imports
    them into the PostGIS database *dbname*. Optionally uses a bounding box to
    limit the region with [minlon, minlat, maxlon, maxlat]."""
    url = "n5eil01u.ecs.nsidc.org"
    ftp = FTP(url)
    ftp.login()
    ftp.cwd("SAN/AMSA/AE_Land3.002")
    for dt in [dts[0] + timedelta(ti) for ti in range((dts[-1] - dts[0]).days + 1)]:
        datadir = dt.strftime("%Y.%m.%d")
        try:
            tmppath = tempfile.mkdtemp()
            ftp.cwd(datadir)
            fname = [f for f in ftp.nlst() if f.endswith("hdf")][0]
            with open("{0}/{1}".format(tmppath, fname), 'wb') as f:
                ftp.retrbinary("RETR {0}".format(fname), f.write)
            subprocess.call(["gdal_translate", "HDF4_EOS:EOS_GRID:{0}/{1}:Ascending_Land_Grid:A_Soil_Moisture".format(tmppath, fname), "{0}/sma.tif".format(tmppath)])
            subprocess.call(["gdal_translate", "HDF4_EOS:EOS_GRID:{0}/{1}:Descending_Land_Grid:D_Soil_Moisture".format(tmppath, fname), "{0}/smd.tif".format(tmppath)])
            # merge orbits
            subprocess.call(["gdal_merge.py", "-o", "{0}/sm1.tif".format(tmppath), "{0}/sma.tif".format(tmppath), "{0}/smd.tif".format(tmppath)])
            # reproject data
            subprocess.call(["gdalwarp", "-s_srs", "epsg:3410", "-t_srs", "epsg:4326", "{0}/sm1.tif".format(tmppath), "{0}/sm2.tif".format(tmppath)])
            if bbox is None:
                pstr = []
            else:
                pstr = ["-projwin", str(bbox[0]), str(bbox[3]), str(bbox[2]), str(bbox[1])]
            subprocess.call(["gdal_translate"] + pstr + ["-ot", "Float32", "{0}/sm2.tif".format(tmppath), "{0}/sm3.tif".format(tmppath)])
            filename = "{0}/amsre_soilm_{1}.tif".format(tmppath, dt.strftime("%Y%m%d"))
            cmd = " ".join(["gdal_calc.py", "-A", "{0}/sm3.tif".format(tmppath), "--outfile={0}".format(filename), "--NoDataValue=-9999", "--calc=\"(abs(A)!=9999)*(A/1000.0+9999)-9999\""])
            subprocess.call(cmd, shell=True)
            dbio.ingest(dbname, filename, dt, table, False)
            ftp.cwd("../")
        except Exception:
            print("AMSR-E data not available for {0}. Skipping download!".format(dt.strftime("%Y%m%d")))
def download(dbname, dt, bbox=None):
    """Downloads SMOS soil moisture data for a set of dates *dt* and imports
    them into the PostGIS database *dbname*. Optionally uses a bounding box to
    limit the region with [minlon, minlat, maxlon, maxlat]."""
    log = logging.getLogger(__name__)
    res = 0.25
    url = "http://*****:*****@cp34-bec.cmima.csic.es/thredds/dodsC/NRTSM001D025A_ALL"
    f = netcdf.Dataset(url)
    lat = f.variables['lat'][::-1]  # swap latitude orientation to northwards
    lon = f.variables['lon'][:]
    i1, i2, j1, j2 = datasets.spatialSubset(lat, lon, res, bbox)
    smi1 = len(lat) - i2 - 1
    smi2 = len(lat) - i1 - 1
    lat = lat[i1:i2]
    lon = lon[j1:j2]
    t0 = datetime(2010, 1, 12)  # initial date of SMOS data
    t1 = (dt[0] - t0).days
    if t1 < 0:
        log.warning("Resetting start date to {0}".format(t0.strftime("%Y-%m-%d")))
        t1 = 0
    t2 = (dt[-1] - t0).days + 1
    nt, _, _ = f.variables['SM'].shape
    if t2 > nt:
        t2 = nt
        log.warning("Resetting end date to {0}".format((t0 + timedelta(t2)).strftime("%Y-%m-%d")))
    ti = range(t1, t2)
    sm = f.variables['SM'][ti, smi1:smi2, j1:j2]
    # FIXME: Use spatially variable observation error
    # smv = f.variables['VARIANCE_SM'][ti, i1:i2, j1:j2]
    for tj in range(sm.shape[0]):
        filename = dbio.writeGeotif(lat, lon, res, sm[tj, :, :])
        t = t0 + timedelta(ti[tj])
        dbio.ingest(dbname, filename, t, table, False)
        log.info("Imported SMOS {0}".format(tj))
        os.remove(filename)
def download(dbname, dt, bbox=None):
    """Downloads SMAP soil moisture data for a date *dt* and imports them into
    the PostGIS database *dbname*. Optionally uses a bounding box to limit the
    region with [minlon, minlat, maxlon, maxlat]."""
    res = 0.36
    url = "n5eil01u.ecs.nsidc.org"  # FTP() expects a hostname, not a URL
    ftp = FTP(url)
    ftp.login()
    ftp.cwd("SAN/SMAP/SPL3SMP.002")
    days = ftp.nlst()
    datadir = dt.strftime("%Y.%m.%d")
    if datadir in days:
        outpath = tempfile.mkdtemp()
        ftp.cwd(datadir)
        fname = [f for f in ftp.nlst() if f.find("h5") > 0][0]
        with open("{0}/{1}".format(outpath, fname), "wb") as f:
            ftp.retrbinary("RETR {0}".format(fname), f.write)
        f = h5py.File("{0}/{1}".format(outpath, fname))
        lat = f["Soil_Moisture_Retrieval_Data"]["latitude"][:, 0]
        lon = f["Soil_Moisture_Retrieval_Data"]["longitude"][0, :]
        if bbox is not None:
            i = np.where(np.logical_and(lat > bbox[1], lat < bbox[3]))[0]
            j = np.where(np.logical_and(lon > bbox[0], lon < bbox[2]))[0]
            lat = lat[i]
            lon = lon[j]
        else:
            i = range(len(lat))
            j = range(len(lon))
        sm = f["Soil_Moisture_Retrieval_Data"]["soil_moisture"][i[0]:i[-1] + 1, j[0]:j[-1] + 1]
        # FIXME: Use spatially variable observation error
        # sme = f['Soil_Moisture_Retrieval_Data']['soil_moisture_error'][i[0]:i[-1]+1, j[0]:j[-1]+1]
        filename = dbio.writeGeotif(lat, lon, res, sm)
        dbio.ingest(dbname, filename, dt, table, False)
def _downloadVariable(varname, dbname, dt, bbox):
    """Download specific variable from the MERRA Reanalysis dataset."""
    # FIXME: Grid is not rectangular, but 0.5 x 0.625 degrees
    res = 0.5
    try:
        url = "http://goldsmr4.sci.gsfc.nasa.gov:80/dods/M2T1NXSLV"
        ds = netcdf.Dataset(url)
        lat = ds.variables["lat"][:]
        lon = ds.variables["lon"][:]
        lon[lon > 180] -= 360.0
        i1, i2, j1, j2 = datasets.spatialSubset(np.sort(lat)[::-1], np.sort(lon), res, bbox)
        data = np.zeros((i2 - i1, j2 - j1))
        lati = np.argsort(lat)[::-1][i1:i2]
        loni = np.argsort(lon)[j1:j2]
        t = ds.variables["time"]
        tt = netcdf.num2date(t[:], units=t.units)
        ti = np.where(tt == dt)[0][0]
        if varname == "tmax":
            hdata = ds.variables["t2m"][ti:ti + 24, lati, loni]
            data = np.amax(hdata, axis=0) - 273.15
        elif varname == "tmin":
            hdata = ds.variables["t2m"][ti:ti + 24, lati, loni]
            data = np.amin(hdata, axis=0) - 273.15
        elif varname in ["wind"]:
            hdata = np.sqrt(ds.variables["u10m"][ti:ti + 24, lati, loni]**2 + ds.variables["v10m"][ti:ti + 24, lati, loni]**2)
            data = np.mean(hdata, axis=0)
        lat = np.sort(lat)[::-1][i1:i2]
        lon = np.sort(lon)[j1:j2]
        filename = dbio.writeGeotif(lat, lon, res, data)
        table = "{0}.merra".format(varname)
        dbio.ingest(dbname, filename, dt, table)
        print("Imported {0} in {1}".format(tt[ti].strftime("%Y-%m-%d"), table))
        os.remove(filename)
    except Exception:
        print("Cannot import MERRA dataset for {0}!".format(dt.strftime("%Y-%m-%d")))
def _downloadVariable(varname, dbname, dts, bbox):
    """Download specific variable from the MERRA Reanalysis dataset."""
    # FIXME: Grid is not rectangular, but 0.5 x 0.625 degrees
    res = 0.5
    for ts in [dts[0] + timedelta(dti) for dti in range((dts[1] - dts[0]).days + 1)]:
        try:
            runid = _merraRunid(ts.year)
            url = "http://goldsmr4.sci.gsfc.nasa.gov:80/opendap/MERRA2/M2T1NXSLV.5.12.4/{1}/{2:02d}/MERRA2_{0}.tavg1_2d_slv_Nx.{1:04d}{2:02d}{3:02d}.nc4".format(runid, ts.year, ts.month, ts.day)
            ds = netcdf.Dataset(url)
            lat = ds.variables["lat"][:]
            lon = ds.variables["lon"][:]
            lon[lon > 180] -= 360.0
            i1, i2, j1, j2 = datasets.spatialSubset(np.sort(lat)[::-1], np.sort(lon), res, bbox)
            data = np.zeros((i2 - i1, j2 - j1))
            lati = np.argsort(lat)[::-1][i1:i2]
            loni = np.argsort(lon)[j1:j2]
            if varname == "tmax":
                hdata = ds.variables["T2M"][:, lati, loni]
                data = np.amax(hdata, axis=0) - 273.15
            elif varname == "tmin":
                hdata = ds.variables["T2M"][:, lati, loni]
                data = np.amin(hdata, axis=0) - 273.15
            elif varname in ["wind"]:
                hdata = np.sqrt(ds.variables["U10M"][:, lati, loni]**2 + ds.variables["V10M"][:, lati, loni]**2)
                data = np.mean(hdata, axis=0)
            lat = np.sort(lat)[::-1][i1:i2]
            lon = np.sort(lon)[j1:j2]
            filename = dbio.writeGeotif(lat, lon, res, data)
            dbio.ingest(dbname, filename, ts, "{0}.merra".format(varname))
            os.remove(filename)
        except Exception:
            print("Cannot import MERRA dataset for {0}!".format(ts.strftime("%Y-%m-%d")))
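# `_merraRunid` is called above but not defined in this listing. MERRA-2 files
# are organized in production streams whose number appears in the filename
# (MERRA2_100: 1980-1991, MERRA2_200: 1992-2000, MERRA2_300: 2001-2010,
# MERRA2_400: 2011 onwards), so a sketch consistent with that convention could
# look like the following; the exact helper in the codebase may differ.
def _merraRunid(year):
    """Hypothetical sketch: map *year* to the MERRA-2 production stream number."""
    if year < 1992:
        return 100
    elif year < 2001:
        return 200
    elif year < 2011:
        return 300
    return 400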
def ingest(dbname, varname, filename, dt, ens):
    """Imports Geotif *filename* into database *dbname*."""
    schema = {'Precipitation': 'precip', 'Temperature': 'tmax'}
    db = dbio.connect(dbname)
    cur = db.cursor()
    cur.execute("select * from information_schema.tables where table_schema='{0}' and table_name='nmme'".format(schema[varname]))
    if not bool(cur.rowcount):
        cur.execute("create table {0}.nmme (rid serial not null primary key, fdate date, ensemble int, rast raster)".format(schema[varname]))
        db.commit()
    cur.execute("select * from {0}.nmme where fdate='{1}' and ensemble = {2}".format(schema[varname], dt.strftime("%Y-%m-%d"), ens))
    if bool(cur.rowcount):
        cur.execute("delete from {0}.nmme where fdate='{1}' and ensemble = {2}".format(schema[varname], dt.strftime("%Y-%m-%d"), ens))
        db.commit()
    dbio.ingest(dbname, filename, dt, "{0}.nmme".format(schema[varname]), False, False)
    sql = "update {0}.nmme set ensemble = {1} where ensemble is null".format(schema[varname], ens)
    cur.execute(sql)
    db.commit()
    cur.execute("select * from raster_resampled where sname='{0}' and tname like 'nmme_%'".format(schema[varname]))
    tables = [r[1] for r in cur.fetchall()]
    for table in tables:
        cur.execute("select * from {0}.{1} where fdate='{2}' and ensemble = {3}".format(schema[varname], table, dt.strftime("%Y-%m-%d"), ens))
        if bool(cur.rowcount):
            cur.execute("delete from {0}.{1} where fdate='{2}' and ensemble = {3}".format(schema[varname], table, dt.strftime("%Y-%m-%d"), ens))
            db.commit()
    tilesize = (10, 10)
    dbio.createResampledTables(dbname, schema[varname], "nmme", dt, tilesize, False, "and ensemble={0}".format(ens))
    _setEnsemble(dbname, schema[varname], ens)
    cur.close()
    db.close()
def download(dbname, dts, bbox):
    """Downloads the MODIS evapotranspiration data product MOD16 for a set of
    dates *dts* and imports them into the PostGIS database *dbname*."""
    log = logging.getLogger(__name__)
    res = 0.01
    urlbase = "http://files.ntsg.umt.edu"
    tiles = modis.findTiles(bbox)
    if tiles is not None:
        for dt in [dts[0] + timedelta(dti) for dti in range((dts[-1] - dts[0]).days + 1)]:
            url = "{0}/data/NTSG_Products/MOD16/MOD16A2.105_MERRAGMAO/Y{1}".format(urlbase, dt.year)
            resp_year = requests.get(url)
            try:
                assert resp_year.status_code == 200
                days = [link for link in BeautifulSoup(resp_year.text, parse_only=SoupStrainer('a')) if isinstance(link, Tag) and link.text.find(dt.strftime("%j")) >= 0]
                assert len(days) > 0
                resp_day = requests.get("{0}{1}".format(urlbase, days[0].get('href')))
                assert resp_day.status_code == 200
                files = [link.get('href') for link in BeautifulSoup(resp_day.text, parse_only=SoupStrainer('a')) if isinstance(link, Tag) and link.text.find("hdf") > 0]
                files = [f for f in files if any(f.find("h{0:02d}v{1:02d}".format(t[1], t[0])) > 0 for t in tiles)]
                outpath = tempfile.mkdtemp()
                for fname in files:
                    resp_file = requests.get("{0}{1}".format(urlbase, fname))
                    filename = fname.split("/")[-1]
                    with open("{0}/{1}".format(outpath, filename), 'wb') as fout:
                        for chunk in resp_file:
                            fout.write(chunk)
                    proc = subprocess.Popen(["gdal_translate", "HDF4_EOS:EOS_GRID:{0}/{1}:MOD_Grid_MOD16A2:ET_1km".format(outpath, filename), "{0}/{1}".format(outpath, filename).replace("hdf", "tif")], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                    out, err = proc.communicate()
                    log.debug(out)
                tifs = glob.glob("{0}/*.tif".format(outpath))
                proc = subprocess.Popen(["gdal_merge.py", "-o", "{0}/et.tif".format(outpath)] + tifs, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                out, err = proc.communicate()
                log.debug(out)
                proc = subprocess.Popen(["gdal_calc.py", "-A", "{0}/et.tif".format(outpath), "--outfile={0}/et1.tif".format(outpath), "--NoDataValue=-9999", "--calc=(A<32701)*(0.1*A+9999)-9999"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                out, err = proc.communicate()
                log.debug(out)
                proc = subprocess.Popen(["gdalwarp", "-t_srs", "+proj=latlong +ellps=sphere", "-tr", str(res), str(-res), "{0}/et1.tif".format(outpath), "{0}/et2.tif".format(outpath)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                out, err = proc.communicate()
                log.debug(out)
                if bbox is None:
                    pstr = []
                else:
                    pstr = ["-projwin", str(bbox[0]), str(bbox[3]), str(bbox[2]), str(bbox[1])]
                proc = subprocess.Popen(["gdal_translate"] + pstr + ["-a_srs", "epsg:4326", "{0}/et2.tif".format(outpath), "{0}/et3.tif".format(outpath)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                out, err = proc.communicate()
                log.debug(out)
                dbio.ingest(dbname, "{0}/et3.tif".format(outpath), dt, table, False)
                shutil.rmtree(outpath)
            except Exception:
                log.warning("MOD16 data not available for {0}. Skipping download!".format(dt.strftime("%Y-%m-%d")))
def ingest(dbname, table, data, lat, lon, res, t, resample=True, overwrite=True):
    """Import data into RHEAS database."""
    log = logging.getLogger(__name__)
    if data is not None:
        if len(data.shape) > 2:
            data = data[0, :, :]
        filename = dbio.writeGeotif(lat, lon, res, data)
        dbio.ingest(dbname, filename, t, table, resample, overwrite)
        os.remove(filename)
    else:
        log.warning("No data were available to import into {0} for {1}.".format(table, t.strftime("%Y-%m-%d")))
def download(dbname, dts, bbox):
    """Downloads the combined MODIS LAI data product MCD15 for a set of dates
    *dts* and imports them into the PostGIS database *dbname*."""
    log = logging.getLogger(__name__)
    res = 0.01
    burl = "http://e4ftl01.cr.usgs.gov/MOTA/MCD15A2.005"
    tiles = modis.findTiles(bbox)
    if tiles is not None:
        for dt in [dts[0] + timedelta(dti) for dti in range((dts[-1] - dts[0]).days + 1)]:
            outpath = tempfile.mkdtemp()
            url = "{0}/{1:04d}.{2:02d}.{3:02d}".format(burl, dt.year, dt.month, dt.day)
            req = requests.get(url, auth=(username, password))
            if req.status_code == 200:
                dom = html.fromstring(req.text)
                files = [link for link in dom.xpath('//a/@href')]
                if len(files) > 0:
                    filenames = [[s for s in files if re.findall(r'MCD.*h{0:02d}v{1:02d}.*hdf$'.format(t[1], t[0]), s)] for t in tiles]
                    for filename in filenames:
                        if len(filename) > 0:
                            filename = filename[0]
                            proc = subprocess.Popen(["wget", "-L", "--load-cookies", ".cookiefile", "--save-cookies", ".cookiefile", "--user", username, "--password", password, "{0}/{1}".format(url, filename), "-O", "{0}/{1}".format(outpath, filename)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                            out, err = proc.communicate()
                            log.debug(out)
                            proc = subprocess.Popen(["gdal_translate", "HDF4_EOS:EOS_GRID:{0}/{1}:MOD_Grid_MOD15A2:Lai_1km".format(outpath, filename), "{0}/{1}".format(outpath, filename).replace("hdf", "tif")], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                            out, err = proc.communicate()
                            log.debug(out)
                    tifs = glob.glob("{0}/*.tif".format(outpath))
                    if len(tifs) > 0:
                        proc = subprocess.Popen(["gdal_merge.py", "-o", "{0}/lai.tif".format(outpath)] + tifs, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                        out, err = proc.communicate()
                        log.debug(out)
                        proc = subprocess.Popen(["gdal_calc.py", "-A", "{0}/lai.tif".format(outpath), "--outfile={0}/lai1.tif".format(outpath), "--NoDataValue=-9999", "--calc=(A<101.0)*(0.1*A+9999.0)-9999.0"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                        out, err = proc.communicate()
                        log.debug(out)
                        proc = subprocess.Popen(["gdalwarp", "-t_srs", "+proj=latlong +ellps=sphere", "-tr", str(res), str(-res), "{0}/lai1.tif".format(outpath), "{0}/lai2.tif".format(outpath)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                        out, err = proc.communicate()
                        log.debug(out)
                        proc = subprocess.Popen(["gdal_translate", "-a_srs", "epsg:4326", "{0}/lai2.tif".format(outpath), "{0}/lai3.tif".format(outpath)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                        out, err = proc.communicate()
                        log.debug(out)
                        dbio.ingest(dbname, "{0}/lai3.tif".format(outpath), dt, table, False)
                shutil.rmtree(outpath)
            else:
                log.warning("MCD15 data not available for {0}. Skipping download!".format(dt.strftime("%Y-%m-%d")))
def download(dbname, dts, bbox):
    """Downloads the Terra MODIS snow cover fraction data product MOD10 for a
    set of dates *dts* and imports them into the PostGIS database *dbname*."""
    log = logging.getLogger(__name__)
    res = 0.005
    url = "n5eil01u.ecs.nsidc.org"
    tiles = modis.findTiles(bbox)
    if tiles is not None:
        ftp = FTP(url)
        ftp.login()
        for dt in [dts[0] + timedelta(dti) for dti in range((dts[-1] - dts[0]).days + 1)]:
            try:
                ftp.cwd("SAN/MOST/MOD10A1.005/{0:04d}.{1:02d}.{2:02d}".format(dt.year, dt.month, dt.day))
                files = [f for f in ftp.nlst() if any(f.find("h{0:02d}v{1:02d}".format(t[1], t[0])) > 0 for t in tiles)]
                files = [f for f in files if f.endswith("hdf")]
                outpath = tempfile.mkdtemp()
                for fname in files:
                    with open("{0}/{1}".format(outpath, fname), 'wb') as f:
                        ftp.retrbinary("RETR {0}".format(fname), f.write)
                    proc = subprocess.Popen(["gdal_translate", "HDF4_EOS:EOS_GRID:{0}/{1}:MOD_Grid_Snow_500m:Fractional_Snow_Cover".format(outpath, fname), "{0}/{1}".format(outpath, fname).replace("hdf", "tif")], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                    out, err = proc.communicate()
                    log.debug(out)
                tifs = glob.glob("{0}/*.tif".format(outpath))
                proc = subprocess.Popen(["gdal_merge.py", "-a_nodata", "-9999", "-o", "{0}/snow.tif".format(outpath)] + tifs, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                out, err = proc.communicate()
                log.debug(out)
                proc = subprocess.Popen(["gdal_calc.py", "-A", "{0}/snow.tif".format(outpath), "--outfile={0}/snow1.tif".format(outpath), "--NoDataValue=-9999", "--calc=(A<101.0)*(A+9999.0)-9999.0"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                out, err = proc.communicate()
                log.debug(out)
                proc = subprocess.Popen(["gdalwarp", "-t_srs", "+proj=latlong +ellps=sphere", "-tr", str(res), str(-res), "{0}/snow1.tif".format(outpath), "{0}/snow2.tif".format(outpath)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                out, err = proc.communicate()
                log.debug(out)
                proc = subprocess.Popen(["gdal_translate", "-a_srs", "epsg:4326", "{0}/snow2.tif".format(outpath), "{0}/snow3.tif".format(outpath)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                out, err = proc.communicate()
                log.debug(out)
                dbio.ingest(dbname, "{0}/snow3.tif".format(outpath), dt, table, False)
                shutil.rmtree(outpath)
            except Exception:
                log.warning("MOD10 data not available for {0}. Skipping download!".format(dt.strftime("%Y-%m-%d")))
def download(dbname, dts, bbox):
    """Downloads the MODSCAG snow cover fraction data product for a set of
    dates *dts* and imports it into the PostGIS database *dbname*."""
    log = logging.getLogger(__name__)
    res = 0.01
    tiles = modis.findTiles(bbox)
    for dt in [dts[0] + timedelta(dti) for dti in range((dts[-1] - dts[0]).days + 1)]:
        temppath = tempfile.mkdtemp()
        url = "https://snow-data.jpl.nasa.gov/modscag-historic/{0}/{1}".format(dt.year, dt.strftime("%j"))
        r = requests.get(url, auth=HTTPDigestAuth(username, password))
        if r.status_code == 200:
            dom = lxml.html.fromstring(r.text)
            links = [link for link in dom.xpath('//a/@href') if link.find("snow_fraction.tif") > 0]
            for t in tiles:
                filenames = [f for f in links if f.find("h{0:02d}v{1:02d}".format(t[1], t[0])) > 0]
                if len(filenames) > 0:
                    filename = filenames[0]
                    r = requests.get("{0}/{1}".format(url, filename), auth=HTTPDigestAuth(username, password))
                    with open("{0}/{1}".format(temppath, filename), 'wb') as fout:
                        fout.write(r.content)
            tifs = glob.glob("{0}/*.tif".format(temppath))
            if len(tifs) > 0:
                proc = subprocess.Popen(["gdal_merge.py", "-o", "{0}/snow.tif".format(temppath)] + tifs, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                out, err = proc.communicate()
                log.debug(out)
                proc = subprocess.Popen(["gdal_calc.py", "-A", "{0}/snow.tif".format(temppath), "--outfile={0}/snow1.tif".format(temppath), "--NoDataValue=-9999", "--calc=(A<101.0)*(A+9999.0)-9999.0"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                out, err = proc.communicate()
                log.debug(out)
                proc = subprocess.Popen(["gdalwarp", "-t_srs", "+proj=latlong +ellps=sphere", "-tr", str(res), str(-res), "{0}/snow1.tif".format(temppath), "{0}/snow2.tif".format(temppath)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                out, err = proc.communicate()
                log.debug(out)
                if bbox is None:
                    pstr = []
                else:
                    pstr = ["-projwin", str(bbox[0]), str(bbox[3]), str(bbox[2]), str(bbox[1])]
                proc = subprocess.Popen(["gdal_translate", "-a_srs", "epsg:4326"] + pstr + ["{0}/snow2.tif".format(temppath), "{0}/snow3.tif".format(temppath)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                out, err = proc.communicate()
                log.debug(out)
                dbio.ingest(dbname, "{0}/snow3.tif".format(temppath), dt, table, False)
            shutil.rmtree(temppath)
        else:
            log.warning("MODSCAG data not available for {0}. Skipping download!".format(dt.strftime("%Y-%m-%d")))
def _downloadVariable(varname, dbname, dt, bbox=None):
    """Download specific variable from the NCEP Reanalysis dataset."""
    log = logging.getLogger(__name__)
    res = 1.875
    baseurl = "http://www.esrl.noaa.gov/psd/thredds/dodsC/Datasets/ncep.reanalysis.dailyavgs/surface_gauss"
    if varname == "tmax":
        urls = ["{0}/tmax.2m.gauss.{1}.nc".format(baseurl, dt[0].year)]
        dsvar = ["tmax"]
    elif varname == "tmin":
        urls = ["{0}/tmin.2m.gauss.{1}.nc".format(baseurl, dt[0].year)]
        dsvar = ["tmin"]
    else:
        urls = ["{0}/uwnd.10m.gauss.{1}.nc".format(baseurl, dt[0].year), "{0}/vwnd.10m.gauss.{1}.nc".format(baseurl, dt[0].year)]
        dsvar = ["uwnd", "vwnd"]
    data = None
    for ui, url in enumerate(urls):
        pds = netcdf.Dataset(url)
        lat = pds.variables["lat"][:]
        lon = pds.variables["lon"][:]
        lon[lon > 180] -= 360.0
        i1, i2, j1, j2 = datasets.spatialSubset(np.sort(lat)[::-1], np.sort(lon), res, bbox)
        t = pds.variables["time"]
        tt = netcdf.num2date(t[:], units=t.units)
        ti = [tj for tj in range(len(tt)) if resetDatetime(tt[tj]) >= dt[0] and resetDatetime(tt[tj]) <= dt[1]]
        if len(ti) > 0:
            lati = np.argsort(lat)[::-1][i1:i2]
            loni = np.argsort(lon)[j1:j2]
            if data is None:
                data = pds.variables[dsvar[ui]][ti, lati, loni]
            else:
                data = np.sqrt(data ** 2.0 + pds.variables[dsvar[ui]][ti, lati, loni] ** 2.0)
    if varname in ["tmax", "tmin"]:
        data -= 273.15  # temperatures are in Kelvin
    lat = np.sort(lat)[::-1][i1:i2]
    lon = np.sort(lon)[j1:j2]
    table = "{0}.ncep".format(varname)
    for t in range(len(ti)):
        filename = dbio.writeGeotif(lat, lon, res, data[t, :, :])
        dbio.ingest(dbname, filename, tt[ti[t]], table)
        os.remove(filename)
    for dtt in [dt[0] + timedelta(days=tj) for tj in range((dt[-1] - dt[0]).days + 1)]:
        if dtt not in tt:
            log.warning("NCEP data not available for {0}. Skipping download!".format(dtt.strftime("%Y-%m-%d")))
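# `resetDatetime` is used above (and in the CDAS variant below) to compare the
# netCDF timestamps against the requested date bounds. A minimal sketch under
# that assumption: it presumably zeroes out the time-of-day so that timestamps
# with non-midnight hours still fall within a whole-day window.
from datetime import datetime

def resetDatetime(dt):
    """Hypothetical sketch: drop the time-of-day from a datetime-like object."""
    return datetime(dt.year, dt.month, dt.day)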
def download(dbname, dts, bbox):
    """Downloads the GPM IMERG daily precipitation data for a set of dates *dts*
    and imports them into the PostGIS database *dbname*."""
    log = logging.getLogger(__name__)
    url = "jsimpson.pps.eosdis.nasa.gov"
    ftp = FTP(url)
    # FIXME: Change to RHEAS-specific password
    ftp.login('*****@*****.**', '*****@*****.**')
    ftp.cwd("data/imerg/gis")
    outpath = tempfile.mkdtemp()
    for dt in [dts[0] + timedelta(t) for t in range((dts[-1] - dts[0]).days + 1)]:
        try:
            if dt.year < datetime.today().year:
                ftp.cwd("/data/imerg/gis/{0}/{1:02d}".format(dt.year, dt.month))
            else:
                ftp.cwd("/data/imerg/gis/{0:02d}".format(dt.month))
            filenames = [f for f in ftp.nlst() if re.match(r"3B.*{0}.*E235959.*1day\.tif".format(dt.strftime("%Y%m%d")), f) is not None]
            if len(filenames) > 0:
                fname = filenames[0]
                with open("{0}/{1}".format(outpath, fname), 'wb') as f:
                    ftp.retrbinary("RETR {0}".format(fname), f.write)
                with open("{0}/{1}".format(outpath, fname.replace("tif", "tfw")), 'wb') as f:
                    ftp.retrbinary("RETR {0}".format(fname.replace("tif", "tfw")), f.write)
                tfname = fname.replace("tif", "tfw")
                fname = datasets.uncompress(fname, outpath)
                datasets.uncompress(tfname, outpath)
                proc = subprocess.Popen(["gdalwarp", "-srcnodata", "29999", "-dstnodata", "-9999", "-overwrite", "-t_srs", "epsg:4326", "-ot", "Float32", "{0}/{1}".format(outpath, fname), "{0}/prec.tif".format(outpath)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                out, err = proc.communicate()
                log.debug(out)
                if bbox is not None:
                    proc = subprocess.Popen(["gdal_translate", "-ot", "Float32", "-a_srs", "epsg:4326", "-projwin", "{0}".format(bbox[0]), "{0}".format(bbox[3]), "{0}".format(bbox[2]), "{0}".format(bbox[1]), "{0}/prec.tif".format(outpath), "{0}/prec1.tif".format(outpath)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                    out, err = proc.communicate()
                    log.debug(out)
                else:
                    proc = subprocess.Popen(["gdal_translate", "-a_srs", "epsg:4326", "{0}/prec.tif".format(outpath), "{0}/prec1.tif".format(outpath)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                    out, err = proc.communicate()
                    log.debug(out)
                # stored values are in tenths of mm; multiply by 0.1 to recover mm/day
                proc = subprocess.Popen(["gdal_calc.py", "--NoDataValue=-9999", "-A", "{0}/prec1.tif".format(outpath), "--outfile={0}/prec2.tif".format(outpath), "--calc=0.1*A"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                out, err = proc.communicate()
                log.debug(out)
                dbio.ingest(dbname, "{0}/prec2.tif".format(outpath), dt, table, True)
        except Exception:
            log.warning("No data were available to import into {0} for {1}.".format(table, dt.strftime("%Y-%m-%d")))
def download(dbname, dts, bbox):
    """Downloads the combined MODIS LAI data product MCD15 for a set of dates
    *dts* and imports them into the PostGIS database *dbname*."""
    log = logging.getLogger(__name__)
    res = 0.01
    burl = "http://e4ftl01.cr.usgs.gov/MOTA/MCD15A2H.006"
    tiles = modis.findTiles(bbox)
    if tiles is not None:
        for dt in [dts[0] + timedelta(dti) for dti in range((dts[-1] - dts[0]).days + 1)]:
            outpath = tempfile.mkdtemp()
            url = "{0}/{1:04d}.{2:02d}.{3:02d}".format(burl, dt.year, dt.month, dt.day)
            filenames = []
            for t in tiles:
                try:
                    tmppath, fname = earthdata.download(url, "MCD15A2H.A{0}.h{1:02d}v{2:02d}.006.*.hdf".format(dt.strftime("%Y%j"), t[1], t[0]))
                except ConnectionError:
                    fname = None
                if fname:
                    filenames.append("{0}/{1}".format(tmppath, fname))
            for filename in filenames:
                proc = subprocess.Popen(["gdal_translate", "HDF4_EOS:EOS_GRID:{0}:MOD_Grid_MOD15A2H:Lai_500m".format(filename), "{0}/{1}".format(outpath, filename.split("/")[-1]).replace("hdf", "tif")], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                out, err = proc.communicate()
                log.debug(out)
                shutil.rmtree("/".join(filename.split("/")[:-1]))
            tifs = glob.glob("{0}/*.tif".format(outpath))
            if len(tifs) > 0:
                proc = subprocess.Popen(["gdal_merge.py", "-o", "{0}/lai.tif".format(outpath)] + tifs, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                out, err = proc.communicate()
                log.debug(out)
                proc = subprocess.Popen(["gdal_calc.py", "-A", "{0}/lai.tif".format(outpath), "--outfile={0}/lai1.tif".format(outpath), "--NoDataValue=-9999", "--calc=(A<101.0)*(0.1*A+9999.0)-9999.0"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                out, err = proc.communicate()
                log.debug(out)
                proc = subprocess.Popen(["gdalwarp", "-t_srs", "+proj=latlong +ellps=sphere", "-tr", str(res), str(-res), "{0}/lai1.tif".format(outpath), "{0}/lai2.tif".format(outpath)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                out, err = proc.communicate()
                log.debug(out)
                proc = subprocess.Popen(["gdal_translate", "-a_srs", "epsg:4326", "{0}/lai2.tif".format(outpath), "{0}/lai3.tif".format(outpath)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                out, err = proc.communicate()
                log.debug(out)
                dbio.ingest(dbname, "{0}/lai3.tif".format(outpath), dt, table, False)
            else:
                log.warning("MCD15 data not available for {0}. Skipping download!".format(dt.strftime("%Y-%m-%d")))
            shutil.rmtree(outpath)
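# `earthdata.download` (used above and in several functions below) is not part
# of this listing. The sketch below is a guess at its behavior from the call
# sites: fetch the index page of an Earthdata-hosted directory *url*, find the
# first link matching the regular expression *pattern*, save it to a temporary
# directory, and return (tempdir, filename), raising ConnectionError when
# nothing matches. The use of module-level *username* and *password* and the
# reliance on the session to follow Earthdata login redirects are assumptions.
import re
import tempfile
import requests
from requests.exceptions import ConnectionError

def earthdata_download(url, pattern):
    """Hypothetical sketch of the earthdata.download helper."""
    session = requests.Session()
    session.auth = (username, password)
    resp = session.get(url)
    if resp.status_code != 200:
        raise ConnectionError("cannot reach {0}".format(url))
    links = re.findall(r'href="([^"]+)"', resp.text)
    matches = [l for l in links if re.match(pattern, l)]
    if not matches:
        raise ConnectionError("no file matching {0} at {1}".format(pattern, url))
    fname = matches[0]
    outpath = tempfile.mkdtemp()
    r = session.get("{0}/{1}".format(url, fname))
    with open("{0}/{1}".format(outpath, fname), 'wb') as fout:
        fout.write(r.content)
    return outpath, fname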
def download(dbname, dts, bbox):
    """Downloads the combined MODIS LAI data product MCD15 for a set of dates
    *dts* and imports them into the PostGIS database *dbname*."""
    res = 0.01
    burl = "http://e4ftl01.cr.usgs.gov/MOTA/MCD15A2.005"
    tiles = modis.findTiles(bbox)
    if tiles is not None:
        for dt in [dts[0] + timedelta(dti) for dti in range((dts[1] - dts[0]).days + 1)]:
            outpath = tempfile.mkdtemp()
            url = "{0}/{1:04d}.{2:02d}.{3:02d}".format(burl, dt.year, dt.month, dt.day)
            connection = urllib.request.urlopen(url)
            dom = html.fromstring(connection.read())
            files = [link for link in dom.xpath('//a/@href')]
            if len(files) > 0:
                filenames = [[s for s in files if re.findall(r'MCD.*h{0:02d}v{1:02d}.*hdf$'.format(t[1], t[0]), s)] for t in tiles]
                for filename in filenames:
                    if len(filename) > 0:
                        filename = filename[0]
                        urllib.request.urlretrieve("{0}/{1}".format(url, filename), "{0}/{1}".format(outpath, filename))
                        subprocess.call(["gdal_translate", "HDF4_EOS:EOS_GRID:{0}/{1}:MOD_Grid_MOD15A2:Lai_1km".format(outpath, filename), "{0}/{1}".format(outpath, filename).replace("hdf", "tif")])
                tifs = glob.glob("{0}/*.tif".format(outpath))
                if len(tifs) > 0:
                    subprocess.call(["gdal_merge.py", "-o", "{0}/lai.tif".format(outpath)] + tifs)
                    cmd = " ".join(["gdal_calc.py", "-A", "{0}/lai.tif".format(outpath), "--outfile={0}/lai1.tif".format(outpath), "--NoDataValue=-9999", "--calc=\"(A<101.0)*(0.1*A+9999.0)-9999.0\""])
                    subprocess.call(cmd, shell=True)
                    cmd = " ".join(["gdalwarp", "-t_srs", "'+proj=latlong +ellps=sphere'", "-tr", str(res), str(-res), "{0}/lai1.tif".format(outpath), "{0}/lai2.tif".format(outpath)])
                    subprocess.call(cmd, shell=True)
                    subprocess.call(["gdal_translate", "-a_srs", "epsg:4326", "{0}/lai2.tif".format(outpath), "{0}/lai3.tif".format(outpath)])
                    dbio.ingest(dbname, "{0}/lai3.tif".format(outpath), dt, table, False)
                shutil.rmtree(outpath)
            else:
                print("MCD15 data not available for {0}. Skipping download!".format(dt.strftime("%Y-%m-%d")))
def _downloadVariable(varname, dbname, dt, bbox=None):
    """Download specific variable from the NCEP Reanalysis dataset."""
    res = 1.875
    if varname == "tmax":
        urls = ["http://iridl.ldeo.columbia.edu/SOURCES/.NOAA/.NCEP-NCAR/.CDAS-1/.DAILY/.Diagnostic/.above_ground/.maximum/.temp/dods"]
        dsvar = ["temp"]
    elif varname == "tmin":
        urls = ["http://iridl.ldeo.columbia.edu/SOURCES/.NOAA/.NCEP-NCAR/.CDAS-1/.DAILY/.Diagnostic/.above_ground/.minimum/.temp/dods"]
        dsvar = ["temp"]
    else:
        urls = ["http://iridl.ldeo.columbia.edu/SOURCES/.NOAA/.NCEP-NCAR/.CDAS-1/.DAILY/.Diagnostic/.above_ground/.u/dods", "http://iridl.ldeo.columbia.edu/SOURCES/.NOAA/.NCEP-NCAR/.CDAS-1/.DAILY/.Diagnostic/.above_ground/.v/dods"]
        dsvar = ["u", "v"]
    data = None
    for ui, url in enumerate(urls):
        pds = netcdf.Dataset(url)
        lat = pds.variables["Y"][:]
        lon = pds.variables["X"][:]
        lon[lon > 180] -= 360.0
        i1, i2, j1, j2 = datasets.spatialSubset(np.sort(lat)[::-1], np.sort(lon), res, bbox)
        t = pds.variables["T"]
        tt = netcdf.num2date(t[:], units=t.units)
        # ti = [tj for tj in range(len(tt)) if tt[tj] >= dt]
        ti = [tj for tj in range(len(tt)) if resetDatetime(tt[tj]) >= dt[0] and resetDatetime(tt[tj]) <= dt[1]]
        if len(ti) > 0:
            lati = np.argsort(lat)[::-1][i1:i2]
            loni = np.argsort(lon)[j1:j2]
            if data is None:
                data = pds.variables[dsvar[ui]][ti, 0, lati, loni]
            else:
                data = np.sqrt(data ** 2.0 + pds.variables[dsvar[ui]][ti, 0, lati, loni] ** 2.0)
    if "temp" in dsvar:
        data -= 273.15
    lat = np.sort(lat)[::-1][i1:i2]
    lon = np.sort(lon)[j1:j2]
    table = "{0}.ncep".format(varname)
    for t in range(len(ti)):
        filename = dbio.writeGeotif(lat, lon, res, data[t, :, :])
        dbio.ingest(dbname, filename, tt[ti[t]], table)
        print("Imported {0} in {1}".format(tt[ti[t]].strftime("%Y-%m-%d"), table))
        os.remove(filename)
def download(dbname, dts, bbox):
    """Downloads the MODIS evapotranspiration data product MOD16 for a set of
    dates *dts* and imports them into the PostGIS database *dbname*."""
    res = 0.01
    url = "ftp.ntsg.umt.edu"
    tiles = modis.findTiles(bbox)
    if tiles is not None:
        ftp = FTP(url)
        ftp.login()
        for dt in [dts[0] + timedelta(dti) for dti in range((dts[1] - dts[0]).days + 1)]:
            try:
                ftp.cwd("pub/MODIS/NTSG_Products/MOD16/MOD16A2.105_MERRAGMAO/Y{0}".format(dt.year))
                days = ftp.nlst()
                datadir = "D{0}".format(dt.strftime("%j"))
                if datadir in days:
                    ftp.cwd(datadir)
                    files = [f for f in ftp.nlst() if any(f.find("h{0:02d}v{1:02d}".format(t[1], t[0])) > 0 for t in tiles)]
                    outpath = tempfile.mkdtemp()
                    for fname in files:
                        with open("{0}/{1}".format(outpath, fname), 'wb') as f:
                            ftp.retrbinary("RETR {0}".format(fname), f.write)
                        subprocess.call(["gdal_translate", "HDF4_EOS:EOS_GRID:{0}/{1}:MOD_Grid_MOD16A2:ET_1km".format(outpath, fname), "{0}/{1}".format(outpath, fname).replace("hdf", "tif")])
                    tifs = glob.glob("{0}/*.tif".format(outpath))  # keep as a list so it can be appended to the command
                    subprocess.call(["gdal_merge.py", "-o", "{0}/et.tif".format(outpath)] + tifs)
                    cmd = " ".join(["gdal_calc.py", "-A", "{0}/et.tif".format(outpath), "--outfile={0}/et1.tif".format(outpath), "--NoDataValue=-9999", "--calc=\"(A<32701)*(0.1*A+9999)-9999\""])
                    subprocess.call(cmd, shell=True)
                    cmd = " ".join(["gdalwarp", "-t_srs", "'+proj=latlong +ellps=sphere'", "-tr", str(res), str(-res), "{0}/et1.tif".format(outpath), "{0}/et2.tif".format(outpath)])
                    subprocess.call(cmd, shell=True)
                    subprocess.call(["gdal_translate", "-a_srs", "epsg:4326", "{0}/et2.tif".format(outpath), "{0}/et3.tif".format(outpath)])
                    dbio.ingest(dbname, "{0}/et3.tif".format(outpath), dt, table, False)
                    shutil.rmtree(outpath)
            except Exception:
                print("MOD16 data not available for {0}. Skipping download!".format(dt.strftime("%Y-%m-%d")))
def _downloadVariable(varname, dbname, dt, bbox=None):
    """Download specific variable from the NCEP Reanalysis dataset."""
    res = 1.875
    if varname == "tmax":
        urls = ["http://iridl.ldeo.columbia.edu/SOURCES/.NOAA/.NCEP-NCAR/.CDAS-1/.DAILY/.Diagnostic/.above_ground/.maximum/.temp/dods"]
        dsvar = ["temp"]
    elif varname == "tmin":
        urls = ["http://iridl.ldeo.columbia.edu/SOURCES/.NOAA/.NCEP-NCAR/.CDAS-1/.DAILY/.Diagnostic/.above_ground/.minimum/.temp/dods"]
        dsvar = ["temp"]
    else:
        urls = ["http://iridl.ldeo.columbia.edu/SOURCES/.NOAA/.NCEP-NCAR/.CDAS-1/.DAILY/.Diagnostic/.above_ground/.u/dods", "http://iridl.ldeo.columbia.edu/SOURCES/.NOAA/.NCEP-NCAR/.CDAS-1/.DAILY/.Diagnostic/.above_ground/.v/dods"]
        dsvar = ["u", "v"]
    data = None
    for ui, url in enumerate(urls):
        pds = netcdf.Dataset(url)
        lat = pds.variables["Y"][:]
        lon = pds.variables["X"][:]
        lon[lon > 180] -= 360.0
        if bbox is not None:
            i = np.where(np.logical_and(lat > bbox[1], lat < bbox[3]))[0]
            j = np.where(np.logical_and(lon > bbox[0], lon < bbox[2]))[0]
            lat = lat[i]
            lon = lon[j]
        else:
            i = range(len(lat))
            j = range(len(lon))
        t = pds.variables["T"]
        tt = netcdf.num2date(t[:], units=t.units)
        ti = [tj for tj in range(len(tt)) if tt[tj] >= dt[0] and tt[tj] <= dt[1]]
        if data is None:
            data = pds.variables[dsvar[ui]][ti, 0, i, j]
        else:
            data = np.sqrt(data ** 2.0 + pds.variables[dsvar[ui]][ti, 0, i, j] ** 2.0)
    if "temp" in dsvar:
        data -= 273.15
    for tj in range(data.shape[0]):
        filename = dbio.writeGeotif(lat, lon, res, data[tj, :, :])
        dbio.ingest(dbname, filename, tt[ti[tj]], "{0}.ncep".format(varname))
        os.remove(filename)
def ingest(dbname, filename, dt, lt, cname, stname):
    """Imports Geotif *filename* into database *dbname*."""
    db = pg.connect(database=dbname)
    cur = db.cursor()
    schemaname, tablename = stname.split(".")
    cur.execute("select * from information_schema.tables where table_schema='{0}' and table_name='{1}'".format(schemaname, tablename))
    if not bool(cur.rowcount):
        cur.execute("create table {0}.{1} (rid serial not null primary key, fdate date, tercile text, leadtime int, rast raster)".format(schemaname, tablename))
        db.commit()
    dbio.ingest(dbname, filename, dt, stname, False)
    sql = "update {0} set tercile = '{1}' where tercile is null".format(stname, cname)
    cur.execute(sql)
    sql = "update {0} set leadtime = '{1}' where leadtime is null".format(stname, lt)
    cur.execute(sql)
    db.commit()
    cur.close()
def download(dbname, dts, bbox=None, enhanced=False):
    """Downloads SMAP soil moisture data for a set of dates *dts* and imports
    them into the PostGIS database *dbname*. Optionally uses a bounding box to
    limit the region with [minlon, minlat, maxlon, maxlat]."""
    log = logging.getLogger(__name__)
    if enhanced:
        res = 0.09
        url = "https://n5eil01u.ecs.nsidc.org/SMAP/SPL3SMP_E.001"
    else:
        res = 0.36
        url = "https://n5eil01u.ecs.nsidc.org/DP4/SMAP/SPL3SMP.004"
    for dt in [dts[0] + timedelta(tt) for tt in range((dts[-1] - dts[0]).days + 1)]:
        try:
            outpath, fname = earthdata.download("{0}/{1}".format(url, dt.strftime("%Y.%m.%d")), r"SMAP_L3_SM_P_\S*.h5")
            f = h5py.File("{0}/{1}".format(outpath, fname))
            varname = None
            for v in f.keys():
                if "latitude" in f[v] and "longitude" in f[v]:
                    varname = v
            assert varname is not None
            lat = f[varname]['latitude'][:, 0]
            lon = f[varname]['longitude'][0, :]
            lon[lon > 180] -= 360.0
            # FIXME: Need to add reprojection from EASE grid
            i1, i2, j1, j2 = datasets.spatialSubset(np.sort(lat)[::-1], np.sort(lon), res, bbox)
            lati = np.argsort(lat)[::-1][i1:i2]
            loni = np.argsort(lon)[j1:j2]
            sm = np.zeros((len(lati), len(loni)))
            for i in range(len(lati)):
                for j in range(len(loni)):
                    sm[i, j] = f[varname]['soil_moisture'][lati[i], loni[j]]
            # FIXME: Use spatially variable observation error
            # sme = f[varname]['soil_moisture_error'][i1:i2, j1:j2]
            lat = np.sort(lat)[::-1][i1:i2]
            lon = np.sort(lon)[j1:j2]
            filename = dbio.writeGeotif(lat, lon, res, sm)
            dbio.ingest(dbname, filename, dt, table, False)
        except Exception:
            log.warning("No SMAP data available for {0}.".format(dt.strftime("%Y-%m-%d")))
def download(dbname, dt, bbox=None):
    """Downloads SMOS soil moisture data for a set of dates *dt* and imports
    them into the PostGIS database *dbname*. Optionally uses a bounding box to
    limit the region with [minlon, minlat, maxlon, maxlat]."""
    log = logging.getLogger(__name__)
    res = 0.25
    url = "http://*****:*****@cp34-bec.cmima.csic.es/thredds/dodsC/NRTSM001D025A_ALL"
    f = netcdf.Dataset(url)
    lat = f.variables['lat'][::-1]  # swap latitude orientation to northwards
    lon = f.variables['lon'][:]
    i1, i2, j1, j2 = datasets.spatialSubset(lat, lon, res, bbox)
    smi1 = len(lat) - i2 - 1
    smi2 = len(lat) - i1 - 1
    lat = lat[i1:i2]
    lon = lon[j1:j2]
    t0 = datetime(2010, 1, 12)  # initial date of SMOS data
    t1 = (dt[0] - t0).days
    if t1 < 0:
        log.warning("Resetting start date to {0}".format(t0.strftime("%Y-%m-%d")))
        t1 = 0
    t2 = (dt[-1] - t0).days + 1
    nt, _, _ = f.variables['SM'].shape
    if t2 > nt:
        t2 = nt
        log.warning("Resetting end date to {0}".format((t0 + timedelta(t2)).strftime("%Y-%m-%d")))
    ti = range(t1, t2)
    sm = f.variables['SM'][ti, smi1:smi2, j1:j2]
    # FIXME: Use spatially variable observation error
    # smv = f.variables['VARIANCE_SM'][ti, i1:i2, j1:j2]
    for tj in range(sm.shape[0]):
        filename = dbio.writeGeotif(lat, lon, res, sm[tj, :, :])
        t = t0 + timedelta(ti[tj])
        dbio.ingest(dbname, filename, t, table, False)
        log.info("Imported SMOS {0}".format(tj))
        os.remove(filename)
def download(dbname, dts, bbox):
    """Downloads AMSR-E soil moisture data for a set of dates *dts* and imports
    them into the PostGIS database *dbname*. Optionally uses a bounding box to
    limit the region with [minlon, minlat, maxlon, maxlat]."""
    log = logging.getLogger(__name__)
    url = "https://n5eil01u.ecs.nsidc.org/AMSA/AE_Land3.002"
    for dt in [dts[0] + timedelta(ti) for ti in range((dts[-1] - dts[0]).days + 1)]:
        try:
            tmppath, fname = earthdata.download("{0}/{1}".format(url, dt.strftime("%Y.%m.%d")), r"AMSR_E_L3_DailyLand\S*.hdf")
            proc = subprocess.Popen(["gdal_translate", "HDF4_EOS:EOS_GRID:{0}/{1}:Ascending_Land_Grid:A_Soil_Moisture".format(tmppath, fname), "{0}/sma.tif".format(tmppath)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
            out, err = proc.communicate()
            log.debug(out)
            proc = subprocess.Popen(["gdal_translate", "HDF4_EOS:EOS_GRID:{0}/{1}:Descending_Land_Grid:D_Soil_Moisture".format(tmppath, fname), "{0}/smd.tif".format(tmppath)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
            out, err = proc.communicate()
            log.debug(out)
            # merge orbits
            proc = subprocess.Popen(["gdal_merge.py", "-o", "{0}/sm1.tif".format(tmppath), "{0}/sma.tif".format(tmppath), "{0}/smd.tif".format(tmppath)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
            out, err = proc.communicate()
            log.debug(out)
            # reproject data
            proc = subprocess.Popen(["gdalwarp", "-s_srs", "epsg:3410", "-t_srs", "epsg:4326", "{0}/sm1.tif".format(tmppath), "{0}/sm2.tif".format(tmppath)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
            out, err = proc.communicate()
            log.debug(out)
            if bbox is None:
                pstr = []
            else:
                pstr = ["-projwin", str(bbox[0]), str(bbox[3]), str(bbox[2]), str(bbox[1])]
            proc = subprocess.Popen(["gdal_translate"] + pstr + ["-ot", "Float32", "{0}/sm2.tif".format(tmppath), "{0}/sm3.tif".format(tmppath)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
            out, err = proc.communicate()
            log.debug(out)
            filename = "{0}/amsre_soilm_{1}.tif".format(tmppath, dt.strftime("%Y%m%d"))
            proc = subprocess.Popen(["gdal_calc.py", "-A", "{0}/sm3.tif".format(tmppath), "--outfile={0}".format(filename), "--NoDataValue=-9999", "--calc=(abs(A)!=9999)*(A/1000.0+9999)-9999"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
            out, err = proc.communicate()
            log.debug(out)
            dbio.ingest(dbname, filename, dt, table, False)
        except Exception:
            log.warning("AMSR-E data not available for {0}. Skipping download!".format(dt.strftime("%Y%m%d")))
def ingest(dbname, filename, dt, lt, cname, stname):
    """Imports Geotif *filename* into database *dbname*."""
    db = dbio.connect(dbname)
    cur = db.cursor()
    schemaname, tablename = stname.split(".")
    cur.execute("select * from information_schema.tables where table_schema='{0}' and table_name='{1}'".format(schemaname, tablename))
    if not bool(cur.rowcount):
        cur.execute("create table {0}.{1} (rid serial not null primary key, fdate date, tercile text, leadtime int, rast raster)".format(schemaname, tablename))
        db.commit()
    cur.execute("select * from {0} where fdate='{1}' and tercile = '{2}' and leadtime = {3}".format(stname, dt.strftime("%Y-%m-%d"), cname, lt))
    if bool(cur.rowcount):
        cur.execute("delete from {0} where fdate='{1}' and tercile = '{2}' and leadtime = {3}".format(stname, dt.strftime("%Y-%m-%d"), cname, lt))
        db.commit()
    dbio.ingest(dbname, filename, dt, stname, False, False)
    sql = "update {0} set tercile = '{1}' where tercile is null".format(stname, cname)
    cur.execute(sql)
    sql = "update {0} set leadtime = '{1}' where leadtime is null".format(stname, lt)
    cur.execute(sql)
    db.commit()
    cur.close()
def download(dbname, dts, bbox):
    """Downloads the MODIS evapotranspiration data product MOD16 for a set of
    dates *dts* and imports them into the PostGIS database *dbname*."""
    log = logging.getLogger(__name__)
    res = 0.01
    url = "ftp.ntsg.umt.edu"
    tiles = modis.findTiles(bbox)
    if tiles is not None:
        ftp = FTP(url)
        ftp.login()
        for dt in [dts[0] + timedelta(dti) for dti in range((dts[-1] - dts[0]).days + 1)]:
            try:
                ftp.cwd("pub/MODIS/NTSG_Products/MOD16/MOD16A2.105_MERRAGMAO/Y{0}".format(dt.year))
                days = ftp.nlst()
                datadir = "D{0}".format(dt.strftime("%j"))
                if datadir in days:
                    ftp.cwd(datadir)
                    files = [f for f in ftp.nlst() if any(f.find("h{0:02d}v{1:02d}".format(t[1], t[0])) > 0 for t in tiles)]
                    outpath = tempfile.mkdtemp()
                    for fname in files:
                        with open("{0}/{1}".format(outpath, fname), 'wb') as f:
                            ftp.retrbinary("RETR {0}".format(fname), f.write)
                        proc = subprocess.Popen(["gdal_translate", "HDF4_EOS:EOS_GRID:{0}/{1}:MOD_Grid_MOD16A2:ET_1km".format(outpath, fname), "{0}/{1}".format(outpath, fname).replace("hdf", "tif")], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                        out, err = proc.communicate()
                        log.debug(out)
                    tifs = glob.glob("{0}/*.tif".format(outpath))
                    proc = subprocess.Popen(["gdal_merge.py", "-o", "{0}/et.tif".format(outpath)] + tifs, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                    out, err = proc.communicate()
                    log.debug(out)
                    proc = subprocess.Popen(["gdal_calc.py", "-A", "{0}/et.tif".format(outpath), "--outfile={0}/et1.tif".format(outpath), "--NoDataValue=-9999", "--calc=(A<32701)*(0.1*A+9999)-9999"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                    out, err = proc.communicate()
                    log.debug(out)
                    proc = subprocess.Popen(["gdalwarp", "-t_srs", "+proj=latlong +ellps=sphere", "-tr", str(res), str(-res), "{0}/et1.tif".format(outpath), "{0}/et2.tif".format(outpath)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                    out, err = proc.communicate()
                    log.debug(out)
                    if bbox is None:
                        pstr = []
                    else:
                        pstr = ["-projwin", str(bbox[0]), str(bbox[3]), str(bbox[2]), str(bbox[1])]
                    proc = subprocess.Popen(["gdal_translate"] + pstr + ["-a_srs", "epsg:4326", "{0}/et2.tif".format(outpath), "{0}/et3.tif".format(outpath)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                    out, err = proc.communicate()
                    log.debug(out)
                    dbio.ingest(dbname, "{0}/et3.tif".format(outpath), dt, table, False)
                    shutil.rmtree(outpath)
            except Exception:
                log.warning("MOD16 data not available for {0}. Skipping download!".format(dt.strftime("%Y-%m-%d")))
def download(dbname, dts, bbox):
    """Downloads the Terra MODIS snow cover fraction data product MOD10 for a
    set of dates *dts* and imports them into the PostGIS database *dbname*."""
    res = 0.005
    url = "n5eil01u.ecs.nsidc.org"
    tiles = modis.findTiles(bbox)
    if tiles is not None:
        ftp = FTP(url)
        ftp.login()
        for dt in [dts[0] + timedelta(dti) for dti in range((dts[-1] - dts[0]).days + 1)]:
            try:
                ftp.cwd("SAN/MOST/MOD10A1.005/{0:04d}.{1:02d}.{2:02d}".format(dt.year, dt.month, dt.day))
                files = [f for f in ftp.nlst() if any(f.find("h{0:02d}v{1:02d}".format(t[1], t[0])) > 0 for t in tiles)]
                files = [f for f in files if f.endswith("hdf")]
                outpath = tempfile.mkdtemp()
                for fname in files:
                    with open("{0}/{1}".format(outpath, fname), 'wb') as f:
                        ftp.retrbinary("RETR {0}".format(fname), f.write)
                    subprocess.call(["gdal_translate", "HDF4_EOS:EOS_GRID:{0}/{1}:MOD_Grid_Snow_500m:Fractional_Snow_Cover".format(outpath, fname), "{0}/{1}".format(outpath, fname).replace("hdf", "tif")])
                tifs = glob.glob("{0}/*.tif".format(outpath))
                subprocess.call(["gdal_merge.py", "-a_nodata", "-9999", "-o", "{0}/snow.tif".format(outpath)] + tifs)
                cmd = " ".join(["gdal_calc.py", "-A", "{0}/snow.tif".format(outpath), "--outfile={0}/snow1.tif".format(outpath), "--NoDataValue=-9999", "--calc=\"(A<101.0)*(A+9999.0)-9999.0\""])
                subprocess.call(cmd, shell=True)
                cmd = " ".join(["gdalwarp", "-t_srs", "'+proj=latlong +ellps=sphere'", "-tr", str(res), str(-res), "{0}/snow1.tif".format(outpath), "{0}/snow2.tif".format(outpath)])
                subprocess.call(cmd, shell=True)
                subprocess.call(["gdal_translate", "-a_srs", "epsg:4326", "{0}/snow2.tif".format(outpath), "{0}/snow3.tif".format(outpath)])
                dbio.ingest(dbname, "{0}/snow3.tif".format(outpath), dt, table, False)
                shutil.rmtree(outpath)
            except Exception:
                print("MOD10 data not available for {0}. Skipping download!".format(dt.strftime("%Y-%m-%d")))
def download(dbname, dts, bbox):
    """Downloads the combined MODIS LAI data product MCD15 for a set of dates
    *dts* and imports them into the PostGIS database *dbname*."""
    log = logging.getLogger(__name__)
    res = 0.005
    burl = "http://e4ftl01.cr.usgs.gov/MOTA/MCD15A2H.006"
    tiles = modis.findTiles(bbox)
    if tiles is not None:
        for dt in [dts[0] + timedelta(dti) for dti in range((dts[-1] - dts[0]).days + 1)]:
            outpath = tempfile.mkdtemp()
            url = "{0}/{1:04d}.{2:02d}.{3:02d}".format(burl, dt.year, dt.month, dt.day)
            filenames = []
            for t in tiles:
                try:
                    tmppath, fname = earthdata.download(url, "MCD15A2H.A{0}.h{1:02d}v{2:02d}.006.*.hdf".format(dt.strftime("%Y%j"), t[1], t[0]))
                except ConnectionError:
                    fname = None
                if fname:
                    filenames.append("{0}/{1}".format(tmppath, fname))
            for filename in filenames:
                proc = subprocess.Popen(["gdal_translate", "HDF4_EOS:EOS_GRID:{0}:MOD_Grid_MOD15A2H:Lai_500m".format(filename), "{0}/{1}".format(outpath, filename.split("/")[-1]).replace("hdf", "tif")], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                out, err = proc.communicate()
                log.debug(out)
                shutil.rmtree("/".join(filename.split("/")[:-1]))
            tifs = glob.glob("{0}/*.tif".format(outpath))
            if len(tifs) > 0:
                proc = subprocess.Popen(["gdal_merge.py", "-o", "{0}/lai.tif".format(outpath)] + tifs, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                out, err = proc.communicate()
                log.debug(out)
                proc = subprocess.Popen(["gdal_calc.py", "-A", "{0}/lai.tif".format(outpath), "--outfile={0}/lai1.tif".format(outpath), "--NoDataValue=-9999", "--calc=(A<101.0)*(0.1*A+9999.0)-9999.0"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                out, err = proc.communicate()
                log.debug(out)
                proc = subprocess.Popen(["gdalwarp", "-t_srs", "+proj=latlong +ellps=sphere", "-tr", str(res), str(-res), "{0}/lai1.tif".format(outpath), "{0}/lai2.tif".format(outpath)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                out, err = proc.communicate()
                log.debug(out)
                proc = subprocess.Popen(["gdal_translate", "-a_srs", "epsg:4326", "{0}/lai2.tif".format(outpath), "{0}/lai3.tif".format(outpath)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                out, err = proc.communicate()
                log.debug(out)
                dbio.ingest(dbname, "{0}/lai3.tif".format(outpath), dt, table, False)
            else:
                log.warning("MCD15 data not available for {0}. Skipping download!".format(dt.strftime("%Y-%m-%d")))
            shutil.rmtree(outpath)
def download(dbname, dts, bbox): """Downloads the MODIS evapotranspiration data product MOD16 for a set of dates *dt* and imports them into the PostGIS database *dbname*.""" log = logging.getLogger(__name__) res = 0.01 url = "ftp.ntsg.umt.edu" tiles = modis.findTiles(bbox) if tiles is not None: ftp = FTP(url) ftp.login() for dt in [dts[0] + timedelta(dti) for dti in range((dts[-1] - dts[0]).days + 1)]: try: ftp.cwd("pub/MODIS/NTSG_Products/MOD16/MOD16A2.105_MERRAGMAO/Y{0}".format(dt.year)) days = ftp.nlst() datadir = "D{0}".format(dt.strftime("%j")) if datadir in days: ftp.cwd(datadir) files = [ f for f in ftp.nlst() if any(f.find("h{0:02d}v{1:02d}".format(t[1], t[0])) > 0 for t in tiles) ] outpath = tempfile.mkdtemp() for fname in files: with open("{0}/{1}".format(outpath, fname), "wb") as f: ftp.retrbinary("RETR {0}".format(fname), f.write) proc = subprocess.Popen( [ "gdal_translate", "HDF4_EOS:EOS_GRID:{0}/{1}:MOD_Grid_MOD16A2:ET_1km".format(outpath, fname), "{0}/{1}".format(outpath, fname).replace("hdf", "tif"), ], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, ) out, err = proc.communicate() log.debug(out) tifs = glob.glob("{0}/*.tif".format(outpath)) proc = subprocess.Popen( ["gdal_merge.py", "-o", "{0}/et.tif".format(outpath)] + tifs, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, ) out, err = proc.communicate() log.debug(out) proc = subprocess.Popen( [ "gdal_calc.py", "-A", "{0}/et.tif".format(outpath), "--outfile={0}/et1.tif".format(outpath), "--NoDataValue=-9999", '--calc="(A<32701)*(0.1*A+9999)-9999"', ], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, ) out, err = proc.communicate() log.debug(out) proc = subprocess.Popen( [ "gdalwarp", "-t_srs", "'+proj=latlong +ellps=sphere'", "-tr", str(res), str(-res), "{0}/et1.tif".format(outpath), "{0}/et2.tif".format(outpath), ], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, ) out, err = proc.communicate() log.debug(out) if bbox is None: pstr = [] else: pstr = ["-projwin", str(bbox[0]), str(bbox[3]), str(bbox[2]), str(bbox[1])] proc = subprocess.Popen( ["gdal_translate"] + pstr + ["-a_srs", "epsg:4326", "{0}/et2.tif".format(outpath), "{0}/et3.tif".format(outpath)], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, ) out, err = proc.communicate() log.debug(out) dbio.ingest(dbname, "{0}/et3.tif".format(outpath), dt, table, False) shutil.rmtree(outpath) except: log.warning("MOD16 data not available for {0}. Skipping download!".format(dt.strftime("%Y-%m-%d")))
def download(dbname, dts, bbox): """Downloads the MODSCAG snow cover fraction data product for a specific date *dt* and imports it into the PostGIS database *dbname*.""" log = logging.getLogger(__name__) res = 0.01 tiles = modis.findTiles(bbox) for dt in [ dts[0] + timedelta(dti) for dti in range((dts[-1] - dts[0]).days + 1) ]: temppath = tempfile.mkdtemp() url = "https://snow-data.jpl.nasa.gov/modscag-historic/{0}/{1}".format( dt.year, dt.strftime("%j")) r = requests.get(url, auth=HTTPDigestAuth(username, password)) if r.status_code == 200: dom = lxml.html.fromstring(r.text) links = [ link for link in dom.xpath('//a/@href') if link.find("snow_fraction.tif") > 0 ] for t in tiles: filenames = filter( lambda f: f.find("h{0:02d}v{1:02d}".format(t[1], t[0])) > 0, links) if len(filenames) > 0: filename = filenames[0] r = requests.get("{0}/{1}".format(url, filename), auth=HTTPDigestAuth(username, password)) with open("{0}/{1}".format(temppath, filename), 'wb') as fout: fout.write(r.content) tifs = glob.glob("{0}/*.tif".format(temppath)) if len(tifs) > 0: proc = subprocess.Popen( ["gdal_merge.py", "-o", "{0}/snow.tif".format(temppath)] + tifs, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) out, err = proc.communicate() log.debug(out) proc = subprocess.Popen([ "gdal_calc.py", "-A", "{0}/snow.tif".format(temppath), "--outfile={0}/snow1.tif".format(temppath), "--NoDataValue=-9999", "--calc=\"(A<101.0)*(A+9999.0)-9999.0\"" ], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) out, err = proc.communicate() log.debug(out) proc = subprocess.Popen([ "gdalwarp", "-t_srs", "'+proj=latlong +ellps=sphere'", "-tr", str(res), str(-res), "{0}/snow1.tif".format(temppath), "{0}/snow2.tif".format(temppath) ], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) out, err = proc.communicate() log.debug(out) if bbox is None: pstr = [] else: pstr = [ "-projwin", str(bbox[0]), str(bbox[3]), str(bbox[2]), str(bbox[1]) ] proc = subprocess.Popen( ["gdal_translate", "-a_srs", "epsg:4326"] + pstr + [ "{0}/snow2.tif".format(temppath), "{0}/snow3.tif".format(temppath) ], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) out, err = proc.communicate() log.debug(out) dbio.ingest(dbname, "{0}/snow3.tif".format(temppath), dt, table, False) shutil.rmtree(temppath) else: log.warning( "MODSCAG data not available for {0}. Skipping download!". format(dt.strftime("%Y-%m-%d")))
def download(dbname, dts, bbox): """Downloads the MODIS evapotranspiration data product MOD16 for a set of dates *dt* and imports them into the PostGIS database *dbname*.""" log = logging.getLogger(__name__) res = 0.01 urlbase = "http://files.ntsg.umt.edu" tiles = modis.findTiles(bbox) if tiles is not None: for dt in [ dts[0] + timedelta(dti) for dti in range((dts[-1] - dts[0]).days + 1) ]: url = "{0}/data/NTSG_Products/MOD16/MOD16A2.105_MERRAGMAO/Y{1}".format( urlbase, dt.year) resp_year = requests.get(url) try: assert resp_year.status_code == 200 days = [ link for link in BeautifulSoup(resp_year.text, parse_only=SoupStrainer('a')) if isinstance(link, Tag) and link.text.find(dt.strftime("%j")) >= 0 ] assert len(days) > 0 resp_day = requests.get("{0}{1}".format( urlbase, days[0].get('href'))) assert resp_day.status_code == 200 files = [ link.get('href') for link in BeautifulSoup(resp_day.text, parse_only=SoupStrainer('a')) if isinstance(link, Tag) and link.text.find("hdf") > 0 ] files = [ f for f in files if any( f.find("h{0:02d}v{1:02d}".format(t[1], t[0])) > 0 for t in tiles) ] outpath = tempfile.mkdtemp() for fname in files: resp_file = requests.get("{0}{1}".format(urlbase, fname)) filename = fname.split("/")[-1] with open("{0}/{1}".format(outpath, filename), 'wb') as fout: for chunk in resp_file: fout.write(chunk) proc = subprocess.Popen([ "gdal_translate", "HDF4_EOS:EOS_GRID:{0}/{1}:MOD_Grid_MOD16A2:ET_1km". format(outpath, filename), "{0}/{1}".format( outpath, filename).replace("hdf", "tif") ], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) out, err = proc.communicate() log.debug(out) tifs = glob.glob("{0}/*.tif".format(outpath)) proc = subprocess.Popen( ["gdal_merge.py", "-o", "{0}/et.tif".format(outpath)] + tifs, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) out, err = proc.communicate() log.debug(out) proc = subprocess.Popen([ "gdal_calc.py", "-A", "{0}/et.tif".format(outpath), "--outfile={0}/et1.tif".format(outpath), "--NoDataValue=-9999", "--calc=(A<32701)*(0.1*A+9999)-9999" ], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) out, err = proc.communicate() log.debug(out) proc = subprocess.Popen([ "gdalwarp", "-t_srs", "+proj=latlong +ellps=sphere", "-tr", str(res), str(-res), "{0}/et1.tif".format(outpath), "{0}/et2.tif".format(outpath) ], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) out, err = proc.communicate() log.debug(out) if bbox is None: pstr = [] else: pstr = [ "-projwin", str(bbox[0]), str(bbox[3]), str(bbox[2]), str(bbox[1]) ] proc = subprocess.Popen(["gdal_translate"] + pstr + [ "-a_srs", "epsg:4326", "{0}/et2.tif".format(outpath), "{0}/et3.tif".format(outpath) ], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) out, err = proc.communicate() log.debug(out) dbio.ingest(dbname, "{0}/et3.tif".format(outpath), dt, table, False) shutil.rmtree(outpath) except: log.warning( "MOD16 data not available for {0}. Skipping download!". format(dt.strftime("%Y-%m-%d")))
def ingestTables(dbname): """Ingest datasets needed for the unit tests.""" for dt in pd.date_range("2011-1-1", "2011-12-31"): dbio.ingest(dbname, "{0}/tests/precip/chirps_{1}.tif".format(rpath.data, dt.strftime("%Y-%m-%d")), dt, "precip.chirps") dbio.ingest(dbname, "{0}/tests/precip/trmm_{1}.tif".format(rpath.data, dt.strftime("%Y-%m-%d")), dt, "precip.trmm") dbio.ingest(dbname, "{0}/tests/tmax/tmax_{1}.tif".format(rpath.data, dt.strftime("%Y-%m-%d")), dt, "tmax.ncep") dbio.ingest(dbname, "{0}/tests/tmin/tmin_{1}.tif".format(rpath.data, dt.strftime("%Y-%m-%d")), dt, "tmin.ncep") dbio.ingest(dbname, "{0}/tests/wind/wind_{1}.tif".format(rpath.data, dt.strftime("%Y-%m-%d")), dt, "wind.ncep") dbio.ingest(dbname, "{0}/tests/soilmoist/smos_{1}.tif".format(rpath.data, dt.strftime("%Y-%m-%d")), dt, "soilmoist.smos") subprocess.call(["{0}/psql".format(rpath.bins), "-d", dbname, "-f", "{0}/tests/cropland.sql".format(rpath.data)]) subprocess.call(["{0}/psql".format(rpath.bins), "-d", dbname, "-f", "{0}/tests/plantstart.sql".format(rpath.data)]) subprocess.call(["{0}/psql".format(rpath.bins), "-d", dbname, "-f", "{0}/tests/dssat_soils.sql".format(rpath.data)])
def ingestTables(dbname): """Ingest datasets needed for the unit tests.""" for dt in pd.date_range("2011-1-1", "2011-12-31"): dbio.ingest( dbname, "{0}/tests/precip/chirps_{1}.tif".format(rpath.data, dt.strftime("%Y-%m-%d")), dt, "precip.chirps") dbio.ingest( dbname, "{0}/tests/precip/trmm_{1}.tif".format(rpath.data, dt.strftime("%Y-%m-%d")), dt, "precip.trmm") dbio.ingest( dbname, "{0}/tests/tmax/tmax_{1}.tif".format(rpath.data, dt.strftime("%Y-%m-%d")), dt, "tmax.ncep") dbio.ingest( dbname, "{0}/tests/tmin/tmin_{1}.tif".format(rpath.data, dt.strftime("%Y-%m-%d")), dt, "tmin.ncep") dbio.ingest( dbname, "{0}/tests/wind/wind_{1}.tif".format(rpath.data, dt.strftime("%Y-%m-%d")), dt, "wind.ncep") dbio.ingest( dbname, "{0}/tests/soilmoist/smos_{1}.tif".format(rpath.data, dt.strftime("%Y-%m-%d")), dt, "soilmoist.smos") subprocess.call([ "{0}/psql".format(rpath.bins), "-d", dbname, "-f", "{0}/tests/cropland.sql".format(rpath.data) ]) subprocess.call([ "{0}/psql".format(rpath.bins), "-d", dbname, "-f", "{0}/tests/plantstart.sql".format(rpath.data) ]) subprocess.call([ "{0}/psql".format(rpath.bins), "-d", dbname, "-f", "{0}/tests/dssat_soils.sql".format(rpath.data) ])
def download(dbname, dts, bbox): """Downloads the PRISM data products for a set of dates *dt* and imports them into the PostGIS database *dbname*.""" log = logging.getLogger(__name__) url = "jsimpson.pps.eosdis.nasa.gov" ftp = FTP(url) # FIXME: Change to RHEAS-specific password ftp.login('*****@*****.**', '*****@*****.**') ftp.cwd("data/imerg/gis") outpath = tempfile.mkdtemp() for dt in [ dts[0] + timedelta(t) for t in range((dts[-1] - dts[0]).days + 1) ]: try: if dt.year < datetime.today().year: ftp.cwd("/data/imerg/gis/{0}/{1:02d}".format( dt.year, dt.month)) else: ftp.cwd("/data/imerg/gis/{0:02d}".format(dt.month)) filenames = [ f for f in ftp.nlst() if re.match( r"3B.*{0}.*S000000.*1day\.tif.*".format( dt.strftime("%Y%m%d")), f) is not None ] if len(filenames) > 0: fname = filenames[0] with open("{0}/{1}".format(outpath, fname), 'wb') as f: ftp.retrbinary("RETR {0}".format(fname), f.write) with open( "{0}/{1}".format(outpath, fname.replace("tif", "tfw")), 'wb') as f: ftp.retrbinary( "RETR {0}".format(fname.replace("tif", "tfw")), f.write) tfname = fname.replace("tif", "tfw") fname = datasets.uncompress(fname, outpath) datasets.uncompress(tfname, outpath) proc = subprocess.Popen([ "gdalwarp", "-t_srs", "-ot", "Float32", "epsg:4326", "{0}/{1}".format( outpath, fname), "{0}/prec.tif".format(outpath) ], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) out, err = proc.communicate() log.debug(out) if bbox is not None: proc = subprocess.Popen([ "gdal_translate", "-ot", "Float32", "-a_srs", "epsg:4326", "-projwin", "{0}".format(bbox[0]), "{0}".format(bbox[3]), "{0}".format(bbox[2]), "{0}".format(bbox[1]), "{0}/prec.tif".format(outpath), "{0}/prec1.tif".format(outpath) ], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) out, err = proc.communicate() log.debug(out) else: proc = subprocess.Popen([ "gdal_translate", "-a_srs", "epsg:4326", "{0}/prec.tif".format(outpath), "{0}/prec1.tif".format(outpath) ], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) out, err = proc.communicate() log.debug(out) # multiply by 0.1 to get mm/hr and 24 to get mm/day proc = subprocess.Popen([ "gdal_calc.py", "-A", "{0}/prec1.tif".format(outpath), "--outfile={0}/prec2.tif".format(outpath), "--calc=\"2.4*A\"" ], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) out, err = proc.communicate() log.debug(out) dbio.ingest(dbname, "{0}/prec2.tif".format(outpath), dt, table, True) except: log.warning( "No data were available to import into {0} for {1}.".format( table, dt.strftime("%Y-%m-%d")))
def download(dbname, dts, bbox): """Downloads the PRISM data products for a set of dates *dt* and imports them into the PostGIS database *dbname*.""" url = "jsimpson.pps.eosdis.nasa.gov" ftp = FTP(url) # FIXME: Change to RHEAS-specific password ftp.login('*****@*****.**', '*****@*****.**') ftp.cwd("data/imerg/gis") outpath = tempfile.mkdtemp() for dt in [ dts[0] + timedelta(t) for t in range((dts[-1] - dts[0]).days + 1) ]: try: ftp.cwd("/data/imerg/gis/{0}/{1:02d}".format(dt.year, dt.month)) filenames = [ f for f in ftp.nlst() if re.match( r"3B.*{0}.*S000000.*1day\.tif.*".format( dt.strftime("%Y%m%d")), f) is not None ] if len(filenames) > 0: fname = filenames[0] with open("{0}/{1}".format(outpath, fname), 'wb') as f: ftp.retrbinary("RETR {0}".format(fname), f.write) with open( "{0}/{1}".format(outpath, fname.replace("tif", "tfw")), 'wb') as f: ftp.retrbinary( "RETR {0}".format(fname.replace("tif", "tfw")), f.write) tfname = fname.replace("tif", "tfw") fname = datasets.uncompress(fname, outpath) datasets.uncompress(tfname, outpath) subprocess.call([ "gdalwarp", "-t_srs", "epsg:4326", "{0}/{1}".format(outpath, fname), "{0}/prec.tif".format(outpath) ]) if bbox is not None: subprocess.call([ "gdal_translate", "-a_srs", "epsg:4326", "-projwin", "{0}".format(bbox[0]), "{0}".format(bbox[3]), "{0}".format(bbox[2]), "{0}".format(bbox[1]), "{0}/prec.tif".format(outpath), "{0}/prec1.tif".format(outpath) ]) else: subprocess.call([ "gdal_translate", "-a_srs", "epsg:4326", "{0}/prec.tif".format(outpath), "{0}/prec1.tif".format(outpath) ]) # multiply by 0.1 to get mm/hr and 24 to get mm/day cmd = " ".join([ "gdal_calc.py", "-A", "{0}/prec1.tif".format(outpath), "--outfile={0}/prec2.tif".format(outpath), "--calc=\"0.1*A\"" ]) subprocess.call(cmd, shell=True) dbio.ingest(dbname, "{0}/prec2.tif".format(outpath), dt, table, False) except: print( "WARNING! No data were available to import into {0} for {1}.". format(table, dt.strftime("%Y-%m-%d")))
def download(dbname, dts, bbox): """Downloads the combined MODIS LAI data product MCD15 for a specific date *dt* and imports them into the PostGIS database *dbname*.""" res = 0.01 burl = "http://e4ftl01.cr.usgs.gov/MOTA/MCD15A2.005" tiles = modis.findTiles(bbox) if tiles is not None: for dt in [ dts[0] + timedelta(dti) for dti in range((dts[-1] - dts[0]).days + 1) ]: outpath = tempfile.mkdtemp() url = "{0}/{1:04d}.{2:02d}.{3:02d}".format(burl, dt.year, dt.month, dt.day) connection = urllib.urlopen(url) dom = html.fromstring(connection.read()) files = [link for link in dom.xpath('//a/@href')] if len(files) > 0: filenames = [ filter( lambda s: re.findall( r'MCD.*h{0:02d}v{1:02d}.*hdf$'.format(t[1], t[0]), s), files) for t in tiles ] for filename in filenames: if len(filename) > 0: filename = filename[0] urllib.urlretrieve("{0}/{1}".format(url, filename), "{0}/{1}".format(outpath, filename)) subprocess.call([ "gdal_translate", "HDF4_EOS:EOS_GRID:{0}/{1}:MOD_Grid_MOD15A2:Lai_1km" .format(outpath, filename), "{0}/{1}".format(outpath, filename).replace("hdf", "tif") ]) tifs = glob.glob("{0}/*.tif".format(outpath)) if len(tifs) > 0: subprocess.call( ["gdal_merge.py", "-o", "{0}/lai.tif".format(outpath) ] + tifs) cmd = " ".join([ "gdal_calc.py", "-A", "{0}/lai.tif".format(outpath), "--outfile={0}/lai1.tif".format(outpath), "--NoDataValue=-9999", "--calc=\"(A<101.0)*(0.1*A+9999.0)-9999.0\"" ]) subprocess.call(cmd, shell=True) cmd = " ".join([ "gdalwarp", "-t_srs", "'+proj=latlong +ellps=sphere'", "-tr", str(res), str(-res), "{0}/lai1.tif".format(outpath), "{0}/lai2.tif".format(outpath) ]) subprocess.call(cmd, shell=True) subprocess.call([ "gdal_translate", "-a_srs", "epsg:4326", "{0}/lai2.tif".format(outpath), "{0}/lai3.tif".format(outpath) ]) dbio.ingest(dbname, "{0}/lai3.tif".format(outpath), dt, table, False) shutil.rmtree(outpath) else: print("MCD15 data not available for {0}. Skipping download!". format(dt.strftime("%Y-%m-%d")))
def download(dbname, dts, bbox): """Downloads the combined MODIS LAI data product MCD15 for a specific date *dt* and imports them into the PostGIS database *dbname*.""" log = logging.getLogger(__name__) res = 0.01 burl = "http://e4ftl01.cr.usgs.gov/MOTA/MCD15A2.005" tiles = modis.findTiles(bbox) if tiles is not None: for dt in [ dts[0] + timedelta(dti) for dti in range((dts[-1] - dts[0]).days + 1) ]: outpath = tempfile.mkdtemp() url = "{0}/{1:04d}.{2:02d}.{3:02d}".format(burl, dt.year, dt.month, dt.day) req = requests.get(url, auth=(username, password)) if req.status_code == 200: dom = html.fromstring(req.text) files = [link for link in dom.xpath('//a/@href')] if len(files) > 0: filenames = [ filter( lambda s: re.findall( r'MCD.*h{0:02d}v{1:02d}.*hdf$'.format( t[1], t[0]), s), files) for t in tiles ] for filename in filenames: if len(filename) > 0: filename = filename[0] proc = subprocess.Popen([ "wget", "-L", "--load-cookies", ".cookiefile", "--save-cookies", ".cookiefile", "--user", username, "--password", password, "{0}/{1}".format(url, filename), "-O", "{0}/{1}".format(outpath, filename) ], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) out, err = proc.communicate() log.debug(out) proc = subprocess.Popen([ "gdal_translate", "HDF4_EOS:EOS_GRID:{0}/{1}:MOD_Grid_MOD15A2:Lai_1km" .format(outpath, filename), "{0}/{1}".format( outpath, filename).replace("hdf", "tif") ], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) out, err = proc.communicate() log.debug(out) tifs = glob.glob("{0}/*.tif".format(outpath)) if len(tifs) > 0: proc = subprocess.Popen([ "gdal_merge.py", "-o", "{0}/lai.tif".format(outpath) ] + tifs, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) out, err = proc.communicate() log.debug(out) proc = subprocess.Popen([ "gdal_calc.py", "-A", "{0}/lai.tif".format(outpath), "--outfile={0}/lai1.tif".format(outpath), "--NoDataValue=-9999", "--calc=(A<101.0)*(0.1*A+9999.0)-9999.0" ], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) out, err = proc.communicate() log.debug(out) proc = subprocess.Popen([ "gdalwarp", "-t_srs", "+proj=latlong +ellps=sphere", "-tr", str(res), str(-res), "{0}/lai1.tif".format(outpath), "{0}/lai2.tif".format(outpath) ], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) out, err = proc.communicate() log.debug(out) proc = subprocess.Popen([ "gdal_translate", "-a_srs", "epsg:4326", "{0}/lai2.tif".format(outpath), "{0}/lai3.tif".format(outpath) ], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) out, err = proc.communicate() log.debug(out) dbio.ingest(dbname, "{0}/lai3.tif".format(outpath), dt, table, False) shutil.rmtree(outpath) else: log.warning( "MCD15 data not available for {0}. Skipping download!". format(dt.strftime("%Y-%m-%d")))