def load_nightlight_noaa(ref_year=2013, sat_name=None):
    """Get nightlight luminosities. Nightlight matrix, lat and lon ordered
    such that nightlight[1][0] corresponds to lat[1], lon[0] point (the image
    has been flipped).

    Parameters:
        ref_year (int): reference year
        sat_name (str, optional): satellite provider (e.g. 'F10', 'F18', ...)

    Returns:
        nightlight (sparse.csr_matrix), coord_nl (np.array), fn_light (str)
    """
    if sat_name is None:
        fn_light = path.join(path.abspath(SYSTEM_DIR), '*' +
                             str(ref_year) + '*.stable_lights.avg_vis')
    else:
        fn_light = path.join(path.abspath(SYSTEM_DIR), sat_name +
                             str(ref_year) + '*.stable_lights.avg_vis')
    # check if file exists in SYSTEM_DIR, download if not
    if glob.glob(fn_light + ".p"):
        fn_light = glob.glob(fn_light + ".p")[0]
        with open(fn_light, 'rb') as f_nl:
            nightlight = pickle.load(f_nl)
    elif glob.glob(fn_light + ".tif.gz"):
        fn_light = glob.glob(fn_light + ".tif.gz")[0]
        fn_light, nightlight = unzip_tif_to_py(fn_light)
    else:
        # iterate over all satellites if no satellite name provided
        if sat_name is None:
            ini_pre, end_pre = 18, 9
            for pre_i in np.arange(ini_pre, end_pre, -1):
                url = NOAA_SITE + 'F' + str(pre_i) + str(ref_year) + '.v4.tar'
                try:
                    file_down = download_file(url)
                    break
                except ValueError:
                    pass
            if 'file_down' not in locals():
                LOGGER.error('Nightlight for reference year %s not available. '
                             'Try another year.', ref_year)
                raise ValueError
        else:
            url = NOAA_SITE + sat_name + str(ref_year) + '.v4.tar'
            try:
                file_down = download_file(url)
            except ValueError:
                LOGGER.error('Nightlight intensities for year %s and satellite'
                             ' %s do not exist.', ref_year, sat_name)
                raise ValueError
        fn_light = untar_noaa_stable_nightlight(file_down)
        fn_light, nightlight = unzip_tif_to_py(fn_light)

    # first point and step
    coord_nl = np.empty((2, 2))
    coord_nl[0, :] = [NOAA_BORDER[1], NOAA_RESOLUTION_DEG]
    coord_nl[1, :] = [NOAA_BORDER[0], NOAA_RESOLUTION_DEG]

    return nightlight, coord_nl, fn_light
def test_wrong_url_fail(self):
    """Error raised when wrong url."""
    url = 'https://ngdc.noaa.gov/eog/data/web_data/v4composites/F172012.v4.tar'
    try:
        with self.assertRaises(ValueError):
            download_file(url)
    except IOError:
        pass
def download_icon_centroids_file(model_name='icon-eu-eps', download_dir=None):
    """Create centroids based on netcdf files provided by DWD, links found here:
    https://www.dwd.de/DE/leistungen/opendata/neuigkeiten/opendata_dez2018_02.html
    https://www.dwd.de/DE/leistungen/opendata/neuigkeiten/opendata_aug2020_01.html

    Parameters
    ----------
    model_name : str
        the name of the forecast model written as it appears in the folder
        structure in opendata.dwd.de/weather/nwp/
    download_dir : str or Path
        directory where the downloaded files should be saved in

    Returns
    -------
    file_name : str
        absolute path and filename of the downloaded and decompressed netcdf file
    """
    # define url and filename
    url = 'https://opendata.dwd.de/weather/lib/cdo/'
    if model_name == 'icon-eu-eps':
        file_name = 'icon_grid_0028_R02B07_N02.nc.bz2'
    elif model_name == 'icon-eu':
        file_name = 'icon_grid_0024_R02B06_G.nc.bz2'
    elif model_name in ('icon-d2-eps', 'icon-d2'):
        file_name = 'icon_grid_0047_R19B07_L.nc.bz2'
    elif model_name == 'test':
        file_name = 'test_storm_europe_icon_grid.nc.bz2'
    else:
        raise ValueError(f'Creation of centroids for the icon model {model_name} '
                         'is not yet implemented. Please define '
                         'the default values in the code first.')
    download_path = CONFIG.local_data.save_dir.dir() if download_dir is None \
        else Path(download_dir)
    bz2_pathfile = download_path.absolute().joinpath(file_name)
    nc_pathfile = bz2_pathfile.with_suffix('')

    # download and unzip file
    if not nc_pathfile.exists():
        if not bz2_pathfile.exists():
            try:
                download_file(url + file_name, download_dir=download_path)
            except ValueError as err:
                raise ValueError(f'Error while downloading {url + file_name}.') from err
        with open(bz2_pathfile, 'rb') as source, open(nc_pathfile, 'wb') as dest:
            dest.write(bz2.decompress(source.read()))
        bz2_pathfile.unlink()

    return str(nc_pathfile)
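# A self-contained sketch of the bz2-decompression step used above
# (assumption: the commented file name is illustrative; any existing .bz2
# archive works):
import bz2
from pathlib import Path

def decompress_bz2(bz2_path):
    """Write the decompressed payload next to the archive and return its path."""
    bz2_path = Path(bz2_path)
    out_path = bz2_path.with_suffix('')  # drop the trailing '.bz2'
    with open(bz2_path, 'rb') as source, open(out_path, 'wb') as dest:
        dest.write(bz2.decompress(source.read()))
    return out_path

# decompress_bz2('icon_grid_0028_R02B07_N02.nc.bz2')  # hypothetical local file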
def read_wiod16(self, year=2014, range_rows=(5, 2469),
                range_cols=(4, 2468), col_iso3=2, col_sectors=1):
    """Read multi-regional input-output tables of the 2016 release of the
    WIOD project: http://www.wiod.org/database/wiots16

    Parameters
    ----------
    year : int
        Year of WIOD table to use. Valid years go from 2000 to 2014.
        Default year is 2014.
    range_rows : tuple
        initial and end positions of data along rows. Default is (5, 2469).
    range_cols : tuple
        initial and end positions of data along columns. Default is (4, 2468).
    col_iso3 : int
        column with country names in ISO3 codes. Default is 2.
    col_sectors : int
        column with sector names. Default is 1.

    References
    ----------
    [1] Timmer, M. P., Dietzenbacher, E., Los, B., Stehrer, R. and
        de Vries, G. J. (2015), "An Illustrated User Guide to the World
        Input–Output Database: the Case of Global Automotive Production",
        Review of International Economics, 23: 575–605
    """
    file_name = 'WIOT{}_Nov16_ROW.xlsb'.format(year)
    file_loc = WIOD_DIRECTORY / file_name

    if file_loc not in WIOD_DIRECTORY.iterdir():
        LOGGER.info('Downloading WIOD table for year %s', year)
        download_link = WIOD_FILE_LINK + file_name
        u_fh.download_file(download_link, download_dir=WIOD_DIRECTORY)
    mriot = pd.read_excel(file_loc, engine='pyxlsb')

    start_row, end_row = range_rows
    start_col, end_col = range_cols

    self.sectors = mriot.iloc[start_row:end_row, col_sectors].unique()
    self.mriot_reg_names = mriot.iloc[start_row:end_row, col_iso3].unique()
    self.mriot_data = mriot.iloc[start_row:end_row, start_col:end_col].values
    self.total_prod = mriot.iloc[start_row:end_row, -1].values
    self.reg_pos = {
        name: range(len(self.sectors) * i, len(self.sectors) * (i + 1))
        for i, name in enumerate(self.mriot_reg_names)
    }
    self.mriot_type = 'WIOD'
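# Usage sketch for read_wiod16 (assumption: it is a method of a supply-chain
# class, instantiated here as SupplyChain(); the class name is illustrative):
#
#     sc = SupplyChain()
#     sc.read_wiod16(year=2011)     # fetches WIOT2011_Nov16_ROW.xlsb if absent
#     print(sc.mriot_data.shape)    # regions x sectors along each axis
#     print(sc.reg_pos['CHN'])      # row/column range of China's sectors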
def download_nl_files(req_files=np.ones(len(BM_FILENAMES),),
                      files_exist=np.zeros(len(BM_FILENAMES),),
                      dwnl_path=SYSTEM_DIR, year=2016):
    """Attempts to download nightlight files from the NASA webpage.

    Parameters:
        req_files (array): Boolean array which indicates the files
            required for the current operation (0-> skip, 1-> download).
            Can be obtained by check_required_nightlight_files
        files_exist (array): Boolean array which indicates if the files already
            exist locally and should not be downloaded (0-> download, 1-> skip).
            Can be obtained by function check_nightlight_local_file_exists
        dwnl_path (str): download directory path
        year (int): data year to be downloaded

    Returns:
        dwnl_path (Path): Path to download directory.
    """
    if (len(req_files) != len(files_exist)) or (len(req_files) != len(BM_FILENAMES)):
        raise ValueError('The given arguments are invalid. req_files and '
                         'files_exist must both be as long as there are files '
                         'to download (' + str(len(BM_FILENAMES)) + ').')
    if not Path(dwnl_path).is_dir():
        raise ValueError(f'The folder {dwnl_path} does not exist. Operation aborted.')
    if np.all(req_files == files_exist):
        LOGGER.debug('All required files already exist. '
                     'No downloads necessary.')
        return dwnl_path
    try:
        for num_files in range(0, np.count_nonzero(BM_FILENAMES)):
            if req_files[num_files] == 0 or files_exist[num_files] == 1:
                continue  # file is not required or already exists locally
            curr_file = NASA_SITE + BM_FILENAMES[num_files]
            curr_file = curr_file.replace('*', str(year))
            LOGGER.info('Attempting to download file from %s', curr_file)
            download_file(curr_file, download_dir=dwnl_path)
    except Exception as exc:
        raise RuntimeError('Download failed. Please check the network '
                           'connection and whether filenames are still valid.') from exc
    return dwnl_path
def world_bank(cntry_iso, ref_year, info_ind):
    """Get country's GDP from World Bank's data at a given year, or
    closest year value. If no data, get the natural earth's approximation.

    Parameters
    ----------
    cntry_iso : str
        key = ISO alpha_3 country
    ref_year : int
        reference year
    info_ind : str
        indicator of World Bank, e.g. 'NY.GDP.MKTP.CD'. If 'INC_GRP',
        historical income groups from excel file used.

    Returns
    -------
    int, float

    Raises
    ------
    IOError, KeyError, IndexError
    """
    if info_ind != 'INC_GRP':
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            cntry_gdp = wb.download(indicator=info_ind, country=cntry_iso,
                                    start=1960, end=2030)
        years = np.array([int(year)
                          for year in cntry_gdp.index.get_level_values('year')])
        sort_years = np.abs(years - ref_year).argsort()
        close_val = cntry_gdp.iloc[sort_years].dropna()
        close_year = int(close_val.iloc[0].name[1])
        close_val = float(close_val.iloc[0].values)
    else:  # income group level
        fn_ig = SYSTEM_DIR.joinpath('OGHIST.xls')
        dfr_wb = pd.DataFrame()
        try:
            if not fn_ig.is_file():
                file_down = download_file(WORLD_BANK_INC_GRP)
                shutil.move(file_down, fn_ig)
            dfr_wb = pd.read_excel(fn_ig, 'Country Analytical History', skiprows=5)
            dfr_wb = dfr_wb.drop(dfr_wb.index[0:5]).set_index('Unnamed: 0')
            dfr_wb = dfr_wb.replace(INCOME_GRP_WB_TABLE.keys(),
                                    INCOME_GRP_WB_TABLE.values())
        except (IOError, requests.exceptions.ConnectionError) as err:
            raise type(err)('Internet connection failed while downloading '
                            'historical income groups: ' + str(err)) from err
        cntry_dfr = dfr_wb.loc[cntry_iso]
        close_val = cntry_dfr.iloc[
            np.abs(np.array(cntry_dfr.index[1:]) - ref_year).argsort() + 1].dropna()
        close_year = close_val.index[0]
        close_val = int(close_val.iloc[0])

    return close_year, close_val
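# Minimal sketch of the closest-year lookup used above (made-up data; only
# numpy is required):
import numpy as np

years = np.array([1995, 2000, 2005, 2010, 2014])
ref_year = 2008
by_distance = years[np.abs(years - ref_year).argsort()]
print(by_distance[0])  # -> 2010, the year closest to 2008 with data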
def download_nl_files(req_files=np.ones(len(BM_FILENAMES),),
                      files_exist=np.zeros(len(BM_FILENAMES),),
                      dwnl_path=SYSTEM_DIR, year=2016):
    """Attempts to download nightlight files from the NASA webpage.

    Parameters:
        req_files (array): Boolean array which indicates the files
            required for the current operation (0-> skip, 1-> download).
            Can be obtained by check_required_nightlight_files
        files_exist (array): Boolean array which indicates if the files already
            exist locally and should not be downloaded (0-> download, 1-> skip).
            Can be obtained by function check_nightlight_local_file_exists
        dwnl_path (str): download directory path

    Returns:
        path_str (str): Absolute path to file storage.
    """
    if (len(req_files) != len(files_exist)) or \
            (len(req_files) != len(BM_FILENAMES)):
        raise ValueError('The given arguments are invalid. req_files and '
                         'files_exist must both be as long as there are files '
                         'to download (' + str(len(BM_FILENAMES)) + ').')
    if not path.exists(dwnl_path):
        dwnl_path = SYSTEM_DIR
        if not path.exists(dwnl_path):
            raise ValueError('The folder does not exist. Operation aborted.')
        LOGGER.warning('The given folder does not exist, using the '
                       'Climada data directory instead.')
    if np.all(req_files == files_exist):
        LOGGER.debug('All required files already exist. '
                     'No downloads necessary.')
        return None
    curr_wd = getcwd()
    try:
        chdir(dwnl_path)
        for num_files in range(0, np.count_nonzero(BM_FILENAMES)):
            if req_files[num_files] == 0 or files_exist[num_files] == 1:
                continue  # file is not required or already exists locally
            curr_file = NASA_SITE + BM_FILENAMES[num_files]
            curr_file = curr_file.replace('*', str(year))
            LOGGER.info('Attempting to download file from %s', curr_file)
            path_dwn = download_file(curr_file)
            path_str = path.dirname(path_dwn)
    except Exception as exc:
        raise RuntimeError('Download failed. Please check the network '
                           'connection and whether filenames are still '
                           'valid.') from exc
    finally:
        # restore the working directory also when the download succeeds
        chdir(curr_wd)
    return path_str
def _spam_download_csv(data_path=SYSTEM_DIR, spam_variable='V_agg'):
    """Download and unzip CSV files from https://dataverse.harvard.edu/file

    Inputs:
        data_path (str): absolute path where files are to be stored.
            Default: SYSTEM_DIR
        spam_variable (str): select one variable:
            'A'      physical area
            'H'      harvested area
            'P'      production
            'Y'      yield
            'V_agg'  value of production, aggregated to all crops,
                     food and non-food (default)
            'cell5m' concordance_data to retrieve lat / lon
    """
    try:
        fname = os.path.join(data_path, FILENAME_PERMALINKS)
        if not os.path.isfile(fname):
            url1 = 'https://dataverse.harvard.edu/api/access/datafile/:' \
                   + 'persistentId?persistentId=doi:10.7910/DVN/DHXBJX/'
            permalinks = pd.DataFrame(
                columns=['A', 'H', 'P', 'Y', 'V_agg', 'cell5m'])
            permalinks.loc[0, 'A'] = url1 + 'FS1JO8'
            permalinks.loc[0, 'H'] = url1 + 'M727TX'
            permalinks.loc[0, 'P'] = url1 + 'HPUWVA'
            permalinks.loc[0, 'Y'] = url1 + 'RTGSQA'
            permalinks.loc[0, 'V_agg'] = url1 + 'UG0N7K'
            permalinks.loc[0, 'cell5m'] = url1 + 'H2D3LI'
        else:
            permalinks = pd.read_csv(fname, sep=',', index_col=None, header=0)
            LOGGER.debug('Importing %s', str(fname))

        # go to data directory:
        os.chdir(data_path)
        path_dwn = download_file(permalinks.loc[0, spam_variable])

        LOGGER.debug('Download complete. Unzipping %s', str(path_dwn))
        zip_ref = zipfile.ZipFile(path_dwn, 'r')
        zip_ref.extractall(data_path)
        zip_ref.close()
        os.remove(path_dwn)
    except Exception:
        LOGGER.error('Downloading SPAM data failed. Operation aborted.')
        raise
def setup(self):
    """Set up the hazard drought"""
    try:
        if not os.path.isfile(self.file_path):
            if self.file_path == os.path.join(SPEI_FILE_DIR, SPEI_FILE_NAME):
                try:
                    path_dwl = download_file(SPEI_FILE_URL + '/' + SPEI_FILE_NAME)
                    try:
                        os.rename(path_dwl, self.file_path)
                    except OSError:
                        raise FileNotFoundError(
                            'The file ' + str(path_dwl)
                            + ' could not be moved to '
                            + str(os.path.dirname(self.file_path)))
                except Exception:
                    raise FileExistsError(
                        'The file ' + str(self.file_path) + ' could not '
                        + 'be found. Please download the file '
                        + 'first or choose a different folder. '
                        + 'The data can be downloaded from ' + SPEI_FILE_URL)
        LOGGER.debug('Importing %s', str(SPEI_FILE_NAME))
        dataset = xr.open_dataset(self.file_path)
    except Exception:
        LOGGER.error('Importing the SPEI data file failed. '
                     'Operation aborted.')
        raise

    spei_3d = self.__read_indices_spei(dataset)
    spei_2d = self.__traslate_matrix(spei_3d)
    intensity_matrix_min = self.__get_intensity_from_2d(
        spei_2d, self.intensity_definition)
    self.hazard_def(intensity_matrix_min)

    return self
def _spam_download_csv(data_path=SYSTEM_DIR, spam_variable='V_agg'):
    """Download and unzip CSV files from https://dataverse.harvard.edu/file

    Inputs:
        data_path (str): absolute path where files are to be stored.
            Default: SYSTEM_DIR
        spam_variable (str): select one variable:
            'A'      physical area
            'H'      harvested area
            'P'      production
            'Y'      yield
            'V_agg'  value of production, aggregated to all crops,
                     food and non-food (default)
            'cell5m' concordance_data to retrieve lat / lon
    """
    try:
        fname = Path(data_path, FILENAME_PERMALINKS)
        if not fname.is_file():
            permalinks = pd.DataFrame(
                columns=['A', 'H', 'P', 'Y', 'V_agg', 'cell5m'])
            permalinks.loc[0, 'A'] = SPAM_URL + 'FS1JO8'
            permalinks.loc[0, 'H'] = SPAM_URL + 'M727TX'
            permalinks.loc[0, 'P'] = SPAM_URL + 'HPUWVA'
            permalinks.loc[0, 'Y'] = SPAM_URL + 'RTGSQA'
            permalinks.loc[0, 'V_agg'] = SPAM_URL + 'UG0N7K'
            permalinks.loc[0, 'cell5m'] = SPAM_URL + 'H2D3LI'
        else:
            permalinks = pd.read_csv(fname, sep=',', index_col=None, header=0)
            LOGGER.debug('Importing %s', str(fname))

        # download directly into the data directory:
        path_dwn = download_file(permalinks.loc[0, spam_variable],
                                 download_dir=data_path)

        LOGGER.debug('Download complete. Unzipping %s', str(path_dwn))
        with zipfile.ZipFile(path_dwn, 'r') as zip_ref:
            zip_ref.extractall(data_path)
        Path(path_dwn).unlink()
    except Exception as err:
        raise type(err)('Downloading SPAM data failed: ' + str(err)) from err
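# Usage sketch (assumption: called from the module that defines SPAM_URL,
# FILENAME_PERMALINKS and SYSTEM_DIR; the variable codes are documented in
# the docstring above):
#
#     _spam_download_csv(spam_variable='V_agg')   # value of production, all crops
#     _spam_download_csv(spam_variable='cell5m')  # lat/lon concordance data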
def setup(self):
    """Set up the hazard drought"""
    try:
        if not self.file_path.is_file():
            if self.file_path == Path(SPEI_FILE_DIR, SPEI_FILE_NAME):
                try:
                    path_dwl = download_file(SPEI_FILE_URL + '/' + SPEI_FILE_NAME)
                    try:
                        Path(path_dwl).rename(self.file_path)
                    except OSError as err:
                        raise FileNotFoundError(
                            'The file ' + str(path_dwl)
                            + ' could not be moved to '
                            + str(self.file_path.parent)) from err
                except Exception as err:
                    raise FileExistsError(
                        'The file ' + str(self.file_path) + ' could not '
                        + 'be found. Please download the file '
                        + 'first or choose a different folder. '
                        + 'The data can be downloaded from '
                        + SPEI_FILE_URL) from err
        LOGGER.debug('Importing %s', str(SPEI_FILE_NAME))
        dataset = xr.open_dataset(self.file_path)
    except Exception as err:
        raise type(err)('Importing the SPEI data file failed: '
                        + str(err)) from err

    spei_3d = self.__read_indices_spei(dataset)
    spei_2d = self.__traslate_matrix(spei_3d)
    intensity_matrix_min = self.__get_intensity_from_2d(
        spei_2d, self.intensity_definition)
    self.hazard_def(intensity_matrix_min)

    return self
def load_nightlight_noaa(ref_year=2013, sat_name=None):
    """Get nightlight luminosities. Nightlight matrix, lat and lon ordered
    such that nightlight[1][0] corresponds to lat[1], lon[0] point (the image
    has been flipped).

    Parameters
    ----------
    ref_year : int, optional
        reference year. The default is 2013.
    sat_name : str, optional
        satellite provider (e.g. 'F10', 'F18', ...)

    Returns
    -------
    nightlight : sparse.csr_matrix
    coord_nl : np.array
    fn_light : str
    """
    # NOAA's URL used to retrieve nightlight satellite images:
    noaa_url = CONFIG.exposures.litpop.nightlights.noaa_url.str()
    if sat_name is None:
        fn_light = str(SYSTEM_DIR.joinpath('*' + str(ref_year)
                                           + '*.stable_lights.avg_vis'))
    else:
        fn_light = str(SYSTEM_DIR.joinpath(sat_name + str(ref_year)
                                           + '*.stable_lights.avg_vis'))
    # check if file exists in SYSTEM_DIR, download if not
    if glob.glob(fn_light + ".p"):
        fn_light = glob.glob(fn_light + ".p")[0]
        with open(fn_light, 'rb') as f_nl:
            nightlight = pickle.load(f_nl)
    elif glob.glob(fn_light + ".tif.gz"):
        fn_light = glob.glob(fn_light + ".tif.gz")[0]
        fn_light, nightlight = unzip_tif_to_py(fn_light)
    else:
        # iterate over all satellites if no satellite name provided
        if sat_name is None:
            ini_pre, end_pre = 18, 9
            for pre_i in np.arange(ini_pre, end_pre, -1):
                url = noaa_url + 'F' + str(pre_i) + str(ref_year) + '.v4.tar'
                try:
                    file_down = download_file(url, download_dir=SYSTEM_DIR)
                    break
                except ValueError:
                    pass
            if 'file_down' not in locals():
                raise ValueError(f'Nightlight for reference year {ref_year} '
                                 'not available. Try a different year.')
        else:
            url = noaa_url + sat_name + str(ref_year) + '.v4.tar'
            try:
                file_down = download_file(url, download_dir=SYSTEM_DIR)
            except ValueError as err:
                raise ValueError(f'Nightlight intensities for year {ref_year} '
                                 f'and satellite {sat_name} do not exist.') from err
        fn_light = untar_noaa_stable_nightlight(file_down)
        fn_light, nightlight = unzip_tif_to_py(fn_light)

    # first point and step
    coord_nl = np.empty((2, 2))
    coord_nl[0, :] = [NOAA_BORDER[1], NOAA_RESOLUTION_DEG]
    coord_nl[1, :] = [NOAA_BORDER[0], NOAA_RESOLUTION_DEG]

    return nightlight, coord_nl, fn_light
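# Usage sketch (assumption: run inside a CLIMADA environment where the
# function above and its constants are importable; year and satellite are
# illustrative):
#
#     nightlight, coord_nl, fn_light = load_nightlight_noaa(ref_year=2013,
#                                                           sat_name='F18')
#     # coord_nl[:, 0] holds the first lat/lon point, coord_nl[:, 1] the
#     # grid step in degrees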
def world_bank_wealth_account(cntry_iso, ref_year, variable_name="NW.PCA.TO",
                              no_land=True):
    """Download and unzip wealth accounting historical data (1995, 2000,
    2005, 2010, 2014) from the World Bank
    (https://datacatalog.worldbank.org/dataset/wealth-accounting).
    Return the requested variable for a country (cntry_iso) and a year (ref_year).

    Inputs:
        cntry_iso (str): ISO3-code of country, i.e. "CHN" for China
        ref_year (int): reference year
            - available in data: 1995, 2000, 2005, 2010, 2014
            - other years between 1995 and 2014 are interpolated
            - for years outside range, indicator is scaled proportionally to GDP
        variable_name (str): select one variable, i.e.:
            'NW.PCA.TO': Produced capital stock of country, incl. manufactured
                or built assets such as machinery, equipment, and physical
                structures and value of built-up urban land (24% mark-up)
            'NW.PCA.PC': Produced capital stock per capita, incl. manufactured
                or built assets such as machinery, equipment, and physical
                structures and value of built-up urban land (24% mark-up)
            'NW.NCA.TO': Total natural capital of country. Natural capital
                includes the valuation of fossil fuel energy (oil, gas, hard
                and soft coal) and minerals (bauxite, copper, gold, iron ore,
                lead, nickel, phosphate, silver, tin, and zinc), agricultural
                land (cropland and pastureland), forests (timber and some
                nontimber forest products), and protected areas.
            'NW.TOW.TO': Total wealth of country.
            Note: Values are measured at market exchange rates in constant
                2014 US dollars, using a country-specific GDP deflator.
        no_land (boolean): If True, return produced capital without built-up
            land value (applies to 'NW.PCA.*' only). Default = True.
    """
    try:
        fname = os.path.join(SYSTEM_DIR, FILE_WORLD_BANK_WEALTH_ACC)
        if not os.path.isfile(fname):
            fname = os.path.join(SYSTEM_DIR, 'Wealth-Accounts_CSV',
                                 FILE_WORLD_BANK_WEALTH_ACC)
            if not os.path.isfile(fname):
                if not os.path.isdir(os.path.join(SYSTEM_DIR, 'Wealth-Accounts_CSV')):
                    os.mkdir(os.path.join(SYSTEM_DIR, 'Wealth-Accounts_CSV'))
                file_down = download_file(WORLD_BANK_WEALTH_ACC)
                zip_ref = zipfile.ZipFile(file_down, 'r')
                zip_ref.extractall(os.path.join(SYSTEM_DIR, 'Wealth-Accounts_CSV'))
                zip_ref.close()
                os.remove(file_down)
                LOGGER.debug('Download and unzip complete. Importing %s', str(fname))
        data_wealth = pd.read_csv(fname, sep=',', index_col=None, header=0)
    except Exception:
        LOGGER.error('Downloading World Bank Wealth Accounting Data failed.')
        raise

    data_wealth = data_wealth[
        data_wealth['Country Code'].str.contains(cntry_iso)
        & data_wealth['Indicator Code'].str.contains(variable_name)
        ].loc[:, '1995':'2014']
    years = list(map(int, list(data_wealth)))
    if data_wealth.size == 0 and 'NW.PCA.TO' in variable_name:
        # country not found in data
        LOGGER.warning('No data available for country. '
                       'Using non-financial wealth instead.')
        gdp_year, gdp_val = gdp(cntry_iso, ref_year)
        ref_year_fac, fac = wealth2gdp(cntry_iso)
        return gdp_year, np.around((fac * gdp_val), 1), 0
    if ref_year in years:  # indicator for reference year is available directly
        result = data_wealth.loc[:, str(ref_year)].values[0]
    elif np.min(years) < ref_year < np.max(years):  # interpolate
        result = np.interp(ref_year, years, data_wealth.values[0, :])
    elif ref_year < np.min(years):  # scale proportionally to GDP
        gdp_year, gdp0_val = gdp(cntry_iso, np.min(years))
        gdp_year, gdp_val = gdp(cntry_iso, ref_year)
        result = data_wealth.values[0, 0] * gdp_val / gdp0_val
        ref_year = gdp_year
    else:
        gdp_year, gdp0_val = gdp(cntry_iso, np.max(years))
        gdp_year, gdp_val = gdp(cntry_iso, ref_year)
        result = data_wealth.values[0, -1] * gdp_val / gdp0_val
        ref_year = gdp_year
    if 'NW.PCA.' in variable_name and no_land:
        # remove value of built-up land from produced capital
        result = result / 1.24
    return ref_year, np.around(result, 1), 1
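# Self-contained sketch of the year handling used above: direct lookup,
# linear interpolation inside the data range, and proportional GDP scaling
# outside it (numbers and the gdp stub are made up):
import numpy as np

years = [1995, 2000, 2005, 2010, 2014]
values = [100.0, 110.0, 125.0, 140.0, 150.0]

def wealth_for_year(ref_year, gdp):
    """Return the indicator value for ref_year from the table above."""
    if ref_year in years:
        return values[years.index(ref_year)]
    if min(years) < ref_year < max(years):
        return np.interp(ref_year, years, values)
    # outside the data range: scale the nearest edge value with GDP
    edge = 0 if ref_year < min(years) else -1
    return values[edge] * gdp(ref_year) / gdp(years[edge])

print(wealth_for_year(2007, lambda y: 1.0))  # -> 131.0 (interpolated)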
def download_icon_grib(run_datetime, model_name='icon-eu-eps',
                       parameter_name='vmax_10m', max_lead_time=None,
                       download_dir=None):
    """Download the grib files of a weather forecast run for a certain
    weather parameter from opendata.dwd.de/weather/nwp/.

    Parameters
    ----------
    run_datetime : datetime
        The starting timepoint of the forecast run
    model_name : str
        the name of the forecast model written as it appears in the folder
        structure in opendata.dwd.de/weather/nwp/ or 'test'
    parameter_name : str
        the name of the meteorological parameter written as it appears in
        the folder structure in opendata.dwd.de/weather/nwp/
    max_lead_time : int
        number of hours for which files should be downloaded, will default
        to maximum available data
    download_dir : str or Path
        directory where the downloaded files should be saved in

    Returns
    -------
    file_names : list
        a list of filenames that link to all just downloaded or available
        files from the forecast run, defined by the input parameters
    """
    LOGGER.info('Downloading icon grib files of model %s for parameter %s '
                'with starting date %s.', model_name, parameter_name,
                run_datetime.strftime('%Y%m%d%H'))
    url, file_name, lead_times = _create_icon_grib_name(run_datetime,
                                                        model_name,
                                                        parameter_name,
                                                        max_lead_time)
    download_path = CONFIG.local_data.save_dir.dir() if download_dir is None \
        else Path(download_dir)

    # download all files
    file_names = []
    for lead_i in lead_times:
        file_name_i = file_name.format(lead_i=lead_i)
        bz2_pathfile_i = download_path.absolute().joinpath(file_name_i)

        # download file if it does not exist already
        if not bz2_pathfile_i.exists():
            try:
                download_file(url + file_name_i, download_dir=download_dir)
            except Exception as err:
                err_msg = ""
                if run_datetime > (dt.datetime.utcnow() - dt.timedelta(hours=6)):
                    err_msg += (f'Forecast file {file_name_i} might not yet be '
                                f'available on {url}. Wait a few hours. ')
                elif run_datetime < (dt.datetime.utcnow() - dt.timedelta(hours=24)):
                    err_msg += (f'Forecast file {file_name_i} might no longer '
                                f'be available on {url}. Files are only openly '
                                f'available for 24 hours. ')
                err_msg += f"Error while downloading {url + file_name_i}: "
                raise type(err)(err_msg + str(err)) from err
        file_names.append(str(bz2_pathfile_i))
    return file_names
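# Usage sketch (assumption: run inside a CLIMADA environment; the run
# datetime must belong to a forecast run from the last 24 hours, since DWD
# only keeps the files openly available for that long):
#
#     import datetime as dt
#     run = dt.datetime.utcnow().replace(hour=0, minute=0,
#                                        second=0, microsecond=0)
#     files = download_icon_grib(run, model_name='icon-eu-eps',
#                                parameter_name='vmax_10m', max_lead_time=12)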
def test_ne_api_pass(self):
    """Test Natural Earth API"""
    url = 'http://naciscdn.org/naturalearth/10m/cultural/ne_10m_admin_0_countries.zip'
    file_down = download_file(url)
    os.remove(file_down)
def test_wb_wealth_pass(self):
    """Test world bank's wealth data"""
    file_down = download_file(WORLD_BANK_WEALTH_ACC)
    os.remove(file_down)
def test_wb_lev_hist_pass(self):
    """Test world bank's historical income group levels data"""
    file_down = download_file(WORLD_BANK_INC_GRP)
    os.remove(file_down)
def test_noaa_nl_pass(self):
    """Test NOAA nightlights used in BlackMarble."""
    file_down = download_file(NOAA_SITE + 'F101992.v4.tar')
    os.remove(file_down)
def test_nasa_nl_pass(self):
    """Test NASA nightlights used in BlackMarble and LitPop."""
    url = NASA_SITE + BM_FILENAMES[0]
    file_down = download_file(url.replace('*', str(2016)))
    os.remove(file_down)
def test_noaa_nl_pass(self):
    """Test NOAA nightlights used in BlackMarble."""
    file_down = download_file(
        f'{CONFIG.exposures.litpop.nightlights.noaa_url.str()}/F101992.v4.tar')
    Path(file_down).unlink()
def download_nl_files(req_files=np.ones(len(BM_FILENAMES),),
                      files_exist=np.zeros(len(BM_FILENAMES),),
                      dwnl_path=SYSTEM_DIR, year=2016):
    """Attempts to download nightlight files from the NASA webpage.

    Parameters
    ----------
    req_files : numpy array, optional
        Boolean array which indicates the files required
        (0-> skip, 1-> download). The default is np.ones(len(BM_FILENAMES),).
    files_exist : numpy array, optional
        Boolean array which indicates if the files already exist locally and
        should not be downloaded (0-> download, 1-> skip).
        The default is np.zeros(len(BM_FILENAMES),).
    dwnl_path : str or path, optional
        Download directory path. The default is SYSTEM_DIR.
    year : int, optional
        Data year to be downloaded. The default is 2016.

    Raises
    ------
    ValueError
    RuntimeError

    Returns
    -------
    dwnl_path : str or path
        Download directory path.
    """
    if (len(req_files) != len(files_exist)) or (len(req_files) != len(BM_FILENAMES)):
        raise ValueError('The given arguments are invalid. req_files and '
                         'files_exist must both be as long as there are files '
                         'to download (' + str(len(BM_FILENAMES)) + ').')
    if not Path(dwnl_path).is_dir():
        raise ValueError(f'The folder {dwnl_path} does not exist. Operation aborted.')
    if np.all(req_files == files_exist):
        LOGGER.debug('All required files already exist. No downloads necessary.')
        return dwnl_path
    try:
        for num_files in range(0, np.count_nonzero(BM_FILENAMES)):
            if req_files[num_files] == 0 or files_exist[num_files] == 1:
                continue  # file already available or not required
            path_check = False
            # loop through different possible URLs defined in CONFIG:
            for url in CONFIG.exposures.litpop.nightlights.nasa_sites.list():
                try:  # control for ValueError due to wrong URL
                    curr_file = url.str() + BM_FILENAMES[num_files] % (year)
                    LOGGER.info('Attempting to download file from %s', curr_file)
                    path_check = download_file(curr_file, download_dir=dwnl_path)
                    break  # leave loop if successful
                except ValueError as err:
                    value_err = err
            if path_check:  # download successful
                continue
            raise ValueError('Download failed, check URLs in '
                             'CONFIG.exposures.litpop.nightlights.nasa_sites!\n'
                             'Last error message:\n' + value_err.args[0])
    except Exception as exc:
        raise RuntimeError('Download failed. Please check the network '
                           'connection and whether filenames are still valid.') from exc
    return dwnl_path
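# Self-contained sketch of the mirror-fallback pattern used above: try each
# URL in turn, remember the last error, and re-raise only if every mirror
# fails (urls and fetch are illustrative stand-ins, not CLIMADA API):
def fetch_from_mirrors(filename, urls, fetch):
    """Return the first successful download; raise if all mirrors fail."""
    last_err = None
    for url in urls:
        try:
            return fetch(url + filename)
        except ValueError as err:
            last_err = err  # keep the most recent failure for reporting
    raise ValueError(f'All mirrors failed for {filename}') from last_err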
def test_wb_wealth_pass(self):
    """Test world bank's wealth data"""
    file_down = download_file(WORLD_BANK_WEALTH_ACC)
    Path(file_down).unlink()
def test_ne_api_pass(self):
    """Test Natural Earth API"""
    url = 'https://naturalearth.s3.amazonaws.com/10m_cultural/ne_10m_admin_0_countries.zip'
    file_down = download_file(url)
    Path(file_down).unlink()
def download_icon_grib(run_datetime, model_name='icon-eu-eps',
                       parameter_name='vmax_10m', max_lead_time=None,
                       download_dir=None):
    """Download the grib files of a weather forecast run for a certain
    weather parameter from opendata.dwd.de/weather/nwp/.

    Parameters:
        run_datetime (datetime): The starting timepoint of the forecast run
        model_name (str): the name of the forecast model written as it
            appears in the folder structure in opendata.dwd.de/weather/nwp/
            or 'test'
        parameter_name (str): the name of the meteorological parameter
            written as it appears in the folder structure in
            opendata.dwd.de/weather/nwp/
        max_lead_time (int): number of hours for which files should be
            downloaded, will default to maximum available data
        download_dir (str or Path): directory where the downloaded files
            should be saved in

    Returns:
        file_names (list): a list of filenames that link to all just
            downloaded or available files from the forecast run, defined by
            the input parameters
    """
    LOGGER.info('Downloading icon grib files of model %s for parameter %s '
                'with starting date %s.', model_name, parameter_name,
                run_datetime.strftime('%Y%m%d%H'))
    url, file_name, lead_times = _create_icon_grib_name(
        run_datetime, model_name, parameter_name, max_lead_time)
    download_path = CONFIG.local_data.save_dir.dir() if download_dir is None \
        else Path(download_dir)

    # download all files
    file_names = []
    for lead_i in lead_times:
        file_name_i = file_name.format(lead_i=lead_i)
        bz2_pathfile_i = download_path.absolute().joinpath(file_name_i)

        # download file if it does not exist already
        if not bz2_pathfile_i.exists():
            try:
                download_file(url + file_name_i, download_dir=download_dir)
            except ValueError as err:
                if run_datetime > (dt.datetime.utcnow() - dt.timedelta(hours=6)):
                    LOGGER.error('Forecast file %s might not yet be available '
                                 'on %s. Wait a few hours. Error while '
                                 'downloading %s.',
                                 file_name_i, url, url + file_name_i)
                elif run_datetime < (dt.datetime.utcnow() - dt.timedelta(hours=24)):
                    LOGGER.error('Forecast file %s might no longer be '
                                 'available on %s. Files are only openly '
                                 'available for 24 hours. Error while '
                                 'downloading %s.',
                                 file_name_i, url, url + file_name_i)
                raise err
        file_names.append(str(bz2_pathfile_i))
    return file_names