Example 1
# imports inferred from usage in this snippet: xarray is aliased as xray,
# dask as da, and ogh is the observatory gridded hydromet helper module
import os
import dask as da
import numpy as np
import xarray as xray
from dask.diagnostics import ProgressBar
import ogh


def netcdf_to_ascii_PNNL2018(homedir, subdir, netcdfs, mappingfile,
                             catalog_label, meta_file):
    # initialize list of dataframe outputs
    outfiledict = {}

    # generate destination folder
    filedir = os.path.join(homedir, subdir)
    ogh.ensure_dir(filedir)

    # connect with collection of netcdfs
    ds_mf = xray.open_mfdataset(netcdfs, engine='netcdf4')

    # flatten the netCDF collection into a pandas.DataFrame indexed by
    # (SN, WE), with TIME moved into a column
    ds_pan = ds_mf.to_dataframe().reset_index('TIME')

    # generate list of variables
    ds_vars = [
        ds_var for ds_var in ds_pan.columns
        if ds_var not in ['YEAR', 'MONTH', 'DAY', 'TIME', 'LAT', 'LON']
    ]

    # read in gridded cells of interest
    maptable, nstation = ogh.mappingfileToDF(mappingfile, colvar=None)

    # at each latlong of interest
    for ind, eachrow in maptable.iterrows():

        # generate ASCII time-series
        ds_df = ds_pan.loc[eachrow['SN'], eachrow['WE'], :].reset_index(
            drop=True).loc[:, ds_vars]

        # create file name
        outfilename = os.path.join(
            filedir, catalog_label +
            '_{0}_{1}'.format(eachrow['LAT'], eachrow['LONG_']))
        # save ds_df
        outfiledict[outfilename] = da.delayed(ds_df.to_csv)(
            path_or_buf=outfilename, sep='\t', header=False, index=False)

    # compute ASCII time-series files
    ProgressBar().register()
    outfiledict = da.compute(outfiledict)[0]

    # update metadata file
    meta_file[catalog_label]['variable_info'].update(dict(ds_mf.attrs))
    meta_file[catalog_label]['variable_info'].update(dict(ds_mf.variables))
    meta_file[catalog_label]['variable_list'] = np.array(ds_vars)

    # catalog the output files
    ogh.addCatalogToMap(outfilepath=mappingfile,
                        maptable=maptable,
                        folderpath=filedir,
                        catalog_label=catalog_label)
    os.chdir(homedir)
    return list(outfiledict.keys())
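A minimal usage sketch for this function; every path, file name, and the
meta_file skeleton below are illustrative assumptions, not values from the
source:

# hypothetical call: convert spatially subset PNNL 2018 WRF netCDFs into
# per-gridcell tab-delimited ASCII time series (all values illustrative)
meta_file = {'hourlywrf_pnnl': {'variable_info': {}}}  # assumed skeleton
written = netcdf_to_ascii_PNNL2018(
    homedir='/home/user/project',
    subdir='PNNL2018/hourly_ascii',
    netcdfs=['/home/user/project/sp_19810101.nc'],
    mappingfile='/home/user/project/mappingfile.csv',
    catalog_label='hourlywrf_pnnl',
    meta_file=meta_file)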
Example 2
# imports inferred from usage: dask aliased as da, with a ThreadPool feeding
# dask's (legacy) set_options worker pool; compile_x_wrfpnnl2018_raw_locations
# and wget_x_download_spSubset_PNNL are helpers from the same source module
import os
import dask as da
import pandas as pd
from multiprocessing.pool import ThreadPool
from dask.diagnostics import ProgressBar
import ogh


def get_x_hourlywrf_PNNL2018(
        homedir,
        spatialbounds,
        subdir='PNNL2018/Hourly_WRF_1981_2015/SaukSpatialBounds',
        nworkers=4,
        start_date='2005-01-01',
        end_date='2007-12-31',
        time_resolution='H',
        time_steps=24,
        file_prefix='sp_',
        rename_timelatlong_names={
            'south_north': 'SN',
            'west_east': 'WE',
            'time': 'TIME'
        },
        replace_file=True):
    """
    get hourly WRF data from a 2018 PNNL WRF run using xarray on netcdf files
    """
    # check and generate data directory
    filedir = os.path.join(homedir, subdir)
    ogh.ensure_dir(filedir)

    # generate a YYYYMMDD date string for each day between start_date and end_date
    dates = [
        x.strftime('%Y%m%d')
        for x in pd.date_range(start=start_date, end=end_date, freq='D')
    ]

    # initialize parallel workers
    da.set_options(pool=ThreadPool(nworkers))
    ProgressBar().register()

    # generate the list of files to download
    filelist = compile_x_wrfpnnl2018_raw_locations(dates)

    # download files of interest
    NetCDFs = []
    for url, date in zip(filelist, dates):
        NetCDFs.append(
            da.delayed(wget_x_download_spSubset_PNNL)(
                fileurl=url,
                filedate=date,
                time_resolution=time_resolution,
                time_steps=time_steps,
                spatialbounds=spatialbounds,
                file_prefix=file_prefix,
                rename_timelatlong_names=rename_timelatlong_names,
                replace_file=replace_file))

    # run operations
    outputfiles = da.compute(NetCDFs)[0]

    # reset working directory
    os.chdir(homedir)
    return outputfiles
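A hedged usage sketch; the structure of spatialbounds is an assumption
inferred from the keyword name, not documented in the source:

# hypothetical call: fetch a 2005-2007 hourly WRF spatial subset into the
# default Sauk subdirectory; the bounds dict layout below is an assumption
bounds = {'minx': -122.5, 'maxx': -120.9, 'miny': 47.9, 'maxy': 48.8}
wrf_files = get_x_hourlywrf_PNNL2018(homedir='/home/user/project',
                                     spatialbounds=bounds,
                                     nworkers=4,
                                     start_date='2005-01-01',
                                     end_date='2007-12-31')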
Example 3
# imports inferred from usage (same aliases as above);
# compile_x_dailymet_Livneh2013_raw_locations and ftp_x_download_spSubset
# are helpers defined in the same source module
import os
import dask as da
import pandas as pd
from multiprocessing.pool import ThreadPool
from dask.diagnostics import ProgressBar
import ogh


def get_x_dailymet_Livneh2013_raw(
        homedir,
        spatialbounds,
        subdir='livneh2013/Daily_MET_1915_2011/raw_netcdf',
        nworkers=4,
        start_date='1915-01-01',
        end_date='2011-12-31',
        rename_timelatlong_names={
            'lat': 'LAT',
            'lon': 'LON',
            'time': 'TIME'
        },
        file_prefix='sp_',
        replace_file=True):
    """
    get Daily MET data from Livneh et al. (2013) using xarray on netcdf files
    """
    # check and generate DailyMET livneh 2013 data directory
    filedir = os.path.join(homedir, subdir)
    ogh.ensure_dir(filedir)

    # generate a YYYYMM string for each month between start_date and end_date
    dates = [
        x.strftime('%Y%m')
        for x in pd.date_range(start=start_date, end=end_date, freq='M')
    ]

    # initialize parallel workers
    da.set_options(pool=ThreadPool(nworkers))
    ProgressBar().register()

    # generate the list of files to download
    filelist = compile_x_dailymet_Livneh2013_raw_locations(dates)

    # download files of interest
    NetCDFs = []
    for url in filelist:
        NetCDFs.append(
            da.delayed(ftp_x_download_spSubset)(
                fileurl=url,
                spatialbounds=spatialbounds,
                file_prefix=file_prefix,
                rename_timelatlong_names=rename_timelatlong_names,
                replace_file=replace_file))

    # run operations
    outputfiles = da.compute(NetCDFs)[0]

    # reset working directory
    os.chdir(homedir)
    return outputfiles
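As above, a usage sketch with an assumed spatialbounds layout:

# hypothetical call: fetch the full 1915-2011 Livneh et al. (2013) daily MET
# record, clipped to an assumed bounding box
bounds = {'minx': -122.5, 'maxx': -120.9, 'miny': 47.9, 'maxy': 48.8}
met_files = get_x_dailymet_Livneh2013_raw(homedir='/home/user/project',
                                          spatialbounds=bounds,
                                          start_date='1915-01-01',
                                          end_date='2011-12-31')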
Example 4
# imports inferred from usage: xarray aliased as xray, dask as da
import os
import dask as da
import numpy as np
import pandas as pd
import xarray as xray
from dask.diagnostics import ProgressBar
import ogh


def netcdf_to_ascii(homedir,
                    subdir,
                    source_directory,
                    mappingfile,
                    catalog_label,
                    meta_file,
                    temporal_resolution='D',
                    netcdfs=None,
                    variable_list=None):
    # initialize list of dataframe outputs
    outfiledict = {}

    # generate destination folder
    filedir = os.path.join(homedir, subdir)
    ogh.ensure_dir(filedir)

    # connect with the collection of netcdfs; default to every .nc file in
    # source_directory when no explicit list is given
    if netcdfs is None:
        netcdfs = [
            os.path.join(source_directory, fname)
            for fname in os.listdir(source_directory) if fname.endswith('.nc')
        ]
    ds_mf = xray.open_mfdataset(netcdfs, engine='netcdf4').sortby('TIME')

    # generate the list of variables, excluding coordinate and calendar fields
    if variable_list is not None:
        ds_vars = variable_list.copy()
    else:
        ds_vars = [
            ds_var for ds_var in dict(ds_mf.variables).keys()
            if ds_var not in ['YEAR', 'MONTH', 'DAY', 'TIME', 'LAT', 'LON']
        ]

    # flatten the netCDF collection into a multi-indexed pandas.DataFrame of
    # the selected variables
    ds_pan = ds_mf.to_dataframe()[ds_vars]

    # read in gridded cells of interest
    maptable, nstation = ogh.mappingfileToDF(mappingfile,
                                             colvar=None,
                                             summary=False)

    # at each latlong of interest
    for ind, eachrow in maptable.iterrows():

        # generate ASCII time-series
        ds_df = ds_pan.loc[eachrow['LAT'],
                           eachrow['LONG_'], :].reset_index(drop=True,
                                                            level=[0, 1])

        # create file name
        outfilename = os.path.join(
            filedir, 'data_{0}_{1}'.format(eachrow['LAT'], eachrow['LONG_']))

        # save ds_df
        outfiledict[outfilename] = da.delayed(ds_df.to_csv)(
            path_or_buf=outfilename, sep='\t', header=False, index=False)

    # compute ASCII time-series files
    ProgressBar().register()
    outfiledict = da.compute(outfiledict)[0]

    # annotate metadata file
    meta_file[catalog_label] = dict(ds_mf.attrs)
    meta_file[catalog_label]['variable_list'] = list(np.array(ds_vars))
    meta_file[catalog_label]['delimiter'] = '\t'
    meta_file[catalog_label]['start_date'] = pd.Series(
        ds_mf.TIME).sort_values().iloc[0].strftime('%Y-%m-%d %H:%M:%S')
    meta_file[catalog_label]['end_date'] = pd.Series(
        ds_mf.TIME).sort_values().iloc[-1].strftime('%Y-%m-%d %H:%M:%S')
    meta_file[catalog_label]['temporal_resolution'] = temporal_resolution
    meta_file[catalog_label]['variable_info'] = dict(ds_mf.variables)

    # catalog the output files
    ogh.addCatalogToMap(outfilepath=mappingfile,
                        maptable=maptable,
                        folderpath=filedir,
                        catalog_label=catalog_label)
    os.chdir(homedir)
    return list(outfiledict.keys())
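A usage sketch for the generalized converter; all paths and the catalog label
are placeholders:

# hypothetical call: convert every .nc file in an assumed source directory to
# per-gridcell ASCII files and catalog them under 'dailymet_livneh2013'
meta_file = {}
ascii_files = netcdf_to_ascii(homedir='/home/user/project',
                              subdir='livneh2013/daily_ascii',
                              source_directory='/home/user/project/raw_netcdf',
                              mappingfile='/home/user/project/mappingfile.csv',
                              catalog_label='dailymet_livneh2013',
                              meta_file=meta_file)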
Example 5
def test_ensuredir(self):
    # data_path is defined at module scope in the source test suite;
    # re-entering path0 (the original cwd) restores the working directory
    path0 = os.getcwd()
    path1 = os.path.join(data_path, 'test_files')
    ogh.ensure_dir(path1)
    ogh.ensure_dir(path0)
    assert os.path.exists(path1)
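ogh.ensure_dir itself is not shown in these examples. Judging from the "reset
working directory" comments above (each function later calls
os.chdir(homedir)), it appears to both create the directory and change into
it. A minimal sketch under that assumption, not the library's actual code:

import os

def ensure_dir(path):
    # create the directory (and any parents) if it is missing, then make it
    # the working directory; the chdir step is an assumption inferred from
    # how the callers above reset the cwd with os.chdir(homedir) afterwards
    if not os.path.exists(path):
        os.makedirs(path)
    os.chdir(path)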