Example #1
def extract_eq_var_at_pressure_level(path=mri_path, var='vmrh2o', plevel=8000):
    import xarray as xr
    from aux_functions_strat import path_glob, save_ncfile
    file = path_glob(path, '{}_*equatorial*.nc'.format(var))[0]
    var_ds = xr.load_dataset(file)
    ps_file = path_glob(path, 'ps_*equatorial*.nc')[0]
    ps = xr.load_dataset(ps_file)['ps']
    a, b, p0 = load_a_b_p0(path=path)
    # convert the hybrid-sigma coefficients and surface pressure to a
    # time-varying pressure field:
    plev = convert_hybrid_sigma_to_pressure(a, b, p0, ps)
    # at each time step, relabel the model levels with their pressure and
    # pick the level nearest to plevel:
    var_dts = []
    for dt in plev.time:
        var_dt = var_ds.sel(time=dt)
        pl = plev.sel(time=dt)
        var_dt['lev'] = pl
        var_dt = var_dt.sel(lev=plevel, method='nearest')
        var_dts.append(var_dt)
    var_plev = xr.concat(var_dts, 'time')
    var_at_plevel = var_plev.reset_coords(drop=True)[var]
    var_at_plevel.attrs['plevel'] = str(plevel) + 'Pa'
    yrmax = var_at_plevel.time.max().dt.year.item()
    yrmin = var_at_plevel.time.min().dt.year.item()
    filename = '{}_equatorial_{}_Pa_{}-{}.nc'.format(var, plevel, yrmin, yrmax)
    save_ncfile(var_at_plevel, path, filename)
    return var_at_plevel
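
A minimal usage sketch for the function above, assuming mri_path is defined in this module and the required equatorial input files already exist under it:

# hypothetical call: water vapour at the level nearest to 8000 Pa (~80 hPa):
h2o_80hpa = extract_eq_var_at_pressure_level(path=mri_path, var='vmrh2o',
                                             plevel=8000)
print(h2o_80hpa.attrs['plevel'])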
Example #2
def proc_era5(path, field, model_name):
    """Concatenate downloaded ERA5 files for field and save them to netcdf:
    one file for single-level models, one file per year for pressure-level
    models."""
    import xarray as xr
    import numpy as np
    from aux_functions_strat import path_glob
    files = sorted(path_glob(path, 'era5_{}_*.nc'.format(field)))
    # skip monthly-mean ('mm') files:
    files = [x for x in files if 'mm' not in x.as_posix()]
    print(files)
    if 'single' in model_name:
        # single-level model: save one concatenated file:
        era5 = xr.open_mfdataset(files)
        era5 = concat_era5T(era5)
        start = era5.time.dt.year[0].values.item()
        end = era5.time.dt.year[-1].values.item()
        filename = '_'.join(
            ['era5',
             str(field), '4Xdaily',
             str(start) + '-' + str(end)])
        filename += '.nc'
        era5.to_netcdf(path / filename)
        print('Done!')
    elif 'pressure' in model_name:
        # pressure-level model: save one file per year:
        era5 = xr.open_mfdataset(files)
        era5 = concat_era5T(era5)
        start = era5.time.dt.year[0].values.item()
        end = era5.time.dt.year[-1].values.item()
        years = np.arange(start, end + 1).tolist()
        for year in years:
            era5_yearly = era5.sel(time=str(year))
            filename = '_'.join(['era5', str(field), '4Xdaily', str(year)])
            filename += '.nc'
            era5_yearly.to_netcdf(path / filename)
        print('Done!')
    return
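
A usage sketch; era5_path and the field name 't' below are illustrative assumptions, and concat_era5T is this module's own helper:

from pathlib import Path

era5_path = Path('/data/era5')  # hypothetical download directory
proc_era5(era5_path, field='t', model_name='pressure-levels')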
Example #3
def load_one_gridsearchcv_object(path=ml_path, model_name='SVM', verbose=True):
    """load one gridsearchcv obj with model_name and features and run read_one_gridsearchcv_object"""
    from aux_functions_strat import path_glob
    import joblib
    # first filter for model name:
    if verbose:
        print('loading GridsearchCVs results for {} model'.format(model_name))
    model_files = path_glob(path, 'GRSRCHCV_*.pkl')
    model_files = [x for x in model_files if model_name in x.as_posix()]
    # NOTE: feature-set filtering and multi-seed/outer-split handling were
    # removed here; only the first matching model file is loaded.
    # load and produce best_df:
    gr = joblib.load(model_files[0])
    best_df = read_one_gridsearchcv_object(gr)
    return best_df
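
A usage sketch, assuming ml_path contains a saved GRSRCHCV_*SVM*.pkl file and that read_one_gridsearchcv_object returns a DataFrame of hyper-parameter combinations:

best_df = load_one_gridsearchcv_object(path=ml_path, model_name='SVM')
print(best_df.head())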
Example #4
def get_all_swoosh_files(path):
    """Read all SWOOSH datasets downloaded from the SWOOSH website and
    rewrite them as netcdf files, adding a datetime index and keeping just
    the 'combined' fields."""
    import xarray as xr
    import pandas as pd
    from aux_functions_strat import path_glob
    filenames = path_glob(path, 'swoosh-v02.6*.nc')
    datasets = []
    for file in filenames:
        dataset = xr.open_dataset(file, decode_times=False)
        dataset.attrs['filename'] = file.as_posix().split('/')[-1]
        datasets.append(dataset)
        print('importing {}'.format(file.as_posix().split('/')[-1]))
    data_dict = {}
    for dataset in datasets:
        combined_list = [
            name for name in dataset.data_vars if 'combined' in name.lower()
        ]
        dataset = dataset[combined_list]
        # times were not decoded on load; build a monthly index from 1984-01:
        time = pd.date_range('1984-01-01',
                             freq='MS',
                             periods=len(dataset.time))
        dataset['time'] = time
        parts = dataset.attrs['filename'].split('-')
        if 'lonlatpress' in parts:
            dname = '-'.join(parts[-4:-1])
        else:
            # the 'latpress' and 'lattheta' grids share this naming pattern:
            dname = '-'.join(parts[-3:-1])
        data_dict[dname] = dataset
        data_dict[dname].to_netcdf(path / 'swoosh_{}.nc'.format(dname))
        print('Saved {} to nc file, in {}'.format(dname, path))
    print('Done!')
    return
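
A usage sketch, assuming the raw swoosh-v02.6*.nc downloads sit under a local directory (swoosh_path below is hypothetical):

from pathlib import Path

swoosh_path = Path('/data/swoosh')  # hypothetical download directory
get_all_swoosh_files(swoosh_path)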
Example #5
def get_HP_params_from_optimized_model(path=ml_path, model='SVM'):
    """Return the best hyper-parameters found for model as a dict."""
    import joblib
    from aux_functions_strat import path_glob
    files = path_glob(path, 'GRSRCHCV_*.pkl')
    file = [x for x in files if model in x.as_posix()][0]
    gr = joblib.load(file)
    df = read_one_gridsearchcv_object(gr)
    # the first row holds the best combination; the last two entries are
    # not hyper-parameters, so drop them:
    return df.iloc[0][:-2].to_dict()
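
A usage sketch; the returned dict maps hyper-parameter names to the best values found, with keys depending on the saved grid search:

hp = get_HP_params_from_optimized_model(path=ml_path, model='SVM')
print(hp)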
Example #6
def produce_eq_means_from_3D_MRI(path=mri_path,
                                 var='vmrh2o',
                                 lat_slice=[-30, 30]):
    import xarray as xr
    from aux_functions_strat import path_glob, save_ncfile, lat_mean
    print('producing equatorial means for {}.'.format(var))
    filestr = 'monthly_MRI-ESM1r1_refC2_r1i1p1'
    files = sorted(path_glob(path, '{}_{}*.nc'.format(var, filestr)))
    dsl = [xr.open_dataset(x) for x in files]
    eqs = []
    for ds in dsl:
        # zonal mean, then latitudinal mean (lat_mean) over lat_slice:
        eq_var = ds[var].mean('lon', keep_attrs=True)
        eq_var = lat_mean(eq_var.sel(lat=slice(*lat_slice)))
        eqs.append(eq_var)
    eq_da = xr.concat(eqs, 'time')
    eq_da = eq_da.sortby('time')
    yrmax = eq_da.time.max().dt.year.item()
    yrmin = eq_da.time.min().dt.year.item()
    filename = '{}_{}_equatorial_{}-{}.nc'.format(var, filestr, yrmin, yrmax)
    save_ncfile(eq_da, path, filename)
    return
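
A usage sketch, assuming mri_path holds monthly MRI-ESM1r1 refC2 files named as in filestr above:

# hypothetical call: equatorial (15S-15N) means of water vapour:
produce_eq_means_from_3D_MRI(path=mri_path, var='vmrh2o', lat_slice=[-15, 15])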
Example #7
def siphon_igra2_to_xarray(station, path=sound_path,
                           fields=['temperature', 'pressure'],
                           times=['1984-01-01', '2019-12-31'], derived=False):
    from siphon.simplewebservice.igra2 import IGRAUpperAir
    import pandas as pd
    import numpy as np
    import xarray as xr
    from urllib.error import URLError
    import logging
    from aux_functions_strat import path_glob

    logger = logging.getLogger('strato_sounding')
#    logging.basicConfig(filename=path / 'siphon.log', level=logging.INFO,
#                        format='%(asctime)s  %(levelname)-10s %(processName)s  %(name)s %(message)s')
    # check for already downloaded files:
    files = path_glob(path, '*_derived.nc')
    names = [x.as_posix().split('/')[-1].split('.')[0] for x in files]
    if station in names:
        logger.warning('station {} already downloaded, skipping'.format(station))
        return '1'  # status code: already downloaded
    logger.info('fields chosen are: {}'.format(fields))
    logger.info('dates chosen are: {}'.format(times))
    dates = pd.to_datetime(times)
    dates = [x.to_pydatetime() for x in dates]
    logger.info('getting {} from IGRA2...'.format(station))
    try:
        df, header = IGRAUpperAir.request_data(dates, station, derived=derived)
    except URLError:
        logger.warning('file not found using siphon, skipping...')
        return '2'  # status code: download failed
    header = header[header['number_levels'] > 25]  # enough resolution
    dates = header['date'].values
    logger.info('splicing dataframe and converting to xarray dataset...')
    ds_list = []
    for date in dates:
        dff = df[fields].loc[df['date'] == date]
        # release = dff.iloc[0, 1]
        dss = dff.to_xarray()
        # dss.attrs['release'] = release
        ds_list.append(dss)
    # pad every sounding with NaNs up to the longest profile:
    max_ind = np.max([ds.index.size for ds in ds_list])
    vars_ = np.nan * np.ones((len(dates), len(fields), max_ind))
    for i, ds in enumerate(ds_list):
        size = ds[list(ds.data_vars)[0]].size
        vars_[i, :, 0:size] = ds.to_array().values
    Vars = xr.DataArray(vars_, dims=['time', 'var', 'point'])
    Vars['time'] = dates
    Vars['var'] = fields
    ds = Vars.to_dataset(dim='var')
    for field in fields:
        ds[field].attrs['units'] = df.units[field]
    ds.attrs['site_id'] = header.loc[:, 'site_id'].values[0]
    ds.attrs['lat'] = header.loc[:, 'latitude'].values[0]
    ds.attrs['lon'] = header.loc[:, 'longitude'].values[0]
    logger.info('Done!')
    if derived:
        filename = station + '_derived' + '.nc'
    else:
        filename = station + '_not_derived' + '.nc'
    comp = dict(zlib=True, complevel=9)  # best compression
    encoding = {var: comp for var in ds.data_vars}
    ds.to_netcdf(path / filename, 'w', encoding=encoding)
    logger.info('saved {} to {}.'.format(filename, path))
    return ds
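
A usage sketch, assuming sound_path is defined; the station id is the Majuro station referenced in the next example, and the string returns are the status codes used above for skipped or failed downloads:

ds = siphon_igra2_to_xarray('RMM00091376', derived=True)
if isinstance(ds, str):
    print('station skipped or failed, status code: {}'.format(ds))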
Example #8
def calc_cold_point_from_sounding(path=sound_path, times=['1993', '2017'],
                                  plot=True, return_mean=True,
                                  return_anom=True):
    import xarray as xr
    from aux_functions_strat import deseason_xr, path_glob

    def return_one_station(file_obj, name, times):
        print('processing station {}:'.format(name))
        station = xr.open_dataset(file_obj)
        if times is None:
            first = station['time'].min().dt.strftime('%Y-%m')
            last = station['time'].max().dt.strftime('%Y-%m')
            times = [first, last]
        station = station.sel(time=slice(times[0], times[1]))
        # take Majuro station data after 2011 only nighttime:
        if 'RMM00091376' in name:
            print('taking just the midnight soundings after 2011 for {}'.format(name))
            station_after_2011 = station.sel(
                    time=slice('2011', times[1])).where(
                            station['time.hour'] == 0)
            station_before_2011 = station.sel(time=slice(times[0], '2010'))
            station = xr.concat([station_before_2011, station_after_2011],
                                'time')
        # cold point: minimum temperature between 80 and 120 hPa,
        # required to be below -72 degC:
        cold = station['temperature'].where(station.pressure <= 120).where(
                station.pressure >= 80).min(dim='point')
        cold = cold.where(cold < -72)
        cold.attrs = station.attrs

        try:
            cold = cold.resample(time='MS').mean()
        except IndexError:
            # no valid data left for this station:
            return None
        if return_anom:
            anom = deseason_xr(cold, how='mean')
            anom.name = name
            return anom
        cold.name = name
        return cold

    da_list = []
    for file in path_glob(path, '*_derived.nc'):
        if file.is_dir():
            continue
        name = file.as_posix().split('/')[-1].split('.')[0]
        da = return_one_station(file, name, times)
        if da is not None:
            da_list.append(da)
    ds = xr.merge(da_list)
    da = ds.to_array(dim='name')
    if return_anom:
        da.name = 'radiosonde_cold_point_anomalies'
    else:
        da.name = 'radiosonde_cold_point'
    mean_da = da.mean('name')
    if plot:
        da.to_dataset('name').to_dataframe().plot()
    if return_mean:
        return mean_da
    return da
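
A usage sketch, assuming sound_path contains *_derived.nc files written by siphon_igra2_to_xarray:

# monthly cold-point anomalies, averaged over all available stations:
cold_anoms = calc_cold_point_from_sounding(times=['1993', '2017'], plot=False,
                                           return_mean=True, return_anom=True)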