Example #1
        usv_min_lat, usv_max_lat = ds_usv.lat.min().data - .5, ds_usv.lat.max(
        ).data + .5
        cond = (xlon >= usv_min_lon) & (xlon <= usv_max_lon)
        sub_lon = xlon.where(cond)
        cond = (xlat >= usv_min_lat) & (xlat <= usv_max_lat)
        sub_lat = xlat.where(cond)

        ph0 = var_data.phony_dim_0
        ph1 = var_data.phony_dim_1
        tem_time = sat_time
        ds = xr.Dataset(
            {
                'time': (['phony_dim_0'], tem_time),
                'tb': (['phony_dim_0', 'phony_dim_1'], var_data.data),
                'lat': (['phony_dim_0', 'phony_dim_1'], sub_lat.data),
                'lon': (['phony_dim_0', 'phony_dim_1'], sub_lon.data)
            },
            coords={
                'phony_dim_0': (['phony_dim_0'], ph0),
                'phony_dim_1': (['phony_dim_1'], ph1)
            })
        ds2 = ds.stack(z=('phony_dim_0', 'phony_dim_1')).reset_index('z')
        # drop nan
        #        ds_drop = ds2.where(np.isfinite(ds2.lon), drop=True)
        ds_dropa = ds2.where(np.isfinite(ds2.lon), drop=True)
        ds_drop = ds_dropa.where(np.isfinite(ds_dropa.lat), drop=True)

        lats = ds_drop.lat.data
        lons = ds_drop.lon.data
        inputdata = list(zip(lons.ravel(), lats.ravel()))
        tree = spatial.KDTree(inputdata)
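A hedged sketch (not part of the original snippet) of how the KD-tree built above might be used: query it with the USV track positions to find the nearest satellite pixel for each point. The ds_usv lon/lat variable names are assumptions carried over from the truncated code.

# Query the tree with the USV positions (assumed variable names).
usv_points = list(zip(ds_usv.lon.data.ravel(), ds_usv.lat.data.ravel()))
distances, indices = tree.query(usv_points)   # nearest flattened satellite pixel per point
matched_tb = ds_drop.tb.data[indices]         # brightness temperature at the matched pixels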
Example #2
def test_custom_criteria():
    my_custom_criteria = {
        "ssh": {
            "standard_name": "sea_surface_elev*|sea_surface_height",
            "name": "sea_surface_elevation$",  # variable name
        },
        "salt": {
            "standard_name": "salinity",
            "name": "sal*",
        },
        "wind_speed": {
            "standard_name": "wind_speed$",
        },
    }
    my_custom_criteria2 = {"temp": {"name": "temperature"}}
    my_custom_criteria_list = [my_custom_criteria, my_custom_criteria2]
    my_custom_criteria_tuple = (my_custom_criteria, my_custom_criteria2)

    cf_xarray.set_options(custom_criteria=my_custom_criteria)

    # Match by name regex match
    ds = xr.Dataset()
    ds["salinity"] = ("dim", np.arange(10))
    assert_identical(ds.cf["salt"], ds["salinity"])

    # Match by standard_name regex match
    ds = xr.Dataset()
    ds["elev"] = ("dim", np.arange(10), {
        "standard_name": "sea_surface_elevBLAH"
    })
    assert_identical(ds.cf["ssh"], ds["elev"])

    # Match by standard_name exact match
    ds = xr.Dataset()
    ds["salinity"] = ("dim", np.arange(10), {"standard_name": "salinity"})
    assert_identical(ds.cf["salt"], ds["salinity"])

    # If not exact name, won't match
    ds = xr.Dataset()
    ds["sea_surface_elevation123"] = ("dim", np.arange(10))
    # Since this will not match, this should error
    with pytest.raises(KeyError):
        ds.cf["ssh"]

    # will select only one variable here since exact match
    ds = xr.Dataset()
    ds["winds"] = ("dim", np.arange(10), {"standard_name": "wind_speed"})
    ds["gusts"] = ("dim", np.arange(10), {
        "standard_name": "wind_speed_of_gust"
    })
    assert_identical(ds.cf["wind_speed"], ds["winds"])

    # Match by exact name
    ds = xr.Dataset()
    ds["sea_surface_elevation"] = ("dim", np.arange(10))
    ds["sea_surface_height"] = (
        "dim",
        np.arange(10),
        {
            "standard_name": "sea_surface_elevBLAH"
        },
    )
    # Since there are two variables, this should error
    with pytest.raises(KeyError):
        ds.cf["ssh"]
    # But the following should work instead given the two ssh variables
    assert_identical(ds.cf[["ssh"]],
                     ds[["sea_surface_elevation", "sea_surface_height"]])

    # test criteria list of dicts
    with cf_xarray.set_options(custom_criteria=my_custom_criteria_list):
        ds = xr.Dataset()
        ds["temperature"] = ("dim", np.arange(10))
        assert_identical(ds.cf["temp"], ds["temperature"])

    # test criteria tuple of dicts
    with cf_xarray.set_options(custom_criteria=my_custom_criteria_tuple):
        ds = xr.Dataset()
        ds["temperature"] = ("dim", np.arange(10))
        assert_identical(ds.cf["temp"], ds["temperature"])
Example #3
def test_attributes():
    actual = airds.cf.sizes
    expected = {
        "X": 50,
        "Y": 25,
        "T": 4,
        "longitude": 50,
        "latitude": 25,
        "time": 4
    }
    assert actual == expected

    assert popds.cf.sizes == {"X": 30, "Y": 20}

    with pytest.raises(AttributeError):
        multiple.cf.sizes

    assert airds.cf.chunks == {}

    expected = {
        "X": (50, ),
        "Y": (5, 5, 5, 5, 5),
        "T": (4, ),
        "longitude": (50, ),
        "latitude": (5, 5, 5, 5, 5),
        "time": (4, ),
    }
    assert airds.chunk({"lat": 5}).cf.chunks == expected

    with pytest.raises(AttributeError):
        airds.da.cf.chunks

    airds2 = airds.copy(deep=True)
    airds2.lon.attrs = {}
    actual = airds2.cf.sizes
    expected = {"lon": 50, "Y": 25, "T": 4, "latitude": 25, "time": 4}
    assert actual == expected

    actual = popds.cf.data_vars
    expected = {
        "sea_water_x_velocity": popds.cf["UVEL"],
        "sea_water_potential_temperature": popds.cf["TEMP"],
    }
    assert_dicts_identical(actual, expected)

    actual = multiple.cf.data_vars
    expected = dict(multiple.data_vars)
    assert_dicts_identical(actual, expected)

    # check that data_vars contains ancillary variables
    assert_identical(anc.cf.data_vars["specific_humidity"],
                     anc.cf["specific_humidity"])

    # clash between var name and "special" CF name
    # Regression test for #126
    data = np.random.rand(4, 3)
    times = pd.date_range("2000-01-01", periods=4)
    locs = [30, 60, 90]
    coords = [("time", times, {"axis": "T"}), ("space", locs)]
    foo = xr.DataArray(data, coords, dims=["time", "space"])
    ds1 = xr.Dataset({"T": foo})
    assert_identical(ds1.cf.data_vars["T"], ds1["T"])

    # multiple latitudes but only one latitude data_var
    ds = popds.copy(deep=True)
    for var in ["ULAT", "TLAT"]:
        ds[var].attrs["standard_name"] = "latitude"
    ds = ds.reset_coords("ULAT")
    assert_identical(ds.cf.data_vars["latitude"], ds.cf["ULAT"])
Example #4
                i] + '/MONTH/' + variable_in + '/' + list_histo[
                    i] + '_' + variable_in + '_' + str(year) + m + '.nc'
            TAS = xr.open_dataset(filename)
            # read the precipitation field
            filename = rep1 + path_histo[i] + '/MONTH/preacc/' + list_histo[
                i] + '_preacc_' + str(year) + m + '.nc'
            PR = xr.open_dataset(filename)
            # The temperature and precipitation fields may not have the same time dimension.
            # To work around this, we build an xarray.Dataset holding both variables but
            # using only the precipitation time dimension.

            ds = xr.Dataset(
                {
                    'temperature': (['time', 'y', 'x'], TAS.tasmax.values),
                    'precipitation': (['time', 'y', 'x'], PR.preacc.values)
                },
                coords={
                    'lon': (['y', 'x'], PR.lon),
                    'lat': (['y', 'x'], PR.lat),
                    'time': PR.time
                })

            # Detect days with precipitation
            PR_w_precip = ds.precipitation.where((ds.precipitation >= 1))
            # Record the temperature associated with those precipitation days
            TT_w_precip = ds.temperature.where((ds.precipitation >= 1))

            # Step where an intermediate netCDF field can be saved
            # PR_w_precip.to_netcdf('method1.nc')

            # Keep only the grid points over the watershed
            PR_w_precip_BV = PR_w_precip.where(MASK.sftlf == 100)
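A short continuation sketch (not in the original snippet): apply the same watershed mask to the wet-day temperature field and reduce it over the grid; MASK and the dimension names follow the code above.

# Mask the wet-day temperatures with the watershed mask and average over the grid.
TT_w_precip_BV = TT_w_precip.where(MASK.sftlf == 100)
mean_wet_day_T = TT_w_precip_BV.mean(dim=['y', 'x'], skipna=True)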
Example #5
def pd_dataframe_2_ragged(dataframe,
                          groupby,
                          var_names_,
                          coord_names_,
                          coord_var_names_,
                          var_types_=None,
                          coord_types_=None,
                          coord_var_types_=None):
    """
    Convert profile data(x, y, z, t) to ragged format.

    Ragged format is as described by CF conventions. Default
    types of coordinates and variables are `numpy.float64`.

    Parameters
    ----------
    dataframe: pd.DataFrame
        Columns are a list of obs and coords.
    groupby: list of str
        Input col names to use for grouping sets of obs.
    var_names_: list of str
        Input col names to extract.
    coord_names_: list of str
        Output coord names in order matching `groupby`.
    coord_var_names_: list of str
        Input col names of coords same size as obs.
    var_types_: list of objects
        Variable types with order matching `var_names_`.
    coord_types_: list of objects
        Coord types with order matching `coord_names_`.
    coord_var_types_: list of objects
        Coord variable types with order matching `coord_var_names_`.

    Returns
    -------
    xr.Dataset
        Input dataframe in ragged format.

    """
    # Manage default types
    if var_types_ is None:
        var_types_ = [np.float64 for _ in var_names_]
    if coord_types_ is None:
        coord_types_ = [np.float64 for _ in coord_names_]
    if coord_var_types_ is None:
        coord_var_types_ = [np.float64 for _ in coord_var_names_]

    # Group observations by profile and get the group index
    gp = dataframe.groupby(groupby)
    index_ = gp.first().index

    # Set up master dimension
    profiles_ = np.arange(index_.size)

    # Set up coordinates
    coords_ = dict()
    for c_, t_ in zip(coord_names_, coord_types_):
        coords_[c_] = ('profiles', np.empty(index_.size, dtype=t_))

    # Set up variables
    vars_ = dict()
    for v_, t_ in zip(var_names_, var_types_):
        vars_[v_] = (['obs'],
                     np.empty(dataframe[v_].values.size, dtype=t_))
        vars_['%s_row_size' % v_] = (['profiles'],
                                     np.zeros(index_.size, dtype=np.int64))

    # Set up coordinate variables
    coord_vars_ = dict()
    if coord_var_names_:
        for c_, t_ in zip(coord_var_names_, coord_var_types_):
            coord_vars_[c_] = ('obs',
                               np.empty(dataframe[c_].values.size,
                                        dtype=t_))
            vars_['%s_row_size' % c_] = (['profiles'],
                                         np.zeros(index_.size, dtype=np.int64))

    # Loop over groups
    for i_, name_ in enumerate(index_):

        # Find individual profile
        profile = gp.get_group(name_).copy(deep=True)

        # Reintegrate coordinates to this profile
        for j_, c_ in enumerate(coords_):
            coords_[c_][1][i_] = name_[j_]

        # Get downcast size
        rs_ = profile.shape[0]

        # Store coordinate variable data and size
        for c_ in coord_var_names_:
            row_start_ = np.nansum(vars_['%s_row_size' % c_][1])
            coord_vars_[c_][1][row_start_:row_start_ +
                               rs_] = profile[c_].values
            vars_['%s_row_size' % c_][1][i_] = rs_

        # Store variable data and size
        for v_ in var_names_:
            row_start_ = np.nansum(vars_['%s_row_size' % v_][1])
            vars_[v_][1][row_start_:row_start_ + rs_] = profile[v_].values
            vars_['%s_row_size' % v_][1][i_] = rs_

    # Form xarray dataset
    dataset = xr.Dataset(vars_,
                         coords={
                             'profiles': profiles_,
                             **coords_,
                             **coord_vars_
                         })

    return dataset
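A small usage sketch (not part of the original) for pd_dataframe_2_ragged above; the column names are hypothetical, and numpy/pandas/xarray are assumed to be imported in the module as in the function.

import numpy as np
import pandas as pd

# Two profiles identified by their (lon, lat) pair, with depth as a per-observation coordinate.
df = pd.DataFrame({
    'lon': [-60.0, -60.0, -60.0, -61.0, -61.0],
    'lat': [47.0, 47.0, 47.0, 48.0, 48.0],
    'z': [0., 10., 20., 0., 10.],
    'temperature': [5.1, 4.8, 4.2, 6.0, 5.5],
})
ragged = pd_dataframe_2_ragged(df,
                               groupby=['lon', 'lat'],
                               var_names_=['temperature'],
                               coord_names_=['lon', 'lat'],
                               coord_var_names_=['z'])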
Example #6
    def make_xarray_dataset(data, atts):
        """Short summary.

        Parameters
        ----------
        data : type
            Description of parameter `data`.
        atts : type
            Description of parameter `atts`.

        Returns
        -------
        type
            Description of returned object.

        """
        from numpy import array, ndarray
        # altitude variables
        alt = data['ALT'][:].squeeze()
        altvars = [
            'AirND', 'AirNDUncert', 'ChRange', 'Press', 'Temp', 'TempUncert',
            'PressUncert'
        ]
        # time variables
        tseries = pd.Series(data["TIME_MID_UT_UNIX"][:].squeeze())
        time = pd.Series(pd.to_datetime(tseries, unit='ms'), name='time')
        # all other variables
        ovars = [
            'O3MR', 'O3ND', 'O3NDUncert', 'O3MRUncert', 'O3NDResol',
            'Precision'
        ]

        dataset = xr.Dataset()
        dataset['z'] = (('z'), alt)
        dataset['time'] = (('time'), time)
        dataset['x'] = (('x'), [0])
        dataset['y'] = (('y'), [0])
        for i in ovars:
            if data[i].shape == (len(alt), len(time)):
                dataset[i] = (('z', 'time'), data[i][:])
            elif data[i].shape == (len(alt), 1):
                dataset[i] = (('z'), data[i][:].squeeze())
            else:
                dataset[i] = (('time'), data[i][:].squeeze())
            dataset[i] = dataset[i].where(dataset[i] > -990)
        for i in altvars:
            # print(i)
            dataset[i] = (('z'), data[i][:].squeeze())

        for i in list(atts.attrs.keys()):
            # print(type(atts.attrs[i]))
            if isinstance(atts.attrs[i], list) or isinstance(
                    atts.attrs[i], ndarray):
                # print('here')
                dataset.attrs[i] = atts.attrs[i][0]
            else:
                dataset.attrs[i] = atts.attrs[i]

        # print(dataset)
        a, b = dataset.Location_Latitude.decode('ascii').split()
        if b == 'S':
            latitude = -1 * float(a)
        else:
            latitude = float(a)
        a, b = dataset.Location_Longitude.decode('ascii').split()
        if b == 'W':
            longitude = -1 * float(a)
        else:
            longitude = float(a)
        # dataset = dataset.expand_dims('x')
        # dataset = dataset.expand_dims('y')
        dataset.coords['latitude'] = (('y', 'x'), array(latitude).reshape(
            1, 1))
        dataset.coords['longitude'] = (('y', 'x'), array(longitude).reshape(
            1, 1))
        return dataset
Example #7
        except ValueError:
            b = np.nan
        return b

    year_on_ds = ds18_present.salem.lookup_transform(ls, grid=g.grid, lut=lut, method=dyear)

    # deforestation before 2009: set to 0
    #pdb.set_trace()
    #lst_on_ds[np.where(year_on_ds<=9)]=0



    ds = xr.Dataset({'topo': (['lat', 'lon'], srtm_on_ds),
                     't' : (['lat', 'lon'], t_on_ds),
                     't2': (['lat', 'lon'], t2_on_ds),
                     'deforestation': (['lat', 'lon'], lst_on_ds),
                     'forest2000': (['lat', 'lon'], vegfra_on_ds),
                     'dyear': (['lat', 'lon'], year_on_ds),
                     },
                    coords=ds18_present.coords)

    ds.to_netcdf(path + 'scatter.nc')

else:
    srfc = xr.open_dataset(path + 'scatter.nc')
    #
    # coord = [-8, -5.5, 5.1, 8, 5.5, 6, 7.9, 8]
    # srfc = srfc.sel(lon=slice(coord[0], coord[1]), lat=slice(coord[2], coord[3]))

    srtm_on_ds = srfc['topo']
    t_on_ds = srfc['t']
    t2_on_ds = srfc['t2']
Example #8
def tasseled_cap(sensor_data,
                 tc_bands=['greenness', 'brightness', 'wetness'],
                 drop=True):
    """   
    Computes tasseled cap wetness, greenness and brightness bands from a six
    band xarray dataset, and returns a new xarray dataset with old bands
    optionally dropped.
    
    Coefficients are from Crist and Cicone 1985 "A TM Tasseled Cap equivalent 
    transformation for reflectance factor data"
    https://doi.org/10.1016/0034-4257(85)90102-6
    
    Last modified: June 2018
    Authors: Robbi Bishop-Taylor, Bex Dunn
    
    :attr sensor_data: input xarray dataset with six Landsat bands
    :attr tc_bands: list of tasseled cap bands to compute
    (valid options: 'wetness', 'greenness','brightness')
    :attr drop: if True (default), drop the original Landsat bands from the output; if False, keep them alongside the new bands
    :returns: xarray dataset with newly computed tasseled cap bands
    """

    # Copy input dataset
    output_array = sensor_data.copy(deep=True)

    # Coefficients for each tasseled cap band
    wetness_coeff = {
        'blue': 0.0315,
        'green': 0.2021,
        'red': 0.3102,
        'nir': 0.1594,
        'swir1': -0.6806,
        'swir2': -0.6109
    }

    greenness_coeff = {
        'blue': -0.1603,
        'green': -0.2819,
        'red': -0.4934,
        'nir': 0.7940,
        'swir1': -0.0002,
        'swir2': -0.1446
    }

    brightness_coeff = {
        'blue': 0.2043,
        'green': 0.4158,
        'red': 0.5524,
        'nir': 0.5741,
        'swir1': 0.3124,
        'swir2': 0.2303
    }

    # Dict to use correct coefficients for each tasseled cap band
    analysis_coefficient = {
        'wetness': wetness_coeff,
        'greenness': greenness_coeff,
        'brightness': brightness_coeff
    }

    # For each band, compute tasseled cap band and add to output dataset
    for tc_band in tc_bands:
        # Create xarray of coefficient values used to multiply each band of input
        coeff = xr.Dataset(analysis_coefficient[tc_band])
        sensor_coeff = sensor_data * coeff

        # Sum all bands
        output_array[tc_band] = sensor_coeff.blue + sensor_coeff.green + \
                                sensor_coeff.red + sensor_coeff.nir + \
                                sensor_coeff.swir1 + sensor_coeff.swir2

    # If drop = True, remove original bands
    if drop:
        bands_to_drop = list(sensor_data.data_vars)
        output_array = output_array.drop(bands_to_drop)

    return output_array
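A minimal usage sketch (not from the original source): apply tasseled_cap to a tiny synthetic dataset that carries the six expected band names.

import numpy as np
import xarray as xr

bands = ['blue', 'green', 'red', 'nir', 'swir1', 'swir2']
sensor_data = xr.Dataset({b: (('y', 'x'), np.random.rand(3, 3)) for b in bands})
tc = tasseled_cap(sensor_data, tc_bands=['wetness', 'greenness'], drop=True)
print(list(tc.data_vars))  # ['wetness', 'greenness']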
Example #9
def cdf_to_xarray(filename,
                  to_datetime=False,
                  to_unixtime=False,
                  fillval_to_nan=False):

    # Convert the CDF file into a series of dicts, so we don't need to keep reading the file
    global_attributes, all_variable_attributes, all_variable_data, all_variable_properties = _convert_cdf_to_dicts(
        filename, to_datetime=to_datetime, to_unixtime=to_unixtime)

    created_vars, depend_dimensions = _generate_xarray_data_variables(
        all_variable_data, all_variable_attributes, all_variable_properties,
        fillval_to_nan)

    label_variables = _discover_label_variables(all_variable_attributes,
                                                all_variable_properties,
                                                all_variable_data)
    uncertainty_variables = _discover_uncertainty_variables(
        all_variable_attributes)

    # Determine which dimensions are coordinates vs actual data
    # Variables are considered coordinates if one of the other dimensions depends on them.
    # Otherwise, they are considered data variables.
    created_coord_vars = {}
    created_data_vars = {}
    for var_name in created_vars:
        if var_name in label_variables:
            # If these are label variables, we'll deal with these later when the DEPEND variables come up
            continue
        elif (var_name in depend_dimensions) or (var_name + '_dim'
                                                 in depend_dimensions):
            # If these are DEPEND variables, add them to the DataSet coordinates
            created_coord_vars[var_name] = created_vars[var_name]
            # Check if these coordinate variables have associated labels
            for lab in label_variables:
                if label_variables[lab] == var_name:  # Found one!
                    if len(created_vars[lab].dims) == len(
                            created_vars[var_name].dims):
                        if created_vars[lab].size != created_vars[
                                var_name].size:
                            print(
                                f"Warning, label variable {lab} does not match the expected dimension sizes of {var_name}"
                            )
                        else:
                            created_vars[lab].dims = created_vars[
                                var_name].dims
                    else:
                        created_vars[lab].dims = created_vars[var_name].dims[
                            -1]
                    # Add the labels to the coordinates as well
                    created_coord_vars[lab] = created_vars[lab]
        elif var_name in uncertainty_variables:
            # If there is an uncertainty variable, link it to the uncertainty along a dimension
            if created_vars[var_name].size == created_vars[
                    uncertainty_variables[var_name]].size:
                created_vars[var_name].dims = created_vars[
                    uncertainty_variables[var_name]].dims
                created_coord_vars[var_name] = created_vars[var_name]
            else:
                created_data_vars[var_name] = created_vars[var_name]
        else:
            created_data_vars[var_name] = created_vars[var_name]

    # Create the XArray DataSet Object!
    return xr.Dataset(data_vars=created_data_vars,
                      coords=created_coord_vars,
                      attrs=global_attributes)
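A brief usage sketch (not part of the original); the CDF file name is hypothetical.

# Convert a CDF file to an xarray Dataset, converting epoch times and fill values.
ds = cdf_to_xarray('example_data.cdf', to_datetime=True, fillval_to_nan=True)
print(ds)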
Example #10
def calc_nliw_params(h5file, zmin, dz, mode=0):

    # Load the data
    #data,time, rho, depth, rho_std, z_std, rho_mu = load_density_h5(h5file)
    data, time, rho_std, z_std, rho_mu = load_density_h5(h5file)
    nparams, nt, ntrace = data[:].shape

    zout = np.arange(0, zmin, -dz)

    # Calculate c and alpha
    samples = ntrace
    alpha_ens = np.zeros((nt, samples))
    c_ens = np.zeros((nt, samples))

    rand_loc = np.random.randint(0, ntrace, samples)

    for tstep in tqdm(range(0, nt)):
        #if (tstep%20==0):
        #    print('%d of %d...'%(tstep,nt))
        for ii in range(samples):

            if nparams == 4:
                rhotmp = single_tanh(data[:, tstep, rand_loc[ii]],
                                     zout / z_std)
            elif nparams == 6:
                rhotmp = double_tanh(data[:, tstep, rand_loc[ii]],
                                     zout / z_std)
            elif nparams == 7:
                rhotmp = double_tanh_7(data[:, tstep, rand_loc[ii]],
                                       zout / z_std)

            # Need to scale rho

            rhotmp = rhotmp * rho_std + rho_mu

            N2 = -9.81 / 1000 * np.gradient(rhotmp, -dz)

            phi, cn = isw.iwave_modes(N2, dz)

            phi_1 = phi[:, mode]
            phi_1 = phi_1 / np.abs(phi_1).max()
            phi_1 *= np.sign(phi_1.sum())

            alpha = isw.calc_alpha(phi_1, cn[mode], N2, dz)

            alpha_ens[tstep, ii] = alpha
            c_ens[tstep, ii] = cn[mode]
            #mykdv = kdv.KdV(rhotmp,zout)

    # Export to an xarray data set
    # Create an xray dataset with the output
    dims2 = (
        'time',
        'ensemble',
    )
    #dims2a = ('time','depth',)
    dims3 = ('params', 'time', 'ensemble')

    #time = rho.time.values
    #time = range(nt)
    coords2 = {'time': time, 'ensemble': range(ntrace)}
    #coords2a = {'time':time, 'depth':depth[:,0]}
    coords3 = {
        'time': time,
        'ensemble': range(ntrace),
        'params': range(nparams)
    }

    #rho = xr.DataArray(rho.T,
    #    coords=coords2a,
    #    dims=dims2a,
    #    attrs={'long_name':'', 'units':''},
    #    )

    cn_da = xr.DataArray(
        c_ens,
        coords=coords2,
        dims=dims2,
        attrs={
            'long_name': '',
            'units': ''
        },
    )

    alpha_da = xr.DataArray(
        alpha_ens,
        coords=coords2,
        dims=dims2,
        attrs={
            'long_name': '',
            'units': ''
        },
    )

    beta_da = xr.DataArray(
        data,
        coords=coords3,
        dims=dims3,
        attrs={
            'long_name': '',
            'units': ''
        },
    )

    dsout = xr.Dataset({
        'cn': cn_da,
        'alpha': alpha_da,
        'beta': beta_da,
    })

    return dsout
Example #11
 def mixed_reduce(grdds, dim=None):
     tas1 = grdds.tas1.mean(dim=dim)
     tas0 = grdds.tas0 / grdds.tas0.mean(dim=dim)
     tas1.attrs["_group_apply_reshape"] = True
     return xr.Dataset(data_vars={"tas1_mean": tas1, "norm_tas0": tas0})
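A toy usage sketch (not part of the original): call mixed_reduce on a small synthetic dataset holding tas0 and tas1 variables.

import numpy as np
import xarray as xr

grdds = xr.Dataset({'tas0': ('time', np.arange(1.0, 5.0)),
                    'tas1': ('time', np.arange(10.0, 14.0))})
out = mixed_reduce(grdds, dim='time')   # Dataset with 'tas1_mean' and 'norm_tas0'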
Example #12
def handle(infiles, tables, user_input_path, **kwargs):
    """
    Transform MPASSI timeMonthly_avg_iceAreaCell and
    timeMonthly_avg_snowVolumeCell into CMIP.sisnthick

    Parameters
    ----------
    infiles : dict
        a dictionary with namelist, mesh and time series file names

    tables : str
        path to CMOR tables

    user_input_path : str
        path to user input json file

    Returns
    -------
    varname : str
        the name of the processed variable after processing is complete
    """
    msg = 'Starting {name}'.format(name=__name__)
    logging.info(msg)

    meshFileName = infiles['MPAS_mesh']
    mappingFileName = infiles['MPAS_map']
    timeSeriesFiles = infiles['MPASSI']

    dsMesh = xarray.open_dataset(meshFileName, mask_and_scale=False)
    cellMask2D, _ = mpas.get_cell_masks(dsMesh)

    variableList = ['timeMonthly_avg_iceAreaCell',
                    'timeMonthly_avg_snowVolumeCell',
                    'xtime_startMonthly', 'xtime_endMonthly']

    ds = xarray.Dataset()
    with mpas.open_mfdataset(timeSeriesFiles, variableList) as dsIn:
        ds[VAR_NAME] = dsIn.timeMonthly_avg_snowVolumeCell
        ds['siconc'] = dsIn.timeMonthly_avg_iceAreaCell
        ds = mpas.add_time(ds, dsIn)
        ds.compute()

    ds = mpas.add_si_mask(ds, cellMask2D, ds.siconc)
    ds['cellMask'] = ds.siconc * ds.cellMask
    ds.compute()

    ds = mpas.remap(ds, mappingFileName)

    mpas.setup_cmor(VAR_NAME, tables, user_input_path, component='seaice')

    # create axes
    axes = [{'table_entry': 'time',
             'units': ds.time.units},
            {'table_entry': 'latitude',
             'units': 'degrees_north',
             'coord_vals': ds.lat.values,
             'cell_bounds': ds.lat_bnds.values},
            {'table_entry': 'longitude',
             'units': 'degrees_east',
             'coord_vals': ds.lon.values,
             'cell_bounds': ds.lon_bnds.values}]

    try:
        mpas.write_cmor(axes, ds, VAR_NAME, VAR_UNITS)
    except Exception:
        return ""
    return VAR_NAME
Example #13
 allfiles = filter(
     lambda file: fnmatch.fnmatch(file, experiment + '_*.txt'),
     os.listdir(directory))
 allfiles = [directory + '/' + name for name in allfiles]
 allfiles.sort()
 # From the file name, extract the independent variables
 dimensions = {}
 for file in allfiles:
     dimensions = mergeDicts(dimensions, extractCoordinates(file))
 dimensions = {k: sorted(v) for k, v in dimensions.items()}
 # Add time to the independent variables
 dimensions[timeColumnName] = range(0, timeSamples)
 # Compute the matrix shape
 shape = tuple(len(v) for k, v in dimensions.items())
 # Prepare the Dataset
 dataset = xr.Dataset()
 for k, v in dimensions.items():
     dataset.coords[k] = v
 if len(allfiles) == 0:
     print("WARNING: No data for experiment " + experiment)
 else:
     varNames = extractVariableNames(allfiles[0])
     for v in varNames:
         if v != timeColumnName:
             novals = np.ndarray(shape)
             novals.fill(float('nan'))
             dataset[v] = (dimensions.keys(), novals)
     # Compute maximum and minimum time, create the resample
     timeColumn = varNames.index(timeColumnName)
     allData = {file: np.matrix(openCsv(file)) for file in allfiles}
     computeMin = minTime is None
Example #14
def loadSequentialRawImages_v2(	partialFileName,
								dtype=_np.uint16,
								shape=(64, 128),
								fps=None,
								color=True,
								plot=False,
								save=False):
	"""
	Loads sequential images.
	
	Parameters
	----------
	partialFileName : str
		Directory and filename of the images.  Use wildcards * to indicate
		unspecified text.  Example, partialFileName = 'images/*.tif'
	dtype : str or numpy data type
		You must specify the data type of the images being loaded.  'uint16' is default.
	shape : tuple with two ints
		These represent the (y,x) resolution of the figure.  NOTE that the order is (y,x).  If you get this backwards, the code will still work, but the image won't look right
	color : bool
		True - assumes that there R,B,G data associated with each image
		False - assumes greyscale.
		
	Returns
	-------
	video_out : xarray.core.dataset.Dataset
		xarray Dataset with coordinates t, y, x.  Greyscale data is stored in a
		single 'video' variable; color data is stored in 'r', 'g' and 'b' variables.
		t is the frame time, or the frame number if fps is not given.
	
	Example
	-------
	::
		
		import numpy as np
		
		if False:
			directory='C:\\Users\\jwbrooks\\HSVideos\\cathode_testing\\Test1\\test3_goodCaes_15A_30sccm_29p3V'
			shape=(64,128)
			fps=390000
		elif False:
			directory='C:\\Users\\jwbrooks\\HSVideos\\cathode_testing\\Test1\\test4_2020_10_12_50mmlens_400kHz_15A_35p5V_15sccm'
			shape=(48,48)
			fps=400000
		elif True:
			directory='C:\\Users\\jwbrooks\\HSVideos\\cathode_testing\\test2\\Test1\\test2_goodCase_15A_20sccm_31p6V'
			shape=(64,128)
			fps=390000
		partialFileName='%s\\Img*.raw'%directory
		
		dtype=np.uint16
		color=True
		save=True
		da=loadSequentialRawImages_v2(partialFileName,shape=shape,dtype=dtype,color=color,save=save,plot=True,fps=fps)
		
	References
	----------
	https://rabernat.github.io/research_computing/xarray.html
	"""
	if fps is None:
		dt = 1
	else:
		dt = 1.0 / fps

	# import libraries
	import glob
	import xarray as xr
	import numpy as np
	
	# get file names
	inList=glob.glob(partialFileName, recursive=True)
	dfFiles=_pd.DataFrame(inList,columns=['filepaths'],dtype=str).sort_values('filepaths').reset_index(drop=True)
	
	# initialize data storage 
	if color==True:
		video=_np.zeros((dfFiles.shape[0],shape[0],shape[1],3),dtype=dtype)
	else:
		video=_np.zeros((dfFiles.shape[0],shape[0],shape[1],1),dtype=dtype)
		
	# step through each image and import it in the data storage
	for i,(key,val) in enumerate(dfFiles.iterrows()):
# 		print(val[0]) # TODO convert to a percent complete printout
		A=_np.fromfile(val[0],dtype=dtype)
		if color==True:
			B=A.reshape((shape[0],shape[1],3))
			video[i,:,:,:]=B[::-1,:,:]
		else:
			B=A.reshape((shape[0],shape[1],1))
			video[i,:,:,0]=B[::-1,:,0] 
	
	# convert to xarray
	t = np.arange(video.shape[0]) * dt
	x = np.arange(video.shape[2])
	y = np.arange(video.shape[1])
	if video.shape[3] == 1: # greyscale

		video_out=xr.DataArray(	video[:, :, :, 0],
								dims=['t', 'y', 'x'],
								coords={	't': t,
											'x': x,
											'y': y},
								name='video').to_dataset()
		
	elif video.shape[3] == 3: # color
# 		c = ['blue', 'green', 'red']
		b = xr.DataArray(	video[:, :, :, 0],
								dims=['t', 'y', 'x'],
								coords={	't': t,
											'x': x,
											'y': y})
		g = xr.DataArray(	video[:, :, :, 1],
								dims=['t', 'y', 'x'],
								coords={	't': t,
											'x': x,
											'y': y})
		r = xr.DataArray(	video[:, :, :, 2],
								dims=['t', 'y', 'x'],
								coords={	't': t,
											'x': x,
											'y': y})
		video_out = xr.Dataset({'r': r, 'g': g, 'b': b})
	
# 	if save==True:
# 		import pickle as pkl
# 		pkl.dump(video_out,open(partialFileName.split('*')[0]+'.pkl','wb'))
 	
# 	if plot==True:
# 		_plt.figure()
# 		video_out[0,:,:,0].plot()

	return video_out
Example #15
 def test_merge_error(self):
     ds = xr.Dataset({"x": 0})
     with pytest.raises(xr.MergeError):
         xr.merge([ds, ds + 1])
Example #16
    return new_kprof[kk]


for stn in range(stn_b, stn_e):

    print('station is: ', str(stn))
    print('x is :', d_stn_x[stn])
    print('y is :', d_stn_y[stn])

    ts_x = d_stn_x[stn]
    ts_y = d_stn_y[stn]

    daily_omahor = np.zeros(dayslen)

    for day in range(0, dayslen):

        carp = nc.Dataset(carpT[day])
        W = nc.Dataset(gridW[day])
        #print(gridW[day])
        vedeuph = getVEDEuph(ts_x, ts_y, LL, W, carp)
        if day % 5 == 0:
            print(day)
            print(vedeuph)
        daily_omahor[day] = vedeuph
        #print(toma)

        ved = xr.Dataset({'daily_ved': (['t'], daily_omahor)})
        stn_name = '/data/tjarniko/MEOPAR/analysis_tereza/notebooks/CLUSTER_PAPER/CLEAN/NC_HINDCAST/'\
        + str(year) + '/VED_TS/stn_' + str(stn)  + 'FOTOVED_sp' + str(spacing)+ '.nc'
        ved.to_netcdf(stn_name)
Example #17
 def test_merge_alignment_error(self):
     ds = xr.Dataset(coords={"x": [1, 2]})
     other = xr.Dataset(coords={"x": [2, 3]})
     with raises_regex(ValueError, "indexes .* not equal"):
         xr.merge([ds, other], join="exact")
Example #18
    raise ValueError('must specify dataset')
dataset = args[1]

if len(args) == 2:
    store = 'local'
else:
    store = args[2]

cmip_models = [
    'BCC-CSM2-MR', 'ACCESS-ESM1-5', 'CanESM5', 'MIROC6', 'MPI-ESM1-2-LR'
]
scenarios = ['ssp245', 'ssp370', 'ssp585']

targets = list(map(lambda x: str(x), np.arange(2020, 2120, 20)))
pf = pd.read_parquet(f'data/{dataset}.parquet')
ds = xr.Dataset()

print(f'[{dataset}] filtering values')
pf = pf.dropna().reset_index(drop=True)
if dataset in ['drought', 'insects']:
    badinds = (pf['historical'] > 1) | (np.isnan(pf['historical']))
    for key in pf.columns:
        if key not in ['lat', 'lon', 'type_code', 'r2']:
            badinds = badinds | ((pf[key] > 1) | (np.isnan(pf[key])))
    pf = pf[~badinds]

print(f'[{dataset}] computing multi model mean')
for scenario in scenarios:
    for target in targets:
        keys = list(
            filter(
Example #19
def main():
    infile = ''
    domainfile = ''
    varnames = ['Prec', 'Tmin', 'Tmax', 'wind']
    outfile = ''
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "hi:d:v:o:",
            ["infile=", "domainfile=", "varnamelist=", "outfile="])
    except getopt.GetoptError:
        print(sys.argv[0],
              ' -i <infile> -d <domainfile> -v <varnamelist> -o <outfile>')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(
                sys.argv[0],
                ' -i <infile> -d <domainfile> -v <varnamelist> -o <outfile>')
            sys.exit()
        elif opt in ("-i", "--infile"):
            infile = arg
        elif opt in ("-d", "--domainfile"):
            domainfile = arg
        elif opt in ("-v", "--varnamelist"):
            varnamelist = arg
            varnames = varnamelist.lstrip().rstrip().split(',')
        elif opt in ("-o", "--outfile"):
            outfile = arg

    # Read domain file
    ds = xr.open_dataset(domainfile)

    lat = ds['lat']
    lon = ds['lon']
    mask = ds['mask']

    nLat = len(lat)
    nLon = len(lon)
    cellsize = round(np.asscalar((lat[-1] - lat[0]) / float(nLat - 1)), 6)
    minlat = round(np.asscalar(lat[0] - (0.5 * cellsize)), 6)
    minlon = round(np.asscalar(lon[0] - (0.5 * cellsize)), 6)
    maxlat = round(np.asscalar(lat[-1] + (0.5 * cellsize)), 6)
    maxlon = round(np.asscalar(lon[-1] + (0.5 * cellsize)), 6)

    # Read infile
    ds_in = xr.open_dataset(infile)

    time = ds_in['time']
    lat_in = ds_in['lat']
    lon_in = ds_in['lon']

    nTime = len(time)
    nLat_in = len(lat_in)
    nLon_in = len(lon_in)
    cellsize_in = round(
        np.asscalar((lat_in[-1] - lat_in[0]) / float(nLat_in - 1)), 6)
    minlat_in = round(np.asscalar(lat_in[0] - (0.5 * cellsize_in)), 6)
    minlon_in = round(np.asscalar(lon_in[0] - (0.5 * cellsize_in)), 6)
    maxlat_in = round(np.asscalar(lat_in[-1] + (0.5 * cellsize_in)), 6)
    maxlon_in = round(np.asscalar(lon_in[-1] + (0.5 * cellsize_in)), 6)

    # Open output file
    ds_out = xr.Dataset(coords={
        'lat': (['lat'], lat),
        'lon': (['lon'], lon),
        'time': (['time'], time),
    })

    for varname in varnames:
        data_in = ds_in[varname]

        # Map input to output
        data_out = np.empty((nTime, nLat, nLon), dtype=np.single)
        for y in range(nLat):
            ctr_lat = minlat + (y + 0.5) * cellsize
            y_in = int((ctr_lat - minlat_in) / cellsize_in)
            for x in range(nLon):
                ctr_lon = minlon + (x + 0.5) * cellsize
                x_in = int((ctr_lon - minlon_in) / cellsize_in)
                data_out[:, y, x] = data_in[:, y_in, x_in]

        ds_out[varname] = (['time', 'lat', 'lon'], data_out)
        ds_out[varname].attrs = ds_in[varname].attrs
        ds_out[varname].encoding = ds_in[varname].encoding

    print('writing to', outfile)
    ds_out.to_netcdf(outfile, engine='scipy')

    ds_in.close()
    ds_out.close()
    ds.close()
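A hedged aside (not in the original script): the per-cell nearest-neighbour lookup in the double loop above can also be written with vectorized index arrays. Variable names follow main(); the sketch is an illustration, not part of the script.

import numpy as np

# Centre coordinates of every output cell and their nearest input-grid indices.
ctr_lat = minlat + (np.arange(nLat) + 0.5) * cellsize
ctr_lon = minlon + (np.arange(nLon) + 0.5) * cellsize
y_in = ((ctr_lat - minlat_in) / cellsize_in).astype(int)
x_in = ((ctr_lon - minlon_in) / cellsize_in).astype(int)
# Fancy-index the input once instead of looping cell by cell.
data_out = data_in.values[:, y_in[:, None], x_in[None, :]].astype(np.single)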
Example #20
def get_data_opensource(prod_info, input_lon, input_lat, acq_min, acq_max,
                        window_size, no_partial_scenes):

    datacube_config = prod_info[0]
    source_prod = prod_info[1]
    source_band_list = prod_info[2]
    mask_band = prod_info[3]

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        if datacube_config != 'default':
            remotedc = Datacube(config=datacube_config)
        else:
            remotedc = Datacube()

        return_data = {}
        data = xr.Dataset()

        if source_prod != '':
            # find dataset to get metadata
            fd_query = {
                'time': (acq_min, acq_max),
                'x': (input_lon, input_lon + window_size / 100000),
                'y': (input_lat, input_lat + window_size / 100000),
            }
            sample_fd_ds = remotedc.find_datasets(product=source_prod,
                                                  group_by='solar_day',
                                                  **fd_query)

            if (len(sample_fd_ds)) > 0:
                # decide pixel size for output data
                pixel_x, pixel_y = get_pixel_size(sample_fd_ds[0],
                                                  source_band_list)

                log.info('Output pixel size for product {}: x={}, y={}'.format(
                    source_prod, pixel_x, pixel_y))

                # get target epsg from metadata
                target_epsg = get_epsg(sample_fd_ds[0])
                log.info('CRS for product {}: {}'.format(
                    source_prod, target_epsg))

                x1, y1, x2, y2 = setQueryExtent(target_epsg, input_lon,
                                                input_lat, window_size)

                query = {
                    'time': (acq_min, acq_max),
                    'x': (x1, x2),
                    'y': (y1, y2),
                    'crs': target_epsg,
                    'output_crs': target_epsg,
                    'resolution': (-pixel_y, pixel_x),
                    'measurements': source_band_list
                }

                if 's2' in source_prod:
                    data = remotedc.load(product=source_prod,
                                         group_by='solar_day',
                                         **query)
                else:
                    data = remotedc.load(product=source_prod,
                                         align=(pixel_x / 2.0, pixel_y / 2.0),
                                         group_by='solar_day',
                                         **query)
                # remove cloud and nodata
                data = remove_cloud_nodata(source_prod, data, mask_band)

                if no_partial_scenes:
                    # calculate valid data percentage
                    data = only_return_whole_scene(data)

            return_data = {
                source_prod: {
                    'data': data,
                    'mask_band': mask_band,
                    'find_list': sample_fd_ds
                }
            }

    return return_data
Example #21
def read_sp2(file_name, debug=False, arm_convention=True):
    """
    Loads a binary SP2 raw data file and returns all of the wave forms
    into an xarray Dataset.

    Parameters
    ----------
    file_name : str
        Path to the binary SP2 raw data file.
    debug : bool, optional
        If True, print diagnostic information while the file is parsed.
    arm_convention : bool, optional
        If True, assume the file name follows the ARM naming convention when
        extracting the file date from it.

    Returns
    -------
    xarray.Dataset or None
        Dataset containing the SP2 waveforms, or None if the file is empty.
    """

    my_data = open(file_name, "rb").read()
    # Get file date from name
    if platform.system() == "Windows":
        split_file_name = file_name.split("\\")
    else:
        split_file_name = file_name.split("/")
    if arm_convention:
        next_split = split_file_name[-1].split(".")
        dt = datetime.strptime(next_split[2], "%Y%m%d")
    else:
        dt = datetime.strptime(split_file_name[-1][0:8], "%Y%m%d")

    if len(my_data) > 0:
        bytepos = 0
        numCols = struct.unpack(">I", my_data[bytepos:bytepos + 4])[0]
        bytepos += 4
        numChannels = struct.unpack(">I", my_data[bytepos:bytepos + 4])[0]
        if debug:
            print(("Loaded file with numCols = {}, numChannels = {}".format(
                numCols, numChannels)))

        data_points_per_record = numChannels * numCols

        bytes_per_record = 2 * data_points_per_record
        bytes_not_data_array = 12 + 2 + 28 + 16
        bytes_per_record += bytes_not_data_array
        last_pos = int(bytes_per_record - 1)
        num_spare_cols = struct.unpack(">I", my_data[last_pos - 4:last_pos])[0]
        if debug:
            print("Number of spare columns = %d" % num_spare_cols)

        if num_spare_cols != 0:
            bytes_per_record += num_spare_cols

        numRecords = int(len(my_data) / bytes_per_record)
        totalRows = numChannels * numRecords
        DataWave = np.zeros((totalRows, numCols), dtype='int16')
        Flag = np.zeros(int(totalRows / numChannels), dtype='int16')
        TimeWave = np.zeros(numRecords, dtype='float64')
        Res1 = np.zeros(numRecords, dtype='float32')
        EventIndex = np.zeros(numRecords, dtype='float32')
        TimeDiv10000 = np.zeros(numRecords, dtype='float64')
        TimeRemainder = np.zeros(numRecords, dtype='float64')
        Res5 = np.zeros(numRecords, dtype='float32')
        Res6 = np.zeros(numRecords, dtype='float32')
        Res7 = np.zeros(numRecords, dtype='float64')
        Res8 = np.zeros(numRecords, dtype='float64')
        if num_spare_cols != 0:
            SpareDataArray = np.zeros((numRecords, num_spare_cols))

        arrayFmt = ">"
        for i in range(data_points_per_record):
            arrayFmt += "h"

        for record in range(numRecords):
            dataStartPoint = record * bytes_per_record + 8
            startRow = record * numChannels
            endRow = startRow + numChannels - 1
            the_row = np.array(
                struct.unpack(
                    arrayFmt, my_data[dataStartPoint:dataStartPoint +
                                      int(data_points_per_record * 2)]))

            DataWave[startRow:endRow + 1,
                     0:numCols] = the_row.reshape(numCols, numChannels).T
            dataStartPoint += data_points_per_record * 2
            Flag[record] = struct.unpack(
                ">h", my_data[dataStartPoint:dataStartPoint + 2])[0]
            next_floats = struct.unpack(
                ">ffffffff", my_data[dataStartPoint + 2:dataStartPoint + 34])
            TimeWave[record] = next_floats[0]
            Res1[record] = next_floats[1]
            EventIndex[record] = next_floats[2]
            TimeDiv10000[record] = next_floats[3]
            TimeRemainder[record] = next_floats[4]
            Res5[record] = next_floats[5]
            Res6[record] = next_floats[6]
            next_doubles = struct.unpack(
                ">dd", my_data[dataStartPoint + 34:dataStartPoint + 50])
            Res7[record] = next_doubles[0]
            Res8[record] = next_doubles[1]
            dataStartPoint += 50

            if num_spare_cols != 0:
                startRow = (2 * num_spare_cols) * record
                dataStartPoint += bytes_not_data_array - 4
                spareFmt = ">"
                for i in range(num_spare_cols):
                    spareFmt += "f"

                SpareDataArray[record] = np.array(
                    struct.unpack(
                        spareFmt, my_data[dataStartPoint:dataStartPoint +
                                          4 * num_spare_cols]))

        UTCtime = TimeDiv10000 * 10000 + TimeRemainder
        diff_epoch_1904 = (datetime(1970, 1, 1) -
                           datetime(1904, 1, 1)).total_seconds()
        UTCdatetime = np.array(
            [datetime.fromtimestamp(x - diff_epoch_1904) for x in UTCtime])
        DateTimeWaveUTC = UTCtime

        DateTimeWave = (dt - datetime(1904, 1, 1)).total_seconds() + TimeWave

        # Make an xarray dataset for SP2
        Flag = xr.DataArray(Flag, dims={'event_index': EventIndex})
        Res1 = xr.DataArray(Res1, dims={'event_index': EventIndex})
        Res5 = xr.DataArray(Res5, dims={'event_index': EventIndex})
        Res6 = xr.DataArray(Res6, dims={'event_index': EventIndex})
        Res7 = xr.DataArray(Res7, dims={'event_index': EventIndex})
        Res8 = xr.DataArray(Res8, dims={'event_index': EventIndex})
        Time = xr.DataArray(UTCdatetime, dims={'event_index': EventIndex})
        EventInd = xr.DataArray(EventIndex, dims={'event_index': EventIndex})
        DateTimeWaveUTC = xr.DataArray(UTCtime,
                                       dims={'event_index': EventIndex})
        DateTimeWave = xr.DataArray(DateTimeWave,
                                    dims={'event_index': EventIndex})
        TimeWave = xr.DataArray(TimeWave, dims={'event_index': EventIndex})
        my_ds = xr.Dataset({
            'time': Time,
            'Flag': Flag,
            'Res1': Res1,
            'Res5': Res5,
            'Res6': Res6,
            'Res7': Res7,
            'Res8': Res8,
            'EventIndex': EventInd,
            'DateTimeWaveUTC': DateTimeWaveUTC,
            'TimeWave': TimeWave,
            'DateTimeWave': DateTimeWave
        })

        for i in range(numChannels):
            temp_array = np.zeros((numRecords, numCols), dtype='int')
            for j in range(numRecords):
                k = i + j * numChannels
                temp_array[j] = DataWave[k]
            my_ds['Data_ch' + str(i)] = xr.DataArray(temp_array,
                                                     dims={
                                                         'event_index':
                                                         EventIndex,
                                                         'columns':
                                                         np.arange(0, 100, 1)
                                                     })
        del my_data
        del DataWave
        return my_ds
    else:
        return None
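A minimal usage sketch (not part of the original); the file name is hypothetical but follows the ARM naming convention that the default arm_convention=True expects.

my_ds = read_sp2('sgpaossp2auxC1.00.20190415.000000.raw.sp2b', debug=True)
if my_ds is not None:
    print(my_ds['Data_ch0'].shape)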
Example #22
def get_data_opensource_shapefile(prod_info, acq_min, acq_max, shapefile,
                                  no_partial_scenes):

    datacube_config = prod_info[0]
    source_prod = prod_info[1]
    source_band_list = prod_info[2]
    mask_band = prod_info[3]

    if datacube_config != 'default':
        remotedc = Datacube(config=datacube_config)
    else:
        remotedc = Datacube()

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        with fiona.open(shapefile) as shapes:
            crs = geometry.CRS(shapes.crs_wkt)
            first_geometry = next(iter(shapes))['geometry']
            geom = geometry.Geometry(first_geometry, crs=crs)

            return_data = {}
            data = xr.Dataset()

            if source_prod != '':
                # get a sample dataset to decide the target epsg
                fd_query = {'time': (acq_min, acq_max), 'geopolygon': geom}
                sample_fd_ds = remotedc.find_datasets(product=source_prod,
                                                      group_by='solar_day',
                                                      **fd_query)

                if (len(sample_fd_ds)) > 0:
                    # decide pixel size for output data
                    pixel_x, pixel_y = get_pixel_size(sample_fd_ds[0],
                                                      source_band_list)
                    log.info(
                        'Output pixel size for product {}: x={}, y={}'.format(
                            source_prod, pixel_x, pixel_y))

                    # get target epsg from metadata
                    target_epsg = get_epsg(sample_fd_ds[0])
                    log.info('CRS for product {}: {}'.format(
                        source_prod, target_epsg))

                    query = {
                        'time': (acq_min, acq_max),
                        'geopolygon': geom,
                        'output_crs': target_epsg,
                        'resolution': (-pixel_y, pixel_x),
                        'measurements': source_band_list
                    }

                    if 's2' in source_prod:
                        data = remotedc.load(product=source_prod,
                                             group_by='solar_day',
                                             **query)
                    else:
                        data = remotedc.load(product=source_prod,
                                             align=(pixel_x / 2.0,
                                                    pixel_y / 2.0),
                                             group_by='solar_day',
                                             **query)

                    # remove cloud and nodata
                    data = remove_cloud_nodata(source_prod, data, mask_band)

                    if data.data_vars:
                        mask = geometry_mask([geom], data.geobox, invert=True)
                        data = data.where(mask)

                    if no_partial_scenes:
                        # calculate valid data percentage
                        data = only_return_whole_scene(data)

                return_data = {
                    source_prod: {
                        'data': data,
                        'mask_band': mask_band,
                        'find_list': sample_fd_ds
                    }
                }

    return return_data
Example #23
def xr_concatenate_ragged(file_list, concat_dims):
    """
    Concatenate ragged array netCDF files (CF).

    Parameters
    ----------
    file_list : list of str or list of xr.Dataset
        Path and name of files to concatenate. Also accepts lists of datasets.
    concat_dims : list of str
        Dimensions along which to independently concatenate.

    Returns
    -------
    xr.Dataset
        Merged netCDF files.

    """
    # Init dictionary to contain size of each concat dimension
    dim_size = dict()
    for cd_ in concat_dims:
        dim_size[cd_] = 0

    # Check each input file for dimension sizes
    for f_ in file_list:
        # load file
        if isinstance(f_, xr.Dataset):
            ds = f_
        else:
            ds = xr.open_dataset(f_)

        # Determine size of each concat dimension
        for d_ in concat_dims:
            dim_size[d_] += ds[d_].size

    # Pre-allocate
    vars_ = dict()
    crds_ = dict()

    # --- variables
    for v_ in ds.data_vars:
        dim_, = ds[v_].dims
        vars_[v_] = ([dim_], np.empty(dim_size[dim_], dtype=ds[v_].dtype))

    # --- coordinates
    for c_ in ds.coords:
        dim_, = ds[c_].dims
        crds_[c_] = ([dim_], np.empty(dim_size[dim_], dtype=ds[c_].dtype))

    # Init indexing dictionaries
    dim_start = dict()
    row_width = dict()

    # --- start index along each concat dim
    for cd_ in concat_dims:
        dim_start[cd_] = 0

    # --- row size along each concat dim
    for cd_ in concat_dims:
        row_width[cd_] = 0

    # Loop over input files
    for f_ in file_list:

        # Load file
        if isinstance(f_, xr.Dataset):
            ds = f_
        else:
            ds = xr.open_dataset(f_)

        # Loop over coordinates
        for c_ in crds_.keys():

            # Determine along which dim
            d_, = ds[c_].dims

            # Determine start and stop indices
            row_start = dim_start[d_]
            row_width[d_] = ds[c_].size
            row_stop = row_start + row_width[d_]

            # Insert new values
            if c_ in concat_dims:

                # Offset by row start to keep unique value sample dims
                offset = row_start

            else:
                offset = 0
            crds_[c_][1][row_start:row_stop] = ds[c_].values + offset

        # Loop over variables
        for v_ in vars_.keys():

            # Determine along which dim
            d_, = ds[v_].dims

            # Determine start and stop indices
            row_start = dim_start[d_]
            row_width[d_] = ds[v_].size
            row_stop = row_start + row_width[d_]

            # Insert new values
            vars_[v_][1][row_start:row_stop] = ds[v_].values

        # Update row start indices
        for d_ in dim_start.keys():
            dim_start[d_] = dim_start[d_] + row_width[d_]

    # Form dataset
    dataset = xr.Dataset(vars_, coords=crds_)

    # Add CF attributes
    dataset = xr_cf_attributes(dataset)

    return dataset
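A small usage sketch (not part of the original): concatenate two toy ragged datasets along their 'obs' and 'profiles' dimensions. It assumes xr_cf_attributes, called at the end of the function, is importable from the same module.

import numpy as np
import xarray as xr

def _toy_ragged(n_obs, n_prof):
    # One 1-D data variable per dimension plus matching index coordinates.
    return xr.Dataset(
        {'temperature': ('obs', np.random.rand(n_obs)),
         'temperature_row_size': ('profiles', np.full(n_prof, n_obs // n_prof))},
        coords={'obs': np.arange(n_obs), 'profiles': np.arange(n_prof)})

merged = xr_concatenate_ragged([_toy_ragged(6, 2), _toy_ragged(4, 2)],
                               concat_dims=['obs', 'profiles'])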
Example #24
 def test_merge_no_conflicts_preserve_attrs(self):
     data = xr.Dataset({"x": ([], 0, {"foo": "bar"})})
     actual = xr.merge([data, data])
     assert data.identical(actual)
Example #25
def xr_cf_get_cast(rag_arr, n, depth_name='z'):
    """
    Get a single cast from a CF ragged array profiles dataset.

    Parameters
    ----------
    rag_arr: str or xarray.Dataset
        Path and name to nc file or ragged dataset.
    n: int
        Number id of the profile to get.
    depth_name: str
        Name of depth coordinate in `rag_arr`.

    Returns
    -------
    xarray.Dataset:
        Data structure containing one cast only.

    """
    # Read netcdf or pass xarray
    if isinstance(rag_arr, str):
        dset = xr.open_dataset(rag_arr)
    elif isinstance(rag_arr, xr.Dataset):
        dset = rag_arr
    else:
        raise TypeError('rag_arr is not string or xarray Dataset')

    # Variables along obs dimension
    var_names = [v_ for v_ in rag_arr.data_vars if 'obs' in rag_arr[v_].dims]

    # Get cast depth information

    c_strt = int(dset['%s_row_size' % depth_name][:n].values.sum())
    c_stop = c_strt + int(dset['%s_row_size' % depth_name][n].values)
    depth = dset[depth_name][c_strt:c_stop].values

    # Add requested variables
    cast = xr.Dataset(coords={depth_name: depth}, attrs=dset.attrs)

    # Loop over requested variables
    for variable in var_names:
        # Check this variable is not empty for this cast
        if dset['%s_row_size' % variable][n] > 0:
            # Get variable index values
            c_strt = int(dset['%s_row_size' % variable][:n].values.sum())
            c_stop = c_strt + int(dset['%s_row_size' % variable][n].values)

            # Assign
            cast[variable] = (depth_name, dset[variable][c_strt:c_stop])

    # Variables along obs dimension
    var_names = [
        v_ for v_ in rag_arr.data_vars if 'profiles' in rag_arr[v_].dims
    ]

    # Loop over requested variables
    for variable in var_names:
        # Assign
        cast.attrs[variable] = dset[variable].values[n]

    # Loop over requested variables
    for c_ in dset.coords:
        if c_ != 'z':
            # Assign
            cast = cast.assign_coords({c_: dset[c_].values[n]})

    return cast
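A brief usage sketch (not part of the original); the file name is hypothetical.

# Extract the third profile (n=2) from a CF ragged-array profile file.
cast = xr_cf_get_cast('ctd_profiles_ragged.nc', n=2, depth_name='z')
print(cast)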
Example #26
 def test_merge_dicts_simple(self):
     actual = xr.merge([{"foo": 0}, {"bar": "one"}, {"baz": 3.5}])
     expected = xr.Dataset({"foo": 0, "bar": "one", "baz": 3.5})
     assert actual.identical(expected)
Example #27
def test_guess_coord_axis_datetime():
    ds = xr.Dataset()
    ds["time"] = ("time", pd.date_range("2001-01-01", "2001-04-01"))
    dsnew = ds.cf.guess_coord_axis()
    assert dsnew.time.attrs == {"standard_name": "time", "axis": "T"}
Example #28
 def test_merge_dicts_dims(self):
     actual = xr.merge([{"y": ("x", [13])}, {"x": [12]}])
     expected = xr.Dataset({"x": [12], "y": ("x", [13])})
     assert actual.identical(expected)
Example #29
    def check_conservation(self, plot=True, axs=None, plot_resolutions=False):
        '''Check particle conservation for an aurora simulation.

        Parameters
        -----------------
        plot : bool, optional
            If True, plot time histories in each particle reservoir and display quality of particle conservation.
        axs : matplotlib.Axes instances, optional 
            Axes to pass to :py:meth:`~aurora.particle_conserv.check_particle_conserv`
            These may be the axes returned from a previous call to this function, to overlap 
            results for different runs. 
        
        Returns
        ------------
        out : dict
            Dictionary containing density of particles in each reservoir.
        axs : matplotlib.Axes instances , only returned if plot=True
            New or updated axes returned by :py:meth:`~aurora.particle_conserv.check_particle_conserv`
        '''
        import xarray  # import only if necessary

        nz, N_wall, N_div, N_pump, N_ret, N_tsu, N_dsu, N_dsul, rcld_rate, rclw_rate = self.res
        nz = nz.transpose(2, 1, 0)  # time,nZ,space

        if self.namelist['explicit_source_vals'] is None:
            source_time_history = self.source_time_history
        else:
            # if explicit source was provided, all info about the source is in the source_rad_prof array
            srp = xarray.Dataset(
                {
                    'source': (['time', 'rvol_grid'], self.source_rad_prof.T),
                    'pro': (['rvol_grid'], self.pro_grid),
                    'rhop_grid': (['rvol_grid'], self.rhop_grid)
                },
                coords={
                    'time': self.time_out,
                    'rvol_grid': self.rvol_grid
                })
            source_time_history = particle_conserv.vol_int(
                self.Raxis_cm, srp, 'source')

        # Check particle conservation
        ds = xarray.Dataset(
            {
                'impurity_density':
                (['time', 'charge_states', 'rvol_grid'], nz),
                'source_time_history': (['time'], source_time_history),
                'particles_in_divertor': (['time'], N_div),
                'particles_in_pump': (['time'], N_pump),
                'parallel_loss': (['time'], N_dsu),
                'parallel_loss_to_limiter': (['time'], N_dsul),
                'edge_loss': (['time'], N_tsu),
                'particles_at_wall': (['time'], N_wall),
                'particles_retained_at_wall': (['time'], N_ret),
                'recycling_from_wall': (['time'], rclw_rate),
                'recycling_from_divertor': (['time'], rcld_rate),
                'pro': (['rvol_grid'], self.pro_grid),
                'rhop_grid': (['rvol_grid'], self.rhop_grid)
            },
            coords={
                'time': self.time_out,
                'rvol_grid': self.rvol_grid,
                'charge_states': np.arange(nz.shape[1])
            })

        return particle_conserv.check_particle_conserv(self.Raxis_cm,
                                                       ds=ds,
                                                       plot=plot,
                                                       axs=axs)
Example #30
def spco2_sensitivity(ds):
    """Compute sensitivity of surface pCO2 to changes in driver variables.

    Args:
        ds (xr.Dataset): containing cmorized variables:
                         * spco2 [uatm]: ocean pCO2 at surface
                         * talkos[mmol m-3]: Alkalinity at ocean surface
                         * dissicos[mmol m-3]: DIC at ocean surface
                         * tos [C] : temperature at ocean surface
                         * sos [psu] : salinity at ocean surface

    Returns:
        sensitivity (xr.Dataset):

    References:
        * Lovenduski, Nicole S., Nicolas Gruber, Scott C. Doney, and Ivan D. Lima.
          “Enhanced CO2 Outgassing in the Southern Ocean from a Positive Phase of
          the Southern Annular Mode.” Global Biogeochemical Cycles 21, no. 2
          (2007). https://doi.org/10/fpv2wt.
        * Sarmiento, Jorge Louis, and Nicolas Gruber. Ocean Biogeochemical Dynamics.
          Princeton, NJ: Princeton Univ. Press, 2006., p.421, eq. (10:3:1)

    Examples:
        >>> from esm_analysis.carbon import spco2_sensitivity
        >>> import numpy as np
        >>> import xarray as xr
        >>> tos = xr.DataArray(np.random.randint(15, 30, size=(100, 10, 10)),
                dims=['time', 'lat', 'lon']).rename('tos')
        >>> sos = xr.DataArray(np.random.randint(30, 35, size=(100, 10, 10)),
                dims=['time', 'lat', 'lon']).rename('sos')
        >>> spco2 = xr.DataArray(np.random.randint(350, 400, size=(100, 10, 10)),
                dims=['time', 'lat', 'lon']).rename('spco2')
        >>> dissicos = xr.DataArray(np.random.randint(1900, 2100, size=(100, 10, 10)),
                dims=['time', 'lat', 'lon']).rename('dissicos')
        >>> talkos = xr.DataArray(np.random.randint(2100, 2300, size=(100, 10, 10)),
                dims=['time', 'lat', 'lon']).rename('talkos')
        >>> ds = xr.merge([tos, sos, spco2, dissicos, talkos])
        >>> sensitivity = spco2_sensitivity(ds)
    """
    def _check_variables(ds):
        requiredVars = ['spco2', 'tos', 'sos', 'talkos', 'dissicos']
        if not all(i in ds.data_vars for i in requiredVars):
            missingVars = [i for i in requiredVars if i not in ds.data_vars]
            raise ValueError(f"""Missing variables needed for calculation:
            {missingVars}""")

    _check_variables(ds)
    # Sensitivities are based on the time-mean for each field. This computes
    # sensitivities at each grid cell.
    # TODO: Add keyword for sliding mean, as in N year chunks of time to
    # account for trends.
    DIC = ds['dissicos']
    ALK = ds['talkos']
    SALT = ds['sos']
    pCO2 = ds['spco2']

    buffer_factor = dict()
    buffer_factor['ALK'] = -ALK**2 / ((2 * DIC - ALK) * (ALK - DIC))
    buffer_factor['DIC'] = (3 * ALK * DIC - 2 * DIC**2) / ((2 * DIC - ALK) *
                                                           (ALK - DIC))

    # Compute sensitivities
    sensitivity = dict()
    sensitivity['tos'] = 0.0423
    sensitivity['sos'] = 1 / SALT
    sensitivity['talkos'] = (1 / ALK) * buffer_factor['ALK']
    sensitivity['dissicos'] = (1 / DIC) * buffer_factor['DIC']
    sensitivity = xr.Dataset(sensitivity) * pCO2
    return sensitivity