Example #1
def if_then_else(condition, val_if_true, val_if_false):

    # checking if parameters are xarrays
    con_xr = isinstance(condition, xr.DataArray)
    true_xr = isinstance(val_if_true, xr.DataArray)
    false_xr = isinstance(val_if_false, xr.DataArray)
    # short circuit if a scalar condition
    # made to avoid computation of other part
    # if not con_xr:
    #    return val_if_true if condition else val_if_false

    # Todo improve this!!!
    if true_xr and false_xr and con_xr:
        val_if_true, val_if_false, condition = xr.align(
            val_if_true, val_if_false, condition)
        return val_if_true.where(condition, val_if_false)
    elif not true_xr and false_xr and con_xr:
        val_if_false, condition = xr.align(val_if_false, condition)
        return val_if_false.where(np.logical_not(condition), val_if_true)
    elif true_xr and not false_xr and con_xr:
        val_if_true, condition = xr.align(val_if_true, condition)
        return val_if_true.where(condition, val_if_false)
    elif true_xr and false_xr:
        val_if_true, val_if_false = xr.align(val_if_true, val_if_false)
        return val_if_true.where(condition, val_if_false)
    elif not true_xr and false_xr:
        return val_if_false.where(not condition, val_if_true)
    elif true_xr and not false_xr:
        return val_if_true.where(condition, val_if_false)
    elif con_xr:
        return (condition * 0 + val_if_true).where(condition, val_if_false)
    else:
        return np.where(condition, val_if_true, val_if_false)
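A minimal usage sketch for if_then_else above (illustrative, not from the original source); with a DataArray condition and scalar branches the result keeps the condition's coordinates:

# hypothetical usage of if_then_else defined above
import xarray as xr

cond = xr.DataArray([True, False, True], dims='time', coords={'time': [0, 1, 2]})
out = if_then_else(cond, 1.0, -1.0)
print(out.values)  # [ 1. -1.  1.] on the same 'time' coordinate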
Example #2
def lag_linregress_3D_range(x, y, lagx=0, lagy=0):
    x, y = xr.align(x, y)

    if lagx != 0:
        # If x lags y by 1, x must be shifted 1 step backwards.
        # Works with negative and positive lags
        # E.g., if lag = -1, x is shifted 1 step backwards
        # If lag = 2, x is shifted 2 steps forwards
        x = x.shift(time=lagx).dropna(dim='time')
        x, y = xr.align(x, y)

    if lagy != 0:
        y = y.shift(time=lagy).dropna(dim='time')
        x, y = xr.align(x, y)

    # 3. Compute data length, mean and standard deviation along time axis for further use:
    n = x.shape[0]
    xmean = x.mean(axis=0, skipna=True)
    ymean = y.mean(axis=0, skipna=True)
    xstd = x.std(axis=0, skipna=True)
    ystd = y.std(axis=0, skipna=True)

    # 4. Compute covariance along time axis
    cov = np.nansum((x - xmean) * (y - ymean), axis=0) / (n - 1)

    # 5. Compute correlation along time axis
    cor = cov / (xstd * ystd)

    return cor
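A quick, hypothetical check of the routine above on a synthetic series (names are illustrative). Note that xarray's std defaults to ddof=0 while the covariance above divides by n-1, so even a perfectly linear relationship gives n/(n-1) rather than exactly 1:

import numpy as np
import xarray as xr

t = np.arange(10)
x = xr.DataArray(np.arange(10.0), dims='time', coords={'time': t})
y = 2.0 * x + 1.0
print(float(lag_linregress_3D_range(x, y)))  # 10/9 ~ 1.11 because of the ddof mismatch noted above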
Example #3
def lag_linregress_3D(x, y, lagx=0, lagy=0):
    """
    Input: Two xr.DataArrays of any dimensions with the first dim being time. 
    Thus the input data could be a 1D time series, or for example, have three 
    dimensions (time,lat,lon). 
    Datasets can be provided in any order, but note that the regression slope 
    and intercept will be calculated for y with respect to x.
    Output: Covariance, correlation, regression slope and intercept, p-value, 
    and standard error on regression between the two datasets along their 
    aligned time dimension.  
    Lag values can be assigned to either of the data, with lagx shifting x, and
    lagy shifting y, with the specified lag amount. 
    """
    #1. Ensure that the data are properly aligned to each other.
    x, y = xr.align(x, y)

    #2. Add lag information if any, and shift the data accordingly
    if lagx != 0:

        # If x lags y by 1, x must be shifted 1 step backwards.
        # But as the 'zero-th' value is nonexistent, xr assigns it as invalid
        # (nan). Hence it needs to be dropped.
        x = x.shift(time=-lagx).dropna(dim='time')

        # Next important step is to re-align the two datasets so that y adjusts
        # to the changed coordinates of x
        x, y = xr.align(x, y)

    if lagy != 0:
        y = y.shift(time=-lagy).dropna(dim='time')
        x, y = xr.align(x, y)

    #3. Compute data length, mean and standard deviation along time axis:
    n = y.notnull().sum(dim='time')
    xmean = x.mean(axis=0)
    ymean = y.mean(axis=0)
    xstd = x.std(axis=0)
    ystd = y.std(axis=0)

    #4. Compute covariance along time axis
    cov = np.sum((x - xmean) * (y - ymean), axis=0) / (n)

    #5. Compute correlation along time axis
    cor = cov / (xstd * ystd)

    #6. Compute regression slope and intercept:
    slope = cov / (xstd**2)
    intercept = ymean - xmean * slope

    #7. Compute P-value and standard error
    #Compute t-statistics
    tstats = cor * np.sqrt(n - 2) / np.sqrt(1 - cor**2)
    stderr = slope / tstats

    from scipy.stats import t
    pval = t.sf(tstats, n - 2) * 2
    pval = xr.DataArray(pval, dims=cor.dims, coords=cor.coords)

    return cov, cor, slope, intercept, pval, stderr
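A hypothetical 1-D usage sketch for lag_linregress_3D above (synthetic data, illustrative names); with y a noisy linear function of x, the recovered slope and intercept should land near the true values:

import numpy as np
import xarray as xr

rng = np.random.default_rng(0)
t = np.arange(200)
x = xr.DataArray(rng.standard_normal(200), dims='time', coords={'time': t})
y = 3.0 * x + 2.0 + 0.1 * rng.standard_normal(200)
cov, cor, slope, intercept, pval, stderr = lag_linregress_3D(x, y)
print(float(slope), float(intercept))  # roughly 3.0 and 2.0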
Example #4
def lag_linregress(x, y, lagx=0, lagy=0):
    """
    Calculate the lead-lag linear regression.

    Parameters
    ----------
    x : xarray.DataArray
        Independent variable, with 'time' as one of its dimensions.
    y : xarray.DataArray
        Dependent variable, aligned with `x` along 'time'.
    lagx : int, optional
        Number of time steps by which to lag `x` before the regression.
    lagy : int, optional
        Number of time steps by which to lag `y` before the regression.

    Returns
    ----------
    re : tuple of xarray.DataArray
        Regression slope, intercept, correlation coefficient, p-value,
        and standard error of the regression between the two datasets along
        their aligned time dimension.  Lag values can be assigned to either
        of the data, with lagx shifting x, and lagy shifting y, by the
        specified lag amount.

    Input: Two xr.DataArrays of any dimensions with the first dim being time. 
    Thus the input data could be a 1D time series, or for example, have three dimensions (time,lat,lon). 
    Datasets can be provided in any order, but note that the regression slope and intercept will be calculated
    for y with respect to x.
    """
    #1. Ensure that the data are properly aligned to each other.
    x, y = xr.align(x, y)

    #2. Add lag information if any, and shift the data accordingly
    if lagx != 0:
        #If x lags y by 1, x must be shifted 1 step backwards.
        #But as the 'zero-th' value is nonexistent, xr assigns it as invalid (nan). Hence it needs to be dropped
        x = x.shift(time=-lagx).dropna(dim='time')
        #Next important step is to re-align the two datasets so that y adjusts to the changed coordinates of x
        x, y = xr.align(x, y)

    if lagy != 0:
        y = y.shift(time=-lagy).dropna(dim='time')
        x, y = xr.align(x, y)

    # slope, intercept, r_value, p_value, std_err = linregress(x, y)
    slp, itc, r, p, std = xr.apply_ufunc(
        linregress,
        x,
        y,
        dask='allowed',
        input_core_dims=[['time'], ['time']],
        output_core_dims=[[], [], [], [], []],
        # exclude_dims=set(('contour',)),
        # output_dtypes=[theta.dtype],
        vectorize=True)

    return slp, itc, r, p, std
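A hedged usage sketch for the apply_ufunc wrapper above; it assumes `from scipy.stats import linregress` is in scope (the function above relies on it) and shows that one regression is run per (lat, lon) point:

import numpy as np
import xarray as xr
from scipy.stats import linregress  # assumed import used by lag_linregress above

rng = np.random.default_rng(0)
x = xr.DataArray(rng.standard_normal((60, 3, 4)), dims=('time', 'lat', 'lon'))
y = 2.0 * x + 0.1 * rng.standard_normal((60, 3, 4))
slp, itc, r, p, std = lag_linregress(x, y)
print(slp.dims, float(slp.mean()))  # ('lat', 'lon'), mean slope close to 2.0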
Example #5
def multi_linregress_3D(x1, x2, y, lagx=0, lagy=0):
    """
    Input: Three xr.DataArrays (predictors x1 and x2, and target y) of any dimensions with the first dim being time. 
    Thus the input data could be a 1D time series, or for example, have three dimensions (time,lat,lon). 
    Data can be provided in any order, but note that the regression slopes and intercept will be calculated for y with respect to x1 and x2.
    Output: Regression slopes for x1 and x2, the intercept, the predicted y, and the coefficient of determination (r^2) along the aligned time dimension.
    """
    #1. Ensure that the data are properly aligned to each other.
    x1, y = xr.align(x1, y)
    x2, y = xr.align(x2, y)

    #3. Compute data length, mean and standard deviation along time axis for further use:
    n = y.notnull().sum(dim='time')
    x1mean = np.nanmean(x1, axis=0)
    x2mean = np.nanmean(x2, axis=0)
    ymean = np.nanmean(y, axis=0)
    x1std = np.nanstd(x1, axis=0)
    x2std = np.nanstd(x2, axis=0)
    ystd = np.nanstd(y, axis=0)

    #4. Compute covariance along time axis
    #cov   =  np.sum((x1 - x1mean)*(x2 - x2mean)*(y - ymean), axis=0) / (n)

    #5. Compute correlation along time axis
    #cor   = cov/(x1std*x2std*ystd)
    #6. Compute regression slope and intercept:
    slopex1 = (np.sum((x2)**2, axis=0) * np.sum((x1) * (y), axis=0) - np.sum(
        (x1) * (x2), axis=0) * np.sum((x2) * (y), axis=0)) / (np.sum(
            (x1) * (x1), axis=0) * np.sum((x2) * (x2), axis=0) - (np.sum(
                (x1) * (x2), axis=0))**2)
    slopex2 = (np.sum((x1)**2, axis=0) * np.sum((x2) * (y), axis=0) - np.sum(
        (x1) * (x2), axis=0) * np.sum((x1) * (y), axis=0)) / (np.sum(
            (x1) * (x1), axis=0) * np.sum((x2) * (x2), axis=0) - (np.sum(
                (x1) * (x2), axis=0))**2)
    intercept = ymean - x1mean * slopex1 - x2mean * slopex2

    y_pred = intercept + x1 * slopex1 + x2 * slopex2

    rss = np.nansum((y - y_pred)**2, axis=0)
    ss_tot = np.nansum((y - ymean)**2, axis=0)
    r_2 = 1 - (rss / ss_tot)
    r_2 = xr.DataArray(r_2, dims=intercept.dims, coords=intercept.coords)

    #7. Compute P-value and standard error
    #Compute t-statistics

    # tstats = cor*np.sqrt(n-2)/np.sqrt(1-cor**2)
    # stderr = slope/tstats
    # from scipy.stats import t
    # pval   = t.sf(tstats, n-2)*2
    # pval   = xr.DataArray(pval, dims=cor.dims, coords=cor.coords)

    return slopex1, slopex2, intercept, y_pred, r_2  #,pval,stderr
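A hypothetical sanity check of the closed-form two-predictor slopes above (not from the source). The slope formulas use raw, uncentred sums, so they match an ordinary least-squares fit with intercept only when the predictors are (close to) zero-mean; the check below centres them explicitly:

import numpy as np
import xarray as xr

rng = np.random.default_rng(0)
t = np.arange(300)
x1 = xr.DataArray(rng.standard_normal(300), dims='time', coords={'time': t})
x2 = xr.DataArray(rng.standard_normal(300), dims='time', coords={'time': t})
x1, x2 = x1 - x1.mean(), x2 - x2.mean()  # centre the predictors
y = 1.5 * x1 - 0.5 * x2 + 2.0 + 0.1 * rng.standard_normal(300)

s1, s2, itc, y_pred, r2 = multi_linregress_3D(x1, x2, y)

A = np.column_stack([x1.values, x2.values, np.ones(300)])
ref, *_ = np.linalg.lstsq(A, y.values, rcond=None)
print(float(s1), float(s2), ref[:2])  # the two slope estimates should agree closely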
Example #6
    def project_w(self, data, z=False, sel=None, align=True):
        """ projection on w-mode (varphi = -c**2/N2 * dphidz)
        for reconstructing, use w = wn*varphi (see reconstruct_w)
        
        interpolation uses linear interpolation, but midpoints should be OK 
            (since it gives something equivalent to trapezoidal integration upon integration)
            
        Parameters:
        ___________
        data: xarray.DataArray
        
        Returns:
        ________
        xarray.DataArray
        
        See also:
        _________
        project, reconstruct_w
        
        """

        if sel is None:
            dm = self.ds
        else:
            dm = self.ds.sel(sel)

        if align:
            data, dm = xr.align(data, dm, join="inner")

        _check_hdim_mismatch(data, dm)

        if not (z is None or z is False):
            if z is True:
                z, = gop.get_z_coord(data)
            if isinstance(z, str):
                z = data.coords[z]
            elif align:
                data, z = xr.align(data, z, join="inner")
            data = gop.interp2z(dm[self._znames['zc']], z, data)

        zf, zc = self._znames['zf'], self._znames["zc"]
        prov = (data * self._w2rho(-dm.dphidz, zc=dm[zc], zf=dm[zf]) *
                dm.dz).sum(self._zdims['zc'])

        if self.free_surf:
            prov += self.g * (-dm.dphidz / dm.N2 * self.xgrid.interp(
                data, self._xgrid_z, boundary="extrapolate")).isel({
                    self._zdims["zf"]:
                    -1
                }).drop(self._znames["zf"])

        return prov / dm.norm
Example #7
def align_debug():
    v2_base_path = "/braintree/data2/active/users/jjpr/mkgu_packaging/crcns/v2-1"
    nc_files = sorted(
        glob.glob(os.path.join(v2_base_path, "*/*/*.nc"), recursive=True))
    gd_arrays = []
    nonzeros_raw = []
    for f in (nc_files[0], nc_files[5]):
        print(f)
        gd_array = xr.open_dataarray(f)
        # gd_array = gd_array.T.rename({"image_file_name": "presentation"})
        # gd_array.coords["presentation_id"] = ("presentation", range(gd_array.shape[1]))
        # gd_array = gd_array.rename({"image_file_name": "presentation"})
        # gd_array.coords["presentation_id"] = ("presentation", range(gd_array.shape[0]))
        gd_array.coords["presentation_id"] = ("image_file_name",
                                              range(gd_array.shape[0]))
        # gd_array.coords["neuroid_id"] = ("neuroid", gd_array["neuroid"].values)
        # df_massage = pd.DataFrame(list(map(massage_file_name, gd_array["presentation"].values)))
        # for column in df_massage.columns:
        #     gd_array.coords[column] = ("presentation", df_massage[column])
        # gd_array.reset_index(["neuroid", "presentation"], drop=True, inplace=True)
        gd_array.reset_index("category_name", drop=True, inplace=True)
        mkgu.assemblies.gather_indexes(gd_array)
        gd_arrays.append(gd_array)
        nonzeros_raw.append(np.nonzero(~np.isnan(gd_array)))
    print("nonzeros_raw: ")
    print(nonzeros_raw)
    align_test = xr.align(*gd_arrays, join="outer")
    nonzeros_aligned = [np.nonzero(~np.isnan(da)) for da in align_test]
    print("nonzeros_aligned: ")
    print(nonzeros_aligned)
    assert nonzeros_raw[0][0].shape == nonzeros_aligned[0][0].shape
Example #8
def river_3d(
    geo, sea_level, coastline_rho,
):
    
    assert type(sea_level) == xr.core.dataarray.DataArray
    assert type(geo) == xr.core.dataarray.Dataset

    top_active_layer = xr.where(geo["IBOUND"]==1, geo.layer, np.nan).min(dim="z")
    
    h_grid, dhdx, outer_ridge = river_grid(geo, sea_level, coastline_rho)
    h_grid = xr.Dataset({"h_grid" : h_grid})
    z_bins = _mid_to_binedges(geo["z"].values)

    h_grid = h_grid.groupby_bins("h_grid", z_bins, labels=geo.layer).apply(_dumb).rename({"h_grid_bins" : "h_l"})
    h_grid = h_grid.sortby("x").sortby("y")

    #Needed for xarray > 0.15
    h_grid, top_active_layer = xr.align(h_grid, top_active_layer, join="outer")

    #Ensure river layer does not exceed IBOUND.
    h_grid["h_l"] = xr.where(h_grid["h_l"] < top_active_layer, top_active_layer, h_grid["h_l"]) 

    riv = h_grid * geo["IBOUND"].where((geo["IBOUND"] == 1) & (geo.layer == h_grid["h_l"]))
    
    return(riv, z_bins, dhdx, outer_ridge)
Example #9
def align_cdump(cdump1, cdump2, dd, tag1, tag2):
    cdump1 = align_subA(cdump1)
    cdump2 = align_subA(cdump2)
    minlat = np.min([np.min(cdump1.latitude.values), np.min(cdump2.latitude.values)])
    maxlat = np.max([np.max(cdump1.latitude.values), np.max(cdump2.latitude.values)])
    maxlon = np.max([np.max(cdump1.longitude.values), np.max(cdump2.longitude.values)])
    minlon = np.min([np.min(cdump1.longitude.values), np.min(cdump2.longitude.values)])

    # round to the nearest hundredth
    minlat = np.round(minlat * 100) / 100
    maxlat = np.round(maxlat * 100) / 100
    minlon = np.round(minlon * 100) / 100
    maxlon = np.round(maxlon * 100) / 100

    print(minlon, maxlon)
    print(minlat, maxlat)
    nlat = np.abs(np.ceil((maxlat - minlat) / dd)) + 1
    nlon = np.abs(np.ceil((maxlon - minlon) / dd)) + 1
    conc1 = par2conc.reindex(cdump1, minlat, minlon, nlat, nlon, dd, dd)
    conc2 = par2conc.reindex(cdump2, minlat, minlon, nlat, nlon, dd, dd)
    conc1 = conc1.drop("latitude")
    conc2 = conc2.drop("latitude")
    conc1 = conc1.drop("longitude")
    conc2 = conc2.drop("longitude")
    new1, new2 = xr.align(conc1, conc2, join="outer")
    new1.expand_dims("run")
    new1["run"] = tag1
    new2.expand_dims("run")
    new2["run"] = tag2
    return xr.concat([new1, new2], dim="run")
Example #10
def align(*objects, **kwargs):
    """Given any number of Dataset objects, returns new
    objects with aligned indexes.

    Arrays from the aligned objects are suitable as input to mathematical
    operators, because along each dimension they have the same indexes.

    Missing values (if ``join != 'inner'``) are filled with NaN.

    Parameters
    ----------
    *objects : Dataset
        Objects to align.
    join : {'outer', 'inner', 'left', 'right'}, optional
        Method for joining the indexes of the passed objects along each
        dimension:
        - 'outer': use the union of object indexes
        - 'inner': use the intersection of object indexes
        - 'left': use indexes from the first object with each dimension
        - 'right': use indexes from the last object with each dimension
    copy : bool, optional
        If ``copy=True``, the returned objects contain all new variables. If
        ``copy=False`` and no reindexing is required then the aligned objects
        will include original variables.

    Returns
    -------
    aligned : same as *objects
        Tuple of objects with aligned coordinates.
    """
    xarray_datasets = [obj.xarray for obj in objects]
    # align expects the objects to be passed individually, not as a list
    aligned_datasets = xarray.align(*xarray_datasets, **kwargs)
    return [Dataset(ds) for ds in aligned_datasets]
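For reference, a small illustration of the join behaviour described in the docstring, using plain xarray objects rather than the wrapper class above:

import xarray as xr

a = xr.DataArray([1, 2, 3], dims='x', coords={'x': [0, 1, 2]})
b = xr.DataArray([10, 20, 30], dims='x', coords={'x': [1, 2, 3]})

inner_a, inner_b = xr.align(a, b, join='inner')  # x = [1, 2]
outer_a, outer_b = xr.align(a, b, join='outer')  # x = [0, 1, 2, 3]
print(outer_a.values)  # [ 1.  2.  3. nan] -- gaps are filled with NaN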
Example #11
def read_all_ensembles_extremes():
    """
    This function reads all the extremes into a dataset, concatenating in
    time and along a new ensemble dimension.
    The resulting dimensions of the dataset should be
    (ensemble member, year, lat, lon).
    
    Returns a dataset, with four arrays according to each type of extreme
    (hot/cold djf/jja).
    """
    extremes_pth = "$somepath/extreme_counts/"
    member_ids = [i for i in chain(range(2, 36), range(101, 106))]

    dsets = []
    for mem in member_ids:
        glob_res = sorted(glob(f'{extremes_pth}/*.{mem:03d}*.nc'))
        dsets.append(
            xr.open_mfdataset(glob_res, concat_dim='year', combine='nested'))

    dsets_aligned = xr.align(*dsets, join='inner')
    first = dsets_aligned[0]
    rest = [ds.reset_coords(drop=True) for ds in dsets_aligned[1:]]
    objs_to_concat = [first] + rest

    # concatenate
    ensemble_dim = xr.DataArray(member_ids, dims='member_id', name='member_id')
    ds = xr.concat(objs_to_concat, dim=ensemble_dim, coords='minimal')

    # restore non_dim_coords to variables
    non_dim_coords_reset = set(ds.coords) - set(ds.dims)
    ds = ds.reset_coords(non_dim_coords_reset)

    return ds
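A minimal, hypothetical sketch of the concatenation step used above: per-member datasets are aligned and then stacked along a new member_id dimension:

import numpy as np
import xarray as xr

members = [xr.Dataset({'hot_djf': ('year', np.random.rand(3))},
                      coords={'year': [2000, 2001, 2002]}) for _ in range(2)]
members = xr.align(*members, join='inner')
member_dim = xr.DataArray([2, 3], dims='member_id', name='member_id')
ens = xr.concat(members, dim=member_dim)
print(ens['hot_djf'].dims)  # ('member_id', 'year')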
Example #12
def scatter_xarray(x,
                   y,
                   hue="location",
                   time="time",
                   ax=None,
                   window=1,
                   xlim=None,
                   ylim=None,
                   **kwargs):
    if ax is None:
        _, ax = subplots()

    if window != 1:
        x = x.rolling({time: window}, center=True).mean().dropna(time)
        y = y.rolling({time: window}, center=True).mean().dropna(time)

    x, y = xr.align(x, y)

    for h, color in zip(x[hue].values, itertools.cycle(sns.color_palette())):
        xx = x.sel(**{hue: h}).values
        yy = y.sel(**{hue: h}).values
        ax.plot(xx, yy, "-", color=color, alpha=0.3, linewidth=2)
        ax.plot(xx[-1:], yy[-1:], "o", color=color, label=h, **kwargs)
        xp = xx[-1:] * 1.05
        yp = yy[-1:]
        if (xlim is None or xlim[0] < xp < xlim[1]) and \
                (ylim is None or ylim[0] < yp < ylim[1]):
            ax.annotate(h, (xp, yp), color=color)

    return ax
Example #13
 def test_align_and_fillna_complex(self):
     ds1 = case_runner_to_ds(foo2_zarray1_zarray2, fn_args=['a', 'b'],
                             cases=[(1j, 10), (2j, 20)],
                             var_names=['x', 'y'],
                             var_dims=[['time']],
                             var_coords={'time':
                                         ['a', 'b', 'c', 'd', 'e']})
     ds2 = case_runner_to_ds(foo2_zarray1_zarray2, fn_args=['a', 'b'],
                             cases=[(2j, 10), (1j, 20)],
                             var_names=['x', 'y'],
                             var_dims=[['time']],
                             var_coords={'time':
                                         ['a', 'b', 'c', 'd', 'e']})
     assert not np.logical_not(np.isnan(ds1['x'].data)).all()
     assert not np.logical_not(np.isnan(ds1['y'].data)).all()
     assert not np.logical_not(np.isnan(ds2['x'].data)).all()
     assert not np.logical_not(np.isnan(ds2['y'].data)).all()
     assert all(t == complex for t in (ds1.x.dtype, ds2.x.dtype,
                                       ds1.y.dtype, ds2.y.dtype))
     assert ds1.y.dtype == complex
     assert ds2.y.dtype == complex
     ds1, ds2 = xr.align(ds1, ds2, join='outer')
     fds = ds1.fillna(ds2)
     assert np.logical_not(np.isnan(fds['x'].data)).all()
     assert np.logical_not(np.isnan(fds['y'].data)).all()
Example #14
def mix_weights(primary, secondary, max_weight=0.049):
    primary, secondary = xr.align(primary, secondary, join='outer')

    primary = primary.fillna(0)
    secondary = secondary.fillna(0)

    primary_exposure = qnstats.calc_exposure(primary)
    primary_max_exposure = primary_exposure.max('asset')
    primary_abs_sum = abs(primary).sum('asset')

    secondary_exposure = qnstats.calc_exposure(secondary)
    secondary_max_exposure = secondary_exposure.max('asset')
    secondary_abs_sum = abs(secondary).sum('asset')

    # formula
    k = primary_abs_sum * (primary_max_exposure - max_weight) / \
        (secondary_abs_sum * ( max_weight - secondary_max_exposure) )

    k = k.where(k > 0, 0)  # k > 0

    mix = primary + secondary * k
    # normalization
    sum = abs(mix).sum('asset')
    sum = sum.where(sum > 1, 1)
    mix = mix / sum

    return mix
Example #15
    def group(self, datasets: VirtualDatasetBag,
              **group_settings: Dict[str, Any]) -> VirtualDatasetBox:
        self._assert(
            'juxtapose' in datasets.bag
            and len(datasets.bag['juxtapose']) == len(self._children),
            "invalid dataset bag")

        groups = [
            product.group(
                VirtualDatasetBag(dataset_bag, datasets.geopolygon,
                                  datasets.product_definitions),
                **group_settings) for product, dataset_bag in zip(
                    self._children, datasets.bag['juxtapose'])
        ]

        aligned_boxes = xarray.align(*[grouped.box for grouped in groups])

        def tuplify(indexes, _):
            return {
                'juxtapose':
                [box.sel(**indexes).item() for box in aligned_boxes]
            }

        return VirtualDatasetBox(
            xr_apply(aligned_boxes[0], tuplify),
            select_unique([grouped.geobox for grouped in groups]),
            select_unique([grouped.load_natively for grouped in groups]),
            merge_dicts([grouped.product_definitions for grouped in groups]),
            geopolygon=select_unique(
                [grouped.geopolygon for grouped in groups]))
Example #16
    def read_ds_anew():
        ds = []
        for i, task_name in enumerate(params.tasks):

            if not task_name:
                task_name = params.matrix_directory
                name = params.matrix_labels
            else:
                name = params.matrix_labels[i]

            if params.verbose:
                print(
                    f'\nReading in {task_name} data from directory {params.matrix_directory}...'
                )

            partial, subjects = read_mat_data(
                f'{input_dir}/{params.matrix_directory}/{task_name}')
            nodes = [f'node_{x}' for x in range(partial.shape[-1])]

            partial = xr.DataArray(partial.squeeze(),
                                   coords=[subjects, nodes, nodes],
                                   dims=['subject', 'dim1', 'dim2'],
                                   name=name)

            ds.append(partial)

        ds = xr.align(*ds,
                      join='inner')  # 'inner' takes intersection of ds objects
        ds = xr.merge(ds, compat='override', join='exact')

        return ds
Example #17
 def test_align_and_fillna_complex(self):
     ds1 = case_runner_to_ds(foo2_zarray1_zarray2,
                             fn_args=['a', 'b'],
                             cases=[(1j, 10), (2j, 20)],
                             var_names=['x', 'y'],
                             var_dims={('x', 'y'): 'time'},
                             var_coords={'time': ['a', 'b', 'c', 'd', 'e']})
     ds2 = case_runner_to_ds(foo2_zarray1_zarray2,
                             fn_args=['a', 'b'],
                             cases=[(2j, 10), (1j, 20)],
                             var_names=['x', 'y'],
                             var_dims={('x', 'y'): 'time'},
                             var_coords={'time': ['a', 'b', 'c', 'd', 'e']})
     assert not np.logical_not(np.isnan(ds1['x'].data)).all()
     assert not np.logical_not(np.isnan(ds1['y'].data)).all()
     assert not np.logical_not(np.isnan(ds2['x'].data)).all()
     assert not np.logical_not(np.isnan(ds2['y'].data)).all()
     assert all(t == complex for t in (ds1.x.dtype, ds2.x.dtype,
                                       ds1.y.dtype, ds2.y.dtype))
     assert ds1.y.dtype == complex
     assert ds2.y.dtype == complex
     ds1, ds2 = xr.align(ds1, ds2, join='outer')
     fds = ds1.fillna(ds2)
     assert np.logical_not(np.isnan(fds['x'].data)).all()
     assert np.logical_not(np.isnan(fds['y'].data)).all()
Example #18
def cor(x, y, time_axis = 0,lagx=0, lagy=0):
    """
    Computes Pearson Correlation coefficient between x and y along time dimension, accounting for given lags (if any)
    Input: Two single- or multi-dimensional xarray DataArray objects (x and y) which have 'time' as the first dimension
        Default time axis is considered as 0, but can be changed using the 'time_axis' argument.
        Lag values (lagx for input data x, and lagy for input data y) can also be prescribed. Default lag values are zero.
    Output: An xarray DataArray object showing Pearson Correlation coefficient between x and y along the 'time' dimension
        If lag values are provided, the returned object will show lagged correlation.
    """
    #1. Add lag information if any, and shift the data accordingly
    if lagx!=0:
        #If x lags y by 1, x must be shifted 1 step backwards.
        #But as the 'zero-th' value is nonexistent, xr assigns it as invalid (nan). Hence it needs to be dropped
        x   = x.shift(time = -lagx).dropna(dim='time', how = 'all')

    if lagy!=0:
        y   = y.shift(time = -lagy).dropna(dim='time', how = 'all')

    #2. Ensure that the data are properly aligned to each other.
    x,y = xr.align(x,y)

    #3. Compute data length, mean and standard deviation along time dimension for further use:
    n     = x.time.shape[0]
    xmean = x.mean(dim='time')
    ymean = y.mean(dim='time')
    xstd  = x.std(dim='time')
    ystd  = y.std(dim='time')

    #4. Compute covariance along time dimension
    cov   =  np.sum((x - xmean)*(y - ymean), axis=time_axis)/(n)

    #5. Compute correlation along time dimension
    cor   = cov/(xstd*ystd)

    return cor
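A hypothetical usage sketch for cor above (illustrative names): a series and a copy of it delayed by two steps recover a correlation of one once the matching lag is supplied:

import numpy as np
import xarray as xr

t = np.arange(50)
base = xr.DataArray(np.sin(t / 5.0), dims='time', coords={'time': t})
delayed = base.shift(time=2)  # 'delayed' lags 'base' by two steps
print(float(cor(delayed, base, lagx=2)))  # ~1.0: x is shifted two steps backwards so the series line up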
Example #19
 def getGridInformation(self):
     """
     """
     atm = ocn = lnd = None
     try:
         atm = self._getVariableChild("areacella",
                                      synonyms=["area"]).convert("m2")
         atm = atm.ds[atm.varname]
     except Exception as e:
         pass
     try:
         ocn = self._getVariableChild("areacello").convert("m2")
         ocn = ocn.ds[ocn.varname]
     except Exception as e:
         pass
     try:
         lnd = self._getVariableChild("sftlf",
                                      synonyms=["landfrac"]).convert("1")
         lnd = lnd.ds[lnd.varname]
     except Exception as e:
         pass
     if atm is not None and lnd is not None:
         atm, lnd = xr.align(atm, lnd, join='override', copy=False)
     if atm is not None: self.area_atm = atm
     if ocn is not None: self.area_ocn = ocn
     if lnd is not None: self.frac_lnd = lnd
     for child in self.children:
         self.children[child].getGridInformation()
Example #20
def align_bug_reproduce():
    dims = ("x", "y")
    shape = (10, 5)
    das = []
    for j in (0, 1):
        data = np.full(shape, np.nan, dtype="float64")
        for i in range(shape[0]):
            data[i, i % shape[1]] = float(i)
        coords_d = {
            "ints": ("x", range(j * shape[0], (j + 1) * shape[0])),
            "nans": ("x", np.array([np.nan] * shape[0], dtype="float64")),
            "lower": ("y", list(string.ascii_lowercase[:shape[1]]))
        }
        da = xr.DataArray(data=data, dims=dims, coords=coords_d)
        da.set_index(append=True,
                     inplace=True,
                     x=["ints", "nans"],
                     y=["lower"])
        das.append(da)
    nonzeros_raw = [np.nonzero(~np.isnan(da)) for da in das]
    print("nonzeros_raw: ")
    print(nonzeros_raw)
    aligned = xr.align(*das, join="outer")
    nonzeros_aligned = [np.nonzero(~np.isnan(da)) for da in aligned]
    print("nonzeros_aligned: ")
    print(nonzeros_aligned)
    assert nonzeros_raw[0][0].shape == nonzeros_aligned[0][0].shape
Example #21
def xrmerge(das, accept_new=True):
    """
    Merges xarrays with different dimension sets
    Parameters
    ----------
    das : list of data_arrays

    accept_new

    Returns
    -------
    da : an xarray that is the merge of das

    References
    ----------
    Thanks to @jcmgray https://github.com/pydata/xarray/issues/742#issue-130753818

    In the future, we may not need this as xarray may provide the merge for us.
    """
    da = das[0]
    for new_da in das[1:]:
        # Expand both to have same dimensions, padding with NaN
        da, new_da = xr.align(da, new_da, join='outer')
        # Fill NaNs one way or the other re. accept_new
        da = new_da.fillna(da) if accept_new else da.fillna(new_da)
    return da
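A hypothetical example of the merge above with two DataArrays on disjoint coordinates:

import xarray as xr

a = xr.DataArray([1.0, 2.0], dims='x', coords={'x': [0, 1]}, name='v')
b = xr.DataArray([3.0, 4.0], dims='x', coords={'x': [2, 3]}, name='v')
print(xrmerge([a, b]).values)  # [1. 2. 3. 4.] on the union index x = [0, 1, 2, 3]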
Example #22
    def setUp(self):
        self.relief_map = xr.open_rasterio(test_relief,
                                           parse_coordinates=True)[0]
        self.slope_map = xr.open_rasterio(test_slope,
                                          parse_coordinates=True)[0]
        self.pga = xr.open_dataset(test_pga)
        self.saturation = xr.open_rasterio(test_saturation,
                                           parse_coordinates=True)[0]
        self.friction = xr.open_rasterio(test_friction,
                                         parse_coordinates=True)[0]
        self.cohesion = xr.open_rasterio(test_cohesion,
                                         parse_coordinates=True)[0]
        self.Dn_single = xr.open_dataset(test_Dn_single)["Dn"]
        self.Dn_set = xr.open_dataset(test_Dn_set)

        (
            self.relief_map,
            self.slope_map,
            self.pga,
            self.saturation,
            self.friction,
            self.cohesion,
        ) = xr.align(
            self.relief_map,
            self.slope_map,
            self.pga,
            self.saturation,
            self.friction,
            self.cohesion,
            join="override",
        )
Example #23
def check_forward_looking(cropped_output, whole_output):
    cropped_output = sort_and_crop_output(cropped_output)
    whole_output = sort_and_crop_output(whole_output)

    max_time = min(cropped_output.coords[ds.TIME].values.max(),
                   whole_output.coords[ds.TIME].values.max())

    cropped_output = cropped_output.loc[:max_time]
    whole_output = whole_output.loc[:max_time]

    cropped_output, whole_output = xr.align(cropped_output,
                                            whole_output,
                                            join='outer')

    cropped_output = cropped_output.fillna(0)
    whole_output = whole_output.fillna(0)

    diff = whole_output - cropped_output
    # print(diff.where(diff!=0).dropna('time', 'all').dropna('asset','all'))
    delta = abs(diff).max().values
    if delta > FORWARD_LOOKING_TEST_DELTA:
        print('WARNING: This strategy uses forward looking! Delta = ' +
              str(delta))
        return True
    else:
        print('Ok. There is no forward looking.')
        return False
Example #24
    def set_dataset(self, array_list, align_type="outer"):
        """

        :param array_list: list of xarrays
        :type array_list: list of :class:`mth5.timeseries.ChannelTS` objects
        :param align_type: how the different times will be aligned
            * 'outer': use the union of object indexes
            * 'inner': use the intersection of object indexes
            * 'left': use indexes from the first object with each dimension
            * 'right': use indexes from the last object with each dimension
            * 'exact': instead of aligning, raise ValueError when indexes to
            be aligned are not equal
            * 'override': if indexes are of same size, rewrite indexes to
            be those of the first object with that dimension. Indexes for
            the same dimension must have the same size in all objects.
        :type align_type: string

        """
        if isinstance(array_list, (list, tuple)):
            x_array_list = self._validate_array_list(array_list)

            # first need to align the time series.
            x_array_list = xr.align(*x_array_list, join=align_type)

            # input as a dictionary
            xdict = dict([(x.component.lower(), x) for x in x_array_list])
            self._dataset = xr.Dataset(xdict)

        elif isinstance(array_list, xr.Dataset):
            self._dataset = array_list

        self.validate_metadata()
        self._dataset.attrs.update(self.run_metadata.to_dict(single=True))
Example #25
    def group(self, datasets: VirtualDatasetBag,
              **search_terms: Dict[str, Any]) -> VirtualDatasetBox:
        self._assert(
            'juxtapose' in datasets.pile
            and len(datasets.pile['juxtapose']) == len(self._children),
            "invalid dataset pile")

        groups = [
            product.group(
                VirtualDatasetBag(dataset_pile, datasets.geopolygon,
                                  datasets.product_definitions),
                **search_terms) for product, dataset_pile in zip(
                    self._children, datasets.pile['juxtapose'])
        ]

        aligned_piles = xarray.align(*[grouped.pile for grouped in groups])

        def tuplify(indexes, _):
            return {
                'juxtapose':
                [pile.sel(**indexes).item() for pile in aligned_piles]
            }

        return VirtualDatasetBox(
            xr_apply(aligned_piles[0], tuplify),
            select_unique([grouped.geobox for grouped in groups]),
            merge_dicts([grouped.product_definitions for grouped in groups]))
Example #26
    def _process(self, overlay, key=None):
        if not isinstance(overlay, CompositeOverlay):
            return overlay
        elif len(overlay) == 1:
            return overlay.last if isinstance(overlay, NdOverlay) else overlay.get(0)

        imgs = []
        for rgb in overlay:
            if not isinstance(rgb, RGB):
                raise TypeError('stack operation expect RGB type elements, '
                                'not %s name.' % type(rgb).__name__)
            rgb = rgb.rgb
            dims = [kd.name for kd in rgb.kdims][::-1]
            coords = {kd.name: rgb.dimension_values(kd, False)
                      for kd in rgb.kdims}
            imgs.append(tf.Image(self.uint8_to_uint32(rgb), coords=coords, dims=dims))

        try:
            imgs = xr.align(*imgs, join='exact')
        except ValueError:
            raise ValueError('RGB inputs to stack operation could not be aligned, '
                             'ensure they share the same grid sampling.')

        stacked = tf.stack(*imgs, how=self.p.compositor)
        arr = shade.uint32_to_uint8(stacked.data)[::-1]
        data = (coords[dims[1]], coords[dims[0]], arr[:, :, 0],
                arr[:, :, 1], arr[:, :, 2])
        if arr.shape[-1] == 4:
            data = data + (arr[:, :, 3],)
        return rgb.clone(data, datatype=[rgb.interface.datatype]+rgb.datatype)
Example #27
def divide_sessions_old(temp_comp, div_dict):
    temp_comp_div_list = []
    len_frame = len(temp_comp.coords['frame'])
    for s_orig, s_group in temp_comp.groupby('session_id'):
        if s_orig in div_dict.keys():
            div = div_dict[s_orig]
            s_new_array = np.empty(len_frame, dtype='U10')
            sd_new_array = np.empty(len_frame)
            nan_mask = np.full(len_frame, False)
            for s_new, sd_new in div.items():
                b, e = sd_new
                s_new_array[b:e] = s_new
                sd_new_array[b:e] = np.arange(e - b)
                nan_mask[b:e] = True
            s_new_array[nan_mask] = 'trivial'
            sd_new_array[nan_mask] = np.arange(np.sum(nan_mask))
            idx_new = pd.MultiIndex.from_arrays(
                [s_new_array, sd_new_array],
                names=['segment_id', 'frame_split'])
            s_group.coords['frame'] = idx_new
        else:
            s_new_array = np.full(len_frame, 'all')
            idx_new = pd.MultiIndex.from_arrays(
                [s_new_array, s_group.coords['frame']],
                names=['segment_id', 'frame_split'])
            s_group.coords['frame'] = idx_new
        s_group = s_group.unstack('frame').rename({'frame_split': 'frame'})
        print("finished unstacking for " + s_orig)
        temp_comp_div_list.append(s_group)
    temp_comp_div = xr.align(
        *temp_comp_div_list,
        copy=False,
        join='outer',
        exclude=('animal', 'mapping_id'))
    return xr.concat(temp_comp_div, dim='session_id')
Example #28
def fix_grid_continuity(dset):
    # if grid is already continuous, don't do anything.
    if check_grid_continuity(dset):
        return dset

    xvv = dset.x.values
    yvv = dset.y.values

    xlim = [xvv[0], xvv[-1]]
    ylim = [yvv[0], yvv[-1]]

    xindx = np.arange(xlim[0], xlim[1] + 1)
    yindx = np.arange(ylim[0], ylim[1] + 1)

    mgrid = get_latlongrid(dset, xindx, yindx)
    # mgrid = get_even_latlongrid(dset, xlim, ylim)
    conc = np.zeros_like(mgrid[0])
    dummy = xr.DataArray(conc, dims=["y", "x"])
    dummy = dummy.assign_coords(latitude=(("y", "x"), mgrid[1]))
    dummy = dummy.assign_coords(longitude=(("y", "x"), mgrid[0]))
    dummy = dummy.assign_coords(x=(("x"), xindx))
    dummy = dummy.assign_coords(y=(("y"), yindx))
    cdset, dummy2 = xr.align(dset, dummy, join="outer")
    cdset = cdset.assign_coords(latitude=(("y", "x"), mgrid[1]))
    cdset = cdset.assign_coords(longitude=(("y", "x"), mgrid[0]))
    return cdset.fillna(0)
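The core trick above is an outer align against a dummy array that spans the full index range, followed by fillna(0); a minimal sketch of that pattern with synthetic data:

import numpy as np
import xarray as xr

sparse = xr.DataArray([[1.0]], dims=('y', 'x'), coords={'y': [2], 'x': [5]})
dummy = xr.DataArray(np.zeros((4, 8)), dims=('y', 'x'),
                     coords={'y': np.arange(4), 'x': np.arange(8)})
full, _ = xr.align(sparse, dummy, join='outer')
print(full.fillna(0).shape)  # (4, 8): the sparse field embedded in a continuous grid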
Example #29
 def _integrate_space(self, region=None, mean=False):
     assert "cell_measure" in self.ds
     ds = self.ds if region is None else ilamb_regions.maskedDataset(
         region, self)
     da = ds[self.varname]
     cm = ds['cell_measure']
     v, dx = xr.align(da,
                      xr.where(cm < 1, np.nan, cm),
                      join='override',
                      copy=False)
     out = (v * dx).sum(dx.dims)
     units = Unit(self.ds[self.varname].attrs['units'])
     out.attrs = {
         key: a
         for key, a in v.attrs.items() if "cell_" not in key
     }
     if 'ilamb' not in out.attrs: out.attrs['ilamb'] = ''
     out.attrs['ilamb'] += "integrate(dim='space',mean=%s%s); " % (
         mean, "" if region is None else ",region='%s'" % region)
     if mean:
         mask = da.isnull()
         dims = set(mask.dims).difference(set(dx.dims))
         if dims: mask = mask.all(dims)
         out /= (dx * (mask == False)).sum()
     else:
         if 'm-2' in str(units):
             units = str(units).replace("m-2", "")
         else:
             units *= Unit('m2')
     out.attrs['units'] = str(units)
     tm = self.ds.time_measure if "time_measure" in self.ds else None
     v = Variable(da=out, varname=str(da.name) + "_sint", time_measure=tm)
     return v
Example #30
def xrmerge(das, accept_new=True):
    """
    Merges xarrays with different dimension sets
    Parameters
    ----------
    das : list of data_arrays

    accept_new

    Returns
    -------
    da : an xarray that is the merge of das

    References
    ----------
    Thanks to @jcmgray https://github.com/pydata/xarray/issues/742#issue-130753818

    In the future, we may not need this as xarray may provide the merge for us.
    """
    da = das[0]
    for new_da in das[1:]:
        # Expand both to have same dimensions, padding with NaN
        da, new_da = xr.align(da, new_da, join='outer')
        # Fill NaNs one way or the other re. accept_new
        da = new_da.fillna(da) if accept_new else da.fillna(new_da)
    return da
Example #31
def stack(*imgs, **kwargs):
    """Combine images together, overlaying later images onto earlier ones.

    Parameters
    ----------
    imgs : iterable of Image
        The images to combine.
    how : str, optional
        The compositing operator to combine pixels. Default is `'over'`.
    """
    if not imgs:
        raise ValueError("No images passed in")
    shapes = []
    for i in imgs:
        if not isinstance(i, Image):
            raise TypeError("Expected `Image`, got: `{0}`".format(type(i)))
        elif not shapes:
            shapes.append(i.shape)
        elif shapes and i.shape not in shapes:
            raise ValueError("The stacked images must have the same shape.")

    name = kwargs.get('name', None)
    op = composite_op_lookup[kwargs.get('how', 'over')]
    if len(imgs) == 1:
        return imgs[0]
    imgs = xr.align(*imgs, copy=False, join='outer')
    with np.errstate(divide='ignore', invalid='ignore'):
        out = tz.reduce(tz.flip(op), [i.data for i in imgs])
    return Image(out, coords=imgs[0].coords, dims=imgs[0].dims, name=name)
Example #32
def create_and_train_models(data):
    asset_name_all = data.coords['asset'].values

    data = data.sel(time=slice(
        '2013-05-01', None))  # cut the head before 2013-05-01 (a lot of noise)

    features_all = get_features(data)
    target_all = get_target_classes(data)

    models = dict()

    for asset_name in asset_name_all:
        target_cur = target_all.sel(asset=asset_name).dropna('time', 'any')
        features_cur = features_all.sel(asset=asset_name).dropna('time', 'any')

        # align features and targets
        target_for_learn_df, feature_for_learn_df = xr.align(target_cur,
                                                             features_cur,
                                                             join='inner')

        if len(features_cur.time) < 10:
            # not enough points for training
            continue

        model = create_model()
        try:
            model.fit(feature_for_learn_df.values, target_for_learn_df)
            models[asset_name] = model
        except KeyboardInterrupt as e:
            raise e
        except:
            logging.exception("model training failed")

    return models
Example #33
def get_decoupling_dataset(time_range=None, window='3H'):
    ceilometer = get_dataset('ceilometer', time_range=time_range)
    first_cbh = ceilometer['first_cbh']
    first_cbh = first_cbh[first_cbh.values < 3000.].resample(
        window, dim='time', how=lambda x, axis=None: np.percentile(x, q=95, axis=axis))
    cf_dataset = get_dataset('cloud_fraction', time_range=time_range, window=window, label='left')
    cloud_fraction, first_cbh = xarray.align(cf_dataset['low_cloud_fraction'], first_cbh)
    first_cbh[cloud_fraction < 0.5] = np.nan
    surface = get_dataset('marmet', time_range=time_range).xarray.resample(
        window, dim='time', how='mean')
    surface, first_cbh = xarray.align(surface, first_cbh, join='outer')
    zlcl = zlcl_from_T_RH(surface['air_temperature'], surface['relative_humidity'])
    data_vars = {
        'LCL': (['time'], zlcl, {'units': 'm'}),
        'cbh': (['time'], first_cbh, {'units': 'm'}),
    }
    coords = {'time': (['time'], surface['time'])}
    return Dataset(xarray.Dataset(data_vars, coords))
Example #34
def run(params):
    start_time = datetime.now()

    bin_width, filter_bandwidth, theta, shift, signal_field = params

    # Get file paths
    signal_dir = '/scratch/pkittiwi/fg1p/signal_map/bin{:.2f}/' \
        'fbw{:.2f}/theta{:.1f}/shift{:d}' \
        .format(bin_width, filter_bandwidth, theta, shift)
    output_dir = '/scratch/pkittiwi/fg1p/stats_semi/signal/bin{:.2f}/' \
        'fbw{:.2f}/theta{:.1f}/shift{:d}' \
        .format(bin_width, filter_bandwidth, theta, shift)
    signal_file = '{:s}/signal_map_bin{:.2f}_fbw{:.2f}_' \
        'theta{:.1f}_shift{:d}_{:03d}.nc'\
        .format(signal_dir, bin_width, filter_bandwidth,
                theta, shift, signal_field)
    output_file = '{:s}/stats_semi_signal_bin{:.2f}_fbw{:.2f}_' \
        'theta{:.1f}_shift{:d}_{:03d}.nc' \
        .format(output_dir, bin_width, filter_bandwidth,
                theta, shift, signal_field)
    mask_file = '/scratch/pkittiwi/fg1p/hera331_fov_mask.nc'

    # Load data to memory and align coordinates
    with xr.open_dataarray(signal_file) as da:
        signal = da.load()
    with xr.open_dataarray(mask_file) as da:
        mask = da.load()
    # Load one noise file to get coordinates.
    noise = xr.open_dataarray(
        '/scratch/pkittiwi/fg1p/noise_map/bin0.08/fbw8.00/theta90.0/shift0/'
        'noise_map_bin0.08_fbw8.00_theta90.0_shift0_333.nc'
    )
    for key, values in noise.coords.items():
        signal.coords[key] = values
        mask.coords[key] = values
    signal, noise, mask = xr.align(signal, noise, mask)

    # Mask observation
    signal = signal.where(mask == 1)

    # Calculate statistic
    out = get_stats(signal)
    out.attrs = {'bin_width': bin_width, 'filter_bandwidth': filter_bandwidth,
                 'theta': theta, 'shift': shift}

    os.makedirs(output_dir, exist_ok=True)
    out.to_netcdf(output_file)

    out.close()

    print('Finish. signal_file = {:s}. output_file = {:s}. '
          'Time spent {:.5f} sec.'
          .format(signal_file, output_file,
                  (datetime.now() - start_time).total_seconds()))
Example #35
def stack(*imgs):
    """Merge a number of images together, overlapping earlier images with
    later ones."""
    _validate_images(imgs)
    if len(imgs) == 1:
        return imgs[0]
    imgs = xr.align(*imgs, copy=False, join='outer')
    out = imgs[0].data.copy()
    for img in imgs[1:]:
        out = np.where(_to_channels(img.data)['a'] == 0, out, img.data)
    return Image(out, coords=imgs[0].coords, dims=imgs[0].dims)
Example #36
def merge(*imgs):
    """Merge a number of images together, averaging the channels"""
    _validate_images(imgs)
    if len(imgs) == 1:
        return imgs[0]
    imgs = xr.align(*imgs, copy=False, join='outer')
    coords, dims = imgs[0].coords, imgs[0].dims
    imgs = _to_channels(np.stack([i.data for i in imgs]))
    r = imgs['r'].mean(axis=0, dtype='f8').astype('uint8')
    g = imgs['g'].mean(axis=0, dtype='f8').astype('uint8')
    b = imgs['b'].mean(axis=0, dtype='f8').astype('uint8')
    a = imgs['a'].mean(axis=0, dtype='f8').astype('uint8')
    out = np.dstack([r, g, b, a]).view(np.uint32).reshape(a.shape)
    return Image(out, coords=coords, dims=dims)
Example #37
def stack(*imgs, **kwargs):
    """Combine images together, overlaying later images onto earlier ones.

    Parameters
    ----------
    imgs : iterable of Image
        The images to combine.
    how : str, optional
        The compositing operator to combine pixels. Default is `'over'`.
    """
    if not imgs:
        raise ValueError("No images passed in")
    for i in imgs:
        if not isinstance(i, Image):
            raise TypeError("Expected `Image`, got: `{0}`".format(type(i)))
    op = composite_op_lookup[kwargs.get('how', 'over')]
    if len(imgs) == 1:
        return imgs[0]
    imgs = xr.align(*imgs, copy=False, join='outer')
    out = tz.reduce(tz.flip(op), [i.data for i in imgs])
    return Image(out, coords=imgs[0].coords, dims=imgs[0].dims)
Example #38
def aggregate(*dss, accept_new=False):
    """ Aggregates xarray Datasets and DataArrays """
    # TODO: overwrite option, rather than accept_new, raise error if not
    # TODO: rename --> aggregate, look into, part_align -> concat.
    # TODO: check if result var is all non-nan and could be all same dtype

    if accept_new:
        dss = tuple(reversed(dss))

    ds = dss[0]
    for new_ds in dss[1:]:
        # First make sure both datasets have the same variables
        for data_var in new_ds.data_vars:
            if data_var not in ds.data_vars:
                ds[data_var] = np.nan
        # Expand both to have same dimensions, padding with NaN
        ds, _ = xr.align(ds, new_ds, join="outer")
        # assert all(ds.loc[new_ds.coords].isnull())
        # Fill NaNs one way or the other w.r.t. accept_new
        ds = ds.fillna(new_ds)
    return ds
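A hypothetical usage sketch for aggregate above, combining two Datasets that share a dimension but carry different variables and coordinates:

import numpy as np
import xarray as xr

ds_a = xr.Dataset({'u': ('x', [1.0, 2.0])}, coords={'x': [0, 1]})
ds_b = xr.Dataset({'w': ('x', [5.0, 6.0])}, coords={'x': [1, 2]})
agg = aggregate(ds_a, ds_b)
print(sorted(agg.data_vars), agg.sizes['x'])  # ['u', 'w'] 3 -- both variables on the union grid x = [0, 1, 2]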
Example #39
 def test_align_and_fillna_int(self):
     ds1 = case_runner_to_ds(foo2_array_array, fn_args=['a', 'b'],
                             cases=[(1, 10), (2, 20)],
                             var_names=['x', 'y'],
                             var_dims=[['time']],
                             var_coords={'time':
                                         ['a', 'b', 'c', 'd', 'e']})
     ds2 = case_runner_to_ds(foo2_array_array, fn_args=['a', 'b'],
                             cases=[(2, 10), (1, 20)],
                             var_names=['x', 'y'],
                             var_dims=[['time']],
                             var_coords={'time':
                                         ['a', 'b', 'c', 'd', 'e']})
     assert not np.logical_not(ds1['x'].isnull()).all()
     assert not np.logical_not(ds1['y'].isnull()).all()
     assert not np.logical_not(ds2['x'].isnull()).all()
     assert not np.logical_not(ds2['y'].isnull()).all()
     ds1, ds2 = xr.align(ds1, ds2, join='outer')
     fds = ds1.fillna(ds2)
     assert np.logical_not(fds['x'].isnull()).all()
     assert np.logical_not(fds['y'].isnull()).all()
Example #40
        try:
            netwtsd = pickle.load(f, encoding='latin1')
        except:
            netwtsd = pickle.load(f)
            
#%%
wt_av_cov = spatial_weight_normcov(netwtsd) 
resp_av_cov = spatial_resp_normcov(da) 
k_pos, k_stim = kurtosis_da(da)
#%%
pwr = tot_var(da)
#non_k_var = (k_pos<42) * (k_pos>2) * (pwr>0) *(k_stim<42) * (k_stim>2)
#resp_av_cov = resp_av_cov[non_k_var]

#%%
wt_av_cov, resp_av_cov = xr.align(wt_av_cov, resp_av_cov, join='inner')
layer_labels_ind = np.array(list(map(str, wt_av_cov.coords['layer_label'].values)))

n_plots = len(np.unique(layer_labels_ind))
plt.figure(figsize=(12,3))
layer_labels = ['conv1', 'conv2', 'conv3', 'conv4', 'conv5', 'fc6']

for i, layer in enumerate(layer_labels[1:]):
    plt.subplot(1, n_plots, i+1)
    x = wt_av_cov[layer_labels_ind==layer].values
    y = resp_av_cov[layer_labels_ind==layer].values
    if i<4:
        s=4
    else:
        s=1
    plt.scatter(x, y, s=s, color='k', edgecolors='none')
Example #41
    rx = np.double(np.squeeze(mat['data'][0][0][0]))
    ry = np.double(np.squeeze(mat['data'][0][0][1]))
    #print ry
    rfDiameter.append(np.sqrt( rx**2 + ry**2 )*0.625 + 40)

    transPos.append(np.squeeze(mat['data'][0][0][2]))
    resps.append(np.squeeze(mat['data'][0][0][3]))

#lets get svd measurements over cells
#originally: resps cellXposXrotXshape --> converted to cell X pos X unique_shape
cell_resps = [np.dstack(cell).T.reshape(cell.shape[0], np.prod(cell[0].shape))
             for cell in resps]

## putting yasmin data into data_array
lsxr = [xr.DataArray(aresp, dims=['x','shapes']) for aresp in cell_resps]
resp= xr.concat(xr.align(*lsxr, join='outer'), dim='cells')
resp.to_dataset('resp').to_netcdf(top_dir + 'data/an_results/v4_ti_resp.nc')

print([cell[2].shape==cell[0].shape for cell in resps])


#acell = cell_resps[0]
#acell = acell - np.mean(acell, 1, keepdims=True)
#u, s, v = np.linalg.svd(acell, full_matrices=False)
##use first princomp
#recell = np.dot(np.expand_dims(u[:,0],1), np.expand_dims(v[0,:]*s[0],0))
##convince myself these are all the same
#np.corrcoef(acell.ravel(), recell.ravel())
#np.dot(acell.ravel(), recell.ravel()) / (np.linalg.norm(acell.ravel())*np.linalg.norm(recell.ravel()))
#(s[0]**2/sum(s**2))**0.5
Example #42
    rx = np.double(np.squeeze(mat['data'][0][0][0]))
    ry = np.double(np.squeeze(mat['data'][0][0][1]))
    #print ry
    rfDiameter.append(np.sqrt( rx**2 + ry**2 )*0.625 + 40)

    transPos.append(np.squeeze(mat['data'][0][0][2]))
    resps.append(np.squeeze(mat['data'][0][0][3]))

#lets get svd measurements over cells
#originally: resps cellXposXrotXshape --> converted to cell X pos X unique_shape
cell_resps = [np.dstack(cell).T.reshape(cell.shape[0], np.prod(cell[0].shape))
             for cell in resps]

# putting yasmin data into data_array
lsxr = [xr.DataArray(aresp, dims=['x','shapes']) for aresp in cell_resps]
resp = xr.concat(xr.align(*lsxr, join='outer'), dim='unit')
resp = resp #convert to spk/s 300 ms averaging window

resp.to_dataset('resp').to_netcdf(top_dir + 'data/an_results/v4_ti_resp.nc')


#apc 109
m = l.loadmat(top_dir + 'data/responses/V4_370PC2001.mat')
v4=m['resp'][0][0]
v4_da = xr.DataArray(v4.T, dims=['shapes', 'unit']).chunk()
#adjustment for repeats [ 14, 15, 16,17, 318, 319, 320, 321]
#a = np.hstack((range(14), range(18,318)))
#a = np.hstack((a, range(322, 370)))
#v4_da = v4_da[a, :]
v4_da = v4_da.to_dataset('resp')
v4_da.to_netcdf(top_dir + 'data/responses/V4_370PC2001.nc')
Example #43
def run(params):
    print("Calculating bin_width={:.2f} MHz, filter_bandwidth={:.2f} MHz,"
          "theta={:.1f}, shift={:d}, signal_field={:d}, noise_multiplier={:.3f}"
          .format(*params))

    start_time = datetime.now()

    bin_width, filter_bandwidth, theta, shift, \
        signal_field, noise_multiplier = params

    nnoise = 500

    # Get file path
    signal_dir = '/scratch/pkittiwi/fg1p/signal_map/bin{:.2f}/' \
                 'fbw{:.2f}/theta{:.1f}/shift{:d}' \
        .format(bin_width, filter_bandwidth, theta, shift)
    noise_dir = '/scratch/pkittiwi/fg1p/noise_map/bin{:.2f}/' \
                'fbw{:.2f}/theta{:.1f}/shift{:d}' \
        .format(bin_width, filter_bandwidth, theta, shift)
    output_dir = '/scratch/pkittiwi/fg1p/stats_mc/obsn{:.1f}/bin{:.2f}/' \
                 'fbw{:.2f}/theta{:.1f}/shift{:d}' \
        .format(noise_multiplier, bin_width, filter_bandwidth, theta,
                shift)
    signal_file = '{:s}/signal_map_bin{:.2f}_fbw{:.2f}_' \
                  'theta{:.1f}_shift{:d}_{:03d}.nc' \
        .format(signal_dir, bin_width, filter_bandwidth,
                theta, shift, signal_field)
    noise_file = [
        '{:s}/noise_map_bin{:.2f}_fbw{:.2f}_theta{:.1f}_shift{:d}_{:03d}.nc'
        .format(noise_dir, bin_width, filter_bandwidth,
                theta, shift, noise_field)
        for noise_field in range(nnoise)
    ]
    output_file = '{:s}/stats_mc_obsn{:.1f}_bin{:.2f}_fbw{:.2f}_' \
                  'theta{:.1f}_shift{:d}_{:03d}_all.nc' \
        .format(output_dir, noise_multiplier, bin_width, filter_bandwidth,
                theta, shift, signal_field)

    # Load data
    signal = xr.open_dataarray(signal_file)
    noise = xr.open_mfdataset(
        noise_file, concat_dim=pd.Index(range(nnoise), name='noise_field'),
        autoclose=True
    )  # open as dask.array, chunk over noise field
    mask = xr.open_dataarray('/scratch/pkittiwi/fg1p/hera331_fov_mask.nc')

    # Align coordinates - they must match for XArray broadcasting
    for key in ['x', 'y', 'f']:
        signal.coords[key] = noise.coords[key].values
        mask.coords[key] = noise.coords[key].values
    signal, noise, mask = xr.align(signal, noise, mask)

    # Make observation
    signal = signal.where(mask == 1).stack(s=('x', 'y'))
    noise = noise.where(mask == 1).stack(s=('x', 'y')) * noise_multiplier
    obs = signal + noise
    del signal

    # Calculate noise variance
    noise_var = noise.var(dim='s')
    del noise

    # Calculate biased moments
    m2_biased = xr_moment(obs, 's', order=2)
    m3_biased = xr_moment(obs, 's', order=3)
    m4_biased = xr_moment(obs, 's', order=4)
    del obs

    # Calculate unbiased moments
    m2_unbiased = m2_biased - noise_var
    m3_unbiased = m3_biased
    m4_unbiased = m4_biased - (6 * m2_unbiased * noise_var) - \
        (3 * noise_var ** 2)
    # Note: the second term in m4_unbiased is actually "m2_true", which we
    # estimate here with m2_unbiased

    # Calculate biased vsk
    v_biased = m2_biased
    s_biased = m3_biased / m2_biased ** (3 / 2)
    k_biased = (m4_biased / m2_biased ** 2) - 3

    # Calculate unbiased vsk
    v_unbiased = m2_unbiased
    s_unbiased = m3_unbiased / m2_unbiased ** (3 / 2)
    k_unbiased = (m4_unbiased / m2_unbiased ** 2) - 3

    # # Distribute computation on the cluster
    # m2_biased, m3_biased, m4_biased, \
    #     m2_unbiased, m3_unbiased, m4_unbiased,\
    #     v_biased, s_biased, k_biased, \
    #     v_unbiased, s_unbiased, k_unbiased = dask.compute(
    #         m2_biased, m3_biased, m4_biased,
    #         m2_unbiased, m3_unbiased, m4_unbiased,
    #         v_biased, s_biased, k_biased,
    #         v_unbiased, s_unbiased, k_unbiased
    #     )

    # Rename variables and merge
    m2_biased = m2_biased.rename({'__xarray_dataarray_variable__': 'm2_biased'})
    m3_biased = m3_biased.rename({'__xarray_dataarray_variable__': 'm3_biased'})
    m4_biased = m4_biased.rename({'__xarray_dataarray_variable__': 'm4_biased'})
    m2_unbiased = m2_unbiased.rename(
        {'__xarray_dataarray_variable__': 'm2_unbiased'})
    m3_unbiased = m3_unbiased.rename(
        {'__xarray_dataarray_variable__': 'm3_unbiased'})
    m4_unbiased = m4_unbiased.rename(
        {'__xarray_dataarray_variable__': 'm4_unbiased'})
    v_biased = v_biased.rename({'__xarray_dataarray_variable__': 'v_biased'})
    s_biased = s_biased.rename({'__xarray_dataarray_variable__': 's_biased'})
    k_biased = k_biased.rename({'__xarray_dataarray_variable__': 'k_biased'})
    v_unbiased = v_unbiased.rename(
        {'__xarray_dataarray_variable__': 'v_unbiased'})
    s_unbiased = s_unbiased.rename(
        {'__xarray_dataarray_variable__': 's_unbiased'})
    k_unbiased = k_unbiased.rename(
        {'__xarray_dataarray_variable__': 'k_unbiased'})
    out = xr.merge([m2_biased, m3_biased, m4_biased,
                    m2_unbiased, m3_unbiased, m4_unbiased,
                    v_biased, s_biased, k_biased,
                    v_unbiased, s_unbiased, k_unbiased])

    # Distribute computation on the cluster
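    # A single .compute() call evaluates the whole dask graph at once, so
    # shared intermediates (e.g. the masked, stacked observation map) are
    # only computed once for all twelve output variables.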
    out = out.compute()

    # Save output
    out.attrs = {'signal_field': signal_field,
                 'noise_multiplier': noise_multiplier, 'bin_width': bin_width,
                 'filter_bandwidth': filter_bandwidth, 'theta': theta,
                 'shift': shift}
    os.makedirs(output_dir, exist_ok=True)
    out.to_netcdf(output_file)

    print(
        'Finish {:s}. Time spent: {:.5f} minutes'
        .format(signal_file, (datetime.now() - start_time).total_seconds() / 60)
    )
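
The helper xr_moment used above (and again in Example #45) is not defined in
these snippets. A minimal sketch consistent with how it is called, namely the
central moment of a DataArray along one dimension with NaNs skipped, might
look like the code below; the implementation details are an assumption, only
the call signature is taken from the examples.

def xr_moment(da, dim, order=2):
    # Hypothetical helper: central moment of da of the given order along dim.
    # DataArray.mean skips NaNs by default for float data, which matches the
    # masked (NaN-filled) maps used above.
    return ((da - da.mean(dim=dim)) ** order).mean(dim=dim)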
Example #44
0
def run(params):
        bin_width, filter_bandwidth, theta, shift, \
            signal_field, noise_field, noise_multiplier = params

        # Get file path
        signal_dir = '/scratch/pkittiwi/fg1p/signal_map/bin{:.2f}/' \
                     'fbw{:.2f}/theta{:.1f}/shift{:d}' \
            .format(bin_width, filter_bandwidth, theta, shift)
        noise_dir = '/scratch/pkittiwi/fg1p/noise_map/bin{:.2f}/' \
                    'fbw{:.2f}/theta{:.1f}/shift{:d}' \
            .format(bin_width, filter_bandwidth, theta, shift)
        output_dir = '/scratch/pkittiwi/fg1p/stats_mc/obsn{:.1f}/bin{:.2f}/' \
                     'fbw{:.2f}/theta{:.1f}/shift{:d}/s{:03d}' \
            .format(noise_multiplier, bin_width, filter_bandwidth, theta,
                    shift, signal_field)
        signal_file = '{:s}/signal_map_bin{:.2f}_fbw{:.2f}_' \
                      'theta{:.1f}_shift{:d}_{:03d}.nc' \
            .format(signal_dir, bin_width, filter_bandwidth,
                    theta, shift, signal_field)
        noise_file = '{:s}/noise_map_bin{:.2f}_fbw{:.2f}_' \
                     'theta{:.1f}_shift{:d}_{:03d}.nc' \
            .format(noise_dir, bin_width, filter_bandwidth,
                    theta, shift, noise_field)
        output_file = '{:s}/stats_mc_obsn{:.1f}_bin{:.2f}_fbw{:.2f}_' \
                      'theta{:.1f}_shift{:d}_{:03d}_{:03d}.nc' \
            .format(output_dir, noise_multiplier, bin_width, filter_bandwidth,
                    theta, shift, signal_field, noise_field)

        # Load data
        signal = xr.open_dataarray(signal_file)
        noise = xr.open_dataarray(noise_file)
        mask = xr.open_dataarray('/scratch/pkittiwi/fg1p/hera331_fov_mask.nc')
        for key, values in noise.coords.items():
            signal.coords[key] = values
            mask.coords[key] = values
        signal, noise, mask = xr.align(signal, noise, mask)

        # Make observation
        signal = signal.where(mask == 1).stack(s=('x', 'y'))
        noise = noise.where(mask == 1).stack(s=('x', 'y')) * noise_multiplier
        obs = signal + noise

        # Get noise variance
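        # scipy.stats.moment computes central moments about the mean along
        # the given axis; nan_policy='omit' skips the NaNs introduced by the
        # field-of-view mask. (This assumes `from scipy.stats import moment`
        # at module level, which is not shown in the snippet.)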
        noise_var = moment(noise.values, moment=2, axis=-1, nan_policy='omit')

        # Get biased moments
        m2_biased = moment(obs.values, moment=2, axis=-1, nan_policy='omit')
        m3_biased = moment(obs.values, moment=3, axis=-1, nan_policy='omit')
        m4_biased = moment(obs.values, moment=4, axis=-1, nan_policy='omit')

        # Get unbiased moments
        m2_unbiased = m2_biased - noise_var
        m3_unbiased = m3_biased
        m4_unbiased = m4_biased - (6 * m2_unbiased * noise_var) - \
            (3 * noise_var ** 2)

        # Get biased vsk
        v_biased = m2_biased
        s_biased = m3_biased / m2_biased ** (3 / 2)
        k_biased = (m4_biased / m2_biased ** 2) - 3

        # Get unbiased vsk
        v_unbiased = m2_unbiased
        s_unbiased = m3_unbiased / m2_unbiased ** (3 / 2)
        k_unbiased = (m4_unbiased / m2_unbiased ** 2) - 3

        # Save output
        out = xr.Dataset(
            {'m2_biased': (['f'], m2_biased),
             'm3_biased': (['f'], m3_biased),
             'm4_biased': (['f'], m4_biased),
             'm2_unbiased': (['f'], m2_unbiased),
             'm3_unbiased': (['f'], m3_unbiased),
             'm4_unbiased': (['f'], m4_unbiased),
             'v_biased': (['f'], v_biased),
             's_biased': (['f'], s_biased),
             'k_biased': (['f'], k_biased),
             'v_unbiased': (['f'], v_unbiased),
             's_unbiased': (['f'], s_unbiased),
             'k_unbiased': (['f'], k_unbiased)},
            coords={'f': noise.coords['f']},
            attrs={
                'signal_field': signal_field, 'noise_field': noise_field,
                'noise_multiplier': noise_multiplier, 'bin_width': bin_width,
                'filter_bandwidth': filter_bandwidth, 'theta': theta,
                'shift': shift
            }
        )
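        # Each statistic is a 1-D numpy array over frequency: the spatial
        # axes were stacked into 's' and reduced by scipy's moment, leaving
        # (presumably) only the 'f' axis, which is why the Dataset is built
        # with dims ['f'].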
        os.makedirs(output_dir, exist_ok=True)
        out.to_netcdf(output_file)

        print(
            'Finish. signal_file = {:s}. noise_file = {:s}. output_file = {:s}.'
            .format(signal_file, noise_file, output_file)
        )
Example #45
0
def run(params, chunks=(1, 256, 256)):
    print("Calculating bin_width={:.2f} MHz, filter_bandwidth={:.2f} MHz,"
          "theta={:.1f}, shift={:d}, signal_field={:d}, noise_field={:d}, "
          "noise_multiplier={:.3f}"
          .format(*params))

    start_time = datetime.now()

    bin_width, filter_bandwidth, theta, shift, \
        signal_field, noise_field, noise_multiplier = params

    # Get file path
    signal_dir = '/scratch/pkittiwi/fg1p/signal_map/bin{:.2f}/' \
                 'fbw{:.2f}/theta{:.1f}/shift{:d}' \
        .format(bin_width, filter_bandwidth, theta, shift)
    noise_dir = '/scratch/pkittiwi/fg1p/noise_map/bin{:.2f}/' \
                'fbw{:.2f}/theta{:.1f}/shift{:d}' \
        .format(bin_width, filter_bandwidth, theta, shift)
    output_dir = '/scratch/pkittiwi/fg1p/stats_mc/obsn{:.1f}/bin{:.2f}/' \
                 'fbw{:.2f}/theta{:.1f}/shift{:d}/s{:03d}' \
        .format(noise_multiplier, bin_width, filter_bandwidth, theta,
                shift, signal_field)
    signal_file = '{:s}/signal_map_bin{:.2f}_fbw{:.2f}_' \
                  'theta{:.1f}_shift{:d}_{:03d}.nc' \
        .format(signal_dir, bin_width, filter_bandwidth,
                theta, shift, signal_field)
    noise_file = '{:s}/noise_map_bin{:.2f}_fbw{:.2f}_' \
                 'theta{:.1f}_shift{:d}_{:03d}.nc' \
        .format(noise_dir, bin_width, filter_bandwidth,
                theta, shift, noise_field)
    output_file = '{:s}/stats_mc_obsn{:.1f}_bin{:.2f}_fbw{:.2f}_' \
                  'theta{:.1f}_shift{:d}_{:03d}_{:03d}.nc' \
        .format(output_dir, noise_multiplier, bin_width, filter_bandwidth,
                theta, shift, signal_field, noise_field)

    # Load data
    chunks_dict = {'f': chunks[0], 'y': chunks[1], 'x': chunks[2]}
    signal = xr.open_dataarray(signal_file, chunks=chunks_dict)
    noise = xr.open_dataarray(noise_file, chunks=chunks_dict)
    mask = xr.open_dataarray('/scratch/pkittiwi/fg1p/hera331_fov_mask.nc',
                             chunks=chunks_dict)
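    # Passing `chunks` makes xarray back each file with a lazy dask array,
    # split into (f, y, x) blocks of the given sizes; with the default
    # chunks=(1, 256, 256) every frequency channel becomes its own task.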

    # Align coordinates - they must match for XArray broadcasting
    for key in ['x', 'y', 'f']:
        signal.coords[key] = noise.coords[key].values
        mask.coords[key] = noise.coords[key].values
    signal, noise, mask = xr.align(signal, noise, mask)

    # Make observation
    signal = signal.where(mask == 1).stack(s=('x', 'y'))
    noise = noise.where(mask == 1).stack(s=('x', 'y')) * noise_multiplier
    obs = signal + noise

    # Calculate noise variance
    noise_var = noise.var(dim='s')

    # Calculate biased moments
    m2_biased = xr_moment(obs, 's', order=2)
    m3_biased = xr_moment(obs, 's', order=3)
    m4_biased = xr_moment(obs, 's', order=4)

    # Calculate unbiased moments
    m2_unbiased = m2_biased - noise_var
    m3_unbiased = m3_biased
    m4_unbiased = m4_biased - (6 * m2_unbiased * noise_var) - \
        (3 * noise_var ** 2)
    # Note: the second term in m4_unbiased is actually "m2_true", which we
    # estimate here with m2_unbiased

    # Calculate biased vsk
    v_biased = m2_biased
    s_biased = m3_biased / m2_biased ** (3 / 2)
    k_biased = (m4_biased / m2_biased ** 2) - 3

    # Calculate unbiased vsk
    v_unbiased = m2_unbiased
    s_unbiased = m3_unbiased / m2_unbiased ** (3 / 2)
    k_unbiased = (m4_unbiased / m2_unbiased ** 2) - 3

    # Rename variables and merge
    m2_biased.name = 'm2_biased'
    m3_biased.name = 'm3_biased'
    m4_biased.name = 'm4_biased'
    m2_unbiased.name = 'm2_unbiased'
    m3_unbiased.name = 'm3_unbiased'
    m4_unbiased.name = 'm4_unbiased'
    v_biased.name = 'v_biased'
    s_biased.name = 's_biased'
    k_biased.name = 'k_biased'
    v_unbiased.name = 'v_unbiased'
    s_unbiased.name = 's_unbiased'
    k_unbiased.name = 'k_unbiased'
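    # Assigning .name to each DataArray is what lets xr.merge below combine
    # them into a single Dataset with one variable per statistic.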
    out = xr.merge([m2_biased, m3_biased, m4_biased,
                    m2_unbiased, m3_unbiased, m4_unbiased,
                    v_biased, s_biased, k_biased,
                    v_unbiased, s_unbiased, k_unbiased])

    # Distribute computation on the cluster
    out = out.compute()

    # Save output
    out.attrs = {'signal_field': signal_field,
                 'noise_multiplier': noise_multiplier, 'bin_width': bin_width,
                 'filter_bandwidth': filter_bandwidth, 'theta': theta,
                 'shift': shift}
    os.makedirs(output_dir, exist_ok=True)
    out.to_netcdf(output_file)

    print(
        'Finish {:s}. Time spent: {:.5f} minutes'
        .format(signal_file, (datetime.now() - start_time).total_seconds() / 60)
    )
Example #46
0
def run(params):
    print(params)
    start_time = datetime.now()

    bin_width, filter_bandwidth, theta, shift, \
        signal_field, noise_field, noise_multiplier = params

    # Get file path
    signal_dir = '/scratch/pkittiwi/fg1p/signal_map/bin{:.2f}/' \
                 'fbw{:.2f}/theta{:.1f}/shift{:d}' \
        .format(bin_width, filter_bandwidth, theta, shift)
    noise_dir = '/scratch/pkittiwi/fg1p/noise_map/bin{:.2f}/' \
                'fbw{:.2f}/theta{:.1f}/shift{:d}' \
        .format(bin_width, filter_bandwidth, theta, shift)
    output_dir = '/scratch/pkittiwi/fg1p/obs_map/obsn{:.1f}/bin{:.2f}/' \
                 'fbw{:.2f}/theta{:.1f}/shift{:d}/s{:03d}' \
        .format(noise_multiplier, bin_width, filter_bandwidth, theta,
                shift, signal_field)
    signal_file = '{:s}/signal_map_bin{:.2f}_fbw{:.2f}_' \
                  'theta{:.1f}_shift{:d}_{:03d}.nc' \
        .format(signal_dir, bin_width, filter_bandwidth,
                theta, shift, signal_field)
    noise_file = '{:s}/noise_map_bin{:.2f}_fbw{:.2f}_' \
                 'theta{:.1f}_shift{:d}_{:03d}.nc' \
        .format(noise_dir, bin_width, filter_bandwidth,
                theta, shift, noise_field)
    output_file = '{:s}/obs_map_obsn{:.1f}_bin{:.2f}_fbw{:.2f}_' \
                  'theta{:.1f}_shift{:d}_{:03d}_{:03d}.nc' \
        .format(output_dir, noise_multiplier, bin_width, filter_bandwidth,
                theta, shift, signal_field, noise_field)

    # Load data
    with xr.open_dataarray(signal_file) as ds:
        signal = ds.load()
    with xr.open_dataarray(noise_file) as ds:
        noise = ds.load()
    with xr.open_dataarray('/scratch/pkittiwi/fg1p/hera331_fov_mask.nc') as ds:
        mask = ds.load()
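    # Using the context manager plus .load() pulls each map fully into
    # memory and closes the underlying netCDF file handle right away.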

    # Align coordinates - they must match for XArray broadcasting
    for key in ['x', 'y', 'f']:
        signal.coords[key] = noise.coords[key].values
        mask.coords[key] = noise.coords[key].values
    signal, noise, mask = xr.align(signal, noise, mask)

    # Make observation
    signal = signal.where(mask == 1)
    noise = noise.where(mask == 1) * noise_multiplier
    obs = signal + noise
    obs.name = 'obs'
    obs.attrs = {'signal_field': signal_field, 'noise_field': noise_field,
                 'noise_multiplier': noise_multiplier, 'bin_width': bin_width,
                 'filter_bandwidth': filter_bandwidth, 'theta': theta,
                 'shift': shift}

    # Calculate noise variance
    noise_var = noise.var(dim=['y', 'x'])
    noise_var.name = 'noise_var'
    noise_var.attrs = {
        'noise_field': noise_field, 'noise_multiplier': noise_multiplier,
        'bin_width': bin_width, 'filter_bandwidth': filter_bandwidth,
        'theta': theta, 'shift': shift
    }

    # Save output
    out = xr.merge([obs, noise_var])
    os.makedirs(output_dir, exist_ok=True)
    out.to_netcdf(output_file)

    print('Finish {:s}. Time spent: {:.5f} minutes'
          .format(output_file,
                  (datetime.now() - start_time).total_seconds() / 60))

    return 0
Example #47
0
def run(params):
    start_time = datetime.now()

    bin_width, filter_bandwidth, theta, shift, \
        signal_field, noise_field, noise_multiplier = params

    # Get file paths
    signal_dir = '/scratch/pkittiwi/fg1p/signal_map/bin{:.2f}/' \
        'fbw{:.2f}/theta{:.1f}/shift{:d}' \
        .format(bin_width, filter_bandwidth, theta, shift)
    noise_dir = '/scratch/pkittiwi/fg1p/noise_map/bin{:.2f}/' \
        'fbw{:.2f}/theta{:.1f}/shift{:d}' \
        .format(bin_width, filter_bandwidth, theta, shift)
    output_dir = '/scratch/pkittiwi/fg1p/stats_mc/obsn{:.1f}/bin{:.2f}/' \
        'fbw{:.2f}/theta{:.1f}/shift{:d}/s{:03d}' \
        .format(noise_multiplier, bin_width, filter_bandwidth, theta,
                shift, signal_field)
    signal_file = '{:s}/signal_map_bin{:.2f}_fbw{:.2f}_' \
        'theta{:.1f}_shift{:d}_{:03d}.nc'\
        .format(signal_dir, bin_width, filter_bandwidth,
                theta, shift, signal_field)
    noise_file = '{:s}/noise_map_bin{:.2f}_fbw{:.2f}_' \
        'theta{:.1f}_shift{:d}_{:03d}.nc'\
        .format(noise_dir, bin_width, filter_bandwidth,
                theta, shift, noise_field)
    output_file = '{:s}/stats_mc_obsn{:.1f}_bin{:.2f}_fbw{:.2f}_' \
        'theta{:.1f}_shift{:d}_{:03d}_{:03d}.nc' \
        .format(output_dir, noise_multiplier, bin_width, filter_bandwidth,
                theta, shift, signal_field, noise_field)
    mask_file = '/scratch/pkittiwi/fg1p/hera331_fov_mask.nc'
    obs_dir = '/scratch/pkittiwi/fg1p/obs_map/obsn{:.1f}/bin{:.2f}/' \
        'fbw{:.2f}/theta{:.1f}/shift{:d}/s{:03d}' \
        .format(noise_multiplier, bin_width, filter_bandwidth, theta,
                shift, signal_field)
    obs_file = '{:s}/obs_map_obsn{:.1f}_bin{:.2f}_fbw{:.2f}_' \
        'theta{:.1f}_shift{:d}_{:03d}_{:03d}.nc' \
        .format(obs_dir, noise_multiplier, bin_width, filter_bandwidth,
                theta, shift, signal_field, noise_field)

    # Load data to memory and align coordinates
    with xr.open_dataarray(signal_file) as da:
        signal = da.load()
    with xr.open_dataarray(noise_file) as da:
        noise = da.load()
    with xr.open_dataarray(mask_file) as da:
        mask = da.load()
    for key, values in noise.coords.items():
        signal.coords[key] = values
        mask.coords[key] = values
    signal, noise, mask = xr.align(signal, noise, mask)

    # Make observation
    signal = signal.where(mask == 1)
    noise = noise.where(mask == 1) * noise_multiplier
    obs = signal + noise
    obs.name = 'obs'
    obs.attrs = {'signal_field': signal_field, 'noise_field': noise_field,
                 'noise_multiplier': noise_multiplier, 'bin_width': bin_width,
                 'filter_bandwidth': filter_bandwidth, 'theta': theta,
                 'shift': shift}

    # Calculate noise variance
    noise_var = noise.var(dim=['y', 'x'])
    noise_var.name = 'noise_var'
    noise_var.attrs = {
        'noise_field': noise_field, 'noise_multiplier': noise_multiplier,
        'bin_width': bin_width, 'filter_bandwidth': filter_bandwidth,
        'theta': theta, 'shift': shift
    }

    # Save observation and noise_variance
    os.makedirs(obs_dir, exist_ok=True)
    obs = xr.merge([obs, noise_var])
    obs.to_netcdf(obs_file)
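    # Note: obs is now a Dataset holding both 'obs' and 'noise_var', and that
    # Dataset is what gets passed to get_stats below; get_stats itself is
    # defined elsewhere and is assumed to accept it in this form.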

    del signal
    del noise
    del mask

    # Calculate statistic
    out = get_stats(obs)
    out.attrs = {'signal_field': signal_field, 'noise_field': noise_field,
                 'noise_multiplier': noise_multiplier, 'bin_width': bin_width,
                 'filter_bandwidth': filter_bandwidth, 'theta': theta,
                 'shift': shift}

    os.makedirs(output_dir, exist_ok=True)
    out.to_netcdf(output_file)

    out.close()

    print(
        'Finish. signal_file = {:s}. noise_file = {:s}. output_file = {:s}. '
        'Time spent {:.5f} sec.'
        .format(signal_file, noise_file, output_file,
                (datetime.now() - start_time).total_seconds())
    )
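
The get_stats helper called in Example #47 is also not shown. Judging from the
other examples, it presumably reduces the observation map over the spatial
axes and returns the biased and noise-debiased moments plus the
variance/skewness/kurtosis per frequency channel. A rough sketch under that
assumption (reusing the hypothetical xr_moment above):

def get_stats(obs):
    # Hypothetical sketch: obs is assumed to be the Dataset saved above,
    # holding the masked 'obs' map and the per-frequency 'noise_var'.
    x = obs['obs'].stack(s=('x', 'y'))
    noise_var = obs['noise_var']
    m2 = xr_moment(x, 's', order=2)
    m3 = xr_moment(x, 's', order=3)
    m4 = xr_moment(x, 's', order=4)
    # Same noise corrections as in the examples above (independent,
    # zero-mean Gaussian noise assumed).
    m2u = m2 - noise_var
    m4u = m4 - 6 * m2u * noise_var - 3 * noise_var ** 2
    return xr.Dataset({
        'm2_biased': m2, 'm3_biased': m3, 'm4_biased': m4,
        'm2_unbiased': m2u, 'm3_unbiased': m3, 'm4_unbiased': m4u,
        'v_biased': m2, 's_biased': m3 / m2 ** 1.5,
        'k_biased': m4 / m2 ** 2 - 3,
        'v_unbiased': m2u, 's_unbiased': m3 / m2u ** 1.5,
        'k_unbiased': m4u / m2u ** 2 - 3,
    })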