def __init__(self, lattice='square', p=0.5, calc_fields=True):
    super().__init__(lattice, p, method='charges')
    self.g = pc.get_geom(lat=lattice, porosity=p)
    self.length_factor = 1
    cs = None
    if calc_fields:
        ds = (pc.process_lattice(lattice, layers_to_keep=1, porosity=p)
              .pipe(pc.linear_response)
              .pipe(pc.nl_response))
        cs = pc.calc_charge_fields(ds, reference=True)
        self.ds = ds
        self.stress = xr.dot(cs.s, ds.Q0, dims='charge').values[[0, 2, 1]]
        self.d_lin = xr.dot(cs.d.sel(order=1), ds.Q0, dims='charge').values
        self.d_mode = xr.dot(cs.d.sel(order=1), ds.cmodes.isel(mode=0), dims='charge')
    # calculate boundary
    edges, nodes = pc.all_edges(self.g)
    inds = np.unique(np.concatenate([edges.i1, edges.i2]))
    self.boundary = inds
    self.holes = ds.holes.values.T[:, 1:]  # shape = (2, N_holes)
    if cs is not None:
        self.real_nodes = cs.reference.values
    else:
        self.real_nodes = self.g.p
    self.calc_tri()
def CGGM_neg_log_likelihood(Lambda, Sigma, Theta, Syy, Sxx, Sxy, n_xr):
    # determinant of the covariance
    # normalised by n_k/n, our renormalisation convention
    determinants = np.sum(-0.5 * n_xr * np.log(np.linalg.det(Lambda))) / n_xr.sum()
    # dot product in the exponent
    # the Sxx... are already renormalised by 1/n
    dotproduct_yy = xr.dot(Lambda, Syy, dims=["component_y", "component_yT"])
    dotproduct_xy = 2 * xr.dot(Theta, Sxy, dims=["component_y", "component_x"])
    dotproduct_xx = xr.dot(
        xr.dot(Theta,
               xr.dot(Sigma, Theta, dims=["component_y"]).rename({
                   "component_yT": "component_y",
                   "component_x": "component_xT"
               }),
               dims=["component_y"]),
        Sxx,
        dims=["component_x", "component_xT"])
    dotproducts = 0.5 * (dotproduct_yy + dotproduct_xx + dotproduct_xy).sum(dim=["label"])
    return float(determinants + dotproducts)  # + 0.5*np.log(2*np.pi)*len(Lambda.component_y)
def by_label_log_likelihood(X, Y, Lambda, Sigma, Theta):
    # products of all the components together (no sum over the observations yet)
    # such that: Sxx = sum_{observations} xx / n_total
    xx = X * X.rename({"component_x": "component_xT"})
    yy = Y * Y.rename({"component_y": "component_yT"})
    xy = X * Y
    # determinant of the covariance
    determinants = xr.DataArray(data=-0.5 * np.log(np.linalg.det(Lambda)),
                                coords=[Lambda.label.values],
                                dims=["label"])
    # dot product in the exponent
    dotproduct_yy = xr.dot(Lambda, yy, dims=["component_y", "component_yT"])
    dotproduct_xy = 2 * xr.dot(Theta, xy, dims=["component_y", "component_x"])
    dotproduct_xx = xr.dot(
        xr.dot(Theta,
               xr.dot(Sigma, Theta, dims=["component_y"]).rename({
                   "component_yT": "component_y",
                   "component_x": "component_xT"
               }),
               dims=["component_y"]),
        xx,
        dims=["component_x", "component_xT"])
    dotproducts = 0.5 * (dotproduct_yy + dotproduct_xx + dotproduct_xy)
    return -1 * (determinants + dotproducts)  # - 0.5*np.log(2*np.pi)*len(Lambda.component_y)
def ProjectCorrs(self):
    if not hasattr(self, 'left_evec') or not hasattr(self, 'right_evec'):
        warn('PerformVarMeth needs to be called to project, attempting now with default t0='
             + str(self.t0) + ' and dt=' + str(self.dt))
        self.PerformVarMeth()
    self.proj_corr = xr.dot(self.left_evec, self.matrix_corr, dims='ism')
    self.proj_corr = xr.dot(self.right_evec, self.proj_corr, dims='jsm')
    if hasattr(self, 'booted_mcorr') and self.booted_mcorr is not None:
        self.booted_proj_corr = self.proj_corr.copy()
        self.booted_proj_corr = xr.apply_ufunc(
            lambda x: BootStrap(thisnboot=self.nboot),
            self.booted_proj_corr,
            vectorize=True)
        for iboot in range(self.nboot):
            this_boot_mcorr = xr.apply_ufunc(lambda x: x.bootvals[iboot],
                                             self.booted_mcorr,
                                             vectorize=True)
            this_booted_proj_corr = xr.dot(self.left_evec, this_boot_mcorr, dims='ism')
            # project the per-bootstrap correlator (not self.proj_corr) on the right as well
            this_booted_proj_corr = xr.dot(self.right_evec, this_booted_proj_corr, dims='jsm')

            def this_fun(itemp, boot_val):
                itemp.bootvals = itemp.bootvals.append(pa.Series([boot_val]))
                return itemp

            self.booted_proj_corr = xr.apply_ufunc(this_fun,
                                                   self.booted_proj_corr,
                                                   this_booted_proj_corr,
                                                   vectorize=True)
def gaussian_log_likelihood(Y, mu, Lambda):
    # Y size:      n x p
    # mu size:     K x p
    # Lambda size: K x p x p
    # returns the log of the gaussian density of each observation in Y
    # for each value of the parameters across all classes
    # output size: n x K
    K = len(mu.label)
    p = len(mu.component_y)
    n = len(Y.observation_id)
    # (x-mu)^T Lambda (x-mu)
    dotproduct = xr.DataArray(xr.dot(Lambda, Y - mu, dims=['component_y']),
                              coords=[range(K), range(p), range(n)],
                              dims=['label', 'component_y', 'observation_id'])
    dotproduct_final = xr.dot(Y - mu, dotproduct, dims=['component_y'])
    # det(Lambda)
    determinants = xr.DataArray([np.linalg.det(Lambda[k]) for k in range(K)],
                                coords=[range(K)],
                                dims=['label'])
    # Sigma_da.reduce(lambda x, axis: np.linalg.det(x), dim='label')
    # numerator of the Bayes rule to get p_i,k^t
    log_p = 0.5 * (-dotproduct_final + np.log(determinants) - p * np.log(2 * np.pi))
    return log_p
def Q(
        # observations
        Y,
        X,
        # new parameter values
        pi,
        Lambda,
        Sigma,
        Theta,
        # current parameter values
        pi_t,
        Lambda_t,
        Sigma_t,
        Theta_t):
    # label weights (E step)
    p_t = label_weights_E_step(
        # observations
        Y,
        X,
        # current parameter values
        pi_t,
        Lambda_t,
        Sigma_t,
        Theta_t)
    # Sufficient statistics weighted by the E step.
    # Careful here: remember we divide by n_total (p_t.sum()), not by n_k
    # (p_t.sum(dim=["observation_id"])). This is just a convention, so that the
    # penalty intensity does not have to scale with n; it's done everywhere.
    Sxx_unsupervised = xr.dot(p_t * X,
                              X.rename({"component_x": "component_xT"}),
                              dims="observation_id") / p_t.sum()
    Syy_unsupervised = xr.dot(p_t * Y,
                              Y.rename({"component_y": "component_yT"}),
                              dims="observation_id") / p_t.sum()
    Sxy_unsupervised = xr.dot(p_t * X, Y, dims="observation_id") / p_t.sum()
    # get the log likelihood by class with parameters theta
    # log P_theta(x_i, z_i = k) = log P_theta(x_i | z_i = k) + log(pi_k)
    log_likelihood = -1. * CGGM_neg_log_likelihood(
        Lambda,
        Sigma,
        Theta,
        Syy_unsupervised,
        Sxx_unsupervised,
        Sxy_unsupervised,
        n_xr=p_t.sum(dim=["observation_id"]))
    # the result is divided by n_total (p_t.sum())
    return float(log_likelihood +
                 (np.log(pi) * p_t.sum(dim=["observation_id"])).sum() / p_t.sum())
def NSE_diff(pred, obs):
    """Calculate the Nash-Sutcliffe efficiency for models trained on differences.

    Denominator: variance of predicting the mean value of differences.
    This variation of the NSE takes into account that the most appropriate
    baseline forecast is persistence when predicting differences.

    TODO: this function assumes pred.forecast_day.values is scalar, what if not?

    Parameters
    ----------
    pred : xr.DataArray
        the prediction array
    obs : xr.DataArray
        the true values to compare with

    Returns
    -------
    (float, float)
        1) Variance reduction compared to persistence forecasts
        2) NSE of persistence forecasts (a variance reduction as well)
    """
    inits = pred.init_time.values

    # for subtracting observations, the time coordinate has to be its valid time
    pred = pred.swap_dims({'init_time': 'time'})
    diff = pred - obs

    # now we will iterate over the init_time, so this shall be our index
    diff = diff.swap_dims({'time': 'init_time'})
    pred = pred.swap_dims({'time': 'init_time'})

    err = err_persistence = err_mean_dis = 0
    for init in inits:
        valid_time = pred.sel(init_time=init).time

        # variance reduction compared to persistence forecasts
        d = diff.sel(init_time=init)  # is scalar if using one fcstday!
        err += float(xr.dot(d, d))

        persistence = obs.sel(time=init)
        d = persistence - obs.sel(time=valid_time)
        err_persistence += float(xr.dot(d, d))

        # NSE of persistence forecasts
        d = obs.mean() - obs.sel(time=valid_time)
        err_mean_dis += float(xr.dot(d, d))
    return float(1 - err / err_persistence), float(1 - err_persistence / err_mean_dis)
def pass_forward(self, inputs, past_inputs):
    i, p = self.layer_prev.pass_forward(inputs, past_inputs)
    # affine transform: contract over the previous layer's dims, then add the bias
    pre_activation = xr.dot(i, self.m_weights, dims=[*self.layer_prev.coords]) + self.m_biases
    activation = self.func_activation(pre_activation)
    return activation, p.append({KEY_IN: i, KEY_OUT_PRE: pre_activation})

def pass_back(self, gradient, past_inputs, past_outputs):
    grad, inputs, p = self.layer_next.pass_back(gradient, past_inputs, past_outputs)
    i = inputs.pop()
    grad_b = np.multiply(grad, self.func_activation_d(i[KEY_OUT_PRE]))
    grad_w = np.multiply(grad_b, i[KEY_IN])
    # propagate the gradient to the previous layer; assumes the layer stores its
    # own output dims in self.coords
    grad_next = xr.dot(grad_b, self.m_weights, dims=[*self.coords])
    return grad_next, inputs, p.append({KEY_B: grad_b, KEY_W: grad_w})

class ConvolutionLayer(Layer):
    def pass_forward(self, inputs, past_inputs):
        pass  # TODO

    def pass_back(self, gradient, past_inputs, past_outputs):
        pass  # TODO

def test():
    NUM_CASES = 5
    NUM_INPUTS = 10
    NUM_OUTPUTS = 2
    DIM_IN = 'inputs'
    DIM_OUT = 'neurons'
    DIM_LABEL = 'labels'
    DIM_CASE = 'cases'
    layer0 = InputLayer({DIM_IN: NUM_INPUTS})
    layer1 = FullyConnectedLayer(layer0, {DIM_OUT: NUM_OUTPUTS})
    layer2 = OutputLayer(layer1)
    input_coords = {DIM_CASE: np.arange(NUM_CASES), DIM_IN: np.arange(NUM_INPUTS)}
    inputs = xr.DataArray(np.ones((NUM_CASES, NUM_INPUTS)),
                          dims=[*input_coords],
                          coords=input_coords)
    label_coords = {DIM_CASE: np.arange(NUM_CASES), KEY_LABELS: np.arange(NUM_OUTPUTS)}
    labels = xr.DataArray(np.ones((NUM_CASES, NUM_OUTPUTS)),
                          dims=[*label_coords],
                          coords=label_coords)
    # TODO
    out, outputs = layer2.pass_forward(inputs)
    gradients = layer0.pass_back(labels, outputs)

if __name__ == '__main__':
    test()
def hierarchical_ggm(Y, true_labels, rho, l1, l2, Lambda_shift_threshold,
                     loss_shift_threshold, verbose=False):
    p = len(Y.component_y)
    K = len(true_labels.label)
    # get number of observations by class
    N = true_labels.sum(dim="observation_id")
    # get empirical mu
    mu = (Y * true_labels).sum(dim="observation_id") / N
    # get empirical sigma (S)
    centered_observations = Y - mu
    centered_observations_T = xr.DataArray(
        data=centered_observations,
        coords=[centered_observations.observation_id, range(p), range(K)],
        dims=["observation_id", "component_yT", "label"])
    S = xr.dot(true_labels * centered_observations,
               centered_observations_T,
               dims=['observation_id']) / N
    # penalised optimisation, GGL
    Lambda = xr.DataArray([np.eye(p)] * K, coords=S.coords, dims=S.dims)
    Z = xr.DataArray(np.zeros((K, p, p)), coords=S.coords, dims=S.dims)
    U = xr.DataArray(np.zeros((K, p, p)), coords=S.coords, dims=S.dims)
    Lambda_shift = 1
    loss_shift = 1
    # while loop with convergence check
    while (Lambda_shift > Lambda_shift_threshold
           or loss_shift > loss_shift_threshold):  # and loss_shift > 0:
        # if penalty == "GGL"
        loss = GGL_loss(Lambda, S, N, l1, l2)
        if verbose:
            print(loss)
        # one ADMM update
        # if penalty == "GGL":
        Lambda_prime, Z, U = update_admm_GGL(S, N, Lambda, Z, U, rho, l1, l2)
        loss_prime = GGL_loss(Lambda_prime, S, N, l1, l2)
        loss_shift = (loss - loss_prime) / np.abs(loss)
        Lambda_shift = float(
            np.sum(np.abs(Lambda_prime - Lambda)) / np.sum(np.abs(Lambda)))
        if verbose:
            print(Lambda_shift)
            print(loss_shift)
            print()
        Lambda = Lambda_prime
        loss = loss_prime
    return mu, Lambda
def _compute_wave_coefficient_vector(self):
    """Build the wave coefficient vector `r`."""
    r = xr.dot(self.A_inv, self.b).rename({"_hpoly": "hpoly"})
    r.name = "wave_coefficient_vector"
    return r
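
# A toy usage sketch of the pattern above (not from the original source; the
# 2x2 system and the dim names are invented): apply a precomputed inverse with
# xr.dot, then rename the leftover dim back to the contracted one.
import numpy as np
import xarray as xr

A = np.diag([2.0, 4.0])
A_inv = xr.DataArray(np.linalg.inv(A), dims=("_hpoly", "hpoly"))
b = xr.DataArray(np.array([1.0, 2.0]), dims="hpoly")
r = xr.dot(A_inv, b).rename({"_hpoly": "hpoly"})  # solves A r = b
print(r.values)  # [0.5 0.5]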
def get_D_KL_from_xarray(da_P_X_Y, da_P_X, da_P_Y):
    """Mutual information (base 10) of X and Y from the joint and marginal distributions.

    Roughly equivalent to::

        I_matrix = xr.apply_ufunc(func_D_KL, P_X_Y, P_X, P_Y)
        return I_matrix.sum()
    """
    da_log2 = xr.zeros_like(da_P_X_Y)
    import itertools
    str_dim_x = da_P_X.dims[0]
    str_dim_y = da_P_Y.dims[0]
    for realiz_id_x, realiz_id_y in itertools.product(
            da_P_X_Y[str_dim_x].values, da_P_X_Y[str_dim_y].values):
        p_xy = da_P_X_Y.loc[{str_dim_x: realiz_id_x, str_dim_y: realiz_id_y}]
        p_x = da_P_X.loc[{str_dim_x: realiz_id_x}]
        p_y = da_P_Y.loc[{str_dim_y: realiz_id_y}]
        log_p_xy_over_p_x_p_y = ufunc_log_pxy_over_px_py(p_xy, p_x, p_y)
        da_log2.loc[{
            str_dim_x: realiz_id_x,
            str_dim_y: realiz_id_y
        }] = log_p_xy_over_p_x_p_y
    # da_log2.loc[{str_dim_x: realiz_id_x, str_dim_y: realiz_id_y}] =
    # print("da_log2: ", da_log2)
    # print("da_P_X_Y: ", da_P_X_Y)
    mutual_information = xr.dot(da_P_X_Y, da_log2)
    print("mutual_information (", str_dim_x, ", ", str_dim_y, "): ",
          mutual_information.values)
    return mutual_information
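
# A self-contained sanity check of the final contraction above (not from the
# original source; the distributions are invented and base-2 logs are an
# assumption mirroring the `da_log2` name): for independent X and Y the log
# ratio is zero everywhere, so the xr.dot contraction gives zero mutual information.
import numpy as np
import xarray as xr

P_x = xr.DataArray([0.25, 0.75], dims="x")
P_y = xr.DataArray([0.5, 0.5], dims="y")
P_xy = P_x * P_y                          # independent joint distribution
log_ratio = np.log2(P_xy / (P_x * P_y))   # elementwise log2 P(x,y) / (P(x)P(y))
mi = xr.dot(P_xy, log_ratio)              # sums P(x,y) * log-ratio over 'x' and 'y'
print(float(mi))                          # 0.0 for independent variables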
def sea_ice_area(sic: xarray.DataArray, area: xarray.DataArray, thresh: str = "15 pct"):
    """Total sea ice area.

    Sea ice area measures the total sea ice covered area where sea ice concentration is above a threshold,
    usually set to 15%.

    Parameters
    ----------
    sic : xarray.DataArray
        Sea ice concentration [0, 1].
    area : xarray.DataArray
        Grid cell area [m²].
    thresh : str
        Minimum sea ice concentration for a grid cell to contribute to the sea ice area.

    Returns
    -------
    Sea ice area [m²].

    Notes
    -----
    To compute sea ice area over a subregion, first mask or subset the input sea ice concentration data.

    References
    ----------
    `What is the difference between sea ice area and extent
    <https://nsidc.org/arcticseaicenews/faq/#area_extent>`_
    """
    t = convert_units_to(thresh, sic)
    factor = convert_units_to("100 pct", sic)
    out = xarray.dot(sic.where(sic >= t, 0), area) / factor
    out.attrs["units"] = area.units
    return out
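
# A minimal sketch of the area computation above on synthetic data (not from
# the original source; it bypasses xclim's unit handling, and the 0.15
# threshold and the tiny grid are illustrative assumptions).
import numpy as np
import xarray as xr

sic = xr.DataArray([[0.0, 0.1], [0.5, 0.9]], dims=("y", "x"))   # concentration in [0, 1]
cell_area = xr.full_like(sic, 625e6)                            # 25 km x 25 km cells [m2]

thresh = 0.15
ice_area = xr.dot(sic.where(sic >= thresh, 0), cell_area)       # sum of sic * area over cells above threshold
print(float(ice_area))                                          # (0.5 + 0.9) * 625e6 = 8.75e8 m2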
def sea_ice_extent(sic, area, thresh="15 pct"):
    """Return the total sea ice extent.

    Sea ice extent measures the *ice-covered* area, where a region is considered ice-covered if its sea ice
    concentration is above a threshold, usually set to 15%.

    Parameters
    ----------
    sic : xarray.DataArray
        Sea ice concentration [0, 1].
    area : xarray.DataArray
        Grid cell area [m²].
    thresh : str
        Minimum sea ice concentration for a grid cell to contribute to the sea ice extent.

    Returns
    -------
    Sea ice extent [m²].

    Notes
    -----
    To compute sea ice extent over a subregion, first mask or subset the input sea ice concentration data.

    References
    ----------
    `What is the difference between sea ice area and extent
    <https://nsidc.org/arcticseaicenews/faq/#area_extent>`_
    """
    t = utils.convert_units_to(thresh, sic)
    out = xarray.dot(sic >= t, area)
    out.attrs["units"] = area.units
    return out
def micro(self):
    weights_micro = xr.apply_ufunc(
        get_micro_ensemble,
        self._obj.e_vals.groupby(self.outer_dim),
        self.E_0.groupby(self.outer_dim),
        kwargs={"delta_E": self.delta_E},
    )
    return xr.dot(weights_micro, self._obj.eev, dims=self.state_dim)
def canonical(self):
    beta = self.beta
    weights_canonical = xr.apply_ufunc(
        get_weights_canonical,
        self._obj.e_vals.groupby(self.outer_dim),
        beta.groupby(self.outer_dim),
    )
    return xr.dot(weights_canonical, self._obj.eev, dims=self.state_dim)
def covariance(x, y, dim=None):
    valid_values = x.notnull() & y.notnull()
    valid_count = valid_values.sum(dim)
    demeaned_x = (x - x.mean(dim)).fillna(0)
    demeaned_y = (y - y.mean(dim)).fillna(0)
    return xr.dot(demeaned_x, demeaned_y, dims=dim) / valid_count
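
# A quick check of the helper above (not from the original source; the data and
# the NumPy comparison are illustrative): with no missing values it reproduces
# the population covariance (normalised by N, matching valid_count).
import numpy as np
import xarray as xr

rng = np.random.default_rng(0)
x = xr.DataArray(rng.normal(size=100), dims="time")
y = xr.DataArray(rng.normal(size=100), dims="time")

cov_xr = covariance(x, y, dim="time")
cov_np = np.cov(x.values, y.values, bias=True)[0, 1]
np.testing.assert_allclose(float(cov_xr), cov_np)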
def construct_array_relative_beamvector(maintx: xr.DataArray, mainrx: xr.DataArray, tx_angle: xr.DataArray,
                                        rx_angle: xr.DataArray):
    """
    Given the orientation vectors representing the transmitter/receiver at time of ping/receive (maintx, mainrx)
    and the TX/RX steering angles (tx_angle, rx_angle), determine new 3d beam vector components at the midpoint
    between the TX and RX. This would be the 'actual' array relative beam vector.

    This is a simplification of the actual scenario: it assumes the tx-seafloor and rx-seafloor paths are
    co-located, which adds error in xyz due to the difference in path length/direction between the actual
    tx-seafloor and seafloor-rx rays.

    x = +FORWARD, y = +STARBOARD, z = +DOWN

    Returns: 3d beam vector in co-located array ref frame.  Of shape (xyz, time, beam); with 10 times and
    200 beams, beamvecs shape would be (3, 10, 200)

    | <xarray.DataArray 'tiltangle' (xyz: 3, time: 10, beam: 200)>
    | dask.array<concatenate, shape=(3, 10, 200), dtype=float64, chunksize=(1, 10, 200), chunktype=numpy.ndarray>
    | Coordinates:
    |   * time     (time) float64 1.496e+09 1.496e+09 ...
    |   * beam     (beam) int32 0 1 2 3 4 5 6 7 8 ... 194 195 196 197 198 199 200
    |   * xyz      (xyz) object 'x' 'y' 'z'

    Parameters
    ----------
    maintx
        orientation vector for transmitter at time of transmit, 2dim of shape (time, xyz)
    mainrx
        orientation vector for receiver at time of receive, 2dim of shape (time, xyz)
    tx_angle
        transmitter tiltangle for each ping time
    rx_angle
        receiver beam pointing angle for each ping time

    Returns
    -------
    xr.DataArray
        3d beam vector in co-located array ref frame
    """
    # delta - alignment angle between tx/rx vecs
    delt = np.arccos(xr.dot(maintx, mainrx, dims=['xyz'])) - np.pi / 2
    ysub1 = -np.sin(rx_angle)

    # solve for components of 3d beam vector
    ysub1 = ysub1 / np.cos(delt)
    ysub2 = np.sin(tx_angle) * np.tan(delt)
    radial = np.sqrt((ysub1 + ysub2) ** 2 + np.sin(tx_angle) ** 2)
    x = np.sin(tx_angle)
    y = ysub1 + ysub2
    z = np.sqrt(1 - radial ** 2)

    # generate new dataarray object for beam vectors
    newx, _ = xr.broadcast(x, y)  # broadcast to duplicate x along the beam dimension
    beamvecs = xr.concat([newx, y, z], pd.Index(list('xyz'), name='xyz'))
    return beamvecs
def _xcentral_moments(
    x,
    mom,
    w=None,
    axis=0,
    last=True,
    mom_dims=None,
):
    assert isinstance(x, xr.DataArray)

    if isinstance(mom, tuple):
        mom = mom[0]

    if mom_dims is None:
        mom_dims = ("mom_0", )
    if isinstance(mom_dims, str):
        mom_dims = (mom_dims, )
    assert len(mom_dims) == 1

    if w is None:
        w = xr.ones_like(x)
    else:
        w = xr.DataArray(w).broadcast_like(x)

    if isinstance(axis, int):
        dim = x.dims[axis]
    else:
        dim = axis

    wsum = w.sum(dim=dim)
    wsum_inv = 1.0 / wsum

    xave = xr.dot(w, x, dims=dim) * wsum_inv

    p = xr.DataArray(np.arange(0, mom + 1), dims=mom_dims)
    dx = (x - xave) ** p
    out = xr.dot(w, dx, dims=dim) * wsum_inv

    out.loc[{mom_dims[0]: 0}] = wsum
    out.loc[{mom_dims[0]: 1}] = xave

    if last:
        out = out.transpose(..., *mom_dims)
    return out
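
# A small check of the helper above (not from the original source; the data and
# unit weights are illustrative): position 0 holds the total weight, position 1
# the weighted mean, and position 2 the second central moment (variance, ddof=0).
import numpy as np
import xarray as xr

x = xr.DataArray(np.random.default_rng(1).normal(size=50), dims="rec")
out = _xcentral_moments(x, mom=2, axis=0)

np.testing.assert_allclose(out.isel(mom_0=0), 50.0)           # sum of unit weights
np.testing.assert_allclose(out.isel(mom_0=1), x.mean("rec"))
np.testing.assert_allclose(out.isel(mom_0=2), x.var("rec"))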
def ProjectCorrsProny(self, ism='First'):
    if not hasattr(self, 'pro_evec'):
        warn('PerformProny needs to be called to project, attempting now with default t0='
             + str(self.t0) + ', dt=' + str(self.dt) + ' and ism=' + str(ism))
        self.PerformProny()
    self.pro_proj_corr = xr.dot(self.pro_evec, self.matrix_corr, dims='jsm')
    if ism == 'First':
        self.pro_proj_corr = self.pro_proj_corr.isel(ism=0)
    else:
        self.pro_proj_corr = self.pro_proj_corr.sel(ism=ism)
    if hasattr(self, 'booted_mcorr') and self.booted_mcorr is not None:
        self.booted_pro_proj_corr = self.pro_proj_corr.copy()
        self.booted_pro_proj_corr = xr.apply_ufunc(
            lambda x: BootStrap(thisnboot=self.nboot),
            self.booted_pro_proj_corr,
            vectorize=True)
        for iboot in range(self.nboot):
            this_boot_mcorr = xr.apply_ufunc(lambda x: x.bootvals[iboot],
                                             self.booted_mcorr,
                                             vectorize=True)
            # this_booted_proj_corr = xr.dot(self.left_evec, this_boot_mcorr, dims='ism')
            if ism == 'First':
                this_booted_proj_corr = this_boot_mcorr.isel(ism=0)
            else:
                this_booted_proj_corr = this_boot_mcorr.sel(ism=ism)
            this_booted_proj_corr = xr.dot(self.pro_evec, this_booted_proj_corr, dims='jsm')

            def this_fun(itemp, boot_val):
                itemp.bootvals = itemp.bootvals.append(pa.Series([boot_val]))
                return itemp

            self.booted_pro_proj_corr = xr.apply_ufunc(
                this_fun,
                self.booted_pro_proj_corr,
                this_booted_proj_corr,
                vectorize=True)
def reduced_Q(Lambda, S, N_t):
    # simply log det - trace, no 2pi, no pi_t
    K = len(Lambda.label)
    # det(Lambda)
    determinants = xr.DataArray([np.linalg.det(Lambda[k]) for k in range(K)],
                                coords=[range(K)],
                                dims=['label'])
    # trace(Lambda . S)
    traces = xr.dot(Lambda, S, dims=["component_y", "component_yT"])
    # reduced Q function (the part depending on Lambda only)
    return float(np.sum(0.5 * (np.log(determinants) - traces) * N_t))
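
# A quick numeric check of the trace term above (not from the original source;
# sizes and matrices are invented): contracting over both component dims with
# xr.dot reproduces trace(Lambda @ S) per label when S is symmetric.
import numpy as np
import xarray as xr

K, p = 2, 3
rng = np.random.default_rng(0)
L_np = rng.normal(size=(K, p, p))
S_np = np.stack([a @ a.T for a in rng.normal(size=(K, p, p))])  # symmetric, covariance-like

Lam = xr.DataArray(L_np, dims=("label", "component_y", "component_yT"))
S = xr.DataArray(S_np, dims=("label", "component_y", "component_yT"))

traces = xr.dot(Lam, S, dims=["component_y", "component_yT"])
expected = np.array([np.trace(L_np[k] @ S_np[k]) for k in range(K)])
np.testing.assert_allclose(traces.values, expected)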
def pass_forward(self, inputs, func_normalize=lambda x: x):
    """Applies NeuralNet to inputs

    Arguments:
        inputs {xarray[dims: DIM_IN]} -- xarray with dimension DIM_IN, same size as DIM_IN
            in self.matrices[mkey(0, KEY_WEIGHT)]

    Keyword Arguments:
        func_normalize {function(np_array): np_array} -- function to apply to inputs before
            passing through the neural network (default: {lambda x: x})

    Raises:
        ValueError -- Missing dimension DIM_IN in inputs
        ValueError -- Size of inputs dimension DIM_IN does not match layer 0 size

    Returns:
        dict(xarray[dims: KEY_OUT_PRE or KEY_OUT_POST]) -- Dictionary containing (num_layers + 1)
            xarrays of intermediate layer outputs, with 2 matrices: KEY_OUT_PRE (without activation
            function applied) and KEY_OUT_POST (with activation function applied)
    """
    if DIM_IN not in inputs.dims:
        raise ValueError('Missing dimension \'' + DIM_IN + '\' in inputs')
    tsize = inputs.sizes[DIM_IN]
    msize = self.matrices[mkey(0, KEY_WEIGHT)].sizes[DIM_IN]
    if tsize != msize:
        raise ValueError('Size of \'' + DIM_IN + '\'=' + str(tsize) +
                         ' does not match layer 0 size: ' + str(msize))
    # ugly hack: remove coordinates for dimension 'inputs' if coordinates present
    if DIM_IN in inputs.coords:
        inputs = inputs.reset_index(DIM_IN, drop=True)
    activations = {
        mkey(0, KEY_OUT_PRE): inputs,
        mkey(0, KEY_OUT_POST): func_normalize(inputs)
    }
    for i in range(self.num_layers):
        pre_activation = np.add(
            xr.dot(activations[mkey(i, KEY_OUT_POST)],
                   self.matrices[mkey(i, KEY_WEIGHT)],
                   dims=(DIM_IN)),
            self.matrices[mkey(i, KEY_BIAS)])
        activations[mkey(i + 1, KEY_OUT_PRE)] = pre_activation.rename({DIM_OUT: DIM_IN})
        activations[mkey(i + 1, KEY_OUT_POST)] = self.func_activation(pre_activation).rename(
            {DIM_OUT: DIM_IN})
    return activations
def test_dot_align_coords(use_dask):
    # GH 3694
    if use_dask:
        if not has_dask:
            pytest.skip("test for dask.")

    a = np.arange(30 * 4).reshape(30, 4)
    b = np.arange(30 * 4 * 5).reshape(30, 4, 5)

    # use partially overlapping coords
    coords_a = {"a": np.arange(30), "b": np.arange(4)}
    coords_b = {"a": np.arange(5, 35), "b": np.arange(1, 5)}

    da_a = xr.DataArray(a, dims=["a", "b"], coords=coords_a)
    da_b = xr.DataArray(b, dims=["a", "b", "c"], coords=coords_b)

    if use_dask:
        da_a = da_a.chunk({"a": 3})
        da_b = da_b.chunk({"a": 3})

    # join="inner" is the default
    actual = xr.dot(da_a, da_b)
    # `dot` sums over the common dimensions of the arguments
    expected = (da_a * da_b).sum(["a", "b"])
    xr.testing.assert_allclose(expected, actual)

    actual = xr.dot(da_a, da_b, dims=...)
    expected = (da_a * da_b).sum()
    xr.testing.assert_allclose(expected, actual)

    with xr.set_options(arithmetic_join="exact"):
        with raises_regex(ValueError, "indexes along dimension"):
            xr.dot(da_a, da_b)

    # NOTE: dot always uses `join="inner"` because `(a * b).sum()` yields the same for all
    # join method (except "exact")
    with xr.set_options(arithmetic_join="left"):
        actual = xr.dot(da_a, da_b)
        expected = (da_a * da_b).sum(["a", "b"])
        xr.testing.assert_allclose(expected, actual)

    with xr.set_options(arithmetic_join="right"):
        actual = xr.dot(da_a, da_b)
        expected = (da_a * da_b).sum(["a", "b"])
        xr.testing.assert_allclose(expected, actual)

    with xr.set_options(arithmetic_join="outer"):
        actual = xr.dot(da_a, da_b)
        expected = (da_a * da_b).sum(["a", "b"])
        xr.testing.assert_allclose(expected, actual)
def aggregate_clustersum(ds, cluster, clusterdim):
    """Aggregate a 3-dimensional array over certain points (latitude, longitude).

    Parameters
    ----------
    ds : xr.Dataset
        the array to aggregate (collapse) spatially
    cluster : xr.DataArray
        3-dimensional array (clusterdim, latitude, longitude),
        `clusterdim` contains the True/False mask of points to aggregate over
        e.g. len(clusterdim)=4 means you have 4 clusters
    clusterdim : str
        dimension name to access the different True/False masks

    Returns
    -------
    xr.DataArray
        1-dimensional
    """
    out = xr.Dataset()

    # enforce same coordinates
    interp = True
    if (len(ds.latitude.values) == len(cluster.latitude.values)
            and len(ds.longitude.values) == len(cluster.longitude.values)):
        if (np.allclose(ds.latitude.values, cluster.latitude.values)
                and np.allclose(ds.longitude.values, cluster.longitude.values)):
            interp = False
    if interp:
        ds = ds.interp(latitude=cluster.latitude, longitude=cluster.longitude)

    area_per_gridpoint = calc_area(ds.isel(time=0))
    if isinstance(ds, xr.DataArray):
        ds = ds.to_dataset()

    for var in ds:
        for cl in cluster.coords[clusterdim]:
            newname = var + '_cluster' + str(cl.values)
            this_cluster = cluster.sel({clusterdim: cl})
            da = ds[var].where(this_cluster, 0.)  # no contribution from outside cluster
            out[newname] = xr.dot(da, area_per_gridpoint)
    return out.drop(clusterdim)
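
# A minimal sketch of the per-cluster weighting above (not from the original
# source; the grid, mask, and areas are invented): zeroing points outside the
# cluster and contracting with the grid-point areas gives the cluster's
# area-weighted sum.
import numpy as np
import xarray as xr

da = xr.DataArray([[1.0, 2.0], [3.0, 4.0]], dims=("latitude", "longitude"))
mask = xr.DataArray([[True, False], [False, True]], dims=("latitude", "longitude"))
area_per_gridpoint = xr.full_like(da, 1.0e10)                 # fake grid-point areas [m2]

cluster_sum = xr.dot(da.where(mask, 0.), area_per_gridpoint)  # sums over both spatial dims
print(float(cluster_sum))                                     # (1 + 4) * 1e10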
def project_onto_eof(field, eofs, sensor_dims, weight=None):
    """Project a field onto a set of provided EOFs to generate a corresponding set of pseudo-PCs

    Parameters
    ----------
    field : xarray DataArray
        Array containing the data to project onto the EOFs
    eofs : xarray DataArray
        Array containing the set of EOFs to project onto.
    sensor_dims : str or list of str
        EOFs sensor dimension(s).
    weight : str, xarray DataArray or Dataset
        Weighting to apply prior to projection. This should match the weighting used to
        calculate the eofs (see xeof.eof)

    Returns
    -------
    projection : xarray DataArray
        Array containing the pseudo-PCs

    Examples
    --------
    >>> A = xr.DataArray(np.random.normal(size=(6,4,40)),
    ...                  coords=[('lat', np.arange(-75,76,30)), ('lon', np.arange(45,316,90)),
    ...                          ('time', pd.date_range('2000-01-01', periods=40, freq='M'))])
    >>> eofs = xeof.eof(A, sensor_dims=['lat','lon'])
    >>> project_onto_eof(A, eofs['eof'], sensor_dims=['lat','lon'])
    <xarray.DataArray (mode: 20, time: 40)>
    array([[ ... ]])
    Coordinates:
      * mode     (mode) int64 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
      * time     (time) datetime64[ns] 2000-01-31 2000-02-29 ... 2003-04-30
    """
    if not isinstance(field, xr.DataArray):
        raise ValueError("field must be a xarray DataArray")
    if not isinstance(eofs, xr.DataArray):
        raise ValueError("eofs must be a xarray DataArray")
    field_weighted = _apply_weights(field, weight)
    return xr.dot(eofs, field_weighted, dims=sensor_dims)
def reconstructed_fields(self, mode=slice(1, None)):
    '''Reconstruct original input fields based on specified `mode`s.

    Parameters
    ----------
    mode : int, slice
        Modes to be considered for reconstructing the original fields. The default is
        `slice(1, None)` which returns the original fields based on *all* modes.

    Returns
    -------
    dict[DataArray, DataArray]
        Left and right reconstructed fields.

    '''
    eofs = self.eofs(scaling=None)
    pcs = self.pcs(scaling='eigen')
    coords = self._field_coords
    std = self._field_stds
    mean = self._field_means
    rec_fields = {}
    for key in self._fields.keys():
        eofs[key] = eofs[key].sel(mode=mode)
        pcs[key] = pcs[key].sel(mode=mode)
        rec_fields[key] = xr.dot(pcs[key], eofs[key].conjugate(), dims=['mode'])
        rec_fields[key] = rec_fields[key].real

        if self._analysis['is_coslat_corrected']:
            coslat = np.cos(np.deg2rad(coords[key]['lat']))
            rec_fields[key] /= np.sqrt(coslat)

        if self._analysis['is_normalized']:
            rec_fields[key] *= std[key]

        # add mean fields
        rec_fields[key] += mean[key]

    return rec_fields
def pass_back(
        self,
        activations,
        goal_label,
        func_loss_d=lambda output_v, goal_v: np.subtract(goal_v, output_v)):
    """Backpropagates activations to get gradients

    Arguments:
        activations {dict(xarray[dims: KEY_OUT_PRE or KEY_OUT_POST])} -- dict which is the
            return value of self.pass_forward()
        goal_label {xarray[dims: DIM_LABEL]} -- array of onehot vectors encoded along dim=DIM_LABEL

    Keyword Arguments:
        func_loss_d {function(xarray, xarray)} -- derivative of loss function, returns gradients
            of dim=DIM_OUT same size as final layer outputs
            (default: {lambda output_v, goal_v: np.subtract(goal_v, output_v)})

    Returns:
        dict(xarray[dims: DIM_IN, DIM_OUT]) -- dict of gradients, containing xarrays in same
            format as self.matrices
    """
    gradients = {}
    partial_d = func_loss_d(activations[mkey(self.num_layers, KEY_OUT_POST)],
                            goal_label.rename({DIM_LABEL: DIM_IN}))
    for i in reversed(range(self.num_layers)):
        partial_d = np.multiply(
            partial_d,
            self.func_activation_d(activations[mkey(i + 1, KEY_OUT_PRE)])).rename(
                {DIM_IN: DIM_OUT})
        # times 1, the bias's derivative
        gradients[mkey(i, KEY_BIAS)] = partial_d
        gradients[mkey(i, KEY_WEIGHT)] = np.multiply(
            partial_d, activations[mkey(i, KEY_OUT_POST)])  # times input
        partial_d = xr.dot(partial_d, self.matrices[mkey(i, KEY_WEIGHT)], dims=(DIM_OUT))
    return gradients
def build_geographic_beam_vectors(rotgeo: xr.DataArray, beamvecs: xr.DataArray):
    """
    Apply rotation matrix to bring transducer rel. beam vectors to geographic ref frame

    Parameters
    ----------
    rotgeo
        rotation matrices at each time/beam, of shape (beam, rot_i, time, xyz),
        see return_array_geographic_rotation
    beamvecs
        3d beam vector in co-located array ref frame (xyz, time, beam),
        see construct_array_relative_beamvector

    Returns
    -------
    xr.DataArray
        beam vectors in geographic ref frame, of shape (time, beam, bv_xyz)
    """
    bv_geo = xr.dot(rotgeo, beamvecs, dims='xyz')
    bv_geo = bv_geo.rename({'rot_i': 'bv_xyz'})
    bv_geo.coords['bv_xyz'] = ['x', 'y', 'z']
    bv_geo = bv_geo.transpose('time', 'beam', 'bv_xyz')
    return bv_geo
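
# A toy illustration of the rotation step above, calling the function defined
# just above (not from the original source; the shapes and the identity
# rotations are invented): xr.dot contracts the shared 'xyz' dim, leaving
# 'rot_i' as the new vector axis.
import numpy as np
import xarray as xr

ntime, nbeam = 2, 4
rotgeo = xr.DataArray(np.broadcast_to(np.eye(3), (nbeam, ntime, 3, 3)).copy(),
                      dims=("beam", "time", "rot_i", "xyz"))
down = np.zeros((3, ntime, nbeam))
down[2] = 1.0                                          # unit vectors pointing +DOWN
beamvecs = xr.DataArray(down, dims=("xyz", "time", "beam"))

bv_geo = build_geographic_beam_vectors(rotgeo, beamvecs)
print(bv_geo.shape)                                    # (2, 4, 3); identity rotation keeps [0, 0, 1]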
def predict_xr(result_ds, regressors):
    """input: result_ds as it came out of MLR and was saved to file, plus the regressors dataset"""
    # if produce_RI isn't called on data then you should explicitly put time info
    import xarray as xr
    import aux_functions_strat as aux
    rds = result_ds
    regressors = regressors.sel(time=rds.time)  # slice
    regressors = regressors.apply(aux.normalize_xr, norm=1, verbose=False)  # normalize
    reg_dict = dict(zip(rds.regressors.values, regressors.data_vars.values()))
    # make sure that all the regressor names are linked to their respective dataarrays
    for key, value in reg_dict.items():
        # print(key, value)
        assert value.name == key
    reg_da = xr.concat(reg_dict.values(), dim='regressors')
    reg_da['regressors'] = list(reg_dict.keys())
    reg_da.name = 'regressors_time_series'
    rds['predicted'] = xr.dot(rds.params, reg_da) + rds.intercept
    rds = aux.xr_order(rds)
    # returns the same dataset but with the total predicted reconstructed geo-time-series field
    result_ds = rds
    return result_ds
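
# A toy sketch of the reconstruction step above (not from the original source;
# names and sizes are invented): contracting parameters and regressor time
# series over 'regressors' is the xr.dot form of `predicted = X @ beta + intercept`.
import numpy as np
import xarray as xr

params = xr.DataArray([2.0, -1.0], dims="regressors",
                      coords={"regressors": ["enso", "qbo"]})
reg_da = xr.DataArray(np.random.default_rng(0).normal(size=(2, 5)),
                      dims=("regressors", "time"),
                      coords={"regressors": ["enso", "qbo"]})
intercept = 0.5

predicted = xr.dot(params, reg_da, dims="regressors") + intercept
print(predicted.dims, predicted.shape)  # ('time',) (5,)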
def RMSE_persistence(pred, obs):
    """Calculate the RMSE for persistence forecasts.

    Parameters
    ----------
    pred : xr.DataArray
        the prediction array, from which the timestamps are taken
        to verify the persistence forecast on
    obs : xr.DataArray
        the true values to compare with

    TODO: now assuming pred.forecast_day.values is scalar
    """
    inits = pred.init_time.values
    err = np.zeros(len(inits))
    for i, init in enumerate(inits):
        valid_time = pred.sel(init_time=init).time
        persistence = obs.sel(time=init)
        d = persistence - obs.sel(time=valid_time)
        err[i] = float(xr.dot(d, d))
    return np.sqrt(np.mean(err))
def reconstructed_fields(self, mode=slice(1, None)):
    eofs = self.eofs(scaling=None)
    pcs = self.pcs(scaling='eigen')
    coords = self._field_coords
    std = self._field_stds
    mean = self._field_means
    rec_fields = {}
    for key in self._fields.keys():
        eofs[key] = eofs[key].sel(mode=mode)
        pcs[key] = pcs[key].sel(mode=mode)
        rec_fields[key] = xr.dot(pcs[key], eofs[key].conjugate(), dims=['mode'])
        rec_fields[key] = rec_fields[key].real

        if self._analysis['is_coslat_corrected']:
            rec_fields[key] /= np.sqrt(np.cos(np.deg2rad(coords[key]['lat'])))

        if self._analysis['is_normalized']:
            rec_fields[key] *= std[key]

        # add mean fields
        rec_fields[key] += mean[key]

    return rec_fields
def test_dot(use_dask):
    if use_dask:
        if not has_dask:
            pytest.skip('test for dask.')

    a = np.arange(30 * 4).reshape(30, 4)
    b = np.arange(30 * 4 * 5).reshape(30, 4, 5)
    c = np.arange(5 * 60).reshape(5, 60)
    da_a = xr.DataArray(a, dims=['a', 'b'], coords={'a': np.linspace(0, 1, 30)})
    da_b = xr.DataArray(b, dims=['a', 'b', 'c'], coords={'a': np.linspace(0, 1, 30)})
    da_c = xr.DataArray(c, dims=['c', 'e'])
    if use_dask:
        da_a = da_a.chunk({'a': 3})
        da_b = da_b.chunk({'a': 3})
        da_c = da_c.chunk({'c': 3})

    actual = xr.dot(da_a, da_b, dims=['a', 'b'])
    assert actual.dims == ('c', )
    assert (actual.data == np.einsum('ij,ijk->k', a, b)).all()
    assert isinstance(actual.variable.data, type(da_a.variable.data))

    actual = xr.dot(da_a, da_b)
    assert actual.dims == ('c', )
    assert (actual.data == np.einsum('ij,ijk->k', a, b)).all()
    assert isinstance(actual.variable.data, type(da_a.variable.data))

    if use_dask:
        import dask
        if LooseVersion(dask.__version__) < LooseVersion('0.17.3'):
            pytest.skip("needs dask.array.einsum")

    # for only a single array is passed without dims argument, just return
    # as is
    actual = xr.dot(da_a)
    assert da_a.identical(actual)

    # test for variable
    actual = xr.dot(da_a.variable, da_b.variable)
    assert actual.dims == ('c', )
    assert (actual.data == np.einsum('ij,ijk->k', a, b)).all()
    assert isinstance(actual.data, type(da_a.variable.data))

    if use_dask:
        da_a = da_a.chunk({'a': 3})
        da_b = da_b.chunk({'a': 3})
        actual = xr.dot(da_a, da_b, dims=['b'])
        assert actual.dims == ('a', 'c')
        assert (actual.data == np.einsum('ij,ijk->ik', a, b)).all()
        assert isinstance(actual.variable.data, type(da_a.variable.data))

    actual = xr.dot(da_a, da_b, dims=['b'])
    assert actual.dims == ('a', 'c')
    assert (actual.data == np.einsum('ij,ijk->ik', a, b)).all()

    actual = xr.dot(da_a, da_b, dims='b')
    assert actual.dims == ('a', 'c')
    assert (actual.data == np.einsum('ij,ijk->ik', a, b)).all()

    actual = xr.dot(da_a, da_b, dims='a')
    assert actual.dims == ('b', 'c')
    assert (actual.data == np.einsum('ij,ijk->jk', a, b)).all()

    actual = xr.dot(da_a, da_b, dims='c')
    assert actual.dims == ('a', 'b')
    assert (actual.data == np.einsum('ij,ijk->ij', a, b)).all()

    actual = xr.dot(da_a, da_b, da_c, dims=['a', 'b'])
    assert actual.dims == ('c', 'e')
    assert (actual.data == np.einsum('ij,ijk,kl->kl ', a, b, c)).all()

    # should work with tuple
    actual = xr.dot(da_a, da_b, dims=('c', ))
    assert actual.dims == ('a', 'b')
    assert (actual.data == np.einsum('ij,ijk->ij', a, b)).all()

    # default dims
    actual = xr.dot(da_a, da_b, da_c)
    assert actual.dims == ('e', )
    assert (actual.data == np.einsum('ij,ijk,kl->l ', a, b, c)).all()

    # 1 array summation
    actual = xr.dot(da_a, dims='a')
    assert actual.dims == ('b', )
    assert (actual.data == np.einsum('ij->j ', a)).all()

    # empty dim
    actual = xr.dot(da_a.sel(a=[]), da_a.sel(a=[]), dims='a')
    assert actual.dims == ('b', )
    assert (actual.data == np.zeros(actual.shape)).all()

    # Invalid cases
    if not use_dask or LooseVersion(dask.__version__) > LooseVersion('0.17.4'):
        with pytest.raises(TypeError):
            xr.dot(da_a, dims='a', invalid=None)
    with pytest.raises(TypeError):
        xr.dot(da_a.to_dataset(name='da'), dims='a')
    with pytest.raises(TypeError):
        xr.dot(dims='a')

    # einsum parameters
    actual = xr.dot(da_a, da_b, dims=['b'], order='C')
    assert (actual.data == np.einsum('ij,ijk->ik', a, b)).all()
    assert actual.values.flags['C_CONTIGUOUS']
    assert not actual.values.flags['F_CONTIGUOUS']
    actual = xr.dot(da_a, da_b, dims=['b'], order='F')
    assert (actual.data == np.einsum('ij,ijk->ik', a, b)).all()
    # dask converts Fortran arrays to C order when merging the final array
    if not use_dask:
        assert not actual.values.flags['C_CONTIGUOUS']
        assert actual.values.flags['F_CONTIGUOUS']

    # einsum has a constant string as of the first parameter, which makes
    # it hard to pass to xarray.apply_ufunc.
    # make sure dot() uses functools.partial(einsum, subscripts), which
    # can be pickled, and not a lambda, which can't.
    pickle.loads(pickle.dumps(xr.dot(da_a)))