def test_kendalltau(self): # Tests some computations of Kendall's tau x = ma.fix_invalid([5.05, 6.75, 3.21, 2.66, np.nan]) y = ma.fix_invalid([1.65, 26.5, -5.93, 7.96, np.nan]) z = ma.fix_invalid([1.65, 2.64, 2.64, 6.95, np.nan]) assert_almost_equal(np.asarray(mstats.kendalltau(x, y)), [+0.3333333, 0.4969059]) assert_almost_equal(np.asarray(mstats.kendalltau(x, z)), [-0.5477226, 0.2785987]) # x = ma.fix_invalid([ 0, 0, 0, 0, 20, 20, 0, 60, 0, 20, 10, 10, 0, 40, 0, 20, 0, 0, 0, 0, 0, np.nan ]) y = ma.fix_invalid([ 0, 80, 80, 80, 10, 33, 60, 0, 67, 27, 25, 80, 80, 80, 80, 80, 80, 0, 10, 45, np.nan, 0 ]) result = mstats.kendalltau(x, y) assert_almost_equal(np.asarray(result), [-0.1585188, 0.4128009]) # test for namedtuple attributes res = mstats.kendalltau(x, y) attributes = ('correlation', 'pvalue') check_named_results(res, attributes, ma=True)
def test_spearmanr(self): # Tests some computations of Spearman's rho (x, y) = ([5.05, 6.75, 3.21, 2.66], [1.65, 2.64, 2.64, 6.95]) assert_almost_equal(mstats.spearmanr(x, y)[0], -0.6324555) (x, y) = ([5.05, 6.75, 3.21, 2.66, np.nan], [1.65, 2.64, 2.64, 6.95, np.nan]) (x, y) = (ma.fix_invalid(x), ma.fix_invalid(y)) assert_almost_equal(mstats.spearmanr(x, y)[0], -0.6324555) x = [ 2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1, 1.0, 1.4, 7.9, 0.3, 3.9, 0.3, 6.7 ] y = [ 22.6, 8.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6, 0.0, 0.6, 6.7, 3.8, 1.0, 1.2, 1.4 ] assert_almost_equal(mstats.spearmanr(x, y)[0], 0.6887299) x = [ 2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1, 1.0, 1.4, 7.9, 0.3, 3.9, 0.3, 6.7, np.nan ] y = [ 22.6, 8.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6, 0.0, 0.6, 6.7, 3.8, 1.0, 1.2, 1.4, np.nan ] (x, y) = (ma.fix_invalid(x), ma.fix_invalid(y)) assert_almost_equal(mstats.spearmanr(x, y)[0], 0.6887299) # test for namedtuple attributes res = mstats.spearmanr(x, y) attributes = ('correlation', 'pvalue') check_named_results(res, attributes, ma=True)
def test_spearmanr(self): "Tests some computations of Spearman's rho" (x, y) = ([5.05, 6.75, 3.21, 2.66], [1.65, 2.64, 2.64, 6.95]) assert_almost_equal(mstats.spearmanr(x, y)[0], -0.6324555) (x, y) = ([5.05, 6.75, 3.21, 2.66, np.nan], [1.65, 2.64, 2.64, 6.95, np.nan]) (x, y) = (ma.fix_invalid(x), ma.fix_invalid(y)) assert_almost_equal(mstats.spearmanr(x, y)[0], -0.6324555) # x = [ 2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1, 1.0, 1.4, 7.9, 0.3, 3.9, 0.3, 6.7 ] y = [ 22.6, 08.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6, 0.0, 0.6, 6.7, 3.8, 1.0, 1.2, 1.4 ] assert_almost_equal(mstats.spearmanr(x, y)[0], 0.6887299) x = [ 2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1, 1.0, 1.4, 7.9, 0.3, 3.9, 0.3, 6.7, np.nan ] y = [ 22.6, 08.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6, 0.0, 0.6, 6.7, 3.8, 1.0, 1.2, 1.4, np.nan ] (x, y) = (ma.fix_invalid(x), ma.fix_invalid(y)) assert_almost_equal(mstats.spearmanr(x, y)[0], 0.6887299)
def r2eff_TSMFK01(r20a=None, dw=None, dw_orig=None, k_AB=None, tcp=None, back_calc=None):
    """Calculate the R2eff values for the TSMFK01 model.

    See the module docstring for details.

    @keyword r20a:      The R20 parameter value of state A (R2 with no exchange).
    @type r20a:         numpy float array of rank [NE][NS][NM][NO][ND]
    @keyword dw:        The chemical exchange difference between states A and B in rad/s.
    @type dw:           numpy float array of rank [NE][NS][NM][NO][ND]
    @keyword dw_orig:   The chemical exchange difference between states A and B in ppm. This is only for faster checking of zero values, which result in no exchange.
    @type dw_orig:      numpy float array of rank-1
    @keyword k_AB:      The k_AB parameter value (the forward exchange rate in rad/s).
    @type k_AB:         float
    @keyword tcp:       The tau_CPMG times (1 / 4.nu1).
    @type tcp:          numpy float array of rank [NE][NS][NM][NO][ND]
    @keyword back_calc: The array for holding the back calculated R2eff values. Each element corresponds to one of the CPMG nu1 frequencies.
    @type back_calc:    numpy float array of rank [NE][NS][NM][NO][ND]
    """

    # Flag to tell if values should be replaced if dw is zero.
    t_dw_zero = False

    # Catch parameter values that will result in no exchange, returning flat R2eff = R20 lines (when kex = 0.0, k_AB = 0.0).
    # Test if k_AB is zero.
    if k_AB == 0.0:
        back_calc[:] = r20a
        return

    # Test if dw is zero. Create a mask for the affected spins to replace these with R20 at the end of the calculation. Wait for replacement, since this is spin specific.
    if min(fabs(dw_orig)) == 0.0:
        t_dw_zero = True
        mask_dw_zero = masked_where(dw == 0.0, dw)

    # Denominator.
    denom = dw * tcp

    # The numerator.
    numer = sin(denom)

    # Catch zeros (to avoid pointless mathematical operations).
    # This will result in no exchange, returning flat lines.
    if min(fabs(numer)) == 0.0:
        # Calculate R2eff for forward.
        back_calc[:] = r20a + k_AB
    else:
        # Calculate R2eff.
        back_calc[:] = r20a + k_AB - k_AB * numer / denom

    # Replace data in array.
    # If dw is zero.
    if t_dw_zero:
        back_calc[mask_dw_zero.mask] = r20a[mask_dw_zero.mask]

    # Catch errors, taking a sum over array is the fastest way to check for
    # +/- inf (infinity) and nan (not a number).
    if not isfinite(sum(back_calc)):
        # Replaces nan, inf, etc. with fill value.
        fix_invalid(back_calc, copy=False, fill_value=1e100)
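# Hedged usage sketch for r2eff_TSMFK01, not part of the original source. It
# assumes the module-level imports the function body implies (fabs, isfinite,
# min, sin, sum from numpy; masked_where and fix_invalid from numpy.ma) and
# collapses the rank-[NE][NS][NM][NO][ND] arrays to rank-1 for illustration.
# The dispersion curve is R2eff = R20A + k_AB - k_AB*sin(dw*tcp)/(dw*tcp).
import numpy as np

nu_cpmg = np.array([50.0, 100.0, 200.0, 400.0])  # CPMG frequencies (Hz)
r20a = np.full(4, 10.0)                          # R2 of state A, no exchange (s^-1)
dw = np.full(4, 1000.0)                          # shift difference (rad/s)
tcp = 1.0 / (4.0 * nu_cpmg)                      # tau_CPMG = 1/(4*nu1)
back_calc = np.zeros(4)

r2eff_TSMFK01(r20a=r20a, dw=dw, dw_orig=dw, k_AB=2.0, tcp=tcp, back_calc=back_calc)
print(back_calc)  # tends to R20A + k_AB at slow pulsing, back to R20A when fast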
def r2eff_LM63(r20=None, phi_ex=None, kex=None, cpmg_frqs=None, back_calc=None): """Calculate the R2eff values for the LM63 model. See the module docstring for details. @keyword r20: The R20 parameter value (R2 with no exchange). @type r20: numpy float array of rank [NE][NS][NM][NO][ND] @keyword phi_ex: The phi_ex parameter value (pA * pB * delta_omega^2). @type phi_ex: numpy float array of rank [NE][NS][NM][NO][ND] @keyword kex: The kex parameter value (the exchange rate in rad/s). @type kex: float @keyword cpmg_frqs: The CPMG nu1 frequencies. @type cpmg_frqs: numpy float array of rank [NE][NS][NM][NO][ND] @keyword back_calc: The array for holding the back calculated R2eff values. Each element corresponds to one of the CPMG nu1 frequencies. @type back_calc: numpy float array of rank [NE][NS][NM][NO][ND] """ # Flag to tell if values should be replaced if phi_ex is zero. t_phi_ex_zero = False # Catch divide with zeros (to avoid pointless mathematical operations). if kex == 0.0: back_calc[:] = r20 return # Catch zeros (to avoid pointless mathematical operations). # This will result in no exchange, returning flat lines. if min(phi_ex) == 0.0: t_phi_ex_zero = True mask_phi_ex_zero = masked_where(phi_ex == 0.0, phi_ex) # Repetitive calculations (to speed up calculations). rex = phi_ex / kex kex_4 = 4.0 / kex # Calculate R2eff. back_calc[:] = r20 + rex * (1.0 - kex_4 * cpmg_frqs * tanh(kex / (4.0 * cpmg_frqs))) # Replace data in array. # If phi_ex is zero. if t_phi_ex_zero: back_calc[mask_phi_ex_zero.mask] = r20[mask_phi_ex_zero.mask] # Catch errors, taking a sum over array is the fastest way to check for # +/- inf (infinity) and nan (not a number). if not isfinite(sum(back_calc)): # Replaces nan, inf, etc. with fill value. fix_invalid(back_calc, copy=False, fill_value=1e100)
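# Hedged usage sketch for r2eff_LM63 (not from the original file); it assumes
# the module-level numpy imports the body implies (tanh, min, isfinite, sum,
# plus masked_where and fix_invalid from numpy.ma) and rank-1 arrays for
# brevity. The closed form is
# R2eff = R20 + (phi_ex/kex) * (1 - (4*nu/kex) * tanh(kex/(4*nu))).
import numpy as np

cpmg_frqs = np.array([25.0, 50.0, 100.0, 200.0, 400.0])  # nu_CPMG (Hz)
r20 = np.full(5, 8.0)                                    # R2 with no exchange (s^-1)
phi_ex = np.full(5, 50000.0)                             # pA*pB*delta_omega^2 (rad^2/s^2)
back_calc = np.zeros(5)

r2eff_LM63(r20=r20, phi_ex=phi_ex, kex=1000.0, cpmg_frqs=cpmg_frqs, back_calc=back_calc)
print(back_calc)  # decays from R20 + phi_ex/kex towards R20 as nu_CPMG grows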
def preprocess(self, X, method):
    """
    Preprocess the data with one of three methods: "bucket" (quantile bins
    scaled into the 0-1 range), "clip" (cap raw counts at a cutoff), or
    "log" (log-transform with log(0) replaced by 0).
    """
    if method == "bucket":
        # scales into 0-1 range with bins
        print("using the bucket prep method")
        from sklearn.preprocessing import KBinsDiscretizer
        est = KBinsDiscretizer(n_bins=10, encode="ordinal", strategy="quantile")
        est.fit(X)
        X_processed = est.transform(X)
        X_processed /= 10  # transform from nominal values to 0-1
        return X_processed
    elif method == "clip":
        # clips the raw counts into a certain range
        print("using the clip prep method")
        cutoff = 1000
        X_processed = np.minimum(X, cutoff) + np.sqrt(np.maximum(X - cutoff, 0))
        return X_processed
    elif method == "log":
        # takes the log of the count
        print("using the log prep method")
        import numpy.ma as ma
        mask = ma.log(X)  # mask logged data to replace NaN (log0) with 0
        X_processed = ma.fix_invalid(mask, fill_value=0).data
        return X_processed
    else:
        raise Exception("Incorrect preprocess method name passed!")
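# Illustrative check of the "log" branch above (an assumption, not from the
# original project): ma.log masks the log(0) entries, and ma.fix_invalid(...)
# then hands back finite data under those slots, so zero counts come out as 0
# rather than -inf.
import numpy as np
import numpy.ma as ma

X = np.array([[0.0, 1.0, 10.0], [100.0, 0.0, 1000.0]])
logged = ma.log(X)                                  # entries with X == 0 are masked
X_processed = ma.fix_invalid(logged, fill_value=0).data
print(X_processed)                                  # 0 where the raw count was 0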
def impute_missing_total_reads(total_reads, missing_variant_confidence):
    # Change NaNs to masked values via SciPy.
    masked_total_reads = ma.fix_invalid(total_reads)

    # Going forward, suppose you have v variants and s samples in a v*s matrix of
    # read counts. Missing values are masked.

    # Calculate geometric mean of variant read depth in each sample. Result: s*1
    sample_means = gmean(masked_total_reads, axis=0)
    assert np.sum(sample_means <= 0) == np.sum(np.isnan(sample_means)) == 0
    # Divide every variant's read count by its mean sample read depth to get read
    # depth enrichment relative to other variants in sample. Result: v*s
    normalized_to_sample = np.dot(masked_total_reads, np.diag(1./sample_means))
    # For each variant, calculate geometric mean of its read depth enrichment
    # across samples. Result: v*1
    variant_mean_reads = gmean(normalized_to_sample, axis=1)
    assert np.sum(variant_mean_reads <= 0) == np.sum(np.isnan(variant_mean_reads)) == 0

    # Convert 1D arrays to vectors to permit matrix multiplication.
    imputed_counts = np.dot(variant_mean_reads.reshape((-1, 1)), sample_means.reshape((1, -1)))
    nan_coords = np.where(np.isnan(total_reads))
    total_reads[nan_coords] = imputed_counts[nan_coords]
    assert np.sum(total_reads <= 0) == np.sum(np.isnan(total_reads)) == 0

    total_reads[nan_coords] *= missing_variant_confidence
    return np.floor(total_reads).astype(int)
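# Hedged standalone sketch of the imputation idea used above, assuming `gmean`
# is scipy.stats.mstats.gmean so that masked entries are ignored: a missing
# cell is estimated as (variant geometric-mean enrichment) x (sample
# geometric-mean depth).
import numpy as np
from scipy.stats.mstats import gmean  # assumed to match the gmean imported above

reads = np.ma.fix_invalid(np.array([[100.0, 200.0],
                                    [ 50.0, np.nan],   # missing read count
                                    [ 80.0, 160.0]]))
sample_means = gmean(reads, axis=0)               # per-sample depth
enrichment = gmean(reads / sample_means, axis=1)  # per-variant factor
print(enrichment[1] * sample_means[1])            # estimate for the NaN cell, ~121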
def loadmap_nansAsZeros(image, fill_value=0, copy=False):
    # Pass a different fill_value if needed, e.g. np.mean(image) or np.median(image).
    # Overrides the values in the input image; the returned image is a masked array.
    image = imread(image)
    image = ma.fix_invalid(image, copy=copy, fill_value=fill_value)
    return image
def constant_cluster_size(x, tol=0): """Estimate the cluster size with (nearly) constant value Returns how many consecutive neighbor values are within a given tolerance range. Note that invalid values, like NaN, are ignored. """ assert np.ndim(x) == 1, 'Not ready for more than 1 dimension' # Adding a tolerance to handle roundings due to different numeric types. tol = tol + 1e-5 * tol ivalid = np.nonzero(~ma.getmaskarray(ma.fix_invalid(x)))[0] dx = np.diff(np.atleast_1d(x)[ivalid]) cluster_size = np.zeros(np.shape(x), dtype='i') for i, iv in enumerate(ivalid): idx = np.absolute(dx[i:].cumsum()) > tol if True in idx: cluster_size[iv] += np.nonzero(idx)[0].min() else: cluster_size[iv] += idx.size idx = np.absolute(dx[0:i][::-1].cumsum()) > tol if True in idx: cluster_size[iv] += np.nonzero(idx)[0].min() else: cluster_size[iv] += idx.size return cluster_size
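# Illustrative call (not from the original source). With tol=0, each of the
# two equal middle values has exactly one constant neighbour, while the end
# points have none, so the expected output is [0 1 1 0].
import numpy as np

print(constant_cluster_size(np.array([0.0, 1.0, 1.0, 3.0])))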
def test_cov(self): "Tests the cov function." x = ma.array([[1, 2, 3], [4, 5, 6]], mask=[[1, 0, 0], [0, 0, 0]]) c = mstats.cov(x[0]) assert_equal(c, x[0].var(ddof=1)) c = mstats.cov(x[1]) assert_equal(c, x[1].var(ddof=1)) c = mstats.cov(x) assert_equal(c[1, 0], (x[0].anom() * x[1].anom()).sum()) # x = [[nan, nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1], [4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3], [3, 2, 5, 6, 18, 4, 9, 1, 1, nan, 1, 1, nan], [nan, 6, 11, 4, 17, nan, 6, 1, 1, 2, 5, 1, 1]] x = ma.fix_invalid(x).T (winter, spring, summer, fall) = x.T # assert_almost_equal(mstats.cov(winter, winter, bias=True), winter.var(ddof=0)) assert_almost_equal(mstats.cov(winter, winter, bias=False), winter.var(ddof=1)) assert_almost_equal(mstats.cov(winter, spring)[0, 1], 7.7) assert_almost_equal(mstats.cov(winter, spring)[1, 0], 7.7) assert_almost_equal(mstats.cov(winter, summer)[0, 1], 19.1111111, 7) assert_almost_equal(mstats.cov(winter, summer)[1, 0], 19.1111111, 7) assert_almost_equal(mstats.cov(winter, fall)[0, 1], 20) assert_almost_equal(mstats.cov(winter, fall)[1, 0], 20)
def ccf(x, y, periodogram=True):
    """Computes the cross-correlation of the series x and y at different lags.
    The computations are performed on anomalies (deviations from average).
    Gaps in the series are filled first, anomalies are then computed and
    missing values filled with 0. If x and y are valid TimeSeries objects,
    they are aligned so that their starting and ending points match.

    Parameters
    ----------
    x : sequence
        Input data.
    y : sequence
        Input data.
        If y is longer than x, it is truncated to match the length of x.
        If y is shorter than x, x is truncated.
    periodogram : {True, False} optional
        Whether to return a periodogram or a standard estimate of the
        autocovariance.

    Returns
    -------
    cvf : ma.array
        Cross-correlation at lags [0,1,...,n,n-1,...,-1]
    """
    ccf_ = cvf(x, y, periodogram)
    return ma.fix_invalid(ccf_ / ccf_[0])
def _acf(x, mode): """Computes the auto-correlation function of the time series x. Note that the computations are performed on anomalies (deviations from average). Gaps in the series are filled first, the anomalies are then computed and the missing values filled with 0. :Parameters: `x` : TimeSeries Time series. """ x = ma.array(x, copy=False, subok=True, dtype=float) if x.ndim > 1: raise ValueError("The input array should be 1D only.") # make sure there's no gap in the data if isinstance(x, TimeSeries) and x.has_missing_dates(): x = ts.fill_missing_dates(x) # m = np.logical_not(ma.getmaskarray(x)).astype(int) x = x.anom().filled(0).view(ndarray) xx = (x * x) n = len(x) # _avf = np.correlate(x, x, 'full')[n - 1:] if mode: dnm_ = np.fromiter((np.sum(x[k:] * x[:-k]) / np.sum(m[k:] * xx[:-k]) for k in range(1, n)), dtype=float) else: dnm_ = np.fromiter((np.sum(x[k:]*x[:-k])/\ np.sqrt((m[k:]*xx[:-k]).sum() * (m[:-k]*xx[k:]).sum()) for k in range(1,n)), dtype=float) poslags = _avf[1:] / dnm_ return ma.fix_invalid( np.concatenate([np.array([1.]), poslags, poslags[::-1]]))
def _moving_func(data, cfunc, kwargs):
    data = ma.fix_invalid(data)
    data = ma.array(data.filled(0), mask=data._mask)

    if data.ndim == 1:
        kwargs['array'] = data
        result_dict = cfunc(**kwargs)
        return _process_result_dict(data, result_dict)
    elif data.ndim == 2:
        for i in range(data.shape[-1]):
            kwargs['array'] = data[:, i]
            result_dict = cfunc(**kwargs)
            if i == 0:
                rtype = result_dict['array'].dtype
                result = data.astype(rtype)
                print(data.dtype, result.dtype)
            rmask = result_dict.get('mask', ma.nomask)
            curr_col = marray(result_dict['array'], mask=rmask, copy=False)
            result[:, i] = curr_col
        return result
    else:
        raise ValueError("Data should be at most 2D")
def test_cov(self): "Tests the cov function." x = ma.array([[1,2,3],[4,5,6]], mask=[[1,0,0],[0,0,0]]) c = mstats.cov(x[0]) assert_equal(c, x[0].var(ddof=1)) c = mstats.cov(x[1]) assert_equal(c, x[1].var(ddof=1)) c = mstats.cov(x) assert_equal(c[1,0], (x[0].anom()*x[1].anom()).sum()) # x = [[nan,nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1], [ 4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3], [ 3, 2, 5, 6, 18, 4, 9, 1, 1,nan, 1, 1,nan], [nan, 6, 11, 4, 17,nan, 6, 1, 1, 2, 5, 1, 1]] x = ma.fix_invalid(x).T (winter,spring,summer,fall) = x.T # assert_almost_equal(mstats.cov(winter,winter,bias=True), winter.var(ddof=0)) assert_almost_equal(mstats.cov(winter,winter,bias=False), winter.var(ddof=1)) assert_almost_equal(mstats.cov(winter,spring)[0,1], 7.7) assert_almost_equal(mstats.cov(winter,spring)[1,0], 7.7) assert_almost_equal(mstats.cov(winter,summer)[0,1], 19.1111111, 7) assert_almost_equal(mstats.cov(winter,summer)[1,0], 19.1111111, 7) assert_almost_equal(mstats.cov(winter,fall)[0,1], 20) assert_almost_equal(mstats.cov(winter,fall)[1,0], 20)
class TestVariability(TestCase): """ Comparison numbers are found using R v.1.5.1 note that length(testcase) = 4 """ testcase = ma.fix_invalid([1,2,3,4,np.nan]) def test_signaltonoise(self): # This is not in R, so used: # mean(testcase, axis=0) / (sqrt(var(testcase)*3/4)) y = mstats.signaltonoise(self.testcase) assert_almost_equal(y,2.236067977) def test_sem(self): # This is not in R, so used: sqrt(var(testcase)*3/4) / sqrt(3) y = mstats.sem(self.testcase) assert_almost_equal(y,0.6454972244) def test_zmap(self): # This is not in R, so tested by using: # (testcase[i]-mean(testcase,axis=0)) / sqrt(var(testcase)*3/4) y = mstats.zmap(self.testcase, self.testcase) desired_unmaskedvals = ([-1.3416407864999, -0.44721359549996, 0.44721359549996, 1.3416407864999]) assert_array_almost_equal(desired_unmaskedvals, y.data[y.mask == False], decimal=12) def test_zscore(self): # This is not in R, so tested by using: # (testcase[i]-mean(testcase,axis=0)) / sqrt(var(testcase)*3/4) y = mstats.zscore(self.testcase) desired = ma.fix_invalid([-1.3416407864999, -0.44721359549996, 0.44721359549996, 1.3416407864999, np.nan]) assert_almost_equal(desired, y, decimal=12)
def filter_invalid_value(data_array):
    """ This filter applies a mask to all numerically invalid inputs on the
    programming side.

    Numbers that are usually infinite or some other nonsensical quantity
    serve no real use in calculations further downstream. Therefore, they
    are masked here. See numpy.ma.fix_invalid for what is considered invalid.

    Parameters
    ----------
    data_array : ndarray
        The data array that the mask will be calculated from.

    Returns
    -------
    final_mask : ndarray
        A boolean array for pixels that are masked (True) or are valid (False).
    """
    # As fixing all invalid data is required, masks might obscure
    # the data itself.
    raw_data_array = np_ma.getdata(data_array)

    # Mask all of the invalid data.
    final_mask = np_ma.getmaskarray(np_ma.fix_invalid(raw_data_array))

    return final_mask
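# Quick demonstration (added for illustration, assuming np_ma is the module's
# alias for numpy.ma): NaN and +/-inf are flagged as invalid, ordinary finite
# values are not.
import numpy as np

data = np.array([1.0, np.nan, np.inf, -3.0])
print(filter_invalid_value(data))  # [False  True  True False]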
def spike(x): """ Spike """ y = ma.fix_invalid(np.ones_like(x) * np.nan) y[1:-1] = np.abs(x[1:-1] - (x[:-2] + x[2:])/2.0) - \ np.abs((x[2:] - x[:-2])/2.0) return y
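# Worked example (illustrative, assuming the module's np/ma imports): for
# x = [1, 5, 1] the centre point deviates from its neighbours' mean by 4
# while the half-range term is 0, so the spike value is 4.0; the end points
# stay masked.
import numpy as np

print(spike(np.array([1.0, 5.0, 1.0])))  # [-- 4.0 --]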
def hdquantiles_sd(data, prob=list([.25,.5,.75]), axis=None): """ The standard error of the Harrell-Davis quantile estimates by jackknife. Parameters ---------- data : array_like Data array. prob : sequence, optional Sequence of quantiles to compute. axis : int, optional Axis along which to compute the quantiles. If None, use a flattened array. Returns ------- hdquantiles_sd : MaskedArray Standard error of the Harrell-Davis quantile estimates. See Also -------- hdquantiles """ def _hdsd_1D(data, prob): "Computes the std error for 1D arrays." xsorted = np.sort(data.compressed()) n = len(xsorted) hdsd = np.empty(len(prob), float_) if n < 2: hdsd.flat = np.nan vv = np.arange(n) / float(n-1) betacdf = beta.cdf for (i,p) in enumerate(prob): _w = betacdf(vv, (n+1)*p, (n+1)*(1-p)) w = _w[1:] - _w[:-1] mx_ = np.fromiter([np.dot(w,xsorted[np.r_[list(range(0,k)), list(range(k+1,n))].astype(int_)]) for k in range(n)], dtype=float_) mx_var = np.array(mx_.var(), copy=False, ndmin=1) * n / float(n-1) hdsd[i] = float(n-1) * np.sqrt(np.diag(mx_var).diagonal() / float(n)) return hdsd # Initialization & checks data = ma.array(data, copy=False, dtype=float_) p = np.array(prob, copy=False, ndmin=1) # Computes quantiles along axis (or globally) if (axis is None): result = _hdsd_1D(data, p) else: if data.ndim > 2: raise ValueError("Array 'data' must be at most two dimensional, " "but got data.ndim = %d" % data.ndim) result = ma.apply_along_axis(_hdsd_1D, axis, data, p) return ma.fix_invalid(result, copy=False).ravel()
def hdquantiles_sd(data, prob=list([.25,.5,.75]), axis=None): """ The standard error of the Harrell-Davis quantile estimates by jackknife. Parameters ---------- data : array_like Data array. prob : sequence, optional Sequence of quantiles to compute. axis : int, optional Axis along which to compute the quantiles. If None, use a flattened array. Returns ------- hdquantiles_sd : MaskedArray Standard error of the Harrell-Davis quantile estimates. See Also -------- hdquantiles """ def _hdsd_1D(data, prob): "Computes the std error for 1D arrays." xsorted = np.sort(data.compressed()) n = len(xsorted) hdsd = np.empty(len(prob), float_) if n < 2: hdsd.flat = np.nan vv = np.arange(n) / float(n-1) betacdf = beta.cdf for (i,p) in enumerate(prob): _w = betacdf(vv, (n+1)*p, (n+1)*(1-p)) w = _w[1:] - _w[:-1] mx_ = np.fromiter([w[:k] @ xsorted[:k] + w[k:] @ xsorted[k+1:] for k in range(n)], dtype=float_) # mx_var = np.array(mx_.var(), copy=False, ndmin=1) * n / (n - 1) # hdsd[i] = (n - 1) * np.sqrt(mx_var / n) hdsd[i] = np.sqrt(mx_.var() * (n - 1)) return hdsd # Initialization & checks data = ma.array(data, copy=False, dtype=float_) p = np.array(prob, copy=False, ndmin=1) # Computes quantiles along axis (or globally) if (axis is None): result = _hdsd_1D(data, p) else: if data.ndim > 2: raise ValueError("Array 'data' must be at most two dimensional, " "but got data.ndim = %d" % data.ndim) result = ma.apply_along_axis(_hdsd_1D, axis, data, p) return ma.fix_invalid(result, copy=False).ravel()
def set_features(self): if ("LATITUDE" in self.data.keys()) and ("LONGITUDE" in self.data.keys()): lat = self.data["LATITUDE"] lon = self.data["LONGITUDE"] elif ("LATITUDE" in self.data.attrs) and ("LONGITUDE" in self.data.attrs): lat = self.data.attrs["LATITUDE"] lon = self.data.attrs["LONGITUDE"] else: module_logger.debug("Missing geolocation (lat/lon)") self.features = {} return try: self.features = get_bathymetry(lat=lat, lon=lon) except: self.features = { "bathymetry": ma.fix_invalid([np.nan]), "bathymetry_std": ma.fix_invalid([np.nan]), } return if (("LATITUDE" not in self.data.attrs) or (self.data.attrs["LATITUDE"] is None) or ("LONGITUDE" not in self.data.attrs) or (self.data.attrs["LONGITUDE"] is None)): module_logger.debug("Missing geolocation (lat/lon)") self.features = { "bathymetry": ma.fix_invalid([np.nan]), "bathymetry_std": ma.fix_invalid([np.nan]), } self.flags["valid_position"] = self.flag_bad return if ((self.data.attrs["LATITUDE"] > 90) or (self.data.attrs["LATITUDE"] < -90) or (self.data.attrs["LONGITUDE"] > 360) or (self.data.attrs["LONGITUDE"] < -180)): self.features = { "bathymetry": ma.fix_invalid([np.nan]), "bathymetry_std": ma.fix_invalid([np.nan]), } return lat = self.data.attrs["LATITUDE"] lon = self.data.attrs["LONGITUDE"] try: self.features = get_bathymetry(lat=lat, lon=lon) except: self.features = { "bathymetry": ma.fix_invalid([np.nan]), "bathymetry_std": ma.fix_invalid([np.nan]), }
def get_mask_for_unphysical(U, cutoffU=2000., fill_value=np.nan):
    """
    Returns a mask for the masking module.
    If the absolute value of an element is greater than the cutoff, it is masked.

    Parameters
    ----------
    U: array-like
    cutoffU: float
        If |value| > cutoff, this method considers those values unphysical.
    fill_value: (unused)

    Returns
    -------
    mask: multidimensional boolean array
    """
    print('number of invalid values (nan and inf) in the array: ' + str(np.isnan(U).sum() + np.isinf(U).sum()))
    print('number of nan values in U: ' + str(np.isnan(U).sum()))
    print('number of inf values in U: ' + str(np.isinf(U).sum()) + '\n')

    # a = ma.masked_invalid(U)
    # print('number of masked elements by masked_invalid: ' + str(ma.count_masked(a)))

    # Replace all nan and inf values with fill_value.
    # fix_invalid still enforces a mask on elements with originally invalid values.
    U_fixed = ma.fix_invalid(U, fill_value=99999)
    n_invalid = ma.count_masked(U_fixed)
    print('number of masked elements by masked_invalid: ' + str(n_invalid))
    # Update the mask to False (no masking)
    U_fixed.mask = False
    # Mask unreasonable values of U_fixed
    b = ma.masked_greater(U_fixed, cutoffU)
    c = ma.masked_less(U_fixed, -cutoffU)
    n_greater = ma.count_masked(b) - n_invalid
    n_less = ma.count_masked(c)
    print('number of masked elements greater than cutoff: ' + str(n_greater))
    print('number of masked elements less than -cutoff: ' + str(n_less))

    # Generate a mask for all nonsense values in the array U
    mask = ~(~b.mask * ~c.mask)
    d = ma.array(U_fixed, mask=mask)
    n_total = ma.count_masked(d)
    # U_filled = ma.filled(d, fill_value)

    # Total number of elements in U
    N = 1
    for i in range(len(U.shape)):
        N *= U.shape[i]

    print('total number of unphysical values: ' + str(ma.count_masked(d)) + ' (' + str(float(n_total) / N * 100) + '%)\n')

    return mask
def cum_rate_of_change(x, memory): y = ma.fix_invalid(np.ones_like(x) * np.nan) y[1:] = ma.absolute(ma.diff(x)) for i in range(2, y.size): if y[i] < y[i - 1]: y[i] = (1 - memory) * y[i] + memory * y[i - 1] return y
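# Illustrative call (not from the original source): the raw |diff| is
# [1, 4, 1]; with memory=0.5 the last value is lifted towards its larger
# predecessor, 0.5*1 + 0.5*4 = 2.5, so the output is [-- 1.0 4.0 2.5].
import numpy as np

print(cum_rate_of_change(np.array([0.0, 1.0, 5.0, 6.0]), memory=0.5))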
def gradient(x):
    """ Gradient QC

        This is different from the mathematical gradient:
        d/dx + d/dy + d/dz,
        but as defined by GTSPP, EuroGOOS and others.
    """
    y = ma.fix_invalid(np.ones_like(x) * np.nan)
    y[1:-1] = np.abs(x[1:-1] - (x[:-2] + x[2:]) / 2.0)
    return y
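# Worked example (illustrative): the centre of [1, 2, 4] differs from the
# mean of its neighbours by |2 - 2.5| = 0.5; the end points remain masked.
import numpy as np

print(gradient(np.array([1.0, 2.0, 4.0])))  # [-- 0.5 --]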
def test_kendalltau_seasonal(self): # Tests the seasonal Kendall tau. x = [[nan,nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1], [4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3], [3, 2, 5, 6, 18, 4, 9, 1, 1,nan, 1, 1,nan], [nan, 6, 11, 4, 17,nan, 6, 1, 1, 2, 5, 1, 1]] x = ma.fix_invalid(x).T output = mstats.kendalltau_seasonal(x) assert_almost_equal(output['global p-value (indep)'], 0.008, 3) assert_almost_equal(output['seasonal p-value'].round(2), [0.18,0.53,0.20,0.04])
def estimate_anomaly(features, params, method='produtorium'):
    """ Estimate probability from PDF defined by params

        The output is the natural logarithm of the estimated probability.

        params are the parameters that define the PDF for each feature
          in features. This function estimates the combined probability of
          each row in features as the product of the probabilities of
          the different features on the same row.

        ATTENTION!! I should think more about what I would like from this
          function. What should happen in case of a masked feature? And if
          all features for one measurement are masked? Right now it simply
          doesn't add to the estimate, so a row with all features masked
          would lead to an expectation of 100% that it's good.
    """
    assert hasattr(params, 'keys')
    assert hasattr(features, 'keys')
    features_names = list(features.keys())
    for k in params.keys():
        assert k in features_names, "features doesn't have: %s" % k

    prob = ma.masked_all(np.shape(features[features_names[0]]), dtype='f8')

    for t in params.keys():
        param = params[t]['param']
        valid = ~ma.fix_invalid(features[t]).mask
        tmp = exponweib.sf(np.asanyarray(features[t]),
                           *param[:-2], loc=param[-2], scale=param[-1])
        # Arbitrary solution. No value can have a probability of 0.
        tmp[tmp == 0] = 1e-25
        p = ma.log(tmp)

        # If both are valid, operate as the chosen method.
        ind = ~prob.mask & valid
        if method == 'produtorium':
            prob[ind] = prob[ind] + p[ind]
        elif method == 'min':
            # Element-wise minimum; the builtin min() would fail on arrays.
            prob[ind] = np.minimum(prob[ind], p[ind])
        else:
            raise ValueError("Invalid method: %s" % method)

        # Update prob if new value is valid and prob is masked
        # Operate twice the first feature if moved above.
        ind = prob.mask & valid
        prob[ind] = p[ind]

    return prob
def chi2(data=None, back_calc=None, errors=None, params=None):
    """Function to calculate the chi-squared value."""

    # Calculate the chi-squared statistic.
    # Note: raise_warnings is expected to be a module-level flag.
    if raise_warnings:
        try:
            t_chi2 = sum((1.0 / errors * (data - back_calc))**2)
        except RuntimeWarning:
            # Handle if algorithm takes wrong step.
            #print("Oppps. np=%i, sim=%i, R2=%3.2f, I0=%3.2f" % (np_i, sim_j, params[0], params[1]))
            t_chi2 = 1e100
    else:
        t_chi2 = sum((1.0 / errors * (data - back_calc))**2)

    # Disabled debugging block: replace non-finite values with a large fill value.
    if 0:
        fix_invalid(t_chi2, copy=False, fill_value=1e100)
        t_chi2 = nan_to_num(t_chi2)
        if not isfinite(t_chi2):
            t_chi2_2 = nan_to_num(t_chi2)
            #print("Oppps. np=%i, sim=%i, R2=%3.2f, I0=%3.2f %s %s" % (np_i, sim_j, params[0], params[1], t_chi2, t_chi2_2))
            t_chi2 = t_chi2_2

    return t_chi2
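# The statistic reduces to sum(((data - back_calc) / errors)**2); a hedged
# standalone check with made-up numbers (raise_warnings in chi2 above is
# assumed to be a module-level flag):
import numpy as np

data = np.array([10.0, 8.0, 6.0])
back_calc = np.array([9.5, 8.2, 6.1])
errors = np.array([0.5, 0.4, 0.3])
print(np.sum(((data - back_calc) / errors)**2))  # 1 + 0.25 + 0.111... ~ 1.361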
def test_kstwosamp(self): x = [ [nan, nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1], [4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3], [3, 2, 5, 6, 18, 4, 9, 1, 1, nan, 1, 1, nan], [nan, 6, 11, 4, 17, nan, 6, 1, 1, 2, 5, 1, 1], ] x = ma.fix_invalid(x).T (winter, spring, summer, fall) = x.T assert_almost_equal(np.round(mstats.ks_twosamp(winter, spring), 4), (0.1818, 0.9892)) assert_almost_equal(np.round(mstats.ks_twosamp(winter, spring, "g"), 4), (0.1469, 0.7734)) assert_almost_equal(np.round(mstats.ks_twosamp(winter, spring, "l"), 4), (0.1818, 0.6744))
def __init__(self): self.attrs = { 'datetime': datetime(2016, 6, 4), 'LATITUDE': 15, 'LONGITUDE': -38 } self.data = { 'PRES': ma.fix_invalid([ 2, 6, 10, 21, 44, 79, 100, 150, 200, 400, 410, 650, 1000, 2000, 5000 ]), 'TEMP': ma.fix_invalid([ 25.32, 25.34, 25.34, 25.31, 24.99, 23.46, 21.85, 17.95, 15.39, 11.08, 6.93, 7.93, 5.71, 3.58, np.nan ]), 'PSAL': ma.fix_invalid([ 36.49, 36.51, 36.52, 36.53, 36.59, 36.76, 36.81, 36.39, 35.98, 35.30, 35.28, 34.93, 34.86, np.nan, np.nan ]) }
def test_kstwosamp(self): "Tests the Kolmogorov-Smirnov 2 samples test" x = [[nan,nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1], [ 4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3], [ 3, 2, 5, 6, 18, 4, 9, 1, 1,nan, 1, 1,nan], [nan, 6, 11, 4, 17,nan, 6, 1, 1, 2, 5, 1, 1]] x = ma.fix_invalid(x).T (winter,spring,summer,fall) = x.T # assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring),4), (0.1818,0.9892)) assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring,'g'),4), (0.1469,0.7734)) assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring,'l'),4), (0.1818,0.6744))
def densitystep(S, T, P): """ """ assert S.shape == T.shape assert S.shape == P.shape try: import gsw rho0 = gsw.pot_rho_t_exact(S, T, P, 0) assert S.ndim == 1, "Not able to densitystep an array ndim > 1" ds = ma.concatenate([ma.masked_all(1), np.sign(np.diff(P))*np.diff(rho0)]) return ma.fix_invalid(ds) except ImportError: print("Package gsw is required and is not available.")
def test_avf_masked(self):
    presidents = ma.fix_invalid(self.presidents)
    # periodogram : True
    avfp = avf(presidents)
    assert_almost_equal(avfp[:21].round(2),
                        [241.74, 185.75, 159.63, 116.92, 95.91, 60.36, 45.69,
                         34.97, 31.74, 10.91, 7.48, 1.32, 11.70, 7.71, 13.57,
                         4.16, -1.05, -9.76, -11.24, -15.67, -12.32])
    # periodogram : False
    avfp = avf(presidents, 0)
    pz = presidents.anom()
    mz = (~pz.mask).astype(int)
    assert_almost_equal(avfp[:21],
                        np.r_[[pz.var()],
                              [(pz[k:]*pz[:-k]).sum()/(mz[k:]*mz[:-k]).sum()
                               for k in range(1, 21)]])
def avf(x, periodogram=True):
    r"""Computes the auto-covariance function of the series `x`.
    The computations are performed on anomalies (deviations from average).
    Gaps in the series are filled first, anomalies are then computed and
    missing values filled with 0.

    The autocovariance at lag k, $\hat{R}(k)$, of a series {x_1,...,x_n} with
    mean 0 is defined as:

    \hat{R}(k) = \sum_{t=1}^{n-k}{y_t y_{t+k}} / \sum_{t=1}^{n-k}{a_t a_{t+k}}

    where $y_k = x_k$ if $x_k$ is not masked and $y_k = 0$ if $x_k$ is masked,
    and where $a_k = 1$ if $x_k$ is not masked and $a_k = 0$ if $x_k$ is masked.

    If the optional parameter `periodogram` is True, the denominator of the
    previous expression is $\sum_{t=1}^{n-k}{a_t a_{t+k}} + k$.

    Parameters
    ----------
    x : sequence
        Input data. If x is a TimeSeries object, it is filled first.
    periodogram : {True, False} optional
        Whether to return a periodogram or a standard estimate of the
        autocovariance.

    Returns
    -------
    avf : ma.array
        Autocovariance at lags [0,1,...,n,n-1,...,-1]
    """
    x = ma.array(x, copy=False, subok=True, dtype=float)
    if x.ndim > 1:
        raise ValueError("The input array should be 1D only.")
    # make sure there's no gap in the data
    if isinstance(x, TimeSeries) and x.has_missing_dates():
        x = ts.fill_missing_dates(x)

    m = np.logical_not(ma.getmaskarray(x)).astype(int)
    x = x.anom().filled(0).view(ndarray)
    n = len(x)

    _avf = np.correlate(x, x, 'full')
    denom = np.correlate(m, m, 'full')
    if periodogram:
        denom += np.concatenate([np.arange(n-1, 0, -1), np.arange(n)])
    _avf /= denom
    _avf = np.concatenate([_avf[n-1:], _avf[:n-1]])

    return ma.fix_invalid(_avf)
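# Hedged sketch of the estimator avf implements, assuming a plain, gap-free
# ndarray (no TimeSeries, nothing masked): anomalies are auto-correlated and
# divided by the number of valid overlapping pairs; the periodogram variant
# adds k to that denominator.
import numpy as np

x = np.array([1.0, 3.0, 2.0, 4.0])
xa = x - x.mean()                   # anomalies
m = np.ones_like(x)                 # validity indicator a_t
num = np.correlate(xa, xa, 'full')
den = np.correlate(m, m, 'full')    # pair counts [1, 2, 3, 4, 3, 2, 1]
print(num / den)                    # autocovariance at lags -3..3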
def test_friedmanchisq(self): # No missing values args = ([9.0,9.5,5.0,7.5,9.5,7.5,8.0,7.0,8.5,6.0], [7.0,6.5,7.0,7.5,5.0,8.0,6.0,6.5,7.0,7.0], [6.0,8.0,4.0,6.0,7.0,6.5,6.0,4.0,6.5,3.0]) result = mstats.friedmanchisquare(*args) assert_almost_equal(result[0], 10.4737, 4) assert_almost_equal(result[1], 0.005317, 6) # Missing values x = [[nan,nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1], [4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3], [3, 2, 5, 6, 18, 4, 9, 1, 1,nan, 1, 1,nan], [nan, 6, 11, 4, 17,nan, 6, 1, 1, 2, 5, 1, 1]] x = ma.fix_invalid(x) result = mstats.friedmanchisquare(*x) assert_almost_equal(result[0], 2.0156, 4) assert_almost_equal(result[1], 0.5692, 4) # test for namedtuple attributes attributes = ('statistic', 'pvalue') check_named_results(result, attributes, ma=True)
def qqcalc(data, distrib=ssd.norm, alpha=.4, beta=.4): """ Returns the theoretical quantiles from an empirical distribution. Parameters ---------- data : array Input data distrib : {norm, function}, optional Theoretical distribution used to compute the expected quantiles. If None, use a normal distribution. Otherwise, ``distrib`` must have a :meth:`.ppf` method. alpha : {float}, optional Coefficient for the computation of plotting positions beta : {float}, optional Coefficient for the computation of plotting positions. """ pp = mstats.plotting_positions(data, alpha=alpha, beta=beta) qq = ma.fix_invalid(distrib.ppf(pp)) qq._mask = pp._mask return qq
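# Hedged usage sketch, assuming `mstats` is scipy.stats.mstats and `ssd` is
# scipy.stats.distributions, as the default argument suggests: one theoretical
# normal quantile per observation, evaluated at its plotting position.
import numpy as np
import scipy.stats.distributions as ssd

sample = np.array([2.3, 1.1, 3.8, 2.9, 0.7])
print(qqcalc(sample, distrib=ssd.norm))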
def woa_track_from_file(d, lat, lon, filename, varnames=None): """ Temporary solution: WOA for surface track """ d = np.asanyarray(d) lat = np.asanyarray(lat) lon = np.asanyarray(lon) lon[lon < 0] += 360 doy = np.array([int(dd.strftime('%j')) for dd in d]) nc = netCDF4.Dataset(expanduser(filename), 'r') if varnames is None: varnames = {} for v in nc.variables.keys(): if nc.variables[v].dimensions == \ (u'time', u'depth', u'lat', u'lon'): varnames[v] = v output = {} for v in varnames: output[v] = [] for d_n, lat_n, lon_n in zip(doy, lat, lon): # Get the nearest point. In the future interpolate. n_d = (np.abs(d_n - nc.variables['time'][:])).argmin() n_x = (np.abs(lon_n - nc.variables['lon'][:])).argmin() n_y = (np.abs(lat_n - nc.variables['lat'][:])).argmin() for v in varnames: output[v].append(nc.variables[varnames[v]][n_d, 0, n_y, n_x]) for v in varnames: output[v] = ma.fix_invalid(output[v]) return output
def extrapolate_data(dataset, basemap, gridsize_x, gridsize_y, maskoceans=False):
    """
    Extrapolate `dataset` on a grid of size `(gridsize_x, gridsize_y)`
    based on `basemap`.

    A regular grid of the user-defined size is created from the basemap.
    The dataset coordinates are then Delaunay triangulated, and the
    corresponding data extrapolated on the regular grid using the
    nearest-neighbor method.

    Parameters
    ----------
    dataset : ndarray
        A structured ndarray, w/ fields ['lon', 'lat', 'data']
    basemap : Basemap
        The projection basemap
    gridsize_x : int
        Number of cells in the x direction ('lon')
    gridsize_y : int
        Number of cells in the y direction ('lat')
    maskoceans : bool, optional
        Whether to mask the cells falling over oceans in the result.
    """
    # Get the grid
    (glon, glat, gx, gy) = basemap.makegrid(gridsize_x, gridsize_y, returnxy=True)
    # Transforms the lon/lat of the dataset in basemap units
    (llon, llat) = basemap(dataset['lon'], dataset['lat'])
    # Triangulate the dataset
    triangul = delaunay.Triangulation(llon, llat)
    # Define an extrapolator (using natural neighbors)...
    # ... and extrapolate the data along the grid...
    extrapolator = triangul.nn_extrapolator(dataset['data'])
    extrapolated = ma.fix_invalid(extrapolator(gx, gy))
    if maskoceans:
        extrapolated = mtb.maskoceans(glon, glat, extrapolated)
    return (extrapolated, gx, gy)
def hdquantiles(data, prob=list([0.25, 0.5, 0.75]), axis=None, var=False): """ Computes quantile estimates with the Harrell-Davis method. The quantile estimates are calculated as a weighted linear combination of order statistics. Parameters ---------- data : array_like Data array. prob : sequence, optional Sequence of quantiles to compute. axis : int or None, optional Axis along which to compute the quantiles. If None, use a flattened array. var : bool, optional Whether to return the variance of the estimate. Returns ------- hdquantiles : MaskedArray A (p,) array of quantiles (if `var` is False), or a (2,p) array of quantiles and variances (if `var` is True), where ``p`` is the number of quantiles. """ def _hd_1D(data, prob, var): "Computes the HD quantiles for a 1D array. Returns nan for invalid data." xsorted = np.squeeze(np.sort(data.compressed().view(ndarray))) # Don't use length here, in case we have a numpy scalar n = xsorted.size hd = np.empty((2, len(prob)), float_) if n < 2: hd.flat = np.nan if var: return hd return hd[0] v = np.arange(n + 1) / float(n) betacdf = beta.cdf for (i, p) in enumerate(prob): _w = betacdf(v, (n + 1) * p, (n + 1) * (1 - p)) w = _w[1:] - _w[:-1] hd_mean = np.dot(w, xsorted) hd[0, i] = hd_mean # hd[1, i] = np.dot(w, (xsorted - hd_mean) ** 2) # hd[0, prob == 0] = xsorted[0] hd[0, prob == 1] = xsorted[-1] if var: hd[1, prob == 0] = hd[1, prob == 1] = np.nan return hd return hd[0] # Initialization & checks data = ma.array(data, copy=False, dtype=float_) p = np.array(prob, copy=False, ndmin=1) # Computes quantiles along axis (or globally) if (axis is None) or (data.ndim == 1): result = _hd_1D(data, p, var) else: if data.ndim > 2: raise ValueError("Array 'data' must be at most two dimensional, " "but got data.ndim = %d" % data.ndim) result = ma.apply_along_axis(_hd_1D, axis, data, p, var) return ma.fix_invalid(result, copy=False)
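# Usage sketch (hedged), assuming the module-level imports the body relies on
# (numpy.ma as ma, beta from scipy.stats, float_, ndarray): by symmetry, the
# Harrell-Davis estimate of the median of 1..9 is 5, and the quartile
# estimates are smooth L-statistics rather than single order statistics.
# (The function above matches the scipy.stats.mstats.hdquantiles API.)
import numpy as np

data = np.arange(1.0, 10.0)
print(hdquantiles(data, prob=[0.25, 0.5, 0.75]))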
def r1rho_DPL94(r1rho_prime=None, phi_ex=None, kex=None, theta=None, R1=0.0, spin_lock_fields2=None, back_calc=None):
    """Calculate the R1rho values for the DPL94 model.

    See the module docstring for details.

    @keyword r1rho_prime:       The R1rho_prime parameter value (R1rho with no exchange).
    @type r1rho_prime:          numpy float array of rank [NE][NS][NM][NO][ND]
    @keyword phi_ex:            The phi_ex parameter value (pA * pB * delta_omega^2).
    @type phi_ex:               numpy float array of rank [NE][NS][NM][NO][ND]
    @keyword kex:               The kex parameter value (the exchange rate in rad/s).
    @type kex:                  float
    @keyword theta:             The rotating frame tilt angles for each dispersion point.
    @type theta:                numpy float array of rank [NE][NS][NM][NO][ND]
    @keyword R1:                The R1 relaxation rate.
    @type R1:                   numpy float array of rank [NE][NS][NM][NO][ND]
    @keyword spin_lock_fields2: The R1rho spin-lock field strengths squared (in rad^2.s^-2).
    @type spin_lock_fields2:    numpy float array of rank [NE][NS][NM][NO][ND]
    @keyword back_calc:         The array for holding the back calculated R1rho values. Each element corresponds to the combination of theta and spin lock field.
    @type back_calc:            numpy float array of rank [NE][NS][NM][NO][ND]
    """

    # Flag to tell if values should be replaced if numer is zero.
    t_numer_zero = False
    t_denom_zero = False

    # The non-Rex factors.
    sin_theta2 = sin(theta)**2
    R1_R2 = R1 * cos(theta)**2 + r1rho_prime * sin_theta2

    # The numerator.
    numer = sin_theta2 * phi_ex * kex

    # Catch zeros (to avoid pointless mathematical operations).
    # This will result in no exchange, returning flat lines.
    if min(numer) == 0.0:
        t_numer_zero = True
        mask_numer_zero = masked_where(numer == 0.0, numer)

    # Denominator.
    denom = kex**2 + spin_lock_fields2

    # Catch math domain error of dividing by 0.
    # This is when denom = 0.
    mask_denom_zero = denom == 0.0
    if any(mask_denom_zero):
        t_denom_zero = True
        denom[mask_denom_zero] = 1.0

    # R1rho calculation.
    back_calc[:] = R1_R2 + numer / denom

    # Replace data in array.
    # If numer is zero.
    if t_numer_zero:
        back_calc[mask_numer_zero.mask] = R1_R2[mask_numer_zero.mask]

    # If denom is zero.
    if t_denom_zero:
        back_calc[mask_denom_zero] = 1e100

    # Catch errors, taking a sum over array is the fastest way to check for
    # +/- inf (infinity) and nan (not a number).
    if not isfinite(sum(back_calc)):
        # Replaces nan, inf, etc. with fill value.
        fix_invalid(back_calc, copy=False, fill_value=1e100)
def worker(job_json):
    """
    For every incoming message, this worker function is called. Be extremely
    careful not to do anything CPU-intensive here, or you will see blocking.
    Sockets are async under gevent, so those are fair game.
    """
    # Receive raw market JSON strings.
    market_json = zlib.decompress(job_json)
    # Un-serialize the JSON data to a Python dict.
    market_data = simplejson.loads(market_json)
    # Save to your choice of DB here.
    global dbConn
    query = PySQLPool.getNewQuery(dbConn)
    if market_data['resultType'] == 'orders':
        rows = market_data['rowsets']
        try:
            for row in rows:
                # Skip empty rowsets.
                if len(row['rows']) == 0:
                    continue
                genTime = dateutil.parser.parse(row['generatedAt'])
                genTime = int(time.mktime(genTime.timetuple()))
                typeID = row['typeID']
                regionID = row['regionID']
                buyCount = []
                sellCount = []
                buyPrice = []
                sellPrice = []
                tempMask = []
                buyAvg = 0
                buyMean = 0
                buyTotal = 0
                sellAvg = 0
                sellMean = 0
                sellTotal = 0
                buy = 0
                sell = 0
                set = 0
                stuff = row['rows']
                search = "SELECT * FROM prices WHERE uniquek = '%s' AND dateTime > '%s'" % (str(regionID) + str(typeID), genTime)
                query.Query(search)
                # Skip rows already recorded, or rows with a bogus future timestamp.
                if (len(query.record) == 1) or (genTime > int(time.mktime(time.gmtime()))):
                    continue
                for data in stuff:
                    if data[6] == True:
                        buyPrice.append(data[0])
                        buyCount.append(data[4] - data[1])
                    elif data[6] == False:
                        sellPrice.append(data[0])
                        sellCount.append(data[4] - data[1])
                    else:
                        pass
                if len(buyPrice) > 1:
                    top = stats.scoreatpercentile(buyPrice, 90)
                    bottom = stats.scoreatpercentile(buyPrice, 10)
                    buyMasked = ma.masked_outside(buyPrice, bottom, top)
                    tempMask = buyMasked.mask
                    buyCountMasked = ma.array(buyCount, mask=tempMask, fill_value=0)
                    # fix_invalid returns the repaired copy, so keep the result.
                    buyMasked = ma.fix_invalid(buyMasked, fill_value=0)
                    buyCountMasked = ma.fix_invalid(buyCountMasked, fill_value=0)
                    buyAvg = ma.average(buyMasked, 0, buyCountMasked)
                    buyMean = ma.mean(buyMasked)
                    buyTotal = ma.sum(buyCountMasked)
                    if buyTotal == 0:
                        buyAvg = 0
                        buyMean = 0
                        set = 1
                if len(buyPrice) < 4:
                    buyAvg = ma.average(buyPrice)
                    buyMean = ma.mean(buyPrice)
                buyPrice.sort()
                buy = buyPrice.pop()
                if len(sellPrice) > 3:
                    top = stats.scoreatpercentile(sellPrice, 90)
                    bottom = stats.scoreatpercentile(sellPrice, 1)
                    sellMasked = ma.masked_outside(sellPrice, bottom, top)
                    tempMask = sellMasked.mask
                    sellCountMasked = ma.array(sellCount, mask=tempMask, fill_value=0)
                    # fix_invalid returns the repaired copy, so keep the result.
                    sellMasked = ma.fix_invalid(sellMasked, fill_value=0)
                    sellCountMasked = ma.fix_invalid(sellCountMasked, fill_value=0)
                    sellAvg = ma.average(sellMasked, 0, sellCountMasked)
                    sellMean = ma.mean(sellMasked)
                    sellTotal = ma.sum(sellCountMasked)
                    if sellTotal == 0:
                        sellAvg = 0
                        sellMean = 0
                        set = 1
                if len(sellPrice) < 4:
                    sellMean = ma.mean(sellPrice)
                    sellTotal = ma.sum(sellPrice)
                sellPrice.sort()
                sellPrice.reverse()
                sell = sellPrice.pop()
                data = "REPLACE INTO prices SET uniquek = '%s', region = '%i', itemid = '%i', buymean = '%.2f', buyavg = '%.2f', sellmean = '%.2f', sellavg = '%.2f', buycount = '%i', sellcount = '%i', buy = '%.2f', sell = '%.2f', dateTime = '%i'" % (str(regionID) + str(typeID), regionID, typeID, np.nan_to_num(buyMean), np.nan_to_num(buyAvg), np.nan_to_num(sellMean), np.nan_to_num(sellAvg), np.nan_to_num(buyTotal), np.nan_to_num(sellTotal), buy, sell, genTime)
                query.Query(data)
        except:
            pass
def target_fn_setup(sim_index=None, scaling_matrix=None, verbosity=0): """Initialise the target function for optimisation or direct calculation. @keyword sim_index: The index of the simulation to optimise. This should be None if normal optimisation is desired. @type sim_index: None or int @keyword scaling_matrix: The diagonal and square scaling matrix. @type scaling_matrix: numpy rank-2, float64 array or None @keyword verbosity: A flag specifying the amount of information to print. The higher the value, the greater the verbosity. @type verbosity: int """ # Test if the N-state model has been set up. if not hasattr(cdp, 'model'): raise RelaxNoModelError('N-state') # '2-domain' model setup tests. if cdp.model == '2-domain': # The number of states. if not hasattr(cdp, 'N'): raise RelaxError("The number of states has not been set.") # The reference domain. if not hasattr(cdp, 'ref_domain'): raise RelaxError("The reference domain has not been set.") # Update the model parameters if necessary. update_model() # Create the initial parameter vector. param_vector = assemble_param_vector(sim_index=sim_index) # Replace all NaNs with 0.0. fix_invalid(param_vector, copy=False, fill_value=0.0) # Determine if alignment tensors or RDCs are to be used. data_types = base_data_types() # The probabilities. probs = None if hasattr(cdp, 'probs') and len(cdp.probs) and cdp.probs[0] != None: probs = cdp.probs # Diagonal scaling. if len(param_vector) and scaling_matrix is not None: param_vector = dot(inv(scaling_matrix), param_vector) # Get the data structures for optimisation using the tensors as base data sets. full_tensors, red_tensor_elem, red_tensor_err, full_in_ref_frame = None, None, None, None if 'tensor' in data_types: full_tensors, red_tensor_elem, red_tensor_err, full_in_ref_frame = minimise_setup_tensors(sim_index=sim_index) # Get the data structures for optimisation using PCSs as base data sets. pcs, pcs_err, pcs_weight, temp, frq, pcs_pseudo_flags = None, None, None, None, None, None if 'pcs' in data_types: pcs, pcs_err, pcs_weight, temp, frq, pcs_pseudo_flags = return_pcs_data(sim_index=sim_index, verbosity=verbosity) # Get the data structures for optimisation using RDCs as base data sets. rdcs, rdc_err, rdc_weight, rdc_vector, rdc_dj, absolute_rdc, T_flags, j_couplings, rdc_pseudo_flags = None, None, None, None, None, None, None, None, None if 'rdc' in data_types: # The data. rdcs, rdc_err, rdc_weight, rdc_vector, rdc_dj, absolute_rdc, T_flags, j_couplings, rdc_pseudo_flags = return_rdc_data(sim_index=sim_index, verbosity=verbosity) # Get the fixed tensors. fixed_tensors = None if 'rdc' in data_types or 'pcs' in data_types: full_tensors = minimise_setup_fixed_tensors() # The flag list. fixed_tensors = [] for i in range(len(cdp.align_tensors)): # Skip non-optimised data. if not opt_uses_align_data(cdp.align_tensors[i].name): continue if cdp.align_tensors[i].fixed: fixed_tensors.append(True) else: fixed_tensors.append(False) # Get the atomic_positions. atomic_pos, paramag_centre, centre_fixed = None, None, True if 'pcs' in data_types or 'pre' in data_types: atomic_pos, paramag_centre = minimise_setup_atomic_pos(sim_index=sim_index) # Optimisation of the centre. if hasattr(cdp, 'paramag_centre_fixed'): centre_fixed = cdp.paramag_centre_fixed # Set up the class instance containing the target function. 
model = N_state_opt(model=cdp.model, N=cdp.N, init_params=param_vector, probs=probs, full_tensors=full_tensors, red_data=red_tensor_elem, red_errors=red_tensor_err, full_in_ref_frame=full_in_ref_frame, fixed_tensors=fixed_tensors, pcs=pcs, rdcs=rdcs, pcs_errors=pcs_err, rdc_errors=rdc_err, T_flags=T_flags, j_couplings=j_couplings, rdc_pseudo_flags=rdc_pseudo_flags, pcs_pseudo_flags=pcs_pseudo_flags, pcs_weights=pcs_weight, rdc_weights=rdc_weight, rdc_vect=rdc_vector, temp=temp, frq=frq, dip_const=rdc_dj, absolute_rdc=absolute_rdc, atomic_pos=atomic_pos, paramag_centre=paramag_centre, scaling_matrix=scaling_matrix, centre_fixed=centre_fixed) # Return the data. return model, param_vector, data_types