Example #1
    def test_kendalltau(self):
        # Tests some computations of Kendall's tau
        x = ma.fix_invalid([5.05, 6.75, 3.21, 2.66, np.nan])
        y = ma.fix_invalid([1.65, 26.5, -5.93, 7.96, np.nan])
        z = ma.fix_invalid([1.65, 2.64, 2.64, 6.95, np.nan])
        assert_almost_equal(np.asarray(mstats.kendalltau(x, y)),
                            [+0.3333333, 0.4969059])
        assert_almost_equal(np.asarray(mstats.kendalltau(x, z)),
                            [-0.5477226, 0.2785987])
        #
        x = ma.fix_invalid([
            0, 0, 0, 0, 20, 20, 0, 60, 0, 20, 10, 10, 0, 40, 0, 20, 0, 0, 0, 0,
            0, np.nan
        ])
        y = ma.fix_invalid([
            0, 80, 80, 80, 10, 33, 60, 0, 67, 27, 25, 80, 80, 80, 80, 80, 80,
            0, 10, 45, np.nan, 0
        ])
        result = mstats.kendalltau(x, y)
        assert_almost_equal(np.asarray(result), [-0.1585188, 0.4128009])

        # test for namedtuple attributes
        res = mstats.kendalltau(x, y)
        attributes = ('correlation', 'pvalue')
        check_named_results(res, attributes, ma=True)
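A minimal standalone sketch (values illustrative, not from the test suite) of the pattern this test relies on: ma.fix_invalid turns NaN entries into masked values, so the mstats routines only see the valid data.

import numpy as np
import numpy.ma as ma

x = ma.fix_invalid([5.05, 6.75, 3.21, 2.66, np.nan])
print(x)          # [5.05 6.75 3.21 2.66 --]
print(x.mask)     # [False False False False  True]
print(x.count())  # 4 -- only the unmasked values enter the statistics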
Example #2
    def test_spearmanr(self):
        # Tests some computations of Spearman's rho
        (x, y) = ([5.05, 6.75, 3.21, 2.66], [1.65, 2.64, 2.64, 6.95])
        assert_almost_equal(mstats.spearmanr(x, y)[0], -0.6324555)
        (x, y) = ([5.05, 6.75, 3.21, 2.66,
                   np.nan], [1.65, 2.64, 2.64, 6.95, np.nan])
        (x, y) = (ma.fix_invalid(x), ma.fix_invalid(y))
        assert_almost_equal(mstats.spearmanr(x, y)[0], -0.6324555)

        x = [
            2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1, 1.0, 1.4, 7.9, 0.3,
            3.9, 0.3, 6.7
        ]
        y = [
            22.6, 8.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6, 0.0, 0.6, 6.7, 3.8,
            1.0, 1.2, 1.4
        ]
        assert_almost_equal(mstats.spearmanr(x, y)[0], 0.6887299)
        x = [
            2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1, 1.0, 1.4, 7.9, 0.3,
            3.9, 0.3, 6.7, np.nan
        ]
        y = [
            22.6, 8.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6, 0.0, 0.6, 6.7, 3.8,
            1.0, 1.2, 1.4, np.nan
        ]
        (x, y) = (ma.fix_invalid(x), ma.fix_invalid(y))
        assert_almost_equal(mstats.spearmanr(x, y)[0], 0.6887299)

        # test for namedtuple attributes
        res = mstats.spearmanr(x, y)
        attributes = ('correlation', 'pvalue')
        check_named_results(res, attributes, ma=True)
Example #3
 def test_spearmanr(self):
     "Tests some computations of Spearman's rho"
     (x, y) = ([5.05, 6.75, 3.21, 2.66], [1.65, 2.64, 2.64, 6.95])
     assert_almost_equal(mstats.spearmanr(x, y)[0], -0.6324555)
     (x, y) = ([5.05, 6.75, 3.21, 2.66,
                np.nan], [1.65, 2.64, 2.64, 6.95, np.nan])
     (x, y) = (ma.fix_invalid(x), ma.fix_invalid(y))
     assert_almost_equal(mstats.spearmanr(x, y)[0], -0.6324555)
     #
     x = [
         2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1, 1.0, 1.4, 7.9, 0.3,
         3.9, 0.3, 6.7
     ]
     y = [
          22.6, 8.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6, 0.0, 0.6, 6.7, 3.8,
         1.0, 1.2, 1.4
     ]
     assert_almost_equal(mstats.spearmanr(x, y)[0], 0.6887299)
     x = [
         2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1, 1.0, 1.4, 7.9, 0.3,
         3.9, 0.3, 6.7, np.nan
     ]
     y = [
          22.6, 8.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6, 0.0, 0.6, 6.7, 3.8,
         1.0, 1.2, 1.4, np.nan
     ]
     (x, y) = (ma.fix_invalid(x), ma.fix_invalid(y))
     assert_almost_equal(mstats.spearmanr(x, y)[0], 0.6887299)
Example #4
def r2eff_TSMFK01(r20a=None, dw=None, dw_orig=None, k_AB=None, tcp=None, back_calc=None):
    """Calculate the R2eff values for the TSMFK01 model.

    See the module docstring for details.


    @keyword r20a:          The R20 parameter value of state A (R2 with no exchange).
    @type r20a:             numpy float array of rank [NE][NS][NM][NO][ND]
    @keyword dw:            The chemical exchange difference between states A and B in rad/s.
    @type dw:               numpy float array of rank [NE][NS][NM][NO][ND]
    @keyword dw_orig:       The chemical exchange difference between states A and B in ppm. This is only for faster checking of zero value, which result in no exchange.
    @type dw_orig:          numpy float array of rank-1
    @keyword k_AB:          The k_AB parameter value (the forward exchange rate in rad/s).
    @type k_AB:             float
    @keyword tcp:           The tau_CPMG times (1 / 4.nu1).
    @type tcp:              numpy float array of rank [NE][NS][NM][NO][ND]
    @keyword back_calc:     The array for holding the back calculated R2eff values.  Each element corresponds to one of the CPMG nu1 frequencies.
    @type back_calc:        numpy float array of rank [NE][NS][NM][NO][ND]
    """

    # Flag to tell if values should be replaced if max_etapos in cosh function is violated.
    t_dw_zero = False

    # Catch parameter values that will result in no exchange, returning flat R2eff = R20 lines (when kex = 0.0, k_AB = 0.0).
    # Test if k_AB is zero.
    if k_AB == 0.0:
        back_calc[:] = r20a
        return

    # Test if dw is zero. Create a mask for the affected spins to replace
    # these with R20 at the end of the calculation. Wait for replacement,
    # since this is spin specific.
    if min(fabs(dw_orig)) == 0.0:
        t_dw_zero = True
        mask_dw_zero = masked_where(dw == 0.0, dw)

    # Denominator.
    denom = dw * tcp

    # The numerator.
    numer = sin(denom)

    # Catch zeros (to avoid pointless mathematical operations).
    # This will result in no exchange, returning flat lines.
    if min(fabs(numer)) == 0.0:
        # Calculate R2eff for forward.
        back_calc[:] = r20a + k_AB
    else:
        # Calculate R2eff.
        back_calc[:] = r20a + k_AB - k_AB * numer / denom

    # Replace data in array.
    # If dw is zero.
    if t_dw_zero:
        back_calc[mask_dw_zero.mask] = r20a[mask_dw_zero.mask]

    # Catch errors, taking a sum over array is the fastest way to check for
    # +/- inf (infinity) and nan (not a number).
    if not isfinite(sum(back_calc)):
        # Replaces nan, inf, etc. with fill value.
        fix_invalid(back_calc, copy=False, fill_value=1e100)
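The copy=False call above mutates the caller's buffer. A small sketch of that behaviour, with illustrative values:

import numpy as np
from numpy.ma import fix_invalid

back_calc = np.array([1.5, np.inf, 2.0, np.nan])
# copy=False repairs the array in place: non-finite entries are
# overwritten with fill_value and masked in the returned masked array.
fix_invalid(back_calc, copy=False, fill_value=1e100)
print(back_calc)  # the inf and nan slots now hold 1e100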
Example #5
 def test_kendalltau(self):
     # Tests some computations of Kendall's tau
     x = ma.fix_invalid([5.05, 6.75, 3.21, 2.66, np.nan])
     y = ma.fix_invalid([1.65, 26.5, -5.93, 7.96, np.nan])
     z = ma.fix_invalid([1.65, 2.64, 2.64, 6.95, np.nan])
     assert_almost_equal(np.asarray(mstats.kendalltau(x, y)), [+0.3333333, 0.4969059])
     assert_almost_equal(np.asarray(mstats.kendalltau(x, z)), [-0.5477226, 0.2785987])
     #
     x = ma.fix_invalid([0, 0, 0, 0, 20, 20, 0, 60, 0, 20, 10, 10, 0, 40, 0, 20, 0, 0, 0, 0, 0, np.nan])
     y = ma.fix_invalid([0, 80, 80, 80, 10, 33, 60, 0, 67, 27, 25, 80, 80, 80, 80, 80, 80, 0, 10, 45, np.nan, 0])
     result = mstats.kendalltau(x, y)
     assert_almost_equal(np.asarray(result), [-0.1585188, 0.4128009])
Example #6
def r2eff_LM63(r20=None, phi_ex=None, kex=None, cpmg_frqs=None, back_calc=None):
    """Calculate the R2eff values for the LM63 model.

    See the module docstring for details.


    @keyword r20:           The R20 parameter value (R2 with no exchange).
    @type r20:              numpy float array of rank [NE][NS][NM][NO][ND]
    @keyword phi_ex:        The phi_ex parameter value (pA * pB * delta_omega^2).
    @type phi_ex:           numpy float array of rank [NE][NS][NM][NO][ND]
    @keyword kex:           The kex parameter value (the exchange rate in rad/s).
    @type kex:              float
    @keyword cpmg_frqs:     The CPMG nu1 frequencies.
    @type cpmg_frqs:        numpy float array of rank [NE][NS][NM][NO][ND]
    @keyword back_calc:     The array for holding the back calculated R2eff values.  Each element corresponds to one of the CPMG nu1 frequencies.
    @type back_calc:        numpy float array of rank [NE][NS][NM][NO][ND]
    """

    # Flag to tell if values should be replaced if phi_ex is zero.
    t_phi_ex_zero = False

    # Catch divide with zeros (to avoid pointless mathematical operations).
    if kex == 0.0:
        back_calc[:] = r20
        return

    # Catch zeros (to avoid pointless mathematical operations).
    # This will result in no exchange, returning flat lines.
    if min(phi_ex) == 0.0:
        t_phi_ex_zero = True
        mask_phi_ex_zero = masked_where(phi_ex == 0.0, phi_ex)

    # Repetitive calculations (to speed up calculations).
    rex = phi_ex / kex
    kex_4 = 4.0 / kex

    # Calculate R2eff.
    back_calc[:] = r20 + rex * (1.0 - kex_4 * cpmg_frqs * tanh(kex / (4.0 * cpmg_frqs)))

    # Replace data in array.
    # If phi_ex is zero.
    if t_phi_ex_zero:
        back_calc[mask_phi_ex_zero.mask] = r20[mask_phi_ex_zero.mask]

    # Catch errors, taking a sum over array is the fastest way to check for
    # +/- inf (infinity) and nan (not a number).
    if not isfinite(sum(back_calc)):
        # Replaces nan, inf, etc. with fill value.
        fix_invalid(back_calc, copy=False, fill_value=1e100)
Example #7
    def test_spearmanr(self):
        # Tests some computations of Spearman's rho
        (x, y) = ([5.05, 6.75, 3.21, 2.66], [1.65, 2.64, 2.64, 6.95])
        assert_almost_equal(mstats.spearmanr(x, y)[0], -0.6324555)
        (x, y) = ([5.05, 6.75, 3.21, 2.66, np.nan], [1.65, 2.64, 2.64, 6.95, np.nan])
        (x, y) = (ma.fix_invalid(x), ma.fix_invalid(y))
        assert_almost_equal(mstats.spearmanr(x, y)[0], -0.6324555)

        x = [2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1, 1.0, 1.4, 7.9, 0.3, 3.9, 0.3, 6.7]
        y = [22.6, 8.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6, 0.0, 0.6, 6.7, 3.8, 1.0, 1.2, 1.4]
        assert_almost_equal(mstats.spearmanr(x, y)[0], 0.6887299)
        x = [2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1, 1.0, 1.4, 7.9, 0.3, 3.9, 0.3, 6.7, np.nan]
        y = [22.6, 8.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6, 0.0, 0.6, 6.7, 3.8, 1.0, 1.2, 1.4, np.nan]
        (x, y) = (ma.fix_invalid(x), ma.fix_invalid(y))
        assert_almost_equal(mstats.spearmanr(x, y)[0], 0.6887299)
Example #8
    def preprocess(self, X, method):
        """
		Preprocess the data by scaling into the range of 0-1 with bins.
		"""
        if method == "bucket":  # scales into 0-1 range with bins
            print("using the bucket prep method")
            from sklearn.preprocessing import KBinsDiscretizer
            est = KBinsDiscretizer(n_bins=10,
                                   encode="ordinal",
                                   strategy="quantile")
            est.fit(X)
            X_processed = est.transform(X)
            X_processed /= 10  # transform from nominal values to 0-1
            return X_processed
        elif method == "clip":  # clips the raw counts into a certain range
            print("using the clip prep method")
            cutoff = 1000
            X_processed = np.minimum(X, cutoff) + np.sqrt(
                np.maximum(X - cutoff, 0))
            return X_processed
        elif method == "log":  # takes the log of the count
            print("using the log prep method")
            import numpy.ma as ma
            mask = ma.log(X)
            # mask logged data to replace NaN (log0) with 0
            X_processed = ma.fix_invalid(mask, fill_value=0).data
            return X_processed
        else:
            raise ValueError("Incorrect preprocess method name passed!")
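A condensed sketch of the "log" branch above, on a toy count matrix: ma.log masks log(0), and fix_invalid(..., fill_value=0).data writes zeros into those slots.

import numpy as np
import numpy.ma as ma

X = np.array([[0.0, 10.0], [100.0, 0.0]])
logged = ma.log(X)  # log(0) -> -inf, automatically masked
X_processed = ma.fix_invalid(logged, fill_value=0).data
print(X_processed)  # zeros where the raw counts were zero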
Example #9
def hdquantiles_sd(data, prob=list([.25, .5, .75]), axis=None):
    """Computes the standard error of the Harrell-Davis quantile estimates by jackknife.


Parameters
----------
    data : ndarray
        Data array.
    prob : sequence
        Sequence of quantiles to compute.
    axis : int
        Axis along which to compute the quantiles. If None, use a flattened array.

Notes
-----
    The function is restricted to 2D arrays.

    """
    def _hdsd_1D(data, prob):
        "Computes the std error for 1D arrays."
        xsorted = np.sort(data.compressed())
        n = len(xsorted)
        #.........
        hdsd = np.empty(len(prob), float_)
        if n < 2:
            hdsd.flat = np.nan
        #.........
        vv = np.arange(n) / float(n - 1)
        betacdf = beta.cdf
        #
        for (i, p) in enumerate(prob):
            _w = betacdf(vv, (n + 1) * p, (n + 1) * (1 - p))
            w = _w[1:] - _w[:-1]
            mx_ = np.fromiter([
                np.dot(
                    w, xsorted[np.r_[list(range(0, k)),
                                     list(range(k + 1, n))].astype(int_)])
                for k in range(n)
            ],
                              dtype=float_)
            mx_var = np.array(mx_.var(), copy=False,
                              ndmin=1) * n / float(n - 1)
            hdsd[i] = float(n - 1) * np.sqrt(
                np.diag(mx_var).diagonal() / float(n))
        return hdsd

    # Initialization & checks ---------
    data = ma.array(data, copy=False, dtype=float_)
    p = np.array(prob, copy=False, ndmin=1)
    # Computes quantiles along axis (or globally)
    if (axis is None):
        result = _hdsd_1D(data, p)
    else:
        if data.ndim > 2:
            raise ValueError(
                "Array 'data' must be at most two dimensional, but got data.ndim = %d"
                % data.ndim)
        result = ma.apply_along_axis(_hdsd_1D, axis, data, p)
    #
    return ma.fix_invalid(result, copy=False).ravel()
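Since this function ships in scipy.stats.mstats, a short usage sketch (data values illustrative):

import numpy as np
from scipy.stats.mstats import hdquantiles_sd

data = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0])
# Jackknife standard errors of the Harrell-Davis quantile estimates.
print(hdquantiles_sd(data, prob=[0.25, 0.5, 0.75]))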
Example #10
def impute_missing_total_reads(total_reads, missing_variant_confidence):
  # Change NaNs to masked values via NumPy's masked arrays.
  masked_total_reads = ma.fix_invalid(total_reads)

  # Going forward, suppose you have v variants and s samples in a v*s matrix of
  # read counts. Missing values are masked.

  # Calculate geometric mean of variant read depth in each sample. Result: s*1
  sample_means = gmean(masked_total_reads, axis=0)
  assert np.sum(sample_means <= 0) == np.sum(np.isnan(sample_means)) == 0
  # Divide every variant's read count by its mean sample read depth to get read
  # depth enrichment relative to other variants in sample. Result: v*s
  normalized_to_sample = np.dot(masked_total_reads, np.diag(1./sample_means))
  # For each variant, calculate geometric mean of its read depth enrichment
  # across samples. Result: v*1
  variant_mean_reads = gmean(normalized_to_sample, axis=1)
  assert np.sum(variant_mean_reads <= 0) == np.sum(np.isnan(variant_mean_reads)) == 0

  # Convert 1D arrays to vectors to permit matrix multiplication.
  imputed_counts = np.dot(variant_mean_reads.reshape((-1, 1)), sample_means.reshape((1, -1)))
  nan_coords = np.where(np.isnan(total_reads))
  total_reads[nan_coords] = imputed_counts[nan_coords]
  assert np.sum(total_reads <= 0) == np.sum(np.isnan(total_reads)) == 0

  total_reads[nan_coords] *= missing_variant_confidence
  return np.floor(total_reads).astype(int)
Example #11
def loadmap_nansAsZeros(image, fill_value=0, copy=False):
    # Pass a different fill_value if desired, like np.mean(image) or np.median(image).
    # Overwrites the invalid values in the input image; the returned image is a masked array.
    image = imread(image)

    image = ma.fix_invalid(image, copy=copy, fill_value=fill_value)
    return image
Example #12
def constant_cluster_size(x, tol=0):
    """Estimate the cluster size with (nearly) constant value

       Returns how many consecutive neighbor values are within a given
         tolerance range. Note that invalid values, like NaN, are ignored.
    """
    assert np.ndim(x) == 1, 'Not ready for more than 1 dimension'

    # Add a small tolerance to absorb rounding differences between numeric types.
    tol = tol + 1e-5 * tol

    ivalid = np.nonzero(~ma.getmaskarray(ma.fix_invalid(x)))[0]
    dx = np.diff(np.atleast_1d(x)[ivalid])

    cluster_size = np.zeros(np.shape(x), dtype='i')
    for i, iv in enumerate(ivalid):
        idx = np.absolute(dx[i:].cumsum()) > tol
        if True in idx:
            cluster_size[iv] += np.nonzero(idx)[0].min()
        else:
            cluster_size[iv] += idx.size
        idx = np.absolute(dx[0:i][::-1].cumsum()) > tol
        if True in idx:
            cluster_size[iv] += np.nonzero(idx)[0].min()
        else:
            cluster_size[iv] += idx.size
    return cluster_size
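A quick illustration of the ivalid line above, on a toy series: fix_invalid supplies the mask, getmaskarray expands it to a full boolean array, and nonzero yields the indices of the usable samples.

import numpy as np
import numpy.ma as ma

x = np.array([1.0, np.nan, 1.0, 1.0, np.nan, 2.0])
ivalid = np.nonzero(~ma.getmaskarray(ma.fix_invalid(x)))[0]
print(ivalid)              # [0 2 3 5]
print(np.diff(x[ivalid]))  # [0. 0. 1.] -- steps between valid neighbors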
Example #13
 def test_cov(self):
     "Tests the cov function."
     x = ma.array([[1, 2, 3], [4, 5, 6]], mask=[[1, 0, 0], [0, 0, 0]])
     c = mstats.cov(x[0])
     assert_equal(c, x[0].var(ddof=1))
     c = mstats.cov(x[1])
     assert_equal(c, x[1].var(ddof=1))
     c = mstats.cov(x)
     assert_equal(c[1, 0], (x[0].anom() * x[1].anom()).sum())
     #
     x = [[nan, nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1],
          [4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3],
          [3, 2, 5, 6, 18, 4, 9, 1, 1, nan, 1, 1, nan],
          [nan, 6, 11, 4, 17, nan, 6, 1, 1, 2, 5, 1, 1]]
     x = ma.fix_invalid(x).T
     (winter, spring, summer, fall) = x.T
     #
     assert_almost_equal(mstats.cov(winter, winter, bias=True),
                         winter.var(ddof=0))
     assert_almost_equal(mstats.cov(winter, winter, bias=False),
                         winter.var(ddof=1))
     assert_almost_equal(mstats.cov(winter, spring)[0, 1], 7.7)
     assert_almost_equal(mstats.cov(winter, spring)[1, 0], 7.7)
     assert_almost_equal(mstats.cov(winter, summer)[0, 1], 19.1111111, 7)
     assert_almost_equal(mstats.cov(winter, summer)[1, 0], 19.1111111, 7)
     assert_almost_equal(mstats.cov(winter, fall)[0, 1], 20)
     assert_almost_equal(mstats.cov(winter, fall)[1, 0], 20)
Example #14
def ccf(x, y, periodogram=True):
    """Computes the auto-correlation of the series x and y at different lags.
The computations are performed on anomalies (deviations from average).
Gaps in the series are filled first, anomalies are then computed and missing
values filled with 0.
If x and y are valid TimeSeries objects, they are aligned so that their starting
and ending points match.

Parameters
----------
    x : sequence
        Input data.
    y : sequence
        Input data.
        If y is longer than x, it is truncated to match the length of x.
        If y is shorter than x, x is truncated.
    periodogram : {True, False} optional
        Whether to return a periodogram or a standard estimate of the autocovariance.

Returns
-------
    ccf : ma.array
        Cross-correlation at lags [0,1,...,n,n-1,...,-1]
    """
    ccf_ = cvf(x, y, periodogram)
    return ma.fix_invalid(ccf_ / ccf_[0])
Example #15
def _acf(x, mode):
    """Computes the auto-correlation function of the time series x.
Note that the computations are performed on anomalies (deviations from average).
Gaps in the series are filled first, the anomalies are then computed and the missing
values filled with 0.

:Parameters:
    `x` : TimeSeries
        Time series.
    """
    x = ma.array(x, copy=False, subok=True, dtype=float)
    if x.ndim > 1:
        raise ValueError("The input array should be 1D only.")
    # make sure there's no gap in the data
    if isinstance(x, TimeSeries) and x.has_missing_dates():
        x = ts.fill_missing_dates(x)
    #
    m = np.logical_not(ma.getmaskarray(x)).astype(int)
    x = x.anom().filled(0).view(ndarray)
    xx = (x * x)
    n = len(x)
    #
    _avf = np.correlate(x, x, 'full')[n - 1:]
    if mode:
        dnm_ = np.fromiter((np.sum(x[k:] * x[:-k]) / np.sum(m[k:] * xx[:-k])
                            for k in range(1, n)),
                           dtype=float)
    else:
        dnm_ = np.fromiter((np.sum(x[k:]*x[:-k])/\
                            np.sqrt((m[k:]*xx[:-k]).sum() * (m[:-k]*xx[k:]).sum())
                            for k in range(1,n)),
                           dtype=float)
    poslags = _avf[1:] / dnm_
    return ma.fix_invalid(
        np.concatenate([np.array([1.]), poslags, poslags[::-1]]))
Example #16
def _moving_func(data, cfunc, kwargs):

    data = ma.fix_invalid(data)
    data = ma.array(data.filled(0), mask=data._mask)

    if data.ndim == 1:
        kwargs['array'] = data
        result_dict = cfunc(**kwargs)
        return _process_result_dict(data, result_dict)

    elif data.ndim == 2:
        for i in range(data.shape[-1]):
            kwargs['array'] = data[:,i]
            result_dict = cfunc(**kwargs)

            if i == 0:
                rtype = result_dict['array'].dtype
                result = data.astype(rtype)
                print(data.dtype, result.dtype)

            rmask = result_dict.get('mask', ma.nomask)

            curr_col = marray(result_dict['array'], mask=rmask, copy=False)
            result[:,i] = curr_col

        return result

    else:
        raise ValueError, "Data should be at most 2D"
Example #17
def ccf(x, y, periodogram=True):
    """Computes the auto-correlation of the series x and y at different lags.
The computations are performed on anomalies (deviations from average).
Gaps in the series are filled first, anomalies are then computed and missing
values filled with 0.
If x and y are valid TimeSeries objects, they are aligned so that their starting
and ending points match.

Parameters
----------
    x : sequence
        Input data.
    y : sequence
        Input data.
        If y is longer than x, it is truncated to match the length of x.
        If y is shorter than x, x is truncated.
    periodogram : {True, False} optional
        Whether to return a periodogram or a standard estimate of the autocovariance.

Returns
-------
    ccf : ma.array
        Cross-correlation at lags [0,1,...,n,n-1,...,-1]
    """
    ccf_ = cvf(x,y,periodogram)
    return ma.fix_invalid(ccf_/ccf_[0])
Example #18
 def test_kendalltau(self):
     # Tests some computations of Kendall's tau
     x = ma.fix_invalid([5.05, 6.75, 3.21, 2.66,np.nan])
     y = ma.fix_invalid([1.65, 26.5, -5.93, 7.96, np.nan])
     z = ma.fix_invalid([1.65, 2.64, 2.64, 6.95, np.nan])
     assert_almost_equal(np.asarray(mstats.kendalltau(x,y)),
                         [+0.3333333,0.4969059])
     assert_almost_equal(np.asarray(mstats.kendalltau(x,z)),
                         [-0.5477226,0.2785987])
     #
     x = ma.fix_invalid([0, 0, 0, 0,20,20, 0,60, 0,20,
                         10,10, 0,40, 0,20, 0, 0, 0, 0, 0, np.nan])
     y = ma.fix_invalid([0,80,80,80,10,33,60, 0,67,27,
                         25,80,80,80,80,80,80, 0,10,45, np.nan, 0])
     result = mstats.kendalltau(x,y)
     assert_almost_equal(np.asarray(result), [-0.1585188, 0.4128009])
Example #19
 def test_cov(self):
     "Tests the cov function."
     x = ma.array([[1,2,3],[4,5,6]], mask=[[1,0,0],[0,0,0]])
     c = mstats.cov(x[0])
     assert_equal(c, x[0].var(ddof=1))
     c = mstats.cov(x[1])
     assert_equal(c, x[1].var(ddof=1))
     c = mstats.cov(x)
     assert_equal(c[1,0], (x[0].anom()*x[1].anom()).sum())
     #
     x = [[nan,nan,  4,  2, 16, 26,  5,  1,  5,  1,  2,  3,  1],
          [  4,  3,  5,  3,  2,  7,  3,  1,  1,  2,  3,  5,  3],
          [  3,  2,  5,  6, 18,  4,  9,  1,  1,nan,  1,  1,nan],
          [nan,  6, 11,  4, 17,nan,  6,  1,  1,  2,  5,  1,  1]]
     x = ma.fix_invalid(x).T
     (winter,spring,summer,fall) = x.T
     #
     assert_almost_equal(mstats.cov(winter,winter,bias=True),
                         winter.var(ddof=0))
     assert_almost_equal(mstats.cov(winter,winter,bias=False),
                         winter.var(ddof=1))
     assert_almost_equal(mstats.cov(winter,spring)[0,1], 7.7)
     assert_almost_equal(mstats.cov(winter,spring)[1,0], 7.7)
     assert_almost_equal(mstats.cov(winter,summer)[0,1], 19.1111111, 7)
     assert_almost_equal(mstats.cov(winter,summer)[1,0], 19.1111111, 7)
     assert_almost_equal(mstats.cov(winter,fall)[0,1], 20)
     assert_almost_equal(mstats.cov(winter,fall)[1,0], 20)
Example #20
class TestVariability(TestCase):
    """  Comparison numbers are found using R v.1.5.1
         note that length(testcase) = 4
    """
    testcase = ma.fix_invalid([1,2,3,4,np.nan])

    def test_signaltonoise(self):
        # This is not in R, so used:
        #     mean(testcase, axis=0) / (sqrt(var(testcase)*3/4))
        y = mstats.signaltonoise(self.testcase)
        assert_almost_equal(y,2.236067977)

    def test_sem(self):
        # This is not in R, so used: sqrt(var(testcase)*3/4) / sqrt(3)
        y = mstats.sem(self.testcase)
        assert_almost_equal(y,0.6454972244)

    def test_zmap(self):
        # This is not in R, so tested by using:
        #    (testcase[i]-mean(testcase,axis=0)) / sqrt(var(testcase)*3/4)
        y = mstats.zmap(self.testcase, self.testcase)
        desired_unmaskedvals = ([-1.3416407864999, -0.44721359549996,
                                 0.44721359549996, 1.3416407864999])
        assert_array_almost_equal(desired_unmaskedvals,
                                  y.data[y.mask == False], decimal=12)

    def test_zscore(self):
        # This is not in R, so tested by using:
        #     (testcase[i]-mean(testcase,axis=0)) / sqrt(var(testcase)*3/4)
        y = mstats.zscore(self.testcase)
        desired = ma.fix_invalid([-1.3416407864999, -0.44721359549996,
                                  0.44721359549996, 1.3416407864999, np.nan])
        assert_almost_equal(desired, y, decimal=12)
Example #21
def _moving_func(data, cfunc, kwargs):

    data = ma.fix_invalid(data)
    data = ma.array(data.filled(0), mask=data._mask)

    if data.ndim == 1:
        kwargs['array'] = data
        result_dict = cfunc(**kwargs)
        return _process_result_dict(data, result_dict)

    elif data.ndim == 2:
        for i in range(data.shape[-1]):
            kwargs['array'] = data[:, i]
            result_dict = cfunc(**kwargs)

            if i == 0:
                rtype = result_dict['array'].dtype
                result = data.astype(rtype)
                print(data.dtype, result.dtype)

            rmask = result_dict.get('mask', ma.nomask)

            curr_col = marray(result_dict['array'], mask=rmask, copy=False)
            result[:, i] = curr_col

        return result

    else:
        raise ValueError, "Data should be at most 2D"
Example #22
 def test_zscore(self):
     # This is not in R, so tested by using:
     #     (testcase[i]-mean(testcase,axis=0)) / sqrt(var(testcase)*3/4)
     y = mstats.zscore(self.testcase)
     desired = ma.fix_invalid([-1.3416407864999, -0.44721359549996,
                               0.44721359549996, 1.3416407864999, np.nan])
     assert_almost_equal(desired, y, decimal=12)
Example #23
def filter_invalid_value(data_array):
    """ This filter applies a mask to all numerically invalid inputs 
    on a programing side.

    Numbers that are usually infinite or some other nonsensical 
    quantity serve no real usage in calculations further downstream. 
    Therefore, they are masked here.

    See numpy.ma.fix_invalid for what is considered invalid.

    Parameters
    ----------
    data_array : ndarray
        The data array that the mask will be calculated from.

    Returns
    -------
    final_mask : ndarray
        A boolean array for pixels that are masked (True) or are 
        valid (False).

    """
    # Work from the raw data, as an existing mask might otherwise
    # obscure invalid values in the data itself.
    raw_data_array = np_ma.getdata(data_array)
    # Mask all of the invalid data.
    final_mask = np_ma.getmaskarray(np_ma.fix_invalid(raw_data_array))

    return final_mask
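A quick check of what filter_invalid_value computes, using the same np_ma alias and an illustrative input:

import numpy as np
import numpy.ma as np_ma

data_array = np.array([3.0, np.inf, -np.inf, np.nan, 7.0])
final_mask = np_ma.getmaskarray(np_ma.fix_invalid(np_ma.getdata(data_array)))
print(final_mask)  # [False  True  True  True False]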
Example #24
def spike(x):
    """ Spike
    """
    y = ma.fix_invalid(np.ones_like(x) * np.nan)
    y[1:-1] = np.abs(x[1:-1] - (x[:-2] + x[2:])/2.0) - \
                np.abs((x[2:] - x[:-2])/2.0)
    return y
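The ma.fix_invalid(np.ones_like(x) * np.nan) idiom allocates an output of the same shape with every element masked; assigning into the interior then unmasks only the computed values, so the two endpoints stay masked. A sketch with illustrative data:

import numpy as np
import numpy.ma as ma

x = np.array([2.0, 2.1, 9.0, 2.2, 2.3])
y = ma.fix_invalid(np.ones_like(x) * np.nan)  # fully masked template
y[1:-1] = (np.abs(x[1:-1] - (x[:-2] + x[2:]) / 2.0)
           - np.abs((x[2:] - x[:-2]) / 2.0))
print(y)  # roughly [-- -0.1 6.8 0.1 --]; endpoints remain masked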
Example #25
 def test_zscore(self):
     # This is not in R, so tested by using:
     #     (testcase[i]-mean(testcase,axis=0)) / sqrt(var(testcase)*3/4)
     y = mstats.zscore(self.testcase)
     desired = ma.fix_invalid([-1.3416407864999, -0.44721359549996,
                               0.44721359549996, 1.3416407864999, np.nan])
     assert_almost_equal(desired, y, decimal=12)
Example #26
def hdquantiles_sd(data, prob=list([.25,.5,.75]), axis=None):
    """
    The standard error of the Harrell-Davis quantile estimates by jackknife.

    Parameters
    ----------
    data : array_like
        Data array.
    prob : sequence, optional
        Sequence of quantiles to compute.
    axis : int, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    Returns
    -------
    hdquantiles_sd : MaskedArray
        Standard error of the Harrell-Davis quantile estimates.

    See Also
    --------
    hdquantiles

    """
    def _hdsd_1D(data, prob):
        "Computes the std error for 1D arrays."
        xsorted = np.sort(data.compressed())
        n = len(xsorted)

        hdsd = np.empty(len(prob), float_)
        if n < 2:
            hdsd.flat = np.nan

        vv = np.arange(n) / float(n-1)
        betacdf = beta.cdf

        for (i,p) in enumerate(prob):
            _w = betacdf(vv, (n+1)*p, (n+1)*(1-p))
            w = _w[1:] - _w[:-1]
            mx_ = np.fromiter([np.dot(w,xsorted[np.r_[list(range(0,k)),
                                                      list(range(k+1,n))].astype(int_)])
                                  for k in range(n)], dtype=float_)
            mx_var = np.array(mx_.var(), copy=False, ndmin=1) * n / float(n-1)
            hdsd[i] = float(n-1) * np.sqrt(np.diag(mx_var).diagonal() / float(n))
        return hdsd

    # Initialization & checks
    data = ma.array(data, copy=False, dtype=float_)
    p = np.array(prob, copy=False, ndmin=1)
    # Computes quantiles along axis (or globally)
    if (axis is None):
        result = _hdsd_1D(data, p)
    else:
        if data.ndim > 2:
            raise ValueError("Array 'data' must be at most two dimensional, "
                             "but got data.ndim = %d" % data.ndim)
        result = ma.apply_along_axis(_hdsd_1D, axis, data, p)

    return ma.fix_invalid(result, copy=False).ravel()
Example #27
def hdquantiles_sd(data, prob=list([.25,.5,.75]), axis=None):
    """
    The standard error of the Harrell-Davis quantile estimates by jackknife.

    Parameters
    ----------
    data : array_like
        Data array.
    prob : sequence, optional
        Sequence of quantiles to compute.
    axis : int, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    Returns
    -------
    hdquantiles_sd : MaskedArray
        Standard error of the Harrell-Davis quantile estimates.

    See Also
    --------
    hdquantiles

    """
    def _hdsd_1D(data, prob):
        "Computes the std error for 1D arrays."
        xsorted = np.sort(data.compressed())
        n = len(xsorted)

        hdsd = np.empty(len(prob), float_)
        if n < 2:
            hdsd.flat = np.nan

        vv = np.arange(n) / float(n-1)
        betacdf = beta.cdf

        for (i,p) in enumerate(prob):
            _w = betacdf(vv, (n+1)*p, (n+1)*(1-p))
            w = _w[1:] - _w[:-1]
            mx_ = np.fromiter([w[:k] @ xsorted[:k] + w[k:] @ xsorted[k+1:]
                               for k in range(n)], dtype=float_)
            # mx_var = np.array(mx_.var(), copy=False, ndmin=1) * n / (n - 1)
            # hdsd[i] = (n - 1) * np.sqrt(mx_var / n)
            hdsd[i] = np.sqrt(mx_.var() * (n - 1))
        return hdsd

    # Initialization & checks
    data = ma.array(data, copy=False, dtype=float_)
    p = np.array(prob, copy=False, ndmin=1)
    # Computes quantiles along axis (or globally)
    if (axis is None):
        result = _hdsd_1D(data, p)
    else:
        if data.ndim > 2:
            raise ValueError("Array 'data' must be at most two dimensional, "
                             "but got data.ndim = %d" % data.ndim)
        result = ma.apply_along_axis(_hdsd_1D, axis, data, p)

    return ma.fix_invalid(result, copy=False).ravel()
Example #28
    def set_features(self):
        if ("LATITUDE" in self.data.keys()) and ("LONGITUDE"
                                                 in self.data.keys()):
            lat = self.data["LATITUDE"]
            lon = self.data["LONGITUDE"]
        elif ("LATITUDE" in self.data.attrs) and ("LONGITUDE"
                                                  in self.data.attrs):
            lat = self.data.attrs["LATITUDE"]
            lon = self.data.attrs["LONGITUDE"]
        else:
            module_logger.debug("Missing geolocation (lat/lon)")
            self.features = {}
            return

        try:
            self.features = get_bathymetry(lat=lat, lon=lon)
        except Exception:
            self.features = {
                "bathymetry": ma.fix_invalid([np.nan]),
                "bathymetry_std": ma.fix_invalid([np.nan]),
            }
        return

        if (("LATITUDE" not in self.data.attrs)
                or (self.data.attrs["LATITUDE"] is None)
                or ("LONGITUDE" not in self.data.attrs)
                or (self.data.attrs["LONGITUDE"] is None)):
            module_logger.debug("Missing geolocation (lat/lon)")
            self.features = {
                "bathymetry": ma.fix_invalid([np.nan]),
                "bathymetry_std": ma.fix_invalid([np.nan]),
            }
            self.flags["valid_position"] = self.flag_bad
            return

        if ((self.data.attrs["LATITUDE"] > 90)
                or (self.data.attrs["LATITUDE"] < -90)
                or (self.data.attrs["LONGITUDE"] > 360)
                or (self.data.attrs["LONGITUDE"] < -180)):
            self.features = {
                "bathymetry": ma.fix_invalid([np.nan]),
                "bathymetry_std": ma.fix_invalid([np.nan]),
            }
            return

        lat = self.data.attrs["LATITUDE"]
        lon = self.data.attrs["LONGITUDE"]
        try:
            self.features = get_bathymetry(lat=lat, lon=lon)
        except Exception:
            self.features = {
                "bathymetry": ma.fix_invalid([np.nan]),
                "bathymetry_std": ma.fix_invalid([np.nan]),
            }
Example #29
def get_mask_for_unphysical(U, cutoffU=2000., fill_value=np.nan):
    """
    Returns a mask for the masking module. If the absolute value of an element is greater than the cutoff, the value is masked.
    Parameters
    ----------
    U: array-like
    cutoffU: float
        If |value| > cutoff, this method considers those values unphysical.
    fill_value:


    Returns
    -------
    mask: multidimensional boolean array

    """
    print('number of invalid values (nan and inf) in the array: ' + str(
        np.isnan(U).sum() + np.isinf(U).sum()))
    print('number of nan values in U: ' + str(np.isnan(U).sum()))
    print('number of inf values in U: ' + str(np.isinf(U).sum()) + '\n')

    # a=ma.masked_invalid(U)
    # print 'number of masked elements by masked_invalid: '+ str(ma.count_masked(a))

    # Replace all nan and inf values with fill_value.
    # fix_invalid still enforces a mask on elements with originally invalid values
    U_fixed = ma.fix_invalid(U, fill_value=99999)
    n_invalid = ma.count_masked(U_fixed)
    print('number of masked elements by masked_invalid: ' + str(n_invalid))
    # Update the mask to False (no masking)
    U_fixed.mask = False

    # Mask unreasonable values of U_fixed
    b = ma.masked_greater(U_fixed, cutoffU)
    c = ma.masked_less(U_fixed, -cutoffU)
    n_greater = ma.count_masked(b) - n_invalid
    n_less = ma.count_masked(c)
    print('number of masked elements greater than cutoff: ' + str(n_greater))
    print('number of masked elements less than -cutoff: ' + str(n_less))

    # Generate a mask for all nonsense values in the array U
    mask = ~(~b.mask * ~c.mask)

    d = ma.array(U_fixed, mask=mask)
    n_total = ma.count_masked(d)
    # U_filled = ma.filled(d, fill_value)

    # Total number of elements in U
    N = 1
    for i in range(len(U.shape)):
        N *= U.shape[i]

    print('total number of unphysical values: ' + str(n_total) + '  (' +
          str(float(n_total) / N * 100) + '%)\n')

    return mask
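A condensed equivalent of the pattern above, using ma.masked_outside in place of the masked_greater/masked_less pair (cutoff and data are illustrative). Invalid entries are first replaced by a large fill value, so they end up masked as unphysical too.

import numpy as np
import numpy.ma as ma

U = np.array([10.0, np.nan, 5000.0, -3000.0, np.inf, 42.0])
U_fixed = ma.fix_invalid(U, fill_value=99999)  # nan/inf -> 99999, masked
U_fixed.mask = False                           # keep only the repaired data
mask = ma.getmaskarray(ma.masked_outside(U_fixed, -2000.0, 2000.0))
print(mask)  # [False  True  True  True  True False]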
Example #30
def cum_rate_of_change(x, memory):

    y = ma.fix_invalid(np.ones_like(x) * np.nan)
    y[1:] = ma.absolute(ma.diff(x))

    for i in range(2, y.size):
        if y[i] < y[i - 1]:
            y[i] = (1 - memory) * y[i] + memory * y[i - 1]

    return y
Example #31
def gradient(x):
    """ Gradient QC

        This is different from the mathematical gradient:
        d/dx + d/dy + d/dz,
        but as defined by GTSPP, EuroGOOS and others.
    """
    y = ma.fix_invalid(np.ones_like(x) * np.nan)
    y[1:-1] = np.abs(x[1:-1] - (x[:-2] + x[2:]) / 2.0)
    return y
Example #32
 def test_kendalltau_seasonal(self):
     # Tests the seasonal Kendall tau.
     x = [[nan,nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1],
          [4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3],
          [3, 2, 5, 6, 18, 4, 9, 1, 1,nan, 1, 1,nan],
          [nan, 6, 11, 4, 17,nan, 6, 1, 1, 2, 5, 1, 1]]
     x = ma.fix_invalid(x).T
     output = mstats.kendalltau_seasonal(x)
     assert_almost_equal(output['global p-value (indep)'], 0.008, 3)
     assert_almost_equal(output['seasonal p-value'].round(2),
                         [0.18,0.53,0.20,0.04])
Example #33
 def test_kendalltau_seasonal(self):
     # Tests the seasonal Kendall tau.
     x = [[nan,nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1],
          [4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3],
          [3, 2, 5, 6, 18, 4, 9, 1, 1,nan, 1, 1,nan],
          [nan, 6, 11, 4, 17,nan, 6, 1, 1, 2, 5, 1, 1]]
     x = ma.fix_invalid(x).T
     output = mstats.kendalltau_seasonal(x)
     assert_almost_equal(output['global p-value (indep)'], 0.008, 3)
     assert_almost_equal(output['seasonal p-value'].round(2),
                         [0.18,0.53,0.20,0.04])
Example #34
    def test_kendalltau(self):
        # Tests some computations of Kendall's tau
        x = ma.fix_invalid([5.05, 6.75, 3.21, 2.66,np.nan])
        y = ma.fix_invalid([1.65, 26.5, -5.93, 7.96, np.nan])
        z = ma.fix_invalid([1.65, 2.64, 2.64, 6.95, np.nan])
        assert_almost_equal(np.asarray(mstats.kendalltau(x,y)),
                            [+0.3333333,0.4969059])
        assert_almost_equal(np.asarray(mstats.kendalltau(x,z)),
                            [-0.5477226,0.2785987])
        #
        x = ma.fix_invalid([0, 0, 0, 0,20,20, 0,60, 0,20,
                            10,10, 0,40, 0,20, 0, 0, 0, 0, 0, np.nan])
        y = ma.fix_invalid([0,80,80,80,10,33,60, 0,67,27,
                            25,80,80,80,80,80,80, 0,10,45, np.nan, 0])
        result = mstats.kendalltau(x,y)
        assert_almost_equal(np.asarray(result), [-0.1585188, 0.4128009])

        # test for namedtuple attributes
        res = mstats.kendalltau(x, y)
        attributes = ('correlation', 'pvalue')
        check_named_results(res, attributes, ma=True)
Example #35
def estimate_anomaly(features, params, method='produtorium'):
    """ Estimate probability from PDF defined by params

        The output is the natural logarithm of the estimated probability.

        params are the parameters that define the PDF for each feature
          in features. This function estimates the combined probability of
          each row in features as the product ("produtorium") of the
          probabilities of the different features on the same row.

        ATTENTION!! I should think more about what I would like from this
          function. What should happen in case of a masked feature? And
          if all features for one measurement are masked? Right now they
          simply don't add to the estimate, so all features being masked
          would lead to an expectation of 100% that it's good.
    """
    assert hasattr(params, 'keys')
    assert hasattr(features, 'keys')

    features_names = list(features.keys())
    for k in params.keys():
        assert k in features_names, "features doesn't have: %s" % k

    prob = ma.masked_all(np.shape(features[features_names[0]]), dtype='f8')

    for t in params.keys():
        param = params[t]['param']
        valid = ~ma.fix_invalid(features[t]).mask

        tmp = exponweib.sf(np.asanyarray(features[t]),
                           *param[:-2],
                           loc=param[-2],
                           scale=param[-1])
        # Arbitrary solution. No value can have a probability of 0.
        tmp[tmp == 0] = 1e-25
        p = ma.log(tmp)

        # If both are valid, combine using the chosen method.
        ind = ~prob.mask & valid
        if method == 'produtorium':
            prob[ind] = prob[ind] + p[ind]
        elif method == 'min':
            prob[ind] = min(prob[ind], p[ind])
        else:
            raise ValueError("Invalid method: %s" % method)

        # Update prob if new value is valid and prob is masked
        # Operate twice the first feature if moved above.
        ind = prob.mask & valid
        prob[ind] = p[ind]

    return prob
Example #36
def hdquantiles_sd(data, prob=list([0.25, 0.5, 0.75]), axis=None):
    """Computes the standard error of the Harrell-Davis quantile estimates by jackknife.


Parameters
----------
    data : ndarray
        Data array.
    prob : sequence
        Sequence of quantiles to compute.
    axis : int
        Axis along which to compute the quantiles. If None, use a flattened array.

Notes
-----
    The function is restricted to 2D arrays.

    """

    def _hdsd_1D(data, prob):
        "Computes the std error for 1D arrays."
        xsorted = np.sort(data.compressed())
        n = len(xsorted)
        # .........
        hdsd = np.empty(len(prob), float_)
        if n < 2:
            hdsd.flat = np.nan
        # .........
        vv = np.arange(n) / float(n - 1)
        betacdf = beta.cdf
        #
        for (i, p) in enumerate(prob):
            _w = betacdf(vv, (n + 1) * p, (n + 1) * (1 - p))
            w = _w[1:] - _w[:-1]
            mx_ = np.fromiter(
                [np.dot(w, xsorted[np.r_[range(0, k), range(k + 1, n)].astype(int_)]) for k in range(n)], dtype=float_
            )
            mx_var = np.array(mx_.var(), copy=False, ndmin=1) * n / float(n - 1)
            hdsd[i] = float(n - 1) * np.sqrt(np.diag(mx_var).diagonal() / float(n))
        return hdsd

    # Initialization & checks ---------
    data = ma.array(data, copy=False, dtype=float_)
    p = np.array(prob, copy=False, ndmin=1)
    # Computes quantiles along axis (or globally)
    if axis is None:
        result = _hdsd_1D(data, p)
    else:
        assert data.ndim <= 2, "Array should be 2D at most!"
        result = ma.apply_along_axis(_hdsd_1D, axis, data, p)
    #
    return ma.fix_invalid(result, copy=False).ravel()
Example #37
def chi2(data=None, back_calc=None, errors=None, params=None):
    """Function to calculate the chi-squared value."""

    # Calculate the chi-squared statistic.
    if raise_warnings:
        try:
            t_chi2 = sum((1.0 / errors * (data - back_calc))**2)
        except RuntimeWarning:
            # Handle if algorithm takes wrong step.
            #print "Oppps. np=%i, sim=%i, R2=%3.2f, I0=%3.2f" % (np_i, sim_j, params[0], params[1])
            t_chi2 = 1e100
    else:
        t_chi2 = sum((1.0 / errors * (data - back_calc))**2)
        if 0:
            fix_invalid(t_chi2, copy=False, fill_value=1e100)
            t_chi2 = nan_to_num( t_chi2 )
        if not isfinite(t_chi2):
            t_chi2_2 = nan_to_num( t_chi2 )
            #print "Oppps. np=%i, sim=%i, R2=%3.2f, I0=%3.2f %s %s" % (np_i, sim_j, params[0], params[1], t_chi2, t_chi2_2)
            t_chi2 = t_chi2_2

    return t_chi2
Example #38
def chi2(data=None, back_calc=None, errors=None, params=None):
    """Function to calculate the chi-squared value."""

    # Calculate the chi-squared statistic.
    if raise_warnings:
        try:
            t_chi2 = sum((1.0 / errors * (data - back_calc))**2)
        except RuntimeWarning:
            # Handle if algorithm takes wrong step.
            #print "Oppps. np=%i, sim=%i, R2=%3.2f, I0=%3.2f" % (np_i, sim_j, params[0], params[1])
            t_chi2 = 1e100
    else:
        t_chi2 = sum((1.0 / errors * (data - back_calc))**2)
        if 0:
            fix_invalid(t_chi2, copy=False, fill_value=1e100)
            t_chi2 = nan_to_num( t_chi2 )
        if not isfinite(t_chi2):
            t_chi2_2 = nan_to_num( t_chi2 )
            #print "Oppps. np=%i, sim=%i, R2=%3.2f, I0=%3.2f %s %s" % (np_i, sim_j, params[0], params[1], t_chi2, t_chi2_2)
            t_chi2 = t_chi2_2

    return t_chi2
Example #39
    def test_kstwosamp(self):
        x = [
            [nan, nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1],
            [4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3],
            [3, 2, 5, 6, 18, 4, 9, 1, 1, nan, 1, 1, nan],
            [nan, 6, 11, 4, 17, nan, 6, 1, 1, 2, 5, 1, 1],
        ]
        x = ma.fix_invalid(x).T
        (winter, spring, summer, fall) = x.T

        assert_almost_equal(np.round(mstats.ks_twosamp(winter, spring), 4), (0.1818, 0.9892))
        assert_almost_equal(np.round(mstats.ks_twosamp(winter, spring, "g"), 4), (0.1469, 0.7734))
        assert_almost_equal(np.round(mstats.ks_twosamp(winter, spring, "l"), 4), (0.1818, 0.6744))
Example #40
    def test_kstwosamp(self):
        x = [[nan,nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1],
             [4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3],
             [3, 2, 5, 6, 18, 4, 9, 1, 1,nan, 1, 1,nan],
             [nan, 6, 11, 4, 17,nan, 6, 1, 1, 2, 5, 1, 1]]
        x = ma.fix_invalid(x).T
        (winter,spring,summer,fall) = x.T

        assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring),4),
                            (0.1818,0.9892))
        assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring,'g'),4),
                            (0.1469,0.7734))
        assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring,'l'),4),
                            (0.1818,0.6744))
Example #41
 def __init__(self):
     self.attrs = {
         'datetime': datetime(2016, 6, 4),
         'LATITUDE': 15,
         'LONGITUDE': -38
     }
     self.data = {
         'PRES':
         ma.fix_invalid([
             2, 6, 10, 21, 44, 79, 100, 150, 200, 400, 410, 650, 1000, 2000,
             5000
         ]),
         'TEMP':
         ma.fix_invalid([
             25.32, 25.34, 25.34, 25.31, 24.99, 23.46, 21.85, 17.95, 15.39,
             11.08, 6.93, 7.93, 5.71, 3.58, np.nan
         ]),
         'PSAL':
         ma.fix_invalid([
             36.49, 36.51, 36.52, 36.53, 36.59, 36.76, 36.81, 36.39, 35.98,
             35.30, 35.28, 34.93, 34.86, np.nan, np.nan
         ])
     }
Example #42
    def test_spearmanr(self):
        # Tests some computations of Spearman's rho
        (x, y) = ([5.05,6.75,3.21,2.66],[1.65,2.64,2.64,6.95])
        assert_almost_equal(mstats.spearmanr(x,y)[0], -0.6324555)
        (x, y) = ([5.05,6.75,3.21,2.66,np.nan],[1.65,2.64,2.64,6.95,np.nan])
        (x, y) = (ma.fix_invalid(x), ma.fix_invalid(y))
        assert_almost_equal(mstats.spearmanr(x,y)[0], -0.6324555)

        x = [2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1,
              1.0, 1.4, 7.9, 0.3, 3.9, 0.3, 6.7]
        y = [22.6, 8.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6,
              0.0, 0.6, 6.7, 3.8, 1.0, 1.2, 1.4]
        assert_almost_equal(mstats.spearmanr(x,y)[0], 0.6887299)
        x = [2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1,
              1.0, 1.4, 7.9, 0.3, 3.9, 0.3, 6.7, np.nan]
        y = [22.6, 8.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6,
              0.0, 0.6, 6.7, 3.8, 1.0, 1.2, 1.4, np.nan]
        (x, y) = (ma.fix_invalid(x), ma.fix_invalid(y))
        assert_almost_equal(mstats.spearmanr(x,y)[0], 0.6887299)

        # test for namedtuple attributes
        res = mstats.spearmanr(x, y)
        attributes = ('correlation', 'pvalue')
        check_named_results(res, attributes, ma=True)
Example #43
 def test_kstwosamp(self):
     "Tests the Kolmogorov-Smirnov 2 samples test"
     x = [[nan,nan,  4,  2, 16, 26,  5,  1,  5,  1,  2,  3,  1],
          [  4,  3,  5,  3,  2,  7,  3,  1,  1,  2,  3,  5,  3],
          [  3,  2,  5,  6, 18,  4,  9,  1,  1,nan,  1,  1,nan],
          [nan,  6, 11,  4, 17,nan,  6,  1,  1,  2,  5,  1,  1]]
     x = ma.fix_invalid(x).T
     (winter,spring,summer,fall) = x.T
     #
     assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring),4),
                         (0.1818,0.9892))
     assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring,'g'),4),
                         (0.1469,0.7734))
     assert_almost_equal(np.round(mstats.ks_twosamp(winter,spring,'l'),4),
                         (0.1818,0.6744))
Example #44
def densitystep(S, T, P):
    """
    """
    assert S.shape == T.shape
    assert S.shape == P.shape
    try:
        import gsw
        rho0 = gsw.pot_rho_t_exact(S, T, P, 0)
        assert S.ndim == 1, "Not able to densitystep an array ndim > 1"
        ds = ma.concatenate([ma.masked_all(1),
                np.sign(np.diff(P))*np.diff(rho0)])
        return ma.fix_invalid(ds)

    except ImportError:
        print("Package gsw is required and is not available.")
Example #45
def densitystep(S, T, P):
    """
    """
    assert S.shape == T.shape
    assert S.shape == P.shape
    try:
        import gsw
        rho0 = gsw.pot_rho_t_exact(S, T, P, 0)
        assert S.ndim == 1, "Not able to densitystep an array ndim > 1"
        ds = ma.concatenate(
            [ma.masked_all(1),
             np.sign(np.diff(P)) * np.diff(rho0)])
        return ma.fix_invalid(ds)

    except ImportError:
        print("Package gsw is required and is not available.")
Example #46
 def test_avf_masked(self):
     presidents = ma.fix_invalid(self.presidents)
     # periodogram : True
     avfp = avf(presidents)
     assert_almost_equal(avfp[:21].round(2),
                         [241.74,185.75,159.63,116.92, 95.91, 60.36, 45.69,
                           34.97, 31.74, 10.91,  7.48,  1.32, 11.70,  7.71,
                           13.57,  4.16, -1.05, -9.76,-11.24,-15.67,-12.32])
      # periodogram : False
     avfp = avf(presidents,0)
     pz = presidents.anom()
     mz = (~pz.mask).astype(int)
     assert_almost_equal(avfp[:21],
                         np.r_[[pz.var()],
                               [(pz[k:]*pz[:-k]).sum()/(mz[k:]*mz[:-k]).sum()
                                for k in range(1,21)]])
Example #47
def estimate_anomaly(features, params, method='produtorium'):
    """ Estimate probability from PDF defined by params

        The output is the natural logarithm of the estimated probability.

        params are the parameters that define the PDF for each feature
          in features. This function estimates the combined probability of
          each row in features as the product ("produtorium") of the
          probabilities of the different features on the same row.

        ATTENTION!! I should think more about what I would like from this
          function. What should happen in case of a masked feature? And
          if all features for one measurement are masked? Right now they
          simply don't add to the estimate, so all features being masked
          would lead to an expectation of 100% that it's good.
    """
    assert hasattr(params, 'keys')
    assert hasattr(features, 'keys')
    for k in params.keys():
        assert k in features.keys(), "features doesn't have: %s" % k

    prob = ma.masked_all(len(features[list(features.keys())[0]]))

    for t in params.keys():
        param = params[t]['param']
        valid = ~ma.fix_invalid(features[t]).mask

        tmp = exponweib.sf(np.asanyarray(features[t]),
                *param[:-2], loc=param[-2], scale=param[-1])
        # Arbitrary solution. No value can have a probability of 0.
        tmp[tmp == 0] = 1e-15
        p = ma.log(tmp)

        # Update prob if new value is valid and prob is masked
        ind = prob.mask & valid
        prob[ind] = p[ind]

        # If both are valid, combine using the chosen method.
        ind = ~prob.mask & valid
        if method == 'produtorium':
            prob[ind] = prob[ind] + p[ind]
        elif method == 'min':
            prob[ind] = min(prob[ind], p[ind])
        else:
            raise ValueError("Invalid method: %s" % method)

    return prob
Example #48
 def test_friedmanchisq(self):
     # No missing values
     args = ([9.0,9.5,5.0,7.5,9.5,7.5,8.0,7.0,8.5,6.0],
             [7.0,6.5,7.0,7.5,5.0,8.0,6.0,6.5,7.0,7.0],
             [6.0,8.0,4.0,6.0,7.0,6.5,6.0,4.0,6.5,3.0])
     result = mstats.friedmanchisquare(*args)
     assert_almost_equal(result[0], 10.4737, 4)
     assert_almost_equal(result[1], 0.005317, 6)
     # Missing values
     x = [[nan,nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1],
          [4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3],
          [3, 2, 5, 6, 18, 4, 9, 1, 1,nan, 1, 1,nan],
          [nan, 6, 11, 4, 17,nan, 6, 1, 1, 2, 5, 1, 1]]
     x = ma.fix_invalid(x)
     result = mstats.friedmanchisquare(*x)
     assert_almost_equal(result[0], 2.0156, 4)
     assert_almost_equal(result[1], 0.5692, 4)
Example #49
 def test_friedmanchisq(self):
     # No missing values
     args = ([9.0,9.5,5.0,7.5,9.5,7.5,8.0,7.0,8.5,6.0],
             [7.0,6.5,7.0,7.5,5.0,8.0,6.0,6.5,7.0,7.0],
             [6.0,8.0,4.0,6.0,7.0,6.5,6.0,4.0,6.5,3.0])
     result = mstats.friedmanchisquare(*args)
     assert_almost_equal(result[0], 10.4737, 4)
     assert_almost_equal(result[1], 0.005317, 6)
     # Missing values
     x = [[nan,nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1],
          [4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3],
          [3, 2, 5, 6, 18, 4, 9, 1, 1,nan, 1, 1,nan],
          [nan, 6, 11, 4, 17,nan, 6, 1, 1, 2, 5, 1, 1]]
     x = ma.fix_invalid(x)
     result = mstats.friedmanchisquare(*x)
     assert_almost_equal(result[0], 2.0156, 4)
     assert_almost_equal(result[1], 0.5692, 4)
Example #50
def avf(x, periodogram=True):
    """Computes the auto-covariance function of the series `x`.
The computations are performed on anomalies (deviations from average).
Gaps in the series are filled first, anomalies are then computed and missing
values filled with 0.
    The autocovariance at lag k, $\hat{R}(k)$, of a series {x_1,...,x_n} with
mean 0 is defined as:
\hat{R}(k) = \sum_{t=1}^{n-k}{y_t y_{t+k}} / \sum_{t=1}^{n-k}{a_t a_{t+k}}
where $y_k = x_k$ if $x_k$ is not masked and $y_k = 0$ if $x_k$ is masked, and
where $a_k = 1$ if $x_k$ is not masked and $a_k = 0$ if $x_k$ is masked.
If the optional parameter `periodogram` is True, the denominator of the previous
expression is $\sum_{t=1}^{n-k}{a_t a_{t+k}} + k$.

Parameters
----------
    x : sequence
        Input data. If x is a TimeSeries object, it is filled first.
    periodogram : {True, False} optional
        Whether to return a periodogram or a standard estimate of the autocovariance.

Returns
-------
    avf : ma.array
        Autocovariance at lags [0,1,...,n,n-1,...,-1]

    """
    x = ma.array(x, copy=False, subok=True, dtype=float)
    if x.ndim > 1:
        raise ValueError("The input array should be 1D only.")
    # make sure there's no gap in the data
    if isinstance(x, TimeSeries) and x.has_missing_dates():
        x = ts.fill_missing_dates(x)
    #
    m = np.logical_not(ma.getmaskarray(x)).astype(int)
    x = x.anom().filled(0).view(ndarray)
    n = len(x)
    #
    _avf = np.correlate(x,x,'full')
    denom = np.correlate(m,m,'full')
    if periodogram:
        denom += np.concatenate([np.arange(n-1,0,-1), np.arange(n)])
    _avf /= denom
    _avf = np.concatenate([_avf[n-1:],_avf[:n-1]])
    return ma.fix_invalid(_avf)
Example #51
    def test_friedmanchisq(self):
        # No missing values
        args = ([9.0,9.5,5.0,7.5,9.5,7.5,8.0,7.0,8.5,6.0],
                [7.0,6.5,7.0,7.5,5.0,8.0,6.0,6.5,7.0,7.0],
                [6.0,8.0,4.0,6.0,7.0,6.5,6.0,4.0,6.5,3.0])
        result = mstats.friedmanchisquare(*args)
        assert_almost_equal(result[0], 10.4737, 4)
        assert_almost_equal(result[1], 0.005317, 6)
        # Missing values
        x = [[nan,nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1],
             [4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3],
             [3, 2, 5, 6, 18, 4, 9, 1, 1,nan, 1, 1,nan],
             [nan, 6, 11, 4, 17,nan, 6, 1, 1, 2, 5, 1, 1]]
        x = ma.fix_invalid(x)
        result = mstats.friedmanchisquare(*x)
        assert_almost_equal(result[0], 2.0156, 4)
        assert_almost_equal(result[1], 0.5692, 4)

        # test for namedtuple attributes
        attributes = ('statistic', 'pvalue')
        check_named_results(result, attributes, ma=True)
Example #52
def qqcalc(data, distrib=ssd.norm, alpha=.4, beta=.4):
    """
    Returns the theoretical quantiles from an empirical distribution.
    
    Parameters
    ----------
    data : array
        Input data
    distrib : {norm, function}, optional
        Theoretical distribution used to compute the expected quantiles.
        If None, use a normal distribution.
        Otherwise, ``distrib`` must have a :meth:`.ppf` method.
    alpha : {float}, optional
        Coefficient for the computation of plotting positions
    beta : {float}, optional
        Coefficient for the computation of plotting positions.

    """
    pp = mstats.plotting_positions(data, alpha=alpha, beta=beta)
    qq = ma.fix_invalid(distrib.ppf(pp))
    qq.mask = ma.getmaskarray(pp)
    return qq
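
A minimal usage sketch (the sample is hypothetical). The expected quantiles
are returned in data order, so (theoretical[i], sample[i]) are the QQ-plot
pairs:

import numpy as np

sample = np.array([2.3, 1.9, 3.1, 2.7, 2.2, 2.8])
theoretical = qqcalc(sample)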
Example #53
import numpy as np
import numpy.ma as ma
import netCDF4
from os.path import expanduser


def woa_track_from_file(d, lat, lon, filename, varnames=None):
    """ Temporary solution: WOA for surface track
    """
    d = np.asanyarray(d)
    lat = np.asanyarray(lat)
    lon = np.asanyarray(lon)

    # Shift negative longitudes into the [0, 360) range used by the file.
    lon[lon < 0] += 360

    doy = np.array([int(dd.strftime('%j')) for dd in d])

    nc = netCDF4.Dataset(expanduser(filename), 'r')

    if varnames is None:
        varnames = {}
        for v in nc.variables.keys():
            if nc.variables[v].dimensions == \
                    (u'time', u'depth', u'lat', u'lon'):
                varnames[v] = v

    output = {}
    for v in varnames:
        output[v] = []

    for d_n, lat_n, lon_n in zip(doy, lat, lon):
        # Get the nearest point. In the future interpolate.
        n_d = (np.abs(d_n - nc.variables['time'][:])).argmin()
        n_x = (np.abs(lon_n - nc.variables['lon'][:])).argmin()
        n_y = (np.abs(lat_n - nc.variables['lat'][:])).argmin()

        for v in varnames:
            output[v].append(nc.variables[varnames[v]][n_d, 0, n_y, n_x])

    for v in varnames:
        output[v] = ma.fix_invalid(output[v])

    return output
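
A minimal usage sketch; the track and the WOA filename below are assumptions,
and `d` only needs elements supporting `strftime`:

from datetime import datetime

dates = [datetime(2014, 3, 1), datetime(2014, 3, 2)]
lats = [15.0, 15.5]
lons = [-38.0, -38.5]
track = woa_track_from_file(dates, lats, lons,
                            '~/data/woa_surface.nc')  # hypothetical file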
Example #54
import numpy as np
import numpy.ma as ma
from numpy import ndarray
try:
    # Legacy scikits.timeseries package; only needed for TimeSeries inputs.
    import scikits.timeseries as ts
    from scikits.timeseries import TimeSeries
except ImportError:
    ts = None
    TimeSeries = ()


def _acf(x, mode):
    """Computes the auto-correlation function of the time series x.

Note that the computations are performed on anomalies (deviations from the
average). Gaps in the series are filled first, the anomalies are then computed
and the missing values filled with 0.

:Parameters:
    `x` : TimeSeries
        Time series.
    `mode` : bool
        Selects the normalization of the denominator; analogous to the
        `periodogram` flag of `avf`.
    """
    x = ma.array(x, copy=False, subok=True, dtype=float)
    if x.ndim > 1:
        raise ValueError("The input array should be 1D only.")
    # make sure there's no gap in the data
    if isinstance(x, TimeSeries) and x.has_missing_dates():
        x = ts.fill_missing_dates(x)
    #
    m = np.logical_not(ma.getmaskarray(x)).astype(int)
    x = x.anom().filled(0).view(ndarray)
    xx = x * x
    n = len(x)
    #
    _avf = np.correlate(x, x, 'full')[n-1:]
    if mode:
        dnm_ = np.fromiter((np.sum(x[k:]*x[:-k]) / np.sum(m[k:]*xx[:-k])
                            for k in range(1, n)),
                           dtype=float)
    else:
        dnm_ = np.fromiter((np.sum(x[k:]*x[:-k]) /
                            np.sqrt((m[k:]*xx[:-k]).sum() * (m[:-k]*xx[k:]).sum())
                            for k in range(1, n)),
                           dtype=float)
    poslags = _avf[1:] / dnm_
    return ma.fix_invalid(np.concatenate([np.array([1.]),
                                          poslags,
                                          poslags[::-1]]))
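
A minimal usage sketch, assuming only NumPy (the series is hypothetical); the
lag-0 autocorrelation is 1 by construction:

import numpy as np
import numpy.ma as ma

series = ma.fix_invalid([0.5, 1.2, 0.9, np.nan, 1.1, 0.7])
acf = _acf(series, mode=False)
print(acf[0])  # 1.0
print(acf[1])  # lag-1 autocorrelation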
Example #55
import numpy.ma as ma
# The two imports below are assumptions: `delaunay` is the legacy
# matplotlib.delaunay module, and `mtb` is taken to alias mpl_toolkits.basemap
# (which provides `maskoceans`).
from matplotlib import delaunay
import mpl_toolkits.basemap as mtb


def extrapolate_data(dataset, basemap, gridsize_x, gridsize_y,
                     maskoceans=False):
    """
    Extrapolate `dataset` on a grid of size `(gridsize_x, gridsize_y)`
    based on `basemap`.

    A regular grid of the user-defined size is created from the basemap.
    The dataset coordinates are then Delaunay triangulated, and the
    corresponding data extrapolated on the regular grid using the
    natural-neighbor method.

    Parameters
    ----------
    dataset : ndarray
        A structured ndarray, w/ fields ['lon', 'lat', 'data']
    basemap : Basemap
        The projection basemap
    gridsize_x : int
        Number of cells in the x direction ('lon')
    gridsize_y : int
        Number of cells in the y direction ('lat')
    maskoceans : bool, optional
        Whether to mask cells falling over the oceans.
    """
    # Get the grid
    (glon, glat, gx, gy) = basemap.makegrid(gridsize_x, gridsize_y,
                                            returnxy=True)
    # Transforms the lon/lat of the dataset in basemap units
    (llon, llat) = basemap(dataset['lon'], dataset['lat'])
    # Triangulate the dataset
    triangul = delaunay.Triangulation(llon, llat)
    # Define an extrapolator (using natural neighbors)...
    # ... and extrapolate the data along the grid...
    extrapolator = triangul.nn_extrapolator(dataset['data'])
    extrapolated = ma.fix_invalid(extrapolator(gx, gy))
    if maskoceans:
        extrapolated = mtb.maskoceans(glon, glat, extrapolated)
    return (extrapolated, gx, gy)
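
A minimal usage sketch, assuming basemap and the legacy matplotlib.delaunay
module are installed; the structured dataset and map extent are hypothetical:

import numpy as np
from mpl_toolkits.basemap import Basemap

dtype = [('lon', float), ('lat', float), ('data', float)]
dataset = np.array([(-10., 45., 1.0), (-9., 46., 2.0), (-8., 44., 1.5)],
                   dtype=dtype)
bmap = Basemap(projection='merc', llcrnrlon=-12, llcrnrlat=43,
               urcrnrlon=-6, urcrnrlat=48, resolution='c')
(grid, gx, gy) = extrapolate_data(dataset, bmap, 50, 50)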
Example #56
import numpy as np
import numpy.ma as ma
from numpy import float_, ndarray
from scipy.stats.distributions import beta


def hdquantiles(data, prob=[0.25, 0.5, 0.75], axis=None, var=False):
    """
    Computes quantile estimates with the Harrell-Davis method.

    The quantile estimates are calculated as a weighted linear combination
    of order statistics.

    Parameters
    ----------
    data : array_like
        Data array.
    prob : sequence, optional
        Sequence of quantiles to compute.
    axis : int or None, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.
    var : bool, optional
        Whether to return the variance of the estimate.

    Returns
    -------
    hdquantiles : MaskedArray
        A (p,) array of quantiles (if `var` is False), or a (2,p) array of
        quantiles and variances (if `var` is True), where ``p`` is the
        number of quantiles.

    """

    def _hd_1D(data, prob, var):
        "Computes the HD quantiles for a 1D array. Returns nan for invalid data."
        xsorted = np.squeeze(np.sort(data.compressed().view(ndarray)))
        # Don't use length here, in case we have a numpy scalar
        n = xsorted.size

        hd = np.empty((2, len(prob)), float_)
        if n < 2:
            hd.flat = np.nan
            if var:
                return hd
            return hd[0]

        v = np.arange(n + 1) / float(n)
        betacdf = beta.cdf
        for (i, p) in enumerate(prob):
            _w = betacdf(v, (n + 1) * p, (n + 1) * (1 - p))
            w = _w[1:] - _w[:-1]
            hd_mean = np.dot(w, xsorted)
            hd[0, i] = hd_mean
            #
            hd[1, i] = np.dot(w, (xsorted - hd_mean) ** 2)
            #
        hd[0, prob == 0] = xsorted[0]
        hd[0, prob == 1] = xsorted[-1]
        if var:
            hd[1, prob == 0] = hd[1, prob == 1] = np.nan
            return hd
        return hd[0]

    # Initialization & checks
    data = ma.array(data, copy=False, dtype=float_)
    p = np.array(prob, copy=False, ndmin=1)
    # Computes quantiles along axis (or globally)
    if (axis is None) or (data.ndim == 1):
        result = _hd_1D(data, p, var)
    else:
        if data.ndim > 2:
            raise ValueError("Array 'data' must be at most two dimensional, "
                             "but got data.ndim = %d" % data.ndim)
        result = ma.apply_along_axis(_hd_1D, axis, data, p, var)

    return ma.fix_invalid(result, copy=False)
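
A minimal usage sketch (the sample is hypothetical): quartile estimates, with
and without the variance of each estimate.

import numpy as np

rng = np.random.default_rng(0)
sample = rng.exponential(scale=2.0, size=200)
q = hdquantiles(sample, prob=[0.25, 0.5, 0.75])
q_and_var = hdquantiles(sample, prob=[0.25, 0.5, 0.75], var=True)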
Example #57
# The names used below (sin, cos, min, any, sum, isfinite, masked_where,
# fix_invalid) are taken to be NumPy imports, as the array arguments require.
from numpy import any, cos, isfinite, min, sin, sum
from numpy.ma import fix_invalid, masked_where


def r1rho_DPL94(r1rho_prime=None, phi_ex=None, kex=None, theta=None, R1=0.0,
                spin_lock_fields2=None, back_calc=None):
    """Calculate the R1rho values for the DPL94 model.

    See the module docstring for details.


    @keyword r1rho_prime:       The R1rho_prime parameter value (R1rho with no exchange).
    @type r1rho_prime:          numpy float array of rank [NE][NS][NM][NO][ND]
    @keyword phi_ex:            The phi_ex parameter value (pA * pB * delta_omega^2).
    @type phi_ex:               numpy float array of rank [NE][NS][NM][NO][ND]
    @keyword kex:               The kex parameter value (the exchange rate in rad/s).
    @type kex:                  float
    @keyword theta:             The rotating frame tilt angles for each dispersion point.
    @type theta:                numpy float array of rank [NE][NS][NM][NO][ND]
    @keyword R1:                The R1 relaxation rate.
    @type R1:                   numpy float array of rank [NE][NS][NM][NO][ND]
    @keyword spin_lock_fields2: The R1rho spin-lock field strengths squared (in rad^2.s^-2).
    @type spin_lock_fields2:    numpy float array of rank [NE][NS][NM][NO][ND]
    @keyword back_calc:         The array for holding the back calculated R1rho values.  Each element corresponds to the combination of theta and spin lock field.
    @type back_calc:            numpy float array of rank [NE][NS][NM][NO][ND]
    """

    # Flags to tell if values should be replaced when the numerator or the
    # denominator is zero.
    t_numer_zero = False
    t_denom_zero = False

    # The non-Rex factors.
    sin_theta2 = sin(theta)**2
    R1_R2 = R1 * cos(theta)**2 + r1rho_prime * sin_theta2

    # The numerator.
    numer = sin_theta2 * phi_ex * kex

    # Catch zeros (to avoid pointless mathematical operations).
    # This will result in no exchange, returning flat lines.
    if min(numer) == 0.0:
        t_numer_zero = True
        mask_numer_zero = masked_where(numer == 0.0, numer)

    # Denominator.
    denom = kex**2 + spin_lock_fields2

    # Catch the math domain error of dividing by zero (denom == 0).
    mask_denom_zero = denom == 0.0
    if any(mask_denom_zero):
        t_denom_zero = True
        denom[mask_denom_zero] = 1.0

    # R1rho calculation.
    back_calc[:] = R1_R2 + numer / denom

    # Replace data in array.
    # If numer is zero.
    if t_numer_zero:
        back_calc[mask_numer_zero.mask] = R1_R2[mask_numer_zero.mask]

    # If denom is zero.
    if t_denom_zero:
        back_calc[mask_denom_zero] = 1e100

    # Catch errors, taking a sum over array is the fastest way to check for
    # +/- inf (infinity) and nan (not a number).
    if not isfinite(sum(back_calc)):
        # Replaces nan, inf, etc. with fill value.
        fix_invalid(back_calc, copy=False, fill_value=1e100)
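
A minimal usage sketch with rank-1 arrays (hypothetical values; the real code
passes rank-5 [NE][NS][NM][NO][ND] arrays):

import numpy as np

r1rho_prime = np.array([10.0])
phi_ex = np.array([0.5])
theta = np.array([np.pi / 4.0])
R1 = np.array([1.5])
fields2 = np.array([(2.0 * np.pi * 1000.0) ** 2])
back_calc = np.zeros(1)
r1rho_DPL94(r1rho_prime=r1rho_prime, phi_ex=phi_ex, kex=5000.0, theta=theta,
            R1=R1, spin_lock_fields2=fields2, back_calc=back_calc)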
Example #58
# Assumed imports for this snippet; `dbConn` is a global set up elsewhere in
# the original application.
import time
import zlib

import dateutil.parser
import numpy as np
import numpy.ma as ma
import simplejson
import PySQLPool
from scipy import stats


def worker(job_json):
    """
    For every incoming message, this worker function is called. Be extremely
    careful not to do anything CPU-intensive here, or you will see blocking.
    Sockets are async under gevent, so those are fair game.
    """
    # Receive raw market JSON strings.
    market_json = zlib.decompress(job_json)
    # Un-serialize the JSON data to a Python dict.
    market_data = simplejson.loads(market_json)
    # Save to your choice of DB here.
    global dbConn
    query = PySQLPool.getNewQuery(dbConn)
    if market_data['resultType'] == 'orders':
        rows = market_data['rowsets']
        try:
            for row in rows:
                if len(row['rows']) == 0:
                    # Skip empty rowsets.
                    continue
                genTime = dateutil.parser.parse(row['generatedAt'])
                genTime = int(time.mktime(genTime.timetuple()))
                typeID = row['typeID']
                regionID = row['regionID']
                buyCount = []
                sellCount = []
                buyPrice = []
                sellPrice = []
                tempMask = []
                buyAvg = 0
                buyMean = 0
                buyTotal = 0
                sellAvg = 0
                sellMean = 0
                sellTotal = 0
                buy = 0
                sell = 0
                updated = 0
                stuff = row['rows']
                search = "SELECT * FROM prices WHERE uniquek = '%s' AND dateTime > '%s'" % (str(regionID) + str(typeID), genTime)
                query.Query(search)
                if (len(query.record) == 1) or (genTime > int(time.mktime(time.gmtime()))):
                    # Skip rows already stored or timestamped in the future.
                    continue
                for data in stuff:
                    if data[6] == True:
                        buyPrice.append(data[0])
                        buyCount.append(data[4] - data[1])
                    elif data[6] == False:
                        sellPrice.append(data[0])
                        sellCount.append(data[4] - data[1])
                    else:
                        pass

                if len(buyPrice) > 1:
                    top = stats.scoreatpercentile(buyPrice, 90)
                    bottom = stats.scoreatpercentile(buyPrice, 10)
                    buyMasked = ma.masked_outside(buyPrice, bottom, top)
                    tempMask = buyMasked.mask
                    buyCountMasked = ma.array(buyCount, mask=tempMask, fill_value = 0)
                    # fix_invalid returns a new array by default; use
                    # copy=False so the sanitized values are kept in place.
                    ma.fix_invalid(buyMasked, copy=False)
                    ma.fix_invalid(buyCountMasked, copy=False)
                    buyAvg = ma.average(buyMasked, 0, buyCountMasked)
                    buyMean = ma.mean(buyMasked)
                    buyTotal = ma.sum(buyCountMasked)
                    if buyTotal == 0:
                        buyAvg = 0
                        buyMean = 0
                    updated = 1
                    if len(buyPrice) < 4:
                        buyAvg = ma.average(buyPrice)
                        buyMean = ma.mean(buyPrice)
                    buyPrice.sort()
                    buy = buyPrice.pop()
                        
                if len(sellPrice) > 3:
                    top = stats.scoreatpercentile(sellPrice, 90)
                    bottom = stats.scoreatpercentile(sellPrice, 1)
                    sellMasked = ma.masked_outside(sellPrice, bottom, top)
                    tempMask = sellMasked.mask
                    sellCountMasked = ma.array(sellCount, mask=tempMask, fill_value = 0)
                    ma.fix_invalid(sellMasked, copy=False)
                    ma.fix_invalid(sellCountMasked, copy=False)
                    sellAvg = ma.average(sellMasked, 0, sellCountMasked)
                    sellMean = ma.mean(sellMasked)
                    sellTotal = ma.sum(sellCountMasked)
                    if sellTotal == 0:
                        sellAvg = 0
                        sellMean = 0
                    updated = 1
                    if len(sellPrice) < 4:
                        sellMean = ma.mean(sellPrice)
                        sellTotal = ma.sum(sellPrice)
                    sellPrice.sort()
                    sellPrice.reverse()
                    sell = sellPrice.pop()

                data = "REPLACE INTO prices SET uniquek = '%s', region = '%i', itemid = '%i', buymean = '%.2f', buyavg = '%.2f', sellmean = '%.2f', sellavg = '%.2f', buycount = '%i', sellcount = '%i', buy = '%.2f', sell = '%.2f', dateTime = '%i'" % (str(regionID) + str(typeID), regionID, typeID, np.nan_to_num(buyMean), np.nan_to_num(buyAvg), np.nan_to_num(sellMean), np.nan_to_num(sellAvg), np.nan_to_num(buyTotal), np.nan_to_num(sellTotal), buy, sell, genTime)
                query.Query(data)
        except Exception:
            # Ignore malformed rows rather than killing the worker.
            pass
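
The statistical core of the worker above, in isolation, as a minimal sketch
with hypothetical prices and volumes: clip values outside the 10th-90th
percentile band, then take a volume-weighted average.

import numpy.ma as ma
from scipy import stats

prices = [5.0, 5.1, 5.2, 5.3, 99.0]
volumes = [10, 20, 15, 5, 1]
top = stats.scoreatpercentile(prices, 90)
bottom = stats.scoreatpercentile(prices, 10)
masked_prices = ma.masked_outside(prices, bottom, top)
masked_volumes = ma.array(volumes, mask=masked_prices.mask)
weighted_avg = ma.average(masked_prices, weights=masked_volumes)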
Example #59
# NumPy imports; the remaining names (cdp, update_model, assemble_param_vector,
# N_state_opt, etc.) are provided by relax's own modules.
from numpy import dot
from numpy.linalg import inv
from numpy.ma import fix_invalid


def target_fn_setup(sim_index=None, scaling_matrix=None, verbosity=0):
    """Initialise the target function for optimisation or direct calculation.

    @keyword sim_index:         The index of the simulation to optimise.  This should be None if normal optimisation is desired.
    @type sim_index:            None or int
    @keyword scaling_matrix:    The diagonal and square scaling matrix.
    @type scaling_matrix:       numpy rank-2, float64 array or None
    @keyword verbosity:         A flag specifying the amount of information to print.  The higher the value, the greater the verbosity.
    @type verbosity:            int
    """

    # Test if the N-state model has been set up.
    if not hasattr(cdp, 'model'):
        raise RelaxNoModelError('N-state')

    # '2-domain' model setup tests.
    if cdp.model == '2-domain':
        # The number of states.
        if not hasattr(cdp, 'N'):
            raise RelaxError("The number of states has not been set.")

        # The reference domain.
        if not hasattr(cdp, 'ref_domain'):
            raise RelaxError("The reference domain has not been set.")

    # Update the model parameters if necessary.
    update_model()

    # Create the initial parameter vector.
    param_vector = assemble_param_vector(sim_index=sim_index)

    # Replace all NaNs with 0.0.
    fix_invalid(param_vector, copy=False, fill_value=0.0)

    # Determine if alignment tensors or RDCs are to be used.
    data_types = base_data_types()

    # The probabilities.
    probs = None
    if hasattr(cdp, 'probs') and len(cdp.probs) and cdp.probs[0] is not None:
        probs = cdp.probs

    # Diagonal scaling.
    if len(param_vector) and scaling_matrix is not None:
        param_vector = dot(inv(scaling_matrix), param_vector)

    # Get the data structures for optimisation using the tensors as base data sets.
    full_tensors, red_tensor_elem, red_tensor_err, full_in_ref_frame = None, None, None, None
    if 'tensor' in data_types:
        full_tensors, red_tensor_elem, red_tensor_err, full_in_ref_frame = minimise_setup_tensors(sim_index=sim_index)

    # Get the data structures for optimisation using PCSs as base data sets.
    pcs, pcs_err, pcs_weight, temp, frq, pcs_pseudo_flags = None, None, None, None, None, None
    if 'pcs' in data_types:
        pcs, pcs_err, pcs_weight, temp, frq, pcs_pseudo_flags = return_pcs_data(sim_index=sim_index, verbosity=verbosity)

    # Get the data structures for optimisation using RDCs as base data sets.
    rdcs, rdc_err, rdc_weight, rdc_vector, rdc_dj, absolute_rdc, T_flags, j_couplings, rdc_pseudo_flags = None, None, None, None, None, None, None, None, None
    if 'rdc' in data_types:
        # The data.
        rdcs, rdc_err, rdc_weight, rdc_vector, rdc_dj, absolute_rdc, T_flags, j_couplings, rdc_pseudo_flags = return_rdc_data(sim_index=sim_index, verbosity=verbosity)

    # Get the fixed tensors.
    fixed_tensors = None
    if 'rdc' in data_types or 'pcs' in data_types:
        full_tensors = minimise_setup_fixed_tensors()

        # The flag list.
        fixed_tensors = []
        for i in range(len(cdp.align_tensors)):
            # Skip non-optimised data.
            if not opt_uses_align_data(cdp.align_tensors[i].name):
                continue

            if cdp.align_tensors[i].fixed:
                fixed_tensors.append(True)
            else:
                fixed_tensors.append(False)

    # Get the atomic_positions.
    atomic_pos, paramag_centre, centre_fixed = None, None, True
    if 'pcs' in data_types or 'pre' in data_types:
        atomic_pos, paramag_centre = minimise_setup_atomic_pos(sim_index=sim_index)

        # Optimisation of the centre.
        if hasattr(cdp, 'paramag_centre_fixed'):
            centre_fixed = cdp.paramag_centre_fixed

    # Set up the class instance containing the target function.
    model = N_state_opt(model=cdp.model, N=cdp.N, init_params=param_vector,
                        probs=probs, full_tensors=full_tensors,
                        red_data=red_tensor_elem, red_errors=red_tensor_err,
                        full_in_ref_frame=full_in_ref_frame,
                        fixed_tensors=fixed_tensors, pcs=pcs, rdcs=rdcs,
                        pcs_errors=pcs_err, rdc_errors=rdc_err,
                        T_flags=T_flags, j_couplings=j_couplings,
                        rdc_pseudo_flags=rdc_pseudo_flags,
                        pcs_pseudo_flags=pcs_pseudo_flags,
                        pcs_weights=pcs_weight, rdc_weights=rdc_weight,
                        rdc_vect=rdc_vector, temp=temp, frq=frq,
                        dip_const=rdc_dj, absolute_rdc=absolute_rdc,
                        atomic_pos=atomic_pos, paramag_centre=paramag_centre,
                        scaling_matrix=scaling_matrix,
                        centre_fixed=centre_fixed)

    # Return the data.
    return model, param_vector, data_types
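
For reference, a minimal sketch of what the `fix_invalid` call above does to a
(hypothetical) parameter vector:

import numpy as np
from numpy.ma import fix_invalid

param_vector = np.array([0.5, np.nan, 1.2])
# copy=False repairs the underlying data in place, replacing non-finite
# entries with the fill value.
fix_invalid(param_vector, copy=False, fill_value=0.0)
print(param_vector)  # [0.5  0.   1.2]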