Example #1
0
def plotting_positions(data, alpha=0.4, beta=0.4):
    """Returns the plotting positions (or empirical percentile points) for the
    data.
    Plotting positions are defined as (i-alpha)/(n-alpha-beta), where:
        - i is the rank order statistics
        - n is the number of unmasked values along the given axis
        - alpha and beta are two parameters.
    
    Typical values for alpha and beta are:     
        - (0,1)    : *p(k) = k/n* : linear interpolation of cdf (R, type 4)
        - (.5,.5)  : *p(k) = (k-1/2.)/n* : piecewise linear function (R, type 5)
        - (0,0)    : *p(k) = k/(n+1)* : Weibull (R type 6)
        - (1,1)    : *p(k) = (k-1)/(n-1)*. In this case, p(k) = mode[F(x[k])].
          That's R default (R type 7)
        - (1/3,1/3): *p(k) = (k-1/3)/(n+1/3)*. Then p(k) ~ median[F(x[k])].
          The resulting quantile estimates are approximately median-unbiased
          regardless of the distribution of x. (R type 8)
        - (3/8,3/8): *p(k) = (k-3/8)/(n+1/4)*. Blom.
          The resulting quantile estimates are approximately unbiased
          if x is normally distributed (R type 9)
        - (.4,.4)  : approximately quantile unbiased (Cunnane)
        - (.35,.35): APL, used with PWM
    """
    data = masked_array(data, copy=False).reshape(1,-1)
    n = data.count()
    plpos = numpy.empty(data.size, dtype=float_)
    plpos[n:] = 0
    plpos[data.argsort()[:n]] = (numpy.arange(1,n+1) - alpha)/(n+1-alpha-beta)
    return masked_array(plpos, mask=data._mask)
Example #2
0
def hdquantiles(data, prob=list([.25,.5,.75]), axis=None, var=False,):
    """Computes quantile estimates with the Harrell-Davis method, where the estimates
    are calculated as a weighted linear combination of order statistics.
    If var=True, the variance of the estimate is also returned. 
    Depending on var, returns a (p,) array of quantiles or a (2,p) array of quantiles
    and variances.
    
:Inputs:
    data: ndarray
        Data array.    
    prob: Sequence
        List of quantiles to compute.
    axis : integer *[None]*
        Axis along which to compute the quantiles. If None, use a flattened array.
    var : boolean *[False]*
        Whether to return the variance of the estimate.
        
:Note:
    The function is restricted to 2D arrays.
    """
    def _hd_1D(data,prob,var):
        "Computes the HD quantiles for a 1D array."
        xsorted = numpy.squeeze(numpy.sort(data.compressed().view(ndarray)))
        n = len(xsorted)
        #.........
        hd = empty((2,len(prob)), float_)
        if n < 2:
            hd.flat = numpy.nan
            if var:
                return hd
            return hd[0]
        #......... 
        v = arange(n+1) / float(n)
        betacdf = beta.cdf
        for (i,p) in enumerate(prob):    
            _w = betacdf(v, (n+1)*p, (n+1)*(1-p))
            w = _w[1:] - _w[:-1]
            hd_mean = dot(w, xsorted)
            hd[0,i] = hd_mean
            #
            hd[1,i] = dot(w, (xsorted-hd_mean)**2)
            #
        hd[0, prob == 0] = xsorted[0]
        hd[0, prob == 1] = xsorted[-1]  
        if var:  
            hd[1, prob == 0] = hd[1, prob == 1] = numpy.nan
            return hd
        return hd[0]
    # Initialization & checks ---------
    data = masked_array(data, copy=False, dtype=float_)
    p = numpy.array(prob, copy=False, ndmin=1)
    # Computes quantiles along axis (or globally)
    if (axis is None): 
        result = _hd_1D(data, p, var)
    else:
        assert data.ndim <= 2, "Array should be 2D at most !"
        result = apply_along_axis(_hd_1D, axis, data, p, var)
    #
    return masked_array(result, mask=numpy.isnan(result))
Example #3
0
 def _quantiles1D(data,m,p):
     x = numpy.sort(data.compressed())
     n = len(x)
     if n == 0:
         return masked_array(numpy.empty(len(p), dtype=float_), mask=True)
     elif n == 1:
         return masked_array(numpy.resize(x, p.shape), mask=nomask)
     aleph = (n*p + m)
     k = numpy.floor(aleph.clip(1, n-1)).astype(int_)
     gamma = (aleph-k).clip(0,1)
     return (1.-gamma)*x[(k-1).tolist()] + gamma*x[k.tolist()]
Example #4
0
def winsorize(data, alpha=0.2):
    """Returns a Winsorized version of the input array: the (alpha/2.) lowest
    values are set to the (alpha/2.)th percentile, and the (alpha/2.) highest
    values are set to the (1-alpha/2.)th percentile 
    Masked values are skipped. The input array is first flattened.
    """
    data = masked_array(data, copy=False).ravel()
    idxsort = data.argsort()
    (nsize, ncounts) = (data.size, data.count())
    ntrim = int(alpha * ncounts)
    (xmin,xmax) = data[idxsort[[ntrim, ncounts-nsize-ntrim-1]]]
    return masked_array(numpy.clip(data, xmin, xmax), mask=data._mask) 
Example #5
0
def hdquantiles_sd(data, prob=list([.25,.5,.75]), axis=None):
    """Computes the standard error of the Harrell-Davis quantile estimates by jackknife.
    
:Inputs:
    data: ndarray
        Data array.    
    prob: Sequence
        List of quantiles to compute.
    axis : integer *[None]*
        Axis along which to compute the quantiles. If None, use a flattened array.
    var : boolean *[False]*
        Whether to return the variance of the estimate.
    stderr : boolean *[False]*
        Whether to return the standard error of the estimate.
        
:Note:
    The function is restricted to 2D arrays.
    """  
    def _hdsd_1D(data,prob):
        "Computes the std error for 1D arrays."
        xsorted = numpy.sort(data.compressed())
        n = len(xsorted)
        #.........
        hdsd = empty(len(prob), float_)
        if n < 2:
            hdsd.flat = numpy.nan
        #......... 
        vv = arange(n) / float(n-1)
        betacdf = beta.cdf
        #
        for (i,p) in enumerate(prob):    
            _w = betacdf(vv, (n+1)*p, (n+1)*(1-p)) 
            w = _w[1:] - _w[:-1]
            mx_ = numpy.fromiter([dot(w,xsorted[r_[range(0,k),
                                                   range(k+1,n)].astype(int_)])
                                  for k in range(n)], dtype=float_)
            mx_var = numpy.array(mx_.var(), copy=False, ndmin=1) * n / float(n-1)
            hdsd[i] = float(n-1) * sqrt(numpy.diag(mx_var).diagonal() / float(n))
        return hdsd
    # Initialization & checks ---------
    data = masked_array(data, copy=False, dtype=float_)
    p = numpy.array(prob, copy=False, ndmin=1)
    # Computes quantiles along axis (or globally)
    if (axis is None): 
        result = _hdsd_1D(data.compressed(), p)
    else:
        assert data.ndim <= 2, "Array should be 2D at most !"
        result = apply_along_axis(_hdsd_1D, axis, data, p)
    #
    return masked_array(result, mask=numpy.isnan(result)).ravel()
Example #6
0
def trim_both(data, proportiontocut=0.2, axis=None):
    """Trims the data by masking the int(trim*n) smallest and int(trim*n) largest 
    values of data along the given axis, where n is the number of unmasked values.
    
:Inputs: 
    data : MaskedArray
        Data to trim.
    trim : float *[0.2]*
        Percentage of trimming. If n is the number of unmasked values before trimming, 
        the number of values after trimming is (1-2*trim)*n.
    axis : integer *[None]*
        Axis along which to perform the trimming.
    """
    #...................
    def _trim_1D(data, trim):
        "Private function: return a trimmed 1D array."
        nsize = data.size
        ncounts = data.count()
        ntrim = int(trim * ncounts)
        idxsort = data.argsort()
        data[idxsort[:ntrim]] = masked
        data[idxsort[ncounts-nsize-ntrim:]] = masked
        return data
    #...................
    data = masked_array(data, copy=False, subok=True)
    data.unshare_mask()
    if (axis is None): 
        return _trim_1D(data.ravel(), proportiontocut)
    else:
        assert data.ndim <= 2, "Array should be 2D at most !"
        return apply_along_axis(_trim_1D, axis, data, proportiontocut)
Example #7
0
def median_cihs(data, alpha=0.05, axis=None):
    """Computes the alpha-level confidence interval for the median of the data,
    following the Hettmasperger-Sheather method.
    
:Inputs:
    data : sequence
        Input data. Masked values are discarded. The input should be 1D only
    alpha : float *[0.05]*
        Confidence degree.
    """
    def _cihs_1D(data, alpha):
        data = numpy.sort(data.compressed())
        n = len(data)
        alpha = min(alpha, 1-alpha)
        k = int(binom._ppf(alpha/2., n, 0.5))
        gk = binom.cdf(n-k,n,0.5) - binom.cdf(k-1,n,0.5)
        if gk < 1-alpha:
            k -= 1
            gk = binom.cdf(n-k,n,0.5) - binom.cdf(k-1,n,0.5)
        gkk = binom.cdf(n-k-1,n,0.5) - binom.cdf(k,n,0.5)
        I = (gk - 1 + alpha)/(gk - gkk)
        lambd = (n-k) * I / float(k + (n-2*k)*I)    
        lims = (lambd*data[k] + (1-lambd)*data[k-1],
                lambd*data[n-k-1] + (1-lambd)*data[n-k])
        return lims
    data = masked_array(data, copy=False)
    # Computes quantiles along axis (or globally)
    if (axis is None): 
        result = _cihs_1D(data.compressed(), p, var)
    else:
        assert data.ndim <= 2, "Array should be 2D at most !"
        result = apply_along_axis(_cihs_1D, axis, data, alpha)
    #
    return result
Example #8
0
def trimmed_stde(data, proportiontocut=0.2, axis=None):
    """Returns the standard error of the trimmed mean for the input data, 
    along the given axis. Trimming is performed on both ends of the distribution.
    
:Inputs: 
    data : MaskedArray
        Data to trim.
    proportiontocut : float *[0.2]*
        Proportion of the data to cut from each side of the data . 
        As a result, (2*proportiontocut*n) values are actually trimmed.
    axis : integer *[None]*
        Axis along which to perform the trimming.  
    """
    #........................
    def _trimmed_stde_1D(data, trim=0.2):
        "Returns the standard error of the trimmed mean for a 1D input data."
        winsorized = winsorize(data)
        nsize = winsorized.count()
        winstd = winsorized.stdu()
        return winstd / ((1-2*trim) * numpy.sqrt(nsize))
    #........................
    data = masked_array(data, copy=False, subok=True)
    data.unshare_mask()
    if (axis is None): 
        return _trimmed_stde_1D(data.ravel(), proportiontocut)
    else:
        assert data.ndim <= 2, "Array should be 2D at most !"
        return apply_along_axis(_trimmed_stde_1D, axis, data, proportiontocut)
Example #9
0
def rsh(data, points=None):
    """Evalutates Rosenblatt's shifted histogram estimators for each
    point of 'points' on the dataset 'data'.
    
:Inputs:
    data : sequence
        Input data. Masked values are discarded.
    points : 
        Sequence of points where to evaluate Rosenblatt shifted histogram. 
        If None, use the data.        
    """
    data = masked_array(data, copy=False)
    if points is None:
        points = data
    else:
        points = numpy.array(points, copy=False, ndmin=1)
    if data.ndim != 1:
        raise AttributeError("The input array should be 1D only !")
    n = data.count()
    h = 1.2 * idealfourths(data) / n**(1./5)
    nhi = (data[:,None] <= points[None,:] + h).sum(0)
    nlo = (data[:,None] < points[None,:] - h).sum(0)
    return (nhi-nlo) / (2.*n*h)

    
Example #10
0
def mquantiles(data, prob=list([.25,.5,.75]), alphap=.4, betap=.4, axis=None):
    """Computes empirical quantiles for a *1xN* data array.
Samples quantile are defined by:
*Q(p) = (1-g).x[i] +g.x[i+1]*
where *x[j]* is the jth order statistic, 
with *i = (floor(n*p+m))*, *m=alpha+p*(1-alpha-beta)* and *g = n*p + m - i)*.

Typical values of (alpha,beta) are:
    
    - (0,1)    : *p(k) = k/n* : linear interpolation of cdf (R, type 4)
    - (.5,.5)  : *p(k) = (k+1/2.)/n* : piecewise linear function (R, type 5)
    - (0,0)    : *p(k) = k/(n+1)* : (R type 6)
    - (1,1)    : *p(k) = (k-1)/(n-1)*. In this case, p(k) = mode[F(x[k])].
      That's R default (R type 7)
    - (1/3,1/3): *p(k) = (k-1/3)/(n+1/3)*. Then p(k) ~ median[F(x[k])].
      The resulting quantile estimates are approximately median-unbiased
      regardless of the distribution of x. (R type 8)
    - (3/8,3/8): *p(k) = (k-3/8)/(n+1/4)*. Blom.
      The resulting quantile estimates are approximately unbiased
      if x is normally distributed (R type 9)
    - (.4,.4)  : approximately quantile unbiased (Cunnane)
    - (.35,.35): APL, used with PWM

:Parameters:
    x : Sequence
        Input data, as a sequence or array of dimension at most 2.
    prob : Sequence *[(0.25, 0.5, 0.75)]*
        List of quantiles to compute.
    alpha : Float (*[0.4]*)
        Plotting positions parameter.
    beta : Float (*[0.4]*)
        Plotting positions parameter.
    axis : Integer *[None]*
        Axis along which to compute quantiles. If *None*, uses the whole 
        (flattened/compressed) dataset.
    """
    def _quantiles1D(data,m,p):
        x = numpy.sort(data.compressed())
        n = len(x)
        if n == 0:
            return masked_array(numpy.empty(len(p), dtype=float_), mask=True)
        elif n == 1:
            return masked_array(numpy.resize(x, p.shape), mask=nomask)
        aleph = (n*p + m)
        k = numpy.floor(aleph.clip(1, n-1)).astype(int_)
        gamma = (aleph-k).clip(0,1)
        return (1.-gamma)*x[(k-1).tolist()] + gamma*x[k.tolist()]

    # Initialization & checks ---------
    data = masked_array(data, copy=False)
    p = narray(prob, copy=False, ndmin=1)
    m = alphap + p*(1.-alphap-betap)
    # Computes quantiles along axis (or globally)
    if (axis is None): 
        return _quantiles1D(data, m, p)
    else:
        assert data.ndim <= 2, "Array should be 2D at most !"
        return apply_along_axis(_quantiles1D, axis, data, m, p)
Example #11
0
def mmedian(data, axis=None):
    """Returns the median of data along the given axis. Missing data are discarded."""
    def _median1D(data):
        x = numpy.sort(data.compressed())
        if x.size == 0:
            return masked
        return numpy.median(x)
    data = masked_array(data, subok=True, copy=True)
    if axis is None:
        return _median1D(data)
    else:
        return apply_along_axis(_median1D, axis, data)
Example #12
0
def stde_median(data, axis=None):
    """Returns the McKean-Schrader estimate of the standard error of the sample
    median along the given axis.
    """
    def _stdemed_1D(data):
        sorted = numpy.sort(data.compressed())
        n = len(sorted)
        z = 2.5758293035489004
        k = int(round((n+1)/2. - z * sqrt(n/4.),0))
        return ((sorted[n-k] - sorted[k-1])/(2.*z))
    #
    data = masked_array(data, copy=False, subok=True)
    if (axis is None):
        return _stdemed_1D(data)
    else:
        assert data.ndim <= 2, "Array should be 2D at most !"
        return apply_along_axis(_stdemed_1D, axis, data)
Example #13
0
def idealfourths(data, axis=None):
    """Returns an estimate of the interquartile range of the data along the given
    axis, as computed with the ideal fourths.
    """
    def _idf(data):
        x = numpy.sort(data.compressed())
        n = len(x)
        (j,h) = divmod(n/4. + 5/12.,1)
        qlo = (1-h)*x[j] + h*x[j+1]
        k = n - j
        qup = (1-h)*x[k] + h*x[k-1]
        return qup - qlo
    data = masked_array(data, copy=False)
    if (axis is None): 
        return _idf(data)
    else:
        return apply_along_axis(_idf, axis, data) 
Example #14
0
def rank_data(data, axis=None, use_missing=False):
    """Returns the rank (also known as order statistics) of each data point 
    along the given axis.
    If some values are tied, their rank is averaged.
    If some values are masked, their rank is set to 0 if use_missing is False, or
    set to the average rank of the unmasked values if use_missing is True.
    
:Inputs:
    data : sequence
        Input data. The data is transformed to a masked array
    axis : integer *[None]*
        Axis along which to perform the ranking. If None, the array is first
        flattened. An exception is raised if the axis is specified for arrays
        with a dimension larger than 2
    use_missing : boolean *[False]*
        Flag indicating whether the masked values have a rank of 0 (False) or
        equal to the average rank of the unmasked values (True)    
    """
    #
    def _rank1d(data, use_missing=False):
        n = data.count()
        rk = numpy.empty(data.size, dtype=float_)
        idx = data.argsort()
        rk[idx[:n]] = numpy.arange(1,n+1)
        #
        if use_missing:
            rk[idx[n:]] = (n+1)/2.
        else:
            rk[idx[n:]] = 0
        #
        repeats = find_repeats(data)
        for r in repeats[0]:
            condition = (data==r).filled(False)
            rk[condition] = rk[condition].mean()
        return rk
    #
    data = masked_array(data, copy=False)
    if axis is None:
        if data.ndim > 1:
            return _rank1d(data.ravel(), use_missing).reshape(data.shape)
        else:
            return _rank1d(data, use_missing)
    else:
        return apply_along_axis(_rank1d, axis, data, use_missing) 
Example #15
0
def trim_tail(data, proportiontocut=0.2, tail='left', axis=None):
    """Trims the data by masking int(trim*n) values from ONE tail of the data
    along the given axis, where n is the number of unmasked values.
    
:Inputs: 
    data : MaskedArray
        Data to trim.
    trim : float *[0.2]*
        Percentage of trimming. If n is the number of unmasked values before trimming, 
        the number of values after trimming is (1-2*trim)*n.
    axis : integer *[None]*
        Axis along which to perform the trimming.
    """
    #...................
    def _trim_1D(data, trim, left):
        "Private function: return a trimmed 1D array."
        nsize = data.size
        ncounts = data.count()
        ntrim = int(trim * ncounts)
        idxsort = data.argsort()
        if left:
            data[idxsort[:ntrim]] = masked
        else:
            data[idxsort[ncounts-nsize-ntrim:]] = masked
        return data
    #...................
    data = masked_array(data, copy=False, subok=True)
    data.unshare_mask()
    #
    if not isinstance(tail, str):
        raise TypeError("The tail argument should be in ('left','right')")
    tail = tail.lower()[0]
    if tail == 'l':
        left = True
    elif tail == 'r':
        left=False
    else:
        raise ValueError("The tail argument should be in ('left','right')")
    #
    if (axis is None): 
        return _trim_1D(data.ravel(), proportiontocut, left)
    else:
        assert data.ndim <= 2, "Array should be 2D at most !"
        return apply_along_axis(_trim_1D, axis, data, proportiontocut, left)
Example #16
0
def trimmed_mean_ci(data, proportiontocut=0.2, alpha=0.05, axis=None):
    """Returns the selected confidence interval of the trimmed mean along the
    given axis.
    
:Inputs:
    data : sequence
        Input data. The data is transformed to a masked array
    proportiontocut : float *[0.2]*
        Proportion of the data to cut from each side of the data . 
        As a result, (2*proportiontocut*n) values are actually trimmed.
    alpha : float *[0.05]*
        Confidence level of the intervals
    axis : integer *[None]*
        Axis along which to cut.
    """
    data = masked_array(data, copy=False)
    trimmed = trim_both(data, proportiontocut=proportiontocut, axis=axis)
    tmean = trimmed.mean(axis)
    tstde = trimmed_stde(data, proportiontocut=proportiontocut, axis=axis)
    df = trimmed.count(axis) - 1
    tppf = t.ppf(1-alpha/2.,df)
    return numpy.array((tmean - tppf*tstde, tmean+tppf*tstde))
Example #17
0
def mjci(data, prob=[0.25,0.5,0.75], axis=None):
    """Returns the Maritz-Jarrett estimators of the standard error of selected 
    experimental quantiles of the data.
    
:Input:
    data : sequence
        Input data.
    prob : sequence *[0.25,0.5,0.75]*
        Sequence of quantiles whose standard error must be estimated.
    axis : integer *[None]*
        Axis along which to compute the standard error.
    """
    def _mjci_1D(data, p):
        data = data.compressed()
        sorted = numpy.sort(data)
        n = data.size
        prob = (numpy.array(p) * n + 0.5).astype(int_)
        betacdf = beta.cdf
        #
        mj = empty(len(prob), float_)
        x = arange(1,n+1, dtype=float_) / n
        y = x - 1./n
        for (i,m) in enumerate(prob):
            (m1,m2) = (m-1, n-m)
            W = betacdf(x,m-1,n-m) - betacdf(y,m-1,n-m)
            C1 = numpy.dot(W,sorted)
            C2 = numpy.dot(W,sorted**2)
            mj[i] = sqrt(C2 - C1**2)
        return mj
    #
    data = masked_array(data, copy=False)
    assert data.ndim <= 2, "Array should be 2D at most !"
    p = numpy.array(prob, copy=False, ndmin=1)
    # Computes quantiles along axis (or globally)
    if (axis is None): 
        return _mjci_1D(data, p)
    else:
        return apply_along_axis(_mjci_1D, axis, data, p) 
 def test_addfield(self):
     "Tests addfield"
     [d, m, mrec, dlist, dates, ts, mts] = self.data
     mts = addfield(mts, masked_array(d+10, mask=m[::-1]))
     assert_equal(mts.f2, d+10)
     assert_equal(mts.f2._mask, m[::-1])            
Example #19
0
def fromtextfile(
    fname, delimitor=None, commentchar="#", missingchar="", dates_column=None, varnames=None, vartypes=None, dates=None
):
    """Creates a multitimeseries from data stored in the file `filename`.

:Parameters:
    - `filename` : file name/handle
      Handle of an opened file.  
    - `delimitor` : Character *None*
      Alphanumeric character used to separate columns in the file.
      If None, any (group of) white spacestring(s) will be used.
    - `commentchar` : String *['#']*
      Alphanumeric character used to mark the start of a comment.
    - `missingchar` : String *['']*
      String indicating missing data, and used to create the masks.
    - `datescol` : Integer *[None]*
      Position of the columns storing dates. If None, a position will be 
      estimated from the variable names.
    - `varnames` : Sequence *[None]*
      Sequence of the variable names. If None, a list will be created from
      the first non empty line of the file.
    - `vartypes` : Sequence *[None]*
      Sequence of the variables dtypes. If None, the sequence will be estimated
      from the first non-commented line.  
    
    
    Ultra simple: the varnames are in the header, one line"""
    # Try to open the file ......................
    f = openfile(fname)
    # Get the first non-empty line as the varnames
    while True:
        line = f.readline()
        firstline = line[: line.find(commentchar)].strip()
        _varnames = firstline.split(delimitor)
        if len(_varnames) > 1:
            break
    if varnames is None:
        varnames = _varnames
    # Get the data ..............................
    _variables = MA.asarray([line.strip().split(delimitor) for line in f if line[0] != commentchar and len(line) > 1])
    (nvars, nfields) = _variables.shape
    # Check if we need to get the dates..........
    if dates_column is None:
        dates_column = [i for (i, n) in enumerate(list(varnames)) if n.lower() in ["_dates", "dates"]]
    elif isinstance(dates_column, (int, float)):
        if dates_column > nfields:
            raise ValueError, "Invalid column number: %i > %i" % (dates_column, nfields)
        dates_column = [dates_column]
    if len(dates_column) > 0:
        cols = range(nfields)
        [cols.remove(i) for i in dates_column]
        newdates = date_array(_variables[:, dates_column[-1]])
        _variables = _variables[:, cols]
        varnames = [varnames[i] for i in cols]
        if vartypes is not None:
            vartypes = [vartypes[i] for i in cols]
        nfields -= len(dates_column)
    else:
        newdates = None
    # Try to guess the dtype ....................
    if vartypes is None:
        vartypes = _guessvartypes(_variables[0])
    else:
        vartypes = [numeric.dtype(v) for v in vartypes]
        if len(vartypes) != nfields:
            msg = "Attempting to %i dtypes for %i fields!"
            msg += " Reverting to default."
            warnings.warn(msg % (len(vartypes), nfields))
            vartypes = _guessvartypes(_variables[0])
    # Construct the descriptor ..................
    mdescr = [(n, f) for (n, f) in zip(varnames, vartypes)]
    # Get the data and the mask .................
    # We just need a list of masked_arrays. It's easier to create it like that:
    _mask = _variables.T == missingchar
    _datalist = [masked_array(a, mask=m, dtype=t) for (a, m, t) in zip(_variables.T, _mask, vartypes)]
    #
    newdates = __getdates(dates=dates, newdates=newdates, length=nvars, freq=None, start_date=None)
    return MultiTimeSeries(_datalist, dates=newdates, dtype=mdescr)