Ejemplo n.º 1
0
def loessMA(m, windowSize, axis=0, approxMasked=True, verbose=False, callback=None):
    """Returns an array with values at the given axis smoothed by loess.

    m: array-like, converted by MA.asarray; cast to Numeric.Float if it has another type
    windowSize: loess window handed over to statc.loess; 0 < windowSize <= m.shape[axis]+0.1
    axis: non-negative index of the axis along which the profiles are smoothed;
        equidistant spacing of points on that axis is assumed
    approxMasked: if True, the masked values are approximated by loess;
        if False, the original mask is re-applied to the smoothed data
    verbose: if True, a warning is printed for each profile that could not be smoothed
    callback: optional function, called without arguments after each processed profile

    The result has the same shape as m.
    """
    # convert first so that sequences without a .shape attribute are accepted as well
    m = MA.asarray(m)
    assert 0 < windowSize <= m.shape[axis]+0.1, "0 < windowSize[%s] <= 1 OR windowSize in range(1.1,m.shape[axis]+1) expected, got %f" % ("%", windowSize)
    if m.dtype.char != Numeric.Float:
        m = m.astype(Numeric.Float)
    numDims = len(m.shape)
    shp_other = list(m.shape)
    shp_other.pop(axis)
    # permFwd moves the given axis to the front; permInv is its inverse permutation
    # (the two differ as soon as axis >= 2, so permFwd cannot be used to undo itself)
    permFwd = [axis] + list(range(0, axis)) + list(range(axis+1, numDims))
    permInv = list(range(1, axis+1)) + [0] + list(range(axis+1, numDims))
    shape2d = (m.shape[axis], Numeric.multiply.reduce(shp_other))
    # get a transposed and reshaped mask and data from m: one column per profile
    # NOTE(review): data may share memory with m when no dtype cast was needed - confirm
    mask = Numeric.reshape(Numeric.transpose(MA.getmaskarray(m), permFwd), shape2d)
    data = MA.reshape(MA.transpose(m, permFwd), shape2d)
    # maskInv: 1 where the value is known, 0 where it is masked
    maskInv = -1*(mask-1)
    xall = Numeric.arange(data.shape[0])
    xallList = xall.tolist()
    # run loess only on the profiles that contain at least 2 known values
    for ii in Numeric.compress(Numeric.add.reduce(maskInv,0) > 1, range(data.shape[1])):
        try:
            knownX = MA.compress(maskInv[:,ii], xall).tolist()
            knownY = MA.compress(maskInv[:,ii], data[:,ii]).tolist()
            # column 1 of the loess result is written back as the smoothed profile
            data[:,ii] = MA.array(statc.loess(list(zip(knownX, knownY)), xallList, windowSize))[:,1]
        except Exception:
            # best effort: a failing profile is left unsmoothed
            if verbose:
                print("Warning: loessMA: could not loess axis %i index %i" % (axis, ii))
        if callback:
            callback()
    if not approxMasked:
        data = MA.array(data, mask=mask)
    # restore the original shape and axis order
    return MA.transpose(MA.reshape(data, [m.shape[axis]] + shp_other), permInv)
Ejemplo n.º 2
0
def orng2ma(aExampleTable):
    """Converts orange.ExampleTable to MA.array based on the attribute values.

    Rows correspond to examples, columns correspond to attributes; class values are left out.
    Missing values and attributes of types other than orange.FloatVariable are masked.
    The result is an MA.array of type Numeric.Float.
    """
    # every kind of unknown value is exported as "?"
    vals = aExampleTable.native(0, substituteDK="?", substituteDC="?", substituteOther="?")
    ma = MA.array(vals, Numeric.PyObject)
    if aExampleTable.domain.classVar is not None:
        # drop the last column so that only attribute values remain
        ma = ma[:,:-1]
    # mask the unknown values ...
    mask = MA.where(MA.equal(ma, "?"), 1, 0)
    # ... and whole columns of attributes that are not exactly orange.FloatVariable
    for varIdx, var in enumerate(aExampleTable.domain.attributes):
        if type(var) != orange.FloatVariable:
            mask[:,varIdx] = Numeric.ones(len(aExampleTable))
    # masked cells are filled with a numeric placeholder so that the object array can be cast to float
    return MA.array(MA.array(ma, Numeric.PyObject, mask=mask).filled(1e20), Numeric.Float, mask=mask)
Ejemplo n.º 3
0
def loessMA(m,
            windowSize,
            axis=0,
            approxMasked=True,
            verbose=False,
            callback=None):
    """Returns an array with values at the given axis smoothed by loess.

    m: array-like, converted by MA.asarray; cast to Numeric.Float if it has another type
    windowSize: loess window handed over to statc.loess; 0 < windowSize <= m.shape[axis]+0.1
    axis: non-negative index of the axis along which the profiles are smoothed;
        equidistant spacing of points on that axis is assumed
    approxMasked: if True, the masked values are approximated by loess;
        if False, the original mask is re-applied to the smoothed data
    verbose: if True, a warning is printed for each profile that could not be smoothed
    callback: optional function, called without arguments after each processed profile

    The result has the same shape as m.
    """
    # convert first so that sequences without a .shape attribute are accepted as well
    m = MA.asarray(m)
    assert 0 < windowSize <= m.shape[
        axis] + 0.1, "0 < windowSize[%s] <= 1 OR windowSize in range(1.1,m.shape[axis]+1) expected, got %f" % (
            "%", windowSize)
    if m.dtype.char != Numeric.Float:
        m = m.astype(Numeric.Float)
    numDims = len(m.shape)
    shp_other = list(m.shape)
    shp_other.pop(axis)
    # permFwd moves the given axis to the front; permInv is its inverse permutation
    # (the two differ as soon as axis >= 2, so permFwd cannot be used to undo itself)
    permFwd = [axis] + list(range(0, axis)) + list(range(axis + 1, numDims))
    permInv = list(range(1, axis + 1)) + [0] + list(range(axis + 1, numDims))
    shape2d = (m.shape[axis], Numeric.multiply.reduce(shp_other))
    # get a transposed and reshaped mask and data from m: one column per profile
    # NOTE(review): data may share memory with m when no dtype cast was needed - confirm
    mask = Numeric.reshape(
        Numeric.transpose(MA.getmaskarray(m), permFwd), shape2d)
    data = MA.reshape(MA.transpose(m, permFwd), shape2d)
    # maskInv: 1 where the value is known, 0 where it is masked
    maskInv = -1 * (mask - 1)
    xall = Numeric.arange(data.shape[0])
    xallList = xall.tolist()
    # run loess only on the profiles that contain at least 2 known values
    for ii in Numeric.compress(
            Numeric.add.reduce(maskInv, 0) > 1, range(data.shape[1])):
        try:
            knownX = MA.compress(maskInv[:, ii], xall).tolist()
            knownY = MA.compress(maskInv[:, ii], data[:, ii]).tolist()
            # column 1 of the loess result is written back as the smoothed profile
            data[:, ii] = MA.array(
                statc.loess(list(zip(knownX, knownY)), xallList,
                            windowSize))[:, 1]
        except Exception:
            # best effort: a failing profile is left unsmoothed
            if verbose:
                print("Warning: loessMA: could not loess axis %i index %i" % (
                    axis, ii))
        if callback:
            callback()
    if not approxMasked:
        data = MA.array(data, mask=mask)
    # restore the original shape and axis order
    return MA.transpose(
        MA.reshape(data, [m.shape[axis]] + shp_other), permInv)
Ejemplo n.º 4
0
def triangularPut(m1d, upper=1, lower=0):
    """Returns 2D masked array with elements of the given 1D array in the strictly upper (lower) triangle.

    Elements of the 1D array should be ordered according to the upper triangular part of the 2D matrix.
    The lower triangular part (if requested) equals to the transposed upper triangular part.
    If upper == lower == 1 a symmetric matrix is returned.

    m1d: 1D (masked) array of length n(n-1)/2, where n is the size of the resulting n x n matrix
    upper, lower: 0 or 1; the triangles that are not requested, and the diagonal, are masked
    """
    assert upper in [0,1] and lower in [0,1], "[0|1] expected for upper / lower"
    m1d = MA.asarray(m1d)
    assert MA.rank(m1d) == 1, "1D masked array expected"
    # math.ceil may return a float; the matrix size must be an integer to be usable as a dimension
    m2dShape0 = int(math.ceil(math.sqrt(2*m1d.shape[0])))
    # n*(n-1) is always even, hence the integer division is exact
    assert m1d.shape[0] == m2dShape0*(m2dShape0-1)//2, "the length of m1d does not correspond to n(n-1)/2"
    # mask: 1 marks the elements that are NOT part of the requested result
    if upper:
        if lower:
            # both triangles requested: only the diagonal is masked
            mask = Numeric.fromfunction(lambda i,j: i==j, (m2dShape0, m2dShape0))
        else:
            # upper triangle only: the diagonal and the lower triangle are masked
            mask = Numeric.fromfunction(lambda i,j: i>=j, (m2dShape0, m2dShape0))
    else:
        if lower:
            # lower triangle only: the diagonal and the upper triangle are masked
            mask = Numeric.fromfunction(lambda i,j: i<=j, (m2dShape0, m2dShape0))
        else:
            # nothing requested: everything is masked
            mask = Numeric.ones((m2dShape0, m2dShape0))

    # put the values into the strictly upper triangle of a flattened zero matrix
    m2d = MA.ravel(MA.zeros((m2dShape0, m2dShape0), m1d.dtype.char))
    condUpperTriang = Numeric.fromfunction(lambda i,j: i<j, (m2dShape0, m2dShape0))
    putIndices = Numeric.compress(Numeric.ravel(condUpperTriang), Numeric.arange(0, m2dShape0**2, typecode=Numeric.Int))
    MA.put(m2d, putIndices, m1d)
    m2d = MA.reshape(m2d, (m2dShape0, m2dShape0))
    # mirror the upper triangle to the lower one
    m2d = MA.where(condUpperTriang, m2d, MA.transpose(m2d))
    # masked elements of m1d stay masked in the result
    return MA.array(m2d, mask=Numeric.logical_or(mask, MA.getmaskarray(m2d)))
Ejemplo n.º 5
0
def divide_unary(a, b):
    """Returns a/b with masked values only in places where both a and b are masked.

    A value that is masked in only one of the operands is replaced by 1 before
    dividing, so the result at that position equals a (b masked) or 1/b (a masked).
    """
    a = MA.asarray(a)
    b = MA.asarray(b)
    # fill masked entries with 1 so that they do not change the quotient
    # NOTE(review): MA.divide may additionally mask positions where the divisor is 0 - confirm
    el = MA.divide(a.filled(1), b.filled(1))
    # only the positions that are unknown in both operands are masked
    mask = Numeric.logical_and(MA.getmaskarray(a), MA.getmaskarray(b))
    return MA.array(el, mask=mask)
Ejemplo n.º 6
0
def subtract_unary(a, b):
    """Subtracts b from a element-wise; the result is masked only where both a and b are masked.

    A value that is masked in only one of the operands is treated as 0.
    """
    minuend = MA.asarray(a)
    subtrahend = MA.asarray(b)
    # positions that are unknown in both operands
    bothMasked = Numeric.logical_and(MA.getmaskarray(minuend),
                                     MA.getmaskarray(subtrahend))
    # masked entries are filled with 0 so that they do not change the difference
    difference = MA.subtract(minuend.filled(0), subtrahend.filled(0))
    return MA.array(difference, mask=bothMasked)
Ejemplo n.º 7
0
def orng2ma(aExampleTable):
    """Converts orange.ExampleTable to MA.array based on the attribute values.

    Rows correspond to examples, columns correspond to attributes; class values are left out.
    Missing values and attributes of types other than orange.FloatVariable are masked.
    The result is an MA.array of type Numeric.Float.
    """
    # every kind of unknown value is exported as "?"
    vals = aExampleTable.native(0,
                                substituteDK="?",
                                substituteDC="?",
                                substituteOther="?")
    ma = MA.array(vals, Numeric.PyObject)
    if aExampleTable.domain.classVar is not None:
        # drop the last column so that only attribute values remain
        ma = ma[:, :-1]
    # mask the unknown values ...
    mask = MA.where(MA.equal(ma, "?"), 1, 0)
    # ... and whole columns of attributes that are not exactly orange.FloatVariable
    for varIdx, var in enumerate(aExampleTable.domain.attributes):
        if type(var) != orange.FloatVariable:
            mask[:, varIdx] = Numeric.ones(len(aExampleTable))
    # masked cells are filled with a numeric placeholder so that the object array can be cast to float
    return MA.array(MA.array(ma, Numeric.PyObject, mask=mask).filled(1e20),
                    Numeric.Float,
                    mask=mask)
Ejemplo n.º 8
0
def kNNimputeMA(arr2d, K=20, callback=None):
    """Returns a new 2D MA.array with missing values imputed from K nearest neighbours.
    Find K rows (axis 0) with the most similar values where similarity measure corresponds to weighted Euclidean distance.
    Imputed value = weighted average of the corresponding values of K nearest neighbours,
    where weights equal to tricubic distribution of distances to all rows.
    Impute missing rows by average over all rows.

    arr2d: 2D array-like (converted by MA.asarray); masked values are the ones to impute
    K: maximal number of neighbours used for a single imputed value
    callback: optional function, called without arguments after each processed row
    Columns without any known value are not imputed.
    Version: 30.8.2005
    """
    arr2d = MA.asarray(arr2d)
    assert len(arr2d.shape) == 2, "2D array expected"
    # make a copy for imputation
    aImp2 = MA.array(arr2d)
    # leave out columns with 0 known values (columnInd: non-zero columns)
    columnCond = Numeric.greater(MA.count(arr2d, axis=0), 0)
    columnIndAll = Numeric.arange(arr2d.shape[1])
    columnInd = Numeric.compress(columnCond, columnIndAll)
    # impute the rows where 0 < #known_values < #non_zero_columns, i.e. exclude the rows with 0 and all (non-zero-column) values
    countByRows = MA.count(arr2d, axis=1)
    for rowIdx in Numeric.compress(Numeric.logical_and(Numeric.greater(countByRows, 0), Numeric.less(countByRows, columnInd.shape[0])), Numeric.arange(arr2d.shape[0])):
        # repeat the current row so that it can be subtracted from every row at once
        rowResized = MA.resize(arr2d[rowIdx], arr2d.shape)
        diff = arr2d - rowResized
        # root of the mean squared difference over the columns known in both rows
        distances = MA.sqrt(MA.add.reduce((diff)**2, 1) / MA.count(diff, axis=1))
        # nearest neighbours row indices (without the current row index)
        # NOTE(review): dropping the first index assumes that the current row (distance 0) sorts first - confirm for ties
        indSorted = MA.argsort(distances)[1:]
        distSorted = distances.take(indSorted)
        # number of distances different from MA.masked
        numNonMasked = distSorted.shape[0] - Numeric.add.reduce(Numeric.asarray(MA.getmaskarray(distSorted), Numeric.Int))
        # number of distances to account for (K or less)
        if numNonMasked > 1:
            # distances are scaled by the largest non-masked distance before the tricube is applied
            weightsSorted = MA.power(1-MA.power(distSorted/distSorted[numNonMasked-1],3),3) # tricubic distribution of all weights
        else:
            # at most one usable neighbour: all weights are equal
            weightsSorted = Numeric.ones(distSorted.shape[0])
        # compute average for each column separately in order to account for K non-masked values
        # (only the columns that are masked in the current row and contain at least one known value)
        colInd4CurrRow = Numeric.compress(Numeric.logical_and(MA.getmaskarray(arr2d[rowIdx]), columnCond), columnIndAll)
        for colIdx in colInd4CurrRow:
            # column values sorted by distances
            columnVals = arr2d[:,colIdx].take(indSorted)
            # take only those weights where columnVals does not equal MA.masked
            weightsSortedCompressed = MA.compress(1-MA.getmaskarray(columnVals), weightsSorted)
            # impute from K (or possibly less) values
            aImp2[rowIdx,colIdx] = MA.average(columnVals.compressed()[:K], weights=weightsSortedCompressed[:K])
        if callback:
            callback()
    # impute the unknown rows with average profile
    avrgRow = MA.average(arr2d, 0)
    for rowIdx in Numeric.compress(Numeric.equal(countByRows, 0), Numeric.arange(arr2d.shape[0])):
        aImp2[rowIdx] = avrgRow
        if callback:
            callback()
    return aImp2
Ejemplo n.º 9
0
def rankDataMA(m, inverse=False):
    """Returns ranks of 1D masked array; masked values ignored, range 1...#non-masked_values.

    m: 1D array-like (converted by MA.asarray)
    inverse: if False, the smallest value gets rank 1;
        if True, the largest value gets rank 1
    The result is masked at the positions where m is masked.
    Equal values get distinct consecutive ranks (no averaging of ties).
    """
    m = MA.asarray(m)
    assert MA.rank(m) == 1
    fill_val = m.fill_value()
    # temporarily use a fill value above the maximum,
    # presumably so that masked values sort behind all the known ones
    m.set_fill_value(MA.maximum(m) + 1)
    try:
        r = MA.zeros(m.shape[0], Numeric.Float)
        # the element at sorted position k gets the zero-based rank k
        MA.put(r, MA.argsort(m), Numeric.arange(m.shape[0]))
    finally:
        # m may be the caller's own array: always restore its fill value
        m.set_fill_value(fill_val)
    r = MA.array(r, mask=MA.getmaskarray(m))
    if inverse:
        # zero-based ascending rank k -> descending rank (#non-masked - k)
        return -1*r+MA.count(m)
    else:
        return r+1
Ejemplo n.º 10
0
def logical_unary_and(m1, m2):
    """Returns m1 AND m2 with masked values only in places where both m1 and m2 are masked.

    A value that is masked in only one of the operands is treated as true,
    so the result at that position equals the truth value of the known operand.
    """
    # accept plain arrays and sequences as well
    m1 = MA.asarray(m1)
    m2 = MA.asarray(m2)
    # fill masked entries with 1 so that they do not change the conjunction
    el = Numeric.logical_and(m1.filled(1), m2.filled(1))
    # only the positions that are unknown in both operands are masked
    mask = Numeric.logical_and(MA.getmaskarray(m1), MA.getmaskarray(m2))
    return MA.array(el, mask=mask)
Ejemplo n.º 11
0
            var[:]=u[i]
        else:
            var[:]=v[i]
            
        f.Conventions = "CF-1.0"

        f.close()

try:
    os.chmod('readonly.nc',0644)
except:
    pass
f = cdms.createDataset('readonly.nc')
tobj = f.createAxis('time',Numeric.array([time[0]]))
tobj.units = 'days since 2000-1-1'
latobj = f.createAxis('latitude',lat)
latobj.units = 'degrees_north'
lonobj = f.createAxis('longitude',lon)
lonobj.units = 'degrees_east'
var = f.createVariable('u',cdms.CdDouble,(tobj,latobj,lonobj))
var.units = 'm/s'
var[:]=u[0]
mvar = f.createVariable('umasked',cdms.CdDouble,(tobj,latobj,lonobj))
umask = MA.array(u[0])
umask[1] = masked
mvar[:] = umask[:]
mvar.missing_value = umask.fill_value()
f.close()

os.chmod('readonly.nc',0444)
Ejemplo n.º 12
0
def kNNimputeMA(arr2d, K=20, callback=None):
    """Returns a new 2D MA.array with missing values imputed from K nearest neighbours.
    Find K rows (axis 0) with the most similar values where similarity measure corresponds to weighted Euclidean distance.
    Imputed value = weighted average of the corresponding values of K nearest neighbours,
    where weights equal to tricubic distribution of distances to all rows.
    Impute missing rows by average over all rows.

    arr2d: 2D array-like (converted by MA.asarray); masked values are the ones to impute
    K: maximal number of neighbours used for a single imputed value
    callback: optional function, called without arguments after each processed row
    Columns without any known value are not imputed.
    Version: 30.8.2005
    """
    arr2d = MA.asarray(arr2d)
    assert len(arr2d.shape) == 2, "2D array expected"
    # make a copy for imputation
    aImp2 = MA.array(arr2d)
    # leave out columns with 0 known values (columnInd: non-zero columns)
    columnCond = Numeric.greater(MA.count(arr2d, axis=0), 0)
    columnIndAll = Numeric.arange(arr2d.shape[1])
    columnInd = Numeric.compress(columnCond, columnIndAll)
    # impute the rows where 0 < #known_values < #non_zero_columns, i.e. exclude the rows with 0 and all (non-zero-column) values
    countByRows = MA.count(arr2d, axis=1)
    for rowIdx in Numeric.compress(
            Numeric.logical_and(Numeric.greater(countByRows, 0),
                                Numeric.less(countByRows, columnInd.shape[0])),
            Numeric.arange(arr2d.shape[0])):
        # repeat the current row so that it can be subtracted from every row at once
        rowResized = MA.resize(arr2d[rowIdx], arr2d.shape)
        diff = arr2d - rowResized
        # root of the mean squared difference over the columns known in both rows
        distances = MA.sqrt(
            MA.add.reduce((diff)**2, 1) / MA.count(diff, axis=1))
        # nearest neighbours row indices (without the current row index)
        # NOTE(review): dropping the first index assumes that the current row (distance 0) sorts first - confirm for ties
        indSorted = MA.argsort(distances)[1:]
        distSorted = distances.take(indSorted)
        # number of distances different from MA.masked
        numNonMasked = distSorted.shape[0] - Numeric.add.reduce(
            Numeric.asarray(MA.getmaskarray(distSorted), Numeric.Int))
        # number of distances to account for (K or less)
        if numNonMasked > 1:
            # distances are scaled by the largest non-masked distance before the tricube is applied
            weightsSorted = MA.power(
                1 - MA.power(distSorted / distSorted[numNonMasked - 1], 3),
                3)  # tricubic distribution of all weights
        else:
            # at most one usable neighbour: all weights are equal
            weightsSorted = Numeric.ones(distSorted.shape[0])
        # compute average for each column separately in order to account for K non-masked values
        # (only the columns that are masked in the current row and contain at least one known value)
        colInd4CurrRow = Numeric.compress(
            Numeric.logical_and(MA.getmaskarray(arr2d[rowIdx]), columnCond),
            columnIndAll)
        for colIdx in colInd4CurrRow:
            # column values sorted by distances
            columnVals = arr2d[:, colIdx].take(indSorted)
            # take only those weights where columnVals does not equal MA.masked
            weightsSortedCompressed = MA.compress(
                1 - MA.getmaskarray(columnVals), weightsSorted)
            # impute from K (or possibly less) values
            aImp2[rowIdx,
                  colIdx] = MA.average(columnVals.compressed()[:K],
                                       weights=weightsSortedCompressed[:K])
        if callback:
            callback()
    # impute the unknown rows with average profile
    avrgRow = MA.average(arr2d, 0)
    for rowIdx in Numeric.compress(Numeric.equal(countByRows, 0),
                                   Numeric.arange(arr2d.shape[0])):
        aImp2[rowIdx] = avrgRow
        if callback:
            callback()
    return aImp2