예제 #1
0
 def test_ismissing_7a(self):
     """afunc.ismissing_7a

     An array built from [nan, 1] should be flagged as [True, False].
     """
     data = np.array([nan, 1])
     expected = np.array([True, False])
     assert_equal(ismissing(data), expected)
예제 #2
0
 def test_ismissing_8a(self):
     """afunc.ismissing_8a

     An array holding only the empty string should be flagged as missing.
     """
     data = np.array([''])
     expected = np.array([True])
     assert_equal(ismissing(data), expected)
예제 #3
0
 def test_ismissing_5a(self):
     """afunc.ismissing_5a

     An array holding a datetime.date should not be flagged as missing.
     """
     import datetime
     data = np.array([datetime.date(2011, 1, 1)])
     expected = np.array([False])
     assert_equal(ismissing(data), expected)
예제 #4
0
 def test_ismissing_6a(self):
     """afunc.ismissing_6a

     An array holding only nan should be flagged as missing.
     """
     data = np.array([nan])
     expected = np.array([True])
     assert_equal(ismissing(data), expected)
예제 #5
0
 def test_ismissing_3a(self):
     """afunc.ismissing_3a

     An array holding a non-empty string should not be flagged as missing.
     """
     data = np.array(['str'])
     expected = np.array([False])
     assert_equal(ismissing(data), expected)
예제 #6
0
 def test_ismissing_4a(self):
     """afunc.ismissing_4a

     An array holding only None should be flagged as missing.
     """
     data = np.array([None])
     expected = np.array([True])
     assert_equal(ismissing(data), expected)
예제 #7
0
 def test_ismissing_1(self):
     """afunc.ismissing_1

     A larry built from [1] should not be flagged as missing.
     """
     lar = larry([1])
     expected = np.array([False])
     assert_equal(ismissing(lar), expected)
예제 #8
0
파일: farray.py 프로젝트: josef-pkt/la
def mov_sum(arr, window, skip=0, axis=-1, norm=False):
    """
    Moving sum ignoring NaNs, optionally normalized for missing (NaN) data.

    Parameters
    ----------
    arr : ndarray
        Input array.
    window : int
        The number of elements in the moving window.
    skip : int, optional
        By default (skip=0) the moving sum at element *i* is the sum over the
        slice of elements from *i + 1 - window* to *i + 1* (so the last element
        in the sum is *i*). With nonzero `skip` the sum is over the slice from
        *i + 1 - window - skip* to *i + 1 - skip*. `skip` cannot be negative.
    axis : int, optional
        The axis over which to perform the moving sum. By default the moving
        sum is taken over the last axis (-1).
    norm : bool, optional
        Whether or not to normalize the sum. The default is not to normalize.
        If there are 3 missing elements in a window, for example, then the
        normalization would be to multiply the sum in that window by
        *window / (window - 3)*.

    Returns
    -------
    y : ndarray
        The moving sum of the input array along the specified axis. The
        output has the same shape as the input; positions without a full
        window are filled with NaN.

    Raises
    ------
    ValueError
        If `window` is less than 1 or larger than the length of `arr` along
        `axis`, or if `skip` is negative.
    IndexError
        If `skip` is larger than the length of `arr` along `axis`.

    Examples
    --------
    >>> arr = np.array([1, 2, 3, 4, 5])
    >>> mov_sum(arr, 2)
    array([ NaN,   3.,   5.,   7.,   9.])

    >>> arr = np.array([1, 2, np.nan, 4, 5])
    >>> mov_sum(arr, 2)
    array([ NaN,   3.,   2.,   4.,   9.])
    >>> mov_sum(arr, 2, norm=True)
    array([ NaN,   3.,   4.,   8.,   9.])

    """

    # Check input (call-style raise works on both Python 2 and Python 3)
    if window < 1:
        raise ValueError('window must be at least 1')
    if window > arr.shape[axis]:
        raise ValueError('Window is too big.')
    if skip < 0:
        raise ValueError('skip cannot be negative.')
    if skip > arr.shape[axis]:
        raise IndexError('Your skip is too large.')

    ndim = arr.ndim
    length = arr.shape[axis]

    def along_axis(sl):
        # Full-array index that applies `sl` along `axis` only. It must be a
        # tuple: recent NumPy no longer accepts a list of slices as an index.
        index = [slice(None)] * ndim
        index[axis] = sl
        return tuple(index)

    # Set missing values to 0 so they do not contribute to the cumsum
    m = ismissing(arr)
    arr = 1.0 * arr
    arr[m] = 0
    csx = arr.cumsum(axis)

    index1 = along_axis(slice(window - 1, None))
    index2 = along_axis(slice(None, -window))
    index3 = along_axis(slice(1, None))

    # Window sum = cumsum at window end minus cumsum just before window start
    msx = csx[index1]
    msx[index3] = msx[index3] - csx[index2]

    # Same trick on the non-missing mask gives the count of valid elements
    csm = (~m).cumsum(axis)
    msm = csm[index1]
    msm[index3] = msm[index3] - csm[index2]

    if norm:
        ms = 1.0 * window * msx / msm
    else:
        ms = msx
        # A window with no valid elements has no meaningful sum
        ms[msm == 0] = np.nan

    # Pad with NaN to get back to the original shape. The pad is capped at
    # the axis length so that a large `skip` cannot grow the output.
    initshape = list(arr.shape)
    initshape[axis] = min(skip + window - 1, length)
    cutslice = along_axis(slice(None, -skip or None, None))
    pad = np.nan * np.zeros(initshape)
    ms = np.concatenate((pad, ms[cutslice]), axis)
    return ms
예제 #9
0
 def test_ismissing_8(self):
     """afunc.ismissing_8

     A larry holding only the empty string should be flagged as missing.
     """
     lar = larry([""])
     expected = np.array([True])
     assert_equal(ismissing(lar), expected)
예제 #10
0
파일: flarry.py 프로젝트: gaybro8777/la
def binaryop(func,
             lar1,
             lar2,
             join='inner',
             cast=True,
             missone='ignore',
             misstwo='ignore',
             **kwargs):
    """
    Apply a binary function to two larrys after aligning their labels.

    Parameters
    ----------
    func : function
        Callable that accepts two Numpy arrays and returns a Numpy array,
        e.g. np.add. Extra keyword arguments for it can be supplied through
        `**kwargs`.
    lar1 : larry
        Left-hand operand. Must have as many dimensions as `lar2`.
    lar2 : larry
        Right-hand operand. Must have as many dimensions as `lar1`.
    join : {'inner', 'outer', 'left', 'right', list}, optional
        How the labels of the two larrys are joined. The default, 'inner',
        uses the intersection of the labels along every axis. A list of
        strings gives one join method per axis (first element for axis=0,
        second for axis=1, and so on) and should have one entry per
        dimension of the two larrys.
    cast : bool, optional
        Missing value markers exist only for float, str, and object dtypes
        (la.nan, '', and None, respectively); dtypes such as int and bool
        have none. With cast=True (default) int and bool input, for example,
        is cast to float whenever the join creates new rows, columns, etc.
        With cast=False a TypeError is raised for int and bool input in that
        situation instead. An inner join never creates new rows, columns,
        etc.
    missone : {scalar, 'ignore'}, optional
        Replacement for elements that are missing in one larry but not in
        the other. The default, 'ignore', applies no special treatment.
        Setting it to 0, for example, can be useful when summing two larrys.
    misstwo : {scalar, 'ignore'}, optional
        Replacement for elements that are missing in both larrys. The
        default, 'ignore', applies no special treatment.
    **kwargs : Keyword arguments, optional
        Passed through to `func`. The keys join, cast, missone, and misstwo
        cannot be forwarded to `func`.

    Returns
    -------
    lar3 : larry
        The result of the binary operation.

    See Also
    --------
    la.align: Align two larrys using one of five join methods.

    Examples
    --------
    Create two larrys:

    >>> from la import nan
    >>> lar1 = larry([1,   2, nan], [['a', 'b', 'c']])
    >>> lar2 = larry([1, nan, nan], [['a', 'b', 'dd']])

    The default is an inner join (note that lar1 and lar2 have two labels in
    common):

    >>> la.binaryop(np.add, lar1, lar2)
    label_0
        a
        b
    x
    array([  2.,  NaN])

    If one data element is missing in one larry but not in the other, then you
    can replace the missing value with `missone` (here 0):

    >>> la.binaryop(np.add, lar1, lar2, missone=0)
    label_0
        a
        b
    x
    array([ 2.,  2.])

    An outer join:

    >>> la.binaryop(np.add, lar1, lar2, join='outer')
    label_0
        a
        b
        c
        dd
    x
    array([  2.,  NaN,  NaN,  NaN])

    An outer join with single and double missing values replaced by zero:

    >>> la.binaryop(np.add, lar1, lar2, join='outer', missone=0, misstwo=0)
    label_0
        a
        b
        c
        dd
    x
    array([ 2.,  2.,  0.,  0.])

    """

    # Bring both operands onto a common set of labels
    x1, x2, label, ign1, ign2 = align_raw(lar1, lar2, join=join, cast=cast)

    fill_one = missone != 'ignore'
    fill_two = misstwo != 'ignore'

    # Locating missing values is slow, so it is only done on request
    if fill_one or fill_two:
        miss1 = ismissing(x1)
        miss2 = ismissing(x2)

        if fill_one:
            # Missing on the left only, then missing on the right only
            only1 = miss1 & ~miss2
            if only1.any():
                x1[only1] = missone
            only2 = miss2 & ~miss1
            if only2.any():
                x2[only2] = missone

        if fill_two:
            # Missing on both sides
            both = miss1 & miss2
            if both.any():
                x1[both] = misstwo
                x2[both] = misstwo

    # Apply the function to the aligned data and wrap the result
    result = func(x1, x2, **kwargs)
    return larry(result, label, integrity=False)
예제 #11
0
 def test_ismissing_3(self):
     """afunc.ismissing_3

     A larry holding a non-empty string should not be flagged as missing.
     """
     lar = larry(["str"])
     expected = np.array([False])
     assert_equal(ismissing(lar), expected)
예제 #12
0
파일: flarry.py 프로젝트: josef-pkt/la
def binaryop(func, lar1, lar2, join='inner', cast=True, missone='ignore',
             misstwo='ignore', **kwargs):
    """
    Align two larrys with the given join method, then combine them with func.

    Parameters
    ----------
    func : function
        A function of two Numpy arrays that returns a Numpy array, such as
        np.add. Keyword arguments for the function may be given via
        `**kwargs`.
    lar1 : larry
        The left-hand larry. Its number of dimensions must equal that of
        `lar2`.
    lar2 : larry
        The right-hand larry. Its number of dimensions must equal that of
        `lar1`.
    join : {'inner', 'outer', 'left', 'right', list}, optional
        Join method for the labels. By default every axis uses 'inner', the
        intersection of the labels. When a list of strings is given, it
        should contain one join method per dimension: element 0 applies to
        axis=0, element 1 to axis=1, and so on.
    cast : bool, optional
        Only float, str, and object dtypes can mark a value as missing (with
        la.nan, '', and None, respectively); int and bool, among others,
        cannot. If True (default), int and bool data, for example, are cast
        to float when the join adds new rows, columns, etc. If False, a
        TypeError is raised for int and bool input when the join adds new
        rows, columns, etc. Inner joins never add rows, columns, etc.
    missone : {scalar, 'ignore'}, optional
        With the default 'ignore', missing values get no special treatment.
        Any other value, such as 0, replaces every element that is missing
        in one larry but present in the other; zero is a typical choice
        when adding two larrys.
    misstwo : {scalar, 'ignore'}, optional
        With the default 'ignore', missing values get no special treatment.
        Any other value, such as 0, replaces every element that is missing
        in both larrys.
    **kwargs : Keyword arguments, optional
        Forwarded to `func`. They cannot use the keys join, cast, missone,
        or misstwo.

    Returns
    -------
    lar3 : larry
        The result of the binary operation.

    See Also
    --------
    la.align: Align two larrys using one of five join methods.

    Examples
    --------
    Create two larrys:

    >>> from la import nan
    >>> lar1 = larry([1,   2, nan], [['a', 'b', 'c']])
    >>> lar2 = larry([1, nan, nan], [['a', 'b', 'dd']])

    The default is an inner join (note that lar1 and lar2 have two labels in
    common):

    >>> la.binaryop(np.add, lar1, lar2)
    label_0
        a
        b
    x
    array([  2.,  NaN])

    If one data element is missing in one larry but not in the other, then you
    can replace the missing value with `missone` (here 0):

    >>> la.binaryop(np.add, lar1, lar2, missone=0)
    label_0
        a
        b
    x
    array([ 2.,  2.])

    An outer join:

    >>> la.binaryop(np.add, lar1, lar2, join='outer')
    label_0
        a
        b
        c
        dd
    x
    array([  2.,  NaN,  NaN,  NaN])

    An outer join with single and double missing values replaced by zero:

    >>> la.binaryop(np.add, lar1, lar2, join='outer', missone=0, misstwo=0)
    label_0
        a
        b
        c
        dd
    x
    array([ 2.,  2.,  0.,  0.])

    """

    # Align the raw data of the two larrys
    x1, x2, label, ign1, ign2 = align_raw(lar1, lar2, join=join, cast=cast)

    replace_single = missone != 'ignore'
    replace_double = misstwo != 'ignore'

    # Finding missing values is slow, so skip it unless a fill was requested
    if replace_single or replace_double:
        miss1 = ismissing(x1)
        miss2 = ismissing(x2)

        if replace_single:
            left_only = miss1 & ~miss2
            if left_only.any():
                x1[left_only] = missone
            right_only = miss2 & ~miss1
            if right_only.any():
                x2[right_only] = missone

        if replace_double:
            in_both = miss1 & miss2
            if in_both.any():
                x1[in_both] = misstwo
                x2[in_both] = misstwo

    # Run the binary function on the aligned data
    return larry(func(x1, x2, **kwargs), label, integrity=False)
예제 #13
0
def movingsum(arr, window, skip=0, axis=-1, norm=False):
    """
    Moving sum ignoring NaNs, optionally normalized for missing (NaN) data.

    Parameters
    ----------
    arr : ndarray
        Input array.
    window : int
        The number of elements in the moving window.
    skip : int, optional
        By default (skip=0) the moving sum at element *i* is the sum over the
        slice of elements from *i + 1 - window* to *i + 1* (so the last element
        in the sum is *i*). With nonzero `skip` the sum is over the slice from
        *i + 1 - window - skip* to *i + 1 - skip*. `skip` cannot be negative.
    axis : int, optional
        The axis over which to perform the moving sum. By default the moving
        sum is taken over the last axis (-1).
    norm : bool, optional
        Whether or not to normalize the sum. The default is not to normalize.
        If there are 3 missing elements in a window, for example, then the
        normalization would be to multiply the sum in that window by
        *window / (window - 3)*.

    Returns
    -------
    y : ndarray
        The moving sum of the input array along the specified axis. The
        output is a new float array with the same shape as the input;
        positions without a full window are filled with NaN.

    Raises
    ------
    ValueError
        If `window` is less than 1 or larger than the length of `arr` along
        `axis`, or if `skip` is negative.
    IndexError
        If `skip` is larger than the length of `arr` along `axis`.

    Examples
    --------
    >>> arr = np.array([1, 2, 3, 4, 5])
    >>> movingsum(arr, 2)
    array([ NaN,   3.,   5.,   7.,   9.])

    >>> arr = np.array([1, 2, np.nan, 4, 5])
    >>> movingsum(arr, 2)
    array([ NaN,   3.,   2.,   4.,   9.])
    >>> movingsum(arr, 2, norm=True)
    array([ NaN,   3.,   4.,   8.,   9.])

    """

    # Check input
    if window < 1:
        raise ValueError('window must be at least 1')
    if window > arr.shape[axis]:
        raise ValueError('Window is too big.')
    if skip < 0:
        raise ValueError('skip cannot be negative.')
    if skip > arr.shape[axis]:
        raise IndexError('Your skip is too large.')

    # Set missing values to 0 (astype returns a copy, so the input is kept)
    m = ismissing(arr)
    arr = arr.astype(float)
    arr[m] = 0

    # Cumsum
    csx = arr.cumsum(axis)

    ndim = arr.ndim

    def along_axis(sl):
        # Full-array index that applies `sl` along `axis` only. It must be a
        # tuple: recent NumPy no longer accepts a list of slices as an index.
        index = [slice(None)] * ndim
        index[axis] = sl
        return tuple(index)

    # Set up indexes
    index1 = along_axis(slice(window - 1, -skip or None))
    index2 = along_axis(slice(None, -window - skip))
    index3 = along_axis(slice(1, None))
    index4 = along_axis(slice(skip + window - 1, None))

    # Make moving sum: cumsum at window end minus cumsum before window start
    msx = csx[index1]
    msx[index3] = msx[index3] - csx[index2]

    # Same trick on the non-missing mask gives the count of valid elements
    csm = (~m).cumsum(axis)
    msm = csm[index1]
    msm[index3] = msm[index3] - csm[index2]

    # Normalize
    if norm:
        ms = 1.0 * window * msx / msm
    else:
        ms = msx
        # A window with no valid elements has no meaningful sum
        ms[msm == 0] = np.nan

    # Pad to get back to original shape
    arr.fill(np.nan)
    arr[index4] = ms

    return arr
예제 #14
0
 def test_ismissing_9a(self):
     """afunc.ismissing_9a

     An array holding only True should not be flagged as missing.
     """
     data = np.array([True])
     expected = np.array([False])
     assert_equal(ismissing(data), expected)
예제 #15
0
 def test_ismissing_2a(self):
     """afunc.ismissing_2a

     An array holding only 1.0 should not be flagged as missing.
     """
     data = np.array([1.0])
     expected = np.array([False])
     assert_equal(ismissing(data), expected)
예제 #16
0
파일: move.py 프로젝트: fhal/la
def movingsum(arr, window, skip=0, axis=-1, norm=False):
    """
    Moving sum ignoring NaNs, optionally normalized for missing (NaN) data.

    Parameters
    ----------
    arr : ndarray
        Input array.
    window : int
        The number of elements in the moving window.
    skip : int, optional
        By default (skip=0) the moving sum at element *i* is the sum over the
        slice of elements from *i + 1 - window* to *i + 1* (so the last element
        in the sum is *i*). With nonzero `skip` the sum is over the slice from
        *i + 1 - window - skip* to *i + 1 - skip*. `skip` cannot be negative.
    axis : int, optional
        The axis over which to perform the moving sum. By default the moving
        sum is taken over the last axis (-1).
    norm : bool, optional
        Whether or not to normalize the sum. The default is not to normalize.
        If there are 3 missing elements in a window, for example, then the
        normalization would be to multiply the sum in that window by
        *window / (window - 3)*.

    Returns
    -------
    y : ndarray
        The moving sum of the input array along the specified axis. The
        output is a new float array with the same shape as the input;
        positions without a full window are filled with NaN.

    Raises
    ------
    ValueError
        If `window` is less than 1 or larger than the length of `arr` along
        `axis`, or if `skip` is negative.
    IndexError
        If `skip` is larger than the length of `arr` along `axis`.

    Examples
    --------
    >>> arr = np.array([1, 2, 3, 4, 5])
    >>> movingsum(arr, 2)
    array([ NaN,   3.,   5.,   7.,   9.])

    >>> arr = np.array([1, 2, np.nan, 4, 5])
    >>> movingsum(arr, 2)
    array([ NaN,   3.,   2.,   4.,   9.])
    >>> movingsum(arr, 2, norm=True)
    array([ NaN,   3.,   4.,   8.,   9.])

    """

    # Check input (call-style raise works on both Python 2 and Python 3)
    if window < 1:
        raise ValueError('window must be at least 1')
    if window > arr.shape[axis]:
        raise ValueError('Window is too big.')
    if skip < 0:
        raise ValueError('skip cannot be negative.')
    if skip > arr.shape[axis]:
        raise IndexError('Your skip is too large.')

    # Set missing values to 0 (astype returns a copy, so the input is kept)
    m = ismissing(arr)
    arr = arr.astype(float)
    arr[m] = 0

    # Cumsum
    csx = arr.cumsum(axis)

    ndim = arr.ndim

    def along_axis(sl):
        # Full-array index that applies `sl` along `axis` only. It must be a
        # tuple: recent NumPy no longer accepts a list of slices as an index.
        index = [slice(None)] * ndim
        index[axis] = sl
        return tuple(index)

    # Set up indexes
    index1 = along_axis(slice(window - 1, -skip or None))
    index2 = along_axis(slice(None, -window - skip))
    index3 = along_axis(slice(1, None))
    index4 = along_axis(slice(skip + window - 1, None))

    # Make moving sum: cumsum at window end minus cumsum before window start
    msx = csx[index1]
    msx[index3] = msx[index3] - csx[index2]

    # Same trick on the non-missing mask gives the count of valid elements
    csm = (~m).cumsum(axis)
    msm = csm[index1]
    msm[index3] = msm[index3] - csm[index2]

    # Normalize
    if norm:
        ms = 1.0 * window * msx / msm
    else:
        ms = msx
        # A window with no valid elements has no meaningful sum
        ms[msm == 0] = np.nan

    # Pad to get back to original shape
    arr.fill(np.nan)
    arr[index4] = ms

    return arr
예제 #17
0
def mov_sum(arr, window, skip=0, axis=-1, norm=False):
    """
    Moving sum ignoring NaNs, optionally normalized for missing (NaN) data.

    Parameters
    ----------
    arr : ndarray
        Input array.
    window : int
        The number of elements in the moving window.
    skip : int, optional
        By default (skip=0) the moving sum at element *i* is the sum over the
        slice of elements from *i + 1 - window* to *i + 1* (so the last element
        in the sum is *i*). With nonzero `skip` the sum is over the slice from
        *i + 1 - window - skip* to *i + 1 - skip*. `skip` cannot be negative.
    axis : int, optional
        The axis over which to perform the moving sum. By default the moving
        sum is taken over the last axis (-1).
    norm : bool, optional
        Whether or not to normalize the sum. The default is not to normalize.
        If there are 3 missing elements in a window, for example, then the
        normalization would be to multiply the sum in that window by
        *window / (window - 3)*.

    Returns
    -------
    y : ndarray
        The moving sum of the input array along the specified axis. The
        output has the same shape as the input; positions without a full
        window are filled with NaN.

    Raises
    ------
    ValueError
        If `window` is less than 1 or larger than the length of `arr` along
        `axis`, or if `skip` is negative.
    IndexError
        If `skip` is larger than the length of `arr` along `axis`.

    Examples
    --------
    >>> arr = np.array([1, 2, 3, 4, 5])
    >>> mov_sum(arr, 2)
    array([ NaN,   3.,   5.,   7.,   9.])

    >>> arr = np.array([1, 2, np.nan, 4, 5])
    >>> mov_sum(arr, 2)
    array([ NaN,   3.,   2.,   4.,   9.])
    >>> mov_sum(arr, 2, norm=True)
    array([ NaN,   3.,   4.,   8.,   9.])

    """

    # Check input (call-style raise works on both Python 2 and Python 3)
    if window < 1:
        raise ValueError('window must be at least 1')
    if window > arr.shape[axis]:
        raise ValueError('Window is too big.')
    if skip < 0:
        raise ValueError('skip cannot be negative.')
    if skip > arr.shape[axis]:
        raise IndexError('Your skip is too large.')

    ndim = arr.ndim
    length = arr.shape[axis]

    def along_axis(sl):
        # Full-array index that applies `sl` along `axis` only. It must be a
        # tuple: recent NumPy no longer accepts a list of slices as an index.
        index = [slice(None)] * ndim
        index[axis] = sl
        return tuple(index)

    # Set missing values to 0 so they do not contribute to the cumsum
    m = ismissing(arr)
    arr = 1.0 * arr
    arr[m] = 0
    csx = arr.cumsum(axis)

    index1 = along_axis(slice(window - 1, None))
    index2 = along_axis(slice(None, -window))
    index3 = along_axis(slice(1, None))

    # Window sum = cumsum at window end minus cumsum just before window start
    msx = csx[index1]
    msx[index3] = msx[index3] - csx[index2]

    # Same trick on the non-missing mask gives the count of valid elements
    csm = (~m).cumsum(axis)
    msm = csm[index1]
    msm[index3] = msm[index3] - csm[index2]

    if norm:
        ms = 1.0 * window * msx / msm
    else:
        ms = msx
        # A window with no valid elements has no meaningful sum
        ms[msm == 0] = np.nan

    # Pad with NaN to get back to the original shape. The pad is capped at
    # the axis length so that a large `skip` cannot grow the output.
    initshape = list(arr.shape)
    initshape[axis] = min(skip + window - 1, length)
    cutslice = along_axis(slice(None, -skip or None, None))
    pad = np.nan * np.zeros(initshape)
    ms = np.concatenate((pad, ms[cutslice]), axis)
    return ms