Example #1
0
 def test_missing_marker_5(self):
     """afunc.missing_marker_5: a larry holding a date has None as marker."""
     from datetime import date
     lar = larry([date(2011, 1, 1)])
     marker = missing_marker(lar)
     assert_equal(marker, None)
Example #2
0
 def test_missing_marker_3(self):
     """afunc.missing_marker_3: a larry of bools reports NotImplemented."""
     lar = larry([True])
     marker = missing_marker(lar)
     assert_equal(marker, NotImplemented)
Example #3
0
 def test_missing_marker_4(self):
     """afunc.missing_marker_4: a larry of strings uses '' as its marker."""
     lar = larry(['a'])
     marker = missing_marker(lar)
     assert_equal(marker, '')
Example #4
0
 def test_missing_marker_1(self):
     """afunc.missing_marker_1: a larry of ints reports NotImplemented."""
     lar = larry([1])
     marker = missing_marker(lar)
     assert_equal(marker, NotImplemented)
Example #5
0
 def test_missing_marker_2(self):
     """afunc.missing_marker_2: a larry of floats uses nan as its marker."""
     lar = larry([1.0])
     marker = missing_marker(lar)
     assert_equal(marker, nan)
Example #6
0
 def test_missing_marker_4a(self):
     """afunc.missing_marker_4a: a string ndarray uses '' as its marker."""
     arr = np.array(['a'])
     marker = missing_marker(arr)
     assert_equal(marker, '')
Example #7
0
 def test_missing_marker_3a(self):
     """afunc.missing_marker_3a: a bool ndarray reports NotImplemented."""
     arr = np.array([True])
     marker = missing_marker(arr)
     assert_equal(marker, NotImplemented)
Example #8
0
 def test_missing_marker_2a(self):
     """afunc.missing_marker_2a: a float ndarray uses nan as its marker."""
     arr = np.array([1.0])
     marker = missing_marker(arr)
     assert_equal(marker, nan)
Example #9
0
def align_raw(lar1, lar2, join='inner', cast=True):
    """
    Align two larrys but return Numpy arrays and label instead of larrys.

    This function is the same as la.align() except that instead of returning
    two larrys, the components of the two larrys are returned (two Numpy
    arrays, a label, and flags for whether the two Numpy arrays are views of
    the data arrays of the corresponding input larrys).

    Parameters
    ----------
    lar1 : larry
        One of the input larrys. Must have the same number of dimensions as
        `lar2`.
    lar2 : larry
        One of the input larrys. Must have the same number of dimensions as
        `lar1`.
    join : {'inner', 'outer', 'left', 'right', list}, optional
        The join method used to align the two larrys. The default join method
        along each axis is 'inner', i.e., the intersection of the labels. If
        `join` is a list of strings then the length of the list should be the
        same as the number of dimensions of the two larrys. The first element
        in the list is the join method for axis=0, the second element is the
        join method for axis=1, and so on.
    cast : bool, optional
        Only float, str, and object dtypes have missing value markers (la.nan,
        '', and None, respectively). Other dtypes, such as int and bool, do
        not have missing value markers. If `cast` is set to True (default)
        then int and bool dtypes, for example, will be cast to float if any
        new rows, columns, etc are created. If cast is set to False, then a
        TypeError will be raised for int and bool dtype input if the join
        introduces new rows, columns, etc. An inner join will never introduce
        new rows, columns, etc.

    Returns
    -------
    x1 : ndarray
        The aligned version of `lar1`.
    x2 : ndarray
        The aligned version of `lar2`.
    label : list of lists
        The label of the joined larrys.
    x1isview : bool
        True if x1 is lar1.x itself (no axis had to be reindexed); False if
        a reindexed copy was made. A 'left' join never reindexes x1.
    x2isview : bool
        True if x2 is lar2.x itself (no axis had to be reindexed); False if
        a reindexed copy was made. A 'right' join never reindexes x2.

    Raises
    ------
    ValueError
        If the two larrys differ in number of dimensions, if a `join` list
        has the wrong length, or if a join method is not recognized.
    TypeError
        If `join` is neither a string nor a list, or if `cast` is False and
        a missing value must be inserted into data whose dtype has no
        missing value marker.

    See Also
    --------
    la.align: Same alignment, but returns two larrys.

    Notes
    -----
    The returned Numpy arrays are the data arrays of the corresponding input
    larrys if the labels of the two input larrys are the same along all axes.
    If the labels differ along any axis then a copy is returned for every
    array that had to be reindexed.

    Examples
    --------
    Create two larrys:

    >>> lar1 = larry([1, 2])
    >>> lar2 = larry([1, 2, 3])

    The default join method is an inner join:

    >>> x1, x2, label, x1isview, x2isview = align_raw(lar1, lar2)
    >>> x1
    array([1, 2])
    >>> x2
    array([1, 2])
    >>> label
    [[0, 1]]
    >>> x1isview
    False
    >>> x2isview
    False

    An outer join adds a missing value (NaN) to lar1, therefore the dtype
    of lar1 is changed from int to float:

    >>> x1, x2, label, x1isview, x2isview = align_raw(lar1, lar2, join='outer')
    >>> x1
    array([ 1.,  2., nan])
    >>> x2
    array([1, 2, 3])
    >>> label
    [[0, 1, 2]]
    >>> x1isview
    False
    >>> x2isview
    False

    If the labels are already aligned, then the data arrays themselves are
    returned:

    >>> lar1 = larry([1, 2])
    >>> lar2 = larry([3, 4])
    >>> x1, x2, label, x1isview, x2isview = align_raw(lar1, lar2)
    >>> x1isview
    True
    >>> x2isview
    True

    """

    # Both inputs must have the same dimensionality
    ndim = lar2.ndim
    if lar1.ndim != ndim:
        msg = "'lar1' and 'lar2' must have the same number of dimensions."
        raise ValueError(msg)

    # Normalize `join` to one join method per axis
    if isinstance(join, str):
        join = [join] * ndim
    elif isinstance(join, list):
        if len(join) != ndim:
            msg = ("Length of `join` list must equal the number of "
                   "dimensions of `lar1`.")
            raise ValueError(msg)
    else:
        raise TypeError("`join` must be a string or a list.")

    # Missing markers are looked up lazily, only if a join creates new slots
    miss1 = _UNSET
    miss2 = _UNSET

    label = []
    x1 = lar1.x
    x2 = lar2.x
    label1 = lar1.label
    label2 = lar2.label
    x1isview = True
    x2isview = True

    # Align one axis at a time
    for ax in range(ndim):
        list1 = label1[ax]
        list2 = label2[ax]
        joinax = join[ax]
        if joinax == 'inner':
            if list1 == list2:
                list3 = list(list1)
            else:
                list3 = sorted(set(list1) & set(list2))
                x1 = x1.take(listmap(list1, list3), ax)
                x2 = x2.take(listmap(list2, list3), ax)
                x1isview = False
                x2isview = False
        elif joinax == 'outer':
            if list1 == list2:
                list3 = list(list1)
            else:
                list3 = sorted(set(list1) | set(list2))
                idx1, idx1_miss = listmap_fill(list1, list3, fill=0)
                idx2, idx2_miss = listmap_fill(list2, list3, fill=0)
                # take() copies, so the fills below never touch the inputs
                x1 = x1.take(idx1, ax)
                x2 = x2.take(idx2, ax)
                x1, miss1 = _fill_missing(x1, lar1, miss1, idx1_miss, ax,
                                          cast)
                x2, miss2 = _fill_missing(x2, lar2, miss2, idx2_miss, ax,
                                          cast)
                x1isview = False
                x2isview = False
        elif joinax == 'left':
            list3 = list(list1)
            if list1 != list2:
                idx2, idx2_miss = listmap_fill(list2, list3, fill=0)
                x2 = x2.take(idx2, ax)
                x2, miss2 = _fill_missing(x2, lar2, miss2, idx2_miss, ax,
                                          cast)
                x2isview = False
        elif joinax == 'right':
            list3 = list(list2)
            if list1 != list2:
                idx1, idx1_miss = listmap_fill(list1, list3, fill=0)
                x1 = x1.take(idx1, ax)
                x1, miss1 = _fill_missing(x1, lar1, miss1, idx1_miss, ax,
                                          cast)
                x1isview = False
        else:
            raise ValueError('join type not recognized')
        label.append(list3)

    return x1, x2, label, x1isview, x2isview


# Sentinel meaning "missing marker not looked up yet". An identity sentinel
# cannot collide with a real marker such as None, '' or nan.
_UNSET = object()


def _fill_missing(x, lar, marker, idx_miss, ax, cast):
    """
    Write the missing value marker into the slots `idx_miss` along axis `ax`.

    Parameters
    ----------
    x : ndarray
        Reindexed copy of the data of `lar`; modified in place unless a cast
        to float is required, in which case a new float array is filled.
    lar : larry
        The larry `x` came from; used to look up the marker the first time.
    marker : object
        Marker found on an earlier axis, or `_UNSET` if not yet looked up.
    idx_miss : sequence of int
        Positions along `ax` that have no counterpart in `lar`.
    ax : int
        Axis along which the slots are filled.
    cast : bool
        Whether data without a missing marker may be cast to float.

    Returns
    -------
    x : ndarray
        The filled array (a float copy if a cast was needed).
    marker : object
        The marker to reuse for later axes.

    Raises
    ------
    TypeError
        If the dtype has no missing marker and `cast` is False.
    """
    if len(idx_miss) == 0:
        return x, marker
    if marker is _UNSET:
        marker = missing_marker(lar)
    if marker is NotImplemented:
        if not cast:
            raise TypeError("`fill` type not compatible with larry dtype")
        x = x.astype(float)
        marker = missing_marker(x)
    index = [slice(None)] * x.ndim
    index[ax] = idx_miss
    # NumPy requires a tuple (not a list) for multidimensional indexing
    x[tuple(index)] = marker
    return x, marker
Example #10
0
def align_raw(lar1, lar2, join='inner', cast=True):
    """
    Align two larrys but return Numpy arrays and label instead of larrys.

    This function is the same as la.align() except that instead of returning
    two larrys, the components of the two larrys are returned (two Numpy
    arrays, a label, and flags for whether the two Numpy arrays are views of
    the data arrays of the corresponding input larrys).

    Parameters
    ----------
    lar1 : larry
        One of the input larrys. Must have the same number of dimensions as
        `lar2`.
    lar2 : larry
        One of the input larrys. Must have the same number of dimensions as
        `lar1`.
    join : {'inner', 'outer', 'left', 'right', list}, optional
        The join method used to align the two larrys. The default join method
        along each axis is 'inner', i.e., the intersection of the labels. If
        `join` is a list of strings then the length of the list should be the
        same as the number of dimensions of the two larrys. The first element
        in the list is the join method for axis=0, the second element is the
        join method for axis=1, and so on.
    cast : bool, optional
        Only float, str, and object dtypes have missing value markers (la.nan,
        '', and None, respectively). Other dtypes, such as int and bool, do
        not have missing value markers. If `cast` is set to True (default)
        then int and bool dtypes, for example, will be cast to float if any
        new rows, columns, etc are created. If cast is set to False, then a
        TypeError will be raised for int and bool dtype input if the join
        introduces new rows, columns, etc. An inner join will never introduce
        new rows, columns, etc.

    Returns
    -------
    x1 : ndarray
        The aligned version of `lar1`.
    x2 : ndarray
        The aligned version of `lar2`.
    label : list of lists
        The label of the joined larrys.
    x1isview : bool
        True if x1 is lar1.x itself (no axis had to be reindexed); False if
        a reindexed copy was made. A 'left' join never reindexes x1.
    x2isview : bool
        True if x2 is lar2.x itself (no axis had to be reindexed); False if
        a reindexed copy was made. A 'right' join never reindexes x2.

    Raises
    ------
    ValueError
        If the two larrys differ in number of dimensions, if a `join` list
        has the wrong length, or if a join method is not recognized.
    TypeError
        If `join` is neither a string nor a list, or if `cast` is False and
        a missing value must be inserted into data whose dtype has no
        missing value marker.

    See Also
    --------
    la.align: Same alignment, but returns two larrys.

    Notes
    -----
    The returned Numpy arrays are the data arrays of the corresponding input
    larrys if the labels of the two input larrys are the same along all axes.
    If the labels differ along any axis then a copy is returned for every
    array that had to be reindexed.

    Examples
    --------
    Create two larrys:

    >>> lar1 = larry([1, 2])
    >>> lar2 = larry([1, 2, 3])

    The default join method is an inner join:

    >>> x1, x2, label, x1isview, x2isview = align_raw(lar1, lar2)
    >>> x1
    array([1, 2])
    >>> x2
    array([1, 2])
    >>> label
    [[0, 1]]
    >>> x1isview
    False
    >>> x2isview
    False

    An outer join adds a missing value (NaN) to lar1, therefore the dtype
    of lar1 is changed from int to float:

    >>> x1, x2, label, x1isview, x2isview = align_raw(lar1, lar2, join='outer')
    >>> x1
    array([ 1.,  2., nan])
    >>> x2
    array([1, 2, 3])
    >>> label
    [[0, 1, 2]]
    >>> x1isview
    False
    >>> x2isview
    False

    If the labels are already aligned, then the data arrays themselves are
    returned:

    >>> lar1 = larry([1, 2])
    >>> lar2 = larry([3, 4])
    >>> x1, x2, label, x1isview, x2isview = align_raw(lar1, lar2)
    >>> x1isview
    True
    >>> x2isview
    True

    """

    # Both inputs must have the same dimensionality
    ndim = lar2.ndim
    if lar1.ndim != ndim:
        msg = "'lar1' and 'lar2' must have the same number of dimensions."
        raise ValueError(msg)

    # Normalize `join` to one join method per axis
    if isinstance(join, str):
        join = [join] * ndim
    elif isinstance(join, list):
        if len(join) != ndim:
            msg = ("Length of `join` list must equal the number of "
                   "dimensions of `lar1`.")
            raise ValueError(msg)
    else:
        raise TypeError("`join` must be a string or a list.")

    # Missing markers are looked up lazily, only if a join creates new slots
    miss1 = _UNSET
    miss2 = _UNSET

    label = []
    x1 = lar1.x
    x2 = lar2.x
    label1 = lar1.label
    label2 = lar2.label
    x1isview = True
    x2isview = True

    # Align one axis at a time
    for ax in range(ndim):
        list1 = label1[ax]
        list2 = label2[ax]
        joinax = join[ax]
        if joinax == 'inner':
            if list1 == list2:
                list3 = list(list1)
            else:
                list3 = sorted(set(list1) & set(list2))
                x1 = x1.take(listmap(list1, list3), ax)
                x2 = x2.take(listmap(list2, list3), ax)
                x1isview = False
                x2isview = False
        elif joinax == 'outer':
            if list1 == list2:
                list3 = list(list1)
            else:
                list3 = sorted(set(list1) | set(list2))
                idx1, idx1_miss = listmap_fill(list1, list3, fill=0)
                idx2, idx2_miss = listmap_fill(list2, list3, fill=0)
                # take() copies, so the fills below never touch the inputs
                x1 = x1.take(idx1, ax)
                x2 = x2.take(idx2, ax)
                x1, miss1 = _fill_missing(x1, lar1, miss1, idx1_miss, ax,
                                          cast)
                x2, miss2 = _fill_missing(x2, lar2, miss2, idx2_miss, ax,
                                          cast)
                x1isview = False
                x2isview = False
        elif joinax == 'left':
            list3 = list(list1)
            if list1 != list2:
                idx2, idx2_miss = listmap_fill(list2, list3, fill=0)
                x2 = x2.take(idx2, ax)
                x2, miss2 = _fill_missing(x2, lar2, miss2, idx2_miss, ax,
                                          cast)
                x2isview = False
        elif joinax == 'right':
            list3 = list(list2)
            if list1 != list2:
                idx1, idx1_miss = listmap_fill(list1, list3, fill=0)
                x1 = x1.take(idx1, ax)
                x1, miss1 = _fill_missing(x1, lar1, miss1, idx1_miss, ax,
                                          cast)
                x1isview = False
        else:
            raise ValueError('join type not recognized')
        label.append(list3)

    return x1, x2, label, x1isview, x2isview


# Sentinel meaning "missing marker not looked up yet". An identity sentinel
# cannot collide with a real marker such as None, '' or nan.
_UNSET = object()


def _fill_missing(x, lar, marker, idx_miss, ax, cast):
    """
    Write the missing value marker into the slots `idx_miss` along axis `ax`.

    Parameters
    ----------
    x : ndarray
        Reindexed copy of the data of `lar`; modified in place unless a cast
        to float is required, in which case a new float array is filled.
    lar : larry
        The larry `x` came from; used to look up the marker the first time.
    marker : object
        Marker found on an earlier axis, or `_UNSET` if not yet looked up.
    idx_miss : sequence of int
        Positions along `ax` that have no counterpart in `lar`.
    ax : int
        Axis along which the slots are filled.
    cast : bool
        Whether data without a missing marker may be cast to float.

    Returns
    -------
    x : ndarray
        The filled array (a float copy if a cast was needed).
    marker : object
        The marker to reuse for later axes.

    Raises
    ------
    TypeError
        If the dtype has no missing marker and `cast` is False.
    """
    if len(idx_miss) == 0:
        return x, marker
    if marker is _UNSET:
        marker = missing_marker(lar)
    if marker is NotImplemented:
        if not cast:
            raise TypeError("`fill` type not compatible with larry dtype")
        x = x.astype(float)
        marker = missing_marker(x)
    index = [slice(None)] * x.ndim
    index[ax] = idx_miss
    # NumPy requires a tuple (not a list) for multidimensional indexing
    x[tuple(index)] = marker
    return x, marker