def listmap_fill_unmappable_test():
    """listmap_fill unmappable test

    Maps ``['a', 2, 3, 4]`` onto ``['a', 2, 3]``.  The last item (4) has no
    match, so it is expected to receive index 0 (presumably the default fill
    value of listmap_fill) and its position (3) is expected to be reported
    in the list of unmappable positions.
    """
    source = ['a', 2, 3]
    wanted = ['a', 2, 3, 4]
    got_idx, got_unmappable = listmap_fill(source, wanted)
    note = "listmap_fill failed on list1=%s and list2=%s" % (source, wanted)
    # (actual, desired) pairs, checked in the same order as before:
    # first the index list, then the unmappable positions.
    comparisons = (
        (got_idx, [0, 1, 2, 0]),
        (got_unmappable, [3]),
    )
    for actual, desired in comparisons:
        yield assert_equal, actual, desired, note
def listmap_fill_test():
    """listmap_fill test

    When every item of list2 is present in list1, the index list returned
    by listmap_fill must equal ``[list1.index(i) for i in list2]``.  The
    check is repeated for 100 random orderings of list2.
    """
    haystack = list(range(6))
    needles = list(range(5))
    template = "listmap_fill failed on list1=%s and list2=%s"
    n_trials = 100
    for _trial in range(n_trials):
        # Shuffle in place so each trial sees a new ordering of the needles.
        np.random.shuffle(needles)
        by_index = [haystack.index(item) for item in needles]
        by_listmap, _unmappable = listmap_fill(haystack, needles)
        yield assert_equal, by_index, by_listmap, template % (haystack, needles)
# Sentinel meaning "the missing-value marker has not been looked up yet".
# A unique object is used (and compared by identity) because None, '' and
# NaN are all legitimate missing-value markers.
_MARKER_UNSET = object()


def _fill_missing(x, lar, idx_miss, ax, marker, cast):
    """
    Write the missing-value marker into the entries `idx_miss` of axis `ax`.

    Parameters
    ----------
    x : ndarray
        Freshly taken (i.e. already copied) data array; it is modified in
        place unless a cast to float is required.
    lar : larry
        The input larry `x` came from; used to look up the marker the first
        time one is needed.
    idx_miss : sequence of int
        Positions along `ax` that have no counterpart in the input larry.
    ax : int
        Axis along which `idx_miss` applies.
    marker : object
        Marker found on an earlier axis, or `_MARKER_UNSET`.
    cast : bool
        Whether a dtype without a missing-value marker may be cast to float.

    Returns
    -------
    x : ndarray
        The filled array (a new float array if a cast was needed).
    marker : object
        The marker that was written, to be reused on later axes.

    Raises
    ------
    TypeError
        If the dtype has no missing-value marker and `cast` is False.
    """
    if marker is _MARKER_UNSET:
        marker = missing_marker(lar)
    if marker is NotImplemented:
        # dtype (e.g. int, bool) has no missing-value marker of its own
        if not cast:
            raise TypeError("`fill` type not compatible with larry dtype")
        x = x.astype(float)
        marker = missing_marker(x)
    index = [slice(None)] * x.ndim
    index[ax] = idx_miss
    # NumPy requires a tuple (not a list) for multidimensional indexing
    x[tuple(index)] = marker
    return x, marker


def align_raw(lar1, lar2, join='inner', cast=True):
    """
    Align two larrys but return Numpy arrays and label instead of larrys.

    This function is the same as la.align() except that instead of returning
    two larrys, the components of the two larrys are returned (two Numpy
    arrays, a label, and flags for whether the two Numpy arrays are views of
    the data arrays of the corresponding input larrys).

    Parameters
    ----------
    lar1 : larry
        One of the input larrys. Must have the same number of dimensions as
        `lar2`.
    lar2 : larry
        One of the input larrys. Must have the same number of dimensions as
        `lar1`.
    join : {'inner', 'outer', 'left', 'right', list}, optional
        The join method used to align the two larrys. The default join method
        along each axis is 'inner', i.e., the intersection of the labels. If
        `join` is a list of strings then the length of the list must be the
        same as the number of dimensions of the two larrys. The first element
        in the list is the join method for axis=0, the second element is the
        join method for axis=1, and so on.
    cast : bool, optional
        Only float, str, and object dtypes have missing value markers (la.nan,
        '', and None, respectively). Other dtypes, such as int and bool, do
        not have missing value markers. If `cast` is set to True (default)
        then int and bool dtypes, for example, will be cast to float if any
        new rows, columns, etc are created. If cast is set to False, then a
        TypeError will be raised for int and bool dtype input if the join
        introduces new rows, columns, etc. An inner join will never introduce
        new rows, columns, etc.

    Returns
    -------
    x1 : ndarray
        The aligned version of `lar1`.
    x2 : ndarray
        The aligned version of `lar2`.
    label : list of lists
        The label of the joined larrys.
    x1isview : bool
        True if no copy of lar1.x had to be made, i.e. `x1` is lar1.x;
        False otherwise. No copy is made when the labels of `lar1` and
        `lar2` are the same along all axes; 'left' joins never copy lar1.
    x2isview : bool
        True if no copy of lar2.x had to be made, i.e. `x2` is lar2.x;
        False otherwise. No copy is made when the labels of `lar1` and
        `lar2` are the same along all axes; 'right' joins never copy lar2.

    Raises
    ------
    ValueError
        If the two larrys differ in number of dimensions, if a `join` list
        has the wrong length, or if a join method is not recognized.
    TypeError
        If `join` is neither a string nor a list, or if `cast` is False and
        a join needs a missing value in a dtype that has no marker.

    See Also
    --------
    la.align: Align two larrys using one of five join methods.

    Notes
    -----
    The returned Numpy arrays are the data arrays of the corresponding input
    larrys if the labels of the two input larrys are the same along all axes.
    If the labels differ along any axis then a copy is returned for each
    larry that has to be reindexed.

    Examples
    --------
    Create two larrys:

    >>> lar1 = larry([1, 2])
    >>> lar2 = larry([1, 2, 3])

    The default join method is an inner join:

    >>> x1, x2, label, x1isview, x2isview = align_raw(lar1, lar2)
    >>> x1
    array([1, 2])
    >>> x2
    array([1, 2])
    >>> label
    [[0, 1]]
    >>> x1isview
    False
    >>> x2isview
    False

    An outer join adds a missing value (NaN) to lar1, therefore the dtype
    of lar1 is changed from int to float:

    >>> x1, x2, label, x1isview, x2isview = align_raw(lar1, lar2, join='outer')
    >>> x1
    array([ 1.,  2., nan])
    >>> x2
    array([1, 2, 3])
    >>> label
    [[0, 1, 2]]
    >>> x1isview
    False
    >>> x2isview
    False

    If the labels are already aligned, then the data arrays themselves are
    returned:

    >>> lar1 = larry([1, 2])
    >>> lar2 = larry([3, 4])
    >>> x1, x2, label, x1isview, x2isview = align_raw(lar1, lar2)
    >>> x1isview
    True
    >>> x2isview
    True

    """

    # Check number of dimensions
    ndim = lar2.ndim
    if lar1.ndim != ndim:
        msg = "'lar1' and 'lar2' must have the same number of dimensions."
        raise ValueError(msg)

    # Check join type; a single string applies to every axis
    if isinstance(join, str):
        join = [join] * ndim
    elif isinstance(join, list):
        if len(join) != ndim:
            msg = ("Length of `join` list must equal the number of "
                   "dimensions of `lar1`.")
            raise ValueError(msg)
    else:
        raise TypeError("`join` must be a string or a list.")

    # Missing-value markers are looked up lazily (inside the loop) and then
    # reused on later axes.
    miss1 = _MARKER_UNSET
    miss2 = _MARKER_UNSET

    # For loop initialization
    label = []
    x1 = lar1.x
    x2 = lar2.x
    label1 = lar1.label
    label2 = lar2.label
    x1isview = True
    x2isview = True

    # Loop: align one axis at a time
    for ax in range(ndim):
        list1 = label1[ax]
        list2 = label2[ax]
        joinax = join[ax]
        if joinax == 'inner':
            if list1 == list2:
                list3 = list(list1)
            else:
                list3 = sorted(set(list1) & set(list2))
                x1 = x1.take(listmap(list1, list3), ax)
                x2 = x2.take(listmap(list2, list3), ax)
                x1isview = False
                x2isview = False
        elif joinax == 'outer':
            if list1 == list2:
                list3 = list(list1)
            else:
                list3 = sorted(set(list1) | set(list2))
                # Unmappable labels temporarily point at index 0; those
                # entries are overwritten with the missing marker below.
                idx1, idx1_miss = listmap_fill(list1, list3, fill=0)
                idx2, idx2_miss = listmap_fill(list2, list3, fill=0)
                x1 = x1.take(idx1, ax)
                x2 = x2.take(idx2, ax)
                if len(idx1_miss) > 0:
                    x1, miss1 = _fill_missing(x1, lar1, idx1_miss, ax,
                                              miss1, cast)
                if len(idx2_miss) > 0:
                    x2, miss2 = _fill_missing(x2, lar2, idx2_miss, ax,
                                              miss2, cast)
                x1isview = False
                x2isview = False
        elif joinax == 'left':
            # lar1 defines the label; only lar2 may need reindexing
            list3 = list(list1)
            if list1 != list2:
                idx2, idx2_miss = listmap_fill(list2, list3, fill=0)
                x2 = x2.take(idx2, ax)
                if len(idx2_miss) > 0:
                    x2, miss2 = _fill_missing(x2, lar2, idx2_miss, ax,
                                              miss2, cast)
                x2isview = False
        elif joinax == 'right':
            # lar2 defines the label; only lar1 may need reindexing
            list3 = list(list2)
            if list1 != list2:
                idx1, idx1_miss = listmap_fill(list1, list3, fill=0)
                x1 = x1.take(idx1, ax)
                if len(idx1_miss) > 0:
                    x1, miss1 = _fill_missing(x1, lar1, idx1_miss, ax,
                                              miss1, cast)
                x1isview = False
        else:
            raise ValueError('join type not recognized')
        label.append(list3)

    return x1, x2, label, x1isview, x2isview