예제 #1
0
    def unique_index(self):
        """Return the sort-index entries found at the start of each run.

        Without bin counts (``self.bincounts`` is empty) the result is
        ``self.sort_index[offsets]`` where ``offsets`` is the exclusive prefix
        sum of ``rl.encode(self.sorted.mask, self.sorted)``.

        With bin counts, the bin id of every element is passed to the encoder
        together with the mask and the values, and the result is an ``Atom``
        whose first-level bin counts are rebuilt from the encoded bin ids.

        NOTE(review): ``rl.encode`` is assumed to return the lengths of runs
        of consecutive equal entries -- confirm against the ``run_length``
        module.  Under that assumption ``offsets`` are the run start positions
        and the rebuilt bin counts are the number of runs per bin.
        """
        # Kept from the original; the name is not used in this method.
        from element import Element
        if len(self.bincounts) == 0:
            ordered = self.sorted
            run_lengths = rl.encode(ordered.mask, self.sorted)
            # Exclusive prefix sum: position at which each count begins.
            offsets = np.cumsum(run_lengths) - run_lengths
            return self.sort_index[offsets]
        else:
            # Builtin ``int`` instead of ``np.int``: the alias was deprecated
            # in NumPy 1.20 and removed in 1.24; it always meant builtin int.
            outer_counts = np.asarray(self.bincounts[0]).astype(int)
            # Entries whose mask is False contribute no elements to np.repeat.
            outer_counts[~self.bincounts[0].mask] = 0
            binid = np.repeat(np.arange(outer_counts.size),
                              outer_counts)[self.sort_index.flattened]
            flat_sorted = self.sorted.flattened

            # NOTE(review): if slicing returns a view, the assignment below
            # also writes into ``flat_sorted`` -- confirm this is intended.
            array = flat_sorted[:]
            array[~flat_sorted.mask] = 0
            run_lengths = rl.encode(binid, flat_sorted.mask, array)
            offsets = np.cumsum(run_lengths) - run_lengths

            # Encode the bin ids sampled at ``offsets``; ``offsets2`` is the
            # index of the last entry covered by each of those counts.
            runs_per_bin = rl.encode(binid[offsets])
            offsets2 = np.cumsum(runs_per_bin) - 1

            # Scatter the counts back to their bin ids; bins that never
            # appear keep a count of zero.
            new_counts = np.zeros(self.bincounts[0].size, int)
            new_counts[binid[offsets][offsets2]] = runs_per_bin

            new_bincounts = [Atom(new_counts, mask=self.bincounts[0].mask)]
            new_bincounts.extend(self.bincounts[1:])
            return Atom(self.sort_index.flattened[offsets],
                        mask=flat_sorted.mask[offsets],
                        bincounts=new_bincounts)
예제 #2
0
class WordCountTests(unittest.TestCase):
    def test_encode(self):
        self.assertMultiLineEqual('2A3B4C', encode('AABBBCCCC'))

    def test_decode(self):
        self.assertMultiLineEqual('AABBBCCCC', decode('2A3B4C'))

    def test_encode_with_single(self):
        self.assertMultiLineEqual(
            '12WB12W3B24WB',
            encode('WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWB'))

    def test_decode_with_single(self):
        self.assertMultiLineEqual(
            'WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWB',
            decode('12WB12W3B24WB'))

    def test_combination(self):
        self.assertMultiLineEqual('zzz ZZ  zZ', decode(encode('zzz ZZ  zZ')))

    encode('zzz ZZ  zZ')

    def test_encode_unicode_s(self):
        self.assertMultiLineEqual('⏰3⚽2⭐⏰', encode('⏰⚽⚽⚽⭐⭐⏰'))

    def test_decode_unicode(self):
        self.assertMultiLineEqual('⏰⚽⚽⚽⭐⭐⏰', decode('⏰3⚽2⭐⏰'))
예제 #3
0
def group_index(self, *a):
    '''Return an ``Atom`` holding the sort index of ``self`` with bin counts.

    The column names come from ``by(self, *a)``.  The data of the returned
    ``Atom`` is ``sort_index(self, *names)``; its single bin-count entry is
    ``rl.encode`` applied to the same columns taken from the sorted table.
    '''
    from atom import Atom
    import run_length as rl

    key_names = by(self, *a)
    ordered_table = sort_by(self, *key_names)
    key_columns = getcolumns(ordered_table, *key_names)

    order = sort_index(self, *key_names)
    group_sizes = rl.encode(*key_columns)
    return Atom(order, bincounts=[group_sizes])
예제 #4
0
def group_index(self, *a):
    '''Build the grouped sort index of ``self`` for the columns chosen by ``a``.

    ``by(self, *a)`` resolves the column names.  The result is an ``Atom``
    wrapping ``sort_index(self, *names)`` whose ``bincounts`` list contains
    one entry: ``rl.encode`` of those columns read from the sorted table.
    '''
    from atom import Atom
    import run_length as rl

    names = by(self, *a)
    columns_in_order = getcolumns(sort_by(self, *names), *names)

    positions = sort_index(self, *names)
    counts = rl.encode(*columns_in_order)
    return Atom(positions, bincounts=[counts])
예제 #5
0
 def __init__(self, join, container, outer, axis, *keys):
     """Store the arguments and index ``container`` by ``keys``.

     Sets ``size``, ``join``, ``axis`` and ``outer`` from the arguments, then:

     * ``columns``: the key columns of ``container`` reordered by
       ``sort_index(container, *keys)``;
     * ``bin_counts``: ``rl.encode`` of those columns;
     * ``unique``: ``.first`` of each column wrapped in an ``Atom`` that
       carries ``bin_counts``;
     * ``inverted_index``: the sort index wrapped the same way.
     """
     self.size = len(container)
     self.join = join
     self.axis = axis
     self.outer = outer

     order = sort_index(container, *keys)
     key_columns = getcolumns(container[order], *keys)
     self.columns = key_columns

     counts = rl.encode(*key_columns)
     self.bin_counts = counts

     firsts = []
     for column in key_columns:
         firsts.append(Atom(column, bincounts=[counts]).first)
     self.unique = firsts

     self.inverted_index = Atom(order, bincounts=[counts])
예제 #6
0
 def __init__(self, left, right, keys, left_outer=False, right_outer=False):
     """Create both sides of the join and a combined index over their keys.

     Args:
         left: container handed to the left ``join.Side`` (axis 0).
         right: container handed to the right ``join.Side`` (axis 1).
         keys: mapping; its values are passed as the key arguments of the
             left side and its keys as those of the right side.
         left_outer: forwarded as the ``outer`` argument of the left side.
         right_outer: forwarded as the ``outer`` argument of the right side.

     Sets ``left``, ``right`` (each with an ``other`` attribute pointing to
     the opposite side) and ``side_indexes``.
     """
     self.left = join.Side(self, left, left_outer, 0, *keys.values())
     self.right = join.Side(self, right, right_outer, 1, *keys.keys())
     self.left.other, self.right.other = self.right, self.left
     # range() exists on Python 2 and 3; the former xrange() is a NameError
     # on Python 3.
     joined_columns = [np.concatenate([self.left.unique[i], self.right.unique[i]])
                       for i in range(len(keys))]
     # np.lexsort uses the LAST column as the primary sort key.
     _sort_index = np.lexsort(joined_columns)
     _bin_counts = rl.encode(*[x[_sort_index] for x in joined_columns])
     # Position of every unique key within its own side (left positions
     # first, then right), reordered by the combined sort.
     self.side_indexes = Atom(np.concatenate([np.arange(self.left.bin_counts.size),
                                              np.arange(self.right.bin_counts.size)])[_sort_index],
                              bincounts=[_bin_counts])
예제 #7
0
 def sort_index(self):
     """Return an ``Atom`` of indices that sort this object's values.

     Without bin counts the indices come from
     ``np.lexsort([self.mask, array])`` (values are the primary key, the
     mask breaks ties).  With bin counts the bin id of each element is
     added as the primary key, so sorting happens inside each bin, and the
     original ``bincounts`` are attached to the result.

     The returned ``Atom`` carries ``self.mask`` reordered by the index.
     """
     array = self.asarray()
     # NOTE(review): this equality may not match unit-specific dtypes such
     # as datetime64[ns]; np.issubdtype(...) may be what is intended --
     # confirm before changing.
     if array.dtype == np.datetime64:
         order = np.argsort(array)
         # Inverse permutation: maps sorted positions back to the original.
         inverse_order = np.empty(len(array), dtype=int)
         inverse_order[order] = np.arange(len(array))
         sorted_array = array[order]
         # Presumably run lengths of equal consecutive values -- confirm
         # against the ``run_length`` module.
         run_lengths = rl.encode(sorted_array)
         # Convert one value per count to an integer built from its string
         # form with '-', ':' and ' ' removed, repeat it over the count and
         # restore the original element order.
         array = np.repeat(
             np.array([
                 int(re.sub('[-: ]', '', str(x)))
                 for x in sorted_array[run_lengths.cumsum() - run_lengths]
             ]), run_lengths)[inverse_order]
     if len(self.bincounts) == 0:
         order = np.lexsort([self.mask, array])
         return Atom(order, mask=self.mask[order])
     else:
         # Builtin ``int`` instead of ``np.int``: the alias was deprecated in
         # NumPy 1.20 and removed in 1.24; it always meant builtin int.
         bincounts = np.asarray(self.bincounts[0]).astype(int)
         # Entries whose mask is False contribute no elements to np.repeat.
         bincounts[~self.bincounts[0].mask] = 0
         binid = np.repeat(np.arange(bincounts.size), bincounts)
         order = np.lexsort([self.mask, array, binid])
         return Atom(order,
                     mask=self.mask[order],
                     bincounts=self.bincounts)
예제 #8
0
 def test_encode(self):
     # Each run is expected to be written as <count><char>.
     expected = "2A3B4C"
     actual = encode("AABBBCCCC")
     self.assertMultiLineEqual(expected, actual)
예제 #9
0
 def test_encode_unicode_s(self):
     # Non-ASCII symbols: single occurrences carry no count.
     expected = '⏰3⚽2⭐⏰'
     actual = encode('⏰⚽⚽⚽⭐⭐⏰')
     self.assertMultiLineEqual(expected, actual)
예제 #10
0
 def test_combination(self):
     # Round trip: decode(encode(text)) must give back the text.
     encoded = encode('zzz ZZ  zZ')
     round_tripped = decode(encoded)
     self.assertMultiLineEqual('zzz ZZ  zZ', round_tripped)
예제 #11
0
 def test_encode_with_single(self):
     # Lone 'B' characters are expected to appear without a count.
     source = 'WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWB'
     expected = '12WB12W3B24WB'
     self.assertMultiLineEqual(expected, encode(source))
예제 #12
0
 def test_encode_with_single(self):
     # Mixed long runs and single characters in one input.
     expected = '12WB12W3B24WB'
     actual = encode('WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWB')
     self.assertMultiLineEqual(expected, actual)
예제 #13
0
 def test_encode_unicode_s(self):
     # Encoding must work for characters outside ASCII as well.
     text = '⏰⚽⚽⚽⭐⭐⏰'
     expected = '⏰3⚽2⭐⏰'
     self.assertMultiLineEqual(expected, encode(text))
예제 #14
0
 def test_encode_unicode_s(self):
     # Symbols that occur once keep no count; repeated ones get a prefix.
     expected = "⏰3⚽2⭐⏰"
     actual = encode("⏰⚽⚽⚽⭐⭐⏰")
     self.assertMultiLineEqual(expected, actual)
예제 #15
0
 def test_combination(self):
     # Encoding followed by decoding must be the identity on this input.
     encoded = encode("zzz ZZ  zZ")
     restored = decode(encoded)
     self.assertMultiLineEqual("zzz ZZ  zZ", restored)
예제 #16
0
 def test_encode_with_single(self):
     # Single 'B' characters between long 'W' runs carry no count.
     source = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWB"
     expected = "12WB12W3B24WB"
     self.assertMultiLineEqual(expected, encode(source))
예제 #17
0
 def unique_counts(self):
     """Return ``rl.encode`` applied to ``self.sorted``.

     NOTE(review): presumably the number of occurrences of each distinct
     value -- confirm against the ``run_length`` module.
     """
     ordered = self.sorted
     return rl.encode(ordered)
예제 #18
0
from run_length import encode, decode

# Manual check: every computed result is printed on one line and the literal
# it is presumably expected to equal on the next, so pairs can be compared
# by eye.
print(encode("AABBCCDDEEE"), "2A2B2C2D3E", sep="\n")
print(encode("AAABCCDDDDAAB"), "3A1B2C4D2A1B", sep="\n")

# Blank line between the encode and decode sections of the output.
print()
print(decode("3F2A1B4D1C"), "FFFAABDDDDC", sep="\n")
print(decode("1A1B1A1B1A2B2A"), "ABABABBAA", sep="\n")
예제 #19
0
 def test_combination(self):
     # The original text must survive an encode/decode round trip.
     original = 'zzz ZZ  zZ'
     restored = decode(encode('zzz ZZ  zZ'))
     self.assertMultiLineEqual(original, restored)
예제 #20
0
파일: test.py 프로젝트: qpzm/PS
 def test_encode(self):
     # This encoder writes <char><count> and keeps a count of 1 explicit.
     actual = run_length.encode('HHHeellloWooorrrrlld!!')
     expected = 'H3e2l3o1W1o3r4l2d1!2'
     self.assertEqual(actual, expected)
예제 #21
0
 def test_encode(self):
     # Three runs of lengths 2, 3 and 4.
     source = 'AABBBCCCC'
     expected = '2A3B4C'
     self.assertMultiLineEqual(expected, encode(source))
예제 #22
0
 def test_encode(self):
     # Runs are expected to come out as <count><char> pairs.
     expected = '2A3B4C'
     actual = encode('AABBBCCCC')
     self.assertMultiLineEqual(expected, actual)