Beispiel #1
0
 def test_buffer_info(self):
     """Exercise ``get_buffer_info`` from the ``buffer_test`` extension module.

     Checks that a strided array is rejected for a ``SIMPLE`` request, that
     the reported strides of Fortran- and C-ordered arrays are 8 at the
     expected end, and that a record dtype with many nested sub-records
     triggers a ``UserWarning``.
     """
     try:
         from _numpypy import multiarray as np
     except ImportError:
         skip('pypy built without _numpypy')
     module = self.import_module(name='buffer_test')
     get_buffer_info = module.get_buffer_info
     # Every second element of arange(5): expected to be refused for SIMPLE.
     raises(ValueError, get_buffer_info, np.arange(5)[::2], ('SIMPLE', ))
     arr = np.zeros((1, 10), order='F')
     shape, strides = get_buffer_info(arr, ['F_CONTIGUOUS'])
     assert strides[0] == 8
     arr = np.zeros((10, 1), order='C')
     shape, strides = get_buffer_info(arr, ['C_CONTIGUOUS'])
     assert strides[-1] == 8
     # Record dtype with eight nested 'b,i' sub-records; per the assertion
     # message below, its format string is expected to be too long.
     dt1 = np.dtype([('a', 'b'), ('b', 'i'), ('sub0', np.dtype('b,i')),
                     ('sub1', np.dtype('b,i')), ('sub2', np.dtype('b,i')),
                     ('sub3', np.dtype('b,i')), ('sub4', np.dtype('b,i')),
                     ('sub5', np.dtype('b,i')), ('sub6', np.dtype('b,i')),
                     ('sub7', np.dtype('b,i')), ('c', 'i')], )
     x = np.arange(dt1.itemsize, dtype='int8').view(dt1)
     # pytest can catch warnings from v2.8 and up, we ship 2.5
     import warnings
     # catch_warnings() restores the previous filter list on exit, whereas
     # resetwarnings() would also drop filters installed before this test.
     with warnings.catch_warnings():
         warnings.filterwarnings("error")
         try:
             get_buffer_info(x, ['SIMPLE'])
         except UserWarning:
             pass
         else:
             assert False, ("PyPy-specific UserWarning not raised"
                            " on too long format string")
Beispiel #2
0
def csrmm0(x0, y0, z0, x1, y1, z1, z2n='tmp'):
    """Multiply two sparse matrices given as (pointer, index, value) arrays.

    ``x0``/``y0``/``z0`` describe the left operand and ``x1``/``y1``/``z1``
    the right one: ``x[i]:x[i + 1]`` delimits the entries of row ``i``
    inside ``y`` (column indices) and ``z`` (values).
    NOTE(review): this CSR-style layout is inferred from how the arrays are
    indexed below -- confirm against the callers.

    The product is written into three ``darray`` buffers whose file names
    are built from ``z2n`` (``_x.npz``, ``_y.npz``, ``_z.npz``).  Nothing is
    returned.
    NOTE(review): confirm how callers are meant to retrieve the result and
    the number of stored entries.
    """
    R = x0.size
    C = (y0.size + y1.size) * 10
    x2d = darray(z2n + '_x.npz', R, dtype=x0.dtype)
    y2d = darray(z2n + '_y.npz', C, dtype=y0.dtype)
    z2d = darray(z2n + '_z.npz', C, dtype=z0.dtype)

    x2, y2, z2 = x2d.dat, y2d.dat, z2d.dat

    # The scratch arrays are indexed by output column, so they must cover
    # the largest column index of the right operand, not just R entries.
    ncol = int(y1.max()) + 1 if y1.size else 0
    width = max(R, ncol)
    row_i = npy.zeros(width, z0.dtype)   # dense accumulator for one row
    visit = npy.zeros(width, 'int8')     # 1 if the column is already in key
    key = npy.zeros(width, 'int32')      # columns touched by the current row

    z2_ptr = 0
    if R:
        x2[0] = 0
    for i in xrange(R - 1):
        ist, ied = x0[i:i + 2]

        # accumulate sum_k a[i, k] * b[k, j] for every reachable column j
        i2 = 0
        for i0 in xrange(ist, ied):
            k = y0[i0]
            aik = z0[i0]
            # two pointers are needed to delimit row k of the right operand
            kst, ked = x1[k:k + 2]
            for i1 in xrange(kst, ked):
                j = y1[i1]
                row_i[j] += aik * z1[i1]

                if visit[j] == 0:
                    key[i2] = j
                    i2 += 1
                    visit[j] = 1

        # flush the row into the output and clear the scratch state
        for i3 in xrange(i2):
            j = key[i3]
            zij = row_i[j]
            # reset unconditionally so a sum that cancelled to zero does not
            # leak into, or hide the column from, the following rows
            row_i[j] = 0
            visit[j] = 0
            if zij != 0:
                # index z2.size is already out of range, hence >=
                if z2_ptr >= z2.size:
                    y2d.resize()
                    z2d.resize()
                    y2, z2 = y2d.dat, z2d.dat

                y2[z2_ptr] = j
                z2[z2_ptr] = zij
                z2_ptr += 1

        # record the end of row i, including for empty rows
        x2[i + 1] = z2_ptr
def editdistance(a, b):
    """Return the edit distance between sequences *a* and *b*.

    Each insertion, deletion or substitution costs 1.  An argument equal to
    the module-level ``NULL`` value is treated as the empty string.  The
    result is read from a NumPy float table, so it is a NumPy float.
    """
    if a == NULL:
        a = ''
    if b == NULL:
        b = ''
    rows = len(a) + 1
    cols = len(b) + 1
    table = np.zeros((rows, cols))
    # Turning a prefix into the empty sequence costs its length.
    table[:, 0] = np.arange(rows)
    table[0, :] = np.arange(cols)
    for i in range(1, rows):
        for j in range(1, cols):
            if a[i - 1] == b[j - 1]:
                table[i, j] = table[i - 1, j - 1]
            else:
                diag = table[i - 1, j - 1] + 1
                left = table[i - 1, j] + 1
                top = table[i, j - 1] + 1
                table[i, j] = min(diag, top, left)
    # Index the final cell explicitly instead of relying on the values the
    # loop variables happen to hold after the loops.
    return table[rows - 1, cols - 1]
Beispiel #4
0
def editdistance(a, b):
    """Compute the edit distance between *a* and *b* with a full DP table.

    Insertions, deletions and substitutions each cost 1.  An argument equal
    to the module-level ``NULL`` is replaced by the empty string first.  The
    value returned is the last cell of a NumPy float table.
    """
    if a == NULL:
        a = ''
    if b == NULL:
        b = ''
    n_a, n_b = len(a), len(b)
    dist = np.zeros((n_a + 1, n_b + 1))
    # first column / first row: distance from a prefix to the empty sequence
    dist[:, 0] = np.arange(n_a + 1)
    dist[0, :] = np.arange(n_b + 1)
    for row in range(1, n_a + 1):
        for col in range(1, n_b + 1):
            if a[row - 1] != b[col - 1]:
                substitute = dist[row - 1, col - 1] + 1
                from_above = dist[row - 1, col] + 1
                from_left = dist[row, col - 1] + 1
                dist[row, col] = min(substitute, from_left, from_above)
            else:
                dist[row, col] = dist[row - 1, col - 1]
    return dist[n_a, n_b]
Beispiel #5
0
 def main():
     """Read one row of a 300x300 zero array element by element.

     Returns the last element read, ``arr[150, 299]``.
     NOTE(review): presumably a micro-benchmark or JIT probe for PyPy's
     ``_numpypy``; the explicit ``while`` loop may be deliberate -- confirm
     before restructuring.
     """
     import _numpypy.multiarray as np
     arr = np.zeros((300, 300))
     x = 150
     y = 0
     # scalar 2-D indexing with a fixed row and an advancing column
     while y < 300:
         a = arr[x, y]
         y += 1
     return a
Beispiel #6
0
 def main():
     """Read one row of a 300x300 zero array element by element.

     Returns the last element read, ``arr[150, 299]``.
     NOTE(review): presumably a micro-benchmark or JIT probe for PyPy's
     ``_numpypy``; the explicit ``while`` loop may be deliberate -- confirm
     before restructuring.
     """
     import _numpypy.multiarray as np
     arr = np.zeros((300, 300))
     x = 150
     y = 0
     # scalar 2-D indexing with a fixed row and an advancing column
     while y < 300:
         a = arr[x, y]
         y += 1
     return a
Beispiel #7
0
 def main():
     """Walk every element of a 1024x16 array through its ``flat`` accessor.

     The array is zeros plus 42; the function returns the last element read.
     NOTE(review): presumably a micro-benchmark or JIT probe for PyPy's
     ``_numpypy``; the explicit ``while`` loop may be deliberate -- confirm
     before restructuring.
     """
     import _numpypy.multiarray as np
     arr = np.zeros((1024, 16)) + 42
     ai = arr.flat
     i = 0
     # one integer-indexed read on the flat accessor per iteration
     while i < arr.size:
         a = ai[i]
         i += 1
     return a
Beispiel #8
0
 def main():
     """Walk every element of a 1024x16 array through its ``flat`` accessor.

     The array is zeros plus 42; the function returns the last element read.
     NOTE(review): presumably a micro-benchmark or JIT probe for PyPy's
     ``_numpypy``; the explicit ``while`` loop may be deliberate -- confirm
     before restructuring.
     """
     import _numpypy.multiarray as np
     arr = np.zeros((1024, 16)) + 42
     ai = arr.flat
     i = 0
     # one integer-indexed read on the flat accessor per iteration
     while i < arr.size:
         a = ai[i]
         i += 1
     return a
Beispiel #9
0
def csrmm1(r0, c0, d0, r1, c1, d1, fn='tmp'):
    """Multiply two sparse matrices given as (row pointer, column, data).

    ``r0``/``c0``/``d0`` describe the left operand and ``r1``/``c1``/``d1``
    the right one: ``r[i]:r[i + 1]`` delimits the entries of row ``i``
    inside ``c`` (column indices) and ``d`` (values).
    NOTE(review): this CSR-style layout is inferred from the parameter
    comments and the indexing below -- confirm against the callers.

    The product is written into three ``darray`` buffers whose file names
    are built from ``fn`` (``_r.npz``, ``_c.npz``, ``_d.npz``).  Nothing is
    returned.
    NOTE(review): confirm how callers are meant to retrieve the result and
    the number of stored entries.
    """
    # r: row index
    # c: col index
    # d: data

    R = r0.size
    C = (c0.size + c1.size) * 10
    r2d = darray(fn + '_r.npz', R, dtype=r0.dtype)
    c2d = darray(fn + '_c.npz', C, dtype=c0.dtype)
    d2d = darray(fn + '_d.npz', C, dtype=d0.dtype)

    r2, c2, d2 = r2d.dat, c2d.dat, d2d.dat

    # values of ith row; indexed by output column, so it must cover the
    # largest column index of the right operand, not just R entries
    ncol = int(c1.max()) + 1 if c1.size else 0
    di = npy.zeros(max(R, ncol), d0.dtype)

    # columns touched by the current row: membership set plus visit order
    visit = set()
    order = []

    nnz = 0
    if R:
        r2[0] = 0
    for i in xrange(R - 1):
        k00, k01 = r0[i:i + 2]

        for k0i in xrange(k00, k01):
            k = c0[k0i]
            a0ik = d0[k0i]

            # two pointers are needed to delimit row k of the right operand
            k10, k11 = r1[k:k + 2]
            for k1i in xrange(k10, k11):
                j = c1[k1i]
                a1kj = d1[k1i]
                di[j] += a0ik * a1kj

                if j not in visit:
                    visit.add(j)
                    order.append(j)

        # flush the row into the output and clear the scratch state
        for j in order:
            zij = di[j]
            di[j] = 0
            if zij != 0:
                # index d2.size is already out of range, hence >=
                if nnz >= d2.size:
                    c2d.resize()
                    d2d.resize()
                    c2, d2 = c2d.dat, d2d.dat

                c2[nnz] = j
                d2[nnz] = zij
                nnz += 1
        visit.clear()
        del order[:]

        # record the end of row i, including for empty rows
        r2[i + 1] = nnz
Beispiel #10
0
    def test_buffer_info(self):
        """Exercise ``get_buffer_info`` from the ``buffer_test`` extension module.

        Checks that a strided array is rejected for a ``SIMPLE`` request,
        that the reported strides of Fortran- and C-ordered arrays are 8 at
        the expected end, and that a record dtype with many nested
        sub-records triggers a ``UserWarning``.
        """
        try:
            from _numpypy import multiarray as np
        except ImportError:
            skip("pypy built without _numpypy")
        module = self.import_module(name="buffer_test")
        get_buffer_info = module.get_buffer_info
        # Every second element of arange(5): expected to be refused for SIMPLE.
        raises(ValueError, get_buffer_info, np.arange(5)[::2], ("SIMPLE",))
        arr = np.zeros((1, 10), order="F")
        shape, strides = get_buffer_info(arr, ["F_CONTIGUOUS"])
        assert strides[0] == 8
        arr = np.zeros((10, 1), order="C")
        shape, strides = get_buffer_info(arr, ["C_CONTIGUOUS"])
        assert strides[-1] == 8
        # Record dtype with eight nested "b,i" sub-records; per the assertion
        # message below, its format string is expected to be too long.
        dt1 = np.dtype(
            [
                ("a", "b"),
                ("b", "i"),
                ("sub0", np.dtype("b,i")),
                ("sub1", np.dtype("b,i")),
                ("sub2", np.dtype("b,i")),
                ("sub3", np.dtype("b,i")),
                ("sub4", np.dtype("b,i")),
                ("sub5", np.dtype("b,i")),
                ("sub6", np.dtype("b,i")),
                ("sub7", np.dtype("b,i")),
                ("c", "i"),
            ]
        )
        x = np.arange(dt1.itemsize, dtype="int8").view(dt1)
        # pytest can catch warnings from v2.8 and up, we ship 2.5
        import warnings

        # catch_warnings() restores the previous filter list on exit, whereas
        # resetwarnings() would also drop filters installed before this test.
        with warnings.catch_warnings():
            warnings.filterwarnings("error")
            try:
                get_buffer_info(x, ["SIMPLE"])
            except UserWarning:
                pass
            else:
                assert False, "PyPy-specific UserWarning not raised" " on too long format string"
Beispiel #11
0
 def test_buffer_info(self):
     """Check ``get_buffer_info`` on strided, Fortran- and C-ordered arrays.

     Also builds a record-dtype view (``x``) over an int8 range; nothing is
     asserted about it in this function.
     """
     try:
         from _numpypy import multiarray as np
     except ImportError:
         skip('pypy built without _numpypy')
     get_buffer_info = self.import_module(name='buffer_test').get_buffer_info

     # a strided view must be refused for a SIMPLE request
     strided = np.arange(5)[::2]
     raises(ValueError, get_buffer_info, strided, ('SIMPLE', ))

     fortran_arr = np.zeros((1, 10), order='F')
     shape, strides = get_buffer_info(fortran_arr, ['F_CONTIGUOUS'])
     assert strides[0] == 8

     c_arr = np.zeros((10, 1), order='C')
     shape, strides = get_buffer_info(c_arr, ['C_CONTIGUOUS'])
     assert strides[-1] == 8

     # record layout: two scalars, eight nested 'b,i' records, one scalar
     sub_names = ('sub0', 'sub1', 'sub2', 'sub3',
                  'sub4', 'sub5', 'sub6', 'sub7')
     fields = [('a', 'b'), ('b', 'i')]
     fields.extend([(name, np.dtype('b,i')) for name in sub_names])
     fields.append(('c', 'i'))
     dt1 = np.dtype(fields, )
     x = np.arange(dt1.itemsize, dtype='int8').view(dt1)
def display_best_alignment(ak, en, es):
    lk = len(en)
    mk = len(es)
    k_mat = np.zeros((mk, lk))
    for jk in range(lk):
        for ik in range(mk):
            k_mat[ik][jk] = delta[ak, ik, jk]
    print ' '.join(en)
    print ' '.join(es)
    for ik, max_jk in enumerate(np.argmax(k_mat, 1)):
        print ik, max_jk, corpus_target[ak][ik], corpus_source[ak][max_jk]
Beispiel #13
0
def display_best_alignment(ak, en, es):
    lk = len(en)
    mk = len(es)
    k_mat = np.zeros((mk, lk))
    for jk in range(lk):
        for ik in range(mk):
            k_mat[ik][jk] = delta[ak, ik, jk]
    print ' '.join(en)
    print ' '.join(es)
    for ik, max_jk in enumerate(np.argmax(k_mat, 1)):
        print ik, max_jk, corpus_target[ak][ik], corpus_source[ak][max_jk]
Beispiel #14
0
    def __init__(self, capacity=1024, load_factor=.75, ksize=1,
                 ktype=nb.int64, vtype=nb.int64):
        """Set up the storage arrays of the table.

        ``capacity`` is passed through ``self.find_prime`` and the result is
        used as the slot count.  ``keys`` holds ``ksize`` entries per slot,
        ``values`` and ``counts`` hold one entry per slot; only ``counts``
        is zero-initialised.
        """
        self.capacity = self.find_prime(capacity)

        # bookkeeping
        self.size = 0
        self.load = load_factor
        self.ksize = ksize

        # storage, one slot per unit of capacity
        self.keys = np.empty(self.capacity * ksize, dtype=ktype)
        self.values = np.empty(self.capacity, dtype=vtype)
        self.counts = np.zeros(self.capacity, dtype=nb.uint8)
Beispiel #15
0
    def resize(self):
        """Re-allocate the table at a larger capacity and re-insert entries.

        The new capacity is ``self.find_prime`` applied to about 1.62 times
        the old one.  Every old slot whose count is positive is re-hashed
        with ``self.hash_`` and placed by probing: after step ``k`` the next
        candidate is ``(home + k * k) % capacity``, for at most as many
        steps as there were old slots.
        """
        old_cap = self.capacity
        width = self.ksize
        old_keys = self.keys
        old_values = self.values
        old_counts = self.counts

        self.capacity = self.find_prime(np.int64(old_cap * 1.62))
        new_cap = self.capacity

        new_keys = np.empty(new_cap * width, dtype=old_keys.dtype)
        new_values = np.empty(new_cap, dtype=old_values.dtype)
        new_counts = np.zeros(new_cap, dtype=old_counts.dtype)

        for slot in xrange(old_cap):
            # skip slots that hold nothing
            if not old_counts[slot] > 0:
                continue

            value = old_values[slot]
            count = old_counts[slot]
            src = slot * width

            # slot the key hashes to in the new table
            home = self.hash_(old_keys, src, width) % new_cap
            dest = home
            for step in xrange(old_cap):
                free = new_counts[dest] == 0
                if free or self.eq(new_keys, dest * width, old_keys, src, width):
                    break
                dest = (home + step * step) % new_cap

            dst = dest * width
            new_keys[dst:dst + width] = old_keys[src:src + width]
            new_values[dest] = value
            new_counts[dest] = count

        self.keys = new_keys
        self.values = new_values
        self.counts = new_counts
     #pdb.set_trace()
     translations[t, s] = float(p)
 """
 EM iterations
 """
 for iter in range(10):
     counts = dict.fromkeys(counts.iterkeys(), 0.0)
     for k, tokens_source in enumerate(corpus_source):
         #print iter, k, len(delta), len(translations)
         sys.stdout.write(
             'iteration: %d sentence %d len delta %d len translations %d\r'
             % (iter, k, len(delta), len(translations)))
         sys.stdout.flush()
         tokens_target = corpus_target[k].split()
         tokens_source = tokens_source.split()
         t_mat = np.zeros((len(tokens_target), len(tokens_source)))
         for j in range(0, len(tokens_source)):
             for i in range(0, len(tokens_target)):
                 t_mat[i][j] = translations[tokens_target[i],
                                            tokens_source[j]]
         #t_sum = np.sum(t_mat, 1)
         t_sum = t_mat.sum(1)
         #print t_mat
         #print t_sum
         for j in range(0, len(tokens_source)):
             for i in range(0, len(tokens_target)):
                 delta[k, i, j] = t_mat[i][j] / t_sum[i]
                 counts[tokens_target[i], tokens_source[j]] = counts.get(
                     (tokens_target[i], tokens_source[j]),
                     0.0) + delta[k, i, j]
                 counts[tokens_source[j]] = counts.get(
Beispiel #17
0
 def main():
     """Build a list holding each element of a 1500-long zero array halved.

     NOTE(review): presumably a micro-benchmark or JIT probe for PyPy's
     ``_numpypy``; the list-comprehension form may be deliberate -- confirm
     before restructuring.
     """
     N = 1500
     import _numpypy.multiarray as np
     arr = np.zeros(N)
     # one scalar read and one float division per element
     l = [arr[i] / 2. for i in range(N)]
     return l
Beispiel #18
0
    #pp(q)
    """
    EM iterations
    """

    # Ten passes over the corpus.  Each pass zeroes the accumulated counts
    # and refills them from the per-sentence delta values computed below.
    for iter in range(10):
        print "iteration",iter
        # keep the existing keys, reset every value to 0.0
        counts = dict.fromkeys(counts.iterkeys(), 0.0)
        for k, tokens_source in enumerate(corpus_source):
            #print iter, k, len(delta), len(translations)
            # progress line, rewritten in place via the trailing '\r'
            sys.stderr.write('iteration: %d sentence %d len delta %d len translations %d\r' % (iter, k, len(delta), len(translations)))
            sys.stderr.flush()
            tokens_target = corpus_target[k]
            mk = len(tokens_target)
            lk = len(tokens_source)
            # qt_mat[i][j] = q[j, i, lk, mk] * translations[target i, source j]
            qt_mat = np.zeros((mk, lk))
            #print t_mat, t_mat.shape
            for j in range(0, lk):
                for i in range(0, mk):
                    qt_mat[i][j] = q[j, i, lk, mk] * translations[tokens_target[i], tokens_source[j]]
            # per-target-position totals (sum over the source axis)
            qt_sum = qt_mat.sum(1)
            #print qt_mat, qt_sum
            for j in range(0, lk):
                for i in range(0, mk):
                    # normalise by the row total, then add the result to four
                    # count entries: (target, source) pair, source token,
                    # (j, i, lk, mk) and (i, lk, mk)
                    # NOTE(review): a zero row total in qt_sum would divide
                    # by zero here -- confirm it cannot occur.
                    delta[k, i, j] = qt_mat[i][j] / qt_sum[i]
                    counts[tokens_target[i], tokens_source[j]] = counts.get((tokens_target[i], tokens_source[j]), 0.0) + delta[k, i, j]
                    counts[tokens_source[j]] = counts.get(tokens_source[j], 0.0) + delta[k, i, j]
                    counts[j, i, lk, mk] = counts.get((j, i, lk, mk), 0.0) + delta[k, i, j]
                    counts[i, lk, mk] = counts.get((i, lk, mk), 0.0) + delta[k, i, j]

        """
    EM iterations
    """

    for iter in range(10):
        print "iteration", iter
        counts = dict.fromkeys(counts.iterkeys(), 0.0)
        for k, tokens_source in enumerate(corpus_source):
            #print iter, k, len(delta), len(translations)
            sys.stderr.write(
                'iteration: %d sentence %d len delta %d len translations %d\r'
                % (iter, k, len(delta), len(translations)))
            sys.stderr.flush()
            tokens_target = corpus_target[k]
            mk = len(tokens_target)
            lk = len(tokens_source)
            qt_mat = np.zeros((mk, lk))
            #print t_mat, t_mat.shape
            for j in range(0, lk):
                for i in range(0, mk):
                    qt_mat[i][j] = q[j, i, lk,
                                     mk] * translations[tokens_target[i],
                                                        tokens_source[j]]
            qt_sum = qt_mat.sum(1)
            #print qt_mat, qt_sum
            for j in range(0, lk):
                for i in range(0, mk):
                    delta[k, i, j] = qt_mat[i][j] / qt_sum[i]
                    counts[tokens_target[i], tokens_source[j]] = counts.get(
                        (tokens_target[i], tokens_source[j]),
                        0.0) + delta[k, i, j]
                    counts[tokens_source[j]] = counts.get(
Beispiel #20
0
#    return ((x + (x >> 3)) & 030707070707) % 63


def nbit(n):
    """Count the set bits of *n* with three masks and a modulo.

    The masks are 32 bits wide, so the count is only meaningful for
    ``0 <= n < 2**32``.  They are the octal constants 033333333333,
    011111111111 and 030707070707 written in hex.
    """
    # per-3-bit-group bit counts
    triples = n - ((n >> 1) & 0xDB6DB6DB) - ((n >> 2) & 0x49249249)
    # add neighbouring groups so each 6-bit field holds a partial count
    sextets = (triples + (triples >> 3)) & 0xC71C71C7
    # summing base-64 digits is the same as reducing modulo 63
    return sextets % 63


# the last char of the kmer
# A: 1
# T: 10
# G: 100
# C: 1000
# N: 10000
# $: 100000
# One flag bit per possible last character; both letter cases share a flag.
lastc = np.zeros(256, dtype='int8')
lastc[[ord('a'), ord('A')]] = 1 << 0
lastc[[ord('t'), ord('T')]] = 1 << 1
lastc[[ord('g'), ord('G')]] = 1 << 2
lastc[[ord('c'), ord('C')]] = 1 << 3
lastc[[ord('n'), ord('N')]] = 1 << 4
lastc[ord('$')] = 1 << 5  # end of the sequence
lastc[ord('#')] = 0

# number of bits needed to hold the largest flag
offbit = int(lastc.max()).bit_length()
#print('offbit', offbit, bin(max(lastc)))
# mask with the low `offbit` bits set
lowbit = (1 << offbit) - 1
#offbit = 6
#print('offbit', offbit, 'low bin', bin(lowbit))

# reverse next character table
     #pdb.set_trace()
     translations[t, s] = float(p)
 """
 EM iterations
 """
 for iter in range(10):
     counts = dict.fromkeys(counts.iterkeys(), 0.0)
     for k, tokens_source in enumerate(corpus_source):
         #print iter, k, len(delta), len(translations)
         sys.stdout.write(
             'iteration: %d sentence %d len delta %d len translations %d\r'
             % (iter, k, len(delta), len(translations)))
         sys.stdout.flush()
         tokens_target = corpus_target[k].split()
         tokens_source = tokens_source.split()
         t_mat = np.zeros((len(tokens_target), len(tokens_source)))
         for j in range(0, len(tokens_source)):
             for i in range(0, len(tokens_target)):
                 t_mat[i][j] = translations[tokens_target[i],
                                            tokens_source[j]]
         #t_sum = np.sum(t_mat, 1)
         t_sum = t_mat.sum(1)
         #print t_mat
         #print t_sum
         for j in range(0, len(tokens_source)):
             for i in range(0, len(tokens_target)):
                 delta[k, i, j] = t_mat[i][j] / t_sum[i]
                 counts[tokens_target[i], tokens_source[j]] = counts.get(
                     (tokens_target[i], tokens_source[j]),
                     0.0) + delta[k, i, j]
                 counts[tokens_source[j]] = counts.get(
Beispiel #22
0
 def main():
     """Build a list holding each element of a 1500-long zero array halved.

     NOTE(review): presumably a micro-benchmark or JIT probe for PyPy's
     ``_numpypy``; the list-comprehension form may be deliberate -- confirm
     before restructuring.
     """
     N = 1500
     import _numpypy.multiarray as np
     arr = np.zeros(N)
     # one scalar read and one float division per element
     l = [arr[i]/2. for i in range(N)]
     return l