def test_buffer_info(self):
    """Exercise ``buffer_test.get_buffer_info`` on ``_numpypy`` arrays.

    Covers three things: a strided view must be refused for a ``SIMPLE``
    request (``ValueError``), the reported strides of Fortran- and
    C-ordered arrays, and the ``UserWarning`` expected for a record dtype
    whose format string is too long.  The test is skipped when
    ``_numpypy`` cannot be imported.
    """
    try:
        from _numpypy import multiarray as np
    except ImportError:
        skip('pypy built without _numpypy')
    module = self.import_module(name='buffer_test')
    get_buffer_info = module.get_buffer_info

    # A step-2 slice must not be accepted for a SIMPLE buffer request.
    raises(ValueError, get_buffer_info, np.arange(5)[::2], ('SIMPLE', ))

    arr = np.zeros((1, 10), order='F')
    shape, strides = get_buffer_info(arr, ['F_CONTIGUOUS'])
    assert strides[0] == 8

    arr = np.zeros((10, 1), order='C')
    shape, strides = get_buffer_info(arr, ['C_CONTIGUOUS'])
    assert strides[-1] == 8

    dt1 = np.dtype([('a', 'b'), ('b', 'i'),
                    ('sub0', np.dtype('b,i')),
                    ('sub1', np.dtype('b,i')),
                    ('sub2', np.dtype('b,i')),
                    ('sub3', np.dtype('b,i')),
                    ('sub4', np.dtype('b,i')),
                    ('sub5', np.dtype('b,i')),
                    ('sub6', np.dtype('b,i')),
                    ('sub7', np.dtype('b,i')),
                    ('c', 'i')],
                   )
    x = np.arange(dt1.itemsize, dtype='int8').view(dt1)

    # pytest can catch warnings from v2.8 and up, we ship 2.5
    import warnings
    # catch_warnings() puts the previous filter list back on exit.  The
    # former filterwarnings()/resetwarnings() pair instead discarded every
    # filter in the process, including ones this test never installed.
    with warnings.catch_warnings():
        warnings.filterwarnings("error")
        try:
            get_buffer_info(x, ['SIMPLE'])
        except UserWarning:
            pass
        else:
            assert False, ("PyPy-specific UserWarning not raised"
                           " on too long format string")
def csrmm0(x0, y0, z0, x1, y1, z1, z2n='tmp'):
    """Multiply two CSR-style sparse matrices, one output row at a time.

    ``(x0, y0, z0)`` and ``(x1, y1, z1)`` are read as the row-pointer,
    column-index and value arrays of the left and right operand.  The
    product is written into three ``darray`` objects created under the
    names ``z2n + '_x.npz'``, ``z2n + '_y.npz'`` and ``z2n + '_z.npz'``
    (row pointers, column indices, values).  Nothing is returned.

    NOTE(review): ``darray`` is defined elsewhere; this code assumes
    ``.dat`` exposes the backing array and ``.resize()`` enlarges it -
    confirm.
    NOTE(review): the per-row work arrays have ``x0.size`` entries but are
    indexed by column numbers of the right operand, so this presumably
    requires the right operand to have at most ``x0.size`` columns -
    confirm against callers.
    """
    R = x0.size
    C = (y0.size + y1.size) * 10
    x2d = darray(z2n + '_x.npz', R, dtype=x0.dtype)
    y2d = darray(z2n + '_y.npz', C, dtype=y0.dtype)
    z2d = darray(z2n + '_z.npz', C, dtype=z0.dtype)
    x2, y2, z2 = x2d.dat, y2d.dat, z2d.dat

    # Dense accumulator for the current output row, plus the list of
    # columns touched so far (key[:i2]) and a flag array to dedupe them.
    row_i = npy.zeros(R, z0.dtype)
    visit = npy.zeros(R, 'int8')
    key = npy.zeros(R, 'int32')

    z2_ptr = 0
    if R > 0:
        # The first row always starts at offset 0.
        x2[0] = 0

    for i in xrange(x0.size - 1):
        ist, ied = x0[i:i + 2]

        # Accumulate sum_k a[i, k] * b[k, j] for every reachable column j.
        i2 = 0
        for i0 in xrange(ist, ied):
            k = y0[i0]
            aik = z0[i0]
            # Two consecutive row pointers delimit row k of the right
            # operand (a one-element slice cannot be unpacked into two).
            kst, ked = x1[k:k + 2]
            for i1 in xrange(kst, ked):
                j = y1[i1]
                row_i[j] += aik * z1[i1]
                if visit[j] == 0:
                    key[i2] = j
                    i2 += 1
                    visit[j] = 1

        # Flush the touched columns to the output and clear the scratch
        # state so that the next row starts from zero.
        for i3 in xrange(i2):
            j = key[i3]
            zij = row_i[j]
            row_i[j] = 0
            visit[j] = 0
            if zij != 0:
                if z2_ptr >= z2.size:
                    y2d.resize()
                    z2d.resize()
                    y2, z2 = y2d.dat, z2d.dat
                y2[z2_ptr] = j
                z2[z2_ptr] = zij
                z2_ptr += 1

        # Every row, including an empty one, records where it ends.
        x2[i + 1] = z2_ptr
def editdistance(a, b):
    """Return the edit distance between sequences ``a`` and ``b``.

    Insertions, deletions and substitutions each cost 1.  An argument
    equal to ``NULL`` is treated as the empty string.  The result is the
    bottom-right cell of the dynamic-programming table, i.e. a NumPy
    float.
    """
    if a == NULL:
        a = ''
    if b == NULL:
        b = ''

    rows = len(a) + 1
    cols = len(b) + 1
    table = np.zeros((rows, cols))
    # Distance from/to the empty prefix is the prefix length.
    table[:, 0] = np.arange(rows)
    table[0, :] = np.arange(cols)

    for r in range(1, rows):
        for c in range(1, cols):
            if a[r - 1] == b[c - 1]:
                # Matching characters extend the diagonal for free.
                table[r, c] = table[r - 1, c - 1]
                continue
            cheapest = min(table[r - 1, c - 1],
                           table[r, c - 1],
                           table[r - 1, c])
            table[r, c] = cheapest + 1

    return table[rows - 1, cols - 1]
def editdistance(a, b):
    """Compute the unit-cost edit distance from ``a`` to ``b``.

    ``NULL`` arguments are replaced by ``''`` first.  A
    ``(len(a) + 1) x (len(b) + 1)`` table is filled row by row and its
    last cell (a NumPy float) is returned.
    """
    if a == NULL:
        a = ''
    if b == NULL:
        b = ''

    n_a, n_b = len(a), len(b)
    table = np.zeros((n_a + 1, n_b + 1))
    # First column / first row: cost of deleting or inserting a prefix.
    table[:, 0] = np.arange(n_a + 1)
    table[0, :] = np.arange(n_b + 1)

    for i, item_a in enumerate(a, 1):
        for j, item_b in enumerate(b, 1):
            if item_a == item_b:
                table[i, j] = table[i - 1, j - 1]
            else:
                substitute = table[i - 1, j - 1] + 1
                from_above = table[i - 1, j] + 1
                from_left = table[i, j - 1] + 1
                table[i, j] = min(substitute, from_left, from_above)

    return table[n_a, n_b]
def main():
    """Read row 150 of a 300x300 zero array one element at a time.

    Returns the last element read, ``arr[150, 299]``.

    NOTE(review): the hand-written counter loop looks deliberate
    (presumably a benchmark / trace probe) - confirm before restructuring.
    """
    import _numpypy.multiarray as np
    arr = np.zeros((300, 300))
    x = 150
    y = 0
    # Fixed row, column index advanced manually on every iteration.
    while y < 300:
        a = arr[x, y]
        y += 1
    return a
def main():
    """Read every element of a 1024x16 array through its ``flat`` view.

    The array is filled with 42 (zeros plus 42); the last element read is
    returned.

    NOTE(review): the hand-written counter loop looks deliberate
    (presumably a benchmark / trace probe) - confirm before restructuring.
    """
    import _numpypy.multiarray as np
    arr = np.zeros((1024, 16)) + 42
    ai = arr.flat
    i = 0
    # Index the flat view with a manually advanced counter.
    while i < arr.size:
        a = ai[i]
        i += 1
    return a
def csrmm1(r0, c0, d0, r1, c1, d1, fn='tmp'):
    """Multiply two CSR-style sparse matrices, one output row at a time.

    For each operand: ``r`` is the row-pointer array, ``c`` the column
    indices and ``d`` the values.  The product is written into three
    ``darray`` objects created under the names ``fn + '_r.npz'``,
    ``fn + '_c.npz'`` and ``fn + '_d.npz'``.  Nothing is returned.

    Within each output row the columns are emitted in ascending order.

    NOTE(review): ``darray`` is defined elsewhere; this code assumes
    ``.dat`` exposes the backing array and ``.resize()`` enlarges it -
    confirm.
    NOTE(review): the row accumulator has ``r0.size`` entries but is
    indexed by column numbers of the right operand, so this presumably
    requires the right operand to have at most ``r0.size`` columns -
    confirm against callers.
    """
    # r: row index
    # c: col index
    # d: data
    R = r0.size
    C = (c0.size + c1.size) * 10
    r2d = darray(fn + '_r.npz', R, dtype=r0.dtype)
    c2d = darray(fn + '_c.npz', C, dtype=c0.dtype)
    d2d = darray(fn + '_d.npz', C, dtype=d0.dtype)
    r2, c2, d2 = r2d.dat, c2d.dat, d2d.dat

    # values of ith row
    di = npy.zeros(R, d0.dtype)
    # columns touched while building the current row
    visit = set()

    ptr = 0
    if R > 0:
        # The first row always starts at offset 0.
        r2[0] = 0

    for i in xrange(r0.size - 1):
        k00, k01 = r0[i:i + 2]

        # Accumulate sum_k a0[i, k] * a1[k, j] into di[j].
        for k0i in xrange(k00, k01):
            k = c0[k0i]
            a0ik = d0[k0i]
            # Two consecutive row pointers delimit row k of the right
            # operand.
            k10, k11 = r1[k:k + 2]
            for k1i in xrange(k10, k11):
                j = int(c1[k1i])
                di[j] += a0ik * d1[k1i]
                visit.add(j)

        # Flush the touched columns and reset the scratch state.
        for j in sorted(visit):
            dij = di[j]
            di[j] = 0
            if dij != 0:
                if ptr >= d2.size:
                    c2d.resize()
                    d2d.resize()
                    c2, d2 = c2d.dat, d2d.dat
                c2[ptr] = j
                d2[ptr] = dij
                ptr += 1
        visit.clear()

        # Every row, including an empty one, records where it ends.
        r2[i + 1] = ptr
def test_buffer_info(self):
    """Exercise ``buffer_test.get_buffer_info`` on ``_numpypy`` arrays.

    Covers three things: a strided view must be refused for a ``SIMPLE``
    request (``ValueError``), the reported strides of Fortran- and
    C-ordered arrays, and the ``UserWarning`` expected for a record dtype
    whose format string is too long.  The test is skipped when
    ``_numpypy`` cannot be imported.
    """
    try:
        from _numpypy import multiarray as np
    except ImportError:
        skip("pypy built without _numpypy")
    module = self.import_module(name="buffer_test")
    get_buffer_info = module.get_buffer_info

    # A step-2 slice must not be accepted for a SIMPLE buffer request.
    raises(ValueError, get_buffer_info, np.arange(5)[::2], ("SIMPLE",))

    arr = np.zeros((1, 10), order="F")
    shape, strides = get_buffer_info(arr, ["F_CONTIGUOUS"])
    assert strides[0] == 8

    arr = np.zeros((10, 1), order="C")
    shape, strides = get_buffer_info(arr, ["C_CONTIGUOUS"])
    assert strides[-1] == 8

    dt1 = np.dtype(
        [
            ("a", "b"),
            ("b", "i"),
            ("sub0", np.dtype("b,i")),
            ("sub1", np.dtype("b,i")),
            ("sub2", np.dtype("b,i")),
            ("sub3", np.dtype("b,i")),
            ("sub4", np.dtype("b,i")),
            ("sub5", np.dtype("b,i")),
            ("sub6", np.dtype("b,i")),
            ("sub7", np.dtype("b,i")),
            ("c", "i"),
        ]
    )
    x = np.arange(dt1.itemsize, dtype="int8").view(dt1)

    # pytest can catch warnings from v2.8 and up, we ship 2.5
    import warnings

    # catch_warnings() puts the previous filter list back on exit.  The
    # former filterwarnings()/resetwarnings() pair instead discarded every
    # filter in the process, including ones this test never installed.
    with warnings.catch_warnings():
        warnings.filterwarnings("error")
        try:
            get_buffer_info(x, ["SIMPLE"])
        except UserWarning:
            pass
        else:
            assert False, (
                "PyPy-specific UserWarning not raised"
                " on too long format string"
            )
def test_buffer_info(self):
    """Check ``buffer_test.get_buffer_info`` on ``_numpypy`` arrays.

    A strided view must raise ``ValueError`` for a ``SIMPLE`` request, and
    the strides reported for Fortran- and C-ordered arrays are checked.
    Finally a nested record dtype is built and an ``int8`` range is viewed
    through it.  Skipped when ``_numpypy`` cannot be imported.
    """
    try:
        from _numpypy import multiarray as np
    except ImportError:
        skip('pypy built without _numpypy')

    get_buffer_info = self.import_module(name='buffer_test').get_buffer_info

    strided = np.arange(5)[::2]
    raises(ValueError, get_buffer_info, strided, ('SIMPLE', ))

    fortran_arr = np.zeros((1, 10), order='F')
    shape, strides = get_buffer_info(fortran_arr, ['F_CONTIGUOUS'])
    assert strides[0] == 8

    c_arr = np.zeros((10, 1), order='C')
    shape, strides = get_buffer_info(c_arr, ['C_CONTIGUOUS'])
    assert strides[-1] == 8

    # Record layout: a, b, eight nested 'b,i' records sub0..sub7, then c.
    fields = [('a', 'b'), ('b', 'i')]
    fields += [('sub%d' % n, np.dtype('b,i')) for n in range(8)]
    fields.append(('c', 'i'))
    dt1 = np.dtype(fields)

    raw = np.arange(dt1.itemsize, dtype='int8')
    x = raw.view(dt1)
def display_best_alignment(ak, en, es): lk = len(en) mk = len(es) k_mat = np.zeros((mk, lk)) for jk in range(lk): for ik in range(mk): k_mat[ik][jk] = delta[ak, ik, jk] print ' '.join(en) print ' '.join(es) for ik, max_jk in enumerate(np.argmax(k_mat, 1)): print ik, max_jk, corpus_target[ak][ik], corpus_source[ak][max_jk]
def __init__(self, capacity=1024, load_factor=.75, ksize=1, ktype=nb.int64, vtype=nb.int64):
    """Set up an empty table.

    capacity    -- requested slot count; the stored capacity is whatever
                   ``self.find_prime(capacity)`` returns
    load_factor -- stored as ``self.load``
    ksize       -- number of ``ktype`` elements that make up one key
    ktype/vtype -- element dtypes of the key and value storage
    """
    self.capacity = self.find_prime(capacity)
    self.ksize = ksize
    self.load = load_factor
    self.size = 0

    slots = self.capacity
    # Keys are stored flat: slot s occupies keys[s * ksize:(s + 1) * ksize].
    self.keys = np.empty(slots * ksize, dtype=ktype)
    self.values = np.empty(slots, dtype=vtype)
    # Zero-initialised, unlike keys/values which start uninitialised.
    self.counts = np.zeros(slots, dtype=nb.uint8)
def resize(self):
    """Grow the table and re-insert every occupied slot.

    The new capacity is ``self.find_prime(np.int64(old_capacity * 1.62))``.
    Fresh key/value/count arrays are allocated with the old dtypes, each
    slot whose count is positive is re-hashed into them, and the new
    arrays replace the old ones on ``self``.
    """
    old_cap = self.capacity
    width = self.ksize
    old_keys = self.keys
    old_values = self.values
    old_counts = self.counts

    self.capacity = self.find_prime(np.int64(old_cap * 1.62))
    new_cap = self.capacity
    new_keys = np.empty(new_cap * width, dtype=old_keys.dtype)
    new_values = np.empty(new_cap, dtype=old_values.dtype)
    new_counts = np.zeros(new_cap, dtype=old_counts.dtype)

    for slot in xrange(old_cap):
        if not old_counts[slot] > 0:
            # Empty slot: nothing to move.
            continue

        src = slot * width
        home = self.hash_(old_keys, src, width) % new_cap

        # Probe home, then home + step**2 for step = 0, 1, 2, ... until a
        # free slot or an equal key is found (at most old_cap checks).
        dest = home
        for step in xrange(old_cap):
            if new_counts[dest] == 0:
                break
            if self.eq(new_keys, dest * width, old_keys, src, width):
                break
            dest = (home + step * step) % new_cap

        dst = dest * width
        new_keys[dst:dst + width] = old_keys[src:src + width]
        new_values[dest] = old_values[slot]
        new_counts[dest] = old_counts[slot]

    self.keys = new_keys
    self.values = new_values
    self.counts = new_counts
#pdb.set_trace() translations[t, s] = float(p) """ EM iterations """ for iter in range(10): counts = dict.fromkeys(counts.iterkeys(), 0.0) for k, tokens_source in enumerate(corpus_source): #print iter, k, len(delta), len(translations) sys.stdout.write( 'iteration: %d sentence %d len delta %d len translations %d\r' % (iter, k, len(delta), len(translations))) sys.stdout.flush() tokens_target = corpus_target[k].split() tokens_source = tokens_source.split() t_mat = np.zeros((len(tokens_target), len(tokens_source))) for j in range(0, len(tokens_source)): for i in range(0, len(tokens_target)): t_mat[i][j] = translations[tokens_target[i], tokens_source[j]] #t_sum = np.sum(t_mat, 1) t_sum = t_mat.sum(1) #print t_mat #print t_sum for j in range(0, len(tokens_source)): for i in range(0, len(tokens_target)): delta[k, i, j] = t_mat[i][j] / t_sum[i] counts[tokens_target[i], tokens_source[j]] = counts.get( (tokens_target[i], tokens_source[j]), 0.0) + delta[k, i, j] counts[tokens_source[j]] = counts.get(
def main():
    """Return a list holding each element of a 1500-zero array halved.

    NOTE(review): the element-by-element comprehension looks deliberate
    (presumably a benchmark / trace probe) - confirm before vectorising.
    """
    N = 1500
    import _numpypy.multiarray as np
    arr = np.zeros(N)
    # One indexed read and one float division per element.
    l = [arr[i] / 2. for i in range(N)]
    return l
#pp(q) """ EM iterations """ for iter in range(10): print "iteration",iter counts = dict.fromkeys(counts.iterkeys(), 0.0) for k, tokens_source in enumerate(corpus_source): #print iter, k, len(delta), len(translations) sys.stderr.write('iteration: %d sentence %d len delta %d len translations %d\r' % (iter, k, len(delta), len(translations))) sys.stderr.flush() tokens_target = corpus_target[k] mk = len(tokens_target) lk = len(tokens_source) qt_mat = np.zeros((mk, lk)) #print t_mat, t_mat.shape for j in range(0, lk): for i in range(0, mk): qt_mat[i][j] = q[j, i, lk, mk] * translations[tokens_target[i], tokens_source[j]] qt_sum = qt_mat.sum(1) #print qt_mat, qt_sum for j in range(0, lk): for i in range(0, mk): delta[k, i, j] = qt_mat[i][j] / qt_sum[i] counts[tokens_target[i], tokens_source[j]] = counts.get((tokens_target[i], tokens_source[j]), 0.0) + delta[k, i, j] counts[tokens_source[j]] = counts.get(tokens_source[j], 0.0) + delta[k, i, j] counts[j, i, lk, mk] = counts.get((j, i, lk, mk), 0.0) + delta[k, i, j] counts[i, lk, mk] = counts.get((i, lk, mk), 0.0) + delta[k, i, j] """
EM iterations """ for iter in range(10): print "iteration", iter counts = dict.fromkeys(counts.iterkeys(), 0.0) for k, tokens_source in enumerate(corpus_source): #print iter, k, len(delta), len(translations) sys.stderr.write( 'iteration: %d sentence %d len delta %d len translations %d\r' % (iter, k, len(delta), len(translations))) sys.stderr.flush() tokens_target = corpus_target[k] mk = len(tokens_target) lk = len(tokens_source) qt_mat = np.zeros((mk, lk)) #print t_mat, t_mat.shape for j in range(0, lk): for i in range(0, mk): qt_mat[i][j] = q[j, i, lk, mk] * translations[tokens_target[i], tokens_source[j]] qt_sum = qt_mat.sum(1) #print qt_mat, qt_sum for j in range(0, lk): for i in range(0, mk): delta[k, i, j] = qt_mat[i][j] / qt_sum[i] counts[tokens_target[i], tokens_source[j]] = counts.get( (tokens_target[i], tokens_source[j]), 0.0) + delta[k, i, j] counts[tokens_source[j]] = counts.get(
# return ((x + (x >> 3)) & 030707070707) % 63
def nbit(n):
    """Return the number of set bits in ``n``.

    Uses the subtract-and-mask trick on 3-bit groups followed by a
    modulo-63 fold.

    NOTE(review): the masks are 32 bits wide, so this is presumably only
    meant for 0 <= n < 2**32 - confirm against callers.
    """
    mask_33 = 0xDB6DB6DB  # 3681400539
    mask_11 = 0x49249249  # 1227133513
    mask_07 = 0xC71C71C7  # 3340530119
    # After this step every 3-bit group of x holds that group's bit count.
    x = n - ((n >> 1) & mask_33) - ((n >> 2) & mask_11)
    # Merge neighbouring groups into 6-bit fields, then sum the fields.
    folded = (x + (x >> 3)) & mask_07
    return folded % 63


# the last char of the kmer, one flag bit per symbol (both letter cases):
# A: 1
# T: 10
# G: 100
# C: 1000
# N: 10000
# $: 100000
lastc = np.zeros(256, dtype='int8')
lastc[list(map(ord, 'aA'))] = 1 << 0
lastc[list(map(ord, 'tT'))] = 1 << 1
lastc[list(map(ord, 'gG'))] = 1 << 2
lastc[list(map(ord, 'cC'))] = 1 << 3
lastc[list(map(ord, 'nN'))] = 1 << 4
lastc[ord('$')] = 1 << 5
# end of the sequence
lastc[ord('#')] = 0

# Number of bits needed to hold the largest flag (6 with the table above).
offbit = int(max(lastc)).bit_length()
# Mask with the low `offbit` bits set.
lowbit = (1 << offbit) - 1

# reverse next character table
def main():
    """Return a list holding each element of a 1500-zero array halved.

    NOTE(review): the element-by-element comprehension looks deliberate
    (presumably a benchmark / trace probe) - confirm before vectorising.
    """
    N = 1500
    import _numpypy.multiarray as np
    arr = np.zeros(N)
    # One indexed read and one float division per element.
    l = [arr[i]/2. for i in range(N)]
    return l