def finalize_labels(labels):
    """Ensure that labels are root or point to a root.

    ``labels`` is treated as a parent-pointer array: entry ``k`` holds the
    index of the parent of ``k``, and an entry that holds its own index is a
    root.  The array is modified in place so that every entry holds the
    index of its root; nothing is returned.

    Parameters
    ----------
    labels : array-like of integers supporting ``.size`` and item assignment
        Parent pointers.  Every chain of pointers must end at a
        self-referencing entry, otherwise the root search does not terminate.
    """
    # Walk from the last index down to 0, as the original loop did.
    for start in range(labels.size - 1, -1, -1):
        node = numba.int_(start)

        # Pass 1: follow parent pointers until a self-referencing entry.
        anc = node
        while anc != labels[anc]:
            anc = numba.int_(labels[anc])

        # Pass 2: re-point every entry on the path directly at the root.
        # The parent must be read *before* the entry is overwritten;
        # reading it afterwards would jump straight to the root and leave
        # the rest of the path uncompressed.
        while labels[node] != anc:
            parent = numba.int_(labels[node])
            labels[node] = anc
            node = parent
def _nb_vector_editdistance(indices, seqs_mat, seqs_L, distance_matrix=identity_nb_distance_matrix, gap_penalty=1):
    """Compute a gap-penalised distance for each pair of rows listed in `indices`.

    This function works OK on its own. Wrapping it with a plain python
    function was a workaround because joblib and multiprocessing seem to
    have an issue retaining default arguments with numba functions.

    Parameters
    ----------
    indices : 2D integer array
        Row ``k`` holds ``[query_i, seq_i]``: the two rows of `seqs_mat`
        to compare.
    seqs_mat : 2D integer array
        One encoded sequence per row; the entries are used as indices into
        `distance_matrix`.
    seqs_L : 1D integer array
        Length of each sequence in `seqs_mat` (same number of rows).
    distance_matrix : 2D array
        Substitution cost looked up as ``distance_matrix[a, b]``.
    gap_penalty : int
        Cost added for each insertion/deletion step.

    Returns
    -------
    dist : np.ndarray of int16, shape (indices.shape[0],)
        One distance per row of `indices`.
    """
    assert seqs_mat.shape[0] == seqs_L.shape[0]
    mx_L = nb.int_(np.max(seqs_L))

    dist = np.zeros(indices.shape[0], dtype=np.int16)

    # As long as ldmat is big enough to accommodate the largest sequence
    # it is OK to only use part of it for the smaller sequences.
    # NOTE that to create a 2D array here it is created 1D and reshaped.
    # NOTE(review): this scratch matrix is shared by every iteration of the
    # nb.prange loop below; if the function is compiled with parallel=True
    # that would be a data race -- confirm against the decorator.
    ldmat = np.zeros(mx_L * mx_L, dtype=np.int16).reshape((mx_L, mx_L))

    for ind_i in nb.prange(indices.shape[0]):
        query_i = indices[ind_i, 0]
        seq_i = indices[ind_i, 1]

        q_L = seqs_L[query_i]
        s_L = seqs_L[seq_i]
        if q_L == s_L:
            # No gaps: plain substitution distance.  This makes the result
            # differ from a strict edit-distance, since the optimal
            # edit-distance may insert the same number of gaps in both
            # sequences.
            for i in range(q_L):
                dist[ind_i] += distance_matrix[seqs_mat[query_i, i], seqs_mat[seq_i, i]]
            continue

        # The matrix does not need re-zeroing: the first row/column are
        # rewritten here and every interior cell that is read below is
        # recomputed first.
        for row in range(1, q_L):
            ldmat[row, 0] = row * gap_penalty

        for col in range(1, s_L):
            ldmat[0, col] = col * gap_penalty

        # NOTE(review): the recurrence runs over rows 1..q_L-1 and columns
        # 1..s_L-1 and compares symbols row-1 / col-1, so the last symbol of
        # each sequence never takes part.  Left unchanged because callers may
        # depend on the current values -- confirm whether this is intended.
        for col in range(1, s_L):
            for row in range(1, q_L):
                ldmat[row, col] = min(
                    ldmat[row - 1, col] + gap_penalty,
                    ldmat[row, col - 1] + gap_penalty,
                    ldmat[row - 1, col - 1]
                    + distance_matrix[seqs_mat[query_i, row - 1], seqs_mat[seq_i, col - 1]])  # substitution

        # Index the final cell explicitly instead of through the loop
        # variables: when q_L or s_L is 1 the loops above never execute, so
        # `row`/`col` would be unbound or left over from a previous pair.
        dist[ind_i] = ldmat[q_L - 1, s_L - 1]
    return dist
def test_unpacking(self):
    # Compile `unpack` once per argument kind, then check that every
    # compiled variant returns 30 for the pair (5, 6) or its equivalent.
    from_list = jit(int_(list_(int_, 2)))(unpack)
    from_int_tuple = jit(int_(tuple_(int_, 2)))(unpack)
    from_obj_tuple = jit(int_(tuple_(object_, 2)))(unpack)
    # Still compiled, although its assertion below is disabled.
    from_iterable = jit(int_(object_))(unpack)
    from_sequence = jit(int_(object_))(unpack)
    from_shape = jit(int_(shape_t), wrap=False)(unpack)

    expected = 30
    self.assertEqual(from_list([5, 6]), expected)
    self.assertEqual(from_int_tuple((5, 6)), expected)
    self.assertEqual(from_obj_tuple((5, 6)), expected)
    # Disabled in the original: the Iterable() case is not asserted.
    # self.assertEqual(from_iterable(Iterable()), expected)
    self.assertEqual(from_sequence(Sequence()), expected)

    # The unwrapped variant is called through its address with a ctypes
    # shape object.
    shape_entry = nb.addressof(from_shape)
    self.assertEqual(shape_entry(A.ctypes.shape), expected)
from __future__ import print_function from timeit import default_timer as time import cProfile import pstats import numpy as np import numba as nb import viscid from viscid import readers from viscid import field from viscid.calculator import seed # nb.float_ = nb.template("nb.float_") @nb.jit(nb.int_(nb.float_[:], nb.float_, nb.int_[:], nb.int_), nopython=True) def closest_ind(crd, point, startinds, m): i = 0 fallback = 0 n = crd.shape[0] forward = n > 1 and crd[1] > crd[0] if startinds[m] < 0: start = 0 elif startinds[m] > n - 1: start = n - 1 else: start = startinds[m] # search linearly... maybe branch prediction makes this better
i = 0 for elem in x: i += 1 if elem > 9: break return i @jit def fill(a): for i in range(len(a)): a[i] += 1 return a @jit(int_(int_[:])) def call_loop(a): s = 0 for x in fill(a): s += x return s class TestLoops(unittest.TestCase): def test_obj_loop1(self): self.assertTrue(obj_loop1(np.array([[None]*10]*10), 1) == 100) def test_obj_loop2(self): self.assertTrue(obj_loop2([1, 2, 3, 10]) == 4) self.assertTrue(obj_loop2(range(100)) == 11)
i = 0 for elem in x: i += 1 if elem > 9: break return i @jit def fill(a): for i in range(len(a)): a[i] += 1 return a @jit(int_(int_[:])) def call_loop(a): s = 0 for x in fill(a): s += x return s class TestLoops(unittest.TestCase): def test_obj_loop1(self): self.assertTrue(obj_loop1(np.array([[None] * 10] * 10), 1) == 100) def test_obj_loop2(self): self.assertTrue(obj_loop2([1, 2, 3, 10]) == 4) self.assertTrue(obj_loop2(range(100)) == 11)
def _nb_running_editdistance(query_i, seqs_mat, seqs_L, radius, density_est=0.05, distance_matrix=identity_nb_distance_matrix, gap_penalty=1):
    """Find every sequence whose distance to sequence `query_i` is <= `radius`.

    Parameters
    ----------
    query_i : int
        Row of `seqs_mat` used as the query.
    seqs_mat : 2D integer array
        One encoded sequence per row; the entries are used as indices into
        `distance_matrix`.
    seqs_L : 1D integer array
        Length of each sequence in `seqs_mat` (same number of rows).
    radius : number
        Inclusive distance threshold for reporting a neighbor.
    density_est : float
        Only used to size the output buffers (see ``chunk_sz`` below).
    distance_matrix : 2D array
        Substitution cost looked up as ``distance_matrix[a, b]``.
    gap_penalty : int
        Cost added for each insertion/deletion step.

    Returns
    -------
    neighbors : np.ndarray of uint32
        Row indices of the sequences within `radius`, in scan order.
    nndists : np.ndarray of int16
        The matching distances, aligned with `neighbors`.
    """
    assert seqs_mat.shape[0] == seqs_L.shape[0]

    q_L = seqs_L[query_i]
    mx_L = np.max(seqs_L)

    # Chunk size for allocating array space to hold neighbors: a minimum of
    # 100 and a maximum of seqs_mat.shape[0].
    chunk_sz = min(max(int((density_est / 2) * seqs_mat.shape[0]) + 1, 100), seqs_mat.shape[0])

    neighbor_count = 0
    neighbors = np.zeros(chunk_sz, dtype=np.uint32)
    nndists = np.zeros(chunk_sz, dtype=np.int16)

    # As long as ldmat is big enough to accommodate the largest sequence
    # it is OK to only use part of it for the smaller sequences.
    # NOTE that to create a 2D array here it is created 1D and reshaped.
    ldmat = np.zeros(nb.int_(q_L) * nb.int_(mx_L), dtype=np.int16).reshape((q_L, mx_L))

    for seq_i in range(seqs_mat.shape[0]):
        s_L = seqs_L[seq_i]
        len_diff = abs(q_L - s_L)
        tot_gap_penalty = len_diff * gap_penalty

        if len_diff == 0:
            # No gaps: plain substitution distance.  This makes the result
            # differ from a strict edit-distance, since the optimal
            # edit-distance may insert the same number of gaps in both
            # sequences.
            tmp_dist = 0
            for i in range(q_L):
                tmp_dist += distance_matrix[seqs_mat[query_i, i], seqs_mat[seq_i, i]]
            if tmp_dist <= radius:
                neighbors[neighbor_count] = seq_i
                nndists[neighbor_count] = tmp_dist
                neighbor_count += 1
                # Grow both buffers by one chunk as soon as they are full.
                if neighbor_count >= neighbors.shape[0]:
                    neighbors = np.concatenate((neighbors, np.zeros(chunk_sz, dtype=np.uint32)))
                    nndists = np.concatenate((nndists, np.zeros(chunk_sz, dtype=np.int16)))
            continue
        elif tot_gap_penalty > radius:
            # The length difference alone already costs more than `radius`.
            continue

        # The matrix does not need re-zeroing: the first row/column are
        # rewritten here and every interior cell that is read below is
        # recomputed first.
        for row in range(1, q_L):
            ldmat[row, 0] = row * gap_penalty

        for col in range(1, s_L):
            ldmat[0, col] = col * gap_penalty

        # NOTE(review): the recurrence runs over rows 1..q_L-1 and columns
        # 1..s_L-1 and compares symbols row-1 / col-1, so the last symbol of
        # each sequence never takes part.  Left unchanged because callers may
        # depend on the current values -- confirm whether this is intended.
        for col in range(1, s_L):
            for row in range(1, q_L):
                ldmat[row, col] = min(
                    ldmat[row - 1, col] + gap_penalty,
                    ldmat[row, col - 1] + gap_penalty,
                    ldmat[row - 1, col - 1]
                    + distance_matrix[seqs_mat[query_i, row - 1], seqs_mat[seq_i, col - 1]])  # substitution

        # Index the final cell explicitly instead of through the loop
        # variables: when q_L or s_L is 1 the loops above never execute, so
        # `row`/`col` would be unbound or left over from a previous sequence.
        final_dist = ldmat[q_L - 1, s_L - 1]
        if final_dist <= radius:
            neighbors[neighbor_count] = seq_i
            nndists[neighbor_count] = final_dist
            neighbor_count += 1
            if neighbor_count >= neighbors.shape[0]:
                neighbors = np.concatenate((neighbors, np.zeros(chunk_sz, dtype=np.uint32)))
                nndists = np.concatenate((nndists, np.zeros(chunk_sz, dtype=np.int16)))

    # Trim the unused tail of the pre-allocated buffers.
    return neighbors[:neighbor_count], nndists[:neighbor_count]
import numpy as np
import ipdb
import numba
from numba import int_ as int_
from numba import float64 as float_
from scipy.sparse.linalg import LinearOperator


# NOTE: `min` and `max` below shadow the Python builtins of the same name
# for the rest of this module.
@numba.njit(int_(int_, int_))
def min(x1, x2):
    """Return the smaller of two integers (`x1` when they are equal)."""
    return x2 if x2 < x1 else x1


@numba.njit(int_(int_, int_))
def max(x1, x2):
    """Return the larger of two integers (`x2` when they are equal)."""
    return x1 if x1 > x2 else x2


@numba.njit(int_(int_))
def factorial(n):
    """Return the product 1 * 2 * ... * n; the result is 1 when n <= 0."""
    product = 1
    for factor in range(1, n + 1):
        product *= factor
    return product
Refer to [1]_. References ---------- .. [1] Trapzoidal rule, wikipedia, https://en.wikipedia.org/wiki/Trapezoidal_rule """ n = x.size dx = np.empty(n, dtype=np.double) for i in range(1, n - 1): dx[i] = 0.5 * (x[i + 1] - x[i - 1]) dx[0] = 0.5 * (x[1] - x[0]) dx[n - 1] = 0.5 * (x[n - 1] - x[n - 2]) sum = 0.0 for i in range(n): sum += dx[i] * integrand[i] return sum ################################################################################ # whether to compile them using numba's LLVM ################################################################################ if Cst.isJIT == True: Trapze = nb.jit([ nb.float64(nb.float64[:], nb.float64[:]), nb.int_(nb.int_[:], nb.int_[:]) ], nopython=True)(Trapze)