Example #1
0
def finalize_labels(labels):
    """Ensure that labels are root or point to a root.

    Visits every index from the last to the first.  For each index the
    chain ``i -> labels[i] -> labels[labels[i]] -> ...`` is followed until
    an entry that points to itself (a root) is found, and then every entry
    on that chain is rewritten to point directly at the root.

    ``labels`` is modified in place; nothing is returned.
    """
    for i in range(labels.size - 1, -1, -1):
        i = numba.int_(i)
        anc = i

        # Walk up until an entry that is its own label is reached.
        while anc != labels[anc]:
            anc = numba.int_(labels[anc])

        # Path compression.  The next hop has to be read *before* the entry
        # is overwritten; otherwise the walk jumps straight to the root and
        # the intermediate entries on the path are left uncompressed.
        while labels[i] != anc:
            next_i = numba.int_(labels[i])
            labels[i] = anc
            i = next_i
Example #2
0
def _nb_vector_editdistance(indices,
                            seqs_mat,
                            seqs_L,
                            distance_matrix=identity_nb_distance_matrix,
                            gap_penalty=1):
    """Compute one distance for every (query, sequence) pair in ``indices``.

    This function works OK on its own. Wrapping it with a plain Python
    function was a workaround because joblib and multiprocessing seem to
    have an issue retaining default arguments with numba functions.

    Parameters
    ----------
    indices : 2D integer array
        Row ``k`` holds two row numbers into ``seqs_mat``/``seqs_L``:
        ``indices[k, 0]`` is the query and ``indices[k, 1]`` the sequence
        it is compared with.
    seqs_mat : 2D array
        One encoded sequence per row; its entries are used to index
        ``distance_matrix``.
    seqs_L : 1D array
        Length of each row of ``seqs_mat`` (same number of rows).
    distance_matrix : 2D array
        Substitution cost looked up as ``distance_matrix[a, b]``.
    gap_penalty : int
        Cost added for every gap.

    Returns
    -------
    dist : 1D ``np.int16`` array with ``indices.shape[0]`` entries.
    """
    assert seqs_mat.shape[0] == seqs_L.shape[0]
    mx_L = nb.int_(np.max(seqs_L))

    dist = np.zeros(indices.shape[0], dtype=np.int16)
    # As long as ldmat is big enough to accommodate the largest sequence
    # it is OK to only use part of it for the smaller sequences.
    # NOTE that to create a 2D array it must be created 1D and reshaped.
    # NOTE(review): ldmat is shared by every iteration of the prange loop
    # below; if this function is compiled with parallel=True that looks
    # like a data race -- confirm against the jit options used by callers.
    ldmat = np.zeros(mx_L * mx_L, dtype=np.int16).reshape((mx_L, mx_L))
    for ind_i in nb.prange(indices.shape[0]):
        query_i = indices[ind_i, 0]
        seq_i = indices[ind_i, 1]

        q_L = seqs_L[query_i]
        s_L = seqs_L[seq_i]
        if q_L == s_L:
            # No gaps: substitution distance.
            # This will make it differ from a strict edit-distance since
            # the optimal edit-distance may insert the same number of gaps
            # in both sequences.
            for i in range(q_L):
                dist[ind_i] += distance_matrix[seqs_mat[query_i, i],
                                               seqs_mat[seq_i, i]]
            continue

        # The table does not need re-zeroing: every cell that is read below
        # is (re)written first, and ldmat[0, 0] is never modified.
        for row in range(1, q_L):
            ldmat[row, 0] = row * gap_penalty

        for col in range(1, s_L):
            ldmat[0, col] = col * gap_penalty

        # NOTE(review): the table spans q_L x s_L cells, so the last element
        # of each sequence is never compared; a textbook edit-distance table
        # is (q_L + 1) x (s_L + 1).  Left unchanged -- confirm intent.
        for col in range(1, s_L):
            for row in range(1, q_L):
                ldmat[row, col] = min(
                    ldmat[row - 1, col] + gap_penalty,
                    ldmat[row, col - 1] + gap_penalty,
                    ldmat[row - 1, col - 1] +
                    distance_matrix[seqs_mat[query_i, row - 1],
                                    seqs_mat[seq_i, col - 1]])  # substitution

        # Index the final cell explicitly instead of relying on the loop
        # variables: when either length is 1 the loops above do not run and
        # ``row``/``col`` would be unbound or left over from a previous pair.
        dist[ind_i] = ldmat[nb.int_(q_L) - 1, nb.int_(s_L) - 1]
    return dist
Example #3
0
    def test_unpacking(self):
        """Compile ``unpack`` for several argument types; each call must give 30."""
        expected = 30

        list_unpack = jit(int_(list_(int_, 2)))(unpack)
        tuple_unpack = jit(int_(tuple_(int_, 2)))(unpack)
        obj_tuple_unpack = jit(int_(tuple_(object_, 2)))(unpack)
        # Compiled but never called: the Iterable assertion below is disabled.
        iter_unpack = jit(int_(object_))(unpack)
        seq_unpack = jit(int_(object_))(unpack)
        shape_unpack = jit(int_(shape_t), wrap=False)(unpack)

        self.assertEqual(list_unpack([5, 6]), expected)
        self.assertEqual(tuple_unpack((5, 6)), expected)
        self.assertEqual(obj_tuple_unpack((5, 6)), expected)
        # self.assertEqual(iter_unpack(Iterable()), 30)
        self.assertEqual(seq_unpack(Sequence()), expected)

        # The unwrapped variant is invoked through its address.
        shape_unpack_c = nb.addressof(shape_unpack)
        self.assertEqual(shape_unpack_c(A.ctypes.shape), expected)
Example #4
0
    def test_unpacking(self):
        """Compile ``unpack`` for several argument types; each call must give 30."""
        expected = 30

        list_unpack = jit(int_(list_(int_, 2)))(unpack)
        tuple_unpack = jit(int_(tuple_(int_, 2)))(unpack)
        obj_tuple_unpack = jit(int_(tuple_(object_, 2)))(unpack)
        # Compiled but never called: the Iterable assertion below is disabled.
        iter_unpack = jit(int_(object_))(unpack)
        seq_unpack = jit(int_(object_))(unpack)
        shape_unpack = jit(int_(shape_t), wrap=False)(unpack)

        self.assertEqual(list_unpack([5, 6]), expected)
        self.assertEqual(tuple_unpack((5, 6)), expected)
        self.assertEqual(obj_tuple_unpack((5, 6)), expected)
        # self.assertEqual(iter_unpack(Iterable()), 30)
        self.assertEqual(seq_unpack(Sequence()), expected)

        # The unwrapped variant is invoked through its address.
        shape_unpack_c = nb.addressof(shape_unpack)
        self.assertEqual(shape_unpack_c(A.ctypes.shape), expected)
Example #5
0
from __future__ import print_function
from timeit import default_timer as time
import cProfile
import pstats

import numpy as np
import numba as nb

import viscid
from viscid import readers
from viscid import field
from viscid.calculator import seed

# nb.float_ = nb.template("nb.float_")

@nb.jit(nb.int_(nb.float_[:], nb.float_, nb.int_[:], nb.int_),
        nopython=True)
def closest_ind(crd, point, startinds, m):
    i = 0
    fallback = 0
    n = crd.shape[0]
    forward = n > 1 and crd[1] > crd[0]

    if startinds[m] < 0:
        start = 0
    elif startinds[m] > n - 1:
        start = n - 1
    else:
        start = startinds[m]

    # search linearly... maybe branch prediction makes this better
Example #6
0
    i = 0
    for elem in x:
        i += 1
        if elem > 9:
            break
    return i


@jit
def fill(a):
    """Add one to every element of ``a`` in place and return ``a``."""
    length = len(a)
    for idx in range(length):
        a[idx] += 1
    return a


@jit(int_(int_[:]))
def call_loop(a):
    """Return the sum of the items obtained by iterating over ``fill(a)``."""
    total = 0
    for item in fill(a):
        total += item
    return total


class TestLoops(unittest.TestCase):
    """Checks the return values of ``obj_loop1`` and ``obj_loop2``."""

    def test_obj_loop1(self):
        # 10 x 10 array whose entries are all None.
        grid = np.array([[None] * 10] * 10)
        # assertEqual reports both values on failure, unlike assertTrue(a == b).
        self.assertEqual(obj_loop1(grid, 1), 100)

    def test_obj_loop2(self):
        self.assertEqual(obj_loop2([1, 2, 3, 10]), 4)
        self.assertEqual(obj_loop2(range(100)), 11)
Example #7
0
    i = 0
    for elem in x:
        i += 1
        if elem > 9:
            break
    return i


@jit
def fill(a):
    """Add one to every element of ``a`` in place and return ``a``."""
    length = len(a)
    for idx in range(length):
        a[idx] += 1
    return a


@jit(int_(int_[:]))
def call_loop(a):
    """Return the sum of the items obtained by iterating over ``fill(a)``."""
    total = 0
    for item in fill(a):
        total += item
    return total


class TestLoops(unittest.TestCase):
    """Checks the return values of ``obj_loop1`` and ``obj_loop2``."""

    def test_obj_loop1(self):
        # 10 x 10 array whose entries are all None.
        grid = np.array([[None] * 10] * 10)
        # assertEqual reports both values on failure, unlike assertTrue(a == b).
        self.assertEqual(obj_loop1(grid, 1), 100)

    def test_obj_loop2(self):
        self.assertEqual(obj_loop2([1, 2, 3, 10]), 4)
        self.assertEqual(obj_loop2(range(100)), 11)
Example #8
0
def _nb_running_editdistance(query_i, seqs_mat, seqs_L, radius, density_est=0.05, distance_matrix=identity_nb_distance_matrix, gap_penalty=1):
    """Collect every sequence whose distance to sequence ``query_i`` is <= ``radius``.

    Parameters
    ----------
    query_i : int
        Row of ``seqs_mat``/``seqs_L`` used as the query.
    seqs_mat : 2D array
        One encoded sequence per row; its entries are used to index
        ``distance_matrix``.
    seqs_L : 1D array
        Length of each row of ``seqs_mat`` (same number of rows).
    radius : number
        Largest distance for which a sequence is reported.
    density_est : float
        Used only to size the chunks in which the output arrays grow.
    distance_matrix : 2D array
        Substitution cost looked up as ``distance_matrix[a, b]``.
    gap_penalty : int
        Cost added for every gap.

    Returns
    -------
    neighbors : 1D ``np.uint32`` array of row numbers within ``radius``.
    nndists : 1D ``np.int16`` array with the matching distances.
    """
    assert seqs_mat.shape[0] == seqs_L.shape[0]
    q_L = seqs_L[query_i]
    mx_L = np.max(seqs_L)

    # Chunk size for allocating array space to hold neighbors: should be a
    # minimum of 100 and a max of seqs_mat.shape[0].
    chunk_sz = min(max(int((density_est/2) * seqs_mat.shape[0]) + 1, 100), seqs_mat.shape[0])

    neighbor_count = 0
    neighbors = np.zeros(chunk_sz, dtype=np.uint32)
    nndists = np.zeros(chunk_sz, dtype=np.int16)

    # As long as ldmat is big enough to accommodate the largest sequence
    # it is OK to only use part of it for the smaller sequences.
    # NOTE that to create a 2D array it must be created 1D and reshaped.
    ldmat = np.zeros(nb.int_(q_L) * nb.int_(mx_L), dtype=np.int16).reshape((q_L, mx_L))
    for seq_i in range(seqs_mat.shape[0]):
        s_L = seqs_L[seq_i]
        len_diff = abs(q_L - s_L)
        tot_gap_penalty = len_diff * gap_penalty

        if len_diff == 0:
            # No gaps: substitution distance.
            # This will make it differ from a strict edit-distance since
            # the optimal edit-distance may insert the same number of gaps
            # in both sequences.
            tmp_dist = 0
            for i in range(q_L):
                tmp_dist += distance_matrix[seqs_mat[query_i, i], seqs_mat[seq_i, i]]
            if tmp_dist <= radius:
                neighbors[neighbor_count] = seq_i
                nndists[neighbor_count] = tmp_dist
                neighbor_count += 1
                # Grow both output arrays by one chunk once they are full.
                if neighbor_count >= neighbors.shape[0]:
                    neighbors = np.concatenate((neighbors, np.zeros(chunk_sz, dtype=np.uint32)))
                    nndists = np.concatenate((nndists, np.zeros(chunk_sz, dtype=np.int16)))
            continue
        elif tot_gap_penalty > radius:
            # The length difference alone already costs more than radius.
            continue

        # The table does not need re-zeroing: every cell that is read below
        # is (re)written first, and ldmat[0, 0] is never modified.
        for row in range(1, q_L):
            ldmat[row, 0] = row * gap_penalty

        for col in range(1, s_L):
            ldmat[0, col] = col * gap_penalty

        # NOTE(review): the table spans q_L x s_L cells, so the last element
        # of each sequence is never compared; a textbook edit-distance table
        # is (q_L + 1) x (s_L + 1).  Left unchanged -- confirm intent.
        for col in range(1, s_L):
            for row in range(1, q_L):
                ldmat[row, col] = min(ldmat[row-1, col] + gap_penalty,
                                     ldmat[row, col-1] + gap_penalty,
                                     ldmat[row-1, col-1] + distance_matrix[seqs_mat[query_i, row-1], seqs_mat[seq_i, col-1]]) # substitution

        # Index the final cell explicitly instead of relying on the loop
        # variables: when either length is 1 the loops above do not run and
        # ``row``/``col`` would be unbound or left over from a previous
        # sequence.
        final_dist = ldmat[nb.int_(q_L) - 1, nb.int_(s_L) - 1]
        if final_dist <= radius:
            neighbors[neighbor_count] = seq_i
            nndists[neighbor_count] = final_dist
            neighbor_count += 1
            # Grow both output arrays by one chunk once they are full.
            if neighbor_count >= neighbors.shape[0]:
                neighbors = np.concatenate((neighbors, np.zeros(chunk_sz, dtype=np.uint32)))
                nndists = np.concatenate((nndists, np.zeros(chunk_sz, dtype=np.int16)))
    # Trim the unused tail of the over-allocated output arrays.
    return neighbors[:neighbor_count], nndists[:neighbor_count]
import numpy as np
import ipdb
import numba
from numba import int_ as int_
from numba import float64 as float_
from scipy.sparse.linalg import LinearOperator


@numba.njit(int_(int_, int_))
def min(x1, x2):
    """Return the smaller of the two integers (``x1`` when they are equal)."""
    return x2 if x2 < x1 else x1


@numba.njit(int_(int_, int_))
def max(x1, x2):
    """Return the larger of the two integers (``x2`` when they are equal)."""
    return x1 if x1 > x2 else x2


@numba.njit(int_(int_))
def factorial(n):
    """Return the product ``1 * 2 * ... * n``; the result is 1 when ``n < 1``."""
    product = 1
    for factor in range(1, n + 1):
        product *= factor
    return product
Example #10
0
    Refer to [1]_.

    References
    ----------
    .. [1] Trapezoidal rule, Wikipedia, https://en.wikipedia.org/wiki/Trapezoidal_rule
    """

    n = x.size
    dx = np.empty(n, dtype=np.double)
    for i in range(1, n - 1):
        dx[i] = 0.5 * (x[i + 1] - x[i - 1])
    dx[0] = 0.5 * (x[1] - x[0])
    dx[n - 1] = 0.5 * (x[n - 1] - x[n - 2])

    sum = 0.0
    for i in range(n):
        sum += dx[i] * integrand[i]
    return sum


################################################################################
# whether to compile them using numba's LLVM
################################################################################

# When the Cst.isJIT flag compares equal to True, rebind the module-level
# name Trapze to a numba-compiled version of itself.  Two explicit
# signatures are requested (float64 arrays -> float64, and int_ arrays ->
# int_) and compilation is forced into nopython mode.
# NOTE(review): `== True` only matches values equal to True; presumably
# Cst.isJIT is a bool -- confirm before simplifying the comparison.
if Cst.isJIT == True:
    Trapze = nb.jit([
        nb.float64(nb.float64[:], nb.float64[:]),
        nb.int_(nb.int_[:], nb.int_[:])
    ],
                    nopython=True)(Trapze)