Ejemplo n.º 1
0
def zoom_numbaThread(data,  chunkIndices, zoomArray):
    """
    Bilinearly resample ``data`` onto the grid of ``zoomArray`` for one chunk
    of output rows.

    The routine is written as plain nested loops so that it can be compiled
    with numba. No zoom factor is passed in: the source coordinate of every
    output pixel is derived from the shapes of ``data`` and ``zoomArray``.

    Parameters:
        data (ndarray): The 2-D array to zoom
        chunkIndices (sequence): ``(start, stop)`` rows of ``zoomArray`` to fill, ``stop`` exclusive
        zoomArray (ndarray): The 2-D array that receives the result

    Returns:
        zoomArray (ndarray): The same ``zoomArray'' object that was passed in
    """
    rowStart = chunkIndices[0]
    rowStop = chunkIndices[1]
    nCols = zoomArray.shape[1]

    for row in range(rowStart, rowStop):
        # Fractional source row. The 0.99999999 offset appears intended to keep
        # the coordinate just below the last index so ``rowLo+1`` stays valid.
        srcRow = row*numba.float32(data.shape[0]-1)/(zoomArray.shape[0]-0.99999999)
        rowLo = numba.int32(srcRow)
        rowFrac = srcRow-rowLo

        for col in range(nCols):
            srcCol = col*numba.float32(data.shape[1]-1)/(zoomArray.shape[1]-0.99999999)
            colLo = numba.int32(srcCol)

            # Interpolate along the first axis at the lower bracketing column ...
            lowCorner = data[rowLo, colLo]
            lowEdge = lowCorner + (data[rowLo+1, colLo] - lowCorner)*rowFrac

            # ... and at the upper bracketing column.
            highCorner = data[rowLo, colLo+1]
            highEdge = highCorner + (data[rowLo+1, colLo+1] - highCorner)*rowFrac

            # Blend the two edge values along the second axis.
            zoomArray[row, col] = lowEdge + (highEdge - lowEdge)*(srcCol-colLo)

    return zoomArray
Ejemplo n.º 2
0
def bilinear_interp_numba_inbounds(data, xCoords, yCoords, chunkIndices, interpArray):
    """
    Bilinear interpolation of ``data`` at the grid formed by ``xCoords`` and
    ``yCoords``, restricted to a chunk of the x-coordinates.

    Plain nested loops, intended to be compiled with numba. Restricting the
    work to ``chunkIndices`` lets several threads fill different parts of the
    same output array. No bounds checks are made: for every coordinate the
    neighbour at ``int(coord) + 1`` is read as well.

    Parameters:
        data (ndarray): The 2-D array to interpolate
        xCoords (ndarray): x-coordinates, indexed with a single subscript
        yCoords (ndarray): y-coordinates, indexed with a single subscript
        chunkIndices (ndarray): A 2 element array, with (start Index, stop Index) to work on for the x-dimension.
        interpArray (ndarray): The array to place the calculation

    Returns:
        interpArray (ndarray): The same ``interpArray'' object that was passed in
    """
    nY = yCoords.shape[0]
    for ix in range(chunkIndices[0], chunkIndices[1]):
        xPos = xCoords[ix]
        xLo = numba.int32(xPos)
        xFrac = xPos - xLo

        for iy in range(nY):
            yPos = yCoords[iy]
            yLo = numba.int32(yPos)

            # Interpolate along x on the two rows of samples that bracket yPos.
            nearCorner = data[xLo, yLo]
            nearEdge = nearCorner + (data[xLo + 1, yLo] - nearCorner) * xFrac

            farCorner = data[xLo, yLo + 1]
            farEdge = farCorner + (data[xLo + 1, yLo + 1] - farCorner) * xFrac

            # Then blend the two along y.
            interpArray[ix, iy] = nearEdge + (farEdge - nearEdge) * (yPos - yLo)
    return interpArray
Ejemplo n.º 3
0
def cu_mat_power(A, power, power_A):
    """Write ``A[y, x] ** int32(power)`` into ``power_A[y, x]`` for this thread's element.

    The element handled is the 2-D position returned by ``cuda.grid(2)``;
    positions outside the shape of ``power_A`` do nothing.
    """
    row, col = cuda.grid(2)
    n_rows, n_cols = power_A.shape

    # Only positions that fall inside the output do any work.
    if col < n_cols and row < n_rows:
        power_A[row, col] = math.pow(A[row, col], int32(power))
Ejemplo n.º 4
0
def geometric_propagation_numba(phase_screens, metapupil_coords, output_phase, thread_indices):
    """
    Add the bilinearly interpolated phase of every layer onto ``output_phase``.

    Plain nested loops, intended to be compiled with numba. Only the rows
    given by ``thread_indices`` are processed, so several threads can each
    work on their own part of ``output_phase``.

    Parameters:
        phase_screens (ndarray): 3-D array indexed as ``[layer, x, y]``
        metapupil_coords (ndarray): 3-D array indexed as ``[layer, 0, k]`` for x-coordinates and ``[layer, 1, k]`` for y-coordinates
        output_phase (ndarray): 2-D array the interpolated phase is accumulated into (``+=``)
        thread_indices (ndarray): A 2 element array, with (start Index, stop Index) to work on for the x-dimension.

    Returns:
        output_phase (ndarray): The same ``output_phase'' object that was passed in

    Note:
        ``metapupil_coords`` may be modified in place: a final coordinate that
        lies exactly on the last pixel of a screen is reduced by 1e-6.
    """
    # The y index runs over the coordinate axis (axis 2), the same axis the
    # x index ``i`` addresses; axis 0 is the layer axis.
    n_coords = metapupil_coords.shape[2]

    for layer in range(phase_screens.shape[0]):
        # A final coordinate sitting exactly on the last pixel would make the
        # ``+ 1`` neighbour lookups below run off the array, so nudge it inwards.
        if metapupil_coords[layer, 0, -1] == phase_screens.shape[1] - 1:
            metapupil_coords[layer, 0, -1] -= 1e-6
        if metapupil_coords[layer, 1, -1] == phase_screens.shape[2] - 1:
            metapupil_coords[layer, 1, -1] -= 1e-6

        for i in range(thread_indices[0], thread_indices[1]):
            x = metapupil_coords[layer, 0, i]
            x1 = numba.int32(x)
            for j in range(n_coords):
                y = metapupil_coords[layer, 1, j]
                y1 = numba.int32(y)

                # Interpolate along x on both bracketing y samples ...
                xGrad1 = phase_screens[layer, x1 + 1, y1] - phase_screens[layer, x1, y1]
                a1 = phase_screens[layer, x1, y1] + xGrad1 * (x - x1)

                xGrad2 = phase_screens[layer, x1 + 1, y1 + 1] - phase_screens[layer, x1, y1 + 1]
                a2 = phase_screens[layer, x1, y1 + 1] + xGrad2 * (x - x1)

                # ... then along y, and accumulate this layer's contribution.
                yGrad = a2 - a1
                output_phase[i, j] += a1 + yGrad * (y - y1)

    return output_phase
Ejemplo n.º 5
0
 def test_4(self):
     """Run ``func3`` against the reference for float64, float32, int32 and uint32 input."""
     func = self.funcs['func3']
     # One two-argument signature per supported element type.
     sig = [ty(ty, ty) for ty in (int32, uint32, float32, float64)]

     A = np.arange(100, dtype=np.float64)
     self._run_and_compare(func, sig, A, A)
     # Each conversion starts from the previously converted array.
     for dtype in (np.float32, np.int32, np.uint32):
         A = A.astype(dtype)
         self._run_and_compare(func, sig, A, A)
Ejemplo n.º 6
0
    def _test_template_4(self, target):
        """Check a ``vector_add`` ufunc built for ``target`` against ``np.add``."""
        signatures = [ty(ty, ty) for ty in (int32, uint32, float32, float64)]
        compiled_add = vectorize(signatures, target=target)(vector_add)
        reference_add = np.add

        def check(dtype):
            sample = np.linspace(0., 100., 500).astype(dtype)
            actual = compiled_add(sample, sample)
            expected = reference_add(sample, sample)
            np.testing.assert_allclose(expected, actual)

        for dtype in (np.double, np.float32, np.int32, np.uint32):
            check(dtype)
def sentiment_feature(basic_train,basic_test):
    """Compute a lexicon-based sentiment score for every row of two frames.

    The lexicon is read from the tab-separated file ``sentiment_word.csv`` in
    the current working directory. Its first line is a header and only the
    first, sixth and seventh columns (word, strength, polarity) are used.

    For each text in the ``clean&segment`` column (words separated by single
    spaces) the score is the sum, over the lexicon entries whose word occurs
    in the text, of ``+strength`` for polarity ``'1'``, ``0`` for polarity
    ``'0'`` or ``'3'`` and ``-strength`` for any other polarity.

    Args:
        basic_train: DataFrame with at least the columns ``clean&segment`` and ``pid``.
        basic_test: DataFrame with the same two columns.

    Returns:
        tuple: ``(train, test)`` DataFrames indexed by ``pid`` with a single
        ``sentiment`` column.
    """
    import io

    # The helpers below work on pandas rows and Python strings, so they are
    # plain Python functions rather than numba-compiled ufuncs.
    def compute_scores(x):
        if x['极性'] =='0' or x['极性'] =='3':
            return 0
        flag = 1 if x['极性']=='1' else -1
        return flag * int(x['强度'])

    def compute_sentiment_scores(line):
        words = np.array(line.split(' '))
        mask = sentiment['词语'].isin(words)
        if not mask.any():
            return 0
        return sum(sentiment[mask].apply(compute_scores,axis=1))

    # Decode while reading so the words are text on both Python 2 and 3.
    with io.open('sentiment_word.csv', encoding='utf-8') as file:
        rows = [line.strip().split('\t') for line in file]
    # Drop over-long rows by building a new list; removing entries from the
    # list being iterated would skip the row after each removal.
    rows = [row for row in rows if len(row) <= 10]

    sentiment = pd.DataFrame(columns=rows[0],data=rows[1:])
    sentiment.columns = range(10)
    sentiment.drop([7,8,9],axis=1,inplace=True)
    sentiment.columns = ['词语','词性','词义数','词义序号','分类','强度','极性']
    sentiment.drop(['词性','词义数','词义序号','分类'],axis=1,inplace=True)

    train = basic_train[['clean&segment','pid']].copy()
    test = basic_test[['clean&segment','pid']].copy()

    train['sentiment'] = train['clean&segment'].map(compute_sentiment_scores)
    test['sentiment'] = test['clean&segment'].map(compute_sentiment_scores)

    train.drop('clean&segment',axis=1,inplace=True)
    test.drop('clean&segment',axis=1,inplace=True)

    train.set_index('pid',inplace=True)
    test.set_index('pid',inplace=True)
    return train,test
Ejemplo n.º 8
0
def template_vectorize(self, target):
    """Build a ``vector_add`` ufunc for ``target`` and compare it with ``np.add``."""
    # Native ufunc compiled for one signature per element type.
    signatures = [ty(ty, ty) for ty in (int32, uint32, float32, float64)]
    compiled_add = vectorize(signatures, target=target)(vector_add)

    # NumPy's own ufunc is the reference.
    reference_add = np.add

    def check(dtype):
        sample = np.linspace(0., 100., 500).astype(dtype)
        actual = compiled_add(sample, sample)
        expected = reference_add(sample, sample)
        self.assertTrue(np.allclose(expected, actual))

    for dtype in (np.double, np.float32, np.int32, np.uint32):
        check(dtype)
Ejemplo n.º 9
0
    return wij, B, proportions

@njit
def calc_mult(w, p):
    """Draw one multinomial sample per row.

    Row ``i`` of the returned ``(n, n)`` int32 array is a multinomial draw of
    ``w[i]`` trials with probabilities ``p[i]``; rows whose ``w[i]`` equals
    zero are filled with zeros.
    """
    n = w.shape[0]
    draws = np.zeros((n, n), dtype=np.int32)
    for row in range(n):
        if w[row] == 0:
            draws[row, :] = np.zeros(n)
        else:
            draws[row, :] = np.random.multinomial(w[row], p[row])
    return draws

        
#from numba import vectorize, int64, float64
@vectorize([int32(int32, int32)])
def vec_randunif(l, h):
    """Element-wise ``np.random.uniform(l, h)``.

    The ufunc is declared with an int32 return type, so the floating-point
    draw is presumably converted to an integer on return - confirm.
    """
    draw = np.random.uniform(l, h)
    return draw

@njit
#@jit(locals={'new_observation': numba.types.int32[:]}, nopython=True)  
def calc(observation, action, price, action_L, high, p, distance_ij, lost_sales_cost  ):
    
    epsilons = vec_randunif(action_L, high)
    demand = action + epsilons
    w = np.minimum(demand, observation)
    wij = calc_mult(w, p)

    num_lost_sales = demand - w
    dwij=np.multiply(distance_ij, wij)
    
Ejemplo n.º 10
0
    xc -= xc_floor
    for i in range(yc_floor.shape[0]):
        for j in range(yc_floor.shape[1]):
            yf = min(Ly - 1, max(0, yc_floor[i, j]))
            xf = min(Lx - 1, max(0, xc_floor[i, j]))
            yf1 = min(Ly - 1, yf + 1)
            xf1 = min(Lx - 1, xf + 1)
            y = yc[i, j]
            x = xc[i, j]
            Y[i, j] = (np.float32(I[yf, xf]) * (1 - y) * (1 - x) +
                       np.float32(I[yf, xf1]) * (1 - y) * x +
                       np.float32(I[yf1, xf]) * y * (1 - x) +
                       np.float32(I[yf1, xf1]) * y * x)


@vectorize([int32(float32)], nopython=True)
def nfloor(y):
    """Element-wise ``math.floor`` of a float32 input, declared to return int32."""
    floored = math.floor(y)
    return floored


@njit([
    'int16[:, :,:], float32[:,:,:], float32[:,:,:], float32[:,:], float32[:,:], float32[:,:,:]',
    'float32[:, :,:], float32[:,:,:], float32[:,:,:], float32[:,:], float32[:,:], float32[:,:,:]'
],
      parallel=True)
def shift_coordinates(data, yup, xup, mshy, mshx, Y):
    """ shift data into yup and xup coordinates

    Parameters
    -------------
Ejemplo n.º 11
0
    if thresholds is None:
        thresholds = cfg.SZO.EVALUATION.THRESHOLDS
    assert 5 == prediction.ndim
    assert 5 == truth.ndim
    assert prediction.shape == truth.shape
    assert prediction.shape[2] == 1
    thresholds = [rainfall_to_pixel(thresholds[i]) for i in range(len(thresholds))]
    thresholds = sorted(thresholds)
    ret = _get_hit_miss_counts_numba(prediction=prediction,
                                     truth=truth,
                                     mask=mask,
                                     thresholds=thresholds)
    return ret[:, :, :, 0], ret[:, :, :, 1], ret[:, :, :, 2], ret[:, :, :, 3]


@jit(int32(float32, float32, boolean, float32))
def _get_hit_miss_counts_numba(prediction, truth, mask, thresholds):
    seqlen, batch_size, _, height, width = prediction.shape
    threshold_num = len(thresholds)
    ret = np.zeros(shape=(seqlen, batch_size, threshold_num, 4), dtype=np.int32)

    for i in range(seqlen):
        for j in range(batch_size):
            for m in range(height):
                for n in range(width):
                    if mask[i][j][0][m][n]:
                        for k in range(threshold_num):
                            bpred = prediction[i][j][0][m][n] >= thresholds[k]
                            btruth = truth[i][j][0][m][n] >= thresholds[k]
                            ind = (1 - btruth) * 2 + (1 - bpred)
                            ret[i][j][k][ind] += 1
Ejemplo n.º 12
0
 def hist(val, vmin, vptp, vres):
     """Return ``(val - vmin) / vptp`` scaled by ``vres`` and converted with ``int32``."""
     fraction = (val - vmin) / vptp
     return int32(fraction * vres)
Ejemplo n.º 13
0
    def decode(self):
        width = self.ihdr_info['width']
        height = self.ihdr_info['height']
        bit_depth = self.ihdr_info['bit depth']    
        color_type = self.ihdr_info['color type']
        pixel_width = self.__pixel_width
        
        def ifilter0(m, N, buf, bpp):
            '''Type 0: No filter'''
            return 

        @numba.jit((int32, int32, int32[:, :], int32))
        def ifilter1(m, N, buf, bpp):
            '''Type 1: Inverse Sub filter'''
            for k in range(bpp, N):
                buf[m, k] += buf[m, k-bpp]
                buf[m, k] &= 0xff

        @numba.jit((int32, int32, int32[:, :], int32))
        def ifilter2(m, N, buf, bpp):
            '''Type 2: Inverse Up filter'''
            buf[m, :] += buf[(m-1), :]
            buf[m, :] &= 0xff

        @numba.jit((int32, int32, int32[:, :], int32))
        def ifilter3(m, N, buf, bpp):
            '''Type 3: Inverse Average filter'''
            for k in range(bpp):
                buf[m, k] += buf[(m-1), k] // 2
                buf[m, k] &= 0xff
            for k in range(bpp, N):
                buf[m, k] += (buf[m, (k-bpp)] + buf[(m-1), k]) // 2
                buf[m, k] &= 0xff
        
        @numba.jit(int32(int32, int32, int32))
        def predictor(a, b, c):
            '''Helper function for ifilter4.
a = left, b = above, c = upper left.'''
            p = a + b -c
            pa = abs(p - a)
            pb = abs(p - b)
            pc = abs(p - c)
            if pa <= pb and pa <= pc:
                return a
            elif pb <= pc:
                return b
            else:
                return c    

        @numba.jit((int32, int32, int32[:, :], int32))
        def ifilter4(m, N, buf, bpp):
            '''Type 4: Inverse Paeth filter'''
            for k in range(bpp):
                buf[m, k] += buf[(m-1), k]
                buf[m, k] &= 0xff
            for k in range(bpp, N):
                buf[m, k] += predictor(buf[m, (k-bpp)], buf[(m-1), k], buf[(m-1), (k-bpp)])
                buf[m, k] &= 0xff

        def ifilter(byte_stream):
            '''inverse filter
                before: decompressed data stream
                width:  width of the image
                height: height of the image
                return value: data stream which has been inverse filtered'''

            bwidth = int(math.ceil(width * bit_depth * pixel_width[color_type] / 8.0))
            bpp = self.__bpp
            filter_list = [ifilter0, ifilter1, ifilter2, ifilter3, ifilter4]
            buf = np.empty((height+1, bwidth+1), dtype=np.int)
            buf[0, :] = 0
            buf[1:, :] = np.reshape(np.fromstring(byte_stream, dtype=np.ubyte), (height, bwidth+1))
            for m in range(1, height+1):
                filter_type = buf[m, 0]
                if filter_type == 0: continue
                filter_list[filter_type](m, bwidth, buf[:, 1:], bpp)
            byte_mtx = np.empty((height, bwidth), dtype=np.ubyte)
            byte_mtx[:, :] = buf[1:, 1:]
            return byte_mtx

        def split_byte(b, width):
            mask = 2**width - 1
            li = []
            for k in range(8//width):
                li.append(b & mask)
                b >>= width
            li.reverse()
            return li

        def bytes_to_pixels(mtx, bit_depth, img_width):
#            if bit_depth < 8:
#                for idx, line in enumerate(mtx):
#                    pixels = []
#                    for B in line:
#                        pixels.extend(split_byte(B, bit_depth))
#                    pixels = pixels[:img_width]
#                    mtx[idx] = pixels
#            if bit_depth == 16:
#                for idx, line in enumerate(mtx):
#                    pixels = []
#                    for k in range(img_width):
#                        pixels.append(line[2*k]*2**8 + line[2*k+1])
#                    mtx[idx] = pixels
            if bit_depth == 8:
                return mtx
            if bit_depth == 16:
                return mtx.view(np.ushort) # reinterpret_cast 
            else:
                raise NotImplementedError

        com_stream = StringIO()
        with open(self.__filename, 'rb') as f:
            for chunk in self.__chunks:
                if chunk['type'] == 'IDAT':
                    f.seek(chunk['data pos'])
                    com_stream.write(f.read(chunk['len']))
        byte_stream = zlib.decompress(com_stream.getvalue())
        pix_mtx = ifilter(byte_stream)
        pix_mtx = bytes_to_pixels(pix_mtx, bit_depth, width)
        

        
        pixel_type = np.ubyte if bit_depth <=8 else np.ushort
        
        pix_mtx = np.array(pix_mtx, dtype=pixel_type)
        pix_mtx.shape = (height, width, pixel_width[color_type])
        return pix_mtx
Ejemplo n.º 14
0
    r = linspace(xmin, xmax, width)
    i = linspace(ymin, ymax, height)
    n = [[0] * width for _ in range(height)]
    for x in range(width):
        for y in range(height):
            n[y][x] = mandel_numba(complex(r[x], i[y]), maxiter)

    return n


##############################################################################

#Numba Vectorize


@vectorize([int32(complex64, int32)], target='parallel')
def mandel_numba_vect(c, maxiter):
    """Iterate ``z -> z*z + c`` from zero and return the escape iteration.

    Returns the index of the first iteration after which the squared modulus
    exceeds 4.0. If that never happens within ``maxiter`` iterations, the
    index of the last iteration is returned.
    """
    next_re = 0
    z_re = 0
    z_im = 0
    for step in range(maxiter):
        next_re = z_re * z_re - z_im * z_im + c.real
        z_im = 2 * z_re * z_im + c.imag
        z_re = next_re
        if z_re * z_re + z_im * z_im > 4.0:  # squared modulus
            return step
    return step


def mandel_set_numba_vect(xmin, xmax, ymin, ymax, width, height, maxiter):
    r1 = np.linspace(xmin, xmax, width, dtype=np.float32)
Ejemplo n.º 15
0
    Args:
        x1 (array): First component of vector 1
        y1 (array): Second component of vector 1
        z1 (array): Third component of vector 1
        x2 (array): First component of vector 2
        y2 (array): Second component of vector 2
        z2 (array): Third component of vector 2

    Returns:
        r2 (array): Element-wise squared distance (see definition)

    .. math::

        r2 = (x1 - x2)^{2} + (y1 - y2)^{2} + (z1 - z2)^{2}
    '''
    return (x1 - x2)**2 + (y1 - y2)**2 + (z1 - z2)**2


# When the ``pkg_numba`` configuration entry is truthy, rebind ``vmag3`` and
# ``vdist3`` to numba-vectorized ufuncs built from the pure-Python versions.
if global_config['pkg_numba']:
    from numba import vectorize, float64, float32, int64, int32
    # Three-argument signatures, one per supported element type.
    vmag3 = vectorize([int32(int32, int32, int32),
                       int64(int64, int64, int64),
                       float32(float32, float32, float32),
                       float64(float64, float64, float64)])(vmag3)

    # Six-argument signatures (two 3-component vectors), one per element type.
    vdist3 = vectorize([int32(int32, int32, int32, int32, int32, int32),
                        int64(int64, int64, int64, int64, int64, int64),
                        float32(float32, float32, float32, float32, float32, float32),
                        float64(float64, float64, float64, float64, float64, float64)])(vdist3)
Ejemplo n.º 16
0
from numba import njit
from numba import int32, float32, prange
from numba.core import types
from numba import typeof
from numba.typed import List, Dict
from numba.core.errors import TypingError
from numba.tests.support import (TestCase, MemoryLeakMixin, override_config,
                                 forbid_codegen, skip_parfors_unsupported)

from numba.core.unsafe.refcount import get_refcount
from numba.experimental import jitclass

# Module-level typed list of int32 holding the values 1, 2 and 3, for testing
# purposes.
global_typed_list = List.empty_list(int32)
for i in (1, 2, 3):
    global_typed_list.append(int32(i))


def to_tl(l):
    """ Return a new int32 typed-list holding the items of iterable ``l``, in order. """
    typed = List.empty_list(int32)
    for item in l:
        typed.append(item)
    return typed


class TestTypedList(MemoryLeakMixin, TestCase):
    def test_basic(self):
        l = List.empty_list(int32)
        # len
        self.assertEqual(len(l), 0)
Ejemplo n.º 17
0
import numpy as np

from numba import vectorize
from numba import cuda, int32, float32, float64
from numba.cuda.testing import skip_on_cudasim
from numba.cuda.testing import CUDATestCase
from numba.core import config
import unittest

# Signatures the vectorized kernels in this module are compiled for.
sig = [int32(int32, int32),
       float32(float32, float32),
       float64(float64, float64)]

# Compile for the GPU unless the CUDA simulator is enabled.
target = "cpu" if config.ENABLE_CUDASIM else "cuda"

test_dtypes = (np.float32, np.int32)


@skip_on_cudasim("ufunc API unsupported in the simulator")
class TestCUDAVectorize(CUDATestCase):
    N = 1000001

    def test_scalar(self):
        @vectorize(sig, target=target)
        def vector_add(a, b):
            return a + b
Ejemplo n.º 18
0
                    eq_t[h] += 1.0 / nb_best_hand

    # impossible : error
    else:
        return -1

    # normalize eq_w_agg and eq_t_agg
    for h in xrange(p):
        eq_agg[h, 0] = eq_w[h] / n
        eq_agg[h, 1] = eq_t[h] / n

    return eq_agg


# numba-compiled versions of ``rank`` and ``exhaustive_block`` with explicit
# type signatures.
rank_fast = jit(
    int32(int32[:], int32[:], uint32[:], int32[:], int32, int32, int32[:],
          int32[:], int32[:]))(rank)
# NOTE(review): the visible tail of ``exhaustive_block`` returns either -1 or
# ``eq_agg``, which is indexed with two subscripts; the 1-D ``int32[:]``
# return type declared here looks inconsistent with that - confirm.
exhaustive_block_fast = jit(int32[:](int32[:, :], int32[:], int32[:],
                                     uint32[:], int32[:], int32, int32,
                                     int32[:], int32[:],
                                     int32[:]))(exhaustive_block)


def exhaustive_eval(player_card, table_card):
    """compute all possible games given the player/table cards (as a numbers from 0 to 51) and return equity win/tie for each player"""

    p = player_card.shape[0]
    equity_arr = np.zeros([p, 2], dtype=np.float32)

    print '\n---------------- Exhaustive eval start'
    print 'player_card=\n{}'.format(player_card)
    print 'table_card=\n{}'.format(table_card)
Ejemplo n.º 19
0
    neighbours[6, 0] = i + 1
    neighbours[7, 0] = i + 1

    neighbours[0, 1] = j + 1
    neighbours[1, 1] = j - 1
    neighbours[2, 1] = j
    neighbours[3, 1] = j
    neighbours[4, 1] = j + 1
    neighbours[5, 1] = j - 1
    neighbours[6, 1] = j + 1
    neighbours[7, 1] = j - 1

    neighbours %= grid_length


@jit(int32(int8[:, :], int32[:, :]), nopython=True)
def count_neighbours(in_grid, neighbours):
    """Return how many of the listed sites hold the value 1.

    Each row of ``neighbours`` is a ``(row, column)`` index into ``in_grid``.
    """
    live = np.int32(0)
    for k in range(neighbours.shape[0]):
        row = neighbours[k, 0]
        col = neighbours[k, 1]
        if in_grid[row, col] == 1:
            live += 1
    return live


@jit((int32[:, :], int8[:, :], int8[:, :]), nopython=True)
def grid_sweep(neighbours, in_grid, out_grid):
    """Sweep the grid once with game of life rules."""
    for i in range(in_grid.shape[0]):
        for j in range(in_grid.shape[1]):
Ejemplo n.º 20
0
                j += 1
                t[j] = t0

            # Update t0, y0, z0
            t0, y0, z0 = t1, y1, z1
        # end

        # Update y if last y0 is greater than (or equal) threshold
        if cmp2(h, abs(y0 - y[t[j]])):
            j += 1
            t[j] = t0
        return j + 1
    return findrfc2


@jit(int32(float64, float64), nopython=True)
def a_le_b(a, b):
    """Return whether ``a`` is less than or equal to ``b`` (declared as int32)."""
    return b >= a


@jit(int32(float64, float64), nopython=True)
def a_lt_b(a, b):
    """Return whether ``a`` is strictly less than ``b`` (declared as int32)."""
    return b > a


# Two variants produced by the same factory; they differ only in which of the
# two comparison helpers is passed first.
_findrfc_le = _make_findrfc(a_le_b, a_lt_b)
_findrfc_lt = _make_findrfc(a_lt_b, a_le_b)


@jit(int64(int64[:], float64[:], float64), nopython=True)
def _findrfc(ind, y, h):
Ejemplo n.º 21
0
def cuda_ij_to_k(i, j):
    """Return ``j + i * (i - 1) / 2`` converted with ``int32``."""
    row_offset = i * (i - 1) / 2
    return int32(j + row_offset)
Ejemplo n.º 22
0
def argcast(a, b):
    """Call ``argcast_inner`` with ``a`` converted by ``int32`` and ``b`` unchanged."""
    a_as_int32 = int32(a)
    return argcast_inner(a_as_int32, b)
Ejemplo n.º 23
0
@jit(int64(int32, int32))
def EncodeMorton2D(x, y):
    """
    Build the 2D morton code of a coordinate pair.

    The code is ``Expand2D(x)`` plus ``Expand2D(y)`` shifted left by one bit.

    Args:
        x (int): the x dimension
        y (int): the y dimension

    Returns:
        int: 64 bit morton code in 2D

    """
    x_part = Expand2D(x)
    y_part = Expand2D(y) << 1
    return x_part + y_part

@jit(int32(int64))
def Compact2D(m):
    """
    Decodes the 64 bit morton code into a 32 bit number in the 2D space using
    a divide and conquer approach for separating the bits. 
    1 bit is not used because the integers are not unsigned
    
    Args:
        n (int): a 64 bit morton code
        
    Returns:
        int: a dimension in 2D space
        
    Raises:
        Exception: ERROR: Morton code is always positive
    """
Ejemplo n.º 24
0
            ret += d**2
        return ret

    @cuda.jit(nb_float(nb_float[:], nb_float[:], nb_float[:]), device=True)
    def cu_pbc_dist(a, b, box):
        ret = 0
        for i in range(a.shape[0]):
            d = a[i] - b[i]
            d -= box[i] * floor(d / box[i] + 0.5)
            ret += d**2
        return sqrt(ret)

    return cu_pbc_dist2, cu_pbc_dist_diameter, cu_pbc_dist


@cuda.jit(int32(int32[:], int32[:]), device=True)
def cu_ravel_index_f_pbc(i, dim):  # ravel index in Fortran way.
    """Flatten the multi-index ``i`` for a grid of extents ``dim``, first axis fastest.

    Every component is wrapped with ``(i[k] + dim[k]) % dim[k]`` before use.
    """
    flat = (i[0] + dim[0]) % dim[0]
    stride = dim[0]
    for axis in range(1, dim.shape[0]):
        wrapped = (i[axis] + dim[axis]) % dim[axis]
        flat += wrapped * stride
        stride *= dim[axis]
    return flat


@cuda.jit(void(int32, int32[:], int32[:]), device=True)
def cu_unravel_index_f(i, dim, ret):  # unravel index in Fortran way.
    """Split the flat index ``i`` into per-axis indices written to ``ret``, first axis fastest."""
    for axis in range(dim.shape[0]):
        extent = dim[axis]
        ret[axis] = int(i % extent)
        # Remove this axis' contribution before moving to the next axis.
        i = (i - ret[axis]) / extent
Ejemplo n.º 25
0
 def foo():
     """Create a list typed for unicode strings and append an ``int32`` value to it."""
     str_list = listobject.new_list(types.unicode_type)
     str_list.append(int32(0))
Ejemplo n.º 26
0
def make_test_list():
    """Return a new ``int32`` list object holding the single value 1."""
    int_list = listobject.new_list(int32)
    int_list.append(int32(1))
    return int_list
Ejemplo n.º 27
0
        xy + \\text{trunc}\\left(\\frac{\\left(\\left|x - y\\right| -
            1\\right)^{2}}{4}\\right)

    Args:
        x (array): First value array
        y (array): Second value array

    Returns:
        p (array): Pairing function result

    Note:
        This function has a vectorized version that is imported as
        :func:`~exa.algorithms.indexing.unordered_pairing`; use that
        function when working with array data.

    .. _pairing function: http://www.mattdipasquale.com/blog/2014/03/09/unique-unordered-pairing-function/
    '''
    return np.int64(x * y + np.trunc((np.abs(x - y) - 1)**2 / 4))


# When the ``pkg_numba`` configuration entry is truthy, rebind the helpers
# defined in this module to numba-compiled versions.
if global_config['pkg_numba']:
    from numba import jit, vectorize, int32, int64, float32, float64
    # nopython-mode compilation with on-disk caching enabled.
    arange1 = jit(nopython=True, cache=True)(arange1)
    arange2 = jit(nopython=True, cache=True)(arange2)
    indexes_sc1 = jit(nopython=True, cache=True)(indexes_sc1)
    indexes_sc2 = jit(nopython=True, cache=True)(indexes_sc2)
    # Element-wise ufunc with one two-argument signature per element type.
    unordered_pairing = vectorize([int32(int32, int32), int64(int64, int64),
                                   float32(float32, float32), float64(float64, float64)],
                                   nopython=True)(unordered_pairing)
Ejemplo n.º 28
0
 def foo():
     """Make the test list immutable and then append to it."""
     frozen = make_test_list()
     frozen._make_immutable()
     frozen.append(int32(1))
Ejemplo n.º 29
0
#! /usr/bin/python
# -*- coding: utf-8 -*-
# ising.py

# Version: 2018.11.16.01

# Required libraries
from __future__ import division
import time, random, math, time, sys, os
import matplotlib.pyplot as plt
from numba import jit, prange, njit, int32
import numpy as np
from numpy.random import random as nrand


@njit(int32(int32, int32, int32))  # Periodic Boundary Condition
def PBC(idx, lim, add):
    """Return ``idx + add`` wrapped into the period ``lim``, computed as ``(idx + lim + add) % lim``."""
    shifted = idx + lim + add
    return shifted % lim


# Monte Carlo Alg
def MC(temperature, spins,
       MCc):  # Temperature, Number of Spins, Number of cycles
    MSpins = np.ones((spins, spins), np.int32)  # Spins Matrix pointing up

    # Initialize Statistics
    E = 0.
    Eavr = 0.
    Evar = 0.
    E2av = 0.
    Mavr = 0.
Ejemplo n.º 30
0
""" Core math functions to compute escape times for the Mandelbrot set. """

from numba import float64, int32, jit
from numpy import empty

@jit(int32(float64, float64, int32))
def mandelbrot_escape(x, y, n):
    """Mandelbrot set escape time algorithm for a given c = x + i*y coordinate.

    Returnautojit,  the number of iterations necessary to escape abouve a fixed threshold
    (4.0) by repeatedly applying the formula:

    z_0 = 0
    z_n = z_{n-1} ^ 2 + c

    If the formula did not escape after `n` iterations, return -1 .

    Parameters
    ----------
    x, y -- float
        Real and imaginary part of the complex number z.

    n -- integer
        Maximum number of iterations.
    """

    z_x = 0
    z_y = 0

    for i in range(n):
        old_z_x = z_x
Ejemplo n.º 31
0
from __future__ import print_function, absolute_import

import numpy as np

from numba import vectorize
from numba import cuda, int32, float32, float64
from numba import unittest_support as unittest
from numba.cuda.testing import skip_on_cudasim
from numba.cuda.testing import CUDATestCase
from numba import config

# Signatures the vectorized kernels in this module are compiled for.
sig = [
    int32(int32, int32),
    float32(float32, float32),
    float64(float64, float64),
]


# Compile for the GPU unless the CUDA simulator is enabled.
target = 'cpu' if config.ENABLE_CUDASIM else 'cuda'


test_dtypes = (np.float32, np.int32)


@skip_on_cudasim('ufunc API unsupported in the simulator')
class TestCUDAVectorize(CUDATestCase):
    N = 1000001

    def test_scalar(self):

        @vectorize(sig, target=target)
Ejemplo n.º 32
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Oct 25 20:50:52 2019

@author: daniel
"""

from numba import jit, vectorize, guvectorize, float64, complex64, int32, float32, int64
import numpy as np


@jit(int32(complex64, int32))
def mandelbrot(c, maxiter):
    """Iterate ``z -> z*z + c`` from zero and return the escape iteration.

    Returns the index of the first iteration after which the squared modulus
    exceeds 4.0, or 0 if that does not happen within ``maxiter`` iterations.
    """
    next_re = 0
    z_re = 0
    z_im = 0
    for step in range(maxiter):
        next_re = z_re * z_re - z_im * z_im + c.real
        z_im = 2 * z_re * z_im + c.imag
        z_re = next_re
        if z_re * z_re + z_im * z_im > 4.0:
            return step
    return 0


@guvectorize([(complex64[:], int32[:], int32[:])],
             '(n),()->(n)',
             target='parallel')
def mandelbrot_numpy(c, maxit, output):
    maxiter = maxit[0]
Ejemplo n.º 33
0
                    if current_interval == result_buffer_size:
                        return current_interval

            # Add bounds to result buffer
            result_buffer[current_interval, 0] = current_interval_start
            result_buffer[current_interval, 1] = itv_end
            current_interval += 1

            if current_interval == result_buffer_size:
                return current_interval

    n_intervals = current_interval      # No +1, as current_interval was incremented also when the last interval closed
    return n_intervals


@numba.jit(numba.int32(numba.float64[:], numba.float64, numba.int64[:, :]),
           nopython=True)
def find_intervals_above_threshold_no_splitting(w, threshold, result_buffer):
    """Fills result_buffer with l, r bounds of intervals in w > threshold.
    Unlike find_intervals_above_threshold(), does not smooth and split hits,
    which allows speed increase in ZLE simulation.
    :param w: Waveform to do hitfinding in
    :param threshold: Threshold for including an interval
    :param result_buffer: numpy N*2 array of ints, will be filled by function.
                          if more than N intervals are found, none past the first N will be processed.
    :returns : number of intervals processed
    Boundary indices are inclusive, i.e. the right boundary is the last index which was > threshold
    """
    result_buffer_size = len(result_buffer)
    last_index_in_w = len(w) - 1
Ejemplo n.º 34
0
#!/usr/bin/env python
# coding: utf-8

import numpy as np
from numba import jit, guvectorize, complex128, int32

# burning ship set


@jit(int32(complex128, int32))
def burning_ship_iter(z, maxiter):
    """Return the escape iteration of the burning-ship recurrence for ``z``.

    Starting from zero, the real and imaginary parts are replaced by the
    absolute values of the usual quadratic update on every step. The index of
    the first step that starts with a squared modulus above 4.0 is returned,
    or 0 if none does within ``maxiter`` steps.
    """
    z_re = 0
    z_im = 0
    for step in range(maxiter):
        re_sq = z_re * z_re
        im_sq = z_im * z_im
        if re_sq + im_sq > 4.0:
            return step
        z_im = abs(2 * z_re * z_im + z.imag)
        z_re = abs(re_sq - im_sq + z.real)
    return 0


@guvectorize([(complex128[:], int32[:], int32[:])],
             '(n),()->(n)',
             target='parallel')
def burning_ship_numpy(z, maxit, output):
    """Fill ``output`` with ``burning_ship_iter`` of every element of ``z``.

    The iteration limit is read from the first element of ``maxit``.
    """
    limit = maxit[0]
    for idx in range(z.shape[0]):
        output[idx] = burning_ship_iter(z[idx], limit)
Ejemplo n.º 35
0
from numba import vectorize, int32, complex128

from __init__ import plot, field

IMAX = 0xFFFF


@vectorize([int32(complex128)], target="parallel")
def mandelbrot_vector(c):
    """Count iterations of ``z -> z*z + c`` from zero while ``abs(z) <= 2``.

    The count is capped at ``IMAX``.
    """
    orbit = 0
    steps = 0
    while abs(orbit) <= 2 and steps < IMAX:
        orbit = orbit * orbit + c
        steps += 1
    return steps


def main():
    """Compute iteration counts on ``field(1024)`` and plot them to ``numba_vectorize.png``."""
    grid = field(1024)
    counts = mandelbrot_vector(grid)
    # Points that reached the iteration cap are shown with the value 0.
    counts[counts == IMAX] = 0
    plot(counts, "numba_vectorize.png")


if __name__ == "__main__":
    main()
Ejemplo n.º 36
0
def cu_delay_cfun(horizon,
                  cfpre,
                  cfpost,
                  n_cvar,
                  n_thread_per_block,
                  step_stride=0,
                  aff_node_stride=0):
    """Construct CUDA device function for delayed coupling with given pre & post summation functions.

    :param horizon: length of the circular history buffer; must be a power of
        two and at least 2, otherwise ``ValueError`` is raised
    :param cfpre: device function called as ``cfpre(post_state, delayed_pre_state)``
        for every non-zero weight; its result is weighted and summed
    :param cfpost: device function applied to each finished sum
    :param n_cvar: first dimension of the shared-memory scratch array
    :param n_thread_per_block: second dimension of the shared-memory scratch array
    :param step_stride: multiplier applied to ``step`` when indexing ``state``
        and ``aff`` (0 except for testing)
    :param aff_node_stride: multiplier applied to ``i_post`` when indexing
        ``aff`` (0 except for testing)
    :returns: the compiled device function ``dcfun``
    :raises ValueError: if ``horizon`` fails the power-of-two check
    """

    # (h & (h - 1)) == 0 holds exactly for powers of two; horizon < 2 is rejected too
    if horizon < 2 or (horizon & (horizon - 1)) != 0:
        msg = "cu_delay_cfun argument `horizon` should be a positive power of 2, but received %d"
        msg %= horizon
        raise ValueError(msg)

    # 0 except for testing
    step_stride = int32(step_stride)
    aff_node_stride = int32(aff_node_stride)

    @cuda.jit(device=True)
    def dcfun(aff, delays, weights, state, i_post, i_thread, step, cvars,
              buf):  #, delayed_step):

        # shared mem temporary for summation, indexed by block-local thread index
        aff_i = cuda.shared.array((n_cvar, n_thread_per_block), float32)
        i_t = cuda.threadIdx.x

        # 0 except for testing
        step_ = step_stride * step

        # update buffer with state
        # (the slot for this step is step modulo horizon)
        for i_cvar in range(cvars.shape[0]):
            buf[i_post, _cu_mod_pow_2(step, horizon), i_cvar,
                i_thread] = state[step_, i_post, cvars[i_cvar], i_thread]

        # initialize sums to zero
        for i_cvar in range(cvars.shape[0]):
            aff_i[i_cvar, i_t] = float32(0.0)
            #aff[step_, i_post * aff_node_stride, i_cvar, i_thread*0] = float32(0.0)

        # query buffer, summing over cfpre applied to delayed efferent cvar values
        # NOTE(review): i_pre runs over weights.shape[0] but indexes the second
        # axis of weights/delays -- presumably both are square; confirm.
        for i_pre in range(weights.shape[0]):
            weight = weights[i_post, i_pre]
            if weight == 0.0:
                continue
            # delayed_step[i_post, i_pre] = _cu_mod_pow_2(step - delays[i_post, i_pre] + horizon, horizon)
            # horizon is added before the modulo so the argument stays
            # non-negative as long as the delay does not exceed step + horizon
            delayed_step = _cu_mod_pow_2(
                step - delays[i_post, i_pre] + horizon, horizon)
            for i_cvar in range(cvars.shape[0]):
                cval = buf[i_pre, delayed_step, i_cvar, i_thread]
                #aff[step_, i_post * aff_node_stride, i_cvar, i_thread*0] += \
                aff_i[i_cvar, i_t] += \
                    weight * cfpre(state[step_, i_post, cvars[i_cvar], i_thread], cval)

        # apply cfpost
        for i_cvar in range(cvars.shape[0]):
            # i_t use and i_thread for tests...
            aff[step_, i_post * aff_node_stride, i_cvar, i_t] = cfpost(
                aff_i[i_cvar, i_t]
                #aff[step_, i_post * aff_node_stride, i_cvar, i_thread*0]
            )

    return dcfun
Ejemplo n.º 37
0
#
#     i = 0
#
#     while True:
#         mother = np.random.randint(0,len(fit))
#         father = np.random.randint(0,len(fit))
#         r_rec = recombine_arrays(population[:, mother], population[:, father])
#         new_pop[:, i] =  r_rec[:, 0]
#         new_pop[:, i+1] = r_rec[:, 1]
#         i += 2
#         if i >= n:
#              break
#
#     return new_pop

@jit(int32(float32[:]),nopython=True)
def weighted_choice(weights):
    """Pick an index at random, with chance proportional to its weight.

    A uniform draw scaled by the total weight is reduced by each weight in
    turn; the index at which it drops below zero is returned.  If it never
    does, the last index is returned (0 for an empty array).
    """
    remaining = np.random.random() * np.sum(weights)
    count = len(weights)
    chosen = 0
    pos = 0
    while pos < count:
        chosen = pos
        remaining -= weights[pos]
        if remaining < 0:
            break
        pos += 1
    return chosen

@jit(float32[:,:](float32[:,:],float32[:]),nopython=True)
def recombine_population(population,fitness):
    weights =  fitness.max()-fitness
    new_pop = np.zeros(population.shape,dtype=np.float32)
    n = population.shape[1]
    fit = np.arange(0,int(n/20))
Ejemplo n.º 38
0
from __future__ import print_function, absolute_import
import numpy as np
from numba import vectorize
from numba import cuda, int32, float32, float64
from timeit import default_timer as time
from numba import unittest_support as unittest
from numba.cuda.testing import skip_on_cudasim
from numba.cuda.testing import CUDATestCase
from numba import config

sig = [int32(int32, int32), float32(float32, float32), float64(float64, float64)]


target = "cuda"
if config.ENABLE_CUDASIM:
    target = "cpu"


test_dtypes = np.float32, np.int32


@skip_on_cudasim("ufunc API unsupported in the simulator")
class TestCUDAVectorize(CUDATestCase):
    def test_scalar(self):
        @vectorize(sig, target=target)
        def vector_add(a, b):
            return a + b

        a = 1.2
        b = 2.3
        c = vector_add(a, b)
Ejemplo n.º 39
0
import numpy as np
import numba


@numba.vectorize([numba.int32(numba.uint8)], nopython=True)
def hammard(n):
    """
    Fast Hamming weight (number of set bits) of a uint8 value.

    :param n: np.uint8
    :return:  the bit count (declared as int32 in the vectorize signature)
    """
    every_other_bit = np.uint8(85)   # 01010101b
    every_other_pair = np.uint8(51)  # 00110011b
    low_nibble = np.uint8(15)        # 00001111b

    # repeatedly fold the value in two, adding neighbouring fields of
    # 1, 2 and then 4 bits so the final value is the total bit count
    n = (n & every_other_bit) + ((n >> 1) & every_other_bit)
    n = (n & every_other_pair) + ((n >> 2) & every_other_pair)
    n = (n & low_nibble) + ((n >> 4) & low_nibble)
    return n


@numba.vectorize([numba.int32(numba.int32)], nopython=True)
def log2(n):
    """
    fast integer floor log 2

    :param n: input integer, must be positive
    :return:
    """
    result = 0
    for i in range(1, 32):
        if not n >> i:
            result = i - 1
Ejemplo n.º 40
0
    def complete_grid(self):
        """Array of (edge,vertex,vertex) triples defining a complete graph.

        Built on first access from ``self._num_vertices`` and cached in
        ``self._complete_grid``.
        """
        grid = self._complete_grid
        if grid is not None:
            return grid
        grid = make_complete_graph(self._num_vertices)
        self._complete_grid = grid
        return grid

    @property
    def vertices(self):
        """Read-only access to the stored ``_vertices`` attribute."""
        return self._vertices

    def gc(self):
        """Garbage collect temporary cached data structures.

        Currently this only drops the cached ``_complete_grid``.
        """
        self._complete_grid = None


@jit(numba.int32(numba.int32, numba.int32), nopython=True, cache=True)
def find_complete_edge(v1, v2):
    """Find the edge index k of an unsorted pair of vertices (v1, v2).

    The pair is ordered as (low, high) and mapped with
    ``k = low + high * (high - 1) // 2``.
    """
    low = min(v1, v2)
    high = max(v1, v2)
    return low + high * (high - 1) // 2


def make_complete_graph(num_vertices):
    """Constructs a complete graph.

    The pairing function is: k = v1 + v2 * (v2 - 1) // 2

    Args:
        num_vertices: Number of vertices.
Ejemplo n.º 41
0
def cast_as_numba_type_attribute():
    """Cast the float constant 4.4 through ``numba.int32`` (accessed as a module attribute)."""
    return numba.int32(4.4)
Ejemplo n.º 42
0
def argcast(a, b):
    """Call ``argcast_inner`` with ``a`` cast to ``int32`` and ``b`` passed through unchanged."""
    a_as_int32 = int32(a)
    return argcast_inner(a_as_int32, b)
Ejemplo n.º 43
0
import math
import json
import base64

import numpy as np

from numba import njit, int32

# this is a 1-to-1 translation of our js bloom filters to python


@njit(int32(int32))
def popcnt(v):
    """Return the number of set bits in the low 32 bits of ``v``.

    Adjacent bits, then 2-bit fields, then nibbles are summed in parallel;
    the final multiply adds the four per-byte counts into bits 24..31.
    """
    v -= (v >> 1) & 0x55555555
    v = (v & 0x33333333) + ((v >> 2) & 0x33333333)
    v = (v + (v >> 4)) & 0xf0f0f0f
    # The JavaScript original relies on the product wrapping to 32 bits.
    # Here the arithmetic is wider than 32 bits, so without the mask the
    # shift would keep partial sums above the count byte (e.g. 0x100 -> 257).
    return ((v * 0x1010101) & 0xffffffff) >> 24


# a * 16777619 mod 2**32
@njit(int32(int32))
def fnv_multiply(a):
    """Multiply ``a`` by 16777619 using shifts and adds.

    16777619 == 1 + 2**1 + 2**4 + 2**7 + 2**8 + 2**24, one term per shift.
    """
    total = a
    total += a << 1
    total += a << 4
    total += a << 7
    total += a << 8
    total += a << 24
    return total


#// See https://web.archive.org/web/20131019013225/http://home.comcast.net/~bretm/hash/6.html
@njit(int32(int32))
def fnv_mix(a):
    a += (a << 13)
    a ^= (a >> 7)
    a += (a << 3)
    a ^= (a >> 17)
Ejemplo n.º 44
0
from numba import float32, int32, jit
import cProfile
from dis import dis


@jit(int32(int32, int32), nopython=True, nogil=True)
def add_two(a, b):
    """Accumulate ``a + b`` one thousand times and return the total."""
    total = 0
    for _ in range(1000):
        total += a + b
    return total


def add_two_wrap(a, b):
    """Plain-Python wrapper that forwards both arguments to ``add_two``."""
    return add_two(a, b)


def add_two2(a, b):
    """Accumulate ``a + b`` one thousand times, starting from 0, and return the total."""
    total = 0
    for _ in range(1000):
        total += a + b
    return total


def test():
    """Print the result of ``add_two_wrap`` for a fixed pair of inputs."""
    num = 100
    # call form of print: valid on Python 2 (single argument) and Python 3
    print(add_two_wrap(num, num + 1))
Ejemplo n.º 45
0
#     i = 0
#
#     while True:
#         mother = np.random.randint(0,len(fit))
#         father = np.random.randint(0,len(fit))
#         r_rec = recombine_arrays(population[:, mother], population[:, father])
#         new_pop[:, i] =  r_rec[:, 0]
#         new_pop[:, i+1] = r_rec[:, 1]
#         i += 2
#         if i >= n:
#              break
#
#     return new_pop


@jit(int32(float32[:]), nopython=True)
def weighted_choice(weights):
    """Pick an index at random, with chance proportional to its weight.

    A uniform draw scaled by the total weight is reduced by each weight in
    turn; the index at which it drops below zero is returned.  If it never
    does, the last index is returned (0 for an empty array).
    """
    remaining = np.random.random() * np.sum(weights)
    chosen = 0
    for chosen, weight in enumerate(weights):
        remaining -= weight
        if remaining < 0:
            break
    return chosen


@jit(float32[:, :](float32[:, :], float32[:]), nopython=True)
def recombine_population(population, fitness):
    weights = fitness.max() - fitness
    new_pop = np.zeros(population.shape, dtype=np.float32)
    n = population.shape[1]
Ejemplo n.º 46
0
    def cu_angle_force(npa, pos, params, box0, box1, box2, box0_half,
                       box1_half, box2_half, angle_size, angle_list, force,
                       virial_potential, one_three, one_sixth):
        """Accumulate angle forces, virial and potential for one particle per thread.

        Thread ``i`` (from ``cuda.grid(1)``) walks the ``angle_size[i]`` entries
        of ``angle_list[i]``; each entry holds two partner indices, a parameter
        index and the position (``order``) of particle ``i`` inside the angle.
        The per-angle force factor and energy come from ``cu_func``, which fills
        a two-element local array.  Results are added to ``force[i][0:3]`` and
        ``virial_potential[i][0:2]``.

        ``cu_func`` and ``minimum_value`` are taken from the enclosing scope.
        """
        i = cuda.grid(1)
        if i < npa:
            pi = pos[i]
            # result = cuda.local.array(5, dtype = nb.float32)
            # running totals start from the values already stored for particle i
            result0 = force[i][0]
            result1 = force[i][1]
            result2 = force[i][2]
            result3 = virial_potential[i][0]
            result4 = virial_potential[i][1]
            for ai in range(nb.int32(0), angle_size[i]):
                j = angle_list[i][ai][0]
                k = angle_list[i][ai][1]
                # NOTE(review): `type` shadows the builtin inside this kernel
                type = angle_list[i][ai][2]
                order = angle_list[i][ai][3]

                pj = pos[j]
                pk = pos[k]

                # Arrange the three particles as a-b-c with b as the vertex.
                # NOTE(review): pa/pb/pc stay unassigned if order is not 0, 1
                # or 2 -- presumably the angle list guarantees this; confirm.
                if order == 0:
                    pa = pi
                    pb = pj
                    pc = pk

                if order == 1:
                    pa = pj
                    pb = pi
                    pc = pk

                if order == 2:
                    pa = pj
                    pb = pk
                    pc = pi

                d_ab0 = pa[0] - pb[0]
                d_ab1 = pa[1] - pb[1]
                d_ab2 = pa[2] - pb[2]

                d_cb0 = pc[0] - pb[0]
                d_cb1 = pc[1] - pb[1]
                d_cb2 = pc[2] - pb[2]

                # shift each separation component by one box length when it
                # lies outside [-half, half)
                if d_ab0 >= box0_half:
                    d_ab0 -= box0
                elif d_ab0 < -box0_half:
                    d_ab0 += box0

                if d_ab1 >= box1_half:
                    d_ab1 -= box1
                elif d_ab1 < -box1_half:
                    d_ab1 += box1

                if d_ab2 >= box2_half:
                    d_ab2 -= box2
                elif d_ab2 < -box2_half:
                    d_ab2 += box2

                if d_cb0 >= box0_half:
                    d_cb0 -= box0
                elif d_cb0 < -box0_half:
                    d_cb0 += box0

                if d_cb1 >= box1_half:
                    d_cb1 -= box1
                elif d_cb1 < -box1_half:
                    d_cb1 += box1

                if d_cb2 >= box2_half:
                    d_cb2 -= box2
                elif d_cb2 < -box2_half:
                    d_cb2 += box2

                rsq_ab = d_ab0 * d_ab0 + d_ab1 * d_ab1 + d_ab2 * d_ab2
                r_ab = math.sqrt(rsq_ab)
                rsq_cb = d_cb0 * d_cb0 + d_cb1 * d_cb1 + d_cb2 * d_cb2
                r_cb = math.sqrt(rsq_cb)

                # cosine of the angle at b: normalised dot product of ab and cb
                cos_abc = d_ab0 * d_cb0 + d_ab1 * d_cb1 + d_ab2 * d_cb2
                cos_abc /= r_ab * r_cb

                # clamp to [-1, 1] so the square root below gets a non-negative argument
                if cos_abc > nb.float32(1.0):
                    cos_abc = nb.float32(1.0)
                if cos_abc < -nb.float32(1.0):
                    cos_abc = -nb.float32(1.0)

                sin_abc = math.sqrt(nb.float32(1.0) - cos_abc * cos_abc)
                # floor the sine before inverting it to avoid dividing by zero
                if sin_abc < minimum_value:
                    sin_abc = minimum_value
                # from here on sin_abc holds 1 / sin
                sin_abc = nb.float32(1.0) / sin_abc

                pms = params[type]
                # fp[0] feeds the force factor below, fp[1] the potential
                fp = cuda.local.array(2, dtype=nb.float32)
                cu_func(cos_abc, sin_abc, pms, fp)

                a = -fp[0] * sin_abc
                a11 = a * cos_abc / rsq_ab
                a12 = -a / (r_ab * r_cb)
                a22 = a * cos_abc / rsq_cb

                fab0 = a11 * d_ab0 + a12 * d_cb0
                fab1 = a11 * d_ab1 + a12 * d_cb1
                fab2 = a11 * d_ab2 + a12 * d_cb2

                fcb0 = a22 * d_cb0 + a12 * d_ab0
                fcb1 = a22 * d_cb1 + a12 * d_ab1
                fcb2 = a22 * d_cb2 + a12 * d_ab2

                # particle i receives the share matching its role in the angle:
                # end a (+fab), vertex b (-(fab + fcb)) or end c (+fcb)
                if order == 0:
                    result0 += fab0
                    result1 += fab1
                    result2 += fab2

                if order == 1:
                    result0 -= fab0 + fcb0
                    result1 -= fab1 + fcb1
                    result2 -= fab2 + fcb2

                if order == 2:
                    result0 += fcb0
                    result1 += fcb1
                    result2 += fcb2

                vx = d_ab0 * fab0 + d_cb0 * fcb0
                vy = d_ab1 * fab1 + d_cb1 * fcb1
                vz = d_ab2 * fab2 + d_cb2 * fcb2
                virial = one_sixth * (vx + vy + vz)
                # if i==35 and ai == 0:
                # print(i, ai, vy, a, cos_abc, rsq_cb)
                potential = fp[1] * one_three
                result3 += virial
                result4 += potential
            # write the accumulated totals back once, after all angles
            force[i][0] = result0
            force[i][1] = result1
            force[i][2] = result2
            virial_potential[i][0] = result3
            virial_potential[i][1] = result4
Ejemplo n.º 47
0
        if np.isnan(ai):
            f = True
            break
    return f


@ndreduce([int64(int32), int64(int64), int64(float32), int64(float64)])
def count(a):
    """Return how many elements of ``a`` are not NaN."""
    total = 0
    for value in a.flat:
        if np.isnan(value):
            continue
        total += 1
    return total


@ndreduce([int32(int32), int64(int64), float32(float32), float64(float64)])
def nansum(a):
    """Return the sum of the elements of ``a`` that are not NaN (0 if there are none)."""
    total = 0
    for value in a.flat:
        if np.isnan(value):
            continue
        total += value
    return total


@ndreduce([float32(float32), float64(float64)])
def nanmean(a):
    asum = 0.0
    count = 0
    for ai in a.flat:
        if not np.isnan(ai):
            asum += ai
    def dowork(M_f_start, nfs_sq, d_src_ar, d_dst_ar, weight_ar, sigma_m, E2,
               sigma_0, fovshift, nfs, W_cut, osd0p, osd1r):
        """Compute one Gaussian connection weight per (source, destination) pair.

        Each thread of the 2-D grid handles the pair ``(i_src, i_dst)``.  The
        Gaussian width depends on the source's second coordinate; weights above
        ``W_cut`` are staged in shared memory and then copied into the flat
        ``weight_ar`` at index ``i_dst * nfs_sq + i_src`` by thread (0, 0) of
        each block.
        """
        # Work out i_src and i_dst based on the 2-D thread index
        i_src, i_dst = cuda.grid(2)

        # NOTE(review): the syncthreads() calls below sit inside this bounds
        # check; confirm every thread of a block takes the same branch.
        if i_src < nfs_sq and i_dst < nfs_sq:

            # Temporary shared memory for weights
            # three float32 slots per thread: weight, i_src, i_dst
            tmp_w = cuda.shared.array(12288, dtype=float32)
            myidx = (cuda.threadIdx.y * cuda.blockDim.x + cuda.threadIdx.x)
            offsidx = shifted_idx3(myidx)
            tmp_w[offsidx] = float32(0.0)
            tmp_w[offsidx + 1] = float32(0.0)
            tmp_w[offsidx + 2] = float32(0.0)
            cuda.syncthreads()

            # Compute the location of d_src_ar, this defines what sigma will
            # be. As r (as opp. to phi) increases, the sigma should increase.
            M_f = float32(nfs) / (E2 * math.log(((1 + d_src_ar[i_src, 1]) /
                                                 (2 * E2)) + 1))

            # Set some of M_f to 1 to ensure the fan-out starts at around the
            # edge of the foveal region.
            if (1 + d_src_ar[i_src, 1]) < fovshift:
                M_f = M_f_start

            # Compute modified sigma and 3 times this value. _sigma is a
            # function of r, aka d_src_ar[1]. M_f is the function of r.
            _sigma = (sigma_m / M_f) - (sigma_m / M_f_start) + sigma_0
            three_sigma = float32(3.0) * _sigma

            # in-xy-plane distance (ignore d_src_ar[2]/dstdoc[2])
            xd = (d_src_ar[i_src, 0] - d_dst_ar[i_dst, 0] + osd0p)
            yd = (d_src_ar[i_src, 1] - d_dst_ar[i_dst, 1] + osd1r)
            # only pairs inside a 3-sigma box around the source are evaluated
            if abs(xd) < three_sigma and abs(yd) < three_sigma:
                dist = math.sqrt(math.pow(xd, 2) + math.pow(yd, 2))
                gauss = math.exp(-0.5 * math.pow(dist / _sigma, 2))
                if gauss > W_cut:
                    # Stage the result in shared memory
                    tmp_w[offsidx] = float32(gauss)
                    tmp_w[offsidx + 1] = float32(i_src)
                    tmp_w[offsidx + 2] = float32(i_dst)

            # Sync threads, then access device memory with any results
            cuda.syncthreads()

            if cuda.threadIdx.x == 0 and cuda.threadIdx.y == 0:
                tpb = cuda.blockDim.x * cuda.blockDim.y

                # Write data from tmp_w to weight_ar, but only in ONE thread
                # from the threadblock. Should avoid racing.
                # NOTE(review): slots left at zero map to weight_idx 0 and
                # store 0.0 into weight_ar[0] -- confirm this is intended.
                for idx in range(
                        0, tpb):  # 512 was hard coded here; changed it for tpb
                    offsidx2 = shifted_idx3(idx)
                    theweight = tmp_w[
                        offsidx2]  # weight should be the first one, so no +1/+2
                    # Add to weight_ar
                    weight_idx = int32(tmp_w[offsidx2 + 2]) * nfs_sq + int32(
                        tmp_w[offsidx2 + 1])
                    weight_ar[weight_idx] = theweight

        return  # end dowork()
Ejemplo n.º 49
0
 def foo():
     """Return the ``_dtype`` of a ``List`` holding an int32 and of one holding a float32."""
     int_list = List()
     float_list = List()
     int_list.append(int32(1))
     float_list.append(float32(1.0))
     return int_list._dtype, float_list._dtype
Ejemplo n.º 50
0
def exprefixsum(masks, indices, init = 0, nelem = None):
    """
    exclusive prefix sum

    Writes into ``indices[i]`` the sum of ``masks[0:i]`` plus ``init`` for the
    first ``nelem`` elements.

    :param masks: input values; must expose ``.size`` when ``nelem`` is None
    :param indices: output sequence, modified in place
    :param init: starting value of the running sum
    :param nelem: number of elements to process (defaults to ``masks.size``)
    :returns: the final running sum (``init`` plus all processed values)
    """
    nelem = masks.size if nelem is None else nelem

    carry = init
    # range() works on both Python 2 and 3; xrange() only exists on Python 2
    for i in range(nelem):
        indices[i] = carry
        if masks[i] != 0:
            carry += masks[i]

    #indices[nelem] = carry
    return carry

@numba.jit(int32(int32[:],int32[:],int32), nopython=False)
def exprefixsumNumba(in_ary, out_ary, init = 0):
    """
    exclusive prefix sum

    ``out_ary[i]`` receives ``init`` plus the sum of ``in_ary[0:i]``; the
    return value is ``init`` plus the sum of the whole input.
    """
    running = init
    for pos in range(in_ary.size):
        out_ary[pos] = running
        running += in_ary[pos]
    return running

#@numba.jit(int32(int32[:],int32), nopython=False)
@numba.njit
Ejemplo n.º 51
0
					eq_t[h] += 1.0/nb_best_hand

	# impossible : error
	else:
		return -1

	# normalize eq_w_agg and eq_t_agg
	for h in xrange(p):
		eq_agg[h, 0] = eq_w[h]/n
		eq_agg[h, 1] = eq_t[h]/n

	return eq_agg



# Compile `rank` and `exhaustive_block` with explicit numba signatures.
rank_fast = jit(int32(int32[:], int32[:], uint32[:], int32[:], int32, int32, int32[:], int32[:], int32[:]))(rank)
exhaustive_block_fast = jit(int32[:](int32[:, :], int32[:], int32[:], uint32[:], int32[:], int32, int32, int32[:], int32[:], int32[:]))(exhaustive_block)



def exhaustive_eval(player_card, table_card):
	"""compute all possible games given the player/table cards (as a numbers from 0 to 51) and return equity win/tie for each player"""

	p = player_card.shape[0]
	equity_arr = np.zeros([p, 2], dtype=np.float32)

	print '\n---------------- Exhaustive eval start'
	print 'player_card=\n{}'.format(player_card)
	print 'table_card=\n{}'.format(table_card)
	print 'p={}'.format(p)
Ejemplo n.º 52
0
    return np.array([x3d, y3d, z3d])


@numba.njit(
    numba.typeof(
        (0.0, 0.0))(numba.float32, numba.float32, numba.float32, numba.float32,
                    numba.float32, numba.float32, numba.float32))
def project_to_2d(x, y, z, cx, cy, fx, fy):
    """Project the point (x, y, z) to 2-D as ``(fx*x/z + cx, fy*y/z + cy)``.

    A depth of exactly zero is replaced by 0.001 to avoid dividing by zero.
    """
    depth = 0.001 if z == 0 else z
    return fx * x / depth + cx, fy * y / depth + cy


@numba.njit(numba.int32(numba.float32, numba.float32, numba.float32))
def clip_round(value, minvalue, maxvalue):
    """Clamp ``value`` to ``[minvalue, maxvalue]`` and round it to an integer."""
    raised = max((value, minvalue))
    clipped = min(raised, maxvalue)
    return int(round(clipped))


@numba.njit(numba.float32[:, :](numba.float32[:, :], numba.float32[:, :],
                                numba.float32[:, :], numba.float32[:, :],
                                numba.float32[:], numba.typeof((0, 0))))
def reproject(d_image, rgb_mat, d_mat, R, T, shape):
    buffer = np.zeros(shape, np.float32)
    h, w = d_image.shape
    dfx = d_mat[0, 0]
    dfy = d_mat[1, 1]
    dcx = d_mat[0, 2]
    dcy = d_mat[1, 2]
Ejemplo n.º 53
0
import math

@jit
def f(x,y):
    """Return ``x + y``; decorated with bare ``@jit``, i.e. no explicit signature."""
    # A somewhat trivial example
    return x + y

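'''
    In this mode, compilation is deferred until the first function execution. Numba
    infers the argument types at call time and generates optimized code based on this
    information. Numba can also compile separate specializations depending on the input
    types. For example, calling the f() function above with integer or complex numbers
    will generate different code paths:
'''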
print(f(1, 2))
print(f(1j, 2))

from numba import jit, int32
@jit(int32(int32,int32))
def f(x, y):
    """Return ``x + y``; compiled for the explicit signature ``int32(int32, int32)``."""
    return x + y

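'''
    int32(int32, int32) is the function's signature. In this case, the corresponding
    specialization is compiled by the @jit decorator, and no other specialization is
    allowed. This is useful if you want fine-grained control over the types chosen by
    the compiler (for example, to use single-precision floats).

    If you omit the return type, e.g. by writing (int32, int32) instead of
    int32(int32, int32), Numba will try to infer it for you. Function signatures can
    also be strings, and you can pass several of them as a list;
'''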
print(f(1, 2))
print(f(2**31, 2**31 + 1))

# Calling and inlining other functions
@jit
Ejemplo n.º 54
0
    return np.interp(xp, [x1, x2], [y1, y2])


def complex_grid(xlim, ylim, nx, ny):
    '''
    Returns a grid of complex numbers bounded by the xlim and ylim ranges.

    The real part takes nx evenly spaced values over xlim (varying along the
    last axis) and the imaginary part ny values over ylim (varying along the
    first axis), so the result has shape (ny, nx).
    '''
    re_axis = np.linspace(xlim[0], xlim[1], nx)
    im_axis = np.linspace(ylim[0], ylim[1], ny)
    re_part, im_part = np.meshgrid(re_axis, im_axis)
    return re_part + 1j * im_part


@jit(int32(complex128, complex128, int32, float64, int32), nopython=True, cache=True)
def iterate(z, C, n, zmax, niter):
    '''
    Iterate z -> z**n + C and return the 0-based index of the
    iteration after which the absolute value of z first exceeds zmax.

    If that never happens within niter iterations, niter - 1 is
    returned (the same value as an escape on the last iteration).
    For niter <= 0 no iteration is performed and 0 is returned.
    '''
    zmax2 = zmax**2
    # Pre-assign k so an empty loop (niter <= 0) returns a defined value
    # instead of reading a variable that was never set.
    k = 0
    for k in range(niter):
        z = pow(z, n) + C
        # compare squared magnitudes to avoid a square root per step
        if z.imag**2 + z.real**2 > zmax2:
            break

    return k