Example #1
0
    return distance_matrix


def arc_distance_numpy_broadcast(a, b):
    """
    Pairwise arc distance between every row of a and every row of b.

    Both inputs are 2-D arrays whose column 0 holds theta and column 1
    holds phi. The result has shape (len(a), len(b)); entry [i, j] is the
    haversine-style distance between a[i] and b[j].
    """
    # Rows of `a` become column vectors and rows of `b` become row vectors,
    # so every binary operation below broadcasts to a full (len(a), len(b))
    # matrix without explicit tiling.
    a_theta = a[:, 0][:, None]
    a_phi = a[:, 1][:, None]
    b_theta = b[:, 0][None, :]
    b_phi = b[:, 1][None, :]

    sin_half_dtheta = np.sin((b_theta - a_theta) / 2)
    sin_half_dphi = np.sin((b_phi - a_phi) / 2)
    hav = (sin_half_dtheta**2 +
           np.cos(a_theta) * np.cos(b_theta) * sin_half_dphi**2)
    return 2 * (np.arctan2(np.sqrt(hav), np.sqrt(1 - hav)))


from compare_perf import compare_perf

n = 1000
import numpy as np

a = np.random.rand(n, 2)
b = np.random.rand(n, 2)

compare_perf(arc_distance_python_nested_for_loops, [a, b])
compare_perf(arc_distance_numpy_broadcast, [a, b])
compare_perf(arc_distance_numpy_tile, [a, b])
Example #2
0
def harris(I):
  """
  Per-pixel corner score of the 2-D array I.

  First differences along both axes are cropped to a common
  (m-1, n-1) shape, and the score det(M) - k*trace(M)^2 is returned
  for every remaining position, with k fixed at 0.05.
  """
  rows, cols = I.shape
  # difference along axis 0, then drop the first column
  grad0 = (I[1:, :] - I[:rows-1, :])[:, 1:]
  # difference along axis 1, then drop the first row
  grad1 = (I[:, 1:] - I[:, :cols-1])[1:, :]

  #
  #   The matrix of derivative products at each point is
  #   M =
  #   | grad0^2        grad0 * grad1 |
  #   | grad1 * grad0  grad1^2       |
  #
  #   and the score at that point is:
  #      det(M) - k*trace(M)^2
  #
  sq0 = grad0 * grad0
  sq1 = grad1 * grad1
  cross = grad0 * grad1
  trace = sq0 + sq1
  determinant = sq0 * sq1 - cross * cross
  sensitivity = 0.05
  return determinant - sensitivity * trace * trace

from compare_perf import compare_perf 
# Benchmark input: a 1920 x 1080 array fed to harris() through compare_perf.
m,n = 1920, 1080
# NOTE(review): randn produces negative and fractional values; casting them
# to uint8 discards most of that distribution, and the subtractions inside
# harris then run in unsigned 8-bit arithmetic. Confirm this input is
# intentional rather than a stand-in for real image data.
dtype = 'uint8'
I = np.random.randn(m,n).astype(dtype)

# Only the 'openmp' backend is requested; exceptions are propagated.
compare_perf(harris, [I], propagate_exceptions=True, backends = ('openmp',))
Example #3
0
    """
  Accepted response on stack overflow by phillip
  """
    gg = np.outer(g, g)
    gggg = np.outer(gg, gg).reshape(4 * g.shape)
    axes = ((0, 2, 4, 6), (0, 1, 2, 3))
    return np.tensordot(gggg, T, axes)


T = np.random.randn(n, n, n, n)
g = np.random.randn(n, n)

from compare_perf import compare_perf
compare_perf(rotT_loops, [T, g],
             extra={'numpy_tensordot': rotT_numpy},
             numba=False,
             backends=('c', 'openmp'),
             cpython=True)


def rotT_par(T, g):
    def compute_elt(i, j, k, l):
        total = 0.0
        for ii in range(n):
            for jj in range(n):
                for kk in range(n):
                    for ll in range(n):
                        gg = g[ii, i] * g[jj, j] * g[kk, k] * g[ll, l]
                        total += gg * T[ii, jj, kk, ll]
        return total
Example #4
0
def rosen_der_np(x):
    """
    Gradient of the Rosenbrock function, written with whole-array slices.

    x is a 1-D array with at least two entries. The result is allocated
    with np.empty_like, so it shares x's shape and dtype.
    """
    grad = np.empty_like(x)
    left = x[:-2]
    centre = x[1:-1]
    right = x[2:]
    # interior entries couple each coordinate with both neighbours
    grad[1:-1] = (+200 * (centre - left**2) -
                  400 * (right - centre**2) * centre - 2 * (1 - centre))
    # the end points only have a single neighbour each
    grad[0] = -400 * x[0] * (x[1] - x[0]**2) - 2 * (1 - x[0])
    grad[-1] = 200 * (x[-1] - x[-2]**2)
    return grad


def rosen_der_loops(x):
    """
    Gradient of the Rosenbrock function, written as an explicit loop.

    Computes the same values as the sliced version: interior entries in
    the loop, then the two end points separately. x needs at least two
    entries; the result shares x's shape and dtype (np.empty_like).
    """
    size = x.shape[0]
    grad = np.empty_like(x)

    for idx in range(1, size - 1):
        here = x[idx]
        grad[idx] = (+200 * (here - x[idx - 1]**2) -
                     400 * (x[idx + 1] - here**2) * here - 2 * (1 - here))
    # end points have only one neighbour
    grad[0] = -400 * x[0] * (x[1] - x[0]**2) - 2 * (1 - x[0])
    grad[-1] = 200 * (x[-1] - x[-2]**2)
    return grad


# Benchmark driver: times both gradient implementations on 10**7 points
# evenly spaced in [0, 1).
if __name__ == '__main__':
    N = 10**7
    x = np.arange(N) / float(N)
    # NOTE(review): `jit` is not imported in the visible part of this
    # file; presumably it comes from parakeet -- confirm.
    jit(rosen_der_np)(x)
    from compare_perf import compare_perf
    # numba still crashes on negative indexing
    # Each call gets its own copy of x.
    compare_perf(rosen_der_np, [x.copy()], numba=False)
    compare_perf(rosen_der_loops, [x.copy()], numba=False)
Example #5
0
        winning_colony = state[i, j, 0]
        defense_strength = state[i, j, 1]
        for jj in xrange(max(j - window_radius, 0),
                         min(j + window_radius + 1, width)):
            for ii in xrange(max(i - window_radius, 0),
                             min(i + window_radius + 1, height)):
                if ii != i or jj != j:
                    d = image[i, j, :] - image[ii, jj, :]
                    s = np.sum(d**2)
                    gval = 1.0 - np.sqrt(s) / np.sqrt(3)
                    attack_strength = gval * state[ii, jj, 1]
                    if attack_strength > defense_strength:
                        defense_strength = attack_strength
                        winning_colony = state[ii, jj, 0]
        return [winning_colony, defense_strength]

    return parakeet.imap(attack, (height, width))


from compare_perf import compare_perf

import time

t = time.time()
growcut_python(image, state, state_next, window_radius)
t2 = time.time()
print "Python time", t2 - t
compare_perf(growcut_par, [image, state, window_radius],
             suppress_output=False,
             propagate_exceptions=True)
Example #6
0
n = 7 
def rotT_loops(T, g):
    """
    Rotate the rank-4 tensor T by the matrix g using explicit loops.

    Computes
        Tprime[i,j,k,l] = sum over ii,jj,kk,ll of
                          g[ii,i] * g[jj,j] * g[kk,k] * g[ll,l] * T[ii,jj,kk,ll]

    T has shape (d, d, d, d) and g has shape (d, d). The size d is read
    from g rather than from a module-level constant, so the function works
    for any consistent pair of inputs. Returns a new float64 array of
    shape (d, d, d, d).
    """
    dim = g.shape[0]
    Tprime = np.zeros((dim, dim, dim, dim))
    for i in range(dim):
        for j in range(dim):
            for k in range(dim):
                for l in range(dim):
                    for ii in range(dim):
                        for jj in range(dim):
                            for kk in range(dim):
                                for ll in range(dim):
                                    gg = g[ii,i]*g[jj,j]*g[kk,k]*g[ll,l]
                                    # accumulate in the output array itself so
                                    # the sum is always carried in float64
                                    Tprime[i,j,k,l] = Tprime[i,j,k,l] + gg*T[ii,jj,kk,ll]
    return Tprime

def rotT_numpy(T, g):
  """
  Rotate the rank-4 tensor T by the matrix g with a single tensordot.

  Approach taken from the accepted Stack Overflow answer by phillip.
  """
  # outer product of g with itself (np.outer flattens its inputs)
  pair = np.outer(g, g)
  # outer product of that with itself, unfolded to eight axes
  # (row, col) x 4 -- one (row, col) pair per copy of g
  quad = np.outer(pair, pair)
  quad = quad.reshape(4 * g.shape)
  # contract the four "row" axes of the g-product with the axes of T
  return np.tensordot(quad, T, ((0, 2, 4, 6), (0, 1, 2, 3)))

T = np.random.randn(n,n,n,n)
g = np.random.randn(n,n)

from compare_perf import compare_perf 
compare_perf(rotT_loops, [T, g], extra = {'numpy_tensordot': rotT_numpy})
Example #7
0
import numpy as np 


def smooth(x, alpha):
  """
  Exponentially smooth the 1-D array x.

  s[0] = x[0] and s[i] = alpha * x[i] + (1 - alpha) * s[i-1] for i >= 1.
  The input is not modified; the result is a copy with x's dtype.
  """
  s = x.copy()
  # `range` instead of `xrange` so the function also runs on Python 3
  for i in range(1, len(x)):
      s[i] = alpha * x[i] + (1-alpha)*s[i-1]
  return s

n = 10**6
alpha = 0.01
X = np.random.randn(n).astype('float32')

from compare_perf import compare_perf
compare_perf(smooth, [X, alpha])

Example #8
0
def rosen_der_np(x):
  """
  Gradient of the Rosenbrock function, written with whole-array slices.

  x is a 1-D array with at least two entries. The result is allocated
  with np.empty_like, so it shares x's shape and dtype.
  """
  grad = np.empty_like(x)
  left = x[:-2]
  centre = x[1:-1]
  right = x[2:]
  # interior entries couple each coordinate with both neighbours
  grad[1:-1] = (+ 200 * (centre - left ** 2)
                - 400 * (right - centre ** 2) * centre
                - 2 * (1 - centre))
  # the end points only have a single neighbour each
  grad[0] = -400 * x[0] * (x[1] - x[0] ** 2) - 2 * (1 - x[0])
  grad[-1] = 200 * (x[-1] - x[-2] ** 2)
  return grad

def rosen_der_loops(x):
  """
  Gradient of the Rosenbrock function, written as an explicit loop.

  Interior entries are filled in the loop, then the two end points are
  handled separately. x needs at least two entries; the result shares
  x's shape and dtype (np.empty_like).
  """
  size = x.shape[0]
  grad = np.empty_like(x)

  for idx in range(1, size - 1):
    here = x[idx]
    grad[idx] = (+ 200 * (here - x[idx - 1] ** 2)
                 - 400 * (x[idx + 1] - here ** 2) * here
                 - 2 * (1 - here))
  # end points have only one neighbour
  grad[0] = -400 * x[0] * (x[1] - x[0] ** 2) - 2 * (1 - x[0])
  grad[-1] = 200 * (x[-1] - x[-2] ** 2)
  return grad

# Benchmark driver: times both gradient implementations on 10**5 points
# evenly spaced in [0, 1).
if __name__ == '__main__':
  N = 10**5
  x = np.arange(N) / float(N)
  # NOTE(review): `jit` is not imported in the visible part of this
  # file; presumably it comes from parakeet -- confirm.
  jit(rosen_der_np)(x) 
  from compare_perf import compare_perf
  # numba still crashes on negative indexing
  # Each call gets its own copy of x.
  compare_perf(rosen_der_np, [x.copy()], numba=False)
  compare_perf(rosen_der_loops, [x.copy()], numba=False)
Example #9
0
def local_maxima(data, wsize, mode=wrap):
  """
  Boolean mask marking the positions of data that are local maxima.

  For every index `pos` of data, every offset inside a window of shape
  wsize is visited; the position keeps its True flag only if no visited
  neighbour is larger than data[pos].

  mode is called as mode(p, o - w // 2, w) for each axis and must return
  the neighbour's index along that axis (presumably it applies the
  boundary rule, e.g. wrap-around -- confirm against its definition).
  """
  result = np.ones(shape=data.shape,dtype=bool)
  for pos in np.ndindex(data.shape):
    myval = data[pos]
    for offset in np.ndindex(wsize):
      # Floor division keeps the centred offset an integer on Python 3 as
      # well; plain `/` would hand a float to `mode` there.
      neighbor_idx = tuple(mode(p, o - w // 2, w) for (p, o, w) in zip(pos, offset, wsize))
      result[pos] &= (data[neighbor_idx] <= myval)
  return result


@parakeet.jit 
def parakeet_local_maxima(data, wsize, mode=wrap):
  """
  Local-maxima mask expressed with parakeet.imap instead of explicit loops.

  Builds one value per index of data.shape: whether every neighbour in a
  window of shape wsize is <= the value at that index.
  """
  def is_max(pos):
    # Decides whether data[pos] dominates its whole window.
    def is_smaller_neighbor(offset):
      # NOTE(review): `w/2` relies on integer division (Python 2 semantics)
      # to keep the offset integral -- confirm before running elsewhere.
      neighbor_idx = tuple(mode(p, o-w/2, w) for (p, o, w) in zip(pos, offset, wsize))
      return data[neighbor_idx] <= data[pos]
    # one comparison per window offset, reduced with np.all
    return np.all(parakeet.imap(is_smaller_neighbor, wsize))
  return parakeet.imap(is_max, data.shape)
  

# not sure how to get numba to auto-jit size generic code
# get error: "FAILED with KeyError 'sized_pointer(npy_intp, 4)'"
#import numba
#numba_local_maxima = numba.autojit(python_local_maxima) 

from compare_perf import compare_perf 

shape = (30,30,20,12)
x = np.random.randn(*shape)
compare_perf(local_maxima, [x, shape]) 
Example #10
0
import numpy as np 


def dot(x,y):
  """Inner product of x and y: the sum of their elementwise products."""
  products = x * y
  return sum(products)

def matmult_high_level(X,Y):
  """
  Matrix product of X and Y built from one np.dot per output entry.

  Entry [i, j] of the result is the dot product of row i of X with
  column j of Y (the columns are obtained by iterating over Y.T).
  """
  return np.array([
      [np.dot(row, col) for col in Y.T]
      for row in X
  ])

def matmult_loops(X,Y,Z):
  """
  Matrix product of X (m x d) and Y (d x n), written into Z (m x n).

  Z is overwritten in place and also returned. Each entry is seeded
  with the k = 0 term, so the shared dimension d must be at least 1.
  """
  m, d = X.shape
  n = Y.shape[1]
  # `range` instead of `xrange` so the function also runs on Python 3
  for i in range(m):
    for j in range(n):
      # start from the first product so the accumulator has the element type
      total = X[i,0] * Y[0,j]
      for k in range(1,d):
        total += X[i,k] * Y[k,j]
      Z[i,j] = total
  return Z

n, d = 2000, 500 
m = 2000
X = np.random.randn(m,d).astype('float64')
Y = np.random.randn(d,n).astype('float64')
Z = np.zeros((m,n)).astype('float64')
from compare_perf import compare_perf

compare_perf(matmult_high_level, [X,Y],cpython=True, numba=False,extra = {'numpy':np.dot}, suppress_output = False)
compare_perf(matmult_loops, [X, Y, Z], cpython=False)

Example #11
0
    grid_x = np.linspace(-bound, bound, N)
    for i, x in enumerate(grid_x):
        for j, y in enumerate(grid_x):
            julia[i, j] = kernel(x, y, cr, ci, lim, cutoff=cutoff)
    return julia


def julia(cr, ci, N, bound=1.5, lim=1000., cutoff=1e6):
    """
    Evaluate `kernel` on an N x N grid spanning [-bound, bound] on both axes.

    The same N sample points are used for x and y. Rows of the result
    follow y and columns follow x, i.e. result[r][c] is
    kernel(grid[c], grid[r], cr, ci, lim, cutoff=cutoff).
    """
    axis = np.linspace(-bound, bound, N)
    table = [[kernel(gx, gy, cr, ci, lim, cutoff=cutoff) for gx in axis]
             for gy in axis]
    return np.array(table)


from compare_perf import compare_perf
cr = 0.285
ci = 0.01
N = 1200
bound = 1.5
lim = 1000
cutoff = 1e6

extra = {}
try:
    from numba import autojit
    extra['numba'] = autojit(julia_loops)
except:
    print "Failed to import Numba"

compare_perf(julia, [cr, ci, N, bound, lim, cutoff], numba=False, extra=extra)
Example #12
0
import numpy as np


def smooth(x, alpha):
    """
    Exponentially smooth the 1-D array x.

    s[0] = x[0] and s[i] = alpha * x[i] + (1 - alpha) * s[i-1] for i >= 1.
    The input is not modified; the result is a copy with x's dtype.
    """
    s = x.copy()
    # `range` instead of `xrange` so the function also runs on Python 3
    for i in range(1, len(x)):
        s[i] = alpha * x[i] + (1 - alpha) * s[i - 1]
    return s


n = 10**6
alpha = 0.01
X = np.random.randn(n).astype('float32')

from compare_perf import compare_perf
compare_perf(smooth, [X, alpha])
Example #13
0
                    curr_dist += (x[xidx] - centroid[xidx])**2
                if curr_dist < min_dist:
                    min_dist = curr_dist
                    min_idx = cidx
            A[i] = min_idx
        # recompute the clusters by averaging data points
        # assigned to them
        for cidx in xrange(k):
            # reset centroids
            for dim_idx in xrange(ndims):
                C[cidx, dim_idx] = 0
            # add each data point only to its assigned centroid
            cluster_count = 0
            for i in xrange(n):
                if A[i] == cidx:
                    C[cidx, :] += X[i, :]
                    cluster_count += 1
            C[cidx, :] /= cluster_count
    return C


n, d = 10**4, 50
X = np.random.randn(n, d)
k = 25

from compare_perf import compare_perf

compare_perf(kmeans_comprehensions, [X, k, 5], cpython=False)

compare_perf(kmeans_loops, [X, k, 5], cpython=True)
Example #14
0
#
# Longest hailstone sequence from http://www.mit.edu/~mtikekar/posts/stream-fusion.html
#
import sys


def collatzLen(a0):
    """
    Number of steps the shortcut Collatz map needs to take a0 to 1.

    One step sends an even a to a / 2 and an odd a to (3 * a + 1) / 2.
    a0 is expected to be a positive integer; a0 == 0 never reaches 1 and
    would loop forever.
    """
    a = a0
    length = 0
    while a != 1:
        # Floor division keeps `a` an exact integer on Python 3 as well;
        # true division would turn it into a float and lose precision for
        # large values.
        a = (a if a % 2 == 0 else 3 * a + 1) // 2
        length += 1
    return length


def maxLen(max_a0):
    """
    Find the start value in 1..max_a0 with the longest hailstone sequence.

    Returns (max_length, longest): the largest collatzLen value seen and
    the start value that produced it. Because the comparison is strict,
    ties go to the smallest start value. For max_a0 < 1 the loop does not
    run and (0, 0) is returned.
    """
    max_length = 0
    longest = 0
    # `range` instead of `xrange` so the function also runs on Python 3
    for a0 in range(1, max_a0 + 1):
        length = collatzLen(a0)
        if length > max_length:
            max_length = length
            longest = a0
    return max_length, longest


from compare_perf import compare_perf

compare_perf(maxLen, [1000000])
Example #15
0
                #                h = max_d_curr*.5
                #h = max(h,0.55*dx)

                # particle pixel center
                xpos = physical_to_pixel(x, xmin, dx)
                ypos = physical_to_pixel(y, ymin, dy)

                left = xpos - k / 2
                upper = ypos - k / 2

                for i in xrange(0, k):
                    for j in xrange(0, k):
                        if ((i + left >= 0) and (i + left < nx)
                                and (j + upper >= 0) and (j + upper < ny)):
                            image[(i + left), (j + upper)] += kernel[i, j] * qt

            start_ind = end_ind

    return image


N = 20
x = y = z = hs = qts = mass = rhos = np.random.rand(N)
nx = ny = 100
args = (x, y, qts, hs, nx, ny, 0.0, 1.0, 0.0, 1.0, 1)

template_kernel_cpu(*args)
from compare_perf import compare_perf

compare_perf(template_kernel_cpu, args)
Example #16
0
#
# Longest hailstone sequence from http://www.mit.edu/~mtikekar/posts/stream-fusion.html
#
import sys

def collatzLen(a0):
    """
    Number of steps the shortcut Collatz map needs to take a0 to 1.

    One step sends an even a to a / 2 and an odd a to (3 * a + 1) / 2.
    a0 is expected to be a positive integer; a0 == 0 never reaches 1 and
    would loop forever.
    """
    a = a0
    length = 0
    while a != 1:
        # Floor division keeps `a` an exact integer on Python 3 as well;
        # true division would turn it into a float and lose precision for
        # large values.
        a = (a if a%2 == 0 else 3*a+1) // 2
        length += 1
    return length

def maxLen(max_a0):
    """
    Find the start value in 1..max_a0 with the longest hailstone sequence.

    Returns (max_length, longest): the largest collatzLen value seen and
    the start value that produced it. Because the comparison is strict,
    ties go to the smallest start value. For max_a0 < 1 the loop does not
    run and (0, 0) is returned.
    """
    max_length = 0
    longest = 0
    # `range` instead of `xrange` so the function also runs on Python 3
    for a0 in range(1, max_a0 + 1):
        length = collatzLen(a0)
        if length > max_length:
            max_length = length
            longest = a0
    return max_length, longest

from compare_perf import compare_perf

compare_perf(maxLen, [1000000])

Example #17
0
                for ii in xrange(max(i-window_radius, 0), min(i+window_radius+1, height)):
                    if ii != i or jj != j:
                        d = image[i, j, :] - image[ii, jj, :]
                        s = np.sum(d**2) 
                        gval = 1.0 - np.sqrt(s) / np.sqrt(3)
                        attack_strength = gval * state[ii, jj, 1]
                        if attack_strength > defense_strength:
                            defense_strength = attack_strength
                            winning_colony = state[ii, jj, 0]
                            changes += 1
            state_next[i, j, 0] = winning_colony
            state_next[i, j, 1] = defense_strength
    return changes
    
N = 50
dtype = np.double
image = np.zeros((N, N, 3), dtype=dtype)
state = np.zeros((N, N, 2), dtype=dtype)
state_next = np.empty_like(state)

# colony 1 is strength 1 at position 0,0
# colony 0 is strength 0 at all other positions
state[0, 0, 0] = 1
state[0, 0, 1] = 1

window_radius = 10

from compare_perf import compare_perf 

compare_perf(growcut_python, [image, state, state_next, window_radius])
Example #18
0
            
                if(x_pix_start < 0):  x_pix_start = 0
                if(x_pix_stop  > nx): x_pix_stop  = int32(nx-1)
                if(y_pix_start < 0):  y_pix_start = 0
                if(y_pix_stop  > ny): y_pix_stop  = int32(ny-1)
    
                
                for xpix in range(x_pix_start, x_pix_stop) : 
                    for ypix in range(y_pix_start, y_pix_stop) : 
                        # physical coordinates of pixel
                        xpixel = pixel_to_physical(xpix,x_start,dx)
                        ypixel = pixel_to_physical(ypix,y_start,dy)
                        zpixel = zplane
 
                        dxpix, dypix, dzpix = [x-xpixel,y-ypixel,z-zpixel]
                        d = distance(dxpix,dypix,dzpix)
                        if (d/h < 2) : 
                            kernel_val = kernel_vals[int(d/(.01*h))]/(h*h*h)
                            image[xpix,ypix] += qt*kernel_val
    
 
    return image

from compare_perf import compare_perf 

N = 160
x = y = z = hs= qts = mass = rhos = np.random.rand(N)
nx=ny=80
args = (x,y,z,hs,qts,mass,rhos,nx,ny, 0.0, 1.0, 0.0, 1.0)
compare_perf(render_image, args, numba = True, backends= ('c',))
Example #19
0
    julia = np.empty((N, N), dtype=np.uint32)
    grid_x = np.linspace(-bound, bound, N)
    for i, x in enumerate(grid_x):
        for j, y in enumerate(grid_x):
            julia[i,j] = kernel(x, y, cr, ci, lim, cutoff=cutoff)
    return julia

def julia(cr, ci, N, bound=1.5, lim=1000., cutoff=1e6):
   grid_x = np.linspace(-bound, bound, N)
   return np.array([[kernel(x,y,cr,ci,lim,cutoff=cutoff) 
                     for x in grid_x] 
                     for y in grid_x])
                    
from compare_perf import compare_perf 
cr=0.285
ci=0.01
N=1200
bound = 1.5 
lim = 1000
cutoff = 1e6 

extra = {}
try:
  from numba import autojit 
  extra['numba'] = autojit(julia_loops)
except:
  print "Failed to import Numba" 

compare_perf(julia, [cr, ci, N, bound, lim, cutoff], numba = False, extra = extra)

Example #20
0
            
                if(x_pix_start < 0):  x_pix_start = 0
                if(x_pix_stop  > nx): x_pix_stop  = int32(nx-1)
                if(y_pix_start < 0):  y_pix_start = 0
                if(y_pix_stop  > ny): y_pix_stop  = int32(ny-1)
    
                
                for xpix in range(x_pix_start, x_pix_stop) : 
                    for ypix in range(y_pix_start, y_pix_stop) : 
                        # physical coordinates of pixel
                        xpixel = pixel_to_physical(xpix,x_start,dx)
                        ypixel = pixel_to_physical(ypix,y_start,dy)
                        zpixel = zplane
 
                        dxpix, dypix, dzpix = [x-xpixel,y-ypixel,z-zpixel]
                        d = distance(dxpix,dypix,dzpix)
                        if (d/h < 2) : 
                            kernel_val = kernel_vals[int(d/(.01*h))]/(h*h*h)
                            image[xpix,ypix] += qt*kernel_val
    
 
    return image

from compare_perf import compare_perf 

N = 1600
x = y = z = hs= qts = mass = rhos = np.random.rand(N)
nx=ny=40
args = (x,y,z,hs,qts,mass,rhos,nx,ny, 0.0, 1.0, 0.0, 1.0)
compare_perf(render_image, args)
Example #21
0
def harris(I):
  """
  Per-pixel corner score of the 2-D array I.

  First differences along both axes are cropped to a common
  (m-1, n-1) shape, and the score det(M) - k*trace(M)^2 is returned
  for every remaining position, with k fixed at 0.05.
  """
  rows, cols = I.shape
  # difference along axis 0, then drop the first column
  grad0 = (I[1:, :] - I[:rows-1, :])[:, 1:]
  # difference along axis 1, then drop the first row
  grad1 = (I[:, 1:] - I[:, :cols-1])[1:, :]

  #
  #   The matrix of derivative products at each point is
  #   M =
  #   | grad0^2        grad0 * grad1 |
  #   | grad1 * grad0  grad1^2       |
  #
  #   and the score at that point is:
  #      det(M) - k*trace(M)^2
  #
  sq0 = grad0 * grad0
  sq1 = grad1 * grad1
  cross = grad0 * grad1
  trace = sq0 + sq1
  determinant = sq0 * sq1 - cross * cross
  sensitivity = 0.05
  return determinant - sensitivity * trace * trace

from compare_perf import compare_perf 
m,n = 1920, 1080
dtype = 'float64'
I = np.random.randn(m,n).astype(dtype)

compare_perf(harris, [I], propagate_exceptions=True, backends= ("c", "openmp"))
Example #22
0
from parakeet import jit, config, c_backend 
 


def covariance(x,y):
  """Mean of the products of the deviations of x and y from their means."""
  x_dev = x - x.mean()
  y_dev = y - y.mean()
  return (x_dev * y_dev).mean()

def fit_simple_regression(x,y):
  """
  Least-squares line through the points (x, y).

  Returns (slope, offset) where slope = cov(x, y) / cov(x, x) and the
  offset makes the line pass through the point of means.
  """
  cov_xy = covariance(x,y)
  var_x = covariance(x,x)
  slope = cov_xy / var_x
  offset = y.mean() - slope * x.mean()
  return slope, offset

import numpy as np 

N = 10**7
x = np.random.randn(N).astype('float64')
slope = 903.29
offset = 102.1
y = slope * x + offset



from compare_perf import compare_perf 
compare_perf(fit_simple_regression, (x,y))
Example #23
0
x = np.random.randn(1500,1500).astype('float32')
w = np.random.randn(3,3).astype('float32')
#compare_perf(conv_3x3_trim, [x,w])

w = np.random.randn(3,3).astype('float32')
# Simple convolution of 3x3 patches from a given array x
# by a 3x3 array of filter weights
 
def conv_3x3_trim_loops(image, weights):
  """
  Convolve the 2-D array image with the 3x3 array weights.

  Only interior pixels are computed; the one-pixel border of the result
  stays zero. The result has image's shape and dtype (np.zeros_like).
  The kernel is applied flipped (image[i-ii+1, j-jj+1] * weights[ii, jj]),
  i.e. this is a true convolution rather than a correlation.
  """
  result = np.zeros_like(image)
  # Bounds come from the `image` argument itself (not from a module-level
  # array), so the function is correct for any input size.
  height, width = image.shape[0], image.shape[1]
  for i in range(1, height-1):
    for j in range(1, width-1):
      for ii in range(3):
        for jj in range(3):
          result[i,j] += image[i-ii+1, j-jj+1] * weights[ii, jj]
  return result

compare_perf(conv_3x3_trim_loops, [x,w])

import parakeet 
def conv_3x3_imap(image, weights):
  """
  3x3 weighted neighbourhood sum of image, expressed with parakeet.imap.

  compute is mapped over an index space of shape (w-2, h-2), where
  (w, h) is image.shape, so the output is two smaller than the input
  along each axis.
  """
  # Python 2 tuple-parameter syntax: the index pair is unpacked in the
  # signature.
  def compute((i,j)):
      # accumulate in float32
      total = np.float32(0.0)
      for ii in xrange(3):
          for jj in xrange(3):
            # NOTE(review): if imap indices start at 0 (presumably -- confirm),
            # i+ii-1 is -1 for i == 0, ii == 0, which reads from the far edge
            # of the image. Also, the window here is not flipped, whereas
            # conv_3x3_trim_loops indexes image[i-ii+1, j-jj+1].
            total += image[i+ii-1, j + jj - 1] * weights[ii, jj]
      return total 
  w,h = image.shape
  return parakeet.imap(compute, (w-2,h-2))
compare_perf(conv_3x3_imap, [x,w], backends=('openmp', 'cuda',))
Example #24
0
                if (x_pix_stop > nx): x_pix_stop = int32(nx - 1)
                if (y_pix_start < 0): y_pix_start = 0
                if (y_pix_stop > ny): y_pix_stop = int32(ny - 1)

                for xpix in range(x_pix_start, x_pix_stop):
                    for ypix in range(y_pix_start, y_pix_stop):
                        # physical coordinates of pixel
                        xpixel = pixel_to_physical(xpix, x_start, dx)
                        ypixel = pixel_to_physical(ypix, y_start, dy)
                        zpixel = zplane

                        dxpix, dypix, dzpix = [
                            x - xpixel, y - ypixel, z - zpixel
                        ]
                        d = distance(dxpix, dypix, dzpix)
                        if (d / h < 2):
                            kernel_val = kernel_vals[int(
                                d / (.01 * h))] / (h * h * h)
                            image[xpix, ypix] += qt * kernel_val

    return image


from compare_perf import compare_perf

N = 160
x = y = z = hs = qts = mass = rhos = np.random.rand(N)
nx = ny = 80
args = (x, y, z, hs, qts, mass, rhos, nx, ny, 0.0, 1.0, 0.0, 1.0)
compare_perf(render_image, args, numba=True, backends=('c', ))
Example #25
0
import numpy as np 


def dot(x,y):
    """Min-plus "inner product": the smallest entry of the elementwise sum x + y."""
    sums = x + y
    return np.min(sums)

def matmult_high_level(X,Y):
  """
  Min-plus matrix product of X and Y built from one `dot` call per entry.

  Entry [i, j] of the result is dot(row i of X, column j of Y); the
  columns are obtained by iterating over Y.T.
  """
  return np.array([
      [dot(row, col) for col in Y.T]
      for row in X
  ])

def matmult_loops(X,Y,Z):
  """
  Min-plus matrix product of X (m x d) and Y (d x n), written into Z.

  Z[i, j] = min over k of X[i, k] + Y[k, j]. Z is overwritten in place
  and also returned. The running minimum is seeded with the k = 0 term,
  so the shared dimension d must be at least 1.
  """
  m, d = X.shape
  n = Y.shape[1]
  # `range` instead of `xrange` so the function also runs on Python 3
  for i in range(m):
    for j in range(n):
      total = X[i,0] + Y[0,j]
      for k in range(1,d):
        total = min(total, X[i,k] + Y[k,j])
      Z[i,j] = total
  return Z

n, d = 500, 500
m = 500
X = np.random.randn(m,d)
Y = np.random.randn(d,n)
Z = np.zeros((m,n))
from compare_perf import compare_perf

compare_perf(matmult_high_level, [X,Y], cpython=True, numba=False)
compare_perf(matmult_loops, [X, Y, Z], cpython=False)

                # set the minimum h to be equal to half pixel width
                #                h = max_d_curr*.5
                #h = max(h,0.55*dx)
                
                # particle pixel center
                xpos = physical_to_pixel(x,xmin,dx)
                ypos = physical_to_pixel(y,ymin,dy)
    
                left  = xpos-k/2
                upper = ypos-k/2

                for i in xrange(0,k) : 
                    for j in xrange(0,k): 
                        if ((i+left>=0) and (i+left < nx) and (j+upper >=0) and (j+upper<ny)) : 
                            image[(i+left),(j+upper)] += kernel[i,j]*qt


            start_ind = end_ind

    return image
  
N = 20
x = y = z = hs= qts = mass = rhos = np.random.rand(N)
nx=ny=100
args = (x, y, qts,hs, nx, ny, 0.0, 1.0, 0.0, 1.0,1)

template_kernel_cpu(*args)
from compare_perf import compare_perf        

compare_perf(template_kernel_cpu, args)
Example #27
0
def harris(I):
  """
  Per-pixel corner score of the 2-D array I.

  First differences along both axes are cropped to a common
  (m-1, n-1) shape, and the score det(M) - k*trace(M)^2 is returned
  for every remaining position. k is the float32 value 0.05.
  """
  rows, cols = I.shape
  # difference along axis 0, then drop the first column
  grad0 = (I[1:, :] - I[:rows-1, :])[:, 1:]
  # difference along axis 1, then drop the first row
  grad1 = (I[:, 1:] - I[:, :cols-1])[1:, :]

  #
  #   The matrix of derivative products at each point is
  #   M =
  #   | grad0^2        grad0 * grad1 |
  #   | grad1 * grad0  grad1^2       |
  #
  #   and the score at that point is:
  #      det(M) - k*trace(M)^2
  #
  sq0 = grad0 * grad0
  sq1 = grad1 * grad1
  cross = grad0 * grad1
  trace = sq0 + sq1
  determinant = sq0 * sq1 - cross * cross
  sensitivity = np.float32(0.05)
  return determinant - sensitivity * trace * trace

from compare_perf import compare_perf 
m,n = 2400, 2400
dtype = 'float32' 
I = (np.random.randn(m,n) ** 2).astype(dtype)

compare_perf(harris, [I], propagate_exceptions=True)
Example #28
0
  def matmult_loops(X,Y,Z):
    m, d = X.shape
    n = Y.shape[1]
    for i in xrange(m):
      for j in xrange(n):
        total = X[i,0] * Y[0,j] 
        for k in xrange(1,d):
          total += X[i,k] * Y[k,j]
        Z[i,j] = total 
  
  def call_numba(X,Y):
    Z = np.zeros((X.shape[0],Y.shape[1])).astype(dtype)
    matmult_loops(X,Y,Z)
    return Z 

  extra['numba'] = call_numba 

except:
  print "Failed to import Numba" 
  pass 

compare_perf(matmult_high_level, [X,Y],
             cpython=True,
             # numba can't run the nested comprehensions so we use
             # a special loopy version instead 
             numba=False,
             extra = extra, 
             suppress_output = False,
             propagate_exceptions = False)

Example #29
0
import parakeet 
def growcut_par(image, state, window_radius):
    """
    One GrowCut update step, with one `attack` evaluation per pixel.

    For each pixel the surrounding window (clipped to the image bounds) is
    scanned for the neighbour with the strongest attack; `attack` is
    mapped over all (row, column) positions with parakeet.imap.

    state[i, j, 0] is read as the colony label and state[i, j, 1] as its
    strength. image is indexed as image[i, j, :].
    """
    height = image.shape[0]
    width = image.shape[1]
    # Python 2 tuple-parameter syntax: the index pair is unpacked in the
    # signature.
    def attack((i,j)):
            # start from the pixel's current owner and strength
            winning_colony = state[i, j, 0]
            defense_strength = state[i, j, 1]
            # window of radius window_radius around (i, j), clipped to the image
            for jj in xrange(max(j-window_radius,0), min(j+window_radius+1, width)):
                for ii in xrange(max(i-window_radius, 0), min(i+window_radius+1, height)):
                    # skip the pixel itself
                    if ii != i or jj != j:
                        d = image[i, j, :] - image[ii, jj, :]
                        s = np.sum(d**2) 
                        # 1 minus the Euclidean distance scaled by sqrt(3)
                        # (presumably the maximum distance for 3 channels in
                        # [0, 1] -- confirm)
                        gval = 1.0 - np.sqrt(s) / np.sqrt(3)
                        attack_strength = gval * state[ii, jj, 1]
                        # a strictly stronger attacker takes the pixel over
                        if attack_strength > defense_strength:
                            defense_strength = attack_strength
                            winning_colony = state[ii, jj, 0]
            return [winning_colony, defense_strength]
    return parakeet.imap(attack, (height, width))

from compare_perf import compare_perf 

import time 
t = time.time()
growcut_python(image, state, state_next, window_radius)
t2 = time.time()
print "Python time", t2 - t
compare_perf(growcut_par, [image, state, window_radius], suppress_output = False, propagate_exceptions = True)


Example #30
0
    ''' Computes the number of iterations `n` such that 
        |z_n| > `lim`, where `z_n = z_{n-1}**2 + c`.
    '''
    count = 0
    while ((zr*zr + zi*zi) < (lim*lim)) and count < cutoff:
        zr, zi = zr * zr - zi * zi + cr, 2 * zr * zi + ci
        count += 1
    return count

def julia_loops(cr, ci, N, bound=1.5, lim=1000., cutoff=1e6):
    ''' Loop-based evaluation of `kernel` on an N x N grid spanning
        [-bound, bound] on both axes.  Each entry is filled one at a time;
        the first index follows x and the second follows y.  The result is
        a uint32 array.
    '''
    axis = np.linspace(-bound, bound, N)
    counts = np.empty((N, N), dtype=np.uint32)
    for row, x in enumerate(axis):
        for col, y in enumerate(axis):
            counts[row, col] = kernel(x, y, cr, ci, lim, cutoff=cutoff)
    return counts

from compare_perf import compare_perf 
cr=0.285
ci=0.01
N=100
bound = 1.5 
lim = 1000
cutoff = 1e6 

compare_perf(julia_loops, [cr, ci, N, bound, lim, cutoff])

Example #31
0
from parakeet import jit, config, c_backend 
 


def covariance(x,y):
  """Mean of the products of the deviations of x and y from their means."""
  x_dev = x - x.mean()
  y_dev = y - y.mean()
  return (x_dev * y_dev).mean()

def fit_simple_regression(x,y):
  """
  Least-squares line through the points (x, y).

  Returns (slope, offset) where slope = cov(x, y) / cov(x, x) and the
  offset makes the line pass through the point of means.
  """
  cov_xy = covariance(x,y)
  var_x = covariance(x,x)
  slope = cov_xy / var_x
  offset = y.mean() - slope * x.mean()
  return slope, offset

import numpy as np 

N = 2*10**7
x = np.random.randn(N).astype('float64')
slope = 903.29
offset = 102.1
y = slope * x + offset



from compare_perf import compare_perf 
compare_perf(fit_simple_regression, (x,y), numba=True)
Example #32
0
        pzi = points[i, 2]
        total = 0.0
        for j in xrange(n_weights):
            weight_j = weights[j]
            xj = pos[j, 0]
            yj = pos[j, 1]
            zj = pos[j, 2]
            dx = pxi - pos[j, 0]
            dy = pyi - pos[j, 1]
            dz = pzi - pos[j, 2]
            dr = 1.0 / np.sqrt(dx * dx + dy * dy + dz * dz)
            total += weight_j * dr
            sum_array3d[i, 0] += weight_j * dx
            sum_array3d[i, 1] += weight_j * dy
            sum_array3d[i, 2] += weight_j * dz
        return total

    sum_array = np.array([compute(i) for i in xrange(n_points)])
    return sum_array, sum_array3d


n_points = 200
n_weights = 400
pos = np.random.randn(n_weights, 3)
weights = np.random.randn(n_weights)
points = np.random.randn(n_points, 3)

from compare_perf import compare_perf

compare_perf(summation, [pos, weights, points])
Example #33
0
        for offset in np.ndindex(wsize):
            neighbor_idx = tuple(
                mode(p, o - w / 2, w) for (p, o, w) in zip(pos, offset, wsize))
            result[pos] &= (data[neighbor_idx] <= myval)
    return result


@parakeet.jit
def parakeet_local_maxima(data, wsize, mode=wrap):
    def is_max(pos):
        def is_smaller_neighbor(offset):
            neighbor_idx = tuple(
                mode(p, o - w / 2, w) for (p, o, w) in zip(pos, offset, wsize))
            return data[neighbor_idx] <= data[pos]

        return np.all(parakeet.imap(is_smaller_neighbor, wsize))

    return parakeet.imap(is_max, data.shape)


# not sure how to get numba to auto-jit size generic code
# get error: "FAILED with KeyError 'sized_pointer(npy_intp, 4)'"
#import numba
#numba_local_maxima = numba.autojit(python_local_maxima)

from compare_perf import compare_perf

shape = (30, 30, 20, 12)
x = np.random.randn(*shape)
compare_perf(local_maxima, [x, shape])
Example #34
0
from parakeet import jit, config, c_backend


def covariance(x, y):
    """Mean of the products of the deviations of x and y from their means."""
    x_dev = x - x.mean()
    y_dev = y - y.mean()
    return (x_dev * y_dev).mean()


def fit_simple_regression(x, y):
    """
    Least-squares line through the points (x, y).

    Returns (slope, offset) where slope = cov(x, y) / cov(x, x) and the
    offset makes the line pass through the point of means.
    """
    cov_xy = covariance(x, y)
    var_x = covariance(x, x)
    slope = cov_xy / var_x
    offset = y.mean() - slope * x.mean()
    return slope, offset


import numpy as np

N = 2 * 10**7
x = np.random.randn(N).astype('float64')
slope = 903.29
offset = 102.1
y = slope * x + offset

from compare_perf import compare_perf
compare_perf(fit_simple_regression, (x, y), numba=True)
Example #35
0
        for xidx in xrange(ndims):
          curr_dist += (x[xidx] - centroid[xidx])**2
        if curr_dist < min_dist:
          min_dist = curr_dist
          min_idx = cidx
      A[i] = min_idx
    # recompute the clusters by averaging data points 
    # assigned to them 
    for cidx in xrange(k):
      # reset centroids
      for dim_idx in xrange(ndims):
        C[cidx, dim_idx] = 0
      # add each data point only to its assigned centroid
      cluster_count = 0
      for i in xrange(n):
        if A[i] == cidx:
          C[cidx, :] += X[i, :]
          cluster_count += 1
      C[cidx, :] /= cluster_count 
  return C      

n, d = 10**4, 50
X = np.random.randn(n,d)
k = 25

from compare_perf import compare_perf

compare_perf(kmeans_comprehensions, [X, k, 5],cpython=False)

compare_perf(kmeans_loops, [X, k, 5], cpython=True)
Example #36
0
    for i in range(steps):
        previous_grid[:, :] = old_grid
        old_grid[:, :] = grid 
        for x in range(l_x):
            for y in range(l_y):
                grid[x,y] = 0.0
                if x + 1 < l_x:
                    grid[x,y] += old_grid[x+1,y]
                if 0 < x-1 and x - 1 < l_x:
                    grid[x,y] += old_grid[x-1,y]
                if y+1 < l_y:
                    grid[x,y] += old_grid[x,y+1]
                if 0 < y-1 and y-1 < l_y:
                    grid[x,y] += old_grid[x,y-1]
                grid[x,y] /= 2.0
                grid[x,y] -= previous_grid[x,y]
    return grid

N = 1000
steps = 20 
input_grid = np.random.randn(N,N).astype('float64')

import parakeet
parakeet.config.print_generated_code = True 

from compare_perf import compare_perf 
compare_perf(fdtd, [input_grid, steps], backends = ('c', 'openmp', 'cuda'))


Example #37
0
def harris(I):
    """
    Per-pixel corner score of the 2-D array I.

    First differences along both axes are cropped to a common
    (m-1, n-1) shape, and the score det(M) - k*trace(M)^2 is returned
    for every remaining position. k is the float32 value 0.05.
    """
    rows, cols = I.shape
    # difference along axis 0, then drop the first column
    grad0 = (I[1:, :] - I[:rows - 1, :])[:, 1:]
    # difference along axis 1, then drop the first row
    grad1 = (I[:, 1:] - I[:, :cols - 1])[1:, :]

    #
    #   The matrix of derivative products at each point is
    #   M =
    #   | grad0^2        grad0 * grad1 |
    #   | grad1 * grad0  grad1^2       |
    #
    #   and the score at that point is:
    #      det(M) - k*trace(M)^2
    #
    sq0 = grad0 * grad0
    sq1 = grad1 * grad1
    cross = grad0 * grad1
    trace = sq0 + sq1
    determinant = sq0 * sq1 - cross * cross
    sensitivity = np.float32(0.05)
    return determinant - sensitivity * trace * trace


from compare_perf import compare_perf
m, n = 2400, 2400
dtype = 'float32'
I = (np.random.randn(m, n)**2).astype(dtype)

compare_perf(harris, [I], propagate_exceptions=True)
Example #38
0
    l_x = grid.shape[0]
    l_y = grid.shape[1]

    for i in range(steps):
        previous_grid[:, :] = old_grid
        old_grid[:, :] = grid 
        for x in range(l_x):
            for y in range(l_y):
                grid[x,y] = 0.0
                if x + 1 < l_x:
                    grid[x,y] += old_grid[x+1,y]
                if 0 < x-1 and x - 1 < l_x:
                    grid[x,y] += old_grid[x-1,y]
                if y+1 < l_y:
                    grid[x,y] += old_grid[x,y+1]
                if 0 < y-1 and y-1 < l_y:
                    grid[x,y] += old_grid[x,y-1]

                grid[x,y] /= 2.0
                grid[x,y] -= previous_grid[x,y]

    return grid

N = 1000
steps = 20 
input_grid = np.random.randn(N,N).astype('float32')

from compare_perf import compare_perf 
compare_perf(fdtd, [input_grid, steps], backends = ('c', 'openmp', 'cuda'))

Example #39
0
    """
  Accepted response on stack overflow by phillip
  """
    gg = np.outer(g, g)
    gggg = np.outer(gg, gg).reshape(4 * g.shape)
    axes = ((0, 2, 4, 6), (0, 1, 2, 3))
    return np.tensordot(gggg, T, axes)


T = np.random.randn(n, n, n, n)
g = np.random.randn(n, n)

from compare_perf import compare_perf

compare_perf(
    rotT_loops, [T, g], extra={"numpy_tensordot": rotT_numpy}, numba=False, backends=("c", "openmp"), cpython=True
)


def rotT_par(T, g):
    def compute_elt(i, j, k, l):
        total = 0.0
        for ii in range(n):
            for jj in range(n):
                for kk in range(n):
                    for ll in range(n):
                        gg = g[ii, i] * g[jj, j] * g[kk, k] * g[ll, l]
                        total += gg * T[ii, jj, kk, ll]
        return total

    return np.array(
Example #40
0
                u[i, j] = mu * (temp_u[i + 1, j] + temp_u[i - 1, j] +
                                temp_u[i, j + 1] + temp_u[i, j - 1] -
                                4 * temp_u[i, j])

        temp = u
        u = temp_u
        temp_u = temp

    return u

def diffuse_array_expressions(iter_num):
    """Run `iter_num` five-point stencil sweeps using NumPy slice expressions.

    The grid size (`Lx`, `Ly`) and the coefficient `mu` are read from
    module-level globals. Two buffers are used: each sweep writes the interior
    of one buffer from the other and then the two are swapped.

    Args:
        iter_num: number of sweeps to perform.

    Returns:
        The float64 (Lx, Ly) array bound to `u` after the last swap. Note that
        because of the swap this is the buffer that was *read* during the
        final sweep, not the one that was just written.
    """
    u = np.zeros((Lx, Ly), dtype=np.float64)
    temp_u = np.zeros_like(u)
    # Floor division keeps the seed index an integer. `Lx / 2` is a float
    # under Python 3 and NumPy rejects float indices with an IndexError.
    temp_u[Lx // 2, Ly // 2] = 1000.0

    for _ in range(iter_num):
        # Interior update only; the outermost rows and columns are never written.
        u[1:-1, 1:-1] = mu * (temp_u[2:, 1:-1] + temp_u[:-2, 1:-1] +
                              temp_u[1:-1, 2:] + temp_u[1:-1, :-2] -
                              4 * temp_u[1:-1, 1:-1])

        # Swap buffers so the next sweep reads what was just written.
        u, temp_u = temp_u, u
    return u


from compare_perf import compare_perf 

# Benchmark both diffusion implementations with the same argument list [N].
# `N` and `diffuse_loops` are defined outside this excerpt.
compare_perf(diffuse_loops, [N], numba=True)
compare_perf( diffuse_array_expressions, [N], numba =True)
Example #41
0
    pxi = points[i, 0]
    pyi = points[i, 1]
    pzi = points[i, 2]
    total = 0.0
    for j in xrange(n_weights):
      weight_j = weights[j]
      xj = pos[j,0]
      yj = pos[j,1]
      zj = pos[j,2]
      dx = pxi - pos[j, 0]
      dy = pyi - pos[j, 1]
      dz = pzi - pos[j, 2]
      dr = 1.0/np.sqrt(dx*dx + dy*dy + dz*dz)
      total += weight_j * dr
      sum_array3d[i,0] += weight_j * dx
      sum_array3d[i,1] += weight_j * dy
      sum_array3d[i,2] += weight_j * dz
    return total 
  sum_array = np.array([compute(i) for i in xrange(n_points)])
  return sum_array, sum_array3d

# Problem sizes and random inputs for the `summation` benchmark. These are
# module-level names: the visible part of `summation` reads `n_points` and
# `n_weights` directly rather than taking them as arguments.
n_points = 200
n_weights = 400
pos = np.random.randn(n_weights, 3)
weights = np.random.randn(n_weights)
points = np.random.randn(n_points, 3)

from compare_perf import compare_perf 

compare_perf(summation, [pos, weights, points])
Example #42
0
# Simple convolution of 3x3 patches from a given array x
# by a 3x3 array of filter weights


def conv_3x3_trim_loops(image, weights):
    """Convolve the interior of `image` with a 3x3 kernel using explicit loops.

    For every interior pixel (i, j) the result accumulates
    ``image[i - ii + 1, j - jj + 1] * weights[ii, jj]`` over the 3x3 kernel,
    i.e. the kernel is applied flipped in both axes. The first and last row
    and column of the result are left at zero.

    Args:
        image: 2-D array to filter.
        weights: array indexed as ``weights[ii, jj]`` for ii, jj in 0..2.

    Returns:
        An array created with ``np.zeros_like(image)`` holding the filtered
        interior.
    """
    result = np.zeros_like(image)
    # Bounds come from the `image` argument; the previous code read a global
    # `x` here, which broke (or silently mis-sized the loops) for any other
    # input.
    n_rows = image.shape[0]
    n_cols = image.shape[1]
    for i in range(1, n_rows - 1):
        for j in range(1, n_cols - 1):
            for ii in range(3):
                for jj in range(3):
                    result[i, j] += (image[i - ii + 1, j - jj + 1] *
                                     weights[ii, jj])
    return result


# Benchmark the loop-based convolution. `x`, `w` and `compare_perf` are not
# defined or imported in this excerpt and must already be in scope.
compare_perf(conv_3x3_trim_loops, [x, w])

import parakeet


def conv_3x3_imap(image, weights):
    """Apply a 3x3 kernel to `image` by mapping a per-index function with Parakeet.

    Args:
        image: 2-D array; its shape is unpacked as ``(w, h)``.
        weights: array indexed as ``weights[ii, jj]`` for ii, jj in 0..2.

    Returns:
        Whatever ``parakeet.imap`` returns for the index space
        ``(w - 2, h - 2)`` (presumably an array of that shape -- confirm
        against the Parakeet documentation).
    """
    def compute(index):
        # Explicit unpacking replaces the tuple-parameter signature
        # `def compute((i, j))`, which PEP 3113 removed from Python 3. The
        # function still takes a single index-tuple argument.
        i, j = index
        total = np.float32(0.0)
        for ii in range(3):
            for jj in range(3):
                # NOTE(review): if imap indices start at 0, `i + ii - 1` and
                # `j + jj - 1` reach -1 on the first row/column, which NumPy
                # semantics would wrap to the opposite edge -- confirm this
                # offset is intended.
                total += image[i + ii - 1, j + jj - 1] * weights[ii, jj]
        return total

    w, h = image.shape
    return parakeet.imap(compute, (w - 2, h - 2))
Example #43
0
import numpy as np 
from compare_perf import compare_perf

# Simple convolution of 3x3 patches from a given array x
# by a 3x3 array of filter weights
 
def conv_3x3_trim(x, weights):
  """Filter every interior pixel of `x` with a 3x3 `weights` window.

  Each output value is the sum of the element-wise product between `weights`
  and the 3x3 patch of `x` centred on an interior pixel (no kernel flip).

  Args:
    x: 2-D array to filter.
    weights: 3x3 array multiplied element-wise against each patch.

  Returns:
    Array of shape (x.shape[0] - 2, x.shape[1] - 2): one value per pixel that
    has a full 3x3 neighbourhood.
  """
  n_rows = x.shape[0]
  n_cols = x.shape[1]
  # Interior centres run from 1 through shape - 2 inclusive. The earlier upper
  # bound of `shape - 2` dropped the last valid row and column, disagreeing
  # with the loop-based and imap-based variants.
  return np.array([[(x[i-1:i+2, j-1:j+2] * weights).sum()
                    for j in range(1, n_cols - 1)]
                   for i in range(1, n_rows - 1)])
 

# Benchmark inputs: a 1200 x 1200 float32 image and a 3 x 3 float32 kernel.
x = np.random.randn(1200,1200).astype('float32')
w = np.random.randn(3,3).astype('float32')
compare_perf(conv_3x3_trim, [x,w])


# A fresh random kernel is drawn here, rebinding `w` for the code that follows.
w = np.random.randn(3,3).astype('float32')
# Simple convolution of 3x3 patches from a given array x
# by a 3x3 array of filter weights
 
def conv_3x3_trim_loops(image, weights):
  """Loop-based 3x3 convolution over the interior pixels of `image`.

  Each interior result[i, j] is the sum over ii, jj in 0..2 of
  ``image[i - ii + 1, j - jj + 1] * weights[ii, jj]`` (kernel flipped in both
  axes). Border rows and columns of the result stay zero.

  Args:
    image: 2-D array to filter.
    weights: array indexed as ``weights[ii, jj]`` for ii, jj in 0..2.

  Returns:
    An array created with ``np.zeros_like(image)`` holding the filtered
    interior.
  """
  result = np.zeros_like(image)
  # Take the loop bounds from the `image` argument rather than the global `x`
  # the earlier code referenced, so the function works for any input.
  height = image.shape[0]
  width = image.shape[1]
  for i in range(1, height - 1):
    for j in range(1, width - 1):
      for ii in range(3):
        for jj in range(3):
          result[i, j] += image[i - ii + 1, j - jj + 1] * weights[ii, jj]
  return result

Example #44
0
                                temp_u[i, j + 1] + temp_u[i, j - 1] -
                                4 * temp_u[i, j])

        temp = u
        u = temp_u
        temp_u = temp

    return u


def diffuse_array_expressions(iter_num):
    """Perform `iter_num` five-point stencil sweeps with whole-array slicing.

    Grid dimensions `Lx`, `Ly` and the coefficient `mu` come from module-level
    globals. The function alternates between two buffers: one sweep writes the
    interior of `u` from `temp_u`, then the names are exchanged.

    Args:
        iter_num: how many sweeps to run.

    Returns:
        The float64 (Lx, Ly) array bound to `u` after the final exchange,
        which is the buffer the last sweep read from.
    """
    u = np.zeros((Lx, Ly), dtype=np.float64)
    temp_u = np.zeros_like(u)
    # Use floor division for the centre index: true division yields a float
    # on Python 3, and NumPy raises IndexError for non-integer indices.
    centre = (Lx // 2, Ly // 2)
    temp_u[centre] = 1000.0

    for _ in range(iter_num):
        # Only interior cells are updated; boundary cells keep their value.
        u[1:-1, 1:-1] = mu * (
            temp_u[2:, 1:-1] + temp_u[:-2, 1:-1] + temp_u[1:-1, 2:]
            + temp_u[1:-1, :-2] - 4 * temp_u[1:-1, 1:-1])

        # Exchange the buffers for the next sweep.
        u, temp_u = temp_u, u
    return u


from compare_perf import compare_perf

# Run both diffusion variants through compare_perf with the argument list [N].
# `N` and `diffuse_loops` are defined outside this excerpt.
compare_perf(diffuse_loops, [N], numba=True)
compare_perf(diffuse_array_expressions, [N], numba=True)
Example #45
0
    return distance_matrix


def arc_distance_numpy_broadcast(a, b):
    """
    Pairwise arc distance between every row of a and every row of b.

    Column 0 of each input is used as theta and column 1 as phi. Entry [i, j]
    of the returned matrix is the distance between a[i] and b[j]; the full
    grid is produced by NumPy broadcasting rather than explicit loops.
    """
    # Columns of `a` become (len(a), 1) and columns of `b` become (1, len(b)),
    # so every arithmetic step below broadcasts to (len(a), len(b)).
    theta_a = a[:, 0, np.newaxis]
    phi_a = a[:, 1, np.newaxis]
    theta_b = b[np.newaxis, :, 0]
    phi_b = b[np.newaxis, :, 1]

    half_dtheta_sq = np.sin((theta_b - theta_a) / 2) ** 2
    half_dphi_sq = np.sin((phi_b - phi_a) / 2) ** 2
    h = half_dtheta_sq + np.cos(theta_a) * np.cos(theta_b) * half_dphi_sq

    return 2 * np.arctan2(np.sqrt(h), np.sqrt(1 - h))

from compare_perf import compare_perf 

# Two sets of n random points; np.random.rand draws uniformly from [0, 1).
n = 1000 
import numpy as np
a = np.random.rand(n, 2)
b = np.random.rand(n, 2)

# Benchmark the three implementations on identical inputs. The nested-loop
# and tile variants are defined outside this excerpt.
compare_perf(arc_distance_python_nested_for_loops, [a,b])
compare_perf(arc_distance_numpy_broadcast, [a,b])
compare_perf(arc_distance_numpy_tile, [a,b])