Ejemplo n.º 1
0
def gen_calc_variance_fullintegral(code, out_reg, position, integral_ptr, sq_integral_ptr, haar_size, block_size):
	''' Emit the variance term of a haar window using the full integral images.

	Yields instructions that leave sqrt(area*sq_sum - sum*sum) in out_reg,
	where sum and sq_sum are the window sums produced by gen_fullintegral_sum
	on the integral and squared integral buffers and area is
	haar_width*haar_height. When that quantity is not greater than zero,
	out_reg is set to 1 instead.
	'''
	width, height = block_size
	haar_width, haar_height = haar_size
	window_area = haar_width*haar_height

	# one extra row/column so that the window covers x: [0, w], y: [0, h],
	# matching the python VJ implementation
	window = (0, 0, haar_width + 1, haar_height + 1)

	with scoped_alloc(code, 1) as sq_sum:
		# out_reg = window sum over the integral image
		for instr in gen_fullintegral_sum(code, out_reg, position, window, integral_ptr, block_size):
			yield instr

		# sq_sum = window sum over the squared integral image
		for instr in gen_fullintegral_sum(code, sq_sum, position, window, sq_integral_ptr, block_size):
			yield instr

		# sq_sum *= area (the area register is only needed for this step)
		with scoped_alloc(code, 1) as area_reg:
			yield Imm(area_reg, window_area)
			yield Mul(sq_sum, sq_sum, area_reg)
		# out_reg = area*sq_sum - sum^2
		yield Mul(out_reg, out_reg, out_reg)
		yield Sub(out_reg, sq_sum, out_reg)
		# compare against zero; the conditional instructions below use the result
		with scoped_alloc(code, 1) as zero:
			yield Imm(zero, 0.)
			yield Cmp(out_reg, zero)
		yield Sqrt(out_reg, out_reg, cond='GT')  # positive: take the square root
		yield Imm(out_reg, 1, cond='LE')         # otherwise: fall back to 1
Ejemplo n.º 2
0
def gen_gather_local_max(code, block_size, args):
	''' Emit code that stores, per pixel, the maximum over a masked neighbourhood.

	args['filter'] supplies the neighbourhood via size() and mask; optional
	args['in_ptr'] (default 0) and args['out_ptr'] (default rows*cols) select
	the source and destination buffers.
	'''
	mask_filter = args['filter']
	rows, cols = block_size
	frows, fcols = mask_filter.size()
	hfrow, hfcol = [extent//2 for extent in mask_filter.size()]

	out_ptr = args.get('out_ptr', rows*cols)
	in_ptr = args.get('in_ptr', 0)

	for i in xrange(rows):
		for j in xrange(cols):
			with scoped_alloc(code, 1) as best:
				# start from -inf so the first enabled tap always wins
				yield Imm(best, -float('inf'))
				for ii in xrange(frows):
					for jj in xrange(fcols):
						if mask_filter.mask[ii][jj]:
							# tap position relative to the centred mask, as (x, y)
							tap = (j + jj - hfcol, i + ii - hfrow)
							with scoped_alloc(code, 1) as cand:
								for instr in load_mem_value(code, in_ptr, tap, cand, block_size):
									yield instr
								yield Cmp(cand, best)
								yield Mov(best, cand, cond='GT')
				yield MemWImm(out_ptr + i*cols + j, best)
Ejemplo n.º 3
0
def gen_threshold(code, block_size, args):
    """Yield instructions that binarise the input block against args['th'].

    Words are read sequentially starting at address 0; each one is replaced
    by 255 when it compares greater than the threshold and by 0 otherwise.
    Results are written sequentially starting at address
    block_size[0]*block_size[1].
    """
    th = args['th']
    with scoped_alloc(code, 6) as (dst_r, limit_r, one_r, white_r, black_r, src_r):
        # destination pointer: the buffer right behind the input block
        yield Imm(dst_r, block_size[0]*block_size[1])

        # constants kept in registers for the whole kernel
        yield Imm(limit_r, th)
        yield Imm(one_r, 1)
        yield Imm(white_r, 255)
        yield Imm(black_r, 0)

        # source pointer starts at address 0
        yield Mov(src_r, black_r)

        last_row = block_size[0] - 1
        last_col = block_size[1] - 1
        for i in xrange(block_size[0]):
            for j in xrange(block_size[1]):
                with scoped_alloc(code, 1) as pix:
                    yield MemR(pix, src_r)

                    # default to 0, then overwrite with 255 when pix > th
                    yield Cmp(pix, limit_r)
                    yield Mov(pix, black_r)
                    yield Mov(pix, white_r, 'GT')

                    yield MemW(dst_r, pix)
                    # the pointers are not advanced past the final pixel
                    if j != last_col or i != last_row:
                        yield Add(src_r, src_r, one_r)
                        yield Add(dst_r, dst_r, one_r)
Ejemplo n.º 4
0
	def codegen(code, block_size, args):
		''' Yield a fixed add/subtract instruction sequence on one accumulator.

		Clears acc, then for i = 0, 1, 2 adds and subtracts the word read from
		address 2, followed by the word read from address i (which is read
		again before the subtract). Finally m is added to itself.
		block_size and args are not used.
		NOTE(review): presumably a fixture for an optimisation/analysis test,
		so the exact instruction order matters -- confirm against the caller.
		'''
		with scoped_alloc(code, 1) as acc:
			yield Xor(acc, acc, acc)  # acc = 0
			with scoped_alloc(code, 1) as m:
				for i in xrange(3):
					# same address in every iteration
					yield MemRImm(m, 2)
					yield Add(acc, acc, m)
					yield Sub(acc, acc, m)
					# address depends on the loop index; read twice on purpose
					yield MemRImm(m, i)
					yield Add(acc, acc, m)
					yield MemRImm(m, i)
					yield Sub(acc, acc, m)
				yield Add(m, m, m)
Ejemplo n.º 5
0
def gen_abs_value(code, block_size, args):
	''' Emit code writing the absolute value of every word of a buffer.

	The source buffer starts at args['in_ptr'] (default 0) and the result is
	written to args['out_ptr'] (default rows*cols).
	'''
	rows, cols = block_size
	out_ptr = args.get('out_ptr', rows*cols)
	in_ptr = args.get('in_ptr', 0)

	with scoped_alloc(code, 1) as zero:
		yield Imm(zero, 0)
		for i in xrange(rows):
			row_off = i*cols
			for j in xrange(cols):
				with scoped_alloc(code, 1) as val:
					yield MemRImm(val, in_ptr + (row_off + j))
					# negate only when the value compares below zero
					yield Cmp(val, zero)
					yield Neg(val, val, cond='LT')
					yield MemWImm(out_ptr + (row_off + j), val)
Ejemplo n.º 6
0
def gen_full_integral_image(code, src_ptr, integral_ptr, sq_integral_ptr, pe_array_size, block_size):
	''' Emit code for integral images that span the whole PE array.

	First emits the per-block integral images (gen_integral_image), then for
	both the integral and the squared integral buffer adds the values received
	through code.west (row by row) and code.north (column by column),
	forwarding them pe_width-1 and pe_height-1 times respectively.
	NOTE(review): presumably code.west/code.north read the code.out value of
	the neighbouring PE -- confirm against the simulator.
	'''
	width, height = block_size
	pe_width, pe_height = pe_array_size
	for instr in gen_integral_image(code, src_ptr, integral_ptr, sq_integral_ptr, block_size):
		yield instr
	for base in [integral_ptr, sq_integral_ptr]:
		with scoped_alloc(code, 1) as acc:
			# horizontal propagation
			for row in xrange(height):
				# publish the last value of this row
				yield MemRImm(code.out, base+(row+1)*width-1)
				for hop in xrange(pe_width-1):
					for col in xrange(width):
						cell = base + row*width + col
						yield MemRImm(acc, cell)
						yield Add(acc, acc, code.west)
						yield MemWImm(cell, acc)
					# pass the received value on for the next hop
					yield Mov(code.out, code.west)

			# vertical propagation
			for col in xrange(width):
				# publish the value of this column in the last row
				yield MemRImm(code.out, base + width*(height-1) + col)
				for hop in xrange(pe_height-1):
					for row in xrange(height):
						cell = base + row*width + col
						yield MemRImm(acc, cell)
						yield Add(acc, acc, code.north)
						yield MemWImm(cell, acc)
					# pass the received value on for the next hop
					yield Mov(code.out, code.north)
Ejemplo n.º 7
0
def map_neighborhood_to_pixel(code, in_ptr, out_ptr, neighborhood, pixel_op, args, block_size):
    """ Apply a neighborhood-to-pixel operation to every pixel of a block.

    For each pixel an accumulator register is cleared, pixel_op is invoked for
    every non-zero entry of neighborhood with the corresponding neighbouring
    value, and the accumulator is written to out_ptr + bwidth*i + j.

    code         -- code object used for register allocation
    in_ptr       -- base address of the source buffer
    out_ptr      -- base address of the destination buffer
    neighborhood -- 2D mask indexed [row][column]; both dimensions must be odd
    pixel_op     -- generator pixel_op(code, mask_value, value_reg, acc, args,
                    block_size) yielding the instructions for one mask entry
    args         -- passed through to pixel_op unchanged
    block_size   -- (width, height) of the block
    """

    bwidth, bheight = block_size
    # neighborhood is indexed [row][column]: the number of rows is the height
    # and the row length is the width (these were swapped, which mis-centred
    # non-square masks).
    nheight, nwidth = len(neighborhood), len(neighborhood[0])
    assert nheight % 2 != 0 and nwidth % 2 != 0  # mask size must be odd
    h_nheight = nheight // 2
    h_nwidth = nwidth // 2

    def process_pixel(code, in_ptr, pos, acc, neighborhood, pixel_op, args, block_size):
        # Yield the instructions accumulating all enabled mask entries for pos.
        j, i = pos
        for ii, row in enumerate(neighborhood):
            for jj, m in enumerate(row):
                if m:  # works implicitly for booleans and coefficients
                    # position of the neighbour as (x, y), centred on pos
                    apos = (j + jj - h_nwidth, i + ii - h_nheight)
                    with scoped_alloc(code, 1) as v:
                        for x in load_mem_value(code, in_ptr, apos, v, block_size):
                            yield x
                        for x in pixel_op(code, m, v, acc, args, block_size):
                            yield x

    for i in xrange(bheight):
        for j in xrange(bwidth):
            pos = (j, i)
            with scoped_alloc(code, 1) as acc:
                # XXX apply assignment-instead-of-accum-on-first-iteration optimisation
                yield Xor(acc, acc, acc)
                for x in process_pixel(code, in_ptr, pos, acc, neighborhood, pixel_op, args, block_size):
                    yield x
                yield MemWImm(out_ptr + bwidth * i + j, acc)
Ejemplo n.º 8
0
	def codegen(code, block_size, args):
		''' Yield a read-modify-write sequence on memory address 3.

		Loads the words at addresses 3 and 4, stores their sum back to
		address 3 and then reloads address 3 into the same register.
		block_size and args are not used.
		NOTE(review): presumably a fixture checking that the reload after the
		store is handled correctly -- confirm against the caller.
		'''
		with scoped_alloc(code, 2) as (a, b):
			yield MemRImm(a, 3)
			yield MemRImm(b, 4)
			yield Add(a, a, b)
			yield MemWImm(3, a)
			# read back the address that was just written
			yield MemRImm(a, 3)
Ejemplo n.º 9
0
	def pixel_op(code, pin, pout, args, block_size):
		''' Threshold one pixel: pout = 255 if pin > args['th'] else 0.

		pin and pout are the input and output registers; block_size is unused.
		Only the threshold needs a scratch register (the two extra registers
		that used to be allocated here were never referenced).
		'''
		th = args['th']
		with scoped_alloc(code, 1) as th_r:
			yield Imm(th_r, th)
			yield Cmp(pin, th_r)
			yield Imm(pout, 255, cond='GT')
			# xor of a register with itself clears it
			yield Xor(pout, pout, pout, cond='LE')
Ejemplo n.º 10
0
def gen_bbs(code, block_size, args):
    """Yield instructions for a running-average background subtraction step.

    Per pixel the background estimate is updated as
    background = pixel*alpha + background*(1 - alpha) and the result is 255
    when abs(pixel - background) compares greater than args['th'], else 0.
    Memory holds three consecutive buffers of width*height words:
    source, result and background.
    """
    th = args['th']
    alpha = args['alpha']
    width, height = block_size
    n_pixels = width * height
    # buffer base addresses
    src_base = 0
    res_base = n_pixels
    bg_base = 2*n_pixels

    with scoped_alloc(code, 3) as (alpha_r, inv_alpha_r, th_r):
        # parameters live in registers for the whole kernel
        yield Imm(alpha_r, alpha)
        yield Imm(inv_alpha_r, 1-alpha)
        yield Imm(th_r, th)

        for px in xrange(n_pixels):
            # cur     : current pixel
            # bg_prev : previous background value (scaled in place)
            # bg_new  : updated background value
            # diff    : abs(cur - bg_new)
            # flag    : thresholded result
            with scoped_alloc(code, 5) as (cur, bg_prev, bg_new, diff, flag):
                # bg_new = cur*alpha + bg_prev*(1 - alpha)
                yield MemRImm(cur, src_base + px)
                yield Mul(bg_new, cur, alpha_r)
                yield MemRImm(bg_prev, bg_base + px)
                yield Mul(bg_prev, bg_prev, inv_alpha_r)
                yield Add(bg_new, bg_new, bg_prev)
                yield MemWImm(bg_base + px, bg_new)

                # diff = abs(cur - bg_new): subtract in whichever order
                # gives a non-negative result
                yield Cmp(cur, bg_new)
                yield Sub(diff, cur, bg_new, cond='GE')
                yield Sub(diff, bg_new, cur, cond='LT')

                # flag = 255 if diff > th else 0
                yield Cmp(diff, th_r)
                yield Imm(flag, 0)
                yield Imm(flag, 255, cond='GT')
                yield MemWImm(res_base + px, flag)
Ejemplo n.º 11
0
	def codegen(code, block_size, args):
		''' Yield a sequence mixing a compare, conditional writes and a reassignment.

		Compares a (1) with b (2), writes c conditionally under 'LE' and under
		'GT', and overwrites a in between the two conditional instructions.
		block_size and args are not used.
		NOTE(review): presumably a fixture for register/value tracking across
		conditional instructions -- confirm against the caller.
		'''
		with scoped_alloc(code, 4) as (a, b, c, d):
			yield Imm(a, 1)
			yield Imm(b, 2)
			yield Cmp(a, b)
			yield Inv(c, a, cond='LE')
			yield Imm(a, 3) # to check if a is captured before new assignment
			yield Mov(c, b, cond='GT')
			yield Mov(d, c)
Ejemplo n.º 12
0
	def codegen(code, block_size, args):
		''' Yield four rounds of a short register-to-register sequence.

		Each round (x = 0..3) allocates three fresh registers, loads the
		immediates x*4 and x, copies between a and b and finishes with an
		add, a subtract and a multiply. block_size and args are not used.
		NOTE(review): presumably a fixture for an optimisation/analysis test
		-- confirm against the caller.
		'''
		for x in xrange(4):
			with scoped_alloc(code, 3) as (a, b, c):
				yield Imm(b, x*4)
				yield Mov(a, b)
				# a is overwritten right after the copy above
				yield Imm(a, x)
				yield Mov(b, a)
				yield Add(a, a, b)
				yield Sub(c, a, b)
				yield Mul(c, c, c)
Ejemplo n.º 13
0
	def codegen(code, block_size, args):
		''' Yield a fixed add/subtract sequence using immediates.

		Clears acc, then for i = 0, 1, 2 adds and subtracts the immediate 2
		followed by the immediate i. block_size and args are not used.
		NOTE(review): presumably a fixture for an optimisation/analysis test
		-- confirm against the caller.
		'''
		with scoped_alloc(code, 2) as (acc, imm_r):
			yield Xor(acc, acc, acc)  # acc = 0
			for i in xrange(3):
				# same immediate in every iteration
				yield Imm(imm_r, 2)
				yield Add(acc, acc, imm_r)
				yield Sub(acc, acc, imm_r)
				# immediate depends on the loop index
				yield Imm(imm_r, i)
				yield Add(acc, acc, imm_r)
				yield Sub(acc, acc, imm_r)
Ejemplo n.º 14
0
def gen_copy_to_out(code, block_size, args):
    """Yield instructions copying the input block to the output buffer.

    Words are read sequentially starting at address 0 and written
    sequentially starting at address block_size[0]*block_size[1]; both
    pointers are incremented after every word. args is not used.

    The body is indented with spaces only: the original mixed a tab-indented
    statement into space-indented code, which Python 3 rejects with a
    TabError and which only worked on Python 2 because a tab counts as eight
    columns there.
    """
    with scoped_alloc(code, 3) as (out_ptr_r, in_ptr_r, const_1):
        # init pointer to output memory
        yield Imm(out_ptr_r, block_size[0]*block_size[1])

        # init src ptr (xor of a register with itself clears it)
        yield Xor(in_ptr_r, in_ptr_r, in_ptr_r)

        # inc value
        yield Imm(const_1, 1)

        for i in xrange(block_size[0]):
            for j in xrange(block_size[1]):
                with scoped_alloc(code, 1) as tmp:
                    yield MemR(tmp, in_ptr_r)
                    yield MemW(out_ptr_r, tmp)

                    yield Add(in_ptr_r, in_ptr_r, const_1)
                    yield Add(out_ptr_r, out_ptr_r, const_1)
Ejemplo n.º 15
0
def gen_integral_sum(code, out_reg, position, shape, ptr, block_size):
	''' Emit code computing a rectangle sum from an integral image buffer.

	The rectangle (x, y, w, h) is taken relative to position and is assumed
	to lie inside a single block that is at most one block away from the
	originating block.

	Unlike the python implementation, a block covers x in [0, w[ and
	y in [0, h[, so callers must pass width and height incremented by one to
	stay compatible with the violajones sum function.

	The result is left in out_reg.
	'''
	px, py = position
	x, y, w, h = shape
	width, height = block_size
	left = px + x
	top = py + y

	# Rectangles reaching past the block edge are evaluated at the same
	# local coordinates shifted back by one block; the value is fetched
	# through the east/south ports at the end.
	copy_from_right = (left+w-1) >= width
	if copy_from_right:
		left -= width
	copy_from_below = (top+h-1) >= height
	if copy_from_below:
		top -= height

	right = left+w-1
	bottom = top+h-1

	# corners: v1 = im[top][left], v2 = im[top][right],
	#          v3 = im[bottom][left], v4 = im[bottom][right]
	# sum = v1 - v2 - v3 + v4
	with scoped_alloc(code, 1) as corner:
		yield MemRImm(out_reg, ptr + top * width + left)    # v1
		yield MemRImm(corner, ptr + top * width + right)     # v2
		yield Sub(out_reg, out_reg, corner)
		yield MemRImm(corner, ptr + bottom * width + left)   # v3
		yield Sub(out_reg, out_reg, corner)
		yield MemRImm(corner, ptr + bottom * width + right)  # v4
		yield Add(out_reg, out_reg, corner)

	# rectangles outside this PE block: publish the value, then read the
	# value arriving through the relevant port(s)
	if copy_from_right or copy_from_below:
		yield Mov(code.out, out_reg)
		if copy_from_right and copy_from_below:
			yield Mov(code.out, code.east)
			yield Mov(out_reg, code.south)
		elif copy_from_right:
			yield Mov(out_reg, code.east)
		else:
			yield Mov(out_reg, code.south)
Ejemplo n.º 16
0
def gen_calc_variance(code, out_reg, position, integral_ptr, sq_integral_ptr, haar_size, block_size):
	''' Emit the variance term of a haar window from per-block integral images.

	The window is split with split_shape_across_blocks; the partial sums of
	the integral and squared integral buffers are accumulated over all parts.
	out_reg receives sqrt(area*sq_sum - sum*sum) with
	area = haar_width*haar_height, or 1 when that quantity is not greater
	than zero. Instructions belonging to every part after the first are
	tagged through code.tag_com_overhead_instr.
	'''
	width, height = block_size
	haar_width, haar_height = haar_size

	# one extra row/column so that the window covers x: [0, w], y: [0, h],
	# matching the python VJ implementation
	window = (0, 0, haar_width + 1, haar_height + 1)
	parts = split_shape_across_blocks(window, position, block_size)

	with scoped_alloc(code, 2) as (int_acc, sq_acc):
		# clear both accumulators
		yield Xor(int_acc, int_acc, int_acc)
		yield Xor(sq_acc, sq_acc, sq_acc)
		for idx, part in enumerate(parts):
			# integral image first, squared integral image second
			for src_ptr, accum in ((integral_ptr, int_acc), (sq_integral_ptr, sq_acc)):
				with scoped_alloc(code, 1) as part_sum:
					for instr in gen_integral_sum(code, part_sum, position, part, src_ptr, block_size):
						if idx > 0:
							code.tag_com_overhead_instr(instr)
						yield instr
					yield Add(accum, accum, part_sum)

		# sq_acc *= area (the area register is only needed for this step)
		with scoped_alloc(code, 1) as area_reg:
			yield Imm(area_reg, haar_width*haar_height)
			yield Mul(sq_acc, sq_acc, area_reg)
		# int_acc = area*sq_sum - sum^2
		yield Mul(int_acc, int_acc, int_acc)
		yield Sub(int_acc, sq_acc, int_acc)
		# compare against zero; the conditional instructions below use the result
		with scoped_alloc(code, 1) as zero:
			yield Imm(zero, 0.)
			yield Cmp(int_acc, zero)
		yield Sqrt(out_reg, int_acc, cond='GT')  # positive: take the square root
		yield Imm(out_reg, 1., cond='LE')        # otherwise: fall back to 1
Ejemplo n.º 17
0
def map_pixel_to_pixel(code, in_ptr, out_ptr, pixel_op, args, block_size):
    """ Apply a one-to-one pixel operation to every pixel of a block.

    Each word of the source buffer is loaded into a register, handed to
    pixel_op(code, in_reg, out_reg, args, block_size) and the register that
    pixel_op filled is stored at the same offset in the destination buffer.
    """
    bwidth, bheight = block_size
    for row in xrange(bheight):
        row_off = bwidth * row
        for col in xrange(bwidth):
            with scoped_alloc(code, 2) as (src_reg, dst_reg):
                yield MemRImm(src_reg, in_ptr + (row_off + col))
                for instr in pixel_op(code, src_reg, dst_reg, args, block_size):
                    yield instr
                yield MemWImm(out_ptr + (row_off + col), dst_reg)
Ejemplo n.º 18
0
	def pixel_op(code, pos, in_ptr, out_ptr, args, block_size):
		''' Shift the image horizontally by args['offset'] pixels.

		Loads one value through load_mem_value and stores it at pos in the
		output buffer.
		NOTE(review): the pointer handed to load_mem_value already contains
		width*y + x + offset while pos is passed as well; the other callers
		of load_mem_value pass the buffer base address plus the (x, y) to
		read. Verify against load_mem_value that the offset is not applied
		twice.
		'''
		shift = args['offset']
		col, row = pos
		width, height = block_size
		src_addr = in_ptr + width*row + (col + shift)
		dst_addr = out_ptr + width*row + col
		with scoped_alloc(code, 1) as val:
			for op in load_mem_value(code, src_addr, pos, val, block_size):
				yield op
			yield MemWImm(dst_addr, val)
Ejemplo n.º 19
0
 def process_pixel(code, in_ptr, pos, acc, neigborhood, pixel_op, args, block_size):
     """ Yield the instructions accumulating all enabled mask entries for pos.

     pos is (x, y). For every non-zero entry of the mask the neighbouring
     value is loaded with load_mem_value and passed to
     pixel_op(code, mask_value, value_reg, acc, args, block_size).
     h_nwidth and h_nheight (half the mask width/height) come from the
     enclosing scope.

     The mask is taken from the `neigborhood` parameter; the spelling is kept
     because it is part of the signature. Previously the body iterated the
     enclosing scope's `neighborhood`, so the argument was silently ignored.
     """
     j, i = pos
     for ii, row in enumerate(neigborhood):
         for jj, m in enumerate(row):
             if m:  # works implicitly for booleans and coefficients
                 # position of the neighbour as (x, y), centred on pos
                 apos = (j + jj - h_nwidth, i + ii - h_nheight)
                 with scoped_alloc(code, 1) as v:
                     for x in load_mem_value(code, in_ptr, apos, v, block_size):
                         yield x
                     for x in pixel_op(code, m, v, acc, args, block_size):
                         yield x
Ejemplo n.º 20
0
def gen_integral_image(code, src_ptr, integral_ptr, sq_integral_ptr, block_size):
	''' Emit code computing the integral image and the squared integral image of a block.

	Pass 1 builds running sums along every row of the image (and of the
	squared image); pass 2 adds, per column, the value of the row above.
	Results are written to integral_ptr and sq_integral_ptr.
	'''
	width, height = block_size
	with scoped_alloc(code, 2) as (acc, tmp):
		# pass 1: running sums along each row
		for i in xrange(height):
			for j in xrange(width):
				off = width*i + j
				if j == 0:
					# first column: int_im[i][0] = image[i][0]
					yield MemRImm(acc, src_ptr + off)
					yield MemWImm(integral_ptr + off, acc)

					# sq_int_im[i][0] = image[i][0]^2
					yield Mul(acc, acc, acc)
					yield MemWImm(sq_integral_ptr + off, acc)
				else:
					# int_im[i][j] = image[i][j] + int_im[i][j-1]
					yield MemRImm(tmp, src_ptr + off)
					yield MemRImm(acc, integral_ptr + off -1)
					yield Add(acc, acc, tmp)
					yield MemWImm(integral_ptr + off, acc)

					# sq_int_im[i][j] = image[i][j]^2 + sq_int_im[i][j-1]
					yield Mul(tmp, tmp, tmp)
					yield MemRImm(acc, sq_integral_ptr + off -1)
					yield Add(acc, acc, tmp)

					yield MemWImm(sq_integral_ptr + off, acc)

		# pass 2: add the row above, column by column (row 0 has none)
		for j in xrange(width):
			for i in xrange(1, height):
				# integral image first, squared integral image second
				for base in (integral_ptr, sq_integral_ptr):
					here = base + i*width + j
					yield MemRImm(acc, here)
					above = base + (i-1)*width + j
					yield MemRImm(tmp, above)

					yield Add(acc, acc, tmp)
					yield MemWImm(here, acc)
Ejemplo n.º 21
0
def gen_calc_planarity_inlined(code, block_size, args):
	''' Emit the planarity kernel with all stages inlined by hand.

	For every filter of args['filterbank'] the block is convolved with the
	filter's sparse coefficients, the absolute value is stored in a scratch
	buffer at rows*cols*2, and the masked local maximum of that buffer is
	gathered. From the second filter on, the gathered value is combined with
	the value already in the output buffer by keeping the larger of the two.

	The mask dimensions are taken from the first filter only.
	Optional args['in_ptr'] (default 0) and args['out_ptr'] (default
	rows*cols) select the source and destination buffers.
	'''
	filterbank = args['filterbank']
	first_filter = filterbank.filters[0]
	rows, cols = block_size
	frows, fcols = first_filter.size()
	hfrow, hfcol = [extent//2 for extent in first_filter.size()]

	out_ptr = args.get('out_ptr', rows*cols)
	in_ptr = args.get('in_ptr', 0)
	# the scratch buffer must not alias the input or the output
	buffer_ptr = rows*cols*2
	assert buffer_ptr != in_ptr
	assert buffer_ptr != out_ptr

	for filter_nr, flt in enumerate(filterbank.filters):
		# stage 1: sparse convolution followed by abs, into the scratch buffer
		for i in xrange(rows):
			for j in xrange(cols):
				with scoped_alloc(code, 1) as acc:
					yield Xor(acc, acc, acc)
					for x, y, coeff in flt.coefficients:
						tap = (j + x - hfcol, i + y - hfrow)
						with scoped_alloc(code, 2) as (coeff_reg, val):
							yield Imm(coeff_reg, coeff)
							for instr in load_mem_value(code, in_ptr, tap, val, block_size):
								yield instr
							yield Mul(val, val, coeff_reg)
							yield Add(acc, acc, val)

					# absolute value: negate when the sum compares below zero
					with scoped_alloc(code, 1) as zero:
						yield Imm(zero, 0)
						yield Cmp(acc, zero)
					yield Neg(acc, acc, cond='LT')
					yield MemWImm(buffer_ptr+i*cols + j, acc)

		# stage 2: masked local maximum of the scratch buffer
		for i in xrange(rows):
			for j in xrange(cols):
				with scoped_alloc(code, 1) as best:
					yield Imm(best, -float('inf'))
					for ii in xrange(frows):
						for jj in xrange(fcols):
							if flt.mask[ii][jj]:
								tap = (j + jj - hfcol, i + ii - hfrow)
								with scoped_alloc(code, 1) as cand:
									for instr in load_mem_value(code, buffer_ptr, tap, cand, block_size):
										yield instr
									yield Cmp(cand, best)
									yield Mov(best, cand, cond='GT')
					# stage 3: keep the larger of this and the earlier filters' result
					if filter_nr != 0:
						with scoped_alloc(code, 1) as prev:
							yield MemRImm(prev, out_ptr+i*cols+j)
							yield Cmp(prev, best)
							yield Mov(best, prev, cond='GT')
					yield MemWImm(out_ptr+i*cols + j, best)
Ejemplo n.º 22
0
	def test_codegen(code, block_size, args):
		''' Yield an instruction sequence using several nested register scopes.

		Registers are allocated in nested and sibling scoped_alloc blocks
		inside a three-iteration loop, several values are written more than
		once, and the sequence ends by writing code.out and copying a to b.
		block_size and args are not used.
		NOTE(review): presumably a fixture for register allocation or
		optimisation tests -- confirm against the caller.
		'''
		with scoped_alloc(code, 2) as (a, b):
			yield Imm(b, 3)
			yield Mov(a, b)
			with scoped_alloc(code, 1) as c:
				for x in xrange(3):
					# first sibling scope: result e is never read
					with scoped_alloc(code, 2) as (e, f):
						yield Imm(f, 2)
						yield Add(e, b, f)
					# second sibling scope: c is written four times
					with scoped_alloc(code, 2) as (g, h):
						yield Imm(g, 1)
						yield Add(c, a, g)
						yield Add(c, a, g)
						yield Imm(h, 1)
						yield Mov(c, h)
						yield Mov(c, h)
			# a is cleared and then immediately overwritten with 1
			yield Xor(a, a, a)
			with scoped_alloc(code, 1) as const_1:
				yield Imm(const_1, 1)
				yield Mov(a, const_1)
			yield Mov(code.out, a)
			#yield Mov(b, code.east)
			yield Mov(b, a)
Ejemplo n.º 23
0
def gen_apply_sparse_filter(code, block_size, args):
	''' Emit code convolving a block with a sparse filter.

	args['filter'] supplies size() and coefficients, an iterable of
	(x, y, coeff) triples. Optional args['in_ptr'] (default 0) and
	args['out_ptr'] (default rows*cols) select the source and destination
	buffers.
	'''
	sparse = args['filter']
	rows, cols = block_size
	hfrow, hfcol = [extent//2 for extent in sparse.size()]

	out_ptr = args.get('out_ptr', rows*cols)
	in_ptr = args.get('in_ptr', 0)

	for i in xrange(rows):
		for j in xrange(cols):
			with scoped_alloc(code, 1) as total:
				yield Xor(total, total, total)  # total = 0
				for x, y, coeff in sparse.coefficients:
					# tap position relative to the centred filter, as (x, y)
					tap = (j + x - hfcol, i + y - hfrow)
					with scoped_alloc(code, 2) as (coeff_reg, val):
						yield Imm(coeff_reg, coeff)
						for instr in load_mem_value(code, in_ptr, tap, val, block_size):
							yield instr
						yield Mul(val, val, coeff_reg)
						yield Add(total, total, val)
				yield MemWImm(out_ptr+i*cols + j, total)
Ejemplo n.º 24
0
def gen_gray_image_code(code, block_size, args):
    """Yield instructions filling the output buffer with the value 128.

    The output buffer starts at address block_size[0]*block_size[1] and one
    word is written per pixel of the block. args is not used.
    """
    with scoped_alloc(code, 3) as (dst_r, step_r, gray_r):
        # destination pointer and its increment
        yield Imm(dst_r, block_size[0]*block_size[1])
        yield Imm(step_r, 1)

        # the flat gray level
        yield Imm(gray_r, 128)
        for _row in xrange(block_size[0]):
            for _col in xrange(block_size[1]):
                yield MemW(dst_r, gray_r)
                yield Add(dst_r, dst_r, step_r)
Ejemplo n.º 25
0
def gen_global_max(code, block_size, args):
	''' Emit code writing the element-wise maximum of two buffers.

	The buffers start at args['in_ptr_1'] and args['in_ptr_2']; the result
	goes to args['out_ptr'] (default rows*cols).
	'''
	rows, cols = block_size

	first_ptr = args['in_ptr_1']
	second_ptr = args['in_ptr_2']
	out_ptr = args.get('out_ptr', rows*cols)

	for i in xrange(rows):
		row_off = i*cols
		for j in xrange(cols):
			# result doubles as the holder of the second operand
			with scoped_alloc(code, 2) as (result, other):
				yield MemRImm(other, first_ptr + (row_off + j))
				yield MemRImm(result, second_ptr + (row_off + j))
				# replace the second operand when the first one is larger
				yield Cmp(other, result)
				yield Mov(result, other, cond='GT')
				yield MemWImm(out_ptr + (row_off + j), result)
Ejemplo n.º 26
0
	def pixel_op(code, mask_val, image_val, acc, args, block_size):
		''' Convolution step: acc += mask_val * image_val.

		mask_val is an immediate coefficient, image_val and acc are
		registers; args and block_size are not used.
		'''
		with scoped_alloc(code, 2) as (product, coeff_r):
			# the coefficient has to live in a register for the multiply
			yield Imm(coeff_r, mask_val)
			yield Mul(product, coeff_r, image_val)
			yield Add(acc, acc, product)
Ejemplo n.º 27
0
	def codegen(code):
		''' Yield a three-instruction sequence: b = 3, a = b, c = b + a.

		NOTE(review): presumably a minimal fixture for a code-generation or
		analysis test -- confirm against the caller.
		'''
		with scoped_alloc(code, 3) as (a, b, c):
			yield Imm(b, 3)
			yield Mov(a, b)
			yield Add(c, b, a)
Ejemplo n.º 28
0
	def codegen(code, block_size, args):
		''' Yield a sequence that writes code.out and reads code.east.

		Loads 1 into a, copies it to code.out, reads code.east into b and
		adds b to a. block_size and args are not used.
		NOTE(review): presumably a fixture exercising the inter-PE ports --
		confirm against the caller.
		'''
		with scoped_alloc(code, 2) as (a, b):
			yield Imm(a, 1)
			yield Mov(code.out, a)
			yield Mov(b, code.east)
			yield Add(a, a, b)
Ejemplo n.º 29
0
	def codegen(code, block_size, args):
		''' Yield a three-instruction sequence: a = 1, b = 2, a = a + b.

		block_size and args are not used.
		NOTE(review): presumably a minimal fixture for a code-generation or
		analysis test -- confirm against the caller.
		'''
		with scoped_alloc(code, 2) as (a, b):
			yield Imm(a, 1)
			yield Imm(b, 2)
			yield Add(a, a, b)
Ejemplo n.º 30
0
	def codegen(code, block_size, args):
		''' Yield a sequence that publishes memory address 3 twice and reads two ports.

		The word at address 3 is loaded into code.out before reading
		code.west into a, and loaded again before reading code.east into b.
		block_size and args are not used.
		NOTE(review): presumably a fixture exercising the inter-PE ports --
		confirm against the caller.
		'''
		with scoped_alloc(code, 2) as (a, b):
			yield MemRImm(code.out, 3)
			yield Mov(a, code.west)
			# same load repeated before the second port read
			yield MemRImm(code.out, 3)
			yield Mov(b, code.east)