Beispiel #1
0
def gen_calc_planarity_inlined(code, block_size, args):
	''' Optimised planarity code generation with all steps manually inlined.

	For every filter in args['filterbank'].filters this generator yields, for
	each position of the block, the instructions for
	  1. a convolution with the filter's sparse coefficients followed by an
	     absolute value, written to a scratch buffer, and
	  2. a maximum over the scratch values under the enabled entries of the
	     filter's mask, combined with the value earlier filters already left
	     at the output (a running maximum over all filters).

	code       -- code object handed on to scoped_alloc and load_mem_value
	block_size -- (rows, cols) of the block
	args       -- mapping with 'filterbank' (required) and optionally
	              'in_ptr' (default 0) and 'out_ptr' (default rows*cols)

	The scratch buffer starts at offset rows*cols*2 and must not coincide with
	the input or output pointer; this is checked with assertions.
	The dimensions are taken from each filter individually, so every filter is
	centred on its own size, as in gen_apply_sparse_filter and
	gen_gather_local_max. An empty filterbank yields no instructions.
	'''
	filterbank = args['filterbank']
	rows, cols = block_size

	out_ptr = args['out_ptr'] if 'out_ptr' in args else rows*cols
	in_ptr = args['in_ptr'] if 'in_ptr' in args else 0
	buffer_ptr = rows*cols*2
	assert buffer_ptr != in_ptr
	assert buffer_ptr != out_ptr

	for filter_nr, f in enumerate(filterbank.filters):
		# use the dimensions of the current filter, not those of the first one
		frows, fcols = f.size()
		hfrow, hfcol = frows//2, fcols//2

		# convolution + abs
		for i in xrange(rows):
			for j in xrange(cols):
				with scoped_alloc(code, 1) as acc:
					# convolution: clear the accumulator, then add coeff * value
					yield Xor(acc, acc, acc)
					for x, y, coeff in f.coefficients:
						# coefficient position relative to the filter centre
						ii = i + y - hfrow
						jj = j + x - hfcol
						with scoped_alloc(code, 2) as (coeff_reg, v):
							yield Imm(coeff_reg, coeff)
							for instr in load_mem_value(code, in_ptr, (jj, ii), v, block_size):
								yield instr
							yield Mul(v, v, coeff_reg)
							yield Add(acc, acc, v)

					# absolute value: compare with 0 and negate when less than
					with scoped_alloc(code, 1) as const0:
						yield Imm(const0, 0)
						yield Cmp(acc, const0)
					yield Neg(acc, acc, cond='LT')
					yield MemWImm(buffer_ptr+i*cols + j, acc)
		# gather
		for i in xrange(rows):
			for j in xrange(cols):
				with scoped_alloc(code, 1) as max_v:
					# local max over the enabled mask entries
					yield Imm(max_v, -float('inf'))
					for ii in xrange(frows):
						for jj in xrange(fcols):
							if not f.mask[ii][jj]: continue # skip if not enabled
							iii = i + ii - hfrow
							jjj = j + jj - hfcol
							with scoped_alloc(code, 1) as v:
								for instr in load_mem_value(code, buffer_ptr, (jjj, iii), v, block_size):
									yield instr
								yield Cmp(v, max_v)
								yield Mov(max_v, v, cond='GT')
					# global max: the first filter has no earlier result to merge
					if filter_nr != 0:
						with scoped_alloc(code, 1) as old_v:
							yield MemRImm(old_v, out_ptr+i*cols+j)
							yield Cmp(old_v, max_v)
							yield Mov(max_v, old_v, cond='GT')
					yield MemWImm(out_ptr+i*cols + j, max_v)
Beispiel #2
0
def gen_gather_local_max(code, block_size, args):
	''' Generate code that gathers the masked local maximum per position.

	For each position of the (rows, cols) block, the values under the enabled
	entries of args['filter'].mask, centred on that position, are loaded from
	the input and reduced to their maximum, which is written to the output.

	code       -- code object handed on to scoped_alloc and load_mem_value
	block_size -- (rows, cols) of the block
	args       -- mapping with 'filter' (required) and optionally
	              'in_ptr' (default 0) and 'out_ptr' (default rows*cols)
	'''
	mask_filter = args['filter']
	rows, cols = block_size
	mask_rows, mask_cols = mask_filter.size()
	half_rows = mask_rows // 2
	half_cols = mask_cols // 2

	if 'out_ptr' in args:
		dst_base = args['out_ptr']
	else:
		dst_base = rows*cols
	if 'in_ptr' in args:
		src_base = args['in_ptr']
	else:
		src_base = 0

	for row in xrange(rows):
		dst_row = dst_base + row*cols
		for col in xrange(cols):
			with scoped_alloc(code, 1) as best:
				# start below every possible value
				yield Imm(best, -float('inf'))
				for mask_row in xrange(mask_rows):
					src_row = row + mask_row - half_rows
					for mask_col in xrange(mask_cols):
						# only enabled mask entries contribute
						if mask_filter.mask[mask_row][mask_col]:
							src_col = col + mask_col - half_cols
							with scoped_alloc(code, 1) as candidate:
								for instr in load_mem_value(code, src_base, (src_col, src_row), candidate, block_size):
									yield instr
								# take over the candidate when it is greater
								yield Cmp(candidate, best)
								yield Mov(best, candidate, cond='GT')
				yield MemWImm(dst_row + col, best)
Beispiel #3
0
def gen_apply_sparse_filter(code, block_size, args):
	''' Generate code that applies a sparse filter to every block position.

	For each position of the (rows, cols) block, the products of every
	(x, y, coeff) entry of args['filter'].coefficients with the input value at
	the matching offset from the filter centre are summed, and the sum is
	written to the output.

	code       -- code object handed on to scoped_alloc and load_mem_value
	block_size -- (rows, cols) of the block
	args       -- mapping with 'filter' (required) and optionally
	              'in_ptr' (default 0) and 'out_ptr' (default rows*cols)
	'''
	sparse_filter = args['filter']
	rows, cols = block_size
	filter_rows, filter_cols = sparse_filter.size()
	half_rows = filter_rows // 2
	half_cols = filter_cols // 2

	if 'out_ptr' in args:
		dst_base = args['out_ptr']
	else:
		dst_base = rows*cols
	if 'in_ptr' in args:
		src_base = args['in_ptr']
	else:
		src_base = 0

	for row in xrange(rows):
		dst_row = dst_base + row*cols
		for col in xrange(cols):
			with scoped_alloc(code, 1) as total:
				# clear the accumulator
				yield Xor(total, total, total)
				for dx, dy, weight in sparse_filter.coefficients:
					# input position relative to the filter centre, as (column, row)
					src_pos = (col + dx - half_cols, row + dy - half_rows)
					with scoped_alloc(code, 2) as (weight_reg, sample):
						yield Imm(weight_reg, weight)
						for instr in load_mem_value(code, src_base, src_pos, sample, block_size):
							yield instr
						yield Mul(sample, sample, weight_reg)
						yield Add(total, total, sample)
				yield MemWImm(dst_row + col, total)