def test_full_integral_image_correctness():
	''' Test generated full integral image correctness,
	    note that this relies on the correctness of interpreter and reference.py

	    On a mismatch the unoptimised and the optimised instruction streams are
	    written to text files (best effort) before the test fails. '''

	block_size = (20, 20)
	size = tuple(x*3 for x in block_size)

	# generate random test image
	test_image = [[float(random.randint(0, 255)) for i in xrange(size[0])] for j in xrange(size[1])]

	# reference implementation
	integral_ref = reference.gen_integral_image(test_image)
	sq_integral_ref = reference.gen_integral_squared_image(test_image)

	# pointer config: source, integral and squared integral pointers are one buffer apart
	buffer_size = block_size[0]*block_size[1]
	src_ptr = 0
	integral_ptr = buffer_size
	sq_integral_ptr = 2*buffer_size

	# set up interpreter for integral image calculation
	pe_dim = [s//b for s, b in zip(size, block_size)]
	def code_gen(code, block_size, args):
		return gen_code.gen_full_integral_image(code, src_ptr, integral_ptr, sq_integral_ptr, pe_dim, block_size)
	code = Code()
	code.set_generator(optimiser_wrapper(code_gen), block_size)

	sim = Interpreter(code, test_image, block_size)
	sim.run()

	# get result of simulator with scaling, truncation turned off and float output
	integral_test = sim.gen_output_image(1, False, False, True)
	sq_integral_test = sim.gen_output_image(2, False, False, True)

	# comparison of reference with blip sim
	integral_err = compare_images(integral_ref, integral_test)
	sq_integral_err = compare_images(sq_integral_ref, sq_integral_test)

	err_eps = 0.001
	if (integral_err < err_eps) and (sq_integral_err < err_eps):
		return

	print('integral comp: %s' % (integral_err,))
	print('squared integral comp: %s' % (sq_integral_err,))

	def tag_str(instr):
		return ', '.join(instr.tag) if hasattr(instr, 'tag') else ''

	def write_trace(filename, instructions):
		# the with block closes the file even when rendering an instruction fails
		with open(filename, 'w') as f:
			f.write('\n'.join(str(x).ljust(40) + ' tags: ' + tag_str(x) for x in instructions))

	print('rendering instruction stream to file, can take a while')
	try:
		# code_gen takes (code, block_size, args), so all three must be passed
		write_trace('unoptimised_full_integral_image_trace.txt', code_gen(Code(), block_size, {}))

		# wrap exactly like the generator handed to set_generator above
		optim_gen = optimiser_wrapper(code_gen)
		write_trace('bad_full_integral_image_trace.txt', optim_gen(Code(), block_size, {}))
	except Exception as e:
		# the trace is only a debugging aid, the test fails below regardless
		print('could not render instruction stream to file')
		print('err: ' + str(e))

	assert False, 'full integral image differs from reference'
Example #2
0
def main(block_size, codegen_args, image_filename, result_filename, codegen_implementation, analyser_classes):
	''' Run a codegen implementation on an image under the analysis interpreter
	    and collect the report text of all analysers.

	    Note that codegen_args is modified in place: its 'pe_dim' entry is set. '''

	# image, with a blank 128x128 fallback when reading fails
	image = imageio.read(image_filename)
	if not image:
		print('could not read image %s'%image_filename)
		image = [[0 for i in xrange(128)] for j in xrange(128)]

	# number of blocks that fit in each image dimension
	width, height = len(image[0]), len(image)
	codegen_args['pe_dim'] = [dim//blk for dim, blk in zip((width, height), block_size)]

	# code
	code = Code()
	code.set_generator(codegen_implementation, block_size, codegen_args)

	# setup interpreter
	interpreter = AnalysisInterpreter(code, image, block_size)

	# setup analysers: instantiate every class first, then register the instances
	analysers = []
	for analyser_class in analyser_classes:
		analysers.append(analyser_class(interpreter))
	for analyser in analysers:
		interpreter.set_analysis(analyser)

	# run interpreter
	interpreter.run()

	# generate report for all analysers
	# NOTE(review): res and result_filename are not used after this loop in the
	# visible code, the tail of this function may be missing -- confirm
	separator = '- '*40 + '\n'
	res = ''
	for analyser in analysers:
		res += separator
		res += str(analyser) + '\n'
		try:
			res += analyser.report() + '\n'
		except Exception:
			res += 'could not print report\n'
Example #3
0
def run_implementation(block_size, implementation, image_filename, filterbank_filename, res_filename_prefix):
	''' Execution wrapper: run a filterbank codegen on an image in the
	    simulator and write output buffer 1 to <res_filename_prefix>.png '''
	from blip.simulator import interpreter
	from blip.support import imageio

	# the filterbank is loaded before the image is touched
	filterbank = Filterbank.load(filterbank_filename)

	image = imageio.read(image_filename)
	if not image:
		raise Exception('image %s not found or not supported'%image_filename)

	# number of blocks that fit in each image dimension
	width, height = len(image[0]), len(image)
	pe_dim = [dim//blk for dim, blk in zip((width, height), block_size)]

	# now execute the codegen
	code = Code()
	code.set_generator(implementation, block_size, {'filterbank':filterbank, 'pe_dim':pe_dim})

	sim = interpreter.Interpreter(code, image, block_size, 4)
	sim.run()

	# result is saved in first buffer
	result = sim.gen_output_image(1)
	imageio.write(res_filename_prefix + '.png', result, 1)
def gen_faces_detect_no_errors():
	''' Just check if codegen generates no errors.

	    Walks the generated instruction stream, remembers the value of the most
	    recent imm instruction and raises IllegalInstructionException when a
	    memr/memw that addresses memory through imm would use a value outside
	    the PE memory. Prints the number of instructions seen. '''

	block_size = (24,24)
	size = tuple([x*2 for x in block_size])

	cascade_filename = '../data/haarcascade_frontalface_alt.xml'
	cascade = parse_haar.parse_haar_xml(cascade_filename)

	args = {'haar_classifier':cascade}
	code = Code()
	code.set_generator(gen_code.gen_detect_faces_opt, block_size, args)

	# a throw-away simulator is built only to read the memory size of a PE
	sim = Interpreter(code, [[0 for x in xrange(size[0])] for y in xrange(size[1])], block_size)
	mem_size = sim.procs[0][0].memory.size
	del sim

	# run through instructions and apply some checks
	cnt = 0
	# initialised up front so a memr/memw seen before any imm cannot hit an unbound name
	current_imm = 0
	for instr in code.gen(code):
		opcode = instr.opcode()
		if opcode == 'imm':
			current_imm = instr.value
		# heuristic test, not all errors are detected as memw(rx, ry) is also possible
		if (opcode == 'memr' and str(instr.src) == 'imm') \
		or (opcode == 'memw' and str(instr.dest) == 'imm'):
			if current_imm >= mem_size:
				raise IllegalInstructionException('memx out of bounds, addr = %i'%int(current_imm))
		cnt += 1

	print('# instructions: %i'%cnt)
Example #5
0
    def run_test(images, th, alpha, block_size):
        ''' Run the gen_bbs codegen on every image and return the flattened outputs.

        images is a sequence of 1D pixel vectors, each holding exactly one
        block (bwidth * bheight values, checked on the first image). Returns a
        list with, per input image, output buffer 1 flattened to a 1D list. '''
        image0 = images[0]
        im_size = len(image0)
        bwidth, bheight = block_size
        assert(im_size == bwidth * bheight) # only one pe

        code = Code()
        code.set_generator(gen_bbs, block_size, {'th':th, 'alpha':alpha})

        output = []
        sim = None
        for im in images:
            # interpreter expects 2D array, rows in the 1D vector are bwidth long
            im_tr = [[im[i*bwidth + j] for j in xrange(bwidth)] for i in xrange(bheight)]
            if sim is None:
                sim = Interpreter(code, im_tr, block_size)
            else:
                # restart code gen
                sim.reset()
                # set new image
                sim.set_src_image(im_tr)
            sim.run()
            im_out = sim.gen_output_image(1)
            # convert to 1D vector
            output.append([v for row in im_out for v in row])
        return output
Example #6
0
def test_lid():
    ''' Every PE writes i*width + j, built from two Lid results, to the memory
        address with that same value; buffer 0 must then hold those values. '''
    block_size = (2, 2)
    width, height = block_size

    def test_code(code, block_size, args):
        # r0 <- Lid selected by 0, r1 <- Lid selected by 1
        yield Imm(code.r(7), 0)
        yield Lid(code.r(0), code.r(7))
        yield Imm(code.r(6), 1)
        yield Lid(code.r(1), code.r(6))
        # r0 <- r0*block_size[0] + r1, stored at the address with the same value
        yield Imm(code.r(6), block_size[0])
        yield Mul(code.r(0), code.r(0), code.r(6))
        yield Add(code.r(0), code.r(1), code.r(0))
        yield MemW(code.r(0), code.r(0))

    code = Code()
    code.set_generator(test_code, block_size)
    print(str(code))

    sim = Interpreter(code, zeros(block_size[1], block_size[0]), block_size)
    sim.run_kernel()

    out_image = sim.gen_output_image(0, False)
    for expected in xrange(height*width):
        i, j = divmod(expected, width)
        assert out_image[i][j] == expected
Example #7
0
def main(block_size, code_gen, args, image_filename, output_filename):
    ''' Run code_gen on an image in the simulator and write output buffer 1.

    Falls back to an empty 128x128 image when the input cannot be read.
    Returns the interpreter so callers can inspect it afterwards. '''
    # load image
    image = imageio.read(image_filename)
    if not image:
        print('could not read image, continuing with empty 128x128 image')
        image = [[0 for i in xrange(128)] for j in xrange(128)]

    code = Code()
    code.set_generator(code_gen, block_size, args)

    interpreter = Interpreter(code, image, block_size)
    interpreter.run()

    doRGB = True
    out_image = interpreter.gen_output_image(1, True, True, False, doRGB)
    # last argument is 3 for RGB output and 1 otherwise
    # (presumably the number of channels -- confirm against imageio.write)
    imageio.write(output_filename, out_image, 3 if doRGB else 1)

    return interpreter
Example #8
0
def demo(im_filename, out_filename, no_optimalisations):
    """ Convolve an image in the simulator and dump the instruction trace.

    The instruction stream is first rendered to <out_filename>_trace.txt,
    then the same generator is run on the image read from im_filename and
    output buffer 1 is written to out_filename. When no_optimalisations is
    true the plain codegen is used instead of the optimiser.
    """
    from blip.code.trace_optimiser import Optimiser, ImmediatePass, PeepholePass, MemoryPass
    from blip.simulator import interpreter
    from blip.support import imageio
    from blip.simulator.opcodes import Imm, Mul, Add

    # settings
    block_size = (32, 32)
    out_ptr = block_size[0] * block_size[1]
    coeff = [[1, -2, 1]] * 3

    # convolution implementation with map_neighborhood_to_pixel skeleton
    def convolution_op(code, coeff_v, val, acc, args, block_size):
        """ Simple convolution implementation: acc += coeff_v * val. """
        with scoped_alloc(code, 2) as (v, coeff_r):
            yield Imm(coeff_r, coeff_v)
            yield Mul(v, coeff_r, val)
            yield Add(acc, acc, v)

    def codegen(code, block_size, args):
        """ Map convolution to image. """
        return map_neighborhood_to_pixel(code, 0, out_ptr, coeff, convolution_op, args, block_size)

    # Wrap optimisers (the peephole pass is intentionally left unregistered)
    optimiser = Optimiser(50)
    optimiser.register_pass(ImmediatePass(optimiser))
    # optimiser.register_pass(PeepholePass(optimiser))
    optimiser.register_pass(MemoryPass(optimiser))

    def optim_wrapper(code, block_size, args):
        if no_optimalisations:
            print("optimalisations disabled")
            return codegen(code, block_size, args)
        return optimiser.run(code, codegen, block_size, args)

    def tag_str(instr):
        return ", ".join(instr.tag) if hasattr(instr, "tag") else ""

    # Render instruction trace; the with block closes the file even if
    # generating or formatting an instruction raises
    with open(out_filename + "_trace.txt", "w") as f:
        f.write("\n".join(str(x).ljust(40) + " tags: " + tag_str(x) for x in optim_wrapper(Code(), block_size, {})))

    # Run simulation
    code = Code()
    code.set_generator(optim_wrapper, block_size, {})
    image = imageio.read(im_filename)
    sim = interpreter.Interpreter(code, image, block_size)
    sim.run()
    out = sim.gen_output_image(1)
    imageio.write(out_filename, out, 1)
	def run_test(image, position, shape, block_size):
		''' Run gen_fullintegral_sum (through optimiser_wrapper) on image and
		    return the value of register r4 of the first PE.
		    ptr is taken from the enclosing scope. '''
		def code_gen(code, block_size, args):
			# the result goes to r4, which is read back below
			result_reg = code.r(4)
			return gen_code.gen_fullintegral_sum(code, result_reg, position, shape, ptr, block_size)

		program = Code()
		program.set_generator(optimiser_wrapper(code_gen), block_size)

		simulator = Interpreter(program, image, block_size)
		simulator.run()

		# extract value
		first_pe = simulator.procs[0][0]
		return first_pe.get_reg_by_name('r4')
	def run_test(image, position, shape, block_size):
		''' Run gen_integral_sum (through optimiser_wrapper) on image and return
		    the value of the allocated output register on the first PE.
		    ptr is taken from the enclosing scope. '''
		program = Code()
		# register allocated up front so it can be read back after the run
		out_reg = program.alloc_reg()

		def code_gen(code, block_size, args):
			return gen_code.gen_integral_sum(code, out_reg, position, shape, ptr, block_size)

		program.set_generator(optimiser_wrapper(code_gen), block_size)

		simulator = Interpreter(program, image, block_size)
		simulator.run()

		# extract value
		first_pe = simulator.procs[0][0]
		return first_pe.get_reg_by_name(str(out_reg))
def compile_code(src_code, args, block_size, nr_reg = 8, no_sequencer=True):
	''' Compile a kernel and return a Code object that replays the compiled instructions.

	    src_code is handed to CompilerDriver.run; args maps argument names of the
	    compiled main object to the values patched in before running. nr_reg and
	    no_sequencer are forwarded to the CompilerDriver. The returned Code has a
	    generator that yields every compiled instruction wrapped in an InstrAdapter. '''
	# forward the caller's choice instead of always forcing no_sequencer=True
	comp = CompilerDriver(nr_reg, no_sequencer=no_sequencer)
	main_object = comp.run(src_code)
	# argument values are looked up by name, in the order the main object declares them
	patched_object = Compiler.patch_arguments_before_run(main_object, [args[argname] for argname in main_object.arguments])
	def codegen_func(code, block_size, args):
		for x in patched_object.code:
			yield InstrAdapter(x, use_reg_wrapper=True)
	code = Code()
	code.set_generator(codegen_func, block_size, args)
	return code
Example #12
0
	def run_test(image, offset):
		''' Apply pixel_op (from the enclosing scope) to every pixel with
		    map_image_to_pixel on 16x16 blocks and return output buffer 1. '''
		block_size = (16, 16)
		# input at address 0, output right behind one block of pixels
		in_ptr = 0
		out_ptr = block_size[0]*block_size[1]

		def codegen(code, block_size, args):
			return map_image_to_pixel(code, in_ptr, out_ptr, pixel_op, args, block_size)

		program = Code()
		program.set_generator(codegen, block_size, {'offset' : offset})

		simulator = Interpreter(program, image, block_size)
		simulator.run()
		return simulator.gen_output_image(1)
Example #13
0
	def run_test(image, coeff):
		''' Apply pixel_op (from the enclosing scope) with coefficients coeff via
		    map_neighborhood_to_pixel on 16x16 blocks and return output buffer 1. '''
		block_size = (16, 16)
		# input at address 0, output right behind one block of pixels
		in_ptr = 0
		out_ptr = block_size[0]*block_size[1]

		def codegen(code, block_size, args):
			return map_neighborhood_to_pixel(code, in_ptr, out_ptr, coeff, pixel_op, args, block_size)

		program = Code()
		program.set_generator(codegen, block_size, {})

		simulator = Interpreter(program, image, block_size)
		simulator.run()
		return simulator.gen_output_image(1)
Example #14
0
    def run_test(image, args, block_size):
        ''' Run codegen (from the enclosing scope) on an image that is exactly
            one block in size and return output buffer 1. '''
        im_size = (len(image[0]), len(image))
        bwidth, bheight = block_size
        # only one pe: image size and block size must be equal
        assert im_size == block_size

        program = Code()
        program.set_generator(codegen, block_size, args)

        simulator = Interpreter(program, image, block_size)
        simulator.run()
        return simulator.gen_output_image(1, False)
Example #15
0
	def run_test(image, position, shape, ptr, block_size):
		''' Run gen_fullintegral_sum2_2 for the four corner points of shape placed
		    at position and return the value of register r4 of the first PE. '''
		px, py = position
		x, y, w, h = shape
		# corners of the rectangle, both ends inclusive
		left, top = px + x, py + y
		right, bottom = left + w - 1, top + h - 1
		points = ((left, top), (right, top), (left, bottom), (right, bottom))

		def code_gen(code, block_size, args):
			return gen_code.gen_fullintegral_sum2_2(code, code.r(4), ptr, points, block_size)

		program = Code()
		program.set_generator(code_gen, block_size)

		simulator = Interpreter(program, image, block_size)
		simulator.run()

		# extract value
		first_pe = simulator.procs[0][0]
		return first_pe.get_reg_by_name('r4')
Example #16
0
def run_codegen(args, block_size, codegen_implementation):
	''' Analyse codegen_implementation for the given parameters.

	    Only the 'mask_size' entry of args is passed on, together with a fixed
	    pe_dim of (2, 1). Returns the list [OpcodeFreq, Communication] of
	    analysers after analysis.analyse_code has been run on the code. '''
	codegen_args = {'mask_size':args['mask_size'], 'pe_dim':(2, 1)}

	code = Code()
	code.set_generator(codegen_implementation, block_size, codegen_args)

	# init analysers
	analysers = [analysis.OpcodeFreq(), analysis.Communication()]

	analysis.analyse_code(code, analysers)
	return analysers
Example #17
0
def test_sleep_wakeup():
    ''' Test sleep and wakeup opcodes: an Imm issued while asleep must not take effect. '''
    block_size = (2, 2)

    def test_code(code, block_size, args):
        yield Imm(code.r(0), 1)
        yield Sleep()
        # issued between Sleep and WakeUp
        yield Imm(code.r(0), 2)
        yield WakeUp()

    program = Code()
    program.set_generator(test_code, block_size)
    print(str(program))

    blank = zeros(block_size[1], block_size[0])
    simulator = Interpreter(program, blank, block_size)
    simulator.run_kernel()

    # second imm should not be executed
    first_pe = simulator.procs[0][0]
    assert first_pe.get_reg_by_name('r0') == 1
Example #18
0
def run_codegen_function(test_image, code_gen, block_size, args, buffer_sel = 1, **kwargs):
	''' Run code_gen on test_image in the simulator.

	    args is modified in place: its 'pe_dim' entry is set from the image and
	    block size. The optional keyword image2buffer maps buffer numbers to
	    extra source images that are loaded before the run. Returns the tuple
	    (output image of buffer buffer_sel, interpreter). '''
	image2buffer = kwargs.get('image2buffer', {})

	# fill this in for all functions
	width, height = len(test_image[0]), len(test_image)
	args['pe_dim'] = [dim//blk for dim, blk in zip((width, height), block_size)]

	code = Code()
	code.set_generator(code_gen, block_size, args)

	sim = Interpreter(code, test_image, block_size)
	for buffer_nr, image in image2buffer.items():
		sim.set_src_image(image, buffer_nr)

	sim.run()

	output = sim.gen_output_image(buffer_sel, False, False, True)
	return output, sim
def gen_integral_image_correctness():
	''' Check the generated integral and squared integral images against reference.py,
	    note that this relies on the correctness of interpreter and reference.py '''

	# the block covers the whole 80x80 image
	size = (80, 80)
	block_size = size

	# generate random test image
	width, height = size
	test_image = [[float(random.randint(0, 255)) for col in xrange(width)] for row in xrange(height)]

	# reference implementation
	integral_ref = reference.gen_integral_image(test_image)
	sq_integral_ref = reference.gen_integral_squared_image(test_image)

	# pointer config: source, integral and squared integral pointers are one buffer apart
	buffer_size = block_size[0]*block_size[1]
	src_ptr, integral_ptr, sq_integral_ptr = 0, buffer_size, 2*buffer_size

	# set up interpreter for integral image calculation
	def code_gen(code, block_size, args):
		return gen_code.gen_integral_image(code, src_ptr, integral_ptr, sq_integral_ptr, block_size)
	program = Code()
	program.set_generator(optimiser_wrapper(code_gen), block_size)

	simulator = Interpreter(program, test_image, block_size)
	simulator.run()

	# get result of simulator with scaling, truncation turned off and float output
	integral_test = simulator.gen_output_image(1, False, False, True)
	sq_integral_test = simulator.gen_output_image(2, False, False, True)

	# comparison of reference with blip sim
	integral_err = compare_images(integral_ref, integral_test)
	sq_integral_err = compare_images(sq_integral_ref, sq_integral_test)

	err_eps = 0.001
	if (integral_err < err_eps) and (sq_integral_err < err_eps):
		return

	# report both errors before failing
	print('integral comp: %s' % (integral_err,))
	print('squared integral comp: %s' % (sq_integral_err,))
	assert False
Example #20
0
def run_codegen(cascade, block_size, codegen_implementation):
	''' Codegen to run violajones for a set of parameters

	This function runs the VJ codegen to analyse the code
	for a certain set of parameters. The cascade is passed as
	'haar_classifier' together with a fixed pe_dim of (2, 1); the list
	[OpcodeFreq, Communication] of analysers is returned after analysis.
	'''

	codegen_args = {'haar_classifier':cascade, 'pe_dim':(2, 1)}

	code = Code()
	code.set_generator(codegen_implementation, block_size, codegen_args)

	# init analysers
	analysers = [analysis.OpcodeFreq(), analysis.Communication()]

	analysis.analyse_code(code, analysers)
	return analysers
Example #21
0
def test_register_value_fetch():
    ''' A value loaded with Imm must be readable through get_reg_by_name. '''
    rsize = csize = 64
    block_size = (rsize, csize)
    image = zeros(rsize, csize)

    # test code
    def test_code(code, block_size, args):
        yield Imm(code.r(0), 3)

    program = Code()
    program.set_generator(test_code, block_size)
    print(str(program))

    # PE
    pe = PE(None, image, (rsize, csize))

    # execute program: both steps are applied for every instruction
    for instr in program.plain_instr():
        pe.step1(instr)
        pe.step2(instr)

    assert pe.get_reg_by_name('r0') == 3
	def run_test(image, cascade):
		''' Run gen_detect_faces_opt on a histogram-equalised image with 64x64
		    blocks and return the detections derived from output buffer 1. '''
		block_size = (64, 64)

		print('XXX histogram equalisation is not implemented yet, use violajones impl')
		print('    before executing simulator')
		image = reference.equalizeHist(image)

		# now execute the codegen
		program = Code()
		program.set_generator(gen_code.gen_detect_faces_opt, block_size, {'haar_classifier':cascade})

		simulator = Interpreter(program, image, block_size, 4)
		simulator.run()

		# result is saved in first buffer
		detections_pixmap = simulator.gen_output_image(1)

		# convert the number of rejections in the stages to detections
		return gen_code.convert_pixelmap_to_detections(detections_pixmap, cascade.size)
Example #23
0
def test_neg():
    ''' Neg must negate the value held in r0. '''
    rsize = csize = 32
    block_size = (rsize, csize)

    # test program
    def test_code(code, block_size, args):
        yield Neg(code.r(0), code.r(0))

    program = Code()
    program.set_generator(test_code, block_size)
    print(str(program))

    pe = PE(None, zeros(rsize, csize), (rsize, csize))
    # warning: this makes certain assumptions on internals of PE
    pe.regs[regname_to_id('r0')] = 42

    # both steps are applied for every instruction
    for instr in program.plain_instr():
        pe.step1(instr)
        pe.step2(instr)

    assert pe.get_reg_by_name('r0') == -42.
Example #24
0
def test_sleep_wakeup2():
    ''' Test the powerdown attribute around the sleep and wakeup opcodes. '''
    block_size = (2, 2)
    image = zeros(block_size[1], block_size[0])

    def test_code_sleep(code, block_size, args):
        yield Sleep()

    def test_code_sleep_wakeup(code, block_size, args):
        yield Sleep()
        yield WakeUp()

    # Sleep only: PE starts powered up and ends powered down
    code = Code()
    code.set_generator(test_code_sleep, block_size)
    sim = Interpreter(code, image, block_size)

    first_pe = sim.procs[0][0]
    assert first_pe.is_powerdown() == False
    sim.run_kernel()
    assert sim.procs[0][0].is_powerdown() == True

    # Sleep followed by WakeUp: PE is powered up before and after
    code2 = Code()
    code2.set_generator(test_code_sleep_wakeup, block_size)
    sim2 = Interpreter(code2, image, block_size)

    assert sim2.procs[0][0].is_powerdown() == False
    sim2.run_kernel()
    assert sim2.procs[0][0].is_powerdown() == False
Example #25
0
def test_full_integral_image_correctness():
	''' Check the generated full integral and squared integral images against
	    reference.py, note that this relies on the correctness of interpreter
	    and reference.py '''

	# 3x3 blocks of 20x20 pixels
	block_size = (20, 20)
	size = tuple(x*3 for x in block_size)

	# generate random test image
	width, height = size
	test_image = [[float(random.randint(0, 255)) for col in xrange(width)] for row in xrange(height)]

	# reference implementation
	integral_ref = reference.gen_integral_image(test_image)
	sq_integral_ref = reference.gen_integral_squared_image(test_image)

	# pointer config: source, integral and squared integral pointers are one buffer apart
	buffer_size = block_size[0]*block_size[1]
	src_ptr, integral_ptr, sq_integral_ptr = 0, buffer_size, 2*buffer_size

	# set up interpreter for integral image calculation
	pe_dim = [s//b for s, b in zip(size, block_size)]

	def code_gen(code, block_size, args):
		return gen_code.gen_full_integral_image(code, src_ptr, integral_ptr, sq_integral_ptr, pe_dim, block_size)

	program = Code()
	program.set_generator(code_gen, block_size)

	simulator = Interpreter(program, test_image, block_size)
	simulator.run()

	# get result of simulator with scaling, truncation turned off and float output
	integral_test = simulator.gen_output_image(1, False, False, True)
	sq_integral_test = simulator.gen_output_image(2, False, False, True)

	# comparison of reference with blip sim
	integral_err = compare_images(integral_ref, integral_test)
	sq_integral_err = compare_images(sq_integral_ref, sq_integral_test)

	err_eps = 0.001
	assert integral_err < err_eps and sq_integral_err < err_eps
Example #26
0
def test_communication_profiler():
    ''' Communication must count every instruction and the tagged overhead ones. '''
    code = Code()
    comm = Communication()

    # two of the four instructions carry the communication overhead tag
    first_mov = Mov(code.r(4), code.r(3))
    tag_instr(first_mov, "communication overhead")
    load_imm = Imm(code.r(4), 3)
    second_mov = Mov(code.r(4), code.r(3))
    tag_instr(second_mov, "communication overhead")
    product = Mul(code.r(2), code.r(4), code.r(3))

    for instr in (first_mov, load_imm, second_mov, product):
        comm.process(instr)

    assert comm.nr_instr == 4
    assert comm.overhead == 2
Example #27
0
def test_codesection_profiler():
    ''' CodeSectionProfiler must count instructions per "section:" tag only. '''
    code = Code()
    csp = CodeSectionProfiler()

    # one Mov per tag; "blah" is not expected to show up in the section counts
    instructions = []
    for tag in ("blah", "section:test", "section:test", "section:test2"):
        instr = Mov(code.r(4), code.r(3))
        tag_instr(instr, tag)
        instructions.append(instr)

    for instr in instructions:
        csp.process(instr)

    expected = {"test": 2, "test2": 1}
    assert compare_cnt_dict(expected, csp.section_cnt)
	def run_test(codegen_function, image, cascade, block_size):
		''' Run codegen_function (through optimiser_wrapper) on a histogram-equalised
		    image and return the detections derived from output buffer 1. '''
		print('running %s'%codegen_function.__name__)
		print('XXX histogram equalisation is not implemented yet, use violajones impl')
		print('    before executing simulator')
		image = reference.equalizeHist(image)

		# number of blocks that fit in each image dimension
		width, height = block_size
		pe_dim = (len(image[0])//width, len(image)//height)

		# now execute the codegen
		program = Code()
		program.set_generator(
			optimiser_wrapper(codegen_function),
			block_size,
			{'haar_classifier': cascade, 'pe_dim':pe_dim})

		simulator = Interpreter(program, image, block_size, 4)
		simulator.run()

		# result is saved in first buffer
		detections_pixmap = simulator.gen_output_image(1)

		# convert the number of rejections in the stages to detections
		return gen_code.convert_pixelmap_to_detections(detections_pixmap, cascade.size)
	def run_test(position, integral_test, sq_integral_test, haar_size, block_size):
		''' Run gen_calc_variance (through optimiser_wrapper) and return the tuple
		    (1/value of the output register, first PE). '''
		# integral image at address 0, squared integral one block further
		integral_ptr = 0
		sq_integral_ptr = block_size[0]*block_size[1]

		program = Code()
		out_reg = program.alloc_reg()

		def code_gen(code, block_size, args):
			return gen_code.gen_calc_variance(code, out_reg, position, integral_ptr, sq_integral_ptr, haar_size, block_size)

		program.set_generator(optimiser_wrapper(code_gen), block_size)

		simulator = Interpreter(program, integral_test, block_size)
		# hack: in order to avoid calculating integral images, inject the given values into the sq_integral buffer
		# this is easy since there is only a single PE
		for row_nr, row in enumerate(sq_integral_test):
			row_start = sq_integral_ptr + len(row)*row_nr
			for col_nr, value in enumerate(row):
				simulator.procs[0][0].memory.set(row_start + col_nr, value)

		simulator.run()

		# extract value, the register holds the reciprocal of the returned value
		pe = simulator.procs[0][0]
		result = 1./(pe.get_reg_by_name(str(out_reg)))
		return result, pe
Example #30
0
def run_detector(block_size, implementation, image_filename, cascade_filename, res_filename_prefix):
	''' Run a face detection codegen on an image and save the results.

	    Loads the cascade from cascade_filename, equalises the histogram of the
	    image with the reference implementation, runs implementation in the
	    simulator and writes <res_filename_prefix>_pixmap.png (output buffer 1)
	    and <res_filename_prefix>.png (image with the detections drawn in).
	    Raises Exception when the image cannot be read. '''
	from blip.simulator import interpreter
	from blip.support import imageio
	# both submodules are used below; importing only violajones.reference would
	# leave violajones.parse_haar available only if it was imported elsewhere
	import violajones.parse_haar
	import violajones.reference

	# first load the cascade
	cascade = violajones.parse_haar.parse_haar_xml(cascade_filename)
	print(cascade)

	image = imageio.read(image_filename)
	if not image: raise Exception('image %s not found or not supported'%image_filename)

	print('XXX histogram equalisation is not implemented yet, use violajones impl')
	print('    before executing simulator')
	image = violajones.reference.equalizeHist(image)
	im_size = len(image[0]), len(image)

	# number of blocks that fit in each image dimension
	pe_dim = [s//b for s,b in zip(im_size, block_size)]

	args = {'haar_classifier':cascade, 'pe_dim':pe_dim}
	# now execute the codegen
	code = Code()
	code.set_generator(implementation, block_size, args)

	sim = interpreter.Interpreter(code, image, block_size, 4)
	sim.run()

	detections_pixmap = sim.gen_output_image(1) # result is saved in first buffer

	# convert the number of rejections in the stages to detections
	detections = convert_pixelmap_to_detections(detections_pixmap, cascade.size)
	print('detections: %s' % (detections,))
	detections_im = visualisation.draw_faces(image, detections)

	imageio.write(res_filename_prefix + '_pixmap.png', detections_pixmap, 1)
	imageio.write(res_filename_prefix + '.png', detections_im, 3)