def test_full_integral_image_correctness():
    ''' Test generated full integral image correctness.

    Note that this relies on the correctness of interpreter and reference.py.
    The optimised code generator is simulated on a random 3x3-block image and
    both the integral and squared integral buffers are compared against the
    reference implementation. On a mismatch the unoptimised and optimised
    instruction streams are dumped to text files (best effort) before the
    test fails.
    '''
    block_size = (20, 20)
    size = tuple(x*3 for x in block_size)

    # generate random test image
    test_image = [[float(random.randint(0, 255)) for i in xrange(size[0])] for j in xrange(size[1])]

    # reference implementation
    integral_ref = reference.gen_integral_image(test_image)
    sq_integral_ref = reference.gen_integral_squared_image(test_image)

    # pointer config
    buffer_size = block_size[0]*block_size[1]
    src_ptr = 0
    integral_ptr = buffer_size
    sq_integral_ptr = 2*buffer_size

    # set up interpreter for integral image calculation
    pe_dim = [s//b for s, b in zip(size, block_size)]

    def code_gen(code, block_size, args):
        return gen_code.gen_full_integral_image(code, src_ptr, integral_ptr, sq_integral_ptr, pe_dim, block_size)

    code = Code()
    code.set_generator(optimiser_wrapper(code_gen), block_size)

    sim = Interpreter(code, test_image, block_size)
    sim.run()

    # get result of simulator with scaling, truncation turned off and float output
    integral_test = sim.gen_output_image(1, False, False, True)
    sq_integral_test = sim.gen_output_image(2, False, False, True)

    # comparison of reference with blip sim
    integral_err = compare_images(integral_ref, integral_test)
    sq_integral_err = compare_images(sq_integral_ref, sq_integral_test)

    err_eps = 0.001
    if (integral_err < err_eps) and (sq_integral_err < err_eps):
        return

    print('integral comp: %s' % (integral_err,))
    print('squared integral comp: %s' % (sq_integral_err,))
    print('rendering instruction stream to file, can take a while')

    def tag_str(instr):
        return ', '.join(instr.tag) if hasattr(instr, 'tag') else ''

    def dump_trace(filename, instructions):
        # the with-block closes the file even if rendering an instruction fails
        with open(filename, 'w') as f:
            f.write('\n'.join(str(x).ljust(40) + ' tags: ' + tag_str(x) for x in instructions))

    try:
        # generators take (code, block_size, args), same as when driven by Code
        dump_trace('unoptimised_full_integral_image_trace.txt',
                   code_gen(Code(), block_size, {}))
        # NOTE(review): assumes optimiser_wrapper(code_gen) returns a generator
        # function with the same (code, block_size, args) signature, as its use
        # with set_generator above suggests -- confirm.
        optim_gen = optimiser_wrapper(code_gen)
        dump_trace('bad_full_integral_image_trace.txt',
                   optim_gen(Code(), block_size, {}))
    except Exception as e:
        # dumping is only a debugging aid, never let it mask the real failure
        print('could not render instruction stream to file')
        print('err: ' + str(e))
    assert False, 'integral image differs from reference'
def main(block_size, codegen_args, image_filename, result_filename, codegen_implementation, analyser_classes):
    ''' Run a codegen implementation under the analysis interpreter.

    block_size: (width, height) of a PE block.
    codegen_args: dict passed to the code generator; 'pe_dim' is filled in here
        (the caller's dict is modified).
    image_filename: input image; a 128x128 zero image is used if it cannot be read.
    result_filename: accepted but not used by this function.
        NOTE(review): the report was presumably meant to be written here -- confirm.
    codegen_implementation: code generator handed to Code.set_generator.
    analyser_classes: classes instantiated with the interpreter as only argument.

    Returns the combined textual report of all analysers.
    '''
    # image
    image = imageio.read(image_filename)
    if not image:
        print('could not read image %s'%image_filename)
        image = [[0 for i in xrange(128)] for j in xrange(128)]

    im_size = len(image[0]), len(image)
    pe_dim = [s//b for s, b in zip(im_size, block_size)]
    codegen_args['pe_dim'] = pe_dim

    # code
    code = Code()
    code.set_generator(codegen_implementation, block_size, codegen_args)

    # setup interpreter
    interpreter = AnalysisInterpreter(code, image, block_size)

    # setup analysers, create instances of analysis classes
    analysers = [x(interpreter) for x in analyser_classes]
    for a in analysers:
        interpreter.set_analysis(a)

    # run interpreter
    interpreter.run()

    # generate report for all analysers
    parts = []
    for a in analysers:
        parts.append('- '*40 + '\n')
        parts.append(str(a) + '\n')
        try:
            parts.append(a.report() + '\n')
        except Exception as e:
            # keep going with the other analysers, but say what went wrong
            parts.append('could not print report: %s\n' % (e,))
    # previously the report was built and then dropped; hand it to the caller
    return ''.join(parts)
def run_implementation(block_size, implementation, image_filename, filterbank_filename, res_filename_prefix):
    ''' Execution wrapper.

    Loads the filterbank and the image, simulates `implementation` and writes
    output buffer 1 to `res_filename_prefix + '.png'`.
    Raises Exception if the image cannot be read.
    '''
    from blip.simulator import interpreter
    from blip.support import imageio

    # the filterbank is loaded before the image is touched
    filterbank = Filterbank.load(filterbank_filename)

    image = imageio.read(image_filename)
    if not image:
        raise Exception('image %s not found or not supported'%image_filename)

    # number of PEs per dimension: image extent divided by block extent
    extents = (len(image[0]), len(image))
    pe_dim = [extent//block for extent, block in zip(extents, block_size)]
    generator_args = {'filterbank': filterbank, 'pe_dim': pe_dim}

    # execute the codegen on the simulator
    code = Code()
    code.set_generator(implementation, block_size, generator_args)
    sim = interpreter.Interpreter(code, image, block_size, 4)
    sim.run()

    # result is saved in first buffer
    result = sim.gen_output_image(1)
    imageio.write(res_filename_prefix + '.png', result, 1)
def gen_faces_detect_no_errors():
    ''' Just check if codegen generates no errors.

    Walks the generated instruction stream and raises
    IllegalInstructionException when a memr/memw that addresses memory through
    the immediate uses a value outside the PE memory. Prints the number of
    instructions seen.
    '''
    block_size = (24, 24)
    size = tuple([x*2 for x in block_size])

    cascade_filename = '../data/haarcascade_frontalface_alt.xml'
    cascade = parse_haar.parse_haar_xml(cascade_filename)
    args = {'haar_classifier': cascade}

    code = Code()
    code.set_generator(gen_code.gen_detect_faces_opt, block_size, args)

    # extract some data from simulator
    sim = Interpreter(code, [[0 for x in xrange(size[0])] for y in xrange(size[1])], block_size)
    mem_size = sim.procs[0][0].memory.size
    del sim

    # run through instructions and apply some checks
    cnt = 0
    # last immediate value seen; one name is used throughout so that a memory
    # access before the first imm no longer hits an undefined variable
    current_imm = 0
    for x in code.gen(code):
        opcode = x.opcode()
        if opcode == 'imm':
            current_imm = x.value
        # heuristic test, not all errors are detected as memw(rx, ry) is also possible
        if (opcode == 'memr' and str(x.src) == 'imm') \
                or (opcode == 'memw' and str(x.dest) == 'imm'):
            if current_imm >= mem_size:
                raise IllegalInstructionException('memx out of bounds, addr = %i'%int(current_imm))
        cnt += 1
    print('# instructions: %i'%cnt)
def run_test(images, th, alpha, block_size):
    ''' Run the gen_bbs code generator on a sequence of flattened images.

    images: sequence of 1D pixel vectors, each of length width*height of
        block_size (a single PE is simulated).
    th, alpha: passed to the generator as 'th' and 'alpha'.
    block_size: (width, height).

    Returns a list with, per input image, output buffer 1 flattened to 1D.
    '''
    image0 = images[0]
    im_size = len(image0)
    bwidth, bheight = block_size
    assert(im_size == bwidth * bheight) # only one pe

    code = Code()
    code.set_generator(gen_bbs, block_size, {'th':th, 'alpha':alpha})

    output = []
    sim = None
    for im in images:
        # interpreter expects 2D array (row stride is the block width)
        im_tr = [[im[i*bwidth + j] for j in xrange(bwidth)] for i in xrange(bheight)]
        if sim is None:
            sim = Interpreter(code, im_tr, block_size)
        else:
            # restart code gen
            sim.reset()
            # set new image
            sim.set_src_image(im_tr)
        sim.run()
        im_out = sim.gen_output_image(1)
        # convert to 1D vector
        output.append([v for row in im_out for v in row])
    return output
def test_lid():
    ''' Run a small kernel built around Lid on a single 2x2 block.

    After the kernel, buffer 0 is expected to hold i*width + j at row i,
    column j.
    '''
    block_size = (2, 2)

    def test_code(code, block_size, args):
        # r0 <- Lid with selector 0, r1 <- Lid with selector 1
        yield Imm(code.r(7), 0)
        yield Lid(code.r(0), code.r(7))
        yield Imm(code.r(6), 1)
        yield Lid(code.r(1), code.r(6))
        # r0 <- r1 + r0 * block width, then store r0 at address r0
        yield Imm(code.r(6), block_size[0])
        yield Mul(code.r(0), code.r(0), code.r(6))
        yield Add(code.r(0), code.r(1), code.r(0))
        yield MemW(code.r(0), code.r(0))

    code = Code()
    code.set_generator(test_code, block_size)
    print(str(code))

    image = zeros(block_size[1], block_size[0])
    sim = Interpreter(code, image, block_size)
    sim.run_kernel()
    out_image = sim.gen_output_image(0, False)

    width, height = block_size
    for row in xrange(height):
        for col in xrange(width):
            expected = row*width + col
            assert out_image[row][col] == expected
def main(block_size, code_gen, args, image_filename, output_filename):
    ''' Simulate `code_gen` on an image and write output buffer 1 to a file.

    Falls back to an empty 128x128 image when the input cannot be read.
    Returns the interpreter after the run.
    '''
    # load image
    image = imageio.read(image_filename)
    if not image:
        print('could not read image, continuing with empty 128x128 image')
        image = [[0]*128 for _ in xrange(128)]

    code = Code()
    code.set_generator(code_gen, block_size, args)

    interpreter = Interpreter(code, image, block_size)
    interpreter.run()

    # RGB output is always requested; it is written with 3 channels
    doRGB = True
    out_image = interpreter.gen_output_image(1, True, True, False, doRGB)
    nr_channels = 3 if doRGB else 1
    imageio.write(output_filename, out_image, nr_channels)
    return interpreter
def demo(im_filename, out_filename, no_optimalisations):
    ''' Convolution demo on top of the map_neighborhood_to_pixel skeleton.

    im_filename: input image.
    out_filename: output image; the instruction trace is written to
        out_filename + "_trace.txt".
    no_optimalisations: when true the trace optimiser is bypassed.

    Raises Exception if the image cannot be read.
    '''
    from blip.code.trace_optimiser import Optimiser, ImmediatePass, PeepholePass, MemoryPass
    from blip.simulator import interpreter
    from blip.support import imageio
    from blip.simulator.opcodes import Imm, Mul, Add

    # settings
    block_size = (32, 32)
    out_ptr = block_size[0] * block_size[1]
    coeff = [[1, -2, 1]] * 3

    # convolution implementation with map_neighborhood_to_pixel skeleton
    def convolution_op(code, coeff_v, val, acc, args, block_size):
        """ Simple convolution implementation. """
        with scoped_alloc(code, 2) as (v, coeff_r):
            yield Imm(coeff_r, coeff_v)
            yield Mul(v, coeff_r, val)
            yield Add(acc, acc, v)

    def codegen(code, block_size, args):
        """ Map convolution to image. """
        return map_neighborhood_to_pixel(code, 0, out_ptr, coeff, convolution_op, args, block_size)

    # Wrap optimisers
    optimiser = Optimiser(50)
    optimiser.register_pass(ImmediatePass(optimiser))
    # optimiser.register_pass(PeepholePass(optimiser))
    optimiser.register_pass(MemoryPass(optimiser))

    def optim_wrapper(code, block_size, args):
        if no_optimalisations:
            print("optimalisations disabled")
            return codegen(code, block_size, args)
        return optimiser.run(code, codegen, block_size, args)

    # Render instruction trace
    def tag_str(instr):
        return ", ".join(instr.tag) if hasattr(instr, "tag") else ""

    # the with-block closes the trace file even when code generation fails
    with open(out_filename + "_trace.txt", "w") as f:
        f.write("\n".join(str(x).ljust(40) + " tags: " + tag_str(x)
                          for x in optim_wrapper(Code(), block_size, {})))

    # Run simulation
    code = Code()
    code.set_generator(optim_wrapper, block_size, {})

    image = imageio.read(im_filename)
    if not image:
        # fail with a clear message instead of somewhere inside the simulator
        raise Exception('image %s not found or not supported'%im_filename)

    sim = interpreter.Interpreter(code, image, block_size)
    sim.run()

    out = sim.gen_output_image(1)
    imageio.write(out_filename, out, 1)
def run_test(image, position, shape, block_size):
    ''' Simulate gen_fullintegral_sum (through the optimiser wrapper) and
    return the value left in register r4 of the first PE.

    NOTE(review): `ptr` is not a parameter; it is looked up in the enclosing
    module scope when the generator runs -- confirm it is defined there.
    '''
    def code_gen(code, block_size, args):
        return gen_code.gen_fullintegral_sum(code, code.r(4), position, shape, ptr, block_size)

    code = Code()
    wrapped_gen = optimiser_wrapper(code_gen)
    code.set_generator(wrapped_gen, block_size)

    sim = Interpreter(code, image, block_size)
    sim.run()

    # extract value
    first_pe = sim.procs[0][0]
    return first_pe.get_reg_by_name('r4')
def run_test(image, position, shape, block_size):
    ''' Simulate gen_integral_sum (through the optimiser wrapper) and return
    the value of the allocated output register on the first PE.

    NOTE(review): `ptr` is not a parameter; it is looked up in the enclosing
    module scope when the generator runs -- confirm it is defined there.
    '''
    code = Code()
    # the result register is allocated before the generator is installed
    out_reg = code.alloc_reg()

    def code_gen(code, block_size, args):
        return gen_code.gen_integral_sum(code, out_reg, position, shape, ptr, block_size)

    wrapped_gen = optimiser_wrapper(code_gen)
    code.set_generator(wrapped_gen, block_size)

    sim = Interpreter(code, image, block_size)
    sim.run()

    # extract value
    first_pe = sim.procs[0][0]
    return first_pe.get_reg_by_name(str(out_reg))
def compile_code(src_code, args, block_size, nr_reg = 8, no_sequencer=True):
    ''' Compile a kernel and return a Code object that generates it.

    src_code: kernel source handed to CompilerDriver.run.
    args: dict of kernel arguments; the values named by the compiled object's
        `arguments` are patched in before the run and the dict is also passed
        on to the generator.
    block_size: block size registered with the Code object.
    nr_reg: number of registers given to the compiler driver.
    no_sequencer: forwarded to the compiler driver (previously ignored and
        always True).
    '''
    comp = CompilerDriver(nr_reg, no_sequencer=no_sequencer)
    main_object = comp.run(src_code)
    patched_object = Compiler.patch_arguments_before_run(
        main_object, [args[argname] for argname in main_object.arguments])

    def codegen_func(code, block_size, args):
        # adapt every compiled instruction for the simulator
        for x in patched_object.code:
            yield InstrAdapter(x, use_reg_wrapper=True)

    code = Code()
    code.set_generator(codegen_func, block_size, args)
    return code
def run_test(image, offset):
    ''' Run pixel_op through the map_image_to_pixel skeleton on 16x16 blocks
    and return output buffer 1. `offset` reaches the generator as args['offset'].
    '''
    block_size = (16, 16)
    # source at address 0, destination right behind one block worth of pixels
    in_ptr = 0
    out_ptr = block_size[0]*block_size[1]
    args = {'offset' : offset}

    def codegen(code, block_size, args):
        return map_image_to_pixel(code, in_ptr, out_ptr, pixel_op, args, block_size)

    code = Code()
    code.set_generator(codegen, block_size, args)

    sim = Interpreter(code, image, block_size)
    sim.run()
    result = sim.gen_output_image(1)
    return result
def run_test(image, coeff):
    ''' Run pixel_op with coefficients `coeff` through the
    map_neighborhood_to_pixel skeleton on 16x16 blocks and return output
    buffer 1.
    '''
    block_size = (16, 16)
    # source at address 0, destination right behind one block worth of pixels
    in_ptr = 0
    out_ptr = block_size[0]*block_size[1]
    args = {}

    def codegen(code, block_size, args):
        return map_neighborhood_to_pixel(code, in_ptr, out_ptr, coeff, pixel_op, args, block_size)

    code = Code()
    code.set_generator(codegen, block_size, args)

    sim = Interpreter(code, image, block_size)
    sim.run()
    result = sim.gen_output_image(1)
    return result
def run_test(image, args, block_size):
    ''' Run `codegen` on an image that is exactly one block (one PE) in size.

    image: 2D list of pixels, len(image[0]) wide and len(image) high.
    args: passed to the code generator.
    block_size: (width, height); must equal the image size.

    Returns output buffer 1, generated with the second gen_output_image
    argument set to False.
    '''
    im_size = len(image[0]), len(image)
    # only one pe; compare as tuples so a list block_size is accepted as well
    assert im_size == tuple(block_size), \
        'image size %s does not match block size %s' % (im_size, tuple(block_size))

    code = Code()
    code.set_generator(codegen, block_size, args)

    sim = Interpreter(code, image, block_size)
    sim.run()
    output = sim.gen_output_image(1, False)
    return output
def run_test(image, position, shape, ptr, block_size):
    ''' Simulate gen_fullintegral_sum2_2 for the rectangle `shape` (x, y, w, h)
    placed at `position` and return register r4 of the first PE.
    '''
    px, py = position
    x, y, w, h = shape

    # corner coordinates of the rectangle, far corners are inclusive
    left, top = px + x, py + y
    right, bottom = left + w - 1, top + h - 1
    points = ((left, top), (right, top), (left, bottom), (right, bottom))

    def code_gen(code, block_size, args):
        return gen_code.gen_fullintegral_sum2_2(code, code.r(4), ptr, points, block_size)

    code = Code()
    code.set_generator(code_gen, block_size)

    sim = Interpreter(code, image, block_size)
    sim.run()

    # extract value
    first_pe = sim.procs[0][0]
    return first_pe.get_reg_by_name('r4')
def run_codegen(args, block_size, codegen_implementation):
    ''' Codegen to run codegen_implementation for different parameters.

    Only args['mask_size'] is forwarded; pe_dim is fixed to (2, 1).
    Returns the list of analysers [OpcodeFreq, Communication] after analysis.
    '''
    generator_args = {'mask_size': args['mask_size'], 'pe_dim': (2, 1)}

    code = Code()
    code.set_generator(codegen_implementation, block_size, generator_args)

    # init analysers
    analysers = [analysis.OpcodeFreq(), analysis.Communication()]
    analysis.analyse_code(code, analysers)
    return analysers
def test_sleep_wakeup():
    ''' Test sleep and wakeup opcodes: an Imm issued between Sleep and WakeUp
    must not change the register. '''
    block_size = (2, 2)

    def test_code(code, block_size, args):
        yield Imm(code.r(0), 1)
        yield Sleep()
        # issued while asleep
        yield Imm(code.r(0), 2)
        yield WakeUp()

    code = Code()
    code.set_generator(test_code, block_size)
    print(str(code))

    image = zeros(block_size[1], block_size[0])
    sim = Interpreter(code, image, block_size)
    sim.run_kernel()

    # second imm should not be executed
    first_pe = sim.procs[0][0]
    assert first_pe.get_reg_by_name('r0') == 1
def run_codegen_function(test_image, code_gen, block_size, args, buffer_sel = 1, **kwargs):
    ''' Simulate `code_gen` on `test_image`.

    args gets 'pe_dim' filled in (the caller's dict is modified). The optional
    keyword `image2buffer` maps buffer numbers to images that are loaded as
    source images before the run.

    Returns (output image of buffer `buffer_sel`, interpreter); the output is
    generated with gen_output_image(buffer_sel, False, False, True).
    '''
    image2buffer = kwargs.get('image2buffer', {})

    # fill this in for all functions
    extents = (len(test_image[0]), len(test_image))
    args['pe_dim'] = [extent//block for extent, block in zip(extents, block_size)]

    code = Code()
    code.set_generator(code_gen, block_size, args)

    sim = Interpreter(code, test_image, block_size)
    for buffer_nr, image in image2buffer.iteritems():
        sim.set_src_image(image, buffer_nr)
    sim.run()

    output = sim.gen_output_image(buffer_sel, False, False, True)
    return output, sim
def gen_integral_image_correctness():
    ''' Test if the generated integral image is correct.

    Note that this relies on the correctness of interpreter and reference.py.
    A random 80x80 image is processed as one single block and both the
    integral and the squared integral buffers are compared with the reference.
    '''
    # the block covers the whole image
    size = (80, 80)
    block_size = size

    # generate random test image
    test_image = [[float(random.randint(0, 255)) for i in xrange(size[0])] for j in xrange(size[1])]

    # reference implementation
    integral_ref = reference.gen_integral_image(test_image)
    sq_integral_ref = reference.gen_integral_squared_image(test_image)

    # pointer config: source, integral and squared integral buffers back to back
    buffer_size = block_size[0]*block_size[1]
    src_ptr, integral_ptr, sq_integral_ptr = 0, buffer_size, 2*buffer_size

    # set up interpreter for integral image calculation
    def code_gen(code, block_size, args):
        return gen_code.gen_integral_image(code, src_ptr, integral_ptr, sq_integral_ptr, block_size)

    code = Code()
    code.set_generator(optimiser_wrapper(code_gen), block_size)
    sim = Interpreter(code, test_image, block_size)
    sim.run()

    # get result of simulator with scaling, truncation turned off and float output
    integral_test = sim.gen_output_image(1, False, False, True)
    sq_integral_test = sim.gen_output_image(2, False, False, True)

    # comparison of reference with blip sim
    integral_err = compare_images(integral_ref, integral_test)
    sq_integral_err = compare_images(sq_integral_ref, sq_integral_test)

    err_eps = 0.001
    if (integral_err < err_eps) and (sq_integral_err < err_eps):
        return
    print('integral comp: %s' % (integral_err,))
    print('squared integral comp: %s' % (sq_integral_err,))
    assert False
def run_codegen(cascade, block_size, codegen_implementation):
    ''' Codegen to run violajones for a set of parameters.

    Runs the VJ codegen to analyse the code for a certain set of parameters;
    pe_dim is fixed to (2, 1).
    Returns the list of analysers [OpcodeFreq, Communication] after analysis.
    '''
    generator_args = {'haar_classifier': cascade, 'pe_dim': (2, 1)}

    code = Code()
    code.set_generator(codegen_implementation, block_size, generator_args)

    # init analysers
    analysers = [analysis.OpcodeFreq(), analysis.Communication()]
    analysis.analyse_code(code, analysers)
    return analysers
def test_register_value_fetch():
    ''' Execute a single Imm on a PE and read the register back by name. '''
    rsize, csize = 64, 64
    block_size = (rsize, csize)
    image = zeros(rsize, csize)

    # test code
    def test_code(code, block_size, args):
        yield Imm(code.r(0), 3)

    code = Code()
    code.set_generator(test_code, block_size)
    print(str(code))

    # PE
    pe = PE(None, image, block_size)

    # execute program, both steps for every instruction
    for instr in code.plain_instr():
        pe.step1(instr)
        pe.step2(instr)

    assert pe.get_reg_by_name('r0') == 3
def run_test(image, cascade):
    ''' Run gen_detect_faces_opt on 64x64 blocks and return the detections.

    The image is histogram-equalised with the reference implementation first.
    '''
    block_size = (64, 64)

    print('XXX histogram equalisation is not implemented yet, use violajones impl')
    print(' before executing simulator')
    equalised = reference.equalizeHist(image)

    # now execute the codegen
    code = Code()
    code.set_generator(gen_code.gen_detect_faces_opt, block_size, {'haar_classifier': cascade})
    sim = Interpreter(code, equalised, block_size, 4)
    sim.run()

    # result is saved in first buffer
    detections_pixmap = sim.gen_output_image(1)

    # convert the number of rejections in the stages to detections
    return gen_code.convert_pixelmap_to_detections(detections_pixmap, cascade.size)
def test_neg():
    ''' Preload r0 with 42, execute Neg(r0, r0) on a PE and expect -42. '''
    rsize, csize = 32, 32
    block_size = (rsize, csize)

    # test program
    def test_code(code, block_size, args):
        yield Neg(code.r(0), code.r(0))

    code = Code()
    code.set_generator(test_code, block_size)
    print(str(code))

    image = zeros(rsize, csize)
    pe = PE(None, image, block_size)

    # warning: this makes certain assumptions on internals of PE
    pe.regs[regname_to_id('r0')] = 42

    for instr in code.plain_instr():
        pe.step1(instr)
        pe.step2(instr)

    assert pe.get_reg_by_name('r0') == -42.
def test_sleep_wakeup2():
    ''' Test sleep and wakeup opcodes through the is_powerdown() attribute. '''
    block_size = (2, 2)
    image = zeros(block_size[1], block_size[0])

    # a lone Sleep leaves the PE powered down
    def test_code_sleep(code, block_size, args):
        yield Sleep()

    sleep_code = Code()
    sleep_code.set_generator(test_code_sleep, block_size)
    sleep_sim = Interpreter(sleep_code, image, block_size)

    # check if attribute is correct
    assert sleep_sim.procs[0][0].is_powerdown() == False
    sleep_sim.run_kernel()
    assert sleep_sim.procs[0][0].is_powerdown() == True

    # check if attribute is correct after wakeup
    def test_code_sleep_wakeup(code, block_size, args):
        yield Sleep()
        yield WakeUp()

    wakeup_code = Code()
    wakeup_code.set_generator(test_code_sleep_wakeup, block_size)
    wakeup_sim = Interpreter(wakeup_code, image, block_size)

    assert wakeup_sim.procs[0][0].is_powerdown() == False
    wakeup_sim.run_kernel()
    assert wakeup_sim.procs[0][0].is_powerdown() == False
def test_full_integral_image_correctness():
    ''' Test generated full integral image correctness.

    Note that this relies on the correctness of interpreter and reference.py.
    A random image of 3x3 blocks of 20x20 pixels is simulated and both the
    integral and the squared integral buffers are compared with the reference.
    '''
    block_size = (20, 20)
    size = tuple(x*3 for x in block_size)

    # generate random test image
    test_image = [[float(random.randint(0, 255)) for i in xrange(size[0])] for j in xrange(size[1])]

    # reference implementation
    integral_ref = reference.gen_integral_image(test_image)
    sq_integral_ref = reference.gen_integral_squared_image(test_image)

    # pointer config: source, integral and squared integral buffers back to back
    buffer_size = block_size[0]*block_size[1]
    src_ptr, integral_ptr, sq_integral_ptr = 0, buffer_size, 2*buffer_size

    # set up interpreter for integral image calculation
    pe_dim = [s//b for s, b in zip(size, block_size)]
    code = Code()

    def code_gen(code, block_size, args):
        return gen_code.gen_full_integral_image(code, src_ptr, integral_ptr, sq_integral_ptr, pe_dim, block_size)

    code.set_generator(code_gen, block_size)
    sim = Interpreter(code, test_image, block_size)
    sim.run()

    # get result of simulator with scaling, truncation turned off and float output
    integral_test = sim.gen_output_image(1, False, False, True)
    sq_integral_test = sim.gen_output_image(2, False, False, True)

    # comparison of reference with blip sim
    integral_err = compare_images(integral_ref, integral_test)
    sq_integral_err = compare_images(sq_integral_ref, sq_integral_test)

    err_eps = 0.001
    within_tolerance = (integral_err < err_eps) and (sq_integral_err < err_eps)
    assert within_tolerance
def test_communication_profiler():
    ''' Feed four instructions, two of them tagged "communication overhead",
    to the Communication analyser and check its counters. '''
    code = Code()
    comm = Communication()

    first_mov = Mov(code.r(4), code.r(3))
    tag_instr(first_mov, "communication overhead")
    imm = Imm(code.r(4), 3)
    second_mov = Mov(code.r(4), code.r(3))
    tag_instr(second_mov, "communication overhead")
    mul = Mul(code.r(2), code.r(4), code.r(3))

    for instr in (first_mov, imm, second_mov, mul):
        comm.process(instr)

    assert comm.nr_instr == 4
    assert comm.overhead == 2
def test_codesection_profiler():
    ''' Feed tagged Mov instructions to the CodeSectionProfiler and check the
    per-section counts; only "section:" tags are expected in the result. '''
    code = Code()
    csp = CodeSectionProfiler()

    # instructions are created and tagged one after the other, in this order
    instructions = []
    for tag in ("blah", "section:test", "section:test", "section:test2"):
        instr = Mov(code.r(4), code.r(3))
        tag_instr(instr, tag)
        instructions.append(instr)

    for instr in instructions:
        csp.process(instr)

    expected = {"test": 2, "test2": 1}
    assert compare_cnt_dict(expected, csp.section_cnt)
def run_test(codegen_function, image, cascade, block_size):
    ''' Run `codegen_function` through the optimiser wrapper on `image` and
    return the detections.

    The image is histogram-equalised with the reference implementation first;
    pe_dim is derived from the equalised image and block_size.
    '''
    print('running %s'%codegen_function.__name__)

    print('XXX histogram equalisation is not implemented yet, use violajones impl')
    print(' before executing simulator')
    image = reference.equalizeHist(image)

    block_width, block_height = block_size
    pe_dim = (len(image[0])//block_width, len(image)//block_height)
    generator_args = {'haar_classifier': cascade, 'pe_dim': pe_dim}

    # now execute the codegen
    code = Code()
    code.set_generator(optimiser_wrapper(codegen_function), block_size, generator_args)
    sim = Interpreter(code, image, block_size, 4)
    sim.run()

    # result is saved in first buffer
    detections_pixmap = sim.gen_output_image(1)

    # convert the number of rejections in the stages to detections
    return gen_code.convert_pixelmap_to_detections(detections_pixmap, cascade.size)
def run_test(position, integral_test, sq_integral_test, haar_size, block_size):
    ''' Simulate gen_calc_variance on a single PE.

    integral_test is loaded as source image; sq_integral_test is written
    directly into PE memory at sq_integral_ptr.
    Returns (1 / value of the output register, first PE).
    '''
    integral_ptr = 0
    sq_integral_ptr = block_size[0]*block_size[1]

    code = Code()
    out_reg = code.alloc_reg()

    def code_gen(code, block_size, args):
        return gen_code.gen_calc_variance(code, out_reg, position, integral_ptr, sq_integral_ptr, haar_size, block_size)

    code.set_generator(optimiser_wrapper(code_gen), block_size)
    sim = Interpreter(code, integral_test, block_size)

    # hack: in order to avoid calculating integral images, inject random values into the sq_integral buffer
    # this is easy since there is only a single PE
    for row_nr, row in enumerate(sq_integral_test):
        row_base = sq_integral_ptr + len(row)*row_nr
        for col_nr, value in enumerate(row):
            sim.procs[0][0].memory.set(row_base + col_nr, value)

    sim.run()
    pe = sim.procs[0][0]

    # extract value; the register value is inverted before it is returned
    inverse = 1./(pe.get_reg_by_name(str(out_reg)))
    return inverse, pe
def run_detector(block_size, implementation, image_filename, cascade_filename, res_filename_prefix):
    ''' Run a face detector implementation on an image and save the results.

    block_size: (width, height) of a PE block.
    implementation: code generator handed to Code.set_generator.
    image_filename: input image.
    cascade_filename: haar cascade xml file.
    res_filename_prefix: the detection pixmap is written to
        prefix + '_pixmap.png', the image with drawn faces to prefix + '.png'.

    Raises Exception if the image cannot be read.
    '''
    from blip.simulator import interpreter
    from blip.support import imageio
    # import both submodules that are used below, so that this function does
    # not depend on another module having imported violajones.parse_haar
    import violajones.parse_haar
    import violajones.reference

    # first load the cascade
    cascade = violajones.parse_haar.parse_haar_xml(cascade_filename)
    print(cascade)

    image = imageio.read(image_filename)
    if not image:
        raise Exception('image %s not found or not supported'%image_filename)

    print('XXX histogram equalisation is not implemented yet, use violajones impl')
    print(' before executing simulator')
    image = violajones.reference.equalizeHist(image)

    im_size = len(image[0]), len(image)
    pe_dim = [s//b for s, b in zip(im_size, block_size)]
    args = {'haar_classifier': cascade, 'pe_dim': pe_dim}

    # now execute the codegen
    code = Code()
    code.set_generator(implementation, block_size, args)
    sim = interpreter.Interpreter(code, image, block_size, 4)
    sim.run()

    # result is saved in first buffer
    detections_pixmap = sim.gen_output_image(1)

    # convert the number of rejections in the stages to detections
    detections = convert_pixelmap_to_detections(detections_pixmap, cascade.size)
    print('detections: %s' % (detections,))

    detections_im = visualisation.draw_faces(image, detections)
    imageio.write(res_filename_prefix + '_pixmap.png', detections_pixmap, 1)
    imageio.write(res_filename_prefix + '.png', detections_im, 3)