def generate_embedded_testbench(self, tc_list, io_map, input_signals, output_signals, time_step, test_fname="test.input"):
    """ Generate testbench with embedded input and output data """
    self_component = self.implementation.get_component_object()
    self_instance = self_component(io_map=io_map, tag="tested_entity")
    test_statement = Statement()

    for index, (input_values, output_values) in enumerate(tc_list):
        test_statement.add(
            self.implement_test_case(io_map, input_values, output_signals,
                                     output_values, time_step, index=index))

    reset_statement = self.get_reset_statement(io_map, time_step)

    testbench = CodeEntity("testbench")
    test_process = Process(
        reset_statement,
        test_statement,
        # end of test
        Assert(
            Constant(0, precision=ML_Bool),
            " \"end of test, no error encountered \"",
            severity=Assert.Warning),
        # infinite end loop
        WhileLoop(
            Constant(1, precision=ML_Bool),
            Statement(
                Wait(time_step * (self.stage_num + 2)),
            )
        )
    )

    testbench_scheme = Statement(
        self_instance,
        test_process,
    )

    if self.pipelined:
        half_time_step = time_step / 2
        assert (half_time_step * 2) == time_step
        # adding clock process for pipelined bench
        clk_process = Process(
            Statement(
                ReferenceAssign(io_map["clk"], Constant(1, precision=ML_StdLogic)),
                Wait(half_time_step),
                ReferenceAssign(io_map["clk"], Constant(0, precision=ML_StdLogic)),
                Wait(half_time_step),
            )
        )
        testbench_scheme.push(clk_process)

    testbench.add_process(testbench_scheme)
    return [testbench]
def test_ref_assign(self):
    """ test behavior of StaticVectorizer on predicated ReferenceAssign """
    va = Variable("a")
    vb = Variable("b")
    vc = Variable("c")
    scheme = Statement(
        ReferenceAssign(va, Constant(3)),
        ConditionBlock(
            (va > vb).modify_attributes(likely=True),
            Statement(
                ReferenceAssign(vb, va),
                ReferenceAssign(va, Constant(11)),
                Return(va)),
        ),
        ReferenceAssign(va, Constant(7)),
        Return(vb))
    vectorized_path = StaticVectorizer().extract_vectorizable_path(scheme, fallback_policy)
    linearized_most_likely_path = instanciate_variable(
        vectorized_path.linearized_optree, vectorized_path.variable_mapping)
    test_result = (isinstance(linearized_most_likely_path, Constant)
                   and linearized_most_likely_path.get_value() == 11)
    if not test_result:
        print("test UT_StaticVectorizer failure")
        print("scheme: {}".format(scheme.get_str()))
        print("linearized_most_likely_path: {}".format(linearized_most_likely_path))
    self.assertTrue(test_result)
def expand_sub_ndrange(var_range_list, kernel):
    if len(var_range_list) == 0:
        pre_expanded_kernel = expand_kernel_expr(kernel)
        expanded_kernel, statement_list = extract_placeholder(pre_expanded_kernel)
        expanded_statement = Statement(*tuple(statement_list))
        print("expand_ndrange: ", expanded_kernel, statement_list)
        if expanded_kernel is not None:
            # append expanded_kernel at the Statement's end once
            # every PlaceHolder's dependency has been resolved
            expanded_statement.add(expanded_kernel)
        return expanded_statement
    else:
        var_range = var_range_list.pop(0)
        scheme = Loop(
            # init statement
            ReferenceAssign(var_range.var_index, var_range.first_index),
            # exit condition
            var_range.var_index <= var_range.last_index,
            # loop body
            Statement(
                expand_sub_ndrange(var_range_list, kernel),
                # loop iterator increment
                ReferenceAssign(var_range.var_index,
                                var_range.var_index + var_range.index_step)),
        )
        return scheme
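# A minimal plain-Python sketch (not part of the MDL API) of the iteration
# structure built by the recursion above: one Loop node per
# (first_index, last_index, index_step) triple, nested in list order, with the
# expanded kernel as the innermost body. Names below are hypothetical and
# only illustrate the control flow.
def _reference_sub_ndrange(var_range_list, kernel_fct):
    """ Execute kernel_fct once per point of the nested index space """
    if not var_range_list:
        kernel_fct()
        return
    first_index, last_index, index_step = var_range_list[0]
    index = first_index
    while index <= last_index:      # mirrors the Loop exit condition above
        _reference_sub_ndrange(var_range_list[1:], kernel_fct)
        index += index_step         # mirrors the loop iterator increment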
def generate_tensor_check_loop(self, tensor_descriptors, input_tables, output_tables):
    # unpack tensor descriptors tuple
    (input_tensor_descriptor_list, output_tensor_descriptor_list) = tensor_descriptors
    # internal array iterator index
    vj = Variable("j", precision=ML_UInt32, var_type=Variable.Local)

    printf_error_detail_function = self.get_printf_error_detail_fct(
        output_tensor_descriptor_list[0])

    NUM_INPUT_ARRAY = len(input_tables)

    # generate the expected table for the whole multi-array
    expected_tables = self.generate_expected_table(tensor_descriptors, input_tables)

    # global statement to list all checks
    check_statement = Statement()

    # implement check for each output tensor
    for out_id, out_td in enumerate(output_tensor_descriptor_list):
        # expected values for the (vj)-th entry of the sub-array
        expected_values = [
            TableLoad(expected_tables[out_id], vj, i)
            for i in range(self.accuracy.get_num_output_value())
        ]
        # local result for the (vj)-th entry of the sub-array
        local_result = TableLoad(output_tables[out_id], vj)
        array_len = out_td.get_bounding_size()

        if self.break_error:
            return_statement_break = Statement(
                printf_error_detail_function(*((vj,) + (local_result,))),
                self.accuracy.get_output_print_call(self.function_name, expected_values))
        else:
            return_statement_break = Statement(
                printf_error_detail_function(*((vj,) + (local_result,))),
                self.accuracy.get_output_print_call(self.function_name, expected_values),
                Return(Constant(1, precision=ML_Int32)))

        check_array_loop = Loop(
            ReferenceAssign(vj, 0),
            vj < array_len,
            Statement(
                ConditionBlock(
                    self.accuracy.get_output_check_test(local_result, expected_values),
                    return_statement_break),
                ReferenceAssign(vj, vj + 1),
            ))
        check_statement.add(check_array_loop)
    return check_statement
def get_reset_statement(self, io_map, time_step):
    reset_statement = Statement()
    if self.reset_pipeline:
        # TODO: fix pipeline register reset
        reset_value = 0 if self.negate_reset else 1
        unreset_value = 1 - reset_value
        reset_signal = io_map[self.reset_name]
        reset_statement.add(
            ReferenceAssign(reset_signal, Constant(reset_value, precision=ML_StdLogic)))
        # to account for synchronous reset
        reset_statement.add(Wait(time_step * 3))
        reset_statement.add(
            ReferenceAssign(reset_signal, Constant(unreset_value, precision=ML_StdLogic)))
        reset_statement.add(Wait(time_step * 3))
        for recirculate_signal in self.recirculate_signal_map.values():
            reset_statement.add(
                ReferenceAssign(io_map[recirculate_signal.get_tag()],
                                Constant(0, precision=ML_StdLogic)))
    return reset_statement
def get_input_assign(input_signal, input_value):
    """ Get input assignation statement """
    input_assign = ReferenceAssign(
        input_signal,
        Constant(input_value, precision=input_signal.get_precision()))
    return input_assign
def add_stage_forward(self, op_dst, op_src, stage):
    Log.report(
        Log.Verbose,
        " adding stage forward {op_src} to {op_dst} @ stage {stage}".format(
            op_src=op_src, op_dst=op_dst, stage=stage))
    if stage not in self.stage_forward:
        self.stage_forward[stage] = []
    self.stage_forward[stage].append(ReferenceAssign(op_dst, op_src))
    self.pre_statement.add(op_src)
def expand_kernel_expr(kernel, iterator_format=ML_Int32):
    """ Expand a kernel expression into the corresponding MDL graph """
    if isinstance(kernel, NDRange):
        return expand_ndrange(kernel)
    elif isinstance(kernel, Sum):
        var_iter = kernel.index_iter_range.var_index
        # TODO/FIXME to be uniquified
        acc = Variable("acc", var_type=Variable.Local, precision=kernel.precision)
        # TODO/FIXME implement proper acc init
        if kernel.precision.is_vector_format():
            C0 = Constant([0] * kernel.precision.get_vector_size(), precision=kernel.precision)
        else:
            C0 = Constant(0, precision=kernel.precision)
        scheme = Loop(
            Statement(
                ReferenceAssign(var_iter, kernel.index_iter_range.first_index),
                ReferenceAssign(acc, C0)),
            var_iter <= kernel.index_iter_range.last_index,
            Statement(
                ReferenceAssign(
                    acc,
                    Addition(acc, expand_kernel_expr(kernel.elt_operation),
                             precision=kernel.precision)),
                # loop iterator increment
                ReferenceAssign(var_iter, var_iter + kernel.index_iter_range.index_step)))
        return PlaceHolder(acc, scheme)
    elif isinstance(kernel, (ReadAccessor, WriteAccessor)):
        return expand_accessor(kernel)
    elif is_leaf_node(kernel):
        return kernel
    else:
        # vanilla metalibm ops are left unmodified (except
        # recursive expansion)
        for index, op in enumerate(kernel.inputs):
            new_op = expand_kernel_expr(op)
            kernel.set_input(index, new_op)
        return kernel
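# Hedged plain-Python sketch of the value computed by the Sum expansion above
# (the Loop it builds is the MDL counterpart of this reduction). The callable
# elt_operation stands in for the expanded kernel element and is hypothetical.
def _reference_sum(first_index, last_index, index_step, elt_operation):
    acc = 0
    i = first_index
    while i <= last_index:
        acc = acc + elt_operation(i)
        i += index_step
    return acc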
def generate_scalar_scheme(self, vx, vy):
    div = Division(vx, vy, precision=self.precision)
    div_if = Trunc(div, precision=self.precision)
    rem = Variable("rem", var_type=Variable.Local, precision=self.precision)
    qi = Variable("qi", var_type=Variable.Local, precision=self.precision)
    qi_bound = Constant(S2**self.precision.get_mantissa_size())
    init_rem = FusedMultiplyAdd(-div_if, vy, vx)

    # factorizing 1 / vy to save time
    # NOTES: it makes rem / vy approximate
    # shared_rcp = Division(1, vy, precision=self.precision)

    iterative_fmod = Loop(
        Statement(
            ReferenceAssign(rem, init_rem),
            ReferenceAssign(qi, div_if),
        ),
        Abs(qi) > qi_bound,
        Statement(
            ReferenceAssign(
                qi,
                # Trunc(shared_rcp * rem, precision=self.precision)
                Trunc(rem / vy, precision=self.precision)),
            ReferenceAssign(rem, FMA(-qi, vy, rem))))
    scheme = Statement(
        rem,
        # shared_rcp,
        iterative_fmod,
        ConditionBlock(
            # if rem's sign and vx sign mismatch
            (rem * vx < 0.0).modify_attributes(tag="update_cond", debug=debug_multi),
            Return(rem + vy),
            Return(rem),
        ))
    return scheme
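# A hedged plain-Python model of the iterative scheme above: peel off a
# truncated quotient until it fits within one mantissa (the generated scheme
# uses an FMA so that rem - qi * vy is computed exactly), then patch the sign
# of the remainder. Illustrative only, not the generated code.
import math

def _reference_fmod(vx, vy, mantissa_size=53):
    qi = math.trunc(vx / vy)
    rem = vx - qi * vy
    while abs(qi) > 2.0**mantissa_size:
        qi = math.trunc(rem / vy)
        rem = rem - qi * vy
    # if rem's sign and vx's sign mismatch
    if rem * vx < 0.0:
        return rem + vy
    return rem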
def convert_bit_heap_to_fixed_point(current_bit_heap, signed=False):
    # final propagating sum
    op_index = 0
    op_list = []
    op_statement = Statement()
    while current_bit_heap.max_count() > 0:
        op_size = current_bit_heap.max_index - current_bit_heap.min_index + 1
        op_format = ML_StdLogicVectorFormat(op_size)
        op_reduce = Signal("op_%d" % op_index, precision=op_format, var_type=Variable.Local)
        offset_index = current_bit_heap.min_index

        for index in range(current_bit_heap.min_index, current_bit_heap.max_index + 1):
            out_index = index - offset_index
            bit_list = current_bit_heap.pop_bits(index, 1)
            if len(bit_list) == 0:
                op_statement.push(
                    ReferenceAssign(BitSelection(op_reduce, out_index),
                                    Constant(0, precision=ML_StdLogic)))
            else:
                assert len(bit_list) == 1
                op_statement.push(
                    ReferenceAssign(BitSelection(op_reduce, out_index), bit_list[0]))

        op_precision = fixed_point(op_size + offset_index, -offset_index, signed=signed)
        op_list.append(
            PlaceHolder(TypeCast(op_reduce, precision=op_precision), op_statement))
        op_index += 1

    return op_list, op_statement
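# Hedged plain-Python sketch of the numerical meaning of one extracted word:
# the bit popped from heap column <index> keeps weight 2**index, so the word
# assembled above, cast to fixed_point(op_size + offset, -offset), carries the
# value below. The column_bits mapping is a hypothetical stand-in for the heap.
def _reference_bit_heap_word_value(column_bits):
    """ column_bits: dict mapping column index -> 0/1 bit popped from the heap """
    return sum(bit * 2**index for index, bit in column_bits.items())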
def add_stage_forward(self, op_dst, op_src, stage):
    """ Adding a node to forward op_src to op_dst at stage index

        :param op_dst: stage output node (register output)
        :type op_dst: ML_Operation
        :param op_src: stage input node (register entry)
        :type op_src: ML_Operation
        :param stage: destination stage index
        :type stage: int
    """
    Log.report(Log.Verbose,
               " adding stage forward {op_src} to {op_dst} @ stage {stage}",
               op_src=op_src, op_dst=op_dst, stage=stage)
    if stage not in self.stage_forward:
        self.stage_forward[stage] = []
    self.stage_forward[stage].append(ReferenceAssign(op_dst, op_src))
    self.register_count += op_dst.get_precision().get_bit_size()
    self.pre_statement.add(op_src)
def recursive_inline(node):
    if node in memoization_map:
        return memoization_map[node]
    elif node in inputs_var2value:
        input_value = inputs_var2value[node]
        memoization_map[node] = input_value
        return input_value
    elif isinstance(node, Return):
        node_value = recursive_inline(node.get_input(0))
        if node_value is not dst_var:
            new_node = ReferenceAssign(dst_var, node_value)
            memoization_map[node] = new_node
            return new_node
        else:
            return node_value
    elif isinstance(node, ML_LeafNode):
        memoization_map[node] = node
        return node
    else:
        for i, op in enumerate(node.inputs):
            node.set_input(i, recursive_inline(op))
        memoization_map[node] = node
        return node
def generate_datafile_testbench(self, tc_list, io_map, input_signals, output_signals, time_step, test_fname="test.input"):
    """ Generate testbench with input and output data externalized in
        a data file """
    # textio function to read hexadecimal text
    def FCT_HexaRead_gen(input_format):
        legalized_input_format = input_format
        FCT_HexaRead = FunctionObject(
            "hread", [HDL_LINE, legalized_input_format], ML_Void,
            FunctionOperator("hread", void_function=True, arity=2))
        return FCT_HexaRead

    # textio function to read binary text
    FCT_Read = FunctionObject(
        "read", [HDL_LINE, ML_StdLogic], ML_Void,
        FunctionOperator("read", void_function=True, arity=2))

    input_line = Variable("input_line", precision=HDL_LINE, var_type=Variable.Local)

    # building ordered list of input and output signal names
    input_signal_list = [sname for sname in input_signals.keys()]
    input_statement = Statement()
    for input_name in input_signal_list:
        input_format = input_signals[input_name].precision
        input_var = Variable("v_" + input_name, precision=input_format, var_type=Variable.Local)
        if input_format is ML_StdLogic:
            input_statement.add(FCT_Read(input_line, input_var))
        else:
            input_statement.add(FCT_HexaRead_gen(input_format)(input_line, input_var))
        input_statement.add(ReferenceAssign(input_signals[input_name], input_var))

    output_signal_list = [sname for sname in output_signals.keys()]
    output_statement = Statement()
    for output_name in output_signal_list:
        output_format = output_signals[output_name].precision
        output_var = Variable("v_" + output_name, precision=output_format, var_type=Variable.Local)
        if output_format is ML_StdLogic:
            output_statement.add(FCT_Read(input_line, output_var))
        else:
            output_statement.add(FCT_HexaRead_gen(output_format)(input_line, output_var))

        output_signal = output_signals[output_name]
        #value_msg = get_output_value_msg(output_signal, output_value)
        test_pass_cond, check_statement = get_output_check_statement(output_signal, output_name, output_var)

        input_msg = multi_Concatenation(
            *tuple(sum([[" %s=" % input_tag,
                         signal_str_conversion(input_signals[input_tag],
                                               input_signals[input_tag].precision)]
                        for input_tag in input_signal_list], [])))

        output_statement.add(check_statement)
        assert_statement = Assert(
            test_pass_cond,
            multi_Concatenation(
                "unexpected value for inputs ", input_msg,
                " expecting :", signal_str_conversion(output_var, output_format),
                " got :", signal_str_conversion(output_signal, output_format),
                precision=ML_String),
            severity=Assert.Failure)
        output_statement.add(assert_statement)

    self_component = self.implementation.get_component_object()
    self_instance = self_component(io_map=io_map, tag="tested_entity")
    test_statement = Statement()

    DATA_FILE_NAME = test_fname
    with open(DATA_FILE_NAME, "w") as data_file:
        # dumping column tags
        data_file.write("# " + " ".join(input_signal_list + output_signal_list) + "\n")

        def get_raw_cst_string(cst_format, cst_value):
            size = int((cst_format.get_bit_size() + 3) / 4)
            return ("{:x}").format(
                cst_format.get_base_format().get_integer_coding(cst_value)).zfill(size)

        for input_values, output_values in tc_list:
            # TODO: generate test data file
            cst_list = []
            for input_name in input_signal_list:
                input_value = input_values[input_name]
                input_format = input_signals[input_name].get_precision()
                cst_list.append(get_raw_cst_string(input_format, input_value))

            for output_name in output_signal_list:
                output_value = output_values[output_name]
                output_format = output_signals[output_name].get_precision()
                cst_list.append(get_raw_cst_string(output_format, output_value))
            # dumping line into file
            data_file.write(" ".join(cst_list) + "\n")

    input_stream = Variable("data_file", precision=HDL_FILE, var_type=Variable.Local)
    file_status = Variable("file_status", precision=HDL_OPEN_FILE_STATUS, var_type=Variable.Local)
    FCT_EndFile = FunctionObject(
        "endfile", [HDL_FILE], ML_Bool, FunctionOperator("endfile", arity=1))
    FCT_OpenFile = FunctionObject(
        "FILE_OPEN", [HDL_OPEN_FILE_STATUS, HDL_FILE, ML_String], ML_Void,
        FunctionOperator(
            "FILE_OPEN",
            arg_map={0: FO_Arg(0), 1: FO_Arg(1), 2: FO_Arg(2), 3: "READ_MODE"},
            void_function=True))
    FCT_ReadLine = FunctionObject(
        "readline", [HDL_FILE, HDL_LINE], ML_Void,
        FunctionOperator("readline", void_function=True, arity=2))

    reset_statement = self.get_reset_statement(io_map, time_step)
    OPEN_OK = Constant("OPEN_OK", precision=HDL_OPEN_FILE_STATUS)

    testbench = CodeEntity("testbench")
    test_process = Process(
        reset_statement,
        FCT_OpenFile(file_status, input_stream, DATA_FILE_NAME),
        ConditionBlock(
            Comparison(file_status, OPEN_OK, specifier=Comparison.NotEqual),
            Assert(
                Constant(0, precision=ML_Bool),
                " \"failed to open file {}\"".format(DATA_FILE_NAME),
                severity=Assert.Failure)),
        # consume legend line
        FCT_ReadLine(input_stream, input_line),
        WhileLoop(
            LogicalNot(FCT_EndFile(input_stream)),
            Statement(
                FCT_ReadLine(input_stream, input_line),
                input_statement,
                Wait(time_step * (self.stage_num + 2)),
                output_statement,
            ),
        ),
        # end of test
        Assert(
            Constant(0, precision=ML_Bool),
            " \"end of test, no error encountered \"",
            severity=Assert.Warning),
        # infinite end loop
        WhileLoop(
            Constant(1, precision=ML_Bool),
            Statement(
                Wait(time_step * (self.stage_num + 2)),
            )
        )
    )

    testbench_scheme = Statement(
        self_instance,
        test_process,
    )

    if self.pipelined:
        half_time_step = time_step / 2
        assert (half_time_step * 2) == time_step
        # adding clock process for pipelined bench
        clk_process = Process(
            Statement(
                ReferenceAssign(io_map["clk"], Constant(1, precision=ML_StdLogic)),
                Wait(half_time_step),
                ReferenceAssign(io_map["clk"], Constant(0, precision=ML_StdLogic)),
                Wait(half_time_step),
            )
        )
        testbench_scheme.push(clk_process)

    testbench.add_process(testbench_scheme)
    return [testbench]
def generate_pipeline_stage(entity, reset=False, recirculate=False, one_process_per_stage=True):
    """ Process an entity to generate the pipeline stages it requires """
    retiming_map = {}
    retime_map = RetimeMap()
    output_assign_list = entity.implementation.get_output_assign()
    for output in output_assign_list:
        Log.report(Log.Verbose, "generating pipeline from output {} ", output)
        retime_op(output, retime_map)
    for recirculate_stage in entity.recirculate_signal_map:
        recirculate_ctrl = entity.recirculate_signal_map[recirculate_stage]
        Log.report(Log.Verbose, "generating pipeline from recirculation control signal {}", recirculate_ctrl)
        retime_op(recirculate_ctrl, retime_map)

    process_statement = Statement()

    # adding stage forward process
    clk = entity.get_clk_input()
    clock_statement = Statement()
    # handle to the first clock Process (in generation order)
    # which must be the one whose pre_statement is filled with
    # signals required to be generated outside the processes
    first_process = False
    for stage_id in sorted(retime_map.stage_forward.keys()):
        stage_statement = Statement(
            *tuple(assign for assign in retime_map.stage_forward[stage_id]))

        if reset:
            reset_statement = Statement()
            for assign in retime_map.stage_forward[stage_id]:
                target = assign.get_input(0)
                reset_value = Constant(0, precision=target.get_precision())
                reset_statement.push(ReferenceAssign(target, reset_value))

            if recirculate:
                # inserting recirculation condition
                recirculate_signal = entity.get_recirculate_signal(stage_id)
                stage_statement = ConditionBlock(
                    Comparison(
                        recirculate_signal,
                        Constant(0, precision=recirculate_signal.get_precision()),
                        specifier=Comparison.Equal,
                        precision=ML_Bool),
                    stage_statement)

            stage_statement = ConditionBlock(
                Comparison(entity.reset_signal,
                           Constant(1, precision=ML_StdLogic),
                           specifier=Comparison.Equal,
                           precision=ML_Bool),
                reset_statement,
                stage_statement)

        # To meet simulation / synthesis tools, we build
        # a single if clock predicate block per stage
        clock_block = ConditionBlock(
            LogicalAnd(
                Event(clk, precision=ML_Bool),
                Comparison(clk,
                           Constant(1, precision=ML_StdLogic),
                           specifier=Comparison.Equal,
                           precision=ML_Bool),
                precision=ML_Bool),
            stage_statement)

        if one_process_per_stage:
            clock_process = Process(clock_block, sensibility_list=[clk])
            entity.implementation.add_process(clock_process)
            first_process = first_process or clock_process
        else:
            clock_statement.add(clock_block)

    if one_process_per_stage:
        pass
    else:
        process_statement.add(clock_statement)
        pipeline_process = Process(process_statement, sensibility_list=[clk])
        entity.implementation.add_process(pipeline_process)
        first_process = pipeline_process

    # statement that gathers signals which must be pre-computed
    for op in retime_map.pre_statement:
        first_process.add_to_pre_statement(op)
    stage_num = len(retime_map.stage_forward.keys())
    #print "there are %d pipeline stages" % (stage_num)
    return stage_num
def generate_scheme(self):
    # declaring target and instantiating optimization engine
    precision_ptr = self.get_input_precision(0)
    index_format = self.get_input_precision(2)
    multi_elt_num = self.multi_elt_num

    dst = self.implementation.add_input_variable("dst", precision_ptr)
    src = self.implementation.add_input_variable("src", precision_ptr)
    n = self.implementation.add_input_variable("len", index_format)

    i = Variable("i", precision=index_format, var_type=Variable.Local)
    CU0 = Constant(0, precision=index_format)

    element_format = self.precision

    self.function_list = []

    if multi_elt_num > 1:
        element_format = VECTOR_TYPE_MAP[self.precision][multi_elt_num]

    elt_input = TableLoad(src, i, precision=element_format)

    local_exp = Variable("local_exp", precision=element_format, var_type=Variable.Local)

    if self.use_libm_function:
        libm_fct_operator = FunctionOperator(self.use_libm_function, arity=1)
        libm_fct = FunctionObject(self.use_libm_function, [ML_Binary32], ML_Binary32, libm_fct_operator)

        if multi_elt_num > 1:
            result_list = [
                libm_fct(VectorElementSelection(elt_input,
                                                Constant(elt_id, precision=ML_Integer),
                                                precision=self.precision))
                for elt_id in range(multi_elt_num)
            ]
            result = VectorAssembling(*result_list, precision=element_format)
        else:
            result = libm_fct(elt_input)
        elt_result = ReferenceAssign(local_exp, result)
    else:
        if multi_elt_num > 1:
            scalar_result = Variable("scalar_result", precision=self.precision, var_type=Variable.Local)
            fct_ctor_args = self.function_ctor.get_default_args(
                precision=self.precision,
                libm_compliant=False,
            )

            meta_function = self.function_ctor(fct_ctor_args)
            exponential_scheme = meta_function.generate_scheme()

            # instantiating required passes for typing
            pass_inst_abstract_prec = PassInstantiateAbstractPrecision(self.processor)
            pass_inst_prec = PassInstantiatePrecision(self.processor, default_precision=None)

            # executing format instantiation passes on optree
            exponential_scheme = pass_inst_abstract_prec.execute_on_optree(exponential_scheme)
            exponential_scheme = pass_inst_prec.execute_on_optree(exponential_scheme)

            vectorizer = StaticVectorizer()

            # extracting scalar argument from meta_exponential meta function
            scalar_input = meta_function.implementation.arg_list[0]

            # vectorize scalar scheme
            (vector_result, vec_arg_list, vector_scheme,
             scalar_callback, scalar_callback_fct) = vectorize_function_scheme(
                vectorizer, self.get_main_code_object(),
                exponential_scheme, element_format.get_scalar_format(),
                [scalar_input], multi_elt_num)

            elt_result = inline_function(vector_scheme, vector_result, {vec_arg_list[0]: elt_input})

            local_exp = vector_result

            self.function_list.append(scalar_callback_fct)
            libm_fct = scalar_callback
        else:
            scalar_input = elt_input
            scalar_result = local_exp

            elt_result = generate_inline_fct_scheme(
                self.function_ctor, scalar_result, [scalar_input],
                {"precision": self.precision, "libm_compliant": False})

    CU1 = Constant(1, precision=index_format)

    local_exp_init_value = Constant(0, precision=self.precision)
    if multi_elt_num > 1:
        local_exp_init_value = Constant([0] * multi_elt_num, precision=element_format)
        remain_n = Modulo(n, multi_elt_num, precision=index_format)
        iter_n = n - remain_n
        CU_ELTNUM = Constant(multi_elt_num, precision=index_format)
        inc = i + CU_ELTNUM
    else:
        remain_n = None
        iter_n = n
        inc = i + CU1

    # main loop processing multi_elt_num element(s) per iteration
    main_loop = Loop(
        ReferenceAssign(i, CU0),
        i < iter_n,
        Statement(
            ReferenceAssign(local_exp, local_exp_init_value),
            elt_result,
            TableStore(local_exp, dst, i, precision=ML_Void),
            ReferenceAssign(i, inc)),
    )
    # epilog to process remaining items (when the length is not a multiple
    # of multi_elt_num)
    if remain_n is not None:
        # TODO/FIXME: try alternative method for processing epilog
        #             by using full vector length and mask
        epilog_loop = Loop(
            Statement(),
            i < n,
            Statement(
                TableStore(libm_fct(TableLoad(src, i, precision=self.precision)),
                           dst, i, precision=ML_Void),
                ReferenceAssign(i, i + CU1),
            ))
        main_loop = Statement(main_loop, epilog_loop)

    return main_loop
def generate_scheme(self):
    # declaring target and instantiating optimization engine
    precision_ptr = self.get_input_precision(0)
    index_format = self.get_input_precision(2)

    dst = self.implementation.add_input_variable("dst", precision_ptr)
    src = self.implementation.add_input_variable("src", precision_ptr)
    n = self.implementation.add_input_variable("len", index_format)

    i = Variable("i", precision=index_format, var_type=Variable.Local)
    CU1 = Constant(1, precision=index_format)
    CU0 = Constant(0, precision=index_format)
    inc = i + CU1

    elt_input = TableLoad(src, i, precision=self.precision)

    local_exp = Variable("local_exp", precision=self.precision, var_type=Variable.Local)

    if self.use_libm_function:
        libm_exp_operator = FunctionOperator("expf", arity=1)
        libm_exp = FunctionObject("expf", [ML_Binary32], ML_Binary32, libm_exp_operator)
        elt_result = ReferenceAssign(local_exp, libm_exp(elt_input))
    else:
        exponential_args = ML_Exponential.get_default_args(
            precision=self.precision,
            libm_compliant=False,
            debug=False,
        )

        meta_exponential = ML_Exponential(exponential_args)
        exponential_scheme = meta_exponential.generate_scheme()

        elt_result = inline_function(
            exponential_scheme,
            local_exp,
            {meta_exponential.implementation.arg_list[0]: elt_input},
        )

    elt_acc = Variable("elt_acc", precision=self.precision, var_type=Variable.Local)

    exp_loop = Loop(
        ReferenceAssign(i, CU0),
        i < n,
        Statement(
            ReferenceAssign(local_exp, 0),
            elt_result,
            TableStore(local_exp, dst, i, precision=ML_Void),
            ReferenceAssign(elt_acc, elt_acc + local_exp),
            ReferenceAssign(i, i + CU1)),
    )

    sum_rcp = Division(1, elt_acc, precision=self.precision, tag="sum_rcp", debug=debug_multi)

    div_loop = Loop(
        ReferenceAssign(i, CU0),
        i < n,
        Statement(
            TableStore(Multiplication(TableLoad(dst, i, precision=self.precision), sum_rcp),
                       dst, i, precision=ML_Void),
            ReferenceAssign(i, inc)),
    )

    main_scheme = Statement(
        ReferenceAssign(elt_acc, 0),
        exp_loop,
        sum_rcp,
        div_loop)

    return main_scheme
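# Hedged plain-Python model of the scheme built above: exponentiate each element
# into dst while accumulating the sum, then scale every stored value by the
# reciprocal of that sum (a softmax-style normalization, without the usual
# max-subtraction guard against overflow). Illustrative only.
import math

def _reference_exp_normalize(src):
    dst = [math.exp(x) for x in src]
    elt_acc = sum(dst)
    sum_rcp = 1.0 / elt_acc
    return [y * sum_rcp for y in dst]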
def generate_bench(self, processor, test_num=1000, unroll_factor=10):
    """ generate performance bench for self.op_class """
    initial_inputs = [
        Constant(random.uniform(inf(self.init_interval), sup(self.init_interval)),
                 precision=precision)
        for i, precision in enumerate(self.input_precisions)
    ]

    var_inputs = [
        Variable("var_%d" % i,
                 precision=FormatAttributeWrapper(precision, ["volatile"]),
                 var_type=Variable.Local)
        for i, precision in enumerate(self.input_precisions)
    ]

    printf_timing_op = FunctionOperator(
        "printf",
        arg_map={
            0: "\"%s[%s] %%lld elts computed in %%lld cycles =>\\n %%.3f CPE \\n\"" % (
                self.bench_name,
                self.output_precision.get_display_format()),
            1: FO_Arg(0),
            2: FO_Arg(1),
            3: FO_Arg(2),
            4: FO_Arg(3)
        },
        void_function=True)
    printf_timing_function = FunctionObject(
        "printf",
        [self.output_precision, ML_Int64, ML_Int64, ML_Binary64],
        ML_Void,
        printf_timing_op)

    timer = Variable("timer", precision=ML_Int64, var_type=Variable.Local)

    void_function_op = FunctionOperator("(void)", arity=1, void_function=True)
    void_function = FunctionObject("(void)", [self.output_precision], ML_Void, void_function_op)

    # initialization of operation inputs
    init_assign = metaop.Statement()
    for var_input, init_value in zip(var_inputs, initial_inputs):
        init_assign.push(ReferenceAssign(var_input, init_value))

    # test loop
    loop_i = Variable("i", precision=ML_Int64, var_type=Variable.Local)
    test_num_cst = Constant(test_num // unroll_factor, precision=ML_Int64, tag="test_num")

    # Goal: build a chain of dependent operations to measure
    # elementary operation latency
    local_inputs = tuple(var_inputs)
    local_result = self.op_class(*local_inputs, precision=self.output_precision, unbreakable=True)
    for i in range(unroll_factor - 1):
        local_inputs = tuple([local_result] + var_inputs[1:])
        local_result = self.op_class(*local_inputs, precision=self.output_precision, unbreakable=True)
    # renormalisation
    local_result = self.renorm_function(local_result)

    # variable assignment to build the dependency chain
    var_assign = Statement()
    var_assign.push(ReferenceAssign(var_inputs[0], local_result))
    final_value = var_inputs[0]

    # loop increment value
    loop_increment = 1

    test_loop = Loop(
        ReferenceAssign(loop_i, Constant(0, precision=ML_Int32)),
        loop_i < test_num_cst,
        Statement(
            var_assign,
            ReferenceAssign(loop_i, loop_i + loop_increment)),
    )

    # bench scheme
    test_scheme = Statement(
        ReferenceAssign(timer, processor.get_current_timestamp()),
        init_assign,
        test_loop,
        ReferenceAssign(
            timer,
            Subtraction(processor.get_current_timestamp(), timer, precision=ML_Int64)),
        # prevent intermediary variable simplification
        void_function(final_value),
        printf_timing_function(
            final_value,
            Constant(test_num, precision=ML_Int64),
            timer,
            Division(Conversion(timer, precision=ML_Binary64),
                     Constant(test_num, precision=ML_Binary64),
                     precision=ML_Binary64))
        # ,Return(Constant(0, precision = ML_Int32))
    )

    return test_scheme
def get_array_test_wrapper(self, test_num, tested_function, table_size_offset_array,
                           input_tables, output_array, acc_num,
                           post_statement_generator, NUM_INPUT_ARRAY=1):
    """ generate a test loop for multi-array tests
        @param test_num number of elementary array tests to be executed
        @param tested_function FunctionObject to be tested
        @param table_size_offset_array ML_NewTable object containing
               (table-size, offset) pairs for multi-array testing
        @param input_tables list of ML_NewTable containing multi-array test inputs
        @param output_array ML_NewTable containing multi-array test outputs
        @param acc_num Variable accumulating the number of elements processed
        @param post_statement_generator is a generator used to generate
               a statement executed at the end of the test of one of the
               arrays of the multi-test. It expects 6 arguments:
               (input_tables, output_array, table_size_offset_array,
                array_offset, array_len, test_id)
    """
    test_id = Variable("test_id", precision=ML_Int32, var_type=Variable.Local)
    test_num_cst = Constant(test_num, precision=ML_Int32, tag="test_num")

    array_len = Variable("len", precision=ML_UInt32, var_type=Variable.Local)

    array_offset = TableLoad(table_size_offset_array, test_id, 1)

    def pointer_add(table_addr, offset):
        pointer_format = table_addr.get_precision_as_pointer_format()
        return Addition(table_addr, offset, precision=pointer_format)

    array_inputs = tuple(pointer_add(input_tables[in_id], array_offset)
                         for in_id in range(NUM_INPUT_ARRAY))
    function_call = tested_function(
        *((pointer_add(output_array, array_offset),) + array_inputs + (array_len,)))

    post_statement = post_statement_generator(
        input_tables, output_array, table_size_offset_array,
        array_offset, array_len, test_id)

    loop_increment = 1

    test_loop = Loop(
        ReferenceAssign(test_id, Constant(0, precision=ML_Int32)),
        test_id < test_num_cst,
        Statement(
            ReferenceAssign(array_len, TableLoad(table_size_offset_array, test_id, 0)),
            function_call,
            post_statement,
            ReferenceAssign(acc_num,
                            acc_num + Conversion(array_len, precision=acc_num.precision)),
            ReferenceAssign(test_id, test_id + loop_increment),
        ),
    )

    test_statement = Statement()

    # adding functional test_loop to test statement
    test_statement.add(test_loop)

    return test_statement
def generate_scheme(self):
    int_precision = self.precision.get_integer_format()
    # We wish to compute vx / vy
    vx = self.implementation.add_input_variable("x", self.precision, interval=self.input_intervals[0])
    vy = self.implementation.add_input_variable("y", self.precision, interval=self.input_intervals[1])

    if self.mode is FULL_MODE:
        quo = self.implementation.add_input_variable("quo", ML_Pointer_Format(int_precision))

    i = Variable("i", precision=int_precision, var_type=Variable.Local)
    q = Variable("q", precision=int_precision, var_type=Variable.Local)

    CI = lambda v: Constant(v, precision=int_precision)
    CF = lambda v: Constant(v, precision=self.precision)

    vx_subnormal = Test(vx, specifier=Test.IsSubnormal, tag="vx_subnormal")
    vy_subnormal = Test(vy, specifier=Test.IsSubnormal, tag="vy_subnormal")

    DELTA_EXP = self.precision.get_mantissa_size()
    scale_factor = Constant(2.0**DELTA_EXP, precision=self.precision)
    inv_scale_factor = Constant(2.0**-DELTA_EXP, precision=self.precision)

    normalized_vx = Select(vx_subnormal, vx * scale_factor, vx, tag="scaled_vx")
    normalized_vy = Select(vy_subnormal, vy * scale_factor, vy, tag="scaled_vy")

    real_ex = ExponentExtraction(vx, tag="real_ex", precision=int_precision)
    real_ey = ExponentExtraction(vy, tag="real_ey", precision=int_precision)

    # if real_e<x/y> is +1023 then it may Overflow in -real_ex for ExponentInsertion
    # which only supports down to -1022 before falling into subnormal numbers (which are
    # not supported by ExponentInsertion)
    real_ex_h0 = real_ex / 2
    real_ex_h1 = real_ex - real_ex_h0

    real_ey_h0 = real_ey / 2
    real_ey_h1 = real_ey - real_ey_h0

    EI = lambda v: ExponentInsertion(v, precision=self.precision)

    mx = Abs((vx * EI(-real_ex_h0)) * EI(-real_ex_h1), tag="mx")
    my = Abs((vy * EI(-real_ey_h0)) * EI(-real_ey_h1), tag="pre_my")

    # scale_ey is used to regain the unscaling of mx in the first loop
    # if real_ey >= real_ex, the first loop is never executed
    # so a different scaling is required
    mx_unscaling = Select(real_ey < real_ex, real_ey, real_ex)
    ey_half0 = (mx_unscaling) / 2
    ey_half1 = (mx_unscaling) - ey_half0
    scale_ey_half0 = ExponentInsertion(ey_half0, precision=self.precision, tag="scale_ey_half0")
    scale_ey_half1 = ExponentInsertion(ey_half1, precision=self.precision, tag="scale_ey_half1")

    # if only vy is subnormal we want to normalize it
    #normal_cond = LogicalAnd(vy_subnormal, LogicalNot(vx_subnormal))
    normal_cond = vy_subnormal #LogicalAnd(vy_subnormal, LogicalNot(vx_subnormal))
    my = Select(normal_cond, Abs(MantissaExtraction(vy * scale_factor)), my, tag="my")

    # vx / vy = vx * 2^-ex * 2^(ex-ey) / (vy * 2^-ey)
    # vx % vy
    post_mx = Variable("post_mx", precision=self.precision, var_type=Variable.Local)

    # scaling for half comparison
    VY_SCALING = Select(vy_subnormal, 1.0, 0.5, precision=self.precision)
    VX_SCALING = Select(vy_subnormal, 2.0, 1.0, precision=self.precision)

    def LogicalXor(a, b):
        return LogicalOr(LogicalAnd(a, LogicalNot(b)), LogicalAnd(LogicalNot(a), b))

    rem_sign = Select(vx < 0, CF(-1), CF(1), precision=self.precision, tag="rem_sign")
    quo_sign = Select(LogicalXor(vx < 0, vy < 0), CI(-1), CI(1), precision=int_precision, tag="quo_sign")

    loop_watchdog = Variable("loop_watchdog", precision=ML_Int32, var_type=Variable.Local)

    loop = Statement(
        real_ex, real_ey, mx, my, loop_watchdog,
        ReferenceAssign(loop_watchdog, 5000),
        ReferenceAssign(q, CI(0)),
        Loop(
            ReferenceAssign(i, CI(0)), i < (real_ex - real_ey),
            Statement(
                ReferenceAssign(i, i + CI(1)),
                ReferenceAssign(q, ((q << 1) + Select(mx >= my, CI(1), CI(0))).modify_attributes(tag="step1_q")),
                ReferenceAssign(mx, (CF(2) * (mx - Select(mx >= my, my, CF(0)))).modify_attributes(tag="step1_mx")),
                # loop watchdog
                ReferenceAssign(loop_watchdog, loop_watchdog - 1),
                ConditionBlock(loop_watchdog < 0, Return(-1)),
            ),
        ),
        # unscaling remainder
        ReferenceAssign(mx, ((mx * scale_ey_half0) * scale_ey_half1).modify_attributes(tag="scaled_rem")),
        ReferenceAssign(my, ((my * scale_ey_half0) * scale_ey_half1).modify_attributes(tag="scaled_rem_my")),
        Loop(
            Statement(), (my > Abs(vy)),
            Statement(
                ReferenceAssign(q, ((q << 1) + Select(mx >= Abs(my), CI(1), CI(0))).modify_attributes(tag="step2_q")),
                ReferenceAssign(mx, (mx - Select(mx >= Abs(my), Abs(my), CF(0))).modify_attributes(tag="step2_mx")),
                ReferenceAssign(my, (my * 0.5).modify_attributes(tag="step2_my")),
                # loop watchdog
                ReferenceAssign(loop_watchdog, loop_watchdog - 1),
                ConditionBlock(loop_watchdog < 0, Return(-1)),
            ),
        ),
        ReferenceAssign(q, q << 1),
        Loop(
            ReferenceAssign(i, CI(0)), mx > Abs(vy),
            Statement(
                ReferenceAssign(q, (q + Select(mx > Abs(vy), CI(1), CI(0))).modify_attributes(tag="step3_q")),
                ReferenceAssign(mx, (mx - Select(mx > Abs(vy), Abs(vy), CF(0))).modify_attributes(tag="step3_mx")),
                # loop watchdog
                ReferenceAssign(loop_watchdog, loop_watchdog - 1),
                ConditionBlock(loop_watchdog < 0, Return(-1)),
            ),
        ),
        ReferenceAssign(q, q + Select(mx >= Abs(vy), CI(1), CI(0))),
        ReferenceAssign(mx, (mx - Select(mx >= Abs(vy), Abs(vy), CF(0))).modify_attributes(tag="pre_half_mx")),
        ConditionBlock(
            # actual comparison is mx > |vy * 0.5| ; to avoid rounding effects when
            # vy is subnormal we multiply both sides by 2.0
            ((mx * VX_SCALING) > Abs(vy * VY_SCALING)).modify_attributes(tag="half_test"),
            Statement(
                ReferenceAssign(q, q + CI(1)),
                ReferenceAssign(mx, (mx - Abs(vy)))
            )
        ),
        ConditionBlock(
            # if the remainder is exactly half the divisor
            # we need to make sure the quotient is even
            LogicalAnd(
                Equal(mx * VX_SCALING, Abs(vy * VY_SCALING)),
                Equal(Modulo(q, CI(2)), CI(1)),
            ),
            Statement(
                ReferenceAssign(q, q + CI(1)),
                ReferenceAssign(mx, (mx - Abs(vy)))
            )
        ),
        ReferenceAssign(mx, rem_sign * mx),
        ReferenceAssign(q,
            Modulo(TypeCast(q, precision=self.precision.get_unsigned_integer_format()),
                   Constant(2**self.quotient_size,
                            precision=self.precision.get_unsigned_integer_format()),
                   tag="mod_q")),
        ReferenceAssign(q, quo_sign * q),
    )

    # NOTES: Warning, QuotientReturn must always precede RemainderReturn
    if self.mode is QUOTIENT_MODE:
        QuotientReturn = Return
        RemainderReturn = lambda _: Statement()
    elif self.mode is REMAINDER_MODE:
        QuotientReturn = lambda _: Statement()
        RemainderReturn = Return
    elif self.mode is FULL_MODE:
        QuotientReturn = lambda v: ReferenceAssign(Dereference(quo, precision=int_precision), v)
        RemainderReturn = Return
    else:
        raise NotImplementedError

    # quotient invalid value
    QUO_INVALID_VALUE = 0

    mod_scheme = Statement(
        # x or y is NaN, a NaN is returned
        ConditionBlock(
            LogicalOr(Test(vx, specifier=Test.IsNaN), Test(vy, specifier=Test.IsNaN)),
            Statement(
                QuotientReturn(QUO_INVALID_VALUE),
                RemainderReturn(FP_QNaN(self.precision))),
        ),
        ConditionBlock(
            Test(vy, specifier=Test.IsZero),
            Statement(
                QuotientReturn(QUO_INVALID_VALUE),
                RemainderReturn(FP_QNaN(self.precision))),
        ),
        ConditionBlock(
            Test(vx, specifier=Test.IsZero),
            Statement(
                QuotientReturn(0),
                RemainderReturn(vx)),
        ),
        ConditionBlock(
            Test(vx, specifier=Test.IsInfty),
            Statement(
                QuotientReturn(QUO_INVALID_VALUE),
                RemainderReturn(FP_QNaN(self.precision)))
        ),
        ConditionBlock(
            Test(vy, specifier=Test.IsInfty),
            Statement(
                QuotientReturn(0),
                RemainderReturn(vx),
            )
        ),
        ConditionBlock(
            Abs(vx) < Abs(vy * 0.5),
            Statement(
                QuotientReturn(0),
                RemainderReturn(vx),
            )
        ),
        ConditionBlock(
            Equal(vx, vy),
            Statement(
                QuotientReturn(1),
                # 0 with the same sign as x
                RemainderReturn(vx - vx),
            ),
        ),
        ConditionBlock(
            Equal(vx, -vy),
            Statement(
                # quotient is -1
                QuotientReturn(-1),
                # 0 with the same sign as x
                RemainderReturn(vx - vx),
            ),
        ),
        loop,
        QuotientReturn(q),
        RemainderReturn(mx),
    )

    quo_scheme = Statement(
        # x or y is NaN, a NaN is returned
        ConditionBlock(
            LogicalOr(Test(vx, specifier=Test.IsNaN), Test(vy, specifier=Test.IsNaN)),
            Return(QUO_INVALID_VALUE),
        ),
        ConditionBlock(
            Test(vy, specifier=Test.IsZero),
            Return(QUO_INVALID_VALUE),
        ),
        ConditionBlock(
            Test(vx, specifier=Test.IsZero),
            Return(0),
        ),
        ConditionBlock(
            Test(vx, specifier=Test.IsInfty),
            Return(QUO_INVALID_VALUE),
        ),
        ConditionBlock(
            Test(vy, specifier=Test.IsInfty),
            Return(QUO_INVALID_VALUE),
        ),
        ConditionBlock(
            Abs(vx) < Abs(vy * 0.5),
            Return(0),
        ),
        ConditionBlock(
            Equal(vx, vy),
            Return(1),
        ),
        ConditionBlock(
            Equal(vx, -vy),
            Return(-1),
        ),
        loop,
        Return(q),
    )
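# Hedged plain-Python model of what the restoring loop above computes for
# finite, non-zero inputs with |vx| >= 0.5 * |vy|: an integer quotient rounded
# to nearest with ties to even (as remquo requires) and the matching remainder.
# The generated code works bit by bit on scaled mantissas under a loop
# watchdog; this is only an illustration of the algorithm, not that code.
def _reference_remquo_core(vx, vy, quotient_size=3):
    ax, ay = abs(vx), abs(vy)
    q = 0
    # subtract multiples of |vy| (step1/step2/step3 loops above)
    while ax >= ay:
        q += 1
        ax -= ay
    # round to nearest, ties to even (the two ConditionBlocks above)
    if ax > 0.5 * ay or (ax == 0.5 * ay and q % 2 == 1):
        q += 1
        ax -= ay
    rem = ax if vx >= 0 else -ax
    quo = q % (2**quotient_size)
    if (vx < 0) != (vy < 0):
        quo = -quo
    return quo, rem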
def generate_pipeline_stage(entity, reset=False, recirculate=False,
                            one_process_per_stage=True,
                            synchronous_reset=True, negate_reset=False):
    """ Process an entity to generate the pipeline stages required to implement
        the pipeline structure described by the nodes' stage attributes.

        :param entity: input entity to pipeline
        :type entity: ML_EntityBasis
        :param reset: indicate if a reset must be generated for pipeline registers
        :type reset: bool
        :param recirculate: trigger the integration of a recirculation signal
                            into the stage flopping condition
        :type recirculate: bool
        :param one_process_per_stage: forces the generation of a separate process
                                      for each pipeline stage (else a unique
                                      process is generated for all the stages)
        :type one_process_per_stage: bool
        :param synchronous_reset: triggers the generation of a clocked reset
        :type synchronous_reset: bool
        :param negate_reset: if set, indicates the reset is triggered when the
                             reset signal is 0 (else 1)
        :type negate_reset: bool
    """
    retiming_map = {}
    retime_map = RetimeMap()
    output_assign_list = entity.implementation.get_output_assign()
    for output in output_assign_list:
        Log.report(Log.Verbose, "generating pipeline from output {} ", output)
        retime_op(output, retime_map)
    for recirculate_stage in entity.recirculate_signal_map:
        recirculate_ctrl = entity.recirculate_signal_map[recirculate_stage]
        Log.report(Log.Verbose, "generating pipeline from recirculation control signal {}", recirculate_ctrl)
        retime_op(recirculate_ctrl, retime_map)

    process_statement = Statement()

    # adding stage forward process
    clk = entity.get_clk_input()
    clock_statement = Statement()
    global_reset_statement = Statement()

    Log.report(Log.Info, "design has {} flip-flop(s).", retime_map.register_count)

    # handle to the first clock Process (in generation order)
    # which must be the one whose pre_statement is filled with
    # signals required to be generated outside the processes
    first_process = False
    for stage_id in sorted(retime_map.stage_forward.keys()):
        stage_statement = Statement(
            *tuple(assign for assign in retime_map.stage_forward[stage_id]))

        if reset:
            reset_statement = Statement()
            for assign in retime_map.stage_forward[stage_id]:
                target = assign.get_input(0)
                reset_value = Constant(0, precision=target.get_precision())
                reset_statement.push(ReferenceAssign(target, reset_value))

            if recirculate:
                # inserting recirculation condition
                recirculate_signal = entity.get_recirculate_signal(stage_id)
                stage_statement = ConditionBlock(
                    Comparison(
                        recirculate_signal,
                        Constant(0, precision=recirculate_signal.get_precision()),
                        specifier=Comparison.Equal,
                        precision=ML_Bool),
                    stage_statement)

            if synchronous_reset:
                # build a compound statement with reset and flops statement
                stage_statement = ConditionBlock(
                    Comparison(
                        entity.reset_signal,
                        Constant(0 if negate_reset else 1, precision=ML_StdLogic),
                        specifier=Comparison.Equal,
                        precision=ML_Bool),
                    reset_statement,
                    stage_statement)
            else:
                # for asynchronous reset, reset is in a non-clocked statement
                # and will be added at the end of the stage to the same process
                # as register clocking
                global_reset_statement.add(reset_statement)

        # To meet simulation / synthesis tools, we build
        # a single if clock predicate block per stage
        clock_block = ConditionBlock(
            LogicalAnd(
                Event(clk, precision=ML_Bool),
                Comparison(
                    clk,
                    Constant(1, precision=ML_StdLogic),
                    specifier=Comparison.Equal,
                    precision=ML_Bool),
                precision=ML_Bool),
            stage_statement)

        if one_process_per_stage:
            if reset and not synchronous_reset:
                clock_block = ConditionBlock(
                    Comparison(
                        entity.reset_signal,
                        Constant(0 if negate_reset else 1, precision=ML_StdLogic),
                        specifier=Comparison.Equal,
                        precision=ML_Bool),
                    reset_statement,
                    clock_block)
                clock_process = Process(clock_block, sensibility_list=[clk, entity.reset_signal])
            else:
                # no reset, or synchronous reset (already appended to clock_block)
                clock_process = Process(clock_block, sensibility_list=[clk])
            entity.implementation.add_process(clock_process)
            first_process = first_process or clock_process
        else:
            clock_statement.add(clock_block)

    if one_process_per_stage:
        # reset and clock processes were generated within the per-stage loop
        pass
    else:
        process_statement.add(clock_statement)
        if synchronous_reset:
            pipeline_process = Process(process_statement, sensibility_list=[clk])
        else:
            process_statement.add(global_reset_statement)
            pipeline_process = Process(process_statement, sensibility_list=[clk, entity.reset_signal])
        entity.implementation.add_process(pipeline_process)
        first_process = pipeline_process

    # statement that gathers signals which must be pre-computed
    for op in retime_map.pre_statement:
        first_process.add_to_pre_statement(op)
    stage_num = len(retime_map.stage_forward.keys())
    Log.report(Log.Info, "there are {} pipeline stage(s)", stage_num)
    return stage_num
def generate_auto_test(self, test_num=10, test_range=Interval(-1.0, 1.0), debug=False, time_step=10):
    """ time_step: duration of a stage (in ns) """
    # instantiating tested component
    # map of input_tag -> input_signal and output_tag -> output_signal
    io_map = {}
    # map of input_tag -> input_signal, excluding commodity signals
    # (e.g. clock and reset)
    input_signals = {}
    # map of output_tag -> output_signal
    output_signals = {}
    # excluding clock and reset signals from argument list
    # reduced_arg_list = [input_port for input_port in self.implementation.get_arg_list() if not input_port.get_tag() in ["clk", "reset"]]
    reduced_arg_list = self.implementation.get_arg_list()
    for input_port in reduced_arg_list:
        input_tag = input_port.get_tag()
        input_signal = Signal(input_tag + "_i", precision=input_port.get_precision(), var_type=Signal.Local)
        io_map[input_tag] = input_signal
        if not input_tag in ["clk", "reset"]:
            input_signals[input_tag] = input_signal
    for output_port in self.implementation.get_output_port():
        output_tag = output_port.get_tag()
        output_signal = Signal(output_tag + "_o", precision=output_port.get_precision(), var_type=Signal.Local)
        io_map[output_tag] = output_signal
        output_signals[output_tag] = output_signal

    # building list of test cases
    tc_list = []

    self_component = self.implementation.get_component_object()
    self_instance = self_component(io_map=io_map, tag="tested_entity")
    test_statement = Statement()

    # initializing random test case generator
    self.init_test_generator()

    # Appending standard test cases if required
    if self.auto_test_std:
        tc_list += self.standard_test_cases

    for i in range(test_num):
        input_values = self.generate_test_case(input_signals, io_map, i, test_range)
        tc_list.append((input_values, None))

    def compute_results(tc):
        """ update test case with output values if required """
        input_values, output_values = tc
        if output_values is None:
            return input_values, self.numeric_emulate(input_values)
        else:
            return tc

    # filling output values
    tc_list = [compute_results(tc) for tc in tc_list]

    for input_values, output_values in tc_list:
        input_msg = ""

        # Adding input setting
        for input_tag in input_values:
            input_signal = io_map[input_tag]
            # FIXME: correct value generation depending on signal precision
            input_value = input_values[input_tag]
            test_statement.add(
                ReferenceAssign(
                    input_signal,
                    Constant(input_value, precision=input_signal.get_precision())))
            value_msg = input_signal.get_precision().get_cst(
                input_value, language=VHDL_Code).replace('"', "'")
            value_msg += " / " + hex(
                input_signal.get_precision().get_base_format().get_integer_coding(input_value))
            input_msg += " {}={} ".format(input_tag, value_msg)

        test_statement.add(Wait(time_step * self.stage_num))

        # Adding output value comparison
        for output_tag in output_signals:
            output_signal = output_signals[output_tag]
            output_value = Constant(
                output_values[output_tag],
                precision=output_signal.get_precision())
            output_precision = output_signal.get_precision()
            expected_dec = output_precision.get_cst(
                output_values[output_tag], language=VHDL_Code).replace('"', "'")
            expected_hex = " / " + hex(
                output_precision.get_base_format().get_integer_coding(output_values[output_tag]))
            value_msg = "{} / {}".format(expected_dec, expected_hex)

            test_pass_cond = Comparison(
                output_signal, output_value,
                specifier=Comparison.Equal, precision=ML_Bool)

            test_statement.add(
                ConditionBlock(
                    LogicalNot(test_pass_cond, precision=ML_Bool),
                    Report(
                        Concatenation(
                            " result for {}: ".format(output_tag),
                            Conversion(
                                TypeCast(
                                    output_signal,
                                    precision=ML_StdLogicVectorFormat(
                                        output_signal.get_precision().get_bit_size())),
                                precision=ML_String),
                            precision=ML_String))))
            test_statement.add(
                Assert(
                    test_pass_cond,
                    "\"unexpected value for inputs {input_msg}, output {output_tag}, expecting {value_msg}, got: \"".format(
                        input_msg=input_msg, output_tag=output_tag, value_msg=value_msg),
                    severity=Assert.Failure))

    testbench = CodeEntity("testbench")
    test_process = Process(
        test_statement,
        # end of test
        Assert(Constant(0, precision=ML_Bool),
               " \"end of test, no error encountered \"",
               severity=Assert.Failure))

    testbench_scheme = Statement(self_instance, test_process)

    if self.pipelined:
        half_time_step = time_step / 2
        assert (half_time_step * 2) == time_step
        # adding clock process for pipelined bench
        clk_process = Process(
            Statement(
                ReferenceAssign(io_map["clk"], Constant(1, precision=ML_StdLogic)),
                Wait(half_time_step),
                ReferenceAssign(io_map["clk"], Constant(0, precision=ML_StdLogic)),
                Wait(half_time_step),
            ))
        testbench_scheme.push(clk_process)

    testbench.add_process(testbench_scheme)
    return [testbench]
def generate_auto_test(self, test_num=10, test_range=Interval(-1.0, 1.0), debug=False, time_step=10):
    """ time_step: duration of a stage (in ns) """
    # instantiating tested component
    # map of input_tag -> input_signal and output_tag -> output_signal
    io_map = {}
    # map of input_tag -> input_signal, excluding commodity signals
    # (e.g. clock and reset)
    input_signals = {}
    # map of output_tag -> output_signal
    output_signals = {}
    # excluding clock and reset signals from argument list
    # reduced_arg_list = [input_port for input_port in self.implementation.get_arg_list() if not input_port.get_tag() in ["clk", "reset"]]
    reduced_arg_list = self.implementation.get_arg_list()
    for input_port in reduced_arg_list:
        input_tag = input_port.get_tag()
        input_signal = Signal(input_tag + "_i", precision=input_port.get_precision(), var_type=Signal.Local)
        io_map[input_tag] = input_signal
        if not input_tag in ["clk", "reset"]:
            input_signals[input_tag] = input_signal
    for output_port in self.implementation.get_output_port():
        output_tag = output_port.get_tag()
        output_signal = Signal(output_tag + "_o", precision=output_port.get_precision(), var_type=Signal.Local)
        io_map[output_tag] = output_signal
        output_signals[output_tag] = output_signal

    # building list of test cases
    tc_list = []

    self_component = self.implementation.get_component_object()
    self_instance = self_component(io_map=io_map, tag="tested_entity")
    test_statement = Statement()

    # initializing random test case generator
    self.init_test_generator()

    # Appending standard test cases if required
    if self.auto_test_std:
        tc_list += self.standard_test_cases

    for i in range(test_num):
        input_values = self.generate_test_case(input_signals, io_map, i, test_range)
        tc_list.append((input_values, None))

    def compute_results(tc):
        """ update test case with output values if required """
        input_values, output_values = tc
        if output_values is None:
            return input_values, self.numeric_emulate(input_values)
        else:
            return tc

    # filling output values
    tc_list = [compute_results(tc) for tc in tc_list]

    for input_values, output_values in tc_list:
        test_statement.add(
            self.implement_test_case(io_map, input_values, output_signals,
                                     output_values, time_step))

    testbench = CodeEntity("testbench")
    test_process = Process(
        test_statement,
        # end of test
        Assert(Constant(0, precision=ML_Bool),
               " \"end of test, no error encountered \"",
               severity=Assert.Failure))

    testbench_scheme = Statement(self_instance, test_process)

    if self.pipelined:
        half_time_step = time_step / 2
        assert (half_time_step * 2) == time_step
        # adding clock process for pipelined bench
        clk_process = Process(
            Statement(
                ReferenceAssign(io_map["clk"], Constant(1, precision=ML_StdLogic)),
                Wait(half_time_step),
                ReferenceAssign(io_map["clk"], Constant(0, precision=ML_StdLogic)),
                Wait(half_time_step),
            ))
        testbench_scheme.push(clk_process)

    testbench.add_process(testbench_scheme)
    return [testbench]
def generate_bench_wrapper(self, test_num=1, loop_num=100000,
                           test_ranges=[Interval(-1.0, 1.0)], debug=False):
    # interval where the array length is chosen from (randomly)
    index_range = self.test_index_range

    auto_test = CodeFunction("bench_wrapper", output_format=ML_Binary64)

    tested_function = self.implementation.get_function_object()
    function_name = self.implementation.get_name()

    failure_report_op = FunctionOperator("report_failure")
    failure_report_function = FunctionObject("report_failure", [], ML_Void, failure_report_op)

    printf_success_op = FunctionOperator(
        "printf",
        arg_map={0: "\"test successful %s\\n\"" % function_name},
        void_function=True)
    printf_success_function = FunctionObject("printf", [], ML_Void, printf_success_op)

    output_precision = FormatAttributeWrapper(self.precision, ["volatile"])

    test_total = test_num

    # number of arrays expected as inputs for tested_function
    NUM_INPUT_ARRAY = 1
    # position of the input array in tested_function operands (generally
    # equal to 1 as the 0-th input is often the destination array)
    INPUT_INDEX_OFFSET = 1

    # concatenating standard test arrays at the beginning of the randomly
    # generated arrays
    TABLE_SIZE_VALUES = [
        len(std_table) for std_table in self.standard_test_cases
    ] + [
        random.randrange(index_range[0], index_range[1] + 1) for i in range(test_num)
    ]
    OFFSET_VALUES = [sum(TABLE_SIZE_VALUES[:i]) for i in range(test_total)]

    table_size_offset_array = generate_2d_table(
        test_total, 2,
        ML_UInt32,
        self.uniquify_name("table_size_array"),
        value_gen=(lambda row_id: (TABLE_SIZE_VALUES[row_id], OFFSET_VALUES[row_id])))

    INPUT_ARRAY_SIZE = sum(TABLE_SIZE_VALUES)

    # TODO/FIXME: implement proper input range depending on input index
    # assuming a single input array
    input_precisions = [self.get_input_precision(1).get_data_precision()]
    rng_map = [
        get_precision_rng(precision, inf(test_range), sup(test_range))
        for precision, test_range in zip(input_precisions, test_ranges)
    ]

    # generated table of inputs
    input_tables = [
        generate_1d_table(
            INPUT_ARRAY_SIZE,
            self.get_input_precision(INPUT_INDEX_OFFSET + table_id).get_data_precision(),
            self.uniquify_name("input_table_arg%d" % table_id),
            value_gen=(lambda _: input_precisions[table_id].round_sollya_object(
                rng_map[table_id].get_new_value(), sollya.RN)))
        for table_id in range(NUM_INPUT_ARRAY)
    ]

    # generate output_array
    output_array = generate_1d_table(
        INPUT_ARRAY_SIZE,
        output_precision,
        self.uniquify_name("output_array"),
        #value_gen=(lambda _: FP_QNaN(self.precision))
        value_gen=(lambda _: None),
        const=False,
        empty=True)

    # accumulate element number
    acc_num = Variable("acc_num", precision=ML_Int64, var_type=Variable.Local)

    def empty_post_statement_gen(input_tables, output_array, table_size_offset_array,
                                 array_offset, array_len, test_id):
        return Statement()

    test_loop = self.get_array_test_wrapper(
        test_total, tested_function, table_size_offset_array,
        input_tables, output_array, acc_num, empty_post_statement_gen)

    timer = Variable("timer", precision=ML_Int64, var_type=Variable.Local)
    printf_timing_op = FunctionOperator(
        "printf",
        arg_map={
            0: "\"%s %%\"PRIi64\" elts computed in %%\"PRIi64\" nanoseconds => %%.3f CPE \\n\"" % function_name,
            1: FO_Arg(0),
            2: FO_Arg(1),
            3: FO_Arg(2)
        },
        void_function=True)
    printf_timing_function = FunctionObject(
        "printf", [ML_Int64, ML_Int64, ML_Binary64], ML_Void, printf_timing_op)

    vj = Variable("j", precision=ML_Int32, var_type=Variable.Local)
    loop_num_cst = Constant(loop_num, precision=ML_Int32, tag="loop_num")
    loop_increment = 1

    # bench measure of clock per element
    cpe_measure = Division(
        Conversion(timer, precision=ML_Binary64),
        Conversion(acc_num, precision=ML_Binary64),
        precision=ML_Binary64,
        tag="cpe_measure",
    )

    # common test scheme between scalar and vector functions
    test_scheme = Statement(
        self.processor.get_init_timestamp(),
        ReferenceAssign(timer, self.processor.get_current_timestamp()),
        ReferenceAssign(acc_num, 0),
        Loop(
            ReferenceAssign(vj, Constant(0, precision=ML_Int32)),
            vj < loop_num_cst,
            Statement(
                test_loop,
                ReferenceAssign(vj, vj + loop_increment))),
        ReferenceAssign(
            timer,
            Subtraction(self.processor.get_current_timestamp(), timer, precision=ML_Int64)),
        printf_timing_function(
            Conversion(acc_num, precision=ML_Int64),
            timer,
            cpe_measure,
        ),
        Return(cpe_measure),
        # Return(Constant(0, precision = ML_Int32))
    )

    auto_test.set_scheme(test_scheme)
    return FunctionGroup([auto_test])
Log.report(LOG_PASS_INFO, "Registering ssa translation pass")
Pass.register(Pass_SSATranslate)

# registering basic-block simplification pass
Log.report(LOG_PASS_INFO, "Registering basic-block simplification pass")
Pass.register(Pass_BBSimplification)

if __name__ == "__main__":
    bb_root = BasicBlock(tag="bb_root")
    bb_1 = BasicBlock(tag="bb_1")
    bb_2 = BasicBlock(tag="bb_2")
    bb_3 = BasicBlock(tag="bb_3")

    var_x = Variable("x", precision=None)
    var_y = Variable("y", precision=None)

    bb_root.add(ReferenceAssign(var_x, 1))
    bb_root.add(ReferenceAssign(var_y, 2))
    bb_root.add(ConditionalBranch(var_x > var_y, bb_1, bb_2))

    bb_1.add(ReferenceAssign(var_x, 2))
    bb_1.add(UnconditionalBranch(bb_3))

    bb_2.add(ReferenceAssign(var_y, 3))
    bb_2.add(UnconditionalBranch(bb_3))

    bb_3.add(ReferenceAssign(var_y, var_x))

    program_bb_list = BasicBlockList(tag="main")
    for bb in [bb_root, bb_1, bb_2, bb_3]:
        program_bb_list.add(bb)
def generate_array_check_loop(self, input_tables, output_array, table_size_offset_array,
                              array_offset, array_len, test_id):
    # internal array iterator index
    vj = Variable("j", precision=ML_UInt32, var_type=Variable.Local)

    printf_input_function = self.get_printf_input_function()

    printf_error_template = "printf(\"max %s error is %s \\n\", %s)" % (
        self.function_name,
        self.precision.get_display_format().format_string,
        self.precision.get_display_format().pre_process_fct("{0}"))
    printf_error_op = TemplateOperatorFormat(
        printf_error_template, arity=1, void_function=True, require_header=["stdio.h"])
    printf_error_function = FunctionObject("printf", [self.precision], ML_Void, printf_error_op)

    printf_max_op = FunctionOperator(
        "printf",
        arg_map={
            0: "\"max %s error is reached at input number %s \\n \"" % (self.function_name, "%d"),
            1: FO_Arg(0)
        },
        void_function=True,
        require_header=["stdio.h"])
    printf_max_function = FunctionObject("printf", [self.precision], ML_Void, printf_max_op)

    NUM_INPUT_ARRAY = len(input_tables)

    # generate the expected table for the whole multi-array
    expected_table = self.generate_expected_table(input_tables, table_size_offset_array)

    # inputs for the (vj)-th entry of the sub-array
    local_inputs = tuple(
        TableLoad(input_tables[in_id], array_offset + vj)
        for in_id in range(NUM_INPUT_ARRAY))
    # expected values for the (vj)-th entry of the sub-array
    expected_values = [
        TableLoad(expected_table, array_offset + vj, i)
        for i in range(self.accuracy.get_num_output_value())
    ]
    # local result for the (vj)-th entry of the sub-array
    local_result = TableLoad(output_array, array_offset + vj)

    if self.break_error:
        return_statement_break = Statement(
            printf_input_function(*((vj,) + local_inputs + (local_result,))),
            self.accuracy.get_output_print_call(self.function_name, expected_values))
    else:
        return_statement_break = Statement(
            printf_input_function(*((vj,) + local_inputs + (local_result,))),
            self.accuracy.get_output_print_call(self.function_name, expected_values),
            Return(Constant(1, precision=ML_Int32)))

    # loop implementation to check the validity of the results of the
    # sub-array starting at array_offset
    check_array_loop = Loop(
        ReferenceAssign(vj, 0),
        vj < array_len,
        Statement(
            ConditionBlock(
                self.accuracy.get_output_check_test(local_result, expected_values),
                return_statement_break),
            ReferenceAssign(vj, vj + 1),
        ))
    return check_array_loop