class UT_RTL_Report(ML_Entity("ut_rtl_report"), TestRunner):
    @staticmethod
    def get_default_args(**kw):
        default_dict = {
            "precision": ML_Int32,
            "debug_flag": False,
            "target": VHDLBackend(),
            "output_file": "ut_rtl_report.vhd",
            "entity_name": "ut_rtl_report",
            "language": VHDL_Code,
        }
        default_dict.update(kw)
        return DefaultEntityArgTemplate(**default_dict)

    def __init__(self, arg_template=None):
        # initializing I/O precision
        precision = arg_template.precision
        io_precisions = [precision] * 2
        # initializing base class
        ML_EntityBasis.__init__(self,
                                base_name="ut_rtl_report",
                                arg_template=arg_template)
        self.precision = arg_template.precision

    def generate_scheme(self):
        """ generate main architecture for UT_RTL_Report """
        main = Statement()
        # basic string
        main.add(Report("displaying simple string"))
        # string from std_logic_vector conversion
        cst_format = ML_StdLogicVectorFormat(12)
        cst = Constant(17, precision=cst_format)
        main.add(Report(Conversion(cst, precision=ML_String)))
        # string from concatenation of several elements
        complex_string = Concatenation(
            "displaying concatenated string",
            Conversion(cst, precision=ML_String),
            precision=ML_String)
        main.add(Report(complex_string))
        main.add(Wait(100))
        # main process
        main_process = Process(main)
        self.implementation.add_process(main_process)
        return [self.implementation]

    @staticmethod
    def __call__(args):
        # just ignore args here and trust default constructor?
        # seems like a bad idea.
        ut_rtl_report = UT_RTL_Report(args)
        ut_rtl_report.gen_implementation()
        return True

class BipartiteApprox(ML_Entity("bipartite_approx")):
    def __init__(self, arg_template=DefaultEntityArgTemplate):
        # initializing base class
        ML_EntityBasis.__init__(self, arg_template=arg_template)
        self.pipelined = arg_template.pipelined
        # function to be approximated
        self.function = arg_template.function
        # interval on which the approximation must be valid
        self.interval = arg_template.interval
        self.disable_sub_testing = arg_template.disable_sub_testing
        self.disable_sv_testing = arg_template.disable_sv_testing
        self.alpha = arg_template.alpha
        self.beta = arg_template.beta
        self.gamma = arg_template.gamma
        self.guard_bits = arg_template.guard_bits

    ## default argument template generation
    @staticmethod
    def get_default_args(**kw):
        """ generate default argument structure for BipartiteApprox """
        default_dict = {
            "target": VHDLBackend(),
            "output_file": "my_bipartite_approx.vhd",
            "entity_name": "my_bipartite_approx",
            "language": VHDL_Code,
            "function": lambda x: 1.0 / x,
            "interval": Interval(1, 2),
            "pipelined": False,
            "precision": fixed_point(1, 15, signed=False),
            "disable_sub_testing": False,
            "disable_sv_testing": False,
            "alpha": 6,
            "beta": 5,
            "gamma": 5,
            "guard_bits": 3,
            "passes": [
                "beforepipelining:size_datapath",
                "beforepipelining:rtl_legalize",
                "beforepipelining:unify_pipeline_stages",
            ],
        }
        default_dict.update(kw)
        return DefaultEntityArgTemplate(**default_dict)

    def generate_scheme(self):
        ## convert @p value from an input floating-point precision
        #  @p in_precision to an output support format @p out_precision
        io_precision = self.precision
        # declaring main input variable
        vx = self.implementation.add_input_signal("x", io_precision)
        # rounding mode input
        rnd_mode = self.implementation.add_input_signal(
            "rnd_mode", rnd_mode_format)

        # size of most significant table index (for linear slope tabulation)
        alpha = self.alpha  # 6
        # size of medium significant table index
        # (for initial value table index LSB)
        beta = self.beta  # 5
        # size of least significant table index (for linear offset tabulation)
        gamma = self.gamma  # 5
        guard_bits = self.guard_bits  # 3

        vx.set_interval(self.interval)
        range_hi = sollya.sup(self.interval)
        range_lo = sollya.inf(self.interval)
        f_hi = self.function(range_hi)
        f_lo = self.function(range_lo)
        # fixed by format used for reduced_x
        range_size = range_hi - range_lo
        range_size_log2 = int(sollya.log2(range_size))
        assert 2**range_size_log2 == range_size
        print("range_size_log2={}".format(range_size_log2))

        reduced_x = Conversion(
            BitLogicRightShift(vx - range_lo, range_size_log2),
            precision=fixed_point(0, alpha + beta + gamma, signed=False),
            tag="reduced_x",
            debug=debug_fixed)

        alpha_index = get_fixed_slice(
            reduced_x, 0, alpha - 1,
            align_hi=FixedPointPosition.FromMSBToLSB,
            align_lo=FixedPointPosition.FromMSBToLSB,
            tag="alpha_index",
            debug=debug_std)
        gamma_index = get_fixed_slice(
            reduced_x, gamma - 1, 0,
            align_hi=FixedPointPosition.FromLSBToLSB,
            align_lo=FixedPointPosition.FromLSBToLSB,
            tag="gamma_index",
            debug=debug_std)
        beta_index = get_fixed_slice(
            reduced_x, alpha, gamma,
            align_hi=FixedPointPosition.FromMSBToLSB,
            align_lo=FixedPointPosition.FromLSBToLSB,
            tag="beta_index",
            debug=debug_std)

        # Assuming the function is monotonic on the interval
        f_absmax = max(abs(f_hi), abs(f_lo))
        f_absmin = min(abs(f_hi), abs(f_lo))
        f_msb = int(sollya.ceil(sollya.log2(f_absmax))) + 1
        f_lsb = int(sollya.floor(sollya.log2(f_absmin)))
        storage_lsb = f_lsb - io_precision.get_bit_size() - guard_bits

        f_int_size = f_msb
        f_frac_size = -storage_lsb
        storage_format = fixed_point(f_int_size, f_frac_size, signed=False)
        Log.report(Log.Info, "storage_format is {}".format(storage_format))

        # table of initial value index
        tiv_index = Concatenation(alpha_index, beta_index,
                                  tag="tiv_index", debug=debug_std)
        # table of offset value index
        to_index = Concatenation(alpha_index, gamma_index,
                                 tag="to_index", debug=debug_std)

        tiv_index_size = alpha + beta
        to_index_size = alpha + gamma

        Log.report(Log.Info, "initial table structures")
        table_iv = ML_NewTable(dimensions=[2**tiv_index_size],
                               storage_precision=storage_format,
                               tag="tiv")
        table_offset = ML_NewTable(dimensions=[2**to_index_size],
                                   storage_precision=storage_format,
                                   tag="to")

        slope_table = [None] * (2**alpha)
        slope_delta = 1.0 / sollya.SollyaObject(2**alpha)
        delta_u = range_size * slope_delta * 2**-15
        Log.report(Log.Info, "computing slope value")
        for i in range(2**alpha):
            # slope is computed at the middle of range_size interval
            slope_x = range_lo + (i + 0.5) * range_size * slope_delta
            # TODO: gross approximation of derivatives
            f_xpu = self.function(slope_x + delta_u / 2)
            f_xmu = self.function(slope_x - delta_u / 2)
            slope = (f_xpu - f_xmu) / delta_u
            slope_table[i] = slope

        range_rcp_steps = 1.0 / sollya.SollyaObject(2**tiv_index_size)
        Log.report(Log.Info, "computing value for initial-value table")
        for i in range(2**tiv_index_size):
            slope_index = i / 2**beta
            iv_x = range_lo + i * range_rcp_steps * range_size
            offset_x = 0.5 * range_rcp_steps * range_size
            # initial value is computed so that the piecewise linear
            # approximation intersects the function at iv_x + offset_x
            iv_y = self.function(iv_x + offset_x) \
                - offset_x * slope_table[int(slope_index)]
            initial_value = storage_format.round_sollya_object(iv_y)
            table_iv[i] = initial_value

        # determining table of initial value interval
        tiv_min = table_iv[0]
        tiv_max = table_iv[0]
        for i in range(1, 2**tiv_index_size):
            tiv_min = min(tiv_min, table_iv[i])
            tiv_max = max(tiv_max, table_iv[i])
        table_iv.set_interval(Interval(tiv_min, tiv_max))

        offset_step = range_size / S2**(alpha + beta + gamma)
        for i in range(2**alpha):
            Log.report(Log.Info,
                       "computing offset value for sub-table {}".format(i))
            for j in range(2**gamma):
                to_i = i * 2**gamma + j
                offset = slope_table[i] * j * offset_step
                table_offset[to_i] = offset

        # determining table of offset interval
        to_min = table_offset[0]
        to_max = table_offset[0]
        for i in range(1, 2**(alpha + gamma)):
            to_min = min(to_min, table_offset[i])
            to_max = max(to_max, table_offset[i])
        offset_interval = Interval(to_min, to_max)
        table_offset.set_interval(offset_interval)

        initial_value = TableLoad(table_iv, tiv_index,
                                  precision=storage_format,
                                  tag="initial_value",
                                  debug=debug_fixed)

        offset_precision = get_fixed_type_from_interval(offset_interval, 16)
        print("offset_precision is {} ({} bits)".format(
            offset_precision, offset_precision.get_bit_size()))
        table_offset.get_precision().storage_precision = offset_precision

        # rounding table value
        for i in range(1, 2**(alpha + gamma)):
            table_offset[i] = offset_precision.round_sollya_object(
                table_offset[i])

        offset_value = TableLoad(table_offset, to_index,
                                 precision=offset_precision,
                                 tag="offset_value",
                                 debug=debug_fixed)

        Log.report(
            Log.Verbose,
            "initial_value's interval: {}, offset_value's interval: {}".format(
                evaluate_range(initial_value),
                evaluate_range(offset_value)))

        final_add = initial_value + offset_value
        round_bit = final_add  # + FixedPointPosition(final_add, io_precision.get_bit_size(), align=FixedPointPosition.FromMSBToLSB)

        vr_out = Conversion(initial_value + offset_value,
                            precision=io_precision,
                            tag="vr_out",
                            debug=debug_fixed)

        self.implementation.add_output_signal("vr_out", vr_out)

        # Approximation error evaluation
        approx_error = 0.0
        for i in range(2**alpha):
            for j in range(2**beta):
                tiv_i = (i * 2**beta + j)
                # = range_lo + tiv_i * range_rcp_steps * range_size
                iv = table_iv[tiv_i]
                for k in range(2**gamma):
                    to_i = i * 2**gamma + k
                    offset = table_offset[to_i]
                    approx_value = offset + iv
                    table_x = range_lo + range_size * (
                        (i * 2**beta + j) * 2**gamma + k) \
                        / S2**(alpha + beta + gamma)
                    local_error = abs(self.function(table_x) - approx_value)
                    approx_error = max(approx_error, local_error)
        error_log2 = float(sollya.log2(approx_error))
        print("approx_error is {}, error_log2 is {}".format(
            float(approx_error), error_log2))

        # table size
        table_iv_size = 2**(alpha + beta)
        table_offset_size = 2**(alpha + gamma)
        print("tables' size are {} entries".format(
            table_iv_size + table_offset_size))

        return [self.implementation]

    def init_test_generator(self):
        """ Initialize test case generator """
        self.input_generator = FixedPointRandomGen(
            int_size=self.precision.get_integer_size(),
            frac_size=self.precision.get_frac_size(),
            signed=self.precision.signed)

    def generate_test_case(self, input_signals, io_map, index,
                           test_range=None):
        """ specific test case generation for K1C TCA BLAU """
        rnd_mode = 2  # random.randrange(4)

        hi = sup(self.auto_test_range)
        lo = inf(self.auto_test_range)
        nb_step = int((hi - lo) * S2**self.precision.get_frac_size())
        x_value = lo + (hi - lo) * random.randrange(nb_step) / nb_step
        # self.input_generator.get_new_value()
        input_values = {
            "rnd_mode": rnd_mode,
            "x": x_value,
        }
        return input_values

    def numeric_emulate(self, io_map):
        vx = io_map["x"]
        rnd_mode_i = io_map["rnd_mode"]
        rnd_mode = {
            0: sollya.RN,
            1: sollya.RU,
            2: sollya.RD,
            3: sollya.RZ
        }[rnd_mode_i]
        result = {}
        result["vr_out"] = sollya.round(self.function(vx),
                                        self.precision.get_frac_size(),
                                        rnd_mode)
        print("numeric_emulate, ", io_map, result)
        return result

    #standard_test_cases = [({"x": 1.0, "y": (S2**-11 + S2**-17)}, None)]
    standard_test_cases = [
        ({"x": 1.0, "rnd_mode": 0}, None),
        ({"x": 1.5, "rnd_mode": 0}, None),
    ]

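# ---------------------------------------------------------------------------
# Illustrative sketch (not used by the generator): a plain-float model of the
# bipartite table method that BipartiteApprox.generate_scheme implements in
# RTL above. The helper names (build_bipartite_tables, eval_bipartite) are
# hypothetical; the table-filling formulas mirror the ones used to populate
# table_iv and table_offset, with sollya arithmetic replaced by Python floats.

def build_bipartite_tables(f, range_lo, range_size, alpha, beta, gamma):
    """Build the initial-value (TIV) and offset (TO) tables for f."""
    slope = []
    delta_u = range_size * 2.0**-alpha * 2.0**-15
    for i in range(2**alpha):
        # slope estimated at the middle of the i-th alpha-interval
        x = range_lo + (i + 0.5) * range_size * 2.0**-alpha
        slope.append((f(x + delta_u / 2) - f(x - delta_u / 2)) / delta_u)
    step = range_size * 2.0**-(alpha + beta)
    tiv = []
    for i in range(2**(alpha + beta)):
        # initial value chosen so the piecewise linear approximation
        # intersects f at the middle of the (alpha+beta)-interval
        x = range_lo + i * step
        tiv.append(f(x + 0.5 * step) - 0.5 * step * slope[i >> beta])
    offset_step = range_size * 2.0**-(alpha + beta + gamma)
    to = [slope[i] * j * offset_step
          for i in range(2**alpha) for j in range(2**gamma)]
    return tiv, to

def eval_bipartite(tiv, to, alpha, beta, gamma, reduced_index):
    """Approximate f from the tables, given an (alpha+beta+gamma)-bit index."""
    a = reduced_index >> (beta + gamma)
    b = (reduced_index >> gamma) & (2**beta - 1)
    g = reduced_index & (2**gamma - 1)
    # TIV is indexed by alpha:beta, TO by alpha:gamma, as in generate_scheme
    return tiv[(a << beta) + b] + to[(a << gamma) + g]
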
class FP_Divider(ML_Entity("fp_div")):
    def __init__(self, arg_template=DefaultEntityArgTemplate):
        # initializing base class
        ML_EntityBasis.__init__(self, arg_template=arg_template)
        self.pipelined = arg_template.pipelined

    ## default argument template generation
    @staticmethod
    def get_default_args(**kw):
        default_dict = {
            "precision": ML_Binary32,
            "target": VHDLBackend(),
            "output_file": "my_fp_div.vhd",
            "entity_name": "my_fp_div",
            "language": VHDL_Code,
            "pipelined": False,
        }
        default_dict.update(kw)
        return DefaultEntityArgTemplate(**default_dict)

    def generate_scheme(self):
        def get_virtual_cst(prec, value, language):
            return prec.get_support_format().get_cst(
                prec.get_base_format().get_integer_coding(value, language))

        ## convert @p value from an input floating-point precision
        #  @p in_precision to an output support format @p out_precision
        io_precision = HdlVirtualFormat(self.precision)

        # declaring main input variable
        vx = self.implementation.add_input_signal("x", io_precision)
        if self.pipelined:
            self.implementation.add_input_signal("reset", ML_StdLogic)

        vx_precision = self.precision
        p = vx_precision.get_mantissa_size()

        exp_vx_precision = ML_StdLogicVectorFormat(
            vx_precision.get_exponent_size())
        mant_vx_precision = ML_StdLogicVectorFormat(p)

        # mantissa extraction
        mant_vx = MantissaExtraction(vx, precision=mant_vx_precision,
                                     tag="mant_vx")
        # exponent extraction
        exp_vx = RawExponentExtraction(vx, precision=exp_vx_precision,
                                       tag="exp_vx", debug=debug_dec)

        approx_index_size = 8
        approx_precision = RTL_FixedPointFormat(
            2, approx_index_size,
            support_format=ML_StdLogicVectorFormat(approx_index_size + 2),
        )

        # selecting table index from input mantissa MSBs
        tab_index = SubSignalSelection(mant_vx,
                                       p - 2 - approx_index_size + 1, p - 2,
                                       tag="tab_index")

        # declaring reciprocal approximation table
        inv_approx_table = ML_NewTable(dimensions=[2**approx_index_size],
                                       storage_precision=approx_precision,
                                       tag="inv_approx_table")
        for i in range(2**approx_index_size):
            num_input = 1 + i * S2**-approx_index_size
            table_value = io_precision.get_base_format().round_sollya_object(
                1 / num_input)
            inv_approx_table[i] = table_value

        # extracting initial reciprocal approximation
        inv_approx_value = TableLoad(inv_approx_table, tab_index,
                                     precision=approx_precision,
                                     tag="inv_approx_value",
                                     debug=debug_fixed)

        #inv_approx_value = TypeCast(inv_approx_value, precision = approx_precision)
        pre_it0_input = zext(
            SubSignalSelection(mant_vx, p - 1 - approx_index_size, p - 1,
                               tag="it0_input"), 1)
        it0_input = TypeCast(pre_it0_input, precision=approx_precision,
                             tag="it0_input", debug=debug_fixed)

        it1_precision = RTL_FixedPointFormat(
            2, 2 * approx_index_size,
            support_format=ML_StdLogicVectorFormat(2 + 2 * approx_index_size))

        pre_it1_input = zext(
            SubSignalSelection(mant_vx, p - 1 - 2 * approx_index_size, p - 1,
                               tag="it1_input"), 1)
        it1_input = TypeCast(pre_it1_input, precision=it1_precision,
                             tag="it1_input", debug=debug_fixed)

        final_approx = generate_NR_iteration(
            it0_input,
            inv_approx_value,
            (2, approx_index_size * 2),   # mult precision
            (-3, 2 * approx_index_size),  # error precision
            (2, approx_index_size * 3),   # new-approx mult
            (2, approx_index_size * 2),   # new approx precision
            self.implementation,
            pipelined=0,  # 1 if self.pipelined else 0,
            tag_suffix="_first")

        # Inserting post-input pipeline stage
        if self.pipelined:
            self.implementation.start_new_stage()

        final_approx = generate_NR_iteration(
            it1_input,
            final_approx,
            # mult precision
            (2, approx_index_size * 3),
            # error precision
            (-6, approx_index_size * 3),
            # approx mult precision
            (2, approx_index_size * 3),
            # new approx precision
            (2, approx_index_size * 3),
            self.implementation,
            pipelined=1 if self.pipelined else 0,
            tag_suffix="_second")

        # Inserting post-input pipeline stage
        if self.pipelined:
            self.implementation.start_new_stage()

        last_it_precision = RTL_FixedPointFormat(
            2, p - 1,
            support_format=ML_StdLogicVectorFormat(2 + p - 1))

        pre_last_it_input = zext(mant_vx, 1)
        last_it_input = TypeCast(pre_last_it_input,
                                 precision=last_it_precision,
                                 tag="last_it_input",
                                 debug=debug_fixed)

        final_approx = generate_NR_iteration(
            last_it_input,
            final_approx,
            # mult-precision
            (2, 2 * p - 1),
            # error precision
            (int(-(3 * approx_index_size) / 2),
             approx_index_size * 2 + p - 1),
            # mult approx mult precision
            (2, approx_index_size * 2 + p - 1),
            # approx precision
            (2, p),
            self.implementation,
            pipelined=2 if self.pipelined else 0,
            tag_suffix="_third")

        # Inserting post-input pipeline stage
        if self.pipelined:
            self.implementation.start_new_stage()

        final_approx = generate_NR_iteration(
            last_it_input,
            final_approx,
            (2, 2 * p),
            (int(-(4 * p) / 5), 2 * p),
            (2, 2 * p),
            (2, 2 * p),
            self.implementation,
            pipelined=2 if self.pipelined else 0,
            tag_suffix="_last")

        # Inserting post-input pipeline stage
        if self.pipelined:
            self.implementation.start_new_stage()

        final_approx.set_attributes(tag="final_approx", debug=debug_fixed)

        # bit indexes to select mantissa from final_approximation
        pre_mant_size = min(self.precision.get_field_size(),
                            final_approx.get_precision().get_frac_size())
        final_approx_frac_msb_index = \
            final_approx.get_precision().get_frac_size() - 1
        final_approx_frac_lsb_index = \
            final_approx.get_precision().get_frac_size() - pre_mant_size

        # extracting bit to determine if result should be left-shifted and
        # exponent incremented
        cst_index = Constant(final_approx.get_precision().get_frac_size(),
                             precision=ML_Integer)
        final_approx_casted = TypeCast(
            final_approx,
            precision=ML_StdLogicVectorFormat(
                final_approx.get_precision().get_bit_size()))
        not_decrement = final_approx_casted[cst_index]
        not_decrement.set_attributes(precision=ML_StdLogic,
                                     tag="not_decrement",
                                     debug=debug_std)
        logic_1 = Constant(1, precision=ML_StdLogic)

        result = Select(
            Comparison(not_decrement, logic_1,
                       specifier=Comparison.Equal, precision=ML_Bool),
            SubSignalSelection(
                TypeCast(
                    final_approx,
                    precision=ML_StdLogicVectorFormat(
                        final_approx.get_precision().get_bit_size())),
                final_approx_frac_lsb_index,
                final_approx_frac_msb_index,
            ),
            SubSignalSelection(
                TypeCast(
                    final_approx,
                    precision=ML_StdLogicVectorFormat(
                        final_approx.get_precision().get_bit_size())),
                final_approx_frac_lsb_index - 1,
                final_approx_frac_msb_index - 1,
            ),
            precision=ML_StdLogicVectorFormat(pre_mant_size),
            tag="result")

        def get_bit(optree, bit_index):
            bit_index_cst = Constant(bit_index, precision=ML_Integer)
            bit_sel = VectorElementSelection(optree, bit_index_cst,
                                             precision=ML_StdLogic)
            return bit_sel

        least_bit = Select(
            Comparison(not_decrement, logic_1,
                       specifier=Comparison.Equal, precision=ML_Bool),
            get_bit(final_approx_casted, final_approx_frac_lsb_index),
            get_bit(final_approx_casted, final_approx_frac_lsb_index - 1),
            precision=ML_StdLogic,
            tag="least_bit",
            debug=debug_std,
        )
        round_bit = Select(
            Comparison(not_decrement, logic_1,
                       specifier=Comparison.Equal, precision=ML_Bool),
            get_bit(final_approx_casted, final_approx_frac_lsb_index - 1),
            get_bit(final_approx_casted, final_approx_frac_lsb_index - 2),
            precision=ML_StdLogic,
            tag="round_bit",
            debug=debug_std,
        )
        sticky_bit_input = Select(
            Comparison(not_decrement, logic_1,
                       specifier=Comparison.Equal, precision=ML_Bool),
            SubSignalSelection(
                final_approx_casted, 0, final_approx_frac_lsb_index - 2,
                precision=ML_StdLogicVectorFormat(
                    final_approx_frac_lsb_index - 1)),
            zext(
                SubSignalSelection(
                    final_approx_casted, 0,
                    final_approx_frac_lsb_index - 3,
                    precision=ML_StdLogicVectorFormat(
                        final_approx_frac_lsb_index - 2)),
                1),
            precision=ML_StdLogicVectorFormat(
                final_approx_frac_lsb_index - 1))
        sticky_bit = Select(
            Equal(
                sticky_bit_input,
                Constant(0,
                         precision=ML_StdLogicVectorFormat(
                             final_approx_frac_lsb_index - 1))),
            Constant(0, precision=ML_StdLogic),
            Constant(1, precision=ML_StdLogic),
            precision=ML_StdLogic,
            tag="sticky_bit",
            debug=debug_std)

        # if mantissa requires extension
        if pre_mant_size < self.precision.get_mantissa_size() - 1:
            result = rzext(
                result,
                self.precision.get_mantissa_size() - 1 - pre_mant_size)

        res_mant_field = result

        # real_exp = exp_vx - bias
        # - real_exp = bias - exp_vx
        # encoded negated exp = bias - exp_vx + bias = 2 * bias - exp_vx
        fp_io_precision = io_precision.get_base_format()
        exp_op_precision = ML_StdLogicVectorFormat(
            fp_io_precision.get_exponent_size() + 2)
        biasX2 = Constant(-2 * fp_io_precision.get_bias(),
                          precision=exp_op_precision)

        neg_exp = Subtraction(
            SignCast(biasX2,
                     specifier=SignCast.Unsigned,
                     precision=get_unsigned_precision(exp_op_precision)),
            SignCast(zext(exp_vx, 2),
                     specifier=SignCast.Unsigned,
                     precision=get_unsigned_precision(exp_op_precision)),
            precision=exp_op_precision,
            tag="neg_exp",
            debug=debug_dec)
        neg_exp_field = SubSignalSelection(
            neg_exp, 0, fp_io_precision.get_exponent_size() - 1,
            precision=ML_StdLogicVectorFormat(
                fp_io_precision.get_exponent_size()))

        res_exp = Addition(
            SignCast(neg_exp_field,
                     precision=get_unsigned_precision(
                         exp_vx.get_precision()),
                     specifier=SignCast.Unsigned),
            SignCast(
                Select(
                    Comparison(not_decrement, logic_1,
                               specifier=Comparison.Equal,
                               precision=ML_Bool),
                    Constant(0, precision=exp_vx_precision),
                    Constant(-1, precision=exp_vx_precision),
                    precision=exp_vx_precision),
                precision=get_unsigned_precision(exp_vx_precision),
                specifier=SignCast.Unsigned),
            precision=exp_vx_precision,
            tag="result_exp",
            debug=debug_dec)

        res_sign = CopySign(vx, precision=ML_StdLogic)

        exp_mant_precision = ML_StdLogicVectorFormat(
            io_precision.get_bit_size() - 1)

        round_incr = Select(
            LogicalAnd(
                Equal(round_bit, Constant(1, precision=ML_StdLogic)),
                LogicalOr(
                    Equal(sticky_bit, Constant(1, precision=ML_StdLogic)),
                    Equal(least_bit, Constant(1, precision=ML_StdLogic)),
                    precision=ML_Bool,
                ),
                precision=ML_Bool,
            ),
            Constant(1, precision=ML_StdLogic),
            Constant(0, precision=ML_StdLogic),
            tag="round_incr",
            precision=ML_StdLogic,
            debug=debug_std)

        exp_mant = Concatenation(res_exp, res_mant_field,
                                 precision=exp_mant_precision)

        exp_mant_rounded = Addition(
            SignCast(exp_mant, SignCast.Unsigned,
                     precision=get_unsigned_precision(exp_mant_precision)),
            round_incr,
            precision=exp_mant_precision,
            tag="exp_mant_rounded")

        vr_out = TypeCast(
            Concatenation(
                res_sign,
                exp_mant_rounded,
                precision=ML_StdLogicVectorFormat(
                    io_precision.get_bit_size())),
            precision=io_precision,
            debug=debug_hex,
            tag="vr_out")

        self.implementation.add_output_signal("vr_out", vr_out)

        return [self.implementation]

    def numeric_emulate(self, io_map):
        vx = io_map["x"]
        result = {}
        result["vr_out"] = sollya.round(1.0 / vx,
                                        self.precision.get_sollya_object(),
                                        sollya.RN)
        return result

    #standard_test_cases = [({"x": 1.0, "y": (S2**-11 + S2**-17)}, None)]
    standard_test_cases = [
        ({"x": sollya.parse("0x1.24f608p0")}, None),
        ({"x": 1.5}, None),
    ]

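# ---------------------------------------------------------------------------
# Illustrative sketch: generate_NR_iteration (imported from elsewhere) builds
# one hardware Newton-Raphson step; the plain-float model below shows the
# classical iteration it is based on, x_{n+1} = x_n * (2 - m * x_n), seeded
# from the same 8-bit-indexed reciprocal table as inv_approx_table above.
# The helper name is hypothetical and not part of the generator.

def nr_reciprocal_sketch(m, table_index_size=8, num_iterations=3):
    """Approximate 1/m for a normalized mantissa m in [1, 2)."""
    assert 1.0 <= m < 2.0
    # table seed: reciprocal of 1 + i * 2**-table_index_size, indexed by
    # the mantissa MSBs (mirrors the inv_approx_table initialization)
    index = int((m - 1.0) * 2**table_index_size)
    x = 1.0 / (1.0 + index * 2.0**-table_index_size)
    for _ in range(num_iterations):
        # each step roughly doubles the number of correct bits
        x = x * (2.0 - m * x)
    return x
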
class ML_UT_EntityPass(ML_Entity("ml_lzc"), TestRunner):
    @staticmethod
    def get_default_args(width=32):
        return DefaultEntityArgTemplate(
            precision=ML_Int32,
            debug_flag=False,
            target=VHDLBackend(),
            output_file="my_lzc.vhd",
            entity_name="my_lzc",
            language=VHDL_Code,
            width=width,
        )

    def __init__(self, arg_template=None):
        # building default arg_template if necessary
        arg_template = ML_UT_EntityPass.get_default_args() \
            if arg_template is None else arg_template
        # initializing I/O precision
        self.width = arg_template.width
        precision = arg_template.precision
        io_precisions = [precision] * 2
        Log.report(Log.Info,
                   "generating LZC with width={}".format(self.width))

        # initializing base class
        ML_EntityBasis.__init__(self,
                                base_name="ml_lzc",
                                arg_template=arg_template)

        pass_scheduler = self.get_pass_scheduler()
        pass_5 = LocalPass("pass 5", 5)
        pass_3 = LocalPass("pass 3", 3)
        pass_4 = LocalPass("pass 4", 4)
        pass_1 = LocalPass("pass 1", 1)
        pass_2 = LocalPass("pass 2", 2)
        pass_3_deps = CombineAnd(
            AfterPassById(pass_5.get_pass_id()),
            CombineAnd(AfterPassById(pass_2.get_pass_id()),
                       AfterPassByClass(LocalPass)))
        pass_4_deps = CombineAnd(AfterPassById(pass_3.get_pass_id()),
                                 pass_3_deps)
        pass_5_deps = CombineOr(AfterPassById(pass_3.get_pass_id()),
                                AfterPassById(pass_2.get_pass_id()))
        # registering passes in arbitrary order
        pass_scheduler.register_pass(
            pass_4, pass_dep=pass_4_deps,
            pass_slot=PassScheduler.JustBeforeCodeGen)
        pass_scheduler.register_pass(
            pass_5, pass_dep=pass_5_deps,
            pass_slot=PassScheduler.JustBeforeCodeGen)
        pass_scheduler.register_pass(
            pass_3, pass_dep=pass_3_deps,
            pass_slot=PassScheduler.JustBeforeCodeGen)
        pass_scheduler.register_pass(pass_1,
                                     pass_slot=PassScheduler.Start)
        pass_scheduler.register_pass(
            pass_2, pass_slot=PassScheduler.JustBeforeCodeGen)

        self.accuracy = arg_template.accuracy
        self.precision = arg_template.precision

    def numeric_emulate(self, io_map):
        def count_leading_zero(v, w):
            """ count the number of leading zeros in the w-bit value v """
            for i in range(w):
                if v & 2**(w - 1 - i):
                    return i
            return w
        result = {}
        result["vr_out"] = count_leading_zero(io_map["x"], self.width)
        return result

    def generate_scheme(self):
        lzc_width = int(floor(log2(self.width))) + 1
        Log.report(Log.Info, "width of lzc out is {}".format(lzc_width))
        input_precision = ML_StdLogicVectorFormat(self.width)
        precision = ML_StdLogicVectorFormat(lzc_width)
        # declaring main input variable
        vx = self.implementation.add_input_signal("x", input_precision)
        vr_out = Signal("lzc", precision=precision,
                        var_type=Variable.Local)
        iterator = Variable("i", precision=ML_Integer,
                            var_type=Variable.Local)
        lzc_loop = RangeLoop(
            iterator,
            Interval(0, self.width - 1),
            ConditionBlock(
                Comparison(VectorElementSelection(vx, iterator,
                                                  precision=ML_StdLogic),
                           Constant(1, precision=ML_StdLogic),
                           specifier=Comparison.Equal,
                           precision=ML_Bool),
                ReferenceAssign(
                    vr_out,
                    Conversion(
                        Subtraction(
                            Constant(self.width - 1, precision=ML_Integer),
                            iterator,
                            precision=ML_Integer),
                        precision=precision),
                )),
            specifier=RangeLoop.Increasing,
        )
        lzc_process = Process(
            Statement(
                ReferenceAssign(vr_out,
                                Constant(self.width, precision=precision)),
                lzc_loop,
            ),
            sensibility_list=[vx])
        self.implementation.add_process(lzc_process)
        self.implementation.add_output_signal("vr_out", vr_out)
        return [self.implementation]

    # NB: this later definition overrides the get_default_args declared at
    # the top of the class
    @staticmethod
    def get_default_args(**kw):
        root_arg = {
            "entity_name": "new_entity_pass",
            "output_file": "ut_entity_pass.c",
            "width": 32,
            "precision": ML_Int32
        }
        root_arg.update(kw)
        return DefaultEntityArgTemplate(**root_arg)

    @staticmethod
    def __call__(args):
        # just ignore args here and trust default constructor?
        # seems like a bad idea.
        ml_ut_block_lzcnt = ML_UT_EntityPass(args)
        ml_ut_block_lzcnt.gen_implementation()
        expected_id_list = [2, 5, 3, 4]
        Log.report(Log.Verbose, "expected_id_list: ", expected_id_list)
        # executed_id_list is expected to be a module-level list populated
        # as each LocalPass executes
        assert all(exp == real for exp, real
                   in zip(executed_id_list, expected_id_list))
        return True

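# ---------------------------------------------------------------------------
# Illustrative sketch: a minimal model of the dependency resolution the pass
# scheduler performs for the JustBeforeCodeGen slot above. Passes are scanned
# repeatedly and executed once all of their "after pass id" dependencies are
# satisfied, which yields the expected order [2, 5, 3, 4] for the passes
# registered in __init__ (pass_5's CombineOr is modeled here by its
# satisfiable branch, "after pass 2"). All names are hypothetical.

def schedule_passes_sketch():
    # pass id -> set of pass ids that must have executed first
    deps = {2: set(), 5: {2}, 3: {5, 2}, 4: {3, 5, 2}}
    executed = []
    pending = set(deps)
    while pending:
        ready = sorted(p for p in pending if deps[p] <= set(executed))
        assert ready, "dependency cycle"
        # execute one ready pass per scan, lowest id first
        executed.append(ready[0])
        pending.remove(ready[0])
    return executed

assert schedule_passes_sketch() == [2, 5, 3, 4]
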
class UnifyPipelineBench(ML_Entity("ut_unify_pipeline_bench_entity"),
                         TestRunner):
    """ Adaptative Entity unit-test """
    @staticmethod
    def get_default_args(width=32, **kw):
        """ generate default argument template """
        return DefaultEntityArgTemplate(
            precision=ML_Int32,
            debug_flag=False,
            target=VHDLBackend(),
            output_file="my_adapative_entity.vhd",
            entity_name="my_adaptative_entity",
            language=VHDL_Code,
            width=width,
            passes=[
                "beforepipelining:dump_with_stages",
                "beforepipelining:size_datapath",
                "beforepipelining:dump_with_stages",
                "beforepipelining:rtl_legalize",
                "beforepipelining:dump_with_stages",
                "beforepipelining:unify_pipeline_stages",
                "beforepipelining:dump_with_stages",
            ],
        )

    def __init__(self, arg_template=None):
        """ Initialize """
        # building default arg_template if necessary
        arg_template = UnifyPipelineBench.get_default_args() if \
            arg_template is None else arg_template
        # initializing I/O precision
        self.width = arg_template.width
        precision = arg_template.precision
        io_precisions = [precision] * 2
        Log.report(
            Log.Info,
            "generating Adaptative Entity with width={}".format(self.width))
        # initializing base class
        ML_EntityBasis.__init__(self,
                                base_name="adaptative_design",
                                arg_template=arg_template)
        self.accuracy = arg_template.accuracy
        self.precision = arg_template.precision

        def check_function(optree):
            """ Check that every node (except Statements) has a defined
                init_stage attribute """
            if isinstance(optree, Statement):
                return True
            init_stage = optree.attributes.get_dyn_attribute("init_stage")
            if init_stage is None:
                raise Exception("check of init_stage definition failed")
            return True

        Log.report(Log.Info, "registering pass to check results")
        check_pass = Pass_CheckGeneric(self.backend,
                                       check_function,
                                       "checking pass")
        self.get_pass_scheduler().register_pass(
            check_pass, pass_slot=PassScheduler.JustBeforeCodeGen)

    def generate_scheme(self):
        """ main scheme generation """
        Log.report(Log.Info, "width parameter is {}".format(self.width))
        int_size = 3
        frac_size = self.width - int_size

        input_precision = fixed_point(int_size, frac_size)
        output_precision = fixed_point(int_size, frac_size)

        # declaring main input variable
        var_x = self.implementation.add_input_signal("x", input_precision)
        var_y = self.implementation.add_input_signal("y", input_precision)
        var_x.set_attributes(debug=debug_fixed)
        var_y.set_attributes(debug=debug_fixed)

        sub = var_x - var_y
        c = Constant(0)

        self.implementation.start_new_stage()

        #pre_result = Select(
        #    c > sub,
        #    c,
        #    sub
        #)
        pre_result = Max(0, sub)

        self.implementation.start_new_stage()

        result = Conversion(pre_result + var_x, precision=output_precision)

        self.implementation.add_output_signal("vr_out", result)

        return [self.implementation]

    standard_test_cases = [
        ({"x": 2, "y": 2}, None),
        ({"x": 1, "y": 2}, None),
        ({"x": 0.5, "y": 2}, None),
        ({"x": -1, "y": -1}, None),
    ]

    def numeric_emulate(self, io_map):
        """ Meta-Function numeric emulation """
        raise NotImplementedError

    @staticmethod
    def __call__(args):
        # just ignore args here and trust default constructor?
        # seems like a bad idea.
        ut_adaptative_entity = UnifyPipelineBench(args)
        ut_adaptative_entity.gen_implementation()
        return True

class FP_FMA(ML_Entity("fp_fma")):
    def __init__(self,
                 arg_template=DefaultEntityArgTemplate,
                 precision=ML_Binary32,
                 accuracy=ML_Faithful,
                 libm_compliant=True,
                 debug_flag=False,
                 fuse_fma=True,
                 fast_path_extract=True,
                 target=VHDLBackend(),
                 output_file="fp_fma.vhd",
                 entity_name="fp_fma",
                 language=VHDL_Code,
                 vector_size=1):
        # initializing I/O precision
        precision = ArgDefault.select_value([arg_template.precision,
                                             precision])
        io_precisions = [precision] * 2

        # initializing base class
        ML_EntityBasis.__init__(self,
                                base_name="fp_fma",
                                entity_name=entity_name,
                                output_file=output_file,
                                io_precisions=io_precisions,
                                abs_accuracy=None,
                                backend=target,
                                fuse_fma=fuse_fma,
                                fast_path_extract=fast_path_extract,
                                debug_flag=debug_flag,
                                language=language,
                                arg_template=arg_template)

        self.accuracy = accuracy
        self.precision = precision

    def generate_scheme(self):
        def get_virtual_cst(prec, value, language):
            return prec.get_support_format().get_cst(
                prec.get_base_format().get_integer_coding(value, language))

        ## convert @p value from an input floating-point precision
        #  @p in_precision to an output support format @p out_precision
        io_precision = VirtualFormat(
            base_format=self.precision,
            support_format=ML_StdLogicVectorFormat(
                self.precision.get_bit_size()),
            get_cst=get_virtual_cst)
        # declaring standard clock and reset input signal
        #clk = self.implementation.add_input_signal("clk", ML_StdLogic)
        #reset = self.implementation.add_input_signal("reset", ML_StdLogic)
        # declaring main input variable
        vx = self.implementation.add_input_signal("x", io_precision)
        vy = self.implementation.add_input_signal("y", io_precision)

        vx_precision = self.precision
        vy_precision = self.precision
        result_precision = self.precision

        # precision for first operand vx which is to be statically
        # positioned
        p = vx_precision.get_mantissa_size()
        # precision for second operand vy which is to be dynamically shifted
        q = vy_precision.get_mantissa_size()
        # precision of output
        o = result_precision.get_mantissa_size()

        # vx must be aligned with vy
        # the largest shift amount (in absolute value) is precision + 2
        # (1 guard bit and 1 rounding bit)

        exp_vx_precision = ML_StdLogicVectorFormat(
            vx_precision.get_exponent_size())
        exp_vy_precision = ML_StdLogicVectorFormat(
            vy_precision.get_exponent_size())

        mant_vx_precision = ML_StdLogicVectorFormat(p - 1)
        mant_vy_precision = ML_StdLogicVectorFormat(q - 1)

        mant_vx = MantissaExtraction(vx, precision=mant_vx_precision)
        mant_vy = MantissaExtraction(vy, precision=mant_vy_precision)

        exp_vx = RawExponentExtraction(vx, precision=exp_vx_precision)
        exp_vy = RawExponentExtraction(vy, precision=exp_vy_precision)

        # Maximum number of leading zeros for normalized <vx>
        L_x = 0
        # Maximum number of leading zeros for normalized <vy>
        L_y = 0

        sign_vx = CopySign(vx, precision=ML_StdLogic)
        sign_vy = CopySign(vy, precision=ML_StdLogic)

        # determining if the operation is an addition (effective_op = '0')
        # or a subtraction (effective_op = '1')
        effective_op = BitLogicXor(
            sign_vx, sign_vy,
            precision=ML_StdLogic,
            tag="effective_op",
            debug=ML_Debug(display_format="-radix 2"))

        exp_vx_bias = vx_precision.get_bias()
        exp_vy_bias = vy_precision.get_bias()

        exp_offset = max(o + L_y, q) + 2
        exp_bias = exp_offset + exp_vx_bias - exp_vy_bias

        # Determine a working precision to accommodate exponent difference
        # FIXME: check interval and exponent operations size
        exp_precision_ext_size = max(vx_precision.get_exponent_size(),
                                     vy_precision.get_exponent_size()) + 2
        exp_precision_ext = ML_StdLogicVectorFormat(exp_precision_ext_size)

        # Y is first aligned offset = max(o+L_y,q) + 2 bits to the left of x
        # and then shifted right by
        # exp_diff = exp_x - exp_y + offset
        # exp_vx in [emin, emax]
        # exp_vx - exp_vx + p +2 in [emin-emax + p + 2, emax - emin + p + 2]
        exp_diff = Subtraction(
            Addition(
                zext(exp_vx,
                     exp_precision_ext_size
                     - vx_precision.get_exponent_size()),
                Constant(exp_bias, precision=exp_precision_ext),
                precision=exp_precision_ext),
            zext(exp_vy,
                 exp_precision_ext_size - vy_precision.get_exponent_size()),
            precision=exp_precision_ext,
            tag="exp_diff",
            debug=debug_std)
        signed_exp_diff = SignCast(exp_diff,
                                   specifier=SignCast.Signed,
                                   precision=exp_precision_ext)
        datapath_full_width = exp_offset + max(o + L_x, p) + 2 + q
        max_exp_diff = datapath_full_width - q
        exp_diff_lt_0 = Comparison(
            signed_exp_diff,
            Constant(0, precision=exp_precision_ext),
            specifier=Comparison.Less,
            precision=ML_Bool,
            tag="exp_diff_lt_0",
            debug=debug_std)
        exp_diff_gt_max_diff = Comparison(
            signed_exp_diff,
            Constant(max_exp_diff, precision=exp_precision_ext),
            specifier=Comparison.Greater,
            precision=ML_Bool)

        shift_amount_prec = ML_StdLogicVectorFormat(
            int(floor(log2(max_exp_diff)) + 1))

        mant_shift = Select(
            exp_diff_lt_0,
            Constant(0, precision=shift_amount_prec),
            Select(
                exp_diff_gt_max_diff,
                Constant(max_exp_diff, precision=shift_amount_prec),
                Truncate(exp_diff, precision=shift_amount_prec),
                precision=shift_amount_prec),
            precision=shift_amount_prec,
            tag="mant_shift",
            debug=ML_Debug(display_format="-radix 10"))

        mant_ext_size = max_exp_diff
        shift_prec = ML_StdLogicVectorFormat(datapath_full_width)
        shifted_mant_vy = BitLogicRightShift(
            rzext(mant_vy, mant_ext_size),
            mant_shift,
            precision=shift_prec,
            tag="shifted_mant_vy",
            debug=debug_std)
        # vx is right-extended by q+2 bits
        # and left-extended by exp_offset
        mant_vx_ext = zext(rzext(mant_vx, q + 2), exp_offset + 1)

        add_prec = ML_StdLogicVectorFormat(datapath_full_width + 1)

        mant_vx_add_op = Select(
            Comparison(effective_op,
                       Constant(1, precision=ML_StdLogic),
                       precision=ML_Bool,
                       specifier=Comparison.Equal),
            Negation(mant_vx_ext, precision=add_prec, tag="neg_mant_vx"),
            mant_vx_ext,
            precision=add_prec,
            tag="mant_vx_add_op",
            debug=ML_Debug(display_format=" "))

        mant_add = Addition(zext(shifted_mant_vy, 1),
                            mant_vx_add_op,
                            precision=add_prec,
                            tag="mant_add",
                            debug=ML_Debug(display_format=" -radix 2"))

        # if the addition overflows, then it means vx has been negated and
        # the 2's complement addition cancelled the negative MSB, thus
        # the addition result is positive, and the result is of the sign
        # of Y; else the result is of opposite sign to Y
        add_is_negative = BitLogicAnd(
            CopySign(mant_add, precision=ML_StdLogic),
            effective_op,
            precision=ML_StdLogic,
            tag="add_is_negative",
            debug=ML_Debug(" -radix 2"))
        # Negate mantissa addition result if it is negative
        mant_add_abs = Select(
            Comparison(add_is_negative,
                       Constant(1, precision=ML_StdLogic),
                       specifier=Comparison.Equal,
                       precision=ML_Bool),
            Negation(mant_add, precision=add_prec,
                     tag="neg_mant_add", debug=debug_std),
            mant_add,
            precision=add_prec,
            tag="mant_add_abs",
            debug=debug_std)

        res_sign = BitLogicXor(add_is_negative, sign_vy,
                               precision=ML_StdLogic, tag="res_sign")

        # Precision for leading zero count
        lzc_width = int(floor(log2(datapath_full_width + 1)) + 1)
        lzc_prec = ML_StdLogicVectorFormat(lzc_width)

        lzc_args = ML_LeadingZeroCounter.get_default_args(
            width=(datapath_full_width + 1))
        LZC_entity = ML_LeadingZeroCounter(lzc_args)
        lzc_entity_list = LZC_entity.generate_scheme()
        lzc_implementation = LZC_entity.get_implementation()

        lzc_component = lzc_implementation.get_component_object()

        #lzc_in = SubSignalSelection(mant_add, p+1, 2*p+3)
        lzc_in = mant_add_abs
        # SubSignalSelection(mant_add_abs, 0, 3*p+3, precision = ML_StdLogicVectorFormat(3*p+4))

        add_lzc = Signal("add_lzc",
                         precision=lzc_prec,
                         var_type=Signal.Local,
                         debug=debug_dec)
        add_lzc = PlaceHolder(
            add_lzc,
            lzc_component(io_map={"x": lzc_in, "vr_out": add_lzc}))

        # Index of output mantissa least significant bit
        mant_lsb_index = datapath_full_width - o + 1

        #add_lzc = CountLeadingZeros(mant_add, precision = lzc_prec)
        # CP stands for close path, the data path where X and Y are
        # within 1 exp diff
        res_normed_mant = BitLogicLeftShift(mant_add_abs,
                                            add_lzc,
                                            precision=add_prec,
                                            tag="res_normed_mant",
                                            debug=debug_std)
        pre_mant_field = SubSignalSelection(
            res_normed_mant,
            mant_lsb_index,
            datapath_full_width - 1,
            precision=ML_StdLogicVectorFormat(o - 1))

        ## Helper function to extract a single bit
        #  from a vector of bits signal
        def BitExtraction(optree, index, **kw):
            return VectorElementSelection(optree, index,
                                          precision=ML_StdLogic, **kw)

        def IntCst(value):
            return Constant(value, precision=ML_Integer)

        round_bit = BitExtraction(res_normed_mant,
                                  IntCst(mant_lsb_index - 1))
        mant_lsb = BitExtraction(res_normed_mant, IntCst(mant_lsb_index))
        sticky_prec = ML_StdLogicVectorFormat(datapath_full_width - o)
        sticky_input = SubSignalSelection(
            res_normed_mant, 0, datapath_full_width - o - 1,
            precision=sticky_prec)
        sticky_bit = Select(
            Comparison(sticky_input,
                       Constant(0, precision=sticky_prec),
                       specifier=Comparison.NotEqual,
                       precision=ML_Bool),
            Constant(1, precision=ML_StdLogic),
            Constant(0, precision=ML_StdLogic),
            precision=ML_StdLogic,
            tag="sticky_bit",
            debug=debug_std)

        # increment selection for rounding to nearest (tie to even)
        round_increment_RN = BitLogicAnd(
            round_bit,
            BitLogicOr(sticky_bit, mant_lsb, precision=ML_StdLogic),
            precision=ML_StdLogic,
            tag="round_increment_RN",
            debug=debug_std)

        rounded_mant = Addition(zext(pre_mant_field, 1),
                                round_increment_RN,
                                precision=ML_StdLogicVectorFormat(o),
                                tag="rounded_mant",
                                debug=debug_std)
        rounded_overflow = BitExtraction(rounded_mant,
                                         IntCst(o - 1),
                                         tag="rounded_overflow",
                                         debug=debug_std)
        res_mant_field = Select(
            Comparison(rounded_overflow,
                       Constant(1, precision=ML_StdLogic),
                       specifier=Comparison.Equal,
                       precision=ML_Bool),
            SubSignalSelection(rounded_mant, 1, o - 1),
            SubSignalSelection(rounded_mant, 0, o - 2),
            precision=ML_StdLogicVectorFormat(o - 1),
            tag="final_mant",
            debug=debug_std)

        res_exp_tmp_size = max(vx_precision.get_exponent_size(),
                               vy_precision.get_exponent_size()) + 2

        res_exp_tmp_prec = ML_StdLogicVectorFormat(res_exp_tmp_size)

        exp_vy_biased = Addition(
            zext(exp_vy,
                 res_exp_tmp_size - vy_precision.get_exponent_size()),
            Constant(vy_precision.get_bias() + 1,
                     precision=res_exp_tmp_prec),
            precision=res_exp_tmp_prec,
            tag="exp_vy_biased",
            debug=debug_dec)
        # vx's exponent is biased with the format bias
        # plus the exponent offset so it is left-aligned to datapath MSB
        exp_vx_biased = Addition(
            zext(exp_vx,
                 res_exp_tmp_size - vx_precision.get_exponent_size()),
            Constant(vx_precision.get_bias() + exp_offset + 1,
                     precision=res_exp_tmp_prec),
            precision=res_exp_tmp_prec,
            tag="exp_vx_biased",
            debug=debug_dec)

        # If exp diff is less than 0, then we must consider that vy's
        # exponent is the meaningful one and thus compute result exponent
        # with respect to vy's exponent value
        res_exp_base = Select(exp_diff_lt_0,
                              exp_vy_biased,
                              exp_vx_biased,
                              precision=res_exp_tmp_prec,
                              tag="res_exp_base",
                              debug=debug_dec)

        # Eventually we add the result exponent base
        # with the exponent offset and the leading zero count
        res_exp_ext = Addition(
            Subtraction(
                Addition(
                    zext(res_exp_base, 0),
                    Constant(-result_precision.get_bias(),
                             precision=res_exp_tmp_prec),
                    precision=res_exp_tmp_prec),
                zext(add_lzc, res_exp_tmp_size - lzc_width),
                precision=res_exp_tmp_prec),
            rounded_overflow,
            precision=res_exp_tmp_prec,
            tag="res_exp_ext",
            debug=debug_std)

        res_exp_prec = ML_StdLogicVectorFormat(
            result_precision.get_exponent_size())

        res_exp = Truncate(res_exp_ext,
                           precision=res_exp_prec,
                           tag="res_exp",
                           debug=debug_dec_unsigned)

        vr_out = TypeCast(
            FloatBuild(res_sign,
                       res_exp,
                       res_mant_field,
                       precision=self.precision),
            precision=io_precision,
            tag="result",
            debug=debug_std)

        self.implementation.add_output_signal("vr_out", vr_out)

        return lzc_entity_list + [self.implementation]

    def numeric_emulate(self, io_map):
        vx = io_map["x"]
        vy = io_map["y"]
        result = {}
        print("vx, vy")
        print(vx, vx.__class__)
        print(vy, vy.__class__)
        result["vr_out"] = sollya.round(vx + vy,
                                        self.precision.get_sollya_object(),
                                        sollya.RN)
        return result

    # standard_test_cases = [({"x": 1.0, "y": (S2**-11 + S2**-17)}, None)]
    standard_test_cases = [
        ({"x": 1.0, "y": (S2**-53 + S2**-54)}, None),
        ({"y": ML_Binary64.get_value_from_integer_coding(
              "47d273e91e2c9048", base=16),
          "x": ML_Binary64.get_value_from_integer_coding(
              "c7eea5670485a5ec", base=16)}, None),
        ({"y": ML_Binary64.get_value_from_integer_coding(
              "75164a1df94cd488", base=16),
          "x": ML_Binary64.get_value_from_integer_coding(
              "5a7567b08508e5b4", base=16)}, None),
    ]

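# ---------------------------------------------------------------------------
# Illustrative sketch: the round-to-nearest-even decision implemented above
# by round_increment_RN (round_bit AND (sticky_bit OR mant_lsb)), modeled on
# a plain integer mantissa. The helper name is hypothetical.

def rne_increment_sketch(wide_mant, kept_bits, total_bits):
    """Return the kept mantissa field and its RNE rounding increment."""
    discarded = total_bits - kept_bits
    field = wide_mant >> discarded
    lsb = field & 1
    round_bit = (wide_mant >> (discarded - 1)) & 1
    sticky = int(wide_mant & ((1 << (discarded - 1)) - 1) != 0)
    # increment iff the discarded part is > 1/2 ulp, or exactly 1/2 ulp
    # with an odd kept lsb (tie broken to even)
    return field, round_bit & (sticky | lsb)

# e.g. 0b1011_1000 kept to 4 bits: tie (r=1, s=0) with odd lsb -> increment
assert rne_increment_sketch(0b10111000, 4, 8) == (0b1011, 1)
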
class FP_Divider(ML_Entity("fp_div")):
    def __init__(self, arg_template=DefaultEntityArgTemplate):
        # initializing base class
        ML_EntityBasis.__init__(self, arg_template=arg_template)
        self.disable_sub_testing = arg_template.disable_sub_testing
        self.disable_sv_testing = arg_template.disable_sv_testing
        self.pipelined = arg_template.pipelined

    ## default argument template generation
    @staticmethod
    def get_default_args(**kw):
        default_dict = {
            "precision": ML_Binary32,
            "target": VHDLBackend(),
            "output_file": "my_fp_div.vhd",
            "entity_name": "my_fp_div",
            "language": VHDL_Code,
            "pipelined": False,
        }
        default_dict.update(kw)
        return DefaultEntityArgTemplate(**default_dict)

    def generate_scheme(self):
        def get_virtual_cst(prec, value, language):
            return prec.get_support_format().get_cst(
                prec.get_base_format().get_integer_coding(value, language))

        ## convert @p value from an input floating-point precision
        #  @p in_precision to an output support format @p out_precision
        io_precision = VirtualFormat(
            base_format=self.precision,
            support_format=ML_StdLogicVectorFormat(
                self.precision.get_bit_size()),
            get_cst=get_virtual_cst)

        # declaring main input variable
        vx = self.implementation.add_input_signal("x", io_precision)
        # rounding mode input
        rnd_mode = self.implementation.add_input_signal(
            "rnd_mode", rnd_mode_format)

        if self.pipelined:
            self.implementation.add_input_signal("reset", ML_StdLogic)

        vx_precision = self.precision
        p = vx_precision.get_mantissa_size()
        exp_size = vx_precision.get_exponent_size()

        exp_vx_precision = ML_StdLogicVectorFormat(
            vx_precision.get_exponent_size())
        mant_vx_precision = ML_StdLogicVectorFormat(p)

        # fixed-point precision for operand's exponent
        exp_fixed_precision = fixed_point(exp_size, 0, signed=False)

        # mantissa extraction
        mant_vx = TypeCast(
            MantissaExtraction(vx, precision=mant_vx_precision,
                               tag="extracted_mantissa"),
            precision=fixed_point(1, p - 1, signed=False),
            debug=debug_fixed,
            tag="mant_vx")
        # exponent extraction
        exp_vx = TypeCast(
            RawExponentExtraction(vx, precision=exp_vx_precision,
                                  tag="exp_vx"),
            precision=exp_fixed_precision)

        approx_index_size = 8
        approx_precision = fixed_point(2, approx_index_size)

        # selecting table index from input mantissa MSBs
        tab_index = SubSignalSelection(mant_vx,
                                       p - 2 - approx_index_size + 1, p - 2,
                                       tag="tab_index")

        # declaring reciprocal approximation table
        inv_approx_table = ML_NewTable(dimensions=[2**approx_index_size],
                                       storage_precision=approx_precision,
                                       tag="inv_approx_table")
        for i in range(2**approx_index_size):
            num_input = 1 + i * S2**-approx_index_size
            table_value = io_precision.get_base_format().round_sollya_object(
                1 / num_input)
            inv_approx_table[i] = table_value

        # extracting initial reciprocal approximation
        inv_approx_value = TableLoad(inv_approx_table, tab_index,
                                     precision=approx_precision,
                                     tag="inv_approx_value",
                                     debug=debug_fixed)

        #inv_approx_value = TypeCast(inv_approx_value, precision = approx_precision)
        pre_it0_input = zext(
            SubSignalSelection(mant_vx, p - 1 - approx_index_size, p - 1,
                               tag="it0_input"), 1)
        it0_input = TypeCast(pre_it0_input, precision=approx_precision,
                             tag="it0_input", debug=debug_fixed)

        it1_precision = RTL_FixedPointFormat(
            2, 2 * approx_index_size,
            support_format=ML_StdLogicVectorFormat(2 + 2 * approx_index_size))

        it1_input = mant_vx

        final_approx = generate_NR_iteration(
            mant_vx,
            inv_approx_value,
            (2, approx_index_size * 2),   # mult precision
            (-3, 2 * approx_index_size),  # error precision
            (2, approx_index_size * 3),   # new-approx mult
            (2, approx_index_size * 2),   # new approx precision
            self.implementation,
            pipelined=0,  # 1 if self.pipelined else 0,
            tag_suffix="_first")

        # Inserting post-input pipeline stage
        if self.pipelined:
            self.implementation.start_new_stage()

        final_approx = generate_NR_iteration(
            mant_vx,
            final_approx,
            # mult precision
            (2, approx_index_size * 3),
            # error precision
            (-6, approx_index_size * 3),
            # approx mult precision
            (2, approx_index_size * 3),
            # new approx precision
            (2, approx_index_size * 3),
            self.implementation,
            pipelined=1 if self.pipelined else 0,
            tag_suffix="_second")

        # Inserting post-input pipeline stage
        if self.pipelined:
            self.implementation.start_new_stage()

        final_approx = generate_NR_iteration(
            mant_vx,
            final_approx,
            # mult-precision
            (2, 2 * p - 1),
            # error precision
            (-(3 * approx_index_size) // 2,
             approx_index_size * 2 + p - 1),
            # mult approx mult precision
            (2, approx_index_size * 2 + p - 1),
            # approx precision
            (2, p),
            self.implementation,
            pipelined=2 if self.pipelined else 0,
            tag_suffix="_third")

        # Inserting post-input pipeline stage
        if self.pipelined:
            self.implementation.start_new_stage()

        final_approx = generate_NR_iteration(
            mant_vx,
            final_approx,
            (2, 2 * p),
            (-(4 * p) // 5, 2 * p),
            (2, 2 * p),
            (2, 2 * p),
            self.implementation,
            pipelined=2 if self.pipelined else 0,
            tag_suffix="_last")

        # Inserting post-input pipeline stage
        if self.pipelined:
            self.implementation.start_new_stage()

        final_approx.set_attributes(tag="final_approx", debug=debug_hex)

        last_approx_norm = final_approx

        offset_bit = BitSelection(
            last_approx_norm,
            FixedPointPosition(last_approx_norm, 0,
                               align=FixedPointPosition.FromPointToLSB),
            tag="offset_bit",
            debug=debug_std)

        # extracting bit to determine if result should be left-shifted and
        # exponent incremented
        not_decrement = offset_bit

        final_approx_reduced = SubSignalSelection(
            final_approx,
            FixedPointPosition(final_approx, -(p - 1),
                               align=FixedPointPosition.FromPointToLSB),
            FixedPointPosition(final_approx, 0,
                               align=FixedPointPosition.FromPointToLSB),
            precision=fixed_point(p, 0, signed=False))
        final_approx_reduced_shifted = SubSignalSelection(
            final_approx,
            FixedPointPosition(final_approx, -p,
                               align=FixedPointPosition.FromPointToLSB),
            FixedPointPosition(final_approx, -1,
                               align=FixedPointPosition.FromPointToLSB),
            precision=fixed_point(p, 0, signed=False))

        # unrounded mantissa field excluding leading digit
        unrounded_mant_field = Select(
            equal_to(not_decrement, 1),
            final_approx_reduced,
            final_approx_reduced_shifted,
            precision=fixed_point(p, 0, signed=False),
            tag="unrounded_mant_field",
            debug=debug_hex,
        )

        def get_bit(optree, bit_index):
            bit_sel = BitSelection(
                optree,
                FixedPointPosition(optree, -bit_index,
                                   align=FixedPointPosition.FromPointToLSB))
            return bit_sel

        mant_lsb = Select(
            equal_to(not_decrement, 1),
            get_bit(final_approx, p - 1),
            get_bit(final_approx, p),
            precision=ML_StdLogic,
            tag="mant_lsb",
            debug=debug_std,
        )
        round_bit = Select(
            equal_to(not_decrement, 1),
            get_bit(final_approx, p),
            get_bit(final_approx, p + 1),
            precision=ML_StdLogic,
            tag="round_bit",
            debug=debug_std,
        )
        sticky_bit_input = Select(
            equal_to(not_decrement, 1),
            SubSignalSelection(
                final_approx, 0,
                FixedPointPosition(final_approx, -(p + 1),
                                   align=FixedPointPosition.FromPointToLSB),
                precision=None,
                tag="sticky_bit_input"),
            SubSignalSelection(
                final_approx, 0,
                FixedPointPosition(final_approx, -(p + 2),
                                   align=FixedPointPosition.FromPointToLSB),
                precision=None,
                tag="sticky_bit_input"),
        )
        sticky_bit = Select(
            Equal(sticky_bit_input, Constant(0, precision=None)),
            Constant(0, precision=ML_StdLogic),
            Constant(1, precision=ML_StdLogic),
            precision=ML_StdLogic,
            tag="sticky_bit",
            debug=debug_std)

        # TODO: manage leading digit (in case of subnormal result)
        pre_result = unrounded_mant_field

        # real_exp = exp_vx - bias
        # - real_exp = bias - exp_vx
        # encoded negated exp = bias - exp_vx + bias = 2 * bias - exp_vx
        fp_io_precision = io_precision.get_base_format()

        neg_exp = -2 * fp_io_precision.get_bias() - exp_vx
        neg_exp.set_attributes(tag="neg_exp", debug=debug_fixed)
        res_exp = Subtraction(
            neg_exp,
            Select(
                equal_to(not_decrement, 1),
                Constant(0, precision=exp_fixed_precision),
                Constant(1, precision=exp_fixed_precision),
                precision=None,
                tag="exp_offset",
                debug=debug_fixed),
            tag="res_exp",
            debug=debug_fixed)
        res_exp_field = SubSignalSelection(
            res_exp,
            FixedPointPosition(res_exp, 0,
                               align=FixedPointPosition.FromPointToLSB,
                               tag="res_exp_field LSB"),
            FixedPointPosition(res_exp, exp_size - 1,
                               align=FixedPointPosition.FromPointToLSB,
                               tag="res_exp_field MSB"),
            precision=None,
            tag="res_exp_field",
            # debug=debug_fixed
        )

        result_sign = CopySign(vx, precision=ML_StdLogic)

        exp_mant_precision = ML_StdLogicVectorFormat(
            io_precision.get_bit_size() - 1)

        rnd_mode_is_rne = Equal(rnd_mode, rnd_rne, precision=ML_Bool)
        rnd_mode_is_ru = Equal(rnd_mode, rnd_ru, precision=ML_Bool)
        rnd_mode_is_rd = Equal(rnd_mode, rnd_rd, precision=ML_Bool)
        rnd_mode_is_rz = Equal(rnd_mode, rnd_rz, precision=ML_Bool)

        round_incr = Conversion(
            logical_or_reduce([
                logical_and_reduce([rnd_mode_is_rne,
                                    equal_to(round_bit, 1),
                                    equal_to(sticky_bit, 1)]),
                logical_and_reduce([rnd_mode_is_rne,
                                    equal_to(round_bit, 1),
                                    equal_to(sticky_bit, 0),
                                    equal_to(mant_lsb, 1)]),
                logical_and_reduce([rnd_mode_is_ru,
                                    equal_to(result_sign, 0),
                                    LogicalOr(equal_to(round_bit, 1),
                                              equal_to(sticky_bit, 1),
                                              precision=ML_Bool)]),
                logical_and_reduce([rnd_mode_is_rd,
                                    equal_to(result_sign, 1),
                                    LogicalOr(equal_to(round_bit, 1),
                                              equal_to(sticky_bit, 1),
                                              precision=ML_Bool)]),
            ]),
            precision=fixed_point(1, 0, signed=False),
            tag="round_incr",
            #debug=debug_fixed
        )

        # Precision for result without sign
        unsigned_result_prec = fixed_point((p - 1) + exp_size, 0)

        unrounded_mant_field_nomsb = Conversion(
            unrounded_mant_field,
            precision=fixed_point(p - 1, 0, signed=False),
            tag="unrounded_mant_field_nomsb",
            debug=debug_hex)

        pre_rounded_unsigned_result = Concatenation(
            res_exp_field,
            unrounded_mant_field_nomsb,
            precision=unsigned_result_prec,
            tag="pre_rounded_unsigned_result")

        unsigned_result_rounded = Addition(pre_rounded_unsigned_result,
                                           round_incr,
                                           precision=unsigned_result_prec,
                                           tag="unsigned_result")

        vr_out = TypeCast(
            Concatenation(
                result_sign,
                TypeCast(unsigned_result_rounded,
                         precision=ML_StdLogicVectorFormat(
                             p - 1 + exp_size)),
                precision=ML_StdLogicVectorFormat(
                    io_precision.get_bit_size())),
            precision=io_precision,
            debug=debug_hex,
            tag="vr_out")

        self.implementation.add_output_signal("vr_out", vr_out)

        return [self.implementation]

    def init_test_generator(self):
        """ Initialize test case generator """
        weight_map = {
            FPRandomGen.Category.SpecialValues:
                0.0 if self.disable_sv_testing else 0.1,
            FPRandomGen.Category.Subnormal:
                0.0 if self.disable_sub_testing else 0.2,
            FPRandomGen.Category.Normal: 0.7,
        }
        self.input_generator = FPRandomGen(self.precision,
                                           weight_map=weight_map)

    def generate_test_case(self, input_signals, io_map, index,
                           test_range=None):
        """ specific test case generation for K1C TCA BLAU """
        rnd_mode = random.randrange(4)
        input_values = {
            "rnd_mode": rnd_mode,
            "x": self.input_generator.get_new_value()
        }
        return input_values

    def numeric_emulate(self, io_map):
        vx = io_map["x"]
        rnd_mode_i = io_map["rnd_mode"]

        def div_numeric_emulate(vx):
            sollya_format = self.precision.get_sollya_object()
            return sollya.round(1.0 / vx, sollya_format, rnd_mode)

        rnd_mode = {
            0: sollya.RN,
            1: sollya.RU,
            2: sollya.RD,
            3: sollya.RZ
        }[rnd_mode_i]
        value_mapping = {
            is_plus_infty: lambda _: 0.0,
            is_nan: lambda _: FP_QNaN(self.precision),
            is_minus_infty: lambda _: FP_QNaN(self.precision),
            is_plus_zero: lambda _: FP_PlusInfty(self.precision),
            is_minus_zero: lambda _: FP_MinusInfty(self.precision),
            is_sv_omega: lambda op: div_numeric_emulate(op.get_value()),
            lambda op: not FP_SpecialValue.is_special_value(op):
                div_numeric_emulate,
        }
        result = {}
        for predicate in value_mapping:
            if predicate(vx):
                result["vr_out"] = value_mapping[predicate](vx)
                return result
        Log.report(Log.Error,
                   "no predicate fits {} in numeric_emulate\n".format(vx))

    #standard_test_cases = [({"x": 1.0, "y": (S2**-11 + S2**-17)}, None)]
    standard_test_cases = [
        ({"x": 2.0, "rnd_mode": 0}, None),
        ({"x": sollya.parse("0x1.24f608p0"), "rnd_mode": 0}, None),
        ({"x": 1.5, "rnd_mode": 0}, None),
    ]

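# ---------------------------------------------------------------------------
# Illustrative sketch: the rounding-mode-dependent increment computed above
# by round_incr, on plain integers. RNE increments on ties-to-even or
# above-half, RU/RD increment only for the matching sign when any discarded
# bit is set, and RZ always truncates. The constant encoding follows the
# rnd_mode mapping used in numeric_emulate; the helper name is hypothetical.

RN, RU, RD, RZ = 0, 1, 2, 3

def round_incr_sketch(rnd_mode, sign_bit, lsb, round_bit, sticky_bit):
    if rnd_mode == RN:
        return round_bit & (sticky_bit | lsb)
    if rnd_mode == RU:
        return (1 - sign_bit) & (round_bit | sticky_bit)
    if rnd_mode == RD:
        return sign_bit & (round_bit | sticky_bit)
    return 0  # RZ truncates

assert round_incr_sketch(RN, 0, 1, 1, 0) == 1  # tie, odd lsb -> round up
assert round_incr_sketch(RU, 1, 0, 1, 0) == 0  # negative, RU -> truncate
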
class AdaptativeEntity(ML_Entity("ml_adaptative_entity"), TestRunner):
    """ Adaptative Entity unit-test """
    @staticmethod
    def get_default_args(width=32, **kw):
        """ generate default argument template """
        return DefaultEntityArgTemplate(
            precision=ML_Int32,
            debug_flag=False,
            target=VHDLBackend(),
            output_file="my_adapative_entity.vhd",
            entity_name="my_adaptative_entity",
            language=VHDL_Code,
            width=width,
            passes=["beforecodegen:size_datapath"],
        )

    def __init__(self, arg_template=None):
        """ Initialize """
        # building default arg_template if necessary
        arg_template = AdaptativeEntity.get_default_args() if \
            arg_template is None else arg_template
        # initializing I/O precision
        self.width = arg_template.width
        precision = arg_template.precision
        io_precisions = [precision] * 2
        Log.report(
            Log.Info,
            "generating Adaptative Entity with width={}".format(self.width))
        # initializing base class
        ML_EntityBasis.__init__(self,
                                base_name="adaptative_design",
                                arg_template=arg_template)
        self.accuracy = arg_template.accuracy
        self.precision = arg_template.precision

    def generate_scheme(self):
        """ main scheme generation """
        Log.report(Log.Info, "width parameter is {}".format(self.width))
        int_size = 3
        frac_size = self.width - int_size

        input_precision = fixed_point(int_size, frac_size)
        output_precision = fixed_point(int_size, frac_size)

        # declaring main input variable
        var_x = self.implementation.add_input_signal("x", input_precision)
        var_y = self.implementation.add_input_signal("y", input_precision)
        var_x.set_attributes(debug=debug_fixed)
        var_y.set_attributes(debug=debug_fixed)

        test = (var_x > 1)
        test.set_attributes(tag="test", debug=debug_std)

        large_add = (var_x + var_y)

        pre_result = Select(test, 1, large_add,
                            tag="pre_result", debug=debug_fixed)

        result = Conversion(pre_result, precision=output_precision)

        self.implementation.add_output_signal("vr_out", result)

        return [self.implementation]

    standard_test_cases = [
        ({"x": 2, "y": 2}, None),
        ({"x": 1, "y": 2}, None),
        ({"x": 0.5, "y": 2}, None),
        ({"x": -1, "y": -1}, None),
    ]

    def numeric_emulate(self, io_map):
        """ Meta-Function numeric emulation """
        int_size = 3
        frac_size = self.width - int_size
        input_precision = fixed_point(int_size, frac_size)
        output_precision = fixed_point(int_size, frac_size)
        value_x = io_map["x"]
        value_y = io_map["y"]
        test = value_x > 1
        large_add = output_precision.truncate(value_x + value_y)
        result_value = 1 if test else large_add
        result = {"vr_out": result_value}
        print(io_map, result)
        return result

    @staticmethod
    def __call__(args):
        # just ignore args here and trust default constructor?
        # seems like a bad idea.
        ut_adaptative_entity = AdaptativeEntity(args)
        ut_adaptative_entity.gen_implementation()
        return True

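# ---------------------------------------------------------------------------
# Illustrative sketch: the fixed-point truncation used by numeric_emulate
# above, on plain floats. A fixed_point(int_size, frac_size) value keeps
# frac_size fractional bits; truncation is modeled here as rounding toward
# minus infinity at that granularity, which is an assumption about
# fixed_point.truncate's behavior for negative values. The helper name is
# hypothetical.

import math

def fixed_truncate_sketch(value, frac_size):
    """Truncate value to frac_size fractional bits."""
    scale = 2**frac_size
    return math.floor(value * scale) / scale

assert fixed_truncate_sketch(2.7, 2) == 2.5
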
class MultArray(ML_Entity("mult_array")): def __init__(self, arg_template=DefaultEntityArgTemplate, precision=fixed_point(32, 0, signed=False), accuracy=ML_Faithful, debug_flag=False, target=VHDLBackend(), output_file="mult_array.vhd", entity_name="mult_array", language=VHDL_Code, acc_prec=None, pipelined=False): # initializing I/O precision precision = arg_template.precision io_precisions = [precision] * 2 # initializing base class ML_EntityBasis.__init__(self, base_name="mult_array", entity_name=entity_name, output_file=output_file, io_precisions=io_precisions, backend=target, debug_flag=debug_flag, language=language, arg_template=arg_template) self.accuracy = accuracy # main precision (used for product operand and default for accumulator) self.precision = precision # enable operator pipelining self.pipelined = pipelined # multiplication input descriptor self.op_expr = arg_template.op_expr self.dummy_mode = arg_template.dummy_mode self.booth_mode = arg_template.booth_mode # reduction method self.reduction_method = arg_template.method # limit of height for each compression stage self.stage_height_limit = arg_template.stage_height_limit ## default argument template generation @staticmethod def get_default_args(**kw): default_dict = { "precision": fixed_point(32, 0), "target": VHDLBackend(), "output_file": "mult_array.vhd", "entity_name": "mult_array", "language": VHDL_Code, "pipelined": False, "dummy_mode": False, "booth_mode": False, "method": ReductionMethod.Wallace, "op_expr": multiplication_descriptor_parser("FS9.0xFS13.0"), "stage_height_limit": [None], "passes": [ ("beforepipelining:size_datapath"), ("beforepipelining:rtl_legalize"), ("beforepipelining:unify_pipeline_stages"), ], } default_dict.update(kw) return DefaultEntityArgTemplate(**default_dict) def generate_scheme(self): if self.dummy_mode: return self.generate_dummy_scheme() else: return self.generate_advanced_scheme() def clean_stage(self, stage_id): """ translate stage_id to current stage value if stage_id is undefined (None) """ if stage_id is None: return self.implementation.get_current_stage() else: return stage_id def instanciate_inputs(self, insert_mul_callback=(lambda a, b: (Multiplication, [a, b])), insert_op_callback=(lambda op: ((lambda v: v), [op]))): """ Generate an ordered dict of <stage_id> -> callback to add operation for each multiplication input and addition/standard input encountered in self.op_expr. A specific callback is used for multiplication and another for addition.
Each callback returns a tuple (method, op_list) and will call method(op_list) to insert the operation at the proper stage """ self.io_tags = {} stage_map = collections.defaultdict(list) for index, operation_input in enumerate(self.op_expr): if isinstance(operation_input, MultInput): mult_input = operation_input a_i_tag = "a_%d_i" % index if mult_input.lhs_tag is None else mult_input.lhs_tag b_i_tag = "b_%d_i" % index if mult_input.rhs_tag is None else mult_input.rhs_tag self.io_tags[("lhs", index)] = a_i_tag self.io_tags[("rhs", index)] = b_i_tag a_i = self.implementation.add_input_signal( a_i_tag, mult_input.lhs_precision) b_i = self.implementation.add_input_signal( b_i_tag, mult_input.rhs_precision) lhs_stage = self.clean_stage(mult_input.lhs_stage) rhs_stage = self.clean_stage(mult_input.rhs_stage) a_i.set_attributes(init_stage=lhs_stage) b_i.set_attributes(init_stage=rhs_stage) op_stage = max(lhs_stage, rhs_stage) stage_map[op_stage].append(insert_mul_callback(a_i, b_i)) elif isinstance(operation_input, OpInput): c_i_tag = "c_%d_i" % index if operation_input.tag is None else operation_input.tag self.io_tags[("op", index)] = c_i_tag c_i = self.implementation.add_input_signal( c_i_tag, operation_input.precision) op_stage = self.clean_stage(operation_input.stage) c_i.set_attributes(init_stage=self.clean_stage(op_stage)) stage_map[op_stage].append(insert_op_callback(c_i)) return stage_map def generate_dummy_scheme(self): Log.report( Log.Info, "generating MultArray with output precision {precision}".format( precision=self.precision)) acc = None a_inputs = {} b_inputs = {} stage_map = self.instanciate_inputs() stage_index_list = sorted(stage_map.keys()) for stage_id in stage_index_list: # synchronizing pipeline stage if stage_id is None: pass else: while stage_id > self.implementation.get_current_stage(): self.implementation.start_new_stage() operation_list = stage_map[stage_id] for ctor, operand_list in operation_list: new_term = ctor(*tuple(operand_list)) if acc is None: acc = new_term else: acc = Addition(acc, new_term) result = Conversion(acc, precision=self.precision) self.implementation.add_output_signal("result_o", result) return [self.implementation] def generate_advanced_scheme(self): ## Generate Fused multiply and add computing <x> .
<y> + <z> Log.report( Log.Info, "generating MultArray with output precision {precision}".format( precision=self.precision)) acc = None def merge_product_in_heap(operand_list, pos_bit_heap, neg_bit_heap): """ generate product operand_list[0] * operand_list[1] and insert all the partial products into the heaps @p pos_bit_heap (positive bits) and @p neg_bit_heap (negative bits) """ a_i, b_i = operand_list if self.booth_mode: booth_radix4_multiply(a_i, b_i, pos_bit_heap, neg_bit_heap) else: # non-booth product generation a_i_precision = a_i.get_precision() b_i_precision = b_i.get_precision() a_i_signed = a_i_precision.get_signed() b_i_signed = b_i.get_precision().get_signed() unsigned_prod = not (a_i_signed) and not (b_i_signed) a_i_size = a_i_precision.get_bit_size() b_i_size = b_i_precision.get_bit_size() for pp_index in range(a_i_size): a_j_signed = a_i_signed and (pp_index == a_i_size - 1) bit_a_j = BitSelection(a_i, pp_index) pp = Select(equal_to(bit_a_j, 1), b_i, 0) offset = pp_index - a_i_precision.get_frac_size() for b_index in range(b_i_size): b_k_signed = b_i_signed and (b_index == b_i_size - 1) pp_signed = a_j_signed ^ b_k_signed pp_weight = offset + b_index local_bit = BitSelection(pp, b_index) if pp_signed: neg_bit_heap.insert_bit(pp_weight, local_bit) else: pos_bit_heap.insert_bit(pp_weight, local_bit) def merge_addition_in_heap(operand_list, pos_bit_heap, neg_bit_heap): """ expand addition of operand_list[0] to the bit heaps """ add_op = operand_list[0] precision = add_op.get_precision() size = precision.get_bit_size() offset = -precision.get_frac_size() # most significant bit if precision.get_signed(): neg_bit_heap.insert_bit(size - 1 + offset, BitSelection(add_op, size - 1)) else: pos_bit_heap.insert_bit(size - 1 + offset, BitSelection(add_op, size - 1)) # any other bit for index in range(size - 1): pos_bit_heap.insert_bit(index + offset, BitSelection(add_op, index)) # generating input signals stage_operation_map = self.instanciate_inputs( insert_mul_callback=lambda a, b: (merge_product_in_heap, [a, b]), insert_op_callback=lambda op: (merge_addition_in_heap, [op])) # heap of positive bits pos_bit_heap = BitHeap() # heap of negative bits neg_bit_heap = BitHeap() def reduce_heap(pos_bit_heap, neg_bit_heap, limit=2): """ reduce both pos_bit_heap and neg_bit_heap until their height is lower or equal to @p limit """ # Partial Product reduction while pos_bit_heap.max_count() > limit: pos_bit_heap = REDUCTION_METHOD_MAP[self.reduction_method]( pos_bit_heap) dump_heap_stats("pos_bit_heap", pos_bit_heap) while neg_bit_heap.max_count() > limit: neg_bit_heap = REDUCTION_METHOD_MAP[self.reduction_method]( neg_bit_heap) dump_heap_stats("neg_bit_heap", neg_bit_heap) return pos_bit_heap, neg_bit_heap def dump_heap_stats(title, bit_heap): Log.report(Log.Verbose, " {} max count: {}", title, bit_heap.max_count()) stage_index_list = sorted(stage_operation_map.keys()) for stage_id in stage_index_list: Log.report(Log.Verbose, "considering stage: {}".format(stage_id)) # synchronizing pipeline stage if stage_id is None: pass else: while stage_id > self.implementation.get_current_stage(): current_stage_id = self.implementation.get_current_stage() pos_bit_heap, neg_bit_heap = reduce_heap( pos_bit_heap, neg_bit_heap, limit=self.stage_height_limit[current_stage_id]) Log.report( Log.Verbose, "Inserting a new pipeline stage, stage_id={}, current_stage={}", stage_id, self.implementation.get_current_stage()) self.implementation.start_new_stage() # inserting input operations that appears at this stage 
operation_list = stage_operation_map[stage_id] for merge_ctor, operand_list in operation_list: Log.report(Log.Verbose, "merging a new operation result") merge_ctor(operand_list, pos_bit_heap, neg_bit_heap) Log.report(Log.Verbose, "final stage reduction") # final stage reduction pos_bit_heap, neg_bit_heap = reduce_heap(pos_bit_heap, neg_bit_heap) # final conversion to scalar operands pos_op_list, pos_assign_statement = convert_bit_heap_to_fixed_point( pos_bit_heap, signed=False) neg_op_list, neg_assign_statement = convert_bit_heap_to_fixed_point( neg_bit_heap, signed=False) # a PlaceHolder is inserted to force forwarding of op_statement # which will be removed otherwise as it does not appear anywhere in # the final operation graph acc = None if len(pos_op_list) > 0: reduced_pos_sum = reduce(operator.__add__, pos_op_list) reduced_pos_sum.set_attributes(tag="reduced_pos_sum", debug=debug_fixed) pos_acc = PlaceHolder(reduced_pos_sum, pos_assign_statement) acc = pos_acc if len(neg_op_list) > 0: reduced_neg_sum = reduce(operator.__add__, neg_op_list) reduced_neg_sum.set_attributes(tag="reduced_neg_sum", debug=debug_fixed) neg_acc = PlaceHolder(reduced_neg_sum, neg_assign_statement) acc = neg_acc if acc is None else acc - neg_acc acc.set_attributes(tag="raw_acc", debug=debug_fixed) self.precision = fixed_point(self.precision.get_integer_size(), self.precision.get_frac_size(), signed=self.precision.get_signed()) result = Conversion(acc, tag="result", precision=self.precision, debug=debug_fixed) self.implementation.add_output_signal("result_o", result) return [self.implementation] @property def standard_test_cases(self): test_case_max = {} test_case_min = {} for index, operation in enumerate(self.op_expr): if isinstance(operation, MultInput): a_i_tag = self.io_tags[("lhs", index)] b_i_tag = self.io_tags[("rhs", index)] test_case_max[a_i_tag] = operation.lhs_precision.get_max_value( ) test_case_max[b_i_tag] = operation.rhs_precision.get_max_value( ) test_case_min[a_i_tag] = operation.lhs_precision.get_min_value( ) test_case_min[b_i_tag] = operation.rhs_precision.get_min_value( ) elif isinstance(operation, OpInput): c_i_tag = self.io_tags[("op", index)] test_case_max[c_i_tag] = operation.precision.get_max_value() test_case_min[c_i_tag] = operation.precision.get_min_value() else: raise NotImplementedError return [(test_case_max, None), (test_case_min, None)] def numeric_emulate(self, io_map): acc = 0 for index, operation in enumerate(self.op_expr): if isinstance(operation, MultInput): a_i_tag = self.io_tags[("lhs", index)] b_i_tag = self.io_tags[("rhs", index)] a_i = io_map[a_i_tag] b_i = io_map[b_i_tag] acc += a_i * b_i elif isinstance(operation, OpInput): c_i_tag = self.io_tags[("op", index)] c_i = io_map[c_i_tag] acc += c_i # assert acc >= 0 return {"result_o": acc}
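# ---------------------------------------------------------------------------
# Plain-Python sketch of the bit-heap technique generate_advanced_scheme
# relies on: each partial product of an unsigned a x b is dropped into a
# per-weight column, columns are compressed with 3:2 counters (full adders)
# until no column holds more than two bits, and the surviving rows are summed
# by a final carry-propagate addition. Behavioural model only; the RTL
# version above works on signals and also handles signed/booth operands and
# per-stage height limits.
import collections

def multiply_via_bit_heap(a, b, width=8):
    heap = collections.defaultdict(list)
    # partial-product generation: bit i of a selects (b << i)
    for i in range(width):
        if (a >> i) & 1:
            for j in range(width):
                heap[i + j].append((b >> j) & 1)
    # 3:2 compression: three bits of weight w become sum@w plus carry@w+1
    while heap and max(len(col) for col in heap.values()) > 2:
        nxt = collections.defaultdict(list)
        for w, col in heap.items():
            full, rest = divmod(len(col), 3)
            for k in range(full):
                x, y, z = col[3 * k:3 * k + 3]
                nxt[w].append(x ^ y ^ z)
                nxt[w + 1].append(int(x + y + z >= 2))
            nxt[w].extend(col[len(col) - rest:])
        heap = nxt
    # final addition of the (at most two) remaining rows
    return sum(bit << w for w, col in heap.items() for bit in col)

assert multiply_via_bit_heap(123, 45) == 123 * 45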
class UTSpecialValues(ML_Entity("ut_special_values"), TestRunner): """ Special values unit-test """ @staticmethod def get_default_args(width=32, **kw): """ generate default argument template """ return DefaultEntityArgTemplate( precision=ML_Int32, debug_flag=False, target=VHDLBackend(), output_file="my_range_eval_entity.vhd", entity_name="my_range_eval_entity", language=VHDL_Code, width=width, # passes=[("beforecodegen:size_datapath")], ) def __init__(self, arg_template=None): """ Initialize """ # building default arg_template if necessary arg_template = UTSpecialValues.get_default_args() if \ arg_template is None else arg_template # initializing I/O precision precision = arg_template.precision io_precisions = [precision] * 2 self.width = 17 # initializing base class ML_EntityBasis.__init__(self, base_name="adaptative_design", arg_template=arg_template) self.accuracy = arg_template.accuracy self.precision = arg_template.precision def generate_scheme(self): """ main scheme generation """ int_size = 3 frac_size = self.width - int_size input_precision = fixed_point(int_size, frac_size) output_precision = fixed_point(int_size, frac_size) expected_interval = {} # declaring main input variable var_x = self.implementation.add_input_signal("x", input_precision) x_interval = Interval(-10.3, 10.7) var_x.set_interval(x_interval) expected_interval[var_x] = x_interval var_y = self.implementation.add_input_signal("y", input_precision) y_interval = Interval(-17.9, 17.2) var_y.set_interval(y_interval) expected_interval[var_y] = y_interval var_z = self.implementation.add_input_signal("z", input_precision) z_interval = Interval(-7.3, 7.7) var_z.set_interval(z_interval) expected_interval[var_z] = z_interval cst = Constant(42.5, tag="cst") expected_interval[cst] = Interval(42.5) conv_ceil = Ceil(var_x, tag="ceil") expected_interval[conv_ceil] = sollya.ceil(x_interval) conv_floor = Floor(var_y, tag="floor") expected_interval[conv_floor] = sollya.floor(y_interval) mult = var_z * var_x mult.set_tag("mult") mult_interval = z_interval * x_interval expected_interval[mult] = mult_interval large_add = (var_x + var_y) - mult large_add.set_attributes(tag="large_add") large_add_interval = (x_interval + y_interval) - mult_interval expected_interval[large_add] = large_add_interval reduced_result = Max(0, Min(large_add, 13)) reduced_result.set_tag("reduced_result") reduced_result_interval = interval_max( Interval(0), interval_min(large_add_interval, Interval(13))) expected_interval[reduced_result] = reduced_result_interval select_result = Select(var_x > var_y, reduced_result, var_z, tag="select_result") select_interval = interval_union(reduced_result_interval, z_interval) expected_interval[select_result] = select_interval # checking interval evaluation for var in [ cst, var_x, var_y, mult, large_add, reduced_result, select_result, conv_ceil, conv_floor ]: interval = evaluate_range(var) expected = expected_interval[var] print("{}: {} vs expected {}".format(var.get_tag(), interval, expected)) assert not interval is None assert interval == expected return [self.implementation] standard_test_cases = [ ({ "x": 2, "y": 2 }, None), ({ "x": 1, "y": 2 }, None), ({ "x": 0.5, "y": 2 }, None), ({ "x": -1, "y": -1 }, None), ] def numeric_emulate(self, io_map): """ Meta-Function numeric emulation """ raise NotImplementedError @staticmethod def __call__(args): PRECISION = ML_Binary64 value_list = [ FP_PlusInfty(PRECISION), FP_MinusInfty(PRECISION), FP_PlusZero(PRECISION), FP_MinusZero(PRECISION), FP_QNaN(PRECISION),
FP_SNaN(PRECISION), #FP_PlusOmega(PRECISION), #FP_MinusOmega(PRECISION), NumericValue(7.0), NumericValue(-3.0), ] op_map = { "+": operator.__add__, "-": operator.__sub__, "*": operator.__mul__, } for op in op_map: for lhs in value_list: for rhs in value_list: print("{} {} {} = {}".format(lhs, op, rhs, op_map[op](lhs, rhs))) return True
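# ---------------------------------------------------------------------------
# The same cross-product experiment can be reproduced with native IEEE-754
# doubles: Python floats already follow the special-value rules (NaN
# propagation, inf - inf = NaN, signed zeros) that the FP_* special-value
# objects used above model explicitly. Minimal sketch:
import operator

if __name__ == "__main__":
    specials = [float("inf"), float("-inf"), 0.0, -0.0, float("nan"), 7.0, -3.0]
    for symbol, op in [("+", operator.add), ("-", operator.sub), ("*", operator.mul)]:
        for lhs in specials:
            for rhs in specials:
                print("{} {} {} = {}".format(lhs, symbol, rhs, op(lhs, rhs)))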
class RangeEvalEntity(ML_Entity("ml_range_eval_entity"), TestRunner): """ Range Eval Entity unit-test """ @staticmethod def get_default_args(width=32, **kw): """ generate default argument template """ return DefaultEntityArgTemplate( precision=ML_Int32, debug_flag=False, target=VHDLBackend(), output_file="my_range_eval_entity.vhd", entity_name="my_range_eval_entity", language=VHDL_Code, width=width, # passes=[("beforecodegen:size_datapath")], ) def __init__(self, arg_template=None): """ Initialize """ # building default arg_template if necessary arg_template = RangeEvalEntity.get_default_args() if \ arg_template is None else arg_template # initializing I/O precision precision = arg_template.precision io_precisions = [precision] * 2 self.width = 17 # initializing base class ML_EntityBasis.__init__(self, base_name="adaptative_design", arg_template=arg_template ) self.accuracy = arg_template.accuracy self.precision = arg_template.precision def generate_scheme(self): """ main scheme generation """ int_size = 3 frac_size = self.width - int_size input_precision = fixed_point(int_size, frac_size) output_precision = fixed_point(int_size, frac_size) expected_interval = {} # declaring main input variable var_x = self.implementation.add_input_signal("x", input_precision) x_interval = Interval(-10.3,10.7) var_x.set_interval(x_interval) expected_interval[var_x] = x_interval var_y = self.implementation.add_input_signal("y", input_precision) y_interval = Interval(-17.9,17.2) var_y.set_interval(y_interval) expected_interval[var_y] = y_interval var_z = self.implementation.add_input_signal("z", input_precision) z_interval = Interval(-7.3,7.7) var_z.set_interval(z_interval) expected_interval[var_z] = z_interval cst = Constant(42.5, tag = "cst") expected_interval[cst] = Interval(42.5) conv_ceil = Ceil(var_x, tag = "ceil") expected_interval[conv_ceil] = sollya.ceil(x_interval) conv_floor = Floor(var_y, tag = "floor") expected_interval[conv_floor] = sollya.floor(y_interval) mult = var_z * var_x mult.set_tag("mult") mult_interval = z_interval * x_interval expected_interval[mult] = mult_interval large_add = (var_x + var_y) - mult large_add.set_attributes(tag = "large_add") large_add_interval = (x_interval + y_interval) - mult_interval expected_interval[large_add] = large_add_interval var_x_lzc = CountLeadingZeros(var_x, tag="var_x_lzc") expected_interval[var_x_lzc] = Interval(0, input_precision.get_bit_size()) reduced_result = Max(0, Min(large_add, 13)) reduced_result.set_tag("reduced_result") reduced_result_interval = interval_max( Interval(0), interval_min( large_add_interval, Interval(13) ) ) expected_interval[reduced_result] = reduced_result_interval select_result = Select( var_x > var_y, reduced_result, var_z, tag = "select_result" ) select_interval = interval_union(reduced_result_interval, z_interval) expected_interval[select_result] = select_interval # floating-point operation on mantissa and exponents fp_x_range = Interval(-0.01, 100) unbound_fp_var = Variable("fp_x", precision=ML_Binary32, interval=fp_x_range) mant_fp_x = MantissaExtraction(unbound_fp_var, tag="mant_fp_x", precision=ML_Binary32) exp_fp_x = ExponentExtraction(unbound_fp_var, tag="exp_fp_x", precision=ML_Int32) ins_exp_fp_x = ExponentInsertion(exp_fp_x, tag="ins_exp_fp_x", precision=ML_Binary32) expected_interval[unbound_fp_var] = fp_x_range expected_interval[exp_fp_x] = Interval( sollya.floor(sollya.log2(sollya.inf(abs(fp_x_range)))), sollya.floor(sollya.log2(sollya.sup(abs(fp_x_range)))) ) expected_interval[mant_fp_x] = Interval(1, 2) 
expected_interval[ins_exp_fp_x] = Interval( S2**sollya.inf(expected_interval[exp_fp_x]), S2**sollya.sup(expected_interval[exp_fp_x]) ) # checking interval evaluation for var in [var_x_lzc, exp_fp_x, unbound_fp_var, mant_fp_x, ins_exp_fp_x, cst, var_x, var_y, mult, large_add, reduced_result, select_result, conv_ceil, conv_floor]: interval = evaluate_range(var) expected = expected_interval[var] print("{}: {}".format(var.get_tag(), interval)) print(" vs expected {}".format(expected)) assert not interval is None assert interval == expected return [self.implementation] standard_test_cases = [ ({"x": 2, "y": 2}, None), ({"x": 1, "y": 2}, None), ({"x": 0.5, "y": 2}, None), ({"x": -1, "y": -1}, None), ] def numeric_emulate(self, io_map): """ Meta-Function numeric emulation """ raise NotImplementedError @staticmethod def __call__(args): # just ignore args here and trust default constructor? # seems like a bad idea. ut_range_eval_entity = RangeEvalEntity(args) ut_range_eval_entity.gen_implementation() return True
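# ---------------------------------------------------------------------------
# Toy interval-arithmetic model (hypothetical helpers, not the metalibm
# evaluate_range pass) showing the propagation rules the two unit tests above
# assert: addition/subtraction act on endpoints, multiplication takes the
# min/max of the four endpoint products, interval_min/interval_max are
# endpoint-wise, and Select yields the union of its branch ranges.
def iv_add(a, b): return (a[0] + b[0], a[1] + b[1])
def iv_sub(a, b): return (a[0] - b[1], a[1] - b[0])
def iv_mul(a, b):
    prods = [x * y for x in a for y in b]
    return (min(prods), max(prods))
def iv_min(a, b): return (min(a[0], b[0]), min(a[1], b[1]))
def iv_max(a, b): return (max(a[0], b[0]), max(a[1], b[1]))
def iv_union(a, b): return (min(a[0], b[0]), max(a[1], b[1]))

if __name__ == "__main__":
    x, y, z = (-10.3, 10.7), (-17.9, 17.2), (-7.3, 7.7)
    mult = iv_mul(z, x)
    large_add = iv_sub(iv_add(x, y), mult)
    reduced = iv_max((0, 0), iv_min(large_add, (13, 13)))
    print("mult      =", mult)
    print("large_add =", large_add)
    print("select    =", iv_union(reduced, z))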
class FP_Trunc(ML_Entity("fp_trunc")): def __init__(self, arg_template=DefaultEntityArgTemplate): # initializing I/O precision # initializing base class ML_EntityBasis.__init__(self, arg_template=arg_template) self.precision = arg_template.precision ## Generate default arguments structure (before any user / test overload) @staticmethod def get_default_args(**kw): default_arg_map = { "precision": HdlVirtualFormat(ML_Binary32), "pipelined": False, "output_file": "fp_trunc.vhd", "entity_name": "fp_trunc", "language": VHDL_Code, "passes": [("beforecodegen:size_datapath")], } default_arg_map.update(**kw) return DefaultEntityArgTemplate(**default_arg_map) def generate_scheme(self): vx = self.implementation.add_input_variable("vx", self.precision) support_format = self.precision.get_support_format() base_format = self.precision.get_base_format() exp_precision = fixed_point(base_format.get_exponent_size(), 0, signed=False) def fixed_exponent(op): e = op.get_precision().get_base_format().get_exponent_size() pre_exp_precision = ML_StdLogicVectorFormat(e) pre_exp = RawExponentExtraction(op, precision=pre_exp_precision) return TypeCast( pre_exp, precision=fixed_point(e, 0, signed=False), ) def fixed_mantissa(op): m = op.get_precision().get_base_format().get_mantissa_size() pre_mant_precision = ML_StdLogicVectorFormat(m) pre_mant = MantissaExtraction(op, precision=pre_mant_precision) return TypeCast(pre_mant, precision=fixed_point(m, 0, signed=False)) p = base_format.get_field_size() n = support_format.get_bit_size() vx_exp = fixed_exponent(vx).modify_attributes(tag="vx_exp", debug=debug_fixed) vx_mant = fixed_mantissa(vx) fixed_support_format = fixed_point(support_format.get_bit_size(), 0, signed=False) # shift amount to normalize mantissa into an integer int_norm_shift = Max(p - (vx_exp + base_format.get_bias()), 0, tag="int_norm_shift", debug=debug_fixed) pre_mant_mask = Constant(2**n - 1, precision=fixed_support_format) mant_mask = TypeCast(BitLogicLeftShift(pre_mant_mask, int_norm_shift, precision=fixed_support_format), precision=support_format, tag="mant_mask", debug=debug_std) #mant_mask = BitLogicNegate(neg_mant_mask, precision=support_format, tag="mant_mask", debug=debug_std) normed_result = TypeCast(BitLogicAnd(TypeCast( vx, precision=support_format), mant_mask, precision=support_format), precision=self.precision) vr_out = Select( # if exponent exceeds (precision - 1), then value # is equal to its integer part vx_exp + base_format.get_bias() > base_format.get_field_size(), vx, Select(vx_exp + base_format.get_bias() < 0, Constant(0, precision=self.precision), normed_result, precision=self.precision), precision=self.precision) self.implementation.add_output_signal("vr_out", vr_out) return [self.implementation] def numeric_emulate(self, io_map): vx = io_map["vx"] result = {} base_format = self.precision.get_base_format() result["vr_out"] = (sollya.floor if vx > 0 else sollya.ceil)(vx) return result standard_test_cases = [ ({ "vx": ML_Binary32.get_value_from_integer_coding("0x48bef48d", base=16) }, None), ({ "vx": 1.0 }, None), ]
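# ---------------------------------------------------------------------------
# Software sketch of the masking trick FP_Trunc uses: for 1 <= |x| < 2**24,
# truncation only needs to clear the fraction bits whose weight is below 2**0,
# which is a single AND with a shifted mask on the raw binary32 encoding.
# struct is used here to reach that encoding; the exponent comparisons mirror
# the Select cascade above (illustrative helper, not metalibm code).
import struct

def trunc_binary32(x):
    enc = struct.unpack("<I", struct.pack("<f", x))[0]
    exp = ((enc >> 23) & 0xFF) - 127             # unbiased exponent
    if exp >= 23:                                # no fraction bits stored
        return x
    if exp < 0:                                  # |x| < 1 truncates to +/-0
        return struct.unpack("<f", struct.pack("<I", enc & 0x80000000))[0]
    mask = (0xFFFFFFFF << (23 - exp)) & 0xFFFFFFFF
    return struct.unpack("<f", struct.pack("<I", enc & mask))[0]

assert trunc_binary32(1.5) == 1.0
assert trunc_binary32(-2.75) == -2.0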
class ML_LeadingZeroAnticipator(ML_Entity("ml_lza")): @staticmethod def get_default_args(width=32, signed=False, **kw): return DefaultEntityArgTemplate( precision = ML_Int32, debug_flag = False, target = vhdl_backend.VHDLBackend(), output_file = "my_lza.vhd", entity_name = "my_lza", language = VHDL_Code, width = width, signed = signed, **kw ) @staticmethod def generate_lza(lhs_input, rhs_input, lhs_signed=False, rhs_signed=False): """ Generate LZA sub-graph returning POSitive and NEGative leading zero counts, lhs and rhs are assumed to be right aligned (LSB have identical weights) """ lhs_size = lhs_input.get_precision().get_bit_size() rhs_size = rhs_input.get_precision().get_bit_size() lhs_raw_format = ML_StdLogicVectorFormat(lhs_size) lhs_fixed_format = fixed_point(lhs_size, 0, signed=lhs_signed) rhs_raw_format = ML_StdLogicVectorFormat(rhs_size) rhs_fixed_format = fixed_point(rhs_size, 0, signed=rhs_signed) common_size = 1 + max(rhs_size, lhs_size) common_fixed_format = fixed_point( common_size, 0, signed=(lhs_signed or rhs_signed) ) common_raw_format = ML_StdLogicVectorFormat(common_size) lhs = TypeCast( Conversion( TypeCast( lhs_input, precision=lhs_fixed_format ), precision=common_fixed_format ), precision=common_raw_format ) rhs = TypeCast( Conversion( TypeCast( rhs_input, precision=rhs_fixed_format ), precision=common_fixed_format ), precision=common_raw_format ) # design based on "1 GHz Leading Zero Anticipator Using Independent # Sign-Bit Determination Logic" # by K. T. Lee and K. J. Nowka propagate = BitLogicXor( lhs, rhs, tag="propagate", precision=common_raw_format ) kill = BitLogicAnd( BitLogicNegate(lhs, precision=common_raw_format), BitLogicNegate(rhs, precision=common_raw_format), tag="kill", precision=common_raw_format ) generate_s = BitLogicAnd( lhs, rhs, tag="generate_s", precision=common_raw_format ) pos_signal = BitLogicNegate( BitLogicXor( SubSignalSelection(propagate, 1, common_size - 1), SubSignalSelection(kill, 0, common_size - 2), precision=ML_StdLogicVectorFormat(common_size - 1) ), tag="pos_signal", debug=debug_std, precision=ML_StdLogicVectorFormat(common_size-1) ) neg_signal = BitLogicNegate( BitLogicXor( SubSignalSelection(propagate, 1, common_size - 1), SubSignalSelection(generate_s, 0, common_size - 2), precision=ML_StdLogicVectorFormat(common_size - 1) ), tag="neg_signal", debug=debug_std, precision=ML_StdLogicVectorFormat(common_size - 1) ) lzc_width = int(floor(log2(common_size-1))) + 1 lzc_format = ML_StdLogicVectorFormat(lzc_width) pos_lzc = CountLeadingZeros( pos_signal, tag="pos_lzc", precision=lzc_format ) neg_lzc = CountLeadingZeros( neg_signal, tag="neg_lzc", precision=lzc_format ) return pos_lzc, neg_lzc def __init__(self, arg_template = None): # building default arg_template if necessary arg_template = ML_LeadingZeroAnticipator.get_default_args() if arg_template is None else arg_template # initializing I/O precision self.width = arg_template.width precision = arg_template.precision io_precisions = [precision] * 2 Log.report(Log.Info, "generating LZC with width={}".format(self.width)) # initializing base class ML_EntityBasis.__init__(self, base_name = "ml_lza", arg_template = arg_template ) self.accuracy = arg_template.accuracy self.precision = arg_template.precision self.signed = arg_template.signed def generate_scheme(self): fixed_precision = fixed_point(self.width, 0, signed=self.signed) # declaring main input variable vx = self.implementation.add_input_signal("x", fixed_precision) vy = self.implementation.add_input_signal("y", 
fixed_precision) ext_width = self.width + 1 fixed_precision_ext = fixed_point(ext_width, 0, signed=self.signed) input_precision = ML_StdLogicVectorFormat(ext_width) pos_lzc, neg_lzc = ML_LeadingZeroAnticipator.generate_lza( vx, vy, lhs_signed=self.signed, rhs_signed=self.signed ) self.implementation.add_output_signal("pos_lzc_o", pos_lzc) self.implementation.add_output_signal("neg_lzc_o", neg_lzc) return [self.implementation] def numeric_emulate(self, io_map): """ emulate leading zero anticipation """ def count_leading_zero(v, w): """ generic leading zero count """ tmp = v lzc = -1 for i in range(w): if int(tmp) & 2**(w - 1 - i): return i return w vx = io_map["x"] vy = io_map["y"] pre_op = abs(vx + vy) result = {} result["final_lzc"] = count_leading_zero(pre_op, self.width+1) return result def implement_test_case(self, io_map, input_values, output_signals, output_values, time_step, index=None): """ Implement the test case check and assertion whose I/Os values are described in input_values and output_values dict """ test_statement = Statement() # string message describing expected input values # and dumping actual results input_msg = "" # Adding input setting for input_tag in input_values: input_signal = io_map[input_tag] # FIXME: correct value generation depending on signal precision input_value = input_values[input_tag] test_statement.add(get_input_assign(input_signal, input_value)) input_msg += get_input_msg(input_tag, input_signal, input_value) test_statement.add(Wait(time_step * self.stage_num)) final_lzc_value = output_values["final_lzc"] vx = input_values["x"] vy = input_values["y"] pos_lzc = output_signals["pos_lzc_o"] neg_lzc = output_signals["neg_lzc_o"] if vx + vy >= 0: # positive result case main_lzc = pos_lzc output_tag = "pos_lzc_o" else: # negative result case main_lzc = neg_lzc output_tag = "neg_lzc_o" value_msg = get_output_value_msg(main_lzc, final_lzc_value) test_pass_cond = LogicalOr( Comparison( main_lzc, Constant(final_lzc_value, precision=main_lzc.get_precision()), precision=ML_Bool, specifier=Comparison.Equal ), Comparison( main_lzc, Constant(final_lzc_value - 1, precision=main_lzc.get_precision()), precision=ML_Bool, specifier=Comparison.Equal ), precision=ML_Bool ) check_statement = ConditionBlock( LogicalNot( test_pass_cond, precision = ML_Bool ), Report( Concatenation( " result for {}: ".format(output_tag), Conversion( TypeCast( main_lzc, precision = ML_StdLogicVectorFormat( main_lzc.get_precision().get_bit_size() ) ), precision = ML_String ), precision = ML_String ) ) ) test_statement.add(check_statement) assert_statement = Assert( test_pass_cond, "\"unexpected value for inputs {input_msg}, output {output_tag}, expecting {value_msg}, got: \"".format( input_msg=input_msg, output_tag=output_tag, value_msg=value_msg), severity = Assert.Failure ) test_statement.add(assert_statement) return test_statement standard_test_cases =[ ({ "x": 0, "y": 1, }, None), ({ "x": 0, "y": 0, }, None), ({ "x": -2, "y": 1, }, None), ]
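# ---------------------------------------------------------------------------
# Bit-level reference (plain integers, illustrative only) for the Lee/Nowka
# anticipation scheme used in generate_lza: from the propagate and kill
# vectors an indicator word is built whose leading-zero count equals the
# leading-zero count of the exact sum, or falls one short -- exactly the
# tolerance implement_test_case accepts. Unsigned operands and non-negative
# sums are assumed here; the entity also produces the negative-case count.
def clz(v, w):
    """ leading zero count of v seen as a w-bit word """
    for i in range(w):
        if (v >> (w - 1 - i)) & 1:
            return i
    return w

def lza_pos(a, b, w):
    size = w + 1                       # one extra bit, as in generate_lza
    mask = (1 << size) - 1
    p = (a ^ b) & mask                 # propagate
    k = ~(a | b) & mask                # kill
    pos = ~((p >> 1) ^ k) & (mask >> 1)
    return clz(pos, size - 1)

if __name__ == "__main__":
    for a, b in [(0, 1), (3, 12), (255, 1), (100, 27)]:
        anticipated = lza_pos(a, b, 8)
        exact = clz(a + b, 9)
        assert anticipated in (exact, exact - 1), (a, b)
        print(a, b, "anticipated:", anticipated, "exact:", exact)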
class MinMaxSelectEntity(ML_Entity("ut_min_max_select_entity"), TestRunner): """ Min/Max/Select entity unit-test """ @staticmethod def get_default_args(width=32, **kw): """ generate default argument template """ return DefaultEntityArgTemplate( precision=ML_Int32, debug_flag=False, target=VHDLBackend(), output_file="my_adaptative_entity.vhd", entity_name="my_adaptative_entity", language=VHDL_Code, width=width, passes=[("beforecodegen:size_datapath"), ("beforecodegen:rtl_legalize"), ("beforecodegen:dump")], ) def __init__(self, arg_template=None): """ Initialize """ # building default arg_template if necessary arg_template = MinMaxSelectEntity.get_default_args() if \ arg_template is None else arg_template # initializing I/O precision self.width = arg_template.width precision = arg_template.precision io_precisions = [precision] * 2 Log.report( Log.Info, "generating MinMaxSelect entity with width={}".format(self.width)) # initializing base class ML_EntityBasis.__init__(self, base_name="adaptative_design", arg_template=arg_template) self.accuracy = arg_template.accuracy self.precision = arg_template.precision def generate_scheme(self): """ main scheme generation """ Log.report(Log.Info, "width parameter is {}".format(self.width)) int_size = 3 frac_size = self.width - int_size input_precision = fixed_point(int_size, frac_size) output_precision = fixed_point(int_size, frac_size) # declaring main input variable var_x = self.implementation.add_input_signal("x", input_precision) var_y = self.implementation.add_input_signal("y", input_precision) var_x.set_attributes(debug=debug_fixed) var_y.set_attributes(debug=debug_fixed) test = (var_x > 1) test.set_attributes(tag="test", debug=debug_std) sub = var_x - var_y c = Constant(0) pre_result_select = Select(c > sub, Select(c < var_y, sub, Select(LogicalAnd( c > var_x, c < var_y, tag="last_lev_cond"), var_x, c, tag="last_lev_sel"), tag="pre_select"), var_y, tag="pre_result_select") pre_result = Max(0, var_x - var_y, tag="pre_result") result = Conversion(Addition(pre_result, pre_result_select, tag="add"), precision=output_precision) self.implementation.add_output_signal("vr_out", result) return [self.implementation] standard_test_cases = [ ({ "x": 2, "y": 2 }, None), ({ "x": 1, "y": 2 }, None), ({ "x": 0.5, "y": 2 }, None), ({ "x": -1, "y": -1 }, None), ] def numeric_emulate(self, io_map): """ Meta-Function numeric emulation """ raise NotImplementedError @staticmethod def __call__(args): # just ignore args here and trust default constructor? # seems like a bad idea. ut_adaptative_entity = MinMaxSelectEntity(args) ut_adaptative_entity.gen_implementation() return True
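# ---------------------------------------------------------------------------
# Plain-Python rendering of the select tree built above, useful to reason
# about which branch each test case exercises (hypothetical helper, not
# generated code). Note that the innermost condition (0 > x and 0 < y) can
# never hold once 0 < y has already failed, mirroring the redundant
# LogicalAnd in the RTL scheme.
def min_max_select_reference(x, y):
    sub = x - y
    if 0 > sub:
        if 0 < y:
            pre_select = sub
        elif 0 > x and 0 < y:
            pre_select = x
        else:
            pre_select = 0
    else:
        pre_select = y
    return max(0, x - y) + pre_select

if __name__ == "__main__":
    for x, y in [(2, 2), (1, 2), (0.5, 2), (-1, -1)]:
        print({"x": x, "y": y}, "->", min_max_select_reference(x, y))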
class FP_FIXED_MPFMA(ML_Entity("fp_fixed_mpfma")): def __init__( self, arg_template=DefaultEntityArgTemplate, precision=ML_Binary32, target=VHDLBackend(), debug_flag=False, output_file="fp_fixed_mpfma.vhd", entity_name="fp_fixed_mpfma", language=VHDL_Code, vector_size=1, ): # initializing I/O precision precision = ArgDefault.select_value( [arg_template.precision, precision]) io_precisions = [precision] * 2 # initializing base class ML_EntityBasis.__init__(self, base_name="fp_fixed_mpfma", entity_name=entity_name, output_file=output_file, io_precisions=io_precisions, abs_accuracy=None, backend=target, debug_flag=debug_flag, language=language, arg_template=arg_template) self.precision = precision # number of extra bits to add to the accumulator fixed precision self.extra_digit = arg_template.extra_digit min_prod_exp = self.precision.get_emin_subnormal() * 2 self.acc_lsb_index = min_prod_exp # select sign-magnitude encoded accumulator self.sign_magnitude = arg_template.sign_magnitude # enable/disable operator pipelining self.pipelined = arg_template.pipelined @staticmethod def get_default_args(**kw): default_mapping = { "extra_digit": 0, "sign_magnitude": False, "pipelined": False } default_mapping.update(kw) return DefaultEntityArgTemplate(**default_mapping) def get_acc_lsb_index(self): return self.acc_lsb_index def generate_scheme(self): ## Generate Fused multiply and add computing <x> . <y> + <z> Log.report( Log.Info, "generating fixed MPFMA with {ed} extra digit(s) and sign-magnitude accumulator: {sm}" .format(ed=self.extra_digit, sm=self.sign_magnitude)) def get_virtual_cst(prec, value, language): return prec.get_support_format().get_cst( prec.get_base_format().get_integer_coding(value, language)) ## convert @p value from an input floating-point precision # @p in_precision to an output support format @p out_precision io_precision = HdlVirtualFormat(self.precision) # declaring standard clock and reset input signal #clk = self.implementation.add_input_signal("clk", ML_StdLogic) # reset = self.implementation.add_input_signal("reset", ML_StdLogic) # declaring main input variable # maximum weight for a mantissa product digit max_prod_exp = self.precision.get_emax() * 2 + 1 # minimum weight for a mantissa product digit min_prod_exp = self.precision.get_emin_subnormal() * 2 ## Most and least significant digit index for the # accumulator acc_msb_index = max_prod_exp + self.extra_digit acc_lsb_index = min_prod_exp acc_width = acc_msb_index - min_prod_exp + 1 # precision of the accumulator acc_prec = ML_StdLogicVectorFormat(acc_width) reset = self.implementation.add_input_signal("reset", ML_StdLogic) vx = self.implementation.add_input_signal("x", io_precision) vy = self.implementation.add_input_signal("y", io_precision) # Inserting post-input pipeline stage if self.pipelined: self.implementation.start_new_stage() acc = self.implementation.add_input_signal("acc", acc_prec) if self.sign_magnitude: # the accumulator is in sign-magnitude representation sign_acc = self.implementation.add_input_signal( "sign_acc", ML_StdLogic) else: sign_acc = CopySign(acc, precision=ML_StdLogic, tag="sign_acc", debug=debug_std) vx_precision = self.precision vy_precision = self.precision result_precision = acc_prec # precision for first operand vx which is to be statically # positioned p = vx_precision.get_mantissa_size() # precision for second operand vy which is to be dynamically shifted q = vy_precision.get_mantissa_size() # vx must be aligned with vy # the largest shift amount (in absolute value) is precision + 2 # (1
guard bit and 1 rounding bit) exp_vx_precision = ML_StdLogicVectorFormat( vx_precision.get_exponent_size()) exp_vy_precision = ML_StdLogicVectorFormat( vy_precision.get_exponent_size()) mant_vx_precision = ML_StdLogicVectorFormat(p - 1) mant_vy_precision = ML_StdLogicVectorFormat(q - 1) mant_vx = MantissaExtraction(vx, precision=mant_vx_precision) mant_vy = MantissaExtraction(vy, precision=mant_vy_precision) exp_vx = ExponentExtraction(vx, precision=exp_vx_precision, tag="exp_vx", debug=debug_dec) exp_vy = ExponentExtraction(vy, precision=exp_vy_precision, tag="exp_vy", debug=debug_dec) # Maximum number of leading zeros for normalized <vx> mantissa L_x = 0 # Maximum number of leading zeros for normalized <vy> mantissa L_y = 0 # Maximum number of leading zeros for the product of <x>.<y> # mantissa. L_xy = L_x + L_y + 1 sign_vx = CopySign(vx, precision=ML_StdLogic) sign_vy = CopySign(vy, precision=ML_StdLogic) # determining if the operation is an addition (effective_op = '0') # or a subtraction (effective_op = '1') sign_xy = BitLogicXor(sign_vx, sign_vy, precision=ML_StdLogic, tag="sign_xy", debug=ML_Debug(display_format="-radix 2")) effective_op = BitLogicXor(sign_xy, sign_acc, precision=ML_StdLogic, tag="effective_op", debug=ML_Debug(display_format="-radix 2")) exp_vx_bias = vx_precision.get_bias() exp_vy_bias = vy_precision.get_bias() # <acc> is statically positioned in the datapath, # it may even constitute the whole datapath # # the product is shifted with respect to the fix accumulator exp_bias = (exp_vx_bias + exp_vy_bias) # because of the mantissa range [1, 2[, the product exponent # is located one bit to the right (lower) of the product MSB prod_exp_offset = 1 # Determine a working precision to accommodate exponent difference # FIXME: check interval and exponent operations size exp_precision_ext_size = max( vx_precision.get_exponent_size(), vy_precision.get_exponent_size(), abs(ceil(log2(abs(acc_msb_index)))), abs(ceil(log2(abs(acc_lsb_index)))), abs(ceil(log2(abs(exp_bias + prod_exp_offset)))), ) + 2 Log.report(Log.Info, "exp_precision_ext_size={}".format(exp_precision_ext_size)) exp_precision_ext = ML_StdLogicVectorFormat(exp_precision_ext_size) # static accumulator exponent exp_acc = Constant(acc_msb_index, precision=exp_precision_ext, tag="exp_acc", debug=debug_cst_dec) # Y is first aligned offset = max(o+L_y,q) + 2 bits to the left of x # and then shifted right by # exp_diff = exp_x - exp_y + offset # exp_vx in [emin, emax] # exp_vx - exp_vy + p + 2 in [emin-emax + p + 2, emax - emin + p + 2] exp_diff = Subtraction( exp_acc, Addition(Addition(zext( exp_vy, exp_precision_ext_size - vy_precision.get_exponent_size()), zext( exp_vx, exp_precision_ext_size - vx_precision.get_exponent_size()), precision=exp_precision_ext), Constant(exp_bias + prod_exp_offset, precision=exp_precision_ext, tag="diff_bias", debug=debug_cst_dec), precision=exp_precision_ext, tag="pre_exp_diff", debug=debug_dec), precision=exp_precision_ext, tag="exp_diff", debug=debug_dec) signed_exp_diff = SignCast(exp_diff, specifier=SignCast.Signed, precision=exp_precision_ext) datapath_full_width = acc_width # the maximum exp diff is the size of the datapath # minus the bit size of the product max_exp_diff = datapath_full_width - (p + q) exp_diff_lt_0 = Comparison(signed_exp_diff, Constant(0, precision=exp_precision_ext), specifier=Comparison.Less, precision=ML_Bool, tag="exp_diff_lt_0", debug=debug_std) exp_diff_gt_max_diff = Comparison(signed_exp_diff, Constant( max_exp_diff, precision=exp_precision_ext),
specifier=Comparison.Greater, precision=ML_Bool) shift_amount_prec = ML_StdLogicVectorFormat( int(floor(log2(max_exp_diff)) + 1)) mant_shift = Select(exp_diff_lt_0, Constant(0, precision=shift_amount_prec), Select(exp_diff_gt_max_diff, Constant(max_exp_diff, precision=shift_amount_prec), Truncate(exp_diff, precision=shift_amount_prec), precision=shift_amount_prec), precision=shift_amount_prec, tag="mant_shift", debug=ML_Debug(display_format="-radix 10")) prod_prec = ML_StdLogicVectorFormat(p + q) prod = Multiplication(mant_vx, mant_vy, precision=prod_prec, tag="prod", debug=debug_std) # attempt at pipelining the operator # self.implementation.start_new_stage() mant_ext_size = datapath_full_width - (p + q) shift_prec = ML_StdLogicVectorFormat(datapath_full_width) shifted_prod = BitLogicRightShift(rzext(prod, mant_ext_size), mant_shift, precision=shift_prec, tag="shifted_prod", debug=debug_std) ## Inserting a pipeline stage after the product shifting if self.pipelined: self.implementation.start_new_stage() if self.sign_magnitude: # the accumulator is in sign-magnitude representation acc_negated = Select(Comparison(sign_xy, sign_acc, specifier=Comparison.Equal, precision=ML_Bool), acc, BitLogicNegate(acc, precision=acc_prec), precision=acc_prec) # one extra MSB bit is added to the final addition # to detect overflows add_width = acc_width + 1 add_prec = ML_StdLogicVectorFormat(add_width) # FIXME: implement with a proper compound adder mant_add_p0_ext = Addition(zext(shifted_prod, 1), zext(acc_negated, 1), precision=add_prec) mant_add_p1_ext = Addition( mant_add_p0_ext, Constant(1, precision=ML_StdLogic), precision=add_prec, tag="mant_add", debug=ML_Debug(display_format=" -radix 2")) # discarding carry overflow bit mant_add_p0 = SubSignalSelection(mant_add_p0_ext, 0, acc_width - 1, precision=acc_prec) mant_add_p1 = SubSignalSelection(mant_add_p1_ext, 0, acc_width - 1, precision=acc_prec) mant_add_pre_sign = CopySign(mant_add_p1_ext, precision=ML_StdLogic, tag="mant_add_pre_sign", debug=debug_std) mant_add = Select(Comparison(sign_xy, sign_acc, specifier=Comparison.Equal, precision=ML_Bool), mant_add_p0, Select( Comparison(mant_add_pre_sign, Constant(1, precision=ML_StdLogic), specifier=Comparison.Equal, precision=ML_Bool), mant_add_p1, BitLogicNegate(mant_add_p0, precision=acc_prec), precision=acc_prec, ), precision=acc_prec, tag="mant_add") # if both operands had the same sign, then # mant_add is necessarily positive and the result # sign matches the input sign # if both operands had opposite signs, then # the result sign matches the product sign # if mant_add is positive, else the accumulator sign output_sign = Select( Comparison(effective_op, Constant(1, precision=ML_StdLogic), specifier=Comparison.Equal, precision=ML_Bool), # if the effective op is a subtraction (prod - acc) BitLogicXor(sign_acc, mant_add_pre_sign, precision=ML_StdLogic), # the effective op is an addition, thus result and # acc share sign sign_acc, precision=ML_StdLogic, tag="output_sign") if self.pipelined: self.implementation.start_new_stage() # adding output self.implementation.add_output_signal("vr_sign", output_sign) self.implementation.add_output_signal("vr_acc", mant_add) else: # 2s complement encoding of the accumulator, # the accumulator is never negated, only the product # is negated if negative # negate shifted prod when required shifted_prod_op = Select(Comparison(sign_xy, Constant( 1, precision=ML_StdLogic), specifier=Comparison.Equal, precision=ML_Bool), Negation(shifted_prod, precision=shift_prec),
shifted_prod, precision=shift_prec) add_prec = shift_prec # ML_StdLogicVectorFormat(datapath_full_width + 1) mant_add = Addition(shifted_prod_op, acc, precision=acc_prec, tag="mant_add", debug=ML_Debug(display_format=" -radix 2")) if self.pipelined: self.implementation.start_new_stage() self.implementation.add_output_signal("vr_acc", mant_add) return [self.implementation] def numeric_emulate(self, io_map): vx = io_map["x"] vy = io_map["y"] acc = io_map["acc"] result = {} acc_lsb_index = self.get_acc_lsb_index() if self.sign_magnitude: sign_acc = io_map["sign_acc"] acc = -acc if sign_acc else acc result_value = int( sollya.nearestint( (vx * vy + acc * S2**acc_lsb_index) * S2**-acc_lsb_index)) result_sign = 1 if result_value < 0 else 0 result["vr_sign"] = result_sign result["vr_acc"] = abs(result_value) else: result_value = int( sollya.nearestint( (vx * vy + acc * S2**acc_lsb_index) * S2**-acc_lsb_index)) result["vr_acc"] = result_value return result standard_test_cases = [ #({ #"y": ML_Binary16.get_value_from_integer_coding("bab9", base = 16), #"x": ML_Binary16.get_value_from_integer_coding("bbff", base = 16), #"acc": int("1000000011111001011000111000101000101101110110001010011000101001001111100010101001", 2), #"sign_acc": 0 #}, None), ({ "y": ML_Binary16.get_value_from_integer_coding("bbff", base=16), "x": ML_Binary16.get_value_from_integer_coding("bbfa", base=16), "acc": int( "1000100010100111001111000001000001101100110110011010001001011011000010010111111001", 2), "sign_acc": 1 }, None), ]
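# ---------------------------------------------------------------------------
# Exact software model of the accumulation step FP_FIXED_MPFMA.numeric_emulate
# performs in the sign-magnitude configuration: the accumulator holds an
# integer count of ulps of weight 2**acc_lsb_index. fractions.Fraction keeps
# the scaling exact, and Python's round() is round-to-nearest-even, which is
# assumed here to match sollya.nearestint on halfway cases. Sketch only; the
# helper name is illustrative.
from fractions import Fraction

def fixed_mpfma_step(x, y, acc, sign_acc, acc_lsb_index):
    ulp = Fraction(2) ** acc_lsb_index
    acc_signed = -acc if sign_acc else acc
    result = round((Fraction(x) * Fraction(y) + acc_signed * ulp) / ulp)
    return (1 if result < 0 else 0), abs(result)

# 1.5 * -2.0 + 8 * 2**-4 = -2.5, i.e. sign 1 and a magnitude of 40 ulps
assert fixed_mpfma_step(1.5, -2.0, 8, 0, -4) == (1, 40)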
class BipartiteApprox(ML_Entity("bipartite_approx"), GenericBipartiteApprox): def __init__( self, arg_template=DefaultEntityArgTemplate, ): # initializing base class ML_EntityBasis.__init__(self, arg_template=arg_template) self.pipelined = arg_template.pipelined # function to be approximated self.function = arg_template.function # interval on which the approximation must be valid self.interval = arg_template.interval self.disable_sub_testing = arg_template.disable_sub_testing self.disable_sv_testing = arg_template.disable_sv_testing self.alpha = arg_template.alpha self.beta = arg_template.beta self.gamma = arg_template.gamma self.guard_bits = arg_template.guard_bits def get_debug_utils(self): """ return debug_fixed, debug_std """ return debug_fixed, debug_std ## default argument template generation @staticmethod def get_default_args(**kw): """ generate default argument structure for BipartiteApprox """ default_dict = { "target": VHDLBackend(), "output_file": "my_bipartite_approx.vhd", "entity_name": "my_bipartite_approx", "language": VHDL_Code, "function": lambda x: 1.0 / x, "interval": Interval(1, 2), "pipelined": False, "precision": fixed_point(1, 15, signed=False), "disable_sub_testing": False, "disable_sv_testing": False, "alpha": 6, "beta": 5, "gamma": 5, "guard_bits": 3, "passes": [ "beforepipelining:size_datapath", "beforepipelining:rtl_legalize", "beforepipelining:unify_pipeline_stages" ], } default_dict.update(kw) return DefaultEntityArgTemplate(**default_dict) def get_fixed_slice(self, optree, hi, lo, align_hi=FixedPointPosition.FromLSBToLSB, align_lo=FixedPointPosition.FromLSBToLSB, **optree_args): return rtl_get_fixed_slice(optree, hi, lo, align_hi, align_lo, **optree_args) def generate_scheme(self): ## convert @p value from an input floating-point precision # @p in_precision to an output support format @p out_precision io_precision = self.precision # declaring main input variable vx = self.implementation.add_input_signal("x", io_precision) # rounding mode input rnd_mode = self.implementation.add_input_signal( "rnd_mode", rnd_mode_format) vr_out = self.generate_bipartite_approx_module(vx) self.implementation.add_output_signal("vr_out", vr_out) return [self.implementation] def init_test_generator(self): """ Initialize test case generator """ self.input_generator = FixedPointRandomGen( int_size=self.precision.get_integer_size(), frac_size=self.precision.get_frac_size(), signed=self.precision.signed) def generate_test_case(self, input_signals, io_map, index, test_range=None): """ specific test case generation for K1C TCA BLAU """ rnd_mode = 2 # random.randrange(4) hi = sup(self.auto_test_range) lo = inf(self.auto_test_range) nb_step = int((hi - lo) * S2**self.precision.get_frac_size()) x_value = lo + (hi - lo) * random.randrange(nb_step) / nb_step # self.input_generator.get_new_value() input_values = { "rnd_mode": rnd_mode, "x": x_value, } return input_values def numeric_emulate(self, io_map): vx = io_map["x"] rnd_mode_i = io_map["rnd_mode"] rnd_mode = { 0: sollya.RN, 1: sollya.RU, 2: sollya.RD, 3: sollya.RZ }[rnd_mode_i] result = {} result["vr_out"] = sollya.round(self.function(vx), self.precision.get_frac_size(), rnd_mode) return result #standard_test_cases = [({"x": 1.0, "y": (S2**-11 + S2**-17)}, None)] standard_test_cases = [ ({ "x": 1.0, "rnd_mode": 0 }, None), ({ "x": 1.5, "rnd_mode": 0 }, None), ]
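# ---------------------------------------------------------------------------
# Self-contained sketch of the bipartite idea behind this entity, in plain
# float arithmetic (the entity builds the fixed-point, tabulated RTL version;
# all names here are illustrative): the input fraction is cut into
# alpha/beta/gamma slices; an initial-value table addressed by the two upper
# slices gives f at the midpoint of each gamma sub-interval, and an offset
# table addressed by the first and last slices adds a slope correction, so
# total table size is 2**(alpha+beta) + 2**(alpha+gamma) entries instead of
# 2**(alpha+beta+gamma).
import math

def build_bipartite(f, fprime, alpha=6, beta=5, gamma=5):
    n = alpha + beta + gamma
    tiv = {}                        # initial values, indexed by (a, b)
    for a in range(1 << alpha):
        for b in range(1 << beta):
            base = ((a << beta) + b) << gamma
            tiv[(a, b)] = f(1 + (base + (1 << gamma) // 2) / 2.0 ** n)
    to = {}                         # offsets, indexed by (a, g)
    for a in range(1 << alpha):
        slope = fprime(1 + (a + 0.5) / (1 << alpha))
        for g in range(1 << gamma):
            to[(a, g)] = slope * (g - (1 << gamma) // 2) / 2.0 ** n
    def approx(x):
        idx = int((x - 1) * 2 ** n)
        a = idx >> (beta + gamma)
        b = (idx >> gamma) & ((1 << beta) - 1)
        g = idx & ((1 << gamma) - 1)
        return tiv[(a, b)] + to[(a, g)]
    return approx

if __name__ == "__main__":
    recip = build_bipartite(lambda x: 1.0 / x, lambda x: -1.0 / (x * x))
    err = max(abs(recip(1 + i / 2.0 ** 16) - 1 / (1 + i / 2.0 ** 16))
              for i in range(1 << 16))
    print("max abs error ~ 2^%.1f" % math.log2(err))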
class FP_MPFMA(ML_Entity("fp_mpfma")): def __init__(self, arg_template=DefaultEntityArgTemplate, precision=HdlVirtualFormat(ML_Binary32), accuracy=ML_Faithful, debug_flag=False, target=VHDLBackend(), output_file="fp_mpfma.vhd", entity_name="fp_mpfma", language=VHDL_Code, acc_prec=None, pipelined=False): # initializing I/O precision precision = ArgDefault.select_value( [arg_template.precision, precision]) io_precisions = [precision] * 2 # initializing base class ML_EntityBasis.__init__(self, base_name="fp_mpfma", entity_name=entity_name, output_file=output_file, io_precisions=io_precisions, backend=target, debug_flag=debug_flag, language=language, arg_template=arg_template) self.accuracy = accuracy # main precision (used for product operand and default for accumulator) self.precision = precision # accumulator precision self.acc_precision = precision if acc_prec is None else acc_prec # enable operator pipelining self.pipelined = pipelined def generate_scheme(self): ## Generate Fused multiply and add comput <x> . <y> + <z> Log.report( Log.Info, "generating MPFMA with acc precision {acc_precision} and precision {precision}" .format(acc_precision=self.acc_precision, precision=self.precision)) def get_virtual_cst(prec, value, language): return prec.get_support_format().get_cst( prec.get_base_format().get_integer_coding(value, language)) ## convert @p value from an input floating-point precision # @p in_precision to an output support format @p out_precision prod_input_precision = self.precision accumulator_precision = self.acc_precision # declaring standard clock and reset input signal #clk = self.implementation.add_input_signal("clk", ML_StdLogic) # reset = self.implementation.add_input_signal("reset", ML_StdLogic) # declaring main input variable vx = self.implementation.add_input_signal("x", prod_input_precision) vy = self.implementation.add_input_signal("y", prod_input_precision) vz = self.implementation.add_input_signal("z", accumulator_precision) # extra reset input port reset = self.implementation.add_input_signal("reset", ML_StdLogic) # Inserting post-input pipeline stage if self.pipelined: self.implementation.start_new_stage() vx_precision = self.precision.get_base_format() vy_precision = self.precision.get_base_format() vz_precision = self.acc_precision.get_base_format() result_precision = self.acc_precision.get_base_format() # precision for first operand vx which is to be statically # positionned p = vx_precision.get_mantissa_size() # precision for second operand vy which is to be dynamically shifted q = vy_precision.get_mantissa_size() # precision for r = vz_precision.get_mantissa_size() # precision of output o = result_precision.get_mantissa_size() # vx must be aligned with vy # the largest shit amount (in absolute value) is precision + 2 # (1 guard bit and 1 rounding bit) exp_vx_precision = ML_StdLogicVectorFormat( vx_precision.get_exponent_size()) exp_vy_precision = ML_StdLogicVectorFormat( vy_precision.get_exponent_size()) exp_vz_precision = ML_StdLogicVectorFormat( vz_precision.get_exponent_size()) # MantissaExtraction performs the implicit # digit computation and concatenation mant_vx_precision = ML_StdLogicVectorFormat(p) mant_vy_precision = ML_StdLogicVectorFormat(q) mant_vz_precision = ML_StdLogicVectorFormat(r) mant_vx = MantissaExtraction(vx, precision=mant_vx_precision) mant_vy = MantissaExtraction(vy, precision=mant_vy_precision) mant_vz = MantissaExtraction(vz, precision=mant_vz_precision) exp_vx = RawExponentExtraction(vx, precision=exp_vx_precision) exp_vy = 
RawExponentExtraction(vy, precision=exp_vy_precision) exp_vz = RawExponentExtraction(vz, precision=exp_vz_precision) # Maximum number of leading zero for normalized <vx> mantissa L_x = 0 # Maximum number of leading zero for normalized <vy> mantissa L_y = 0 # Maximum number of leading zero for normalized <vz> mantissa L_z = 0 # Maximum number of leading zero for the product of <x>.<y> # mantissa. L_xy = L_x + L_y + 1 sign_vx = CopySign(vx, precision=ML_StdLogic) sign_vy = CopySign(vy, precision=ML_StdLogic) sign_vz = CopySign(vz, precision=ML_StdLogic) # determining if the operation is an addition (effective_op = '0') # or a subtraction (effective_op = '1') sign_xy = BitLogicXor(sign_vx, sign_vy, precision=ML_StdLogic, tag="sign_xy", debug=debug_std) effective_op = BitLogicXor(sign_xy, sign_vz, precision=ML_StdLogic, tag="effective_op", debug=debug_std) exp_vx_bias = vx_precision.get_bias() exp_vy_bias = vy_precision.get_bias() exp_vz_bias = vz_precision.get_bias() # x.y is statically positionned in the datapath # while z is shifted # This is justified by the fact that z alignment may be performed # in parallel with the multiplication of x and y mantissas # The product is positionned <exp_offset>-bit to the right of datapath MSB # (without including an extra carry bit) exp_offset = max(o + L_z, r) + 2 exp_bias = exp_offset + (exp_vx_bias + exp_vy_bias) - exp_vz_bias # because of the mantissa range [1, 2[, the product exponent # is located one bit to the right (lower) of the product MSB prod_exp_offset = 1 # Determine a working precision to accomodate exponent difference # FIXME: check interval and exponent operations size exp_precision_ext_size = max(vx_precision.get_exponent_size(), vy_precision.get_exponent_size(), vz_precision.get_exponent_size()) + 2 exp_precision_ext = ML_StdLogicVectorFormat(exp_precision_ext_size) # Y is first aligned offset = max(o+L_y,q) + 2 bits to the left of x # and then shifted right by # exp_diff = exp_x - exp_y + offset # exp_vx in [emin, emax] # exp_vx - exp_vx + p +2 in [emin-emax + p + 2, emax - emin + p + 2] exp_diff = UnsignedSubtraction( UnsignedAddition(UnsignedAddition( zext(exp_vy, exp_precision_ext_size - vy_precision.get_exponent_size()), zext(exp_vx, exp_precision_ext_size - vx_precision.get_exponent_size()), precision=exp_precision_ext), Constant(exp_bias + prod_exp_offset, precision=exp_precision_ext), precision=exp_precision_ext), zext(exp_vz, exp_precision_ext_size - vz_precision.get_exponent_size()), precision=exp_precision_ext, tag="exp_diff", debug=debug_std) exp_precision_ext_signed = get_signed_precision(exp_precision_ext) signed_exp_diff = SignCast(exp_diff, specifier=SignCast.Signed, precision=exp_precision_ext_signed) datapath_full_width = exp_offset + max(o + L_xy, p + q) + 2 + r max_exp_diff = datapath_full_width - r exp_diff_lt_0 = Comparison(signed_exp_diff, Constant( 0, precision=exp_precision_ext_signed), specifier=Comparison.Less, precision=ML_Bool, tag="exp_diff_lt_0", debug=debug_std) exp_diff_gt_max_diff = Comparison( signed_exp_diff, Constant(max_exp_diff, precision=exp_precision_ext_signed), specifier=Comparison.Greater, precision=ML_Bool) shift_amount_prec = ML_StdLogicVectorFormat( int(floor(log2(max_exp_diff)) + 1)) mant_shift = Select(exp_diff_lt_0, Constant(0, precision=shift_amount_prec), Select(exp_diff_gt_max_diff, Constant(max_exp_diff, precision=shift_amount_prec), Truncate(exp_diff, precision=shift_amount_prec), precision=shift_amount_prec), precision=shift_amount_prec, tag="mant_shift", debug=debug_dec) 
prod_prec = ML_StdLogicVectorFormat(p + q) prod = UnsignedMultiplication(mant_vx, mant_vy, precision=prod_prec, tag="prod", debug=debug_std) mant_ext_size = max_exp_diff shift_prec = ML_StdLogicVectorFormat(datapath_full_width) mant_vz_ext = rzext(mant_vz, mant_ext_size) shifted_mant_vz = BitLogicRightShift(mant_vz_ext, mant_shift, precision=shift_prec, tag="shifted_mant_vz", debug=debug_std) # Inserting pipeline stage # after production computation # and addend alignment shift if self.pipelined: self.implementation.start_new_stage() # vx is right-extended by q+2 bits # and left extend by exp_offset prod_ext = zext(rzext(prod, r + 2), exp_offset + 1) add_prec = ML_StdLogicVectorFormat(datapath_full_width + 1) ## Here we make the supposition that # the product is slower to compute than # aligning <vz> and negating it if necessary # which means that mant_add as the same sign as the product #prod_add_op = Select( # Comparison( # effective_op, # Constant(1, precision = ML_StdLogic), # precision = ML_Bool, # specifier = Comparison.Equal # ), # Negation(prod_ext, precision = add_prec, tag = "neg_prod"), # prod_ext, # precision = add_prec, # tag = "prod_add_op", # debug = debug_cst_dec #) addend_op = Select(Comparison(effective_op, Constant(1, precision=ML_StdLogic), precision=ML_Bool, specifier=Comparison.Equal), BitLogicNegate(zext(shifted_mant_vz, 1), precision=add_prec, tag="neg_addend_Op"), zext(shifted_mant_vz, 1), precision=add_prec, tag="addend_op", debug=debug_std) prod_add_op = prod_ext # Compound Addition mant_add_p1 = UnsignedAddition(UnsignedAddition(addend_op, prod_add_op, precision=add_prec), Constant(1, precision=ML_StdLogic), precision=add_prec, tag="mant_add_p1", debug=debug_std) mant_add_p0 = UnsignedAddition(addend_op, prod_add_op, precision=add_prec, tag="mant_add_p0", debug=debug_std) # if the addition overflows, then it meant vx has been negated and # the 2's complement addition cancelled the negative MSB, thus # the addition result is positive, and the result is of the sign of Y # else the result is of opposite sign to Y add_is_negative = BitLogicAnd(CopySign(mant_add_p1, precision=ML_StdLogic), effective_op, precision=ML_StdLogic, tag="add_is_negative", debug=debug_std) # Negate mantissa addition result if it is negative mant_add_abs = Select(Comparison(add_is_negative, Constant(1, precision=ML_StdLogic), specifier=Comparison.Equal, precision=ML_Bool), BitLogicNegate(mant_add_p0, precision=add_prec, tag="neg_mant_add_p0", debug=debug_std), mant_add_p1, precision=add_prec, tag="mant_add_abs", debug=debug_std) # determining result sign, mant_add # as the same sign as the product res_sign = BitLogicXor(add_is_negative, sign_xy, precision=ML_StdLogic, tag="res_sign") # adding pipeline stage after addition computation if self.pipelined: self.implementation.start_new_stage() # Precision for leading zero count lzc_width = int(floor(log2(datapath_full_width + 1)) + 1) lzc_prec = ML_StdLogicVectorFormat(lzc_width) current_stage = self.implementation.get_current_stage() lzc_args = ML_LeadingZeroCounter.get_default_args( width=(datapath_full_width + 1)) LZC_entity = ML_LeadingZeroCounter(lzc_args) lzc_entity_list = LZC_entity.generate_scheme() lzc_implementation = LZC_entity.get_implementation() lzc_component = lzc_implementation.get_component_object() #self.implementation.set_current_stage(current_stage) # Attributes dynamic field (init_stage and init_op) # constructors must be initialized back after # building a sub-operator inside this operator 
self.implementation.instanciate_dyn_attributes() # lzc_in = mant_add_abs add_lzc_sig = Signal("add_lzc", precision=lzc_prec, var_type=Signal.Local, debug=debug_dec) add_lzc = PlaceHolder(add_lzc_sig, lzc_component(io_map={ "x": mant_add_abs, "vr_out": add_lzc_sig }, tag="lzc_i"), tag="place_holder") # adding pipeline stage after leading zero count if self.pipelined: self.implementation.start_new_stage() # Index of output mantissa least significant bit mant_lsb_index = datapath_full_width - o + 1 #add_lzc = CountLeadingZeros(mant_add, precision = lzc_prec) # CP stands for close path, the data path where X and Y are within 1 exp diff res_normed_mant = BitLogicLeftShift(mant_add_abs, add_lzc, precision=add_prec, tag="res_normed_mant", debug=debug_std) pre_mant_field = SubSignalSelection( res_normed_mant, mant_lsb_index, datapath_full_width - 1, precision=ML_StdLogicVectorFormat(o - 1)) ## Helper function to extract a single bit # from a vector of bits signal def BitExtraction(optree, index, **kw): return VectorElementSelection(optree, index, precision=ML_StdLogic, **kw) def IntCst(value): return Constant(value, precision=ML_Integer) # adding pipeline stage after normalization shift if self.pipelined: self.implementation.start_new_stage() round_bit = BitExtraction(res_normed_mant, IntCst(mant_lsb_index - 1)) mant_lsb = BitExtraction(res_normed_mant, IntCst(mant_lsb_index)) sticky_prec = ML_StdLogicVectorFormat(datapath_full_width - o) sticky_input = SubSignalSelection(res_normed_mant, 0, datapath_full_width - o - 1, precision=sticky_prec) sticky_bit = Select(Comparison(sticky_input, Constant(0, precision=sticky_prec), specifier=Comparison.NotEqual, precision=ML_Bool), Constant(1, precision=ML_StdLogic), Constant(0, precision=ML_StdLogic), precision=ML_StdLogic, tag="sticky_bit", debug=debug_std) # increment selection for rouding to nearest (tie to even) round_increment_RN = BitLogicAnd(round_bit, BitLogicOr(sticky_bit, mant_lsb, precision=ML_StdLogic), precision=ML_StdLogic, tag="round_increment_RN", debug=debug_std) rounded_mant = UnsignedAddition(zext(pre_mant_field, 1), round_increment_RN, precision=ML_StdLogicVectorFormat(o), tag="rounded_mant", debug=debug_std) rounded_overflow = BitExtraction(rounded_mant, IntCst(o - 1), tag="rounded_overflow", debug=debug_std) res_mant_field = Select(Comparison(rounded_overflow, Constant(1, precision=ML_StdLogic), specifier=Comparison.Equal, precision=ML_Bool), SubSignalSelection(rounded_mant, 1, o - 1), SubSignalSelection(rounded_mant, 0, o - 2), precision=ML_StdLogicVectorFormat(o - 1), tag="final_mant", debug=debug_std) res_exp_tmp_size = max(vx_precision.get_exponent_size(), vy_precision.get_exponent_size(), vz_precision.get_exponent_size()) + 2 res_exp_tmp_prec = ML_StdLogicVectorFormat(res_exp_tmp_size) # Product biased exponent # is computed from both x and y exponent exp_xy_biased = UnsignedAddition(UnsignedAddition( UnsignedAddition(zext( exp_vy, res_exp_tmp_size - vy_precision.get_exponent_size()), Constant(vy_precision.get_bias(), precision=res_exp_tmp_prec), precision=res_exp_tmp_prec, tag="exp_vy_biased", debug=debug_dec), UnsignedAddition(zext( exp_vx, res_exp_tmp_size - vx_precision.get_exponent_size()), Constant(vx_precision.get_bias(), precision=res_exp_tmp_prec), precision=res_exp_tmp_prec, tag="exp_vx_biased", debug=debug_dec), precision=res_exp_tmp_prec), Constant( exp_offset + 1, precision=res_exp_tmp_prec, ), precision=res_exp_tmp_prec, tag="exp_xy_biased", debug=debug_dec) # vz's exponent is biased with the format bias # plus the 
exponent offset so it is left-aligned to the datapath MSB exp_vz_biased = UnsignedAddition( zext(exp_vz, res_exp_tmp_size - vz_precision.get_exponent_size()), Constant( vz_precision.get_bias() + 1, # + exp_offset + 1, precision=res_exp_tmp_prec), precision=res_exp_tmp_prec, tag="exp_vz_biased", debug=debug_dec) # If exp diff is less than 0, then we must consider that vz's exponent is # the meaningful one and thus compute the result exponent with respect # to vz's exponent value res_exp_base = Select(exp_diff_lt_0, exp_vz_biased, exp_xy_biased, precision=res_exp_tmp_prec, tag="res_exp_base", debug=debug_dec) # Finally we combine the result exponent base # with the result format bias, the leading zero count and the rounding overflow res_exp_ext = UnsignedAddition(UnsignedSubtraction( UnsignedAddition(zext(res_exp_base, 0), Constant(-result_precision.get_bias(), precision=res_exp_tmp_prec), precision=res_exp_tmp_prec), zext(add_lzc, res_exp_tmp_size - lzc_width), precision=res_exp_tmp_prec), rounded_overflow, precision=res_exp_tmp_prec, tag="res_exp_ext", debug=debug_std) res_exp_prec = ML_StdLogicVectorFormat( result_precision.get_exponent_size()) res_exp = Truncate(res_exp_ext, precision=res_exp_prec, tag="res_exp", debug=debug_dec_unsigned) vr_out = TypeCast(FloatBuild( res_sign, res_exp, res_mant_field, precision=accumulator_precision, ), precision=accumulator_precision, tag="result", debug=debug_std) # adding pipeline stage after rounding if self.pipelined: self.implementation.start_new_stage() self.implementation.add_output_signal("vr_out", vr_out) return lzc_entity_list + [self.implementation] def numeric_emulate(self, io_map): vx = io_map["x"] vy = io_map["y"] vz = io_map["z"] result = {} result["vr_out"] = self.precision.round_sollya_object( vx * vy + vz, sollya.RN) return result standard_test_cases = [ #({"x": 2.0, "y": 4.0, "z": 16.0}, None), ({ "y": ML_Binary16.get_value_from_integer_coding("2cdc", base=16), "x": ML_Binary16.get_value_from_integer_coding("1231", base=16), "z": ML_Binary16.get_value_from_integer_coding("5b5e", base=16), }, None), #({ # "y": ML_Binary64.get_value_from_integer_coding("47d273e91e2c9048", base = 16), # "x": ML_Binary64.get_value_from_integer_coding("c7eea5670485a5ec", base = 16) #}, None), #({ # "y": ML_Binary64.get_value_from_integer_coding("75164a1df94cd488", base = 16), # "x": ML_Binary64.get_value_from_integer_coding("5a7567b08508e5b4", base = 16) #}, None) ]
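# A minimal pure-Python sketch of the round-to-nearest, tie-to-even
# decision built above as round_increment_RN: the truncated mantissa is
# incremented when the round bit is set and either the sticky bit or the
# mantissa LSB is set. The function name is illustrative, not a metalibm API.
def rne_increment(round_bit, sticky_bit, mant_lsb):
    """Return 1 when round-to-nearest (tie-to-even) must increment."""
    return round_bit & (sticky_bit | mant_lsb)

# An exact tie (round=1, sticky=0) rounds up only when the LSB is odd:
assert rne_increment(1, 0, 0) == 0
assert rne_increment(1, 0, 1) == 1
assert rne_increment(1, 1, 0) == 1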
class ML_HW_Adder(ML_Entity("ml_hw_adder")): def __init__(self, arg_template=DefaultEntityArgTemplate, precision=ML_Int32, accuracy=ML_Faithful, libm_compliant=True, debug_flag=False, fuse_fma=True, fast_path_extract=True, target=VHDLBackend(), output_file="my_hw_adder.vhd", entity_name="my_hw_adder", language=VHDL_Code, vector_size=1): # initializing I/O precision precision = ArgDefault.select_value( [arg_template.precision, precision]) io_precisions = [precision] * 2 # initializing base class ML_EntityBasis.__init__(self, base_name="ml_hw_adder", entity_name=entity_name, output_file=output_file, io_precisions=io_precisions, abs_accuracy=None, backend=target, fuse_fma=fuse_fma, fast_path_extract=fast_path_extract, debug_flag=debug_flag, language=language, arg_template=arg_template) self.accuracy = accuracy self.precision = precision def generate_scheme(self): precision = ML_StdLogicVectorFormat(32) # declaring main input variable vx = self.implementation.add_input_signal("x", precision) vy = self.implementation.add_input_signal("y", precision) clk = self.implementation.add_input_signal("clk", ML_StdLogic) reset = self.implementation.add_input_signal("reset", ML_StdLogic) self.implementation.start_new_stage() vr_add = Addition(vx, vy, tag="vr", precision=precision) vr_sub = Subtraction(vx, vy, tag="vr_sub", precision=precision) self.implementation.start_new_stage() vr_out = Select(Comparison(vx, Constant(1, precision=precision), precision=ML_Bool, specifier=Comparison.Equal), vr_add, Select(Comparison(vx, Constant(1, precision=precision), precision=ML_Bool, specifier=Comparison.LessOrEqual), vr_sub, vx, precision=precision), precision=precision, tag="vr_res") #for sig in [vx, vy, vr_add, vr_sub, vr_out]: # print "%s, stage=%d" % (sig.get_tag(), sig.attributes.init_stage) #vr_d = Signal("vr_d", precision = vr.get_precision()) #process_statement = Statement( # ConditionBlock(LogicalAnd(Event(clk, precision = ML_Bool), Comparison(clk, Constant(1, precision = ML_StdLogic), specifier = Comparison.Equal, precision = ML_Bool), precision = ML_Bool), ReferenceAssign(vr_d, vr)) #) #process = Process(process_statement, sensibility_list = [clk, reset]) #self.implementation.add_process(process) #self.implementation.add_output_signal("r_d", vr_d) #self.implementation.add_output_signal("r", vr) self.implementation.add_output_signal("vr_out", vr_out) return [self.implementation] standard_test_cases = [sollya_parse(x) for x in ["1.1", "1.5"]]
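# Illustrative pure-Python model of the nested Select in ML_HW_Adder above
# (a sketch of the intended behaviour, not the generated VHDL): a nested
# Select lowers to an if/elif/else priority chain, evaluated top-down.
def hw_adder_model(x, y):
    if x == 1:
        return x + y   # vr_add branch (Comparison.Equal)
    elif x <= 1:
        return x - y   # vr_sub branch (Comparison.LessOrEqual)
    else:
        return x       # default branch: pass x through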
class ML_LeadingZeroCounter(ML_Entity("ml_lzc")): @staticmethod def get_default_args(width=32): return DefaultEntityArgTemplate( precision=ML_Int32, debug_flag=False, target=vhdl_backend.VHDLBackend(), output_file="my_lzc.vhd", entity_name="my_lzc", language=VHDL_Code, width=width, ) def __init__(self, arg_template=None): # building default arg_template if necessary arg_template = ML_LeadingZeroCounter.get_default_args( ) if arg_template is None else arg_template # initializing I/O precision self.width = arg_template.width precision = arg_template.precision io_precisions = [precision] * 2 Log.report(Log.Info, "generating LZC with width={}".format(self.width)) # initializing base class ML_EntityBasis.__init__(self, base_name="ml_lzc", arg_template=arg_template) self.accuracy = arg_template.accuracy self.precision = arg_template.precision def numeric_emulate(self, io_map): def count_leading_zero(v, w): for i in range(w): if v & 2**(w - 1 - i): return i return w result = {} result["vr_out"] = count_leading_zero(io_map["x"], self.width) return result def generate_scheme(self): lzc_width = int(floor(log2(self.width))) + 1 Log.report(Log.Info, "width of lzc out is {}".format(lzc_width)) input_precision = ML_StdLogicVectorFormat(self.width) precision = ML_StdLogicVectorFormat(lzc_width) # declaring main input variable vx = self.implementation.add_input_signal("x", input_precision) vr_out = Signal("lzc", precision=precision, var_type=Variable.Local) tmp_lzc = Variable("tmp_lzc", precision=precision, var_type=Variable.Local) iterator = Variable("i", precision=ML_Integer, var_type=Variable.Local) lzc_loop = RangeLoop( iterator, Interval(0, self.width - 1), ConditionBlock( Comparison(VectorElementSelection(vx, iterator, precision=ML_StdLogic), Constant(1, precision=ML_StdLogic), specifier=Comparison.Equal, precision=ML_Bool), ReferenceAssign( tmp_lzc, Conversion(Subtraction(Constant(self.width - 1, precision=ML_Integer), iterator, precision=ML_Integer), precision=precision), )), specifier=RangeLoop.Increasing, ) lzc_process = Process(Statement( ReferenceAssign(tmp_lzc, Constant(self.width, precision=precision)), lzc_loop, ReferenceAssign(vr_out, tmp_lzc)), sensibility_list=[vx]) self.implementation.add_process(lzc_process) self.implementation.add_output_signal("vr_out", vr_out) return [self.implementation] standard_test_cases = [sollya_parse(x) for x in ["1.1", "1.5"]]
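# Reference model of the leading-zero count used by numeric_emulate above:
# the index of the first set bit, scanning from the MSB, is exactly the
# number of leading zeros; an all-zero word yields the full width.
# The name lzc_reference is ours, for illustration only.
def lzc_reference(v, w):
    for i in range(w):
        if v & (1 << (w - 1 - i)):
            return i
    return w

assert lzc_reference(0b0001, 4) == 3
assert lzc_reference(0b1000, 4) == 0
assert lzc_reference(0, 4) == 4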
class FP_Adder(ML_Entity("fp_adder")): def __init__( self, arg_template=DefaultEntityArgTemplate, precision=ML_Binary32, libm_compliant=True, debug_flag=False, target=VHDLBackend(), output_file="fp_adder.vhd", entity_name="fp_adder", language=VHDL_Code, ): # initializing I/O precision precision = ArgDefault.select_value( [arg_template.precision, precision]) io_precisions = [precision] * 2 # initializing base class ML_EntityBasis.__init__(self, base_name="fp_adder", entity_name=entity_name, output_file=output_file, io_precisions=io_precisions, backend=target, debug_flag=debug_flag, language=language, arg_template=arg_template) self.precision = precision def generate_scheme(self): def get_virtual_cst(prec, value, language): return prec.get_support_format().get_cst( prec.get_base_format().get_integer_coding(value, language)) ## convert @p value from an input floating-point precision # @p in_precision to an output support format @p out_precision io_precision = VirtualFormat(base_format=self.precision, support_format=ML_StdLogicVectorFormat( self.precision.get_bit_size()), get_cst=get_virtual_cst) # declaring standard clock and reset input signal #clk = self.implementation.add_input_signal("clk", ML_StdLogic) reset = self.implementation.add_input_signal("reset", ML_StdLogic) # declaring main input variable vx = self.implementation.add_input_signal("x", io_precision) vy = self.implementation.add_input_signal("y", io_precision) p = self.precision.get_mantissa_size() # vx must be aligned with vy # the largest shift amount (in absolute value) is precision + 2 # (1 guard bit and 1 rounding bit) exp_precision = ML_StdLogicVectorFormat( self.precision.get_exponent_size()) mant_precision = ML_StdLogicVectorFormat( self.precision.get_field_size()) mant_vx = MantissaExtraction(vx, precision=mant_precision) mant_vy = MantissaExtraction(vy, precision=mant_precision) exp_vx = ExponentExtraction(vx, precision=exp_precision) exp_vy = ExponentExtraction(vy, precision=exp_precision) sign_vx = CopySign(vx, precision=ML_StdLogic) sign_vy = CopySign(vy, precision=ML_StdLogic) # determining if the operation is an addition (effective_op = '0') # or a subtraction (effective_op = '1') effective_op = BitLogicXor(sign_vx, sign_vy, precision=ML_StdLogic, tag="effective_op", debug=ML_Debug(display_format="-radix 2")) ## Wrapper for zero extension # @param op the input operation tree # @param s integer size of the extension # @return the Zero extended operation node def zext(op, s): op_size = op.get_precision().get_bit_size() ext_precision = ML_StdLogicVectorFormat(op_size + s) return ZeroExt(op, s, precision=ext_precision) ## Generate the right zero-extended output from @p optree def rzext(optree, ext_size): op_size = optree.get_precision().get_bit_size() ext_format = ML_StdLogicVectorFormat(ext_size) out_format = ML_StdLogicVectorFormat(op_size + ext_size) return Concatenation(optree, Constant(0, precision=ext_format), precision=out_format) exp_bias = p + 2 exp_precision_ext = ML_StdLogicVectorFormat( self.precision.get_exponent_size() + 2) # Y is first aligned p+2 bits to the left of x # and then shifted right by # exp_diff = exp_x - exp_y + precision + 2 # exp_vx in [emin, emax] # exp_vx - exp_vy + p + 2 in [emin - emax + p + 2, emax - emin + p + 2] exp_diff = Subtraction(Addition(zext(exp_vx, 2), Constant(exp_bias, precision=exp_precision_ext), precision=exp_precision_ext), zext(exp_vy, 2), precision=exp_precision_ext, tag="exp_diff") exp_diff_lt_0 = Comparison(exp_diff, Constant(0, precision=exp_precision_ext),
specifier=Comparison.Less, precision=ML_Bool) exp_diff_gt_2pp4 = Comparison(exp_diff, Constant(2 * p + 4, precision=exp_precision_ext), specifier=Comparison.Greater, precision=ML_Bool) shift_amount_prec = ML_StdLogicVectorFormat( int(floor(log2(2 * p + 4)) + 1)) mant_shift = Select(exp_diff_lt_0, Constant(0, precision=shift_amount_prec), Select(exp_diff_gt_2pp4, Constant(2 * p + 4, precision=shift_amount_prec), Truncate(exp_diff, precision=shift_amount_prec), precision=shift_amount_prec), precision=shift_amount_prec, tag="mant_shift", debug=ML_Debug(display_format="-radix 10")) mant_ext_size = 2 * p + 4 shift_prec = ML_StdLogicVectorFormat(3 * p + 4) shifted_mant_vy = BitLogicRightShift(rzext(mant_vy, mant_ext_size), mant_shift, precision=shift_prec, tag="shifted_mant_vy", debug=debug_std) mant_vx_ext = zext(rzext(mant_vx, p + 2), p + 2 + 1) add_prec = ML_StdLogicVectorFormat(3 * p + 5) mant_vx_add_op = Select(Comparison(effective_op, Constant(1, precision=ML_StdLogic), precision=ML_Bool, specifier=Comparison.Equal), Negation(mant_vx_ext, precision=add_prec, tag="neg_mant_vx"), mant_vx_ext, precision=add_prec, tag="mant_vx_add_op", debug=ML_Debug(display_format=" ")) mant_add = Addition(zext(shifted_mant_vy, 1), mant_vx_add_op, precision=add_prec, tag="mant_add", debug=ML_Debug(display_format=" -radix 2")) # if the addition overflows, then it means vx has been negated and # the 2's complement addition cancelled the negative MSB, thus # the addition result is positive, and the result is of the sign of Y # else the result is of opposite sign to Y add_is_negative = BitLogicAnd(CopySign(mant_add, precision=ML_StdLogic), effective_op, precision=ML_StdLogic, tag="add_is_negative", debug=ML_Debug(" -radix 2")) # Negate mantissa addition result if it is negative mant_add_abs = Select(Comparison(add_is_negative, Constant(1, precision=ML_StdLogic), specifier=Comparison.Equal, precision=ML_Bool), Negation(mant_add, precision=add_prec, tag="neg_mant_add"), mant_add, precision=add_prec, tag="mant_add_abs") res_sign = BitLogicXor(add_is_negative, sign_vy, precision=ML_StdLogic, tag="res_sign") # Precision for leading zero count lzc_width = int(floor(log2(3 * p + 5)) + 1) lzc_prec = ML_StdLogicVectorFormat(lzc_width) lzc_args = ML_LeadingZeroCounter.get_default_args(width=(3 * p + 5)) LZC_entity = ML_LeadingZeroCounter(lzc_args) lzc_entity_list = LZC_entity.generate_scheme() lzc_implementation = LZC_entity.get_implementation() lzc_component = lzc_implementation.get_component_object() #lzc_in = SubSignalSelection(mant_add, p+1, 2*p+3) lzc_in = mant_add_abs # SubSignalSelection(mant_add_abs, 0, 3*p+3, precision = ML_StdLogicVectorFormat(3*p+4)) add_lzc = Signal("add_lzc", precision=lzc_prec, var_type=Signal.Local, debug=debug_dec) add_lzc = PlaceHolder( add_lzc, lzc_component(io_map={ "x": lzc_in, "vr_out": add_lzc })) #add_lzc = CountLeadingZeros(mant_add, precision = lzc_prec) # CP stands for close path, the data path where X and Y are within 1 exp diff res_normed_mant = BitLogicLeftShift(mant_add, add_lzc, precision=add_prec, tag="res_normed_mant", debug=debug_std) pre_mant_field = SubSignalSelection( res_normed_mant, 2 * p + 5, 3 * p + 3, precision=ML_StdLogicVectorFormat(p - 1)) ## Helper function to extract a single bit # from a bit-vector signal def BitExtraction(optree, index, **kw): return VectorElementSelection(optree, index, precision=ML_StdLogic, **kw) def IntCst(value): return Constant(value, precision=ML_Integer) round_bit = BitExtraction(res_normed_mant, IntCst(2 * p + 4)) mant_lsb =
BitExtraction(res_normed_mant, IntCst(2 * p + 5)) sticky_prec = ML_StdLogicVectorFormat(2 * p + 4) sticky_input = SubSignalSelection(res_normed_mant, 0, 2 * p + 3, precision=sticky_prec) sticky_bit = Select(Comparison(sticky_input, Constant(0, precision=sticky_prec), specifier=Comparison.NotEqual, precision=ML_Bool), Constant(1, precision=ML_StdLogic), Constant(0, precision=ML_StdLogic), precision=ML_StdLogic, tag="sticky_bit", debug=debug_std) # increment selection for rounding to nearest (tie to even) round_increment_RN = BitLogicAnd(round_bit, BitLogicOr(sticky_bit, mant_lsb, precision=ML_StdLogic), precision=ML_StdLogic, tag="round_increment_RN", debug=debug_std) rounded_mant = Addition(zext(pre_mant_field, 1), round_increment_RN, precision=ML_StdLogicVectorFormat(p), tag="rounded_mant", debug=debug_std) rounded_overflow = BitExtraction(rounded_mant, IntCst(p - 1), tag="rounded_overflow", debug=debug_std) res_mant_field = Select(Comparison(rounded_overflow, Constant(1, precision=ML_StdLogic), specifier=Comparison.Equal, precision=ML_Bool), SubSignalSelection(rounded_mant, 1, p - 1), SubSignalSelection(rounded_mant, 0, p - 2), precision=ML_StdLogicVectorFormat(p - 1), tag="final_mant", debug=debug_std) res_exp_prec_size = self.precision.get_exponent_size() + 2 res_exp_prec = ML_StdLogicVectorFormat(res_exp_prec_size) res_exp_ext = Addition(Subtraction( Addition(zext(exp_vx, 2), Constant(3 + p, precision=res_exp_prec), precision=res_exp_prec), zext(add_lzc, res_exp_prec_size - lzc_width), precision=res_exp_prec), rounded_overflow, precision=res_exp_prec, tag="res_exp_ext", debug=debug_std) res_exp = Truncate(res_exp_ext, precision=ML_StdLogicVectorFormat( self.precision.get_exponent_size()), tag="res_exp", debug=debug_dec) vr_out = TypeCast(FloatBuild( res_sign, res_exp, res_mant_field, precision=self.precision, ), precision=io_precision, tag="result", debug=debug_std) self.implementation.add_output_signal("vr_out", vr_out) return lzc_entity_list + [self.implementation] def numeric_emulate(self, io_map): vx = io_map["x"] vy = io_map["y"] result = {} result["vr_out"] = sollya.round(vx + vy, self.precision.get_sollya_object(), sollya.RN) return result standard_test_cases = [({"x": 1.0, "y": (S2**-11 + S2**-17)}, None)]
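# Plain-Python sketch of the alignment-shift selection computed above
# (function name is ours): y's mantissa is shifted right by
# exp_diff = exp_x - exp_y + p + 2, clamped into [0, 2p+4] so the shifter
# and the sticky computation stay within the 3p+4-bit datapath.
def mant_shift_model(exp_x, exp_y, p):
    exp_diff = exp_x - exp_y + p + 2
    if exp_diff < 0:
        return 0                     # y dominates: no alignment shift
    return min(exp_diff, 2 * p + 4)  # saturate; shifted-out bits feed sticky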
class Dequantizer(ML_Entity("dequantizer")): """ Implement the post-processing operator for a quantized neural network layer: quantized_input * scale + offset_input """ def __init__(self, arg_template=DefaultEntityArgTemplate): # initializing I/O precision # initializing base class ML_EntityBasis.__init__(self, arg_template = arg_template ) self.precision = arg_template.precision ## Generate default arguments structure (before any user / test overload) @staticmethod def get_default_args(**kw): default_arg_map = { "io_formats": { "scale": HdlVirtualFormat(ML_Binary32), "quantized_input": FIX32, "offset_input": FIX32, "result": FIX32 }, "pipelined": False, "output_file": "dequantizer.vhd", "entity_name": "dequantizer", "language": VHDL_Code, "passes": ["beforecodegen:size_datapath", "beforecodegen:rtl_legalize"], } default_arg_map.update(**kw) return DefaultEntityArgTemplate(**default_arg_map) def generate_scheme(self): # quantized_input * scale + offset_input scale_format = self.get_io_format("scale") quantized_input_format = self.get_io_format("quantized_input") offset_input_format = self.get_io_format("offset_input") result_format = self.get_io_format("result") RESULT_SIZE = result_format.get_bit_size() scale = self.implementation.add_input_variable("scale", scale_format) quantized_input = self.implementation.add_input_variable("quantized_input", quantized_input_format) offset_input = self.implementation.add_input_variable("offset", offset_input_format) support_format = self.precision.get_support_format() base_format = self.precision.get_base_format() exp_precision = fixed_point(base_format.get_exponent_size(), 0, signed=False) p = base_format.get_field_size() n = support_format.get_bit_size() biased_scale_exp = fixed_exponent(scale)#.modify_attributes(tag="scale_exp", debug=debug_fixed) scale_exp = biased_scale_exp + scale_format.get_base_format().get_bias() scale_exp.set_attributes(tag="scale_exp", debug=debug_fixed) scale_sign = CopySign(scale, precision=ML_StdLogic, tag="scale_sign") scale_mant = fixed_normalized_mantissa(scale) # unscaled field is in fixed-point normalized format unscaled_field = scale_mant * quantized_input unscaled_field.set_attributes(tag="unscaled_field", debug=debug_fixed) # p - 1 (precision without implicit one, or length of mantissa fractional part) pm1 = scale.get_precision().get_base_format().get_mantissa_size() - 1 # PRODUCT_SIZE is the width of the unscaled scale * input "mantissa" product PRODUCT_SIZE = scale_mant.get_precision().get_bit_size() + quantized_input.get_precision().get_bit_size() # MAX_SHIFT computed such that no bit is lost (and kept for proper rounding) # an extra +1 is added to ensure the correct bit is used as round bit MAX_SHIFT = RESULT_SIZE + 1 + PRODUCT_SIZE + 1 # TODO/FIXME: manage case where shift_amount < 0 (should be forced to 0) shift_amount = Min(-scale_exp + RESULT_SIZE + 1, MAX_SHIFT, tag="shift_amount", debug=debug_fixed) # unscaled_field is widened (right-padded with "0") # TODO/FIXME manage fixed-point format signedness extended_unscaled_field = Conversion(unscaled_field, precision=fixed_point(PRODUCT_SIZE, MAX_SHIFT)) # the widened unscaled_field is cast to set the 0-padding as the fractional part # TODO/FIXME manage fixed-point format signedness pre_shift_field = TypeCast(extended_unscaled_field, precision=fixed_point(MAX_SHIFT - 1, PRODUCT_SIZE + 1), tag="pre_shift_field", debug=debug_std) scaled_field = BitArithmeticRightShift(pre_shift_field, shift_amount, tag="scaled_field", debug=debug_std) #truncated_field =
Conversion(scaled_field, precision=offset_input_format) #offseted_field = truncated_field + offset_input offseted_field = scaled_field + Conversion(offset_input, precision=fixed_point(offset_input_format.get_bit_size(), 0), tag="extended_offset", debug=debug_std) offseted_field.set_attributes(tag="offseted_field", debug=debug_std) round_bit = BitSelection(offseted_field, FixedPointPosition(offseted_field, -1, align=FixedPointPosition.FromPointToLSB)) sticky_bit = NotEqual(SubSignalSelection(offseted_field, 0, FixedPointPosition(offseted_field, -2, align=FixedPointPosition.FromPointToLSB)), 0) # TODO: implement rounding result_format = self.get_io_format("result") # detecting overflow / underflow MAX_BOUND = self.get_io_format("result").get_max_value() MIN_BOUND = self.get_io_format("result").get_min_value() bounded_result = Max(MIN_BOUND, Min(offseted_field, MAX_BOUND)) result = Conversion(bounded_result, precision=result_format) self.implementation.add_output_signal("result", result) return [self.implementation] def numeric_emulate(self, io_map): qinput = io_map["quantized_input"] scale = io_map["scale"] offset = io_map["offset"] result = {} unbounded_result = int(scale * qinput + offset) # threshold clamp MAX_BOUND = self.get_io_format("result").get_max_value() MIN_BOUND = self.get_io_format("result").get_min_value() result["result"] = max(min(MAX_BOUND, unbounded_result), MIN_BOUND) return result standard_test_cases = [ # dummy tests ({"quantized_input": 0, "scale": 0, "offset": 0}, None), ({"quantized_input": 0, "scale": 0, "offset": 1}, None), ({"quantized_input": 0, "scale": 0, "offset": 17}, None), ({"quantized_input": 0, "scale": 0, "offset": -17}, None), ({"quantized_input": 17, "scale": 1.0, "offset": 0}, None), #({"quantized_input": 17, "scale": -1.0, "offset": 0}, None), ({"quantized_input": -17, "scale": 1.0, "offset": 0}, None), #({"quantized_input": -17, "scale": -1.0, "offset": 0}, None), ({"quantized_input": 17, "scale": 1.0, "offset": 42}, None), #({"quantized_input": 17, "scale": -1.0, "offset": 42}, None), ({"quantized_input": -17, "scale": 1.0, "offset": 42}, None), #({"quantized_input": -17, "scale": -1.0, "offset": 42}, None), ({"quantized_input": 17, "scale": 1.125, "offset": 42}, None), #({"quantized_input": 17, "scale": -1.0, "offset": 42}, None), ({"quantized_input": -17, "scale": 17.0, "offset": 42}, None), #({"quantized_input": -17, "scale": -1.0, "offset": 42}, None), # rounding ({"quantized_input": 17, "scale": 0.625, "offset": 1337}, None), # TODO: cancellation tests # TODO: overflow tests ({"quantized_input": 2**31-1, "scale": 4.0, "offset": 42}, None), # TODO: other tests ]
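# Plain-Python restatement of the Dequantizer's numeric_emulate above:
# scale the quantized input, add the offset, then clamp to the result
# format's representable range. The bounds are parameters here for clarity
# (the entity reads them from its "result" io_format).
def dequantize_model(q, scale, offset, min_bound, max_bound):
    unbounded = int(scale * q + offset)
    return max(min(max_bound, unbounded), min_bound)

# e.g. with a signed 32-bit result format the overflow test case saturates:
assert dequantize_model(2**31 - 1, 4.0, 42, -2**31, 2**31 - 1) == 2**31 - 1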
class FP_Adder(ML_Entity("fp_adder")): def __init__(self, arg_template=DefaultEntityArgTemplate): # initializing I/O precision # initializing base class ML_EntityBasis.__init__(self, arg_template = arg_template ) self.precision = arg_template.precision ## Generate default arguments structure (before any user / test overload) @staticmethod def get_default_args(**kw): default_arg_map = { "precision": HdlVirtualFormat(ML_Binary32), "pipelined": False, "output_file": "fp_adder.vhd", "entity_name": "fp_adder", "language": VHDL_Code, "passes": [("beforecodegen:size_datapath")], } default_arg_map.update(**kw) return DefaultEntityArgTemplate(**default_arg_map) def generate_scheme(self): def get_virtual_cst(prec, value, language): return prec.get_support_format().get_cst( prec.get_base_format().get_integer_coding(value, language)) ## convert @p value from an input floating-point precision # @p in_precision to an output support format @p out_precision io_precision = self.precision # declaring standard clock and reset input signal #clk = self.implementation.add_input_signal("clk", ML_StdLogic) reset = self.implementation.add_input_signal("reset", ML_StdLogic) # declaring main input variable vx = self.implementation.add_input_signal("x", io_precision) vy = self.implementation.add_input_signal("y", io_precision) base_precision = self.precision.get_base_format() p = base_precision.get_mantissa_size() # vx must be aligned with vy # the largest shift amount (in absolute value) is precision + 2 # (1 guard bit and 1 rounding bit) exp_precision = ML_StdLogicVectorFormat(base_precision.get_exponent_size()) mant_precision = ML_StdLogicVectorFormat(base_precision.get_mantissa_size()) mant_vx = MantissaExtraction(vx, precision = mant_precision) mant_vy = MantissaExtraction(vy, precision = mant_precision) exp_vx = RawExponentExtraction(vx, precision = exp_precision) exp_vy = RawExponentExtraction(vy, precision = exp_precision) sign_vx = CopySign(vx, precision = ML_StdLogic) sign_vy = CopySign(vy, precision = ML_StdLogic) # determining if the operation is an addition (effective_op = '0') # or a subtraction (effective_op = '1') effective_op = BitLogicXor(sign_vx, sign_vy, precision = ML_StdLogic, tag = "effective_op", debug=debug_std) ## Wrapper for zero extension # @param op the input operation tree # @param s integer size of the extension # @return the Zero extended operation node def zext(op,s): op_size = op.get_precision().get_bit_size() ext_precision = ML_StdLogicVectorFormat(op_size + s) return ZeroExt(op, s, precision = ext_precision) ## Generate the right zero-extended output from @p optree def rzext(optree, ext_size): op_size = optree.get_precision().get_bit_size() ext_format = ML_StdLogicVectorFormat(ext_size) out_format = ML_StdLogicVectorFormat(op_size + ext_size) return Concatenation(optree, Constant(0, precision = ext_format), precision = out_format) exp_bias = p + 2 exp_precision_ext = fixed_point(base_precision.get_exponent_size() + 2, 0) exp_precision = fixed_point(base_precision.get_exponent_size(), 0, signed=False) # Y is first aligned p+2 bits to the left of x # and then shifted right by # exp_diff = exp_x - exp_y + precision + 2 # exp_vx in [emin, emax] # exp_vx - exp_vy + p + 2 in [emin - emax + p + 2, emax - emin + p + 2] exp_diff = Subtraction( Addition( TypeCast(exp_vx, precision=exp_precision), Constant(exp_bias, precision=exp_precision_ext), ), TypeCast(exp_vy, precision=exp_precision), ) exp_diff_lt_0 = Comparison(exp_diff, Constant(0, precision=exp_precision_ext), specifier =
Comparison.Less, precision = ML_Bool) exp_diff_gt_2pp4 = Comparison(exp_diff, Constant(2*p+4, precision = exp_precision_ext), specifier = Comparison.Greater, precision = ML_Bool) shift_amount_size = int(floor(log2(2*p+4))+1) shift_amount_prec = ML_StdLogicVectorFormat(shift_amount_size) mant_shift = Select( exp_diff_lt_0, 0, Select( exp_diff_gt_2pp4, Constant(2*p+4), exp_diff, ), tag = "mant_shift", debug = debug_dec ) mant_shift = TypeCast( Conversion(mant_shift, precision=fixed_point(shift_amount_size, 0, signed=False)), precision=shift_amount_prec ) mant_ext_size = 2*p+4 shift_prec = ML_StdLogicVectorFormat(3*p+4) shifted_mant_vy = BitLogicRightShift(rzext(mant_vy, mant_ext_size), mant_shift, precision = shift_prec, tag = "shifted_mant_vy", debug = debug_std) mant_vx_ext = zext(rzext(mant_vx, p+2), p+2+1) mant_vx_ext.set_attributes(tag="mant_vx_ext") add_prec = ML_StdLogicVectorFormat(3*p+5) mant_vx_add_op = Select( Comparison( effective_op, Constant(1, precision = ML_StdLogic), precision = ML_Bool, specifier = Comparison.Equal ), Negation(mant_vx_ext, precision = add_prec, tag = "neg_mant_vx"), mant_vx_ext, precision = add_prec, tag = "mant_vx_add_op", debug=debug_cst_dec ) mant_add = UnsignedAddition( zext(shifted_mant_vy, 1), mant_vx_add_op, precision = add_prec, tag = "mant_add", debug=debug_std ) # if the addition overflows, then it means vx has been negated and # the 2's complement addition cancelled the negative MSB, thus # the addition result is positive, and the result is of the sign of Y # else the result is of opposite sign to Y add_is_negative = BitLogicAnd( CopySign(mant_add, precision = ML_StdLogic), effective_op, precision = ML_StdLogic, tag = "add_is_negative", debug = debug_std ) # Negate mantissa addition result if it is negative mant_add_abs = Select( Comparison( add_is_negative, Constant(1, precision = ML_StdLogic), specifier = Comparison.Equal, precision = ML_Bool ), Negation(mant_add, precision = add_prec, tag = "neg_mant_add"), mant_add, precision = add_prec, tag = "mant_add_abs" ) res_sign = BitLogicXor(add_is_negative, sign_vy, precision = ML_StdLogic, tag = "res_sign") # Precision for leading zero count lzc_width = int(floor(log2(3*p+5)) + 1) lzc_prec = ML_StdLogicVectorFormat(lzc_width) add_lzc = CountLeadingZeros( mant_add_abs, precision=lzc_prec, tag="add_lzc", debug=debug_dec_unsigned ) #add_lzc = CountLeadingZeros(mant_add, precision = lzc_prec) # CP stands for close path, the data path where X and Y are within 1 exp diff res_normed_mant = BitLogicLeftShift(mant_add, add_lzc, precision = add_prec, tag = "res_normed_mant", debug = debug_std) pre_mant_field = SubSignalSelection(res_normed_mant, 2*p+5, 3*p+3, precision = ML_StdLogicVectorFormat(p-1)) ## Helper function to extract a single bit # from a bit-vector signal def BitExtraction(optree, index, **kw): return VectorElementSelection(optree, index, precision = ML_StdLogic, **kw) def IntCst(value): return Constant(value, precision = ML_Integer) round_bit = BitExtraction(res_normed_mant, IntCst(2*p+4)) mant_lsb = BitExtraction(res_normed_mant, IntCst(2*p+5)) sticky_prec = ML_StdLogicVectorFormat(2*p+4) sticky_input = SubSignalSelection( res_normed_mant, 0, 2*p+3, precision = sticky_prec ) sticky_bit = Select( Comparison( sticky_input, Constant(0, precision = sticky_prec), specifier = Comparison.NotEqual, precision = ML_Bool ), Constant(1, precision = ML_StdLogic), Constant(0, precision = ML_StdLogic), precision = ML_StdLogic, tag = "sticky_bit", debug = debug_std ) # increment selection for rounding to
nearest (tie to even) round_increment_RN = BitLogicAnd( round_bit, BitLogicOr( sticky_bit, mant_lsb, precision = ML_StdLogic ), precision = ML_StdLogic, tag = "round_increment_RN", debug = debug_std ) rounded_mant = UnsignedAddition( zext(pre_mant_field, 1), round_increment_RN, precision = ML_StdLogicVectorFormat(p), tag = "rounded_mant", debug = debug_std ) rounded_overflow = BitExtraction( rounded_mant, IntCst(p-1), tag = "rounded_overflow", debug = debug_std ) res_mant_field = Select( Comparison( rounded_overflow, Constant(1, precision = ML_StdLogic), specifier = Comparison.Equal, precision = ML_Bool ), SubSignalSelection(rounded_mant, 1, p-1), SubSignalSelection(rounded_mant, 0, p-2), precision = ML_StdLogicVectorFormat(p-1), tag = "final_mant", debug = debug_std ) res_exp_prec_size = base_precision.get_exponent_size() + 2 res_exp_prec = ML_StdLogicVectorFormat(res_exp_prec_size) res_exp_ext = UnsignedAddition( UnsignedSubtraction( UnsignedAddition( zext(exp_vx, 2), Constant(3+p, precision = res_exp_prec), precision = res_exp_prec ), zext(add_lzc, res_exp_prec_size - lzc_width), precision = res_exp_prec ), rounded_overflow, precision = res_exp_prec, tag = "res_exp_ext", debug = debug_std ) res_exp = Truncate(res_exp_ext, precision = ML_StdLogicVectorFormat(base_precision.get_exponent_size()), tag = "res_exp", debug = debug_dec) vr_out = TypeCast( FloatBuild( res_sign, res_exp, res_mant_field, precision = base_precision, ), precision = io_precision, tag = "result", debug = debug_std ) self.implementation.add_output_signal("vr_out", vr_out) return [self.implementation] def numeric_emulate(self, io_map): vx = io_map["x"] vy = io_map["y"] result = {} base_format = self.precision.get_base_format() result["vr_out"] = sollya.round(vx + vy, base_format.get_sollya_object(), sollya.RN) return result standard_test_cases = [({"x": 1.0, "y": (S2**-11 + S2**-17)}, None)]
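# Sketch (ours, not a metalibm API) of the sign handling around mant_add
# above: on an effective subtraction the adder works in 2's complement, so
# the MSB of mant_add (read via CopySign) says whether the negation "won";
# the magnitude is then recovered by a conditional negation, and the result
# sign is that flag XORed with y's sign.
def abs_and_sign_model(mant_add, effective_op, sign_y, width):
    mask = (1 << width) - 1
    add_is_negative = ((mant_add >> (width - 1)) & 1) & effective_op
    mant_abs = (-mant_add) & mask if add_is_negative else mant_add
    return mant_abs, add_is_negative ^ sign_y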
class UT_FixedPointPosition(ML_Entity("ml_ut_fixed_point_position"), TestRunner): """ FixedPointPosition unit-test """ @staticmethod def get_default_args(width=32, **kw): """ generate default argument template """ return DefaultEntityArgTemplate( precision=ML_Int32, debug_flag=False, target=VHDLBackend(), output_file="ut_fixed_point_position.vhd", entity_name="ut_fixed_point_position", language=VHDL_Code, width=width, passes=[("beforecodegen:size_datapath")], ) def __init__(self, arg_template=None): """ Initialize """ # building default arg_template if necessary arg_template = UT_FixedPointPosition.get_default_args() if \ arg_template is None else arg_template # initializing base class ML_EntityBasis.__init__(self, base_name="ut_fixed_point_position", arg_template=arg_template) self.accuracy = arg_template.accuracy self.precision = arg_template.precision # extra width parameter self.width = arg_template.width def generate_scheme(self): """ main scheme generation """ int_size = 3 frac_size = self.width - int_size input_precision = hdl_precision_parser("FU%d.%d" % (int_size, frac_size)) output_precision = hdl_precision_parser("FS%d.%d" % (int_size, frac_size)) # declaring main input variable var_x = self.implementation.add_input_signal("x", input_precision) var_y = self.implementation.add_input_signal("y", input_precision) var_z = self.implementation.add_input_signal("z", input_precision) abstract_formulae = var_x + var_y * var_z + 7 round_bit = BitSelection( abstract_formulae, FixedPointPosition(abstract_formulae, 0, align=FixedPointPosition.FromPointToLSB), ) msb_bit = BitSelection( abstract_formulae, FixedPointPosition(abstract_formulae, 0, align=FixedPointPosition.FromMSBToLSB)) lsb_bit = BitSelection( abstract_formulae, FixedPointPosition(abstract_formulae, 0, align=FixedPointPosition.FromLSBToLSB)) self.implementation.add_output_signal("round", round_bit) self.implementation.add_output_signal("msb", msb_bit) self.implementation.add_output_signal("lsb", lsb_bit) return [self.implementation] def numeric_emulate(self, io_map): """ Meta-Function numeric emulation """ raise NotImplementedError @staticmethod def __call__(args): # just ignore args here and trust the default constructor? # seems like a bad idea. ut_fixed_point_position = UT_FixedPointPosition(args) ut_fixed_point_position.gen_implementation() return True
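# One plausible reading (an assumption, not checked against metalibm's
# implementation) of the three FixedPointPosition alignments exercised
# above, for a value with i integer and f fractional bits, LSB index 0:
def fixed_position_index(i, f, offset, align):
    if align == "FromPointToLSB":      # offset counted from the binary point
        return f + offset              # offset 0 -> the weight-2**0 bit
    if align == "FromMSBToLSB":        # offset counted down from the MSB
        return (i + f - 1) - offset
    if align == "FromLSBToLSB":        # offset counted up from the LSB
        return offset
    raise ValueError(align)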
class PipelinedBench(ML_Entity("ut_pipelined_bench_entity"), TestRunner): """ Adaptive Entity unit-test """ @staticmethod def get_default_args(width=32, **kw): """ generate default argument template """ return DefaultEntityArgTemplate( precision=ML_Int32, debug_flag=False, target=VHDLBackend(), output_file="my_adaptative_entity.vhd", entity_name="my_adaptative_entity", language=VHDL_Code, width=width, passes=[ ("beforepipelining:dump_with_stages"), ("beforepipelining:size_datapath"), ("beforepipelining:dump_with_stages"), ("beforepipelining:rtl_legalize"), ("beforepipelining:dump_with_stages"), ("beforepipelining:unify_pipeline_stages"), ("beforepipelining:dump_with_stages"), ], ) def __init__(self, arg_template=None): """ Initialize """ # building default arg_template if necessary arg_template = PipelinedBench.get_default_args() if \ arg_template is None else arg_template # initializing I/O precision self.width = arg_template.width precision = arg_template.precision io_precisions = [precision] * 2 Log.report( Log.Info, "generating Adaptive Entity with width={}".format(self.width)) # initializing base class ML_EntityBasis.__init__(self, base_name="adaptative_design", arg_template=arg_template) self.accuracy = arg_template.accuracy self.precision = arg_template.precision int_size = 3 frac_size = 7 self.input_precision = fixed_point(int_size, frac_size) self.output_precision = fixed_point(int_size, frac_size) def generate_scheme(self): """ main scheme generation """ Log.report(Log.Info, "input_precision is {}".format(self.input_precision)) Log.report(Log.Info, "output_precision is {}".format(self.output_precision)) # declaring main input variable var_x = self.implementation.add_input_signal("x", self.input_precision) var_y = self.implementation.add_input_signal("y", self.input_precision) var_x.set_attributes(debug=debug_fixed) var_y.set_attributes(debug=debug_fixed) self.implementation.start_new_stage() add = var_x + var_y self.implementation.start_new_stage() sub = add - var_y self.implementation.start_new_stage() pre_result = sub - var_x self.implementation.start_new_stage() post_result = pre_result + var_x # convert post_result (== x) rather than pre_result (== 0) so the # generated datapath matches numeric_emulate below result = Conversion(post_result, precision=self.output_precision) self.implementation.add_output_signal("vr_out", result) return [self.implementation] standard_test_cases = [] def numeric_emulate(self, io_map): """ Meta-Function numeric emulation """ vx = io_map["x"] vy = io_map["y"] result = {"vr_out": vx} return result @staticmethod def __call__(args): # just ignore args here and trust the default constructor? # seems like a bad idea. ut_adaptative_entity = PipelinedBench(args) ut_adaptative_entity.gen_implementation() return True
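# Conceptual model (ours, an assumption about the pipelining pass) of what
# the start_new_stage() calls above do: each call opens a new pipeline
# stage, so a value produced k stage boundaries before the output reaches
# it k clock cycles later once registers are inserted.
class PipelineStageModel:
    def __init__(self):
        self.stage = 0
    def start_new_stage(self):
        self.stage += 1   # subsequent ops land one register later
    @property
    def latency(self):
        return self.stage # cycles from first-stage inputs to the output

model = PipelineStageModel()
for _ in range(4):        # mirrors the four calls in generate_scheme
    model.start_new_stage()
assert model.latency == 4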