def legalize_invsqrt_seed(optree):
    """ Legalize an InverseSquareRootSeed optree

        The seed is rebuilt as a sub-graph evaluated in ML_Binary32: a load
        from invsqrt_approx_table multiplied by an exponent term and by a
        correction factor selected from the parity of the input exponent.

        :param optree: node to legalize, must be a ReciprocalSquareRootSeed
        :return: the replacement sub-graph, converted back to optree's
                 precision when that precision differs from ML_Binary32 """
    assert isinstance(optree, ReciprocalSquareRootSeed)
    op_prec = optree.get_precision()
    # input  = 1.m_hi-m_lo * 2^e
    # approx = 2^(-e / 2) * approx_insqrt(1.m_hi) * correction
    # where correction is 1.0 when (e % 2) == 0, 2^0.5 when (e % 2) == -1
    # and 2^-0.5 otherwise
    op_input = optree.get_input(0)
    approx_prec = ML_Binary32
    if op_prec != approx_prec:
        # the approximation is always evaluated in approx_prec
        op_input = Conversion(op_input, precision=approx_prec)

    # TODO: fix integer precision selection
    #       as we are in a late code generation stage, every node's precision
    #       must be set
    op_exp = ExponentExtraction(
        op_input, tag="op_exp", debug=debug_multi, precision=ML_Int32)
    neg_half_exp = Division(
        Negation(op_exp, precision=ML_Int32),
        Constant(2, precision=ML_Int32),
        precision=ML_Int32)
    approx_exp = ExponentInsertion(
        neg_half_exp, tag="approx_exp", debug=debug_multi,
        precision=approx_prec)

    op_exp_parity = Modulo(
        op_exp, Constant(2, precision=ML_Int32), precision=ML_Int32)
    # NOTE(review): the explicit -1 case presumably covers a truncating
    # Modulo applied to a negative odd exponent -- confirm against the
    # Modulo code generation
    approx_exp_correction = Select(
        Equal(op_exp_parity, Constant(0, precision=ML_Int32)),
        Constant(1.0, precision=approx_prec),
        Select(
            Equal(op_exp_parity, Constant(-1, precision=ML_Int32)),
            Constant(S2**0.5, precision=approx_prec),
            Constant(S2**-0.5, precision=approx_prec),
            precision=approx_prec),
        precision=approx_prec,
        tag="approx_exp_correction",
        debug=debug_multi)

    table_index = invsqrt_approx_table.get_index_function()(op_input)
    table_index.set_attributes(tag="invsqrt_index", debug=debug_multi)
    approx = Multiplication(
        TableLoad(invsqrt_approx_table, table_index, precision=approx_prec),
        Multiplication(approx_exp_correction, approx_exp,
                       precision=approx_prec),
        tag="invsqrt_approx",
        debug=debug_multi,
        precision=approx_prec)

    if approx_prec != op_prec:
        return Conversion(approx, precision=op_prec)
    return approx
def generate_scheme(self):
    """ main scheme generation

        Declares the input signals x, y and z, compares x against a
        FixedPointPosition anchor built from x, and drives the output
        signal "result" with x or y (both converted to self.precision)
        depending on that comparison.

        :return: single-element list holding self.implementation """
    integer_bits = 3
    fraction_bits = self.width - integer_bits
    field_widths = (integer_bits, fraction_bits)
    in_format = hdl_precision_parser("FU%d.%d" % field_widths)
    # parsed exactly as before; the resulting format is not attached to
    # any signal in this method
    out_format = hdl_precision_parser("FS%d.%d" % field_widths)

    entity = self.implementation
    # input declaration order is kept: x, y, then z
    sig_x = entity.add_input_signal("x", in_format)
    sig_y = entity.add_input_signal("y", in_format)
    sig_z = entity.add_input_signal("z", in_format)

    formula = sig_x
    msb_anchor = FixedPointPosition(
        formula, -3,
        align=FixedPointPosition.FromPointToMSB,
        tag="anchor")
    above_anchor = formula > msb_anchor

    picked = Select(
        above_anchor,
        Conversion(sig_x, precision=self.precision),
        Conversion(sig_y, precision=self.precision))
    entity.add_output_signal("result", picked)
    return [self.implementation]
def mantissa_extraction_modifier_from_fields(op, field_op, exp_is_zero, tag="mant_extr"):
    """ Legalize a MantissaExtraction node into a sub-graph of basic
        operations, assuming the <field_op> bitfield and the <exp_is_zero>
        flag are already available

        :param op: node whose precision provides the base format
        :param field_op: node cast to a std_logic_vector of field size
        :param exp_is_zero: condition selecting a 0 implicit digit
        :param tag: prefix for the tag of the implicit digit node
        :return: Concatenation of the implicit digit and the casted field """
    base_format = op.get_precision().get_base_format()
    field_format = ML_StdLogicVectorFormat(base_format.get_field_size())
    mantissa_format = ML_StdLogicVectorFormat(base_format.get_mantissa_size())

    # the leading digit is 0 when exp_is_zero holds, 1 otherwise
    leading_digit = Select(
        exp_is_zero,
        Constant(0, precision=ML_StdLogic),
        Constant(1, precision=ML_StdLogic),
        precision=ML_StdLogic,
        tag=tag + "_implicit_digit",
    )
    field_bits = TypeCast(field_op, precision=field_format)
    return Concatenation(leading_digit, field_bits, precision=mantissa_format)
def generate_scheme(self):
    """ main scheme generation

        Declares the fixed-point inputs x and y and drives the output
        "vr_out" with the conversion of Select(x > 1, 1, x + y).

        :return: single-element list holding self.implementation """
    Log.report(Log.Info, "width parameter is {}".format(self.width))

    integer_bits = 3
    fraction_bits = self.width - integer_bits
    in_format = fixed_point(integer_bits, fraction_bits)
    out_format = fixed_point(integer_bits, fraction_bits)

    entity = self.implementation
    # declaring the input signals and enabling their debug display
    sig_x = entity.add_input_signal("x", in_format)
    sig_y = entity.add_input_signal("y", in_format)
    for signal in (sig_x, sig_y):
        signal.set_attributes(debug=debug_fixed)

    above_one = sig_x > 1
    above_one.set_attributes(tag="test", debug=debug_std)

    full_sum = sig_x + sig_y
    selected = Select(
        above_one, 1, full_sum,
        tag="pre_result", debug=debug_fixed)

    entity.add_output_signal(
        "vr_out", Conversion(selected, precision=out_format))
    return [self.implementation]
def merge_product_in_heap(operand_list, pos_bit_heap, neg_bit_heap):
    """ generate product operand_list[0] * operand_list[1] and
        insert all the partial products into the heaps
        @p pos_bit_heap (positive bits) and @p neg_bit_heap (negative bits)

        `self` is not a parameter: it is read from the enclosing scope to
        choose between the booth radix-4 multiplier and the plain
        bit-by-bit partial product generation.

        :param operand_list: pair (a_i, b_i) of operand nodes
        :param pos_bit_heap: heap receiving the bits with positive weight
        :param neg_bit_heap: heap receiving the bits with negative weight """
    a_i, b_i = operand_list
    if self.booth_mode:
        booth_radix4_multiply(a_i, b_i, pos_bit_heap, neg_bit_heap)
        return

    # non-booth product generation
    a_i_precision = a_i.get_precision()
    b_i_precision = b_i.get_precision()
    a_i_signed = a_i_precision.get_signed()
    b_i_signed = b_i_precision.get_signed()
    a_i_size = a_i_precision.get_bit_size()
    b_i_size = b_i_precision.get_bit_size()
    # loop invariant, read once
    a_i_frac_size = a_i_precision.get_frac_size()

    for pp_index in range(a_i_size):
        # only the most significant bit of a signed operand is flagged
        a_j_signed = a_i_signed and (pp_index == a_i_size - 1)
        bit_a_j = BitSelection(a_i, pp_index)
        # partial product: b_i when bit pp_index of a_i is set, 0 otherwise
        pp = Select(equal_to(bit_a_j, 1), b_i, 0)
        # NOTE(review): the weight only subtracts a_i's frac size; confirm
        # that b_i's point position is accounted for elsewhere
        offset = pp_index - a_i_frac_size
        for b_index in range(b_i_size):
            b_k_signed = b_i_signed and (b_index == b_i_size - 1)
            # a bit goes to the negative heap when exactly one of its two
            # source bits is flagged as signed
            pp_signed = a_j_signed ^ b_k_signed
            pp_weight = offset + b_index
            local_bit = BitSelection(pp, b_index)
            if pp_signed:
                neg_bit_heap.insert_bit(pp_weight, local_bit)
            else:
                pos_bit_heap.insert_bit(pp_weight, local_bit)
def generate_scheme(self):
    """ main scheme generation

        Declares the fixed-point inputs x and y, builds a three-level
        nested Select and a Max(0, x - y) node, and drives the output
        "vr_out" with the conversion of their sum.

        :return: single-element list holding self.implementation """
    Log.report(Log.Info, "width parameter is {}".format(self.width))

    integer_bits = 3
    fraction_bits = self.width - integer_bits
    in_format = fixed_point(integer_bits, fraction_bits)
    out_format = fixed_point(integer_bits, fraction_bits)

    entity = self.implementation
    # declaring the input signals and enabling their debug display
    sig_x = entity.add_input_signal("x", in_format)
    sig_y = entity.add_input_signal("y", in_format)
    for signal in (sig_x, sig_y):
        signal.set_attributes(debug=debug_fixed)

    # tagged comparison node, not connected to the output
    above_one = sig_x > 1
    above_one.set_attributes(tag="test", debug=debug_std)

    diff = sig_x - sig_y
    zero = Constant(0)

    # conditions and selects are created outermost condition first,
    # innermost select first, as in the nested expression they replace
    outer_cond = zero > diff
    middle_cond = zero < sig_y
    inner_cond = LogicalAnd(zero > sig_x, zero < sig_y, tag="last_lev_cond")
    inner_select = Select(inner_cond, sig_x, zero, tag="last_lev_sel")
    middle_select = Select(middle_cond, diff, inner_select, tag="pre_select")
    outer_select = Select(
        outer_cond, middle_select, sig_y, tag="pre_result_select")

    clamped_diff = Max(0, sig_x - sig_y, tag="pre_result")
    total = Addition(clamped_diff, outer_select, tag="add")

    entity.add_output_signal(
        "vr_out", Conversion(total, precision=out_format))
    return [self.implementation]
def minmax_legalizer(optree):
    """ Replace a two-input node by Select(cmp(op0, op1), op0, op1)

        The comparison specifier is `predicate`, read from the enclosing
        scope. The attributes of optree are forwarded to the new Select.

        :param optree: node to legalize, its two first inputs are used
        :return: the Select node replacing optree """
    lhs, rhs = optree.get_input(0), optree.get_input(1)
    predicate_format = get_compatible_bool_format(optree)
    chooser = Comparison(
        lhs, rhs,
        specifier=predicate,
        precision=predicate_format,
        tag="minmax_pred")
    # NOTE(review): stage attributes are not forwarded to the comparison,
    # the original call was already disabled:
    # forward_stage_attributes(optree, comp)
    legalized = Select(chooser, lhs, rhs, precision=optree.get_precision())
    forward_attributes(optree, legalized)
    return legalized
def generate_scheme(self):
    """ Build the operation graph computing a remainder and/or a quotient
        of x by y with an iterative (bit per iteration) algorithm

        Inputs "x" and "y" are declared on self.implementation; in FULL_MODE
        an extra pointer input "quo" receives the quotient. self.mode selects
        what is returned by the generated code:
        - QUOTIENT_MODE: the quotient is returned
        - REMAINDER_MODE: the remainder is returned
        - FULL_MODE: the quotient is stored through quo and the remainder
          is returned

        :return: the Statement made of the special-case tests followed by
                 the main loop
        :raises NotImplementedError: if self.mode is not one of the three
                 modes listed above """
    int_precision = self.precision.get_integer_format()
    # We wish to compute vx / vy
    vx = self.implementation.add_input_variable(
        "x", self.precision, interval=self.input_intervals[0])
    vy = self.implementation.add_input_variable(
        "y", self.precision, interval=self.input_intervals[1])
    if self.mode is FULL_MODE:
        quo = self.implementation.add_input_variable(
            "quo", ML_Pointer_Format(int_precision))

    i = Variable("i", precision=int_precision, var_type=Variable.Local)
    q = Variable("q", precision=int_precision, var_type=Variable.Local)

    # shorthands for integer and floating-point constants
    CI = lambda v: Constant(v, precision=int_precision)
    CF = lambda v: Constant(v, precision=self.precision)

    vx_subnormal = Test(vx, specifier=Test.IsSubnormal, tag="vx_subnormal")
    vy_subnormal = Test(vy, specifier=Test.IsSubnormal, tag="vy_subnormal")

    DELTA_EXP = self.precision.get_mantissa_size()
    scale_factor = Constant(2.0**DELTA_EXP, precision=self.precision)
    inv_scale_factor = Constant(2.0**-DELTA_EXP, precision=self.precision)

    normalized_vx = Select(vx_subnormal, vx * scale_factor, vx, tag="scaled_vx")
    normalized_vy = Select(vy_subnormal, vy * scale_factor, vy, tag="scaled_vy")

    real_ex = ExponentExtraction(vx, tag="real_ex", precision=int_precision)
    real_ey = ExponentExtraction(vy, tag="real_ey", precision=int_precision)

    # if real_e<x/y> is +1023 then it may Overflow in -real_ex for ExponentInsertion
    # which only supports downto -1022 before falling into subnormal numbers (which are
    # not supported by ExponentInsertion)
    # each exponent is therefore split in two halves inserted separately
    real_ex_h0 = real_ex / 2
    real_ex_h1 = real_ex - real_ex_h0

    real_ey_h0 = real_ey / 2
    real_ey_h1 = real_ey - real_ey_h0

    EI = lambda v: ExponentInsertion(v, precision=self.precision)

    mx = Abs((vx * EI(-real_ex_h0)) * EI(-real_ex_h1), tag="mx")
    my = Abs((vy * EI(-real_ey_h0)) * EI(-real_ey_h1), tag="pre_my")

    # scale_ey is used to regain the unscaling of mx in the first loop
    # if real_ey >= real_ex, the first loop is never executed
    # so a different scaling is required
    mx_unscaling = Select(real_ey < real_ex, real_ey, real_ex)
    ey_half0 = (mx_unscaling) / 2
    ey_half1 = (mx_unscaling) - ey_half0

    scale_ey_half0 = ExponentInsertion(
        ey_half0, precision=self.precision, tag="scale_ey_half0")
    scale_ey_half1 = ExponentInsertion(
        ey_half1, precision=self.precision, tag="scale_ey_half1")

    # when vy is subnormal its mantissa is extracted from a scaled value
    # (alternative condition kept for reference:
    #  LogicalAnd(vy_subnormal, LogicalNot(vx_subnormal)))
    normal_cond = vy_subnormal
    my = Select(
        normal_cond, Abs(MantissaExtraction(vy * scale_factor)), my, tag="my")

    # vx / vy = vx * 2^-ex * 2^(ex-ey) / (vy * 2^-ey)
    # vx % vy
    post_mx = Variable(
        "post_mx", precision=self.precision, var_type=Variable.Local)

    # scaling for half comparison
    VY_SCALING = Select(vy_subnormal, 1.0, 0.5, precision=self.precision)
    VX_SCALING = Select(vy_subnormal, 2.0, 1.0, precision=self.precision)

    def LogicalXor(a, b):
        """ exclusive-or built from LogicalAnd / LogicalOr / LogicalNot """
        return LogicalOr(
            LogicalAnd(a, LogicalNot(b)), LogicalAnd(LogicalNot(a), b))

    rem_sign = Select(
        vx < 0, CF(-1), CF(1), precision=self.precision, tag="rem_sign")
    quo_sign = Select(
        LogicalXor(vx < 0, vy < 0), CI(-1), CI(1),
        precision=int_precision, tag="quo_sign")

    loop_watchdog = Variable(
        "loop_watchdog", precision=ML_Int32, var_type=Variable.Local)

    loop = Statement(
        real_ex, real_ey, mx, my, loop_watchdog,
        ReferenceAssign(loop_watchdog, 5000),
        ReferenceAssign(q, CI(0)),
        # step 1: one iteration per unit of exponent difference
        Loop(
            ReferenceAssign(i, CI(0)), i < (real_ex - real_ey),
            Statement(
                ReferenceAssign(i, i + CI(1)),
                ReferenceAssign(
                    q,
                    ((q << 1) + Select(mx >= my, CI(1), CI(0))).modify_attributes(tag="step1_q")),
                ReferenceAssign(
                    mx,
                    (CF(2) * (mx - Select(mx >= my, my, CF(0)))).modify_attributes(tag="step1_mx")),
                # loop watchdog
                ReferenceAssign(loop_watchdog, loop_watchdog - 1),
                ConditionBlock(loop_watchdog < 0, Return(-1)),
            ),
        ),
        # unscaling remainder
        ReferenceAssign(
            mx,
            ((mx * scale_ey_half0) * scale_ey_half1).modify_attributes(tag="scaled_rem")),
        ReferenceAssign(
            my,
            ((my * scale_ey_half0) * scale_ey_half1).modify_attributes(tag="scaled_rem_my")),
        # step 2: my is halved until it is no longer greater than |vy|
        Loop(
            Statement(), (my > Abs(vy)),
            Statement(
                ReferenceAssign(
                    q,
                    ((q << 1) + Select(mx >= Abs(my), CI(1), CI(0))).modify_attributes(tag="step2_q")),
                ReferenceAssign(
                    mx,
                    (mx - Select(mx >= Abs(my), Abs(my), CF(0))).modify_attributes(tag="step2_mx")),
                ReferenceAssign(my, (my * 0.5).modify_attributes(tag="step2_my")),
                # loop watchdog
                ReferenceAssign(loop_watchdog, loop_watchdog - 1),
                ConditionBlock(loop_watchdog < 0, Return(-1)),
            ),
        ),
        ReferenceAssign(q, q << 1),
        # step 3: subtract |vy| while mx is greater than |vy|
        Loop(
            ReferenceAssign(i, CI(0)), mx > Abs(vy),
            Statement(
                ReferenceAssign(
                    q,
                    (q + Select(mx > Abs(vy), CI(1), CI(0))).modify_attributes(tag="step3_q")),
                ReferenceAssign(
                    mx,
                    (mx - Select(mx > Abs(vy), Abs(vy), CF(0))).modify_attributes(tag="step3_mx")),
                # loop watchdog
                ReferenceAssign(loop_watchdog, loop_watchdog - 1),
                ConditionBlock(loop_watchdog < 0, Return(-1)),
            ),
        ),
        ReferenceAssign(q, q + Select(mx >= Abs(vy), CI(1), CI(0))),
        ReferenceAssign(
            mx,
            (mx - Select(mx >= Abs(vy), Abs(vy), CF(0))).modify_attributes(tag="pre_half_mx")),
        ConditionBlock(
            # actual comparison is mx > | abs(vy * 0.5) |; to avoid rounding
            # effects when vy is subnormal, mx is doubled (VX_SCALING)
            # instead of halving vy (VY_SCALING)
            ((mx * VX_SCALING) > Abs(vy * VY_SCALING)).modify_attributes(tag="half_test"),
            Statement(
                ReferenceAssign(q, q + CI(1)),
                ReferenceAssign(mx, (mx - Abs(vy)))
            )
        ),
        ConditionBlock(
            # if the remainder is exactly half the divisor
            # we need to make sure the quotient is even
            LogicalAnd(
                Equal(mx * VX_SCALING, Abs(vy * VY_SCALING)),
                Equal(Modulo(q, CI(2)), CI(1)),
            ),
            Statement(
                ReferenceAssign(q, q + CI(1)),
                ReferenceAssign(mx, (mx - Abs(vy)))
            )
        ),
        ReferenceAssign(mx, rem_sign * mx),
        # the quotient is reduced modulo 2**self.quotient_size
        ReferenceAssign(
            q,
            Modulo(
                TypeCast(q, precision=self.precision.get_unsigned_integer_format()),
                Constant(2**self.quotient_size,
                         precision=self.precision.get_unsigned_integer_format()),
                tag="mod_q")
        ),
        ReferenceAssign(q, quo_sign * q),
    )

    # NOTES: Warning QuotientReturn must always precede RemainderReturn
    if self.mode is QUOTIENT_MODE:
        QuotientReturn = Return
        RemainderReturn = lambda _: Statement()
    elif self.mode is REMAINDER_MODE:
        QuotientReturn = lambda _: Statement()
        RemainderReturn = Return
    elif self.mode is FULL_MODE:
        QuotientReturn = lambda v: ReferenceAssign(
            Dereference(quo, precision=int_precision), v)
        RemainderReturn = Return
    else:
        # NotImplemented is a comparison sentinel, not an exception class
        raise NotImplementedError

    # quotient invalid value
    QUO_INVALID_VALUE = 0

    mod_scheme = Statement(
        # x or y is NaN, a NaN is returned
        ConditionBlock(
            LogicalOr(Test(vx, specifier=Test.IsNaN),
                      Test(vy, specifier=Test.IsNaN)),
            Statement(
                QuotientReturn(QUO_INVALID_VALUE),
                RemainderReturn(FP_QNaN(self.precision))
            ),
        ),
        # y is zero
        ConditionBlock(
            Test(vy, specifier=Test.IsZero),
            Statement(
                QuotientReturn(QUO_INVALID_VALUE),
                RemainderReturn(FP_QNaN(self.precision))
            ),
        ),
        # x is zero
        ConditionBlock(
            Test(vx, specifier=Test.IsZero),
            Statement(
                QuotientReturn(0),
                RemainderReturn(vx)
            ),
        ),
        # x is infinite
        ConditionBlock(
            Test(vx, specifier=Test.IsInfty),
            Statement(
                QuotientReturn(QUO_INVALID_VALUE),
                RemainderReturn(FP_QNaN(self.precision))
            )
        ),
        # y is infinite
        ConditionBlock(
            Test(vy, specifier=Test.IsInfty),
            Statement(
                QuotientReturn(0),
                RemainderReturn(vx),
            )
        ),
        # |x| < |y| / 2
        ConditionBlock(
            Abs(vx) < Abs(vy * 0.5),
            Statement(
                QuotientReturn(0),
                RemainderReturn(vx),
            )
        ),
        ConditionBlock(
            Equal(vx, vy),
            Statement(
                QuotientReturn(1),
                # 0 with the same sign as x
                RemainderReturn(vx - vx),
            ),
        ),
        ConditionBlock(
            Equal(vx, -vy),
            Statement(
                # quotient is -1
                QuotientReturn(-1),
                # 0 with the same sign as x
                RemainderReturn(vx - vx),
            ),
        ),
        loop,
        QuotientReturn(q),
        RemainderReturn(mx),
    )

    # NOTE(review): quo_scheme is built but never returned; mod_scheme
    # already covers QUOTIENT_MODE through QuotientReturn / RemainderReturn
    quo_scheme = Statement(
        # x or y is NaN, a NaN is returned
        ConditionBlock(
            LogicalOr(Test(vx, specifier=Test.IsNaN),
                      Test(vy, specifier=Test.IsNaN)),
            Return(QUO_INVALID_VALUE),
        ),
        # y is zero
        ConditionBlock(
            Test(vy, specifier=Test.IsZero),
            Return(QUO_INVALID_VALUE),
        ),
        ConditionBlock(
            Test(vx, specifier=Test.IsZero),
            Return(0),
        ),
        ConditionBlock(
            Test(vx, specifier=Test.IsInfty),
            Return(QUO_INVALID_VALUE),
        ),
        ConditionBlock(
            Test(vy, specifier=Test.IsInfty),
            Return(QUO_INVALID_VALUE),
        ),
        ConditionBlock(
            Abs(vx) < Abs(vy * 0.5),
            Return(0),
        ),
        ConditionBlock(
            Equal(vx, vy),
            Return(1),
        ),
        ConditionBlock(
            Equal(vx, -vy),
            Return(-1),
        ),
        loop,
        Return(q),
    )

    return mod_scheme
def generate_scheme(self):
    """ Build the operation graph computing the division x / y

        The quotient is obtained from a ReciprocalSeed approximation of
        1 / y refined by compute_reduced_reciprocal and
        compute_reduced_division. When the input intervals are unknown or
        show a risk of overflow / underflow, both operands are first scaled
        by a power of two derived from their exponent and the result is
        unscaled afterwards. Special values (NaN, infinities, zeros) are
        handled by a tree of ConditionBlock placed before the numerical path.

        :return: the ConditionBlock tree implementing the division
        :raises NotImplementedError: if the target processor does not support
                 ReciprocalSeed for self.precision """
    # We wish to compute vx / vy
    vx = self.implementation.add_input_variable(
        "x", self.precision, interval=self.input_intervals[0])
    vy = self.implementation.add_input_variable(
        "y", self.precision, interval=self.input_intervals[1])

    # maximum exponent magnitude (to avoid overflow / underflow during
    # intermediary computations)
    int_prec = self.precision.get_integer_format()
    max_exp_mag = Constant(self.precision.get_emax() - 1, precision=int_prec)

    exact_ex = ExponentExtraction(
        vx, tag="exact_ex", precision=int_prec, debug=debug_multi)
    exact_ey = ExponentExtraction(
        vy, tag="exact_ey", precision=int_prec, debug=debug_multi)

    # exponents clamped to [-max_exp_mag, max_exp_mag]
    ex = Max(Min(exact_ex, max_exp_mag, precision=int_prec),
             -max_exp_mag, tag="ex", precision=int_prec)
    ey = Max(Min(exact_ey, max_exp_mag, precision=int_prec),
             -max_exp_mag, tag="ey", precision=int_prec)

    Attributes.set_default_rounding_mode(ML_RoundToNearest)
    Attributes.set_default_silent(True)

    # power-of-two factors cancelling the (clamped) exponent of each input
    scaling_factor_x = ExponentInsertion(
        -ex, tag="sfx_ei", precision=self.precision, debug=debug_multi)
    scaling_factor_y = ExponentInsertion(
        -ey, tag="sfy_ei", precision=self.precision, debug=debug_multi)

    def test_interval_out_of_bound_risk(x_range, y_range):
        """ Try to determine from x and y's interval if there is a risk
            of underflow or overflow """
        div_range = abs(x_range / y_range)
        underflow_risk = sollya.inf(div_range) < S2**(
            self.precision.get_emin_normal() + 2)
        overflow_risk = sollya.sup(div_range) > S2**(
            self.precision.get_emax() - 2)
        return underflow_risk or overflow_risk

    # without interval information the risk is assumed
    out_of_bound_risk = (
        self.input_intervals[0] is None or self.input_intervals[1] is None
    ) or test_interval_out_of_bound_risk(
        self.input_intervals[0], self.input_intervals[1])
    Log.report(Log.Debug,
               "out_of_bound_risk: {}".format(out_of_bound_risk))

    # scaled version of vx and vy, to avoid overflow and underflow
    if out_of_bound_risk:
        scaled_vx = vx * scaling_factor_x
        scaled_vy = vy * scaling_factor_y
        scaled_interval = MetaIntervalList(
            [MetaInterval(Interval(-2, -1)), MetaInterval(Interval(1, 2))])
        scaled_vx.set_attributes(
            tag="scaled_vx", debug=debug_multi, interval=scaled_interval)
        scaled_vy.set_attributes(
            tag="scaled_vy", debug=debug_multi, interval=scaled_interval)
        seed_interval = 1 / scaled_interval
        # reported through the logger like the other debug messages
        Log.report(Log.Debug,
                   "seed_interval=1/{}={}".format(scaled_interval, seed_interval))
    else:
        scaled_vx = vx
        scaled_vy = vy
        seed_interval = 1 / scaled_vy.get_interval()

    # We need a first approximation to 1 / scaled_vy
    dummy_seed = ReciprocalSeed(
        EmptyOperand(precision=self.precision), precision=self.precision)
    if self.processor.is_supported_operation(dummy_seed, self.language):
        init_approx = ReciprocalSeed(
            scaled_vy, precision=self.precision, tag="init_approx",
            debug=debug_multi)
    else:
        # generate tabulated version of seed (not implemented);
        # the default attributes set above are restored before leaving
        Attributes.unset_default_rounding_mode()
        Attributes.unset_default_silent()
        raise NotImplementedError

    Attributes.unset_default_rounding_mode()
    Attributes.unset_default_silent()

    # check if inputs are zeros
    x_zero = Test(vx, specifier=Test.IsZero, likely=False, precision=ML_Bool)
    y_zero = Test(vy, specifier=Test.IsZero, likely=False, precision=ML_Bool)

    # comp_sign selects the negative special value in every branch below
    comp_sign = Test(
        vx, vy, specifier=Test.CompSign, tag="comp_sign", debug=debug_multi)

    # check if divisor is NaN
    y_nan = Test(vy, specifier=Test.IsNaN, likely=False, precision=ML_Bool)

    # check if inputs are signaling NaNs
    x_snan = Test(
        vx, specifier=Test.IsSignalingNaN, likely=False, precision=ML_Bool)
    y_snan = Test(
        vy, specifier=Test.IsSignalingNaN, likely=False, precision=ML_Bool)

    # check if inputs are infinities
    x_inf = Test(
        vx, specifier=Test.IsInfty, likely=False, tag="x_inf",
        precision=ML_Bool)
    y_inf = Test(
        vy, specifier=Test.IsInfty, likely=False, tag="y_inf",
        debug=debug_multi, precision=ML_Bool)

    # initial reciprocal approximation of 1.0 / scaled_vy
    inv_iteration_list, recp_approx = compute_reduced_reciprocal(
        init_approx, scaled_vy, self.num_iter)
    recp_approx.set_attributes(tag="recp_approx", debug=debug_multi)

    # approximation of scaled_vx / scaled_vy
    yerr_last, reduced_div_approx, div_iteration_list = compute_reduced_division(
        scaled_vx, scaled_vy, recp_approx)

    eval_error_range, div_eval_error_range = self.solve_eval_error(
        init_approx, recp_approx, reduced_div_approx, scaled_vx, scaled_vy,
        inv_iteration_list, div_iteration_list, S2**-7, seed_interval)
    eval_error = sup(abs(eval_error_range))

    # annotating the approximations with their value intervals
    recp_interval = 1 / scaled_vy.get_interval() + eval_error_range
    recp_approx.set_interval(recp_interval)

    div_interval = (scaled_vx.get_interval() / scaled_vy.get_interval()
                    + div_eval_error_range)
    reduced_div_approx.set_interval(div_interval)
    reduced_div_approx.set_tag("reduced_div_approx")

    if out_of_bound_risk:
        unscaled_result = scaling_div_result(
            reduced_div_approx, ex, scaling_factor_y, self.precision)
        subnormal_result = subnormalize_result(
            recp_approx, reduced_div_approx, ex, ey, yerr_last,
            self.precision)
    else:
        unscaled_result = reduced_div_approx
        subnormal_result = reduced_div_approx

    x_inf_or_nan = Test(vx, specifier=Test.IsInfOrNaN, likely=False)
    y_inf_or_nan = Test(
        vy, specifier=Test.IsInfOrNaN, likely=False, tag="y_inf_or_nan",
        debug=debug_multi)

    # generate IEEE exception raising only if libm-compliant
    # mode is enabled
    enable_raise = self.libm_compliant

    # managing special cases
    pre_scheme = ConditionBlock(
        x_inf_or_nan,
        ConditionBlock(
            x_inf,
            # x is infinite
            ConditionBlock(
                y_inf_or_nan,
                Statement(
                    # signaling NaNs raise invalid operation flags
                    ConditionBlock(y_snan, Raise(ML_FPE_Invalid)) if enable_raise else Statement(),
                    Return(FP_QNaN(self.precision)),
                ),
                ConditionBlock(
                    comp_sign,
                    Return(FP_MinusInfty(self.precision)),
                    Return(FP_PlusInfty(self.precision)))),
            # x is NaN
            Statement(
                ConditionBlock(x_snan, Raise(ML_FPE_Invalid)) if enable_raise else Statement(),
                Return(FP_QNaN(self.precision)))),
        ConditionBlock(
            x_zero,
            ConditionBlock(
                LogicalOr(y_zero, y_nan, precision=ML_Bool),
                Statement(
                    ConditionBlock(y_snan, Raise(ML_FPE_Invalid)) if enable_raise else Statement(),
                    Return(FP_QNaN(self.precision))),
                # zero divided by a non-zero, non-NaN value: the sign of the
                # zero depends on both operands, as for an infinite divisor
                Return(
                    Select(comp_sign,
                           FP_MinusZero(self.precision),
                           FP_PlusZero(self.precision)))),
            ConditionBlock(
                y_inf_or_nan,
                ConditionBlock(
                    y_inf,
                    Return(
                        Select(comp_sign,
                               FP_MinusZero(self.precision),
                               FP_PlusZero(self.precision))),
                    Statement(
                        ConditionBlock(y_snan, Raise(ML_FPE_Invalid)) if enable_raise else Statement(),
                        Return(FP_QNaN(self.precision)))),
                ConditionBlock(
                    y_zero,
                    Statement(
                        Raise(ML_FPE_DivideByZero) if enable_raise else Statement(),
                        ConditionBlock(
                            comp_sign,
                            Return(FP_MinusInfty(self.precision)),
                            Return(FP_PlusInfty(self.precision)))),
                    # managing numerical value result cases
                    Statement(
                        recp_approx,
                        reduced_div_approx,
                        ConditionBlock(
                            Test(unscaled_result, specifier=Test.IsSubnormal,
                                 likely=False),
                            # result is subnormal
                            Statement(
                                # inexact flag should have been raised when
                                # computing yerr_last
                                Return(subnormal_result),
                            ),
                            # result is normal
                            Statement(
                                # inexact flag should have been raised when
                                # computing yerr_last
                                Return(unscaled_result))),
                    )))))

    # NOTE(review): the rounding mode is neither saved nor restored around
    # the computation; a disabled variant wrapped pre_scheme as follows:
    #   rnd_mode = GetRndMode()
    #   scheme = Statement(
    #       rnd_mode,
    #       SetRndMode(ML_RoundToNearest),
    #       yerr_last,
    #       SetRndMode(rnd_mode),
    #       unscaled_result,
    #       ClearException(),
    #       pre_scheme
    #   )
    return pre_scheme
def generate_scalar_scheme(self, vx, vy):
    """ Build the operation graph computing pow(vx, vy)

        The result is evaluated as a product of powers of two whose
        exponents are derived from vy, the exponent of vx and an
        approximation of the logarithm of its mantissa. A list of special
        cases (zeros, infinities, signaling NaNs, ...) is tested before the
        generic path.

        :param vx: input node for the base, tagged "x"
        :param vy: input node for the exponent, tagged "y"
        :return: Statement made of the special case tests followed by the
                 default Return """
    # fixing inputs' node tag
    vx.set_attributes(tag="x")
    vy.set_attributes(tag="y")

    int_precision = self.precision.get_integer_format()

    # assuming x = m.2^e (m in [1, 2[)
    #
    # pow(x, y) = x^(y)
    #           = exp(y * log(x))
    #           = 2^(y * log2(x))
    #           = 2^(y * (log2(m) + e))
    #
    e = ExponentExtraction(vx, tag="e", precision=int_precision)
    m = MantissaExtraction(vx, tag="m", precision=self.precision)

    # approximation log2(m)

    # retrieving processor inverse approximation table
    dummy_var = Variable("dummy", precision=self.precision)
    dummy_div_seed = ReciprocalSeed(dummy_var, precision=self.precision)
    inv_approx_table = self.processor.get_recursive_implementation(
        dummy_div_seed, language=None,
        table_getter=lambda self: self.approx_table_map)

    log_f = sollya.log(sollya.x)  # /sollya.log(self.basis)

    ml_log_args = ML_GenericLog.get_default_args(
        precision=self.precision, basis=2)
    ml_log = ML_GenericLog(ml_log_args)
    log_table, log_table_tho, table_index_range = ml_log.generate_log_table(
        log_f, inv_approx_table)
    log_approx = ml_log.generate_reduced_log_split(
        Abs(m, precision=self.precision), log_f, inv_approx_table, log_table)

    # the logarithm of a zero input is forced to -infinity
    log_approx = Select(
        Equal(vx, 0), FP_MinusInfty(self.precision), log_approx)
    log_approx.set_attributes(tag="log_approx", debug=debug_multi)
    r = Multiplication(log_approx, vy, tag="r", debug=debug_multi)

    # 2^(y * (log2(m) + e)) = 2^(y * log2(m)) * 2^(y * e)
    #
    # log_approx = log2(Abs(m))
    # r = y * log_approx ~ y * log2(m)
    #
    # NOTES: manage cases where e is negative and
    # (y * log2(m)) AND (y * e) could cancel out
    # if e positive, whichever the sign of y (y * log2(m)) and (y * e) CANNOT
    # be of opposite signs
    # log2(m) in [0, 1[ so cancellation can occur only if e == -1
    # we split 2^x in 2^x = 2^t0 * 2^t1
    # if e < 0: t0 = y * (log2(m) + e) (integer part EY + LY), t1 = 0
    # else:     t0 = EY, the integer nearest to y * e, and t1 = r
    t_cond = e < 0

    # e_y ~ e * y
    e_f = Conversion(e, precision=self.precision)

    EY = NearestInteger(e_f * vy, tag="EY", precision=self.precision)
    LY = NearestInteger(log_approx * vy, tag="LY", precision=self.precision)
    t0_int = Select(t_cond, EY + LY, EY, tag="t0_int")
    # NOTE(review): when e >= 0, t0_int is EY so this fractional part is
    # EY - EY; confirm that the fractional part of e * y may be dropped
    t0_frac = Select(
        t_cond,
        FMA(e_f, vy, -EY) + FMA(log_approx, vy, -LY),
        EY - t0_int,
        tag="t0_frac")

    ml_exp2_args = ML_Exp2.get_default_args(precision=self.precision)
    ml_exp2 = ML_Exp2(ml_exp2_args)

    exp2_t0_frac = ml_exp2.generate_scalar_scheme(t0_frac, inline_select=True)
    exp2_t0_frac.set_attributes(tag="exp2_t0_frac", debug=debug_multi)

    exp2_t0_int = ExponentInsertion(
        Conversion(t0_int, precision=int_precision),
        precision=self.precision, tag="exp2_t0_int")

    t1 = Select(t_cond, Constant(0, precision=self.precision), r)
    exp2_t1 = ml_exp2.generate_scalar_scheme(t1, inline_select=True)
    exp2_t1.set_attributes(tag="exp2_t1", debug=debug_multi)

    # the result sign is currently always positive; a sign depending on the
    # parity of y was considered:
    # Select(n_is_odd, CopySign(vx, Constant(1.0, precision=self.precision)), 1)
    result_sign = Constant(1.0, precision=self.precision)

    y_int = NearestInteger(vy, precision=self.precision)
    y_is_integer = Equal(y_int, vy)
    # bound above which the modulo based parity test is not applied
    parity_limit = 2**(self.precision.get_mantissa_size() + 1)
    y_is_even = LogicalOr(
        # if y is a number (exc. inf) greater than 2**mantissa_size * 2,
        # then it is an integer multiple of 2 => even
        Abs(vy) >= parity_limit,
        LogicalAnd(
            # both conditions must be part of the graph: Python's `and`
            # would be evaluated on the node objects themselves
            LogicalAnd(y_is_integer, Abs(vy) < parity_limit),
            # we want to limit the modulo computation to an integer input
            Equal(Modulo(Conversion(y_int, precision=int_precision), 2), 0)
        )
    )
    y_is_odd = LogicalAnd(
        LogicalAnd(
            Abs(vy) < parity_limit,
            y_is_integer
        ),
        # a non-zero remainder is tested rather than a remainder equal to 1
        # so that the test does not depend on the sign Modulo gives to the
        # remainder of a negative y
        LogicalNot(
            Equal(Modulo(Conversion(y_int, precision=int_precision), 2), 0))
    )

    # special cases management
    special_case_results = Statement(
        # x is sNaN OR y is sNaN
        ConditionBlock(
            LogicalOr(Test(vx, specifier=Test.IsSignalingNaN),
                      Test(vy, specifier=Test.IsSignalingNaN)),
            Return(FP_QNaN(self.precision))
        ),
        # pow(x, ±0) is 1 if x is not a signaling NaN
        ConditionBlock(
            Test(vy, specifier=Test.IsZero),
            Return(Constant(1.0, precision=self.precision))
        ),
        # pow(±0, y) is ±∞ and signals the divideByZero exception for y an odd integer <0
        ConditionBlock(
            LogicalAnd(Test(vx, specifier=Test.IsZero),
                       LogicalAnd(y_is_odd, vy < 0)),
            Return(Select(Test(vx, specifier=Test.IsPositiveZero),
                          FP_PlusInfty(self.precision),
                          FP_MinusInfty(self.precision))),
        ),
        # pow(±0, −∞) is +∞ with no exception
        ConditionBlock(
            LogicalAnd(Test(vx, specifier=Test.IsZero),
                       Test(vy, specifier=Test.IsNegativeInfty)),
            Return(FP_PlusInfty(self.precision)),
        ),
        # pow(±0, +∞) is +0 with no exception
        ConditionBlock(
            LogicalAnd(Test(vx, specifier=Test.IsZero),
                       Test(vy, specifier=Test.IsPositiveInfty)),
            Return(FP_PlusZero(self.precision)),
        ),
        # pow(±0, y) is ±0 for finite y>0 an odd integer
        ConditionBlock(
            LogicalAnd(Test(vx, specifier=Test.IsZero),
                       LogicalAnd(y_is_odd, vy > 0)),
            Return(vx),
        ),
        # pow(−1, ±∞) is 1 with no exception
        ConditionBlock(
            LogicalAnd(Equal(vx, -1), Test(vy, specifier=Test.IsInfty)),
            Return(Constant(1.0, precision=self.precision)),
        ),
        # pow(+1, y) is 1 for any y (even a quiet NaN)
        ConditionBlock(
            Equal(vx, 1),
            Return(Constant(1.0, precision=self.precision)),
        ),
        # pow(x, +∞) is +0 for −1<x<1
        ConditionBlock(
            LogicalAnd(Abs(vx) < 1, Test(vy, specifier=Test.IsPositiveInfty)),
            Return(FP_PlusZero(self.precision))
        ),
        # pow(x, +∞) is +∞ for x<−1 or for 1<x (including ±∞)
        ConditionBlock(
            LogicalAnd(Abs(vx) > 1, Test(vy, specifier=Test.IsPositiveInfty)),
            Return(FP_PlusInfty(self.precision))
        ),
        # pow(x, −∞) is +∞ for −1<x<1
        ConditionBlock(
            LogicalAnd(Abs(vx) < 1, Test(vy, specifier=Test.IsNegativeInfty)),
            Return(FP_PlusInfty(self.precision))
        ),
        # pow(x, −∞) is +0 for x<−1 or for 1<x (including ±∞)
        ConditionBlock(
            LogicalAnd(Abs(vx) > 1, Test(vy, specifier=Test.IsNegativeInfty)),
            Return(FP_PlusZero(self.precision))
        ),
        # pow(+∞, y) is +0 for a number y < 0
        ConditionBlock(
            LogicalAnd(Test(vx, specifier=Test.IsPositiveInfty), vy < 0),
            Return(FP_PlusZero(self.precision))
        ),
        # pow(+∞, y) is +∞ for a number y > 0
        ConditionBlock(
            LogicalAnd(Test(vx, specifier=Test.IsPositiveInfty), vy > 0),
            Return(FP_PlusInfty(self.precision))
        ),
        # pow(−∞, y) is −0 for finite y < 0 an odd integer
        # TODO: check y is finite
        ConditionBlock(
            LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty),
                       LogicalAnd(y_is_odd, vy < 0)),
            Return(FP_MinusZero(self.precision)),
        ),
        # pow(−∞, y) is −∞ for finite y > 0 an odd integer
        # TODO: check y is finite
        ConditionBlock(
            LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty),
                       LogicalAnd(y_is_odd, vy > 0)),
            Return(FP_MinusInfty(self.precision)),
        ),
        # pow(−∞, y) is +0 for finite y < 0 and not an odd integer
        # TODO: check y is finite
        ConditionBlock(
            LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty),
                       LogicalAnd(LogicalNot(y_is_odd), vy < 0)),
            Return(FP_PlusZero(self.precision)),
        ),
        # pow(−∞, y) is +∞ for finite y > 0 and not an odd integer
        # TODO: check y is finite
        ConditionBlock(
            LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty),
                       LogicalAnd(LogicalNot(y_is_odd), vy > 0)),
            Return(FP_PlusInfty(self.precision)),
        ),
        # pow(±0, y) is +∞ and signals the divideByZero exception for finite y<0 and not an odd integer
        # TODO: signal divideByZero exception
        ConditionBlock(
            LogicalAnd(Test(vx, specifier=Test.IsZero),
                       LogicalAnd(LogicalNot(y_is_odd), vy < 0)),
            Return(FP_PlusInfty(self.precision)),
        ),
        # pow(±0, y) is +0 for finite y>0 and not an odd integer
        ConditionBlock(
            LogicalAnd(Test(vx, specifier=Test.IsZero),
                       LogicalAnd(LogicalNot(y_is_odd), vy > 0)),
            Return(FP_PlusZero(self.precision)),
        ),
    )

    # manage n=1 separately to avoid catastrophic propagation of errors
    # between log2 and exp2 to eventually compute the identity function
    # test-case #3
    result = Statement(
        special_case_results,
        # fallback default cases
        Return(result_sign * exp2_t1 * exp2_t0_int * exp2_t0_frac))
    return result
def generate_scalar_scheme(self, vx):
    """ Generate the implementation scheme for the hyperbolic tangent
        meta-function.

        The computation is done on abs(vx) and split in three ranges:
        - abs(vx) < near_zero_bound: scheme built by
          self.generate_approx_poly_near_zero
        - near_zero_bound <= abs(vx) < high_bound: piecewise polynomial
          approximation
        - otherwise: the constant 1.0
        The result is negated when vx < 0.

        @param vx input node
        @return Return node wrapping the final selection """
    # tanh(x) = sinh(x) / cosh(x)
    #         = (e^x - e^-x) / (e^x + e^-x)
    #         = (e^(2x) - 1) / (e^(2x) + 1)
    # when x -> +inf, tanh(x) -> 1
    # when x -> -inf, tanh(x) -> -1
    # near 0: e^x    ~ 1 + x + x^2 / 2 + x^3 / 6 + ...
    #         e^(-x) ~ 1 - x + x^2 / 2 - x^3 / 6 + ...
    # so tanh(x) ~ (2 (x + x^3/6 + ...)) / (2 + x^2 + ...) ~ x
    # We can divide the input interval into 3 parts
    # positive, around 0, and finally negative

    # Possible argument reduction
    # x = m.2^E = k * log(2) + r
    # (k != 0) => tanh(x) = (2^(2k) * e^(2r) - 1) / (2^(2k) * e^(2r) + 1)
    #                     = (1 - e^(-2r) / 2^(2k)) / (1 + e^(-2r) / 2^(2k))
    #
    # tanh(x) = (e^(2x) - 1) / (e^(2x) + 1)
    #         = (e^(2x) + 1 - 1 - 1) / (e^(2x) + 1)
    #         = 1 - 2 / (e^(2x) + 1)

    # tanh is odd so we reduce the computation to the absolute value of
    # vx
    abs_vx = Abs(vx, precision=self.precision)

    # if p is the expected output precision
    # x > (p+2) * log(2) / 2 => tanh(x) = 1 - eps
    # where eps < 1/2 * 2^-p
    p = self.precision.get_mantissa_size()
    high_bound = (p + 2) * sollya.log(2) / 2
    near_zero_bound = 0.125
    interval_num = 1024
    Log.report(Log.Verbose,
               "high_bound={}, near_zero_bound={}, interval_num={}",
               float(high_bound), near_zero_bound, interval_num)

    # the sub-interval size is replaced by a power of two, the number of
    # sub-intervals is doubled and high_bound is recomputed accordingly
    interval_size = (high_bound - near_zero_bound) / interval_num
    new_interval_size = S2**int(sollya.log2(interval_size))
    interval_num *= 2
    high_bound = new_interval_size * interval_num + near_zero_bound
    Log.report(Log.Verbose,
               "high_bound={}, near_zero_bound={}, interval_num={}",
               float(high_bound), near_zero_bound, interval_num)

    ERROR_THRESHOLD = S2**-p
    Log.report(Log.Info, "ERROR_THRESHOLD={}", ERROR_THRESHOLD)

    # Near 0 approximation (the returned error is not used here)
    near_zero_scheme, _ = self.generate_approx_poly_near_zero(
        sollya.tanh(sollya.x), near_zero_bound, S2**-p, abs_vx)

    # approximation parameters
    poly_degree = 7

    # NOTE: this modifies a global sollya setting
    sollya.settings.points = 117

    approx_scheme, approx_error = piecewise_approximation(
        sollya.tanh,
        abs_vx,
        self.precision,
        bound_low=near_zero_bound,
        bound_high=high_bound,
        num_intervals=interval_num,
        max_degree=poly_degree,
        error_threshold=ERROR_THRESHOLD)
    Log.report(Log.Warning, "approx_error={}".format(approx_error))

    comp_near_zero_bound = abs_vx < near_zero_bound
    comp_near_zero_bound.set_attributes(tag="comp_near_zero_bound",
                                        debug=debug_multi)
    comp_high_bound = abs_vx < high_bound
    comp_high_bound.set_attributes(tag="comp_high_bound", debug=debug_multi)

    complete_scheme = Select(
        comp_near_zero_bound,
        near_zero_scheme,
        Select(comp_high_bound,
               approx_scheme,
               Constant(1.0, precision=self.precision)))

    # restoring the sign of the input (tanh is odd)
    scheme = Return(Select(vx < 0, Negation(complete_scheme), complete_scheme),
                    precision=self.precision)
    return scheme
def generic_atan2_generate(self, _vx, vy=None):
    """ Build the scheme for atan / atan2.

        If vy is None, compute atan(_vx), else compute atan2(vy, _vx).

        self.method selects the approximation:
        - "piecewise": piecewise polynomial on the reduced argument
          (available for both atan and atan2)
        - "single": single odd polynomial, only available for atan

        @param _vx input node (single argument of atan, or second argument
                   of atan2)
        @param vy optional node, first argument of atan2
        @return Statement wrapping the Return of the result
        @raise NotImplementedError if self.method is unknown, or if the
               "single" method is requested together with vy """

    if vy is None:
        # approximation
        # if abs_vx <= 1.0 then atan(abs_vx) is directly approximated
        # if abs_vx > 1.0 then atan(abs_vx) = pi/2 - atan(1 / abs_vx)
        #
        # for vx >= 0, atan(vx) = atan(abs_vx)
        #
        # for vx < 0, atan(vx) = -atan(abs_vx)
        #                      = -pi/2 + atan(1 / abs_vx)
        vx = _vx
        sign_cond = vx < 0
        abs_vx = Select(vx < 0, -vx, vx, tag="abs_vx", debug=debug_multi)
        bound_cond = abs_vx > 1
        inv_abs_vx = 1 / abs_vx

        # condition to select subtraction
        cond = LogicalOr(LogicalAnd(vx < 0, LogicalNot(bound_cond)),
                         vx > 1,
                         tag="cond",
                         debug=debug_multi)

        # reduced argument
        red_vx = Select(bound_cond,
                        inv_abs_vx,
                        abs_vx,
                        tag="red_vx",
                        debug=debug_multi)

        offset = None
    else:
        # bound_cond is True iff Abs(vy / _vx) > 1.0
        bound_cond = Abs(vy) > Abs(_vx)
        bound_cond.set_attributes(tag="bound_cond", debug=debug_multi)
        # sign_cond is True when _vx and vy are of opposite signs.
        # It is equivalent to (_vx * vy) < 0 but uses a cast to signed
        # integer and a bitwise xor to get the answer rapidly
        fast_sign_cond = BitLogicXor(
            TypeCast(_vx, precision=self.precision.get_integer_format()),
            TypeCast(vy, precision=self.precision.get_integer_format()),
            precision=self.precision.get_integer_format()) < 0
        sign_cond = fast_sign_cond
        sign_cond.set_attributes(tag="sign_cond", debug=debug_multi)

        # condition to select subtraction
        # TODO: could be accelerated if LogicalXor existed
        slow_cond = LogicalOr(
            LogicalAnd(sign_cond, LogicalNot(bound_cond)),  # -1 <= (vy / _vx) < 0
            LogicalAnd(bound_cond, LogicalNot(sign_cond)),  # (vy / _vx) > 1
            tag="cond",
            debug=debug_multi)
        cond = slow_cond

        numerator = Select(bound_cond, _vx, vy,
                           tag="numerator", debug=debug_multi)
        denominator = Select(bound_cond, vy, _vx,
                             tag="denominator", debug=debug_multi)
        # reduced argument
        red_vx = Abs(numerator) / Abs(denominator)
        red_vx.set_attributes(tag="red_vx", debug=debug_multi)

        offset = Select(
            _vx > 0,
            Constant(0, precision=self.precision),
            # vx < 0
            Select(
                sign_cond,
                # vy > 0
                Constant(sollya.pi, precision=self.precision),
                Constant(-sollya.pi, precision=self.precision),
                precision=self.precision),
            precision=self.precision,
            tag="offset")

    approx_fct = sollya.atan(sollya.x)

    if self.method == "piecewise":
        sign_vx = Select(cond, -1, 1, precision=self.precision,
                         tag="sign_vx", debug=debug_multi)
        cst_sign = Select(sign_cond, -1, 1, precision=self.precision,
                          tag="cst_sign", debug=debug_multi)
        cst = cst_sign * Select(
            bound_cond, sollya.pi / 2, 0, precision=self.precision)
        cst.set_attributes(tag="cst", debug=debug_multi)

        bound_low = 0.0
        bound_high = 1.0
        num_intervals = self.num_sub_intervals
        error_threshold = S2**-(self.precision.get_mantissa_size() + 8)

        approx, eval_error = piecewise_approximation(
            approx_fct,
            red_vx,
            self.precision,
            bound_low=bound_low,
            bound_high=bound_high,
            max_degree=None,
            num_intervals=num_intervals,
            error_threshold=error_threshold,
            odd=True)

        result = cst + sign_vx * approx
        result.set_attributes(tag="result", precision=self.precision,
                              debug=debug_multi)

    elif self.method == "single":
        if vy is not None:
            # the polynomial below is evaluated on vx / abs_vx which are only
            # defined for the single-argument atan: fail explicitly rather
            # than with an UnboundLocalError further down
            raise NotImplementedError(
                "method \"single\" only supports atan(x); "
                "use method \"piecewise\" for atan2(y, x)")

        approx_interval = Interval(0, 1.0)
        # determining the degree of the polynomial approximation
        poly_degree_range = sollya.guessdegree(
            approx_fct / sollya.x, approx_interval,
            S2**-(self.precision.get_field_size() + 2))
        poly_degree = int(sollya.sup(poly_degree_range)) + 4
        Log.report(Log.Info, "poly_degree={}".format(poly_degree))

        # arctan is an odd function, so only odd coefficient must be non-zero
        poly_degree_list = list(range(1, poly_degree + 1, 2))
        poly_object, poly_error = Polynomial.build_from_approximation_with_error(
            approx_fct, poly_degree_list,
            [1] + [self.precision.get_sollya_object()] *
            (len(poly_degree_list) - 1), approx_interval)

        # splitting the monomials of degree 4k+3 ("odd") from those of
        # degree 4k+1, k > 0 ("even"); the degree-1 term is added separately
        odd_predicate = lambda index, _: ((index - 1) % 4 != 0)
        even_predicate = lambda index, _: (index != 1 and (index - 1) % 4 == 0)

        poly_odd_object = poly_object.sub_poly_cond(odd_predicate, offset=1)
        poly_even_object = poly_object.sub_poly_cond(even_predicate, offset=1)

        # NOTE: this modifies a global sollya setting
        sollya.settings.display = sollya.hexadecimal
        Log.report(Log.Info, "poly_error: {}".format(poly_error))
        Log.report(Log.Info, "poly_odd: {}".format(poly_odd_object))
        Log.report(Log.Info, "poly_even: {}".format(poly_even_object))

        poly_odd = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_odd_object, abs_vx)
        poly_odd.set_attributes(tag="poly_odd", debug=debug_multi)
        poly_even = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_even_object, abs_vx)
        poly_even.set_attributes(tag="poly_even", debug=debug_multi)
        exact_sum = poly_odd + poly_even
        exact_sum.set_attributes(tag="exact_sum", debug=debug_multi)

        # poly_even should be (1 + poly_even)
        result = vx + vx * exact_sum
        result.set_attributes(tag="result", precision=self.precision,
                              debug=debug_multi)

    else:
        raise NotImplementedError

    if offset is not None:
        result = result + offset

    std_scheme = Statement(Return(result))
    scheme = std_scheme

    return scheme
def generate_scalar_scheme(self, vx, n):
    """ Build the scheme computing rootn(vx, n) = vx^(1/n).

        The generic path returns
            result_sign * exp2(log_approx / n) * 2^e_n_int * exp2(e_n_frac)
        where log_approx is built from the mantissa of vx and
        e_n_int + e_n_frac approximates e / n (e being the exponent of vx).
        A chain of ConditionBlock handles NaN, infinite and zero inputs as
        well as n equal to -1, 0 and 1 before the generic path is reached.

        @param vx floating-point input node (its tag is set to "x")
        @param n integer input node (its tag is set to "n")
        @return ConditionBlock node returning the result on every path """
    # fixing inputs' node tag
    vx.set_attributes(tag="x")
    n.set_attributes(tag="n")

    int_precision = self.precision.get_integer_format()

    # assuming x = m.2^e (m in [1, 2[)
    #
    # rootn(x, n) = x^(1/n)
    #             = exp(1/n * log(x))
    #             = 2^(1/n * log2(x))
    #             = 2^(1/n * (log2(m) + e))
    #
    # approximation log2(m)

    # retrieving processor inverse approximation table
    dummy_var = Variable("dummy", precision=self.precision)
    dummy_div_seed = ReciprocalSeed(dummy_var, precision=self.precision)
    inv_approx_table = self.processor.get_recursive_implementation(
        dummy_div_seed,
        language=None,
        table_getter=lambda self: self.approx_table_map)

    log_f = sollya.log(sollya.x)  # /sollya.log(self.basis)

    # when False, quotients by n use a Division rather than a
    # Multiplication by inv_n
    use_reciprocal = False

    # non-scaled vx used to compute vx^1
    unmodified_vx = vx

    # subnormal inputs are scaled by 2^mantissa_size before the
    # mantissa / exponent extraction; the exponent is corrected afterwards
    is_subnormal = Test(vx, specifier=Test.IsSubnormal, tag="is_subnormal")
    exp_correction_factor = self.precision.get_mantissa_size()
    mantissa_factor = Constant(2**exp_correction_factor,
                               tag="mantissa_factor")
    vx = Select(is_subnormal, vx * mantissa_factor, vx, tag="corrected_vx")

    m = MantissaExtraction(vx, tag="m", precision=self.precision)
    e = ExponentExtraction(vx, tag="e", precision=int_precision)
    e = Select(is_subnormal, e - exp_correction_factor, e, tag="corrected_e")

    ml_log_args = ML_GenericLog.get_default_args(precision=self.precision,
                                                 basis=2)
    ml_log = ML_GenericLog(ml_log_args)
    log_table, log_table_tho, table_index_range = ml_log.generate_log_table(
        log_f, inv_approx_table)
    log_approx = ml_log.generate_reduced_log_split(
        Abs(m, precision=self.precision), log_f, inv_approx_table, log_table)
    # floating-point version of n
    n_f = Conversion(n, precision=self.precision, tag="n_f")
    inv_n = Division(Constant(1, precision=self.precision), n_f)

    # forcing log(0) to -infinity
    log_approx = Select(Equal(vx, 0), FP_MinusInfty(self.precision),
                        log_approx)
    log_approx.set_attributes(tag="log_approx", debug=debug_multi)
    if use_reciprocal:
        r = Multiplication(log_approx, inv_n, tag="r", debug=debug_multi)
    else:
        r = Division(log_approx, n_f, tag="r", debug=debug_multi)

    # e_n ~ e / n
    e_f = Conversion(e, precision=self.precision, tag="e_f")
    if use_reciprocal:
        e_n = Multiplication(e_f, inv_n, tag="e_n")
    else:
        e_n = Division(e_f, n_f, tag="e_n")
    # error_e_n = e - e_n * n, used to correct the fractional part of e / n
    error_e_n = FMA(e_n, -n_f, e_f, tag="error_e_n")
    e_n_int = NearestInteger(e_n, precision=self.precision, tag="e_n_int")
    pre_e_n_frac = e_n - e_n_int
    pre_e_n_frac.set_attributes(tag="pre_e_n_frac")
    e_n_frac = pre_e_n_frac + error_e_n * inv_n
    e_n_frac.set_attributes(tag="e_n_frac")

    ml_exp2_args = ML_Exp2.get_default_args(precision=self.precision)
    ml_exp2 = ML_Exp2(ml_exp2_args)
    exp2_r = ml_exp2.generate_scalar_scheme(r, inline_select=True)
    exp2_r.set_attributes(tag="exp2_r", debug=debug_multi)

    exp2_e_n_frac = ml_exp2.generate_scalar_scheme(e_n_frac,
                                                   inline_select=True)
    exp2_e_n_frac.set_attributes(tag="exp2_e_n_frac", debug=debug_multi)

    exp2_e_n_int = ExponentInsertion(
        Conversion(e_n_int, precision=int_precision),
        precision=self.precision,
        tag="exp2_e_n_int")

    n_is_even = Equal(Modulo(n, 2), 0, tag="n_is_even", debug=debug_multi)
    n_is_odd = LogicalNot(n_is_even, tag="n_is_odd")
    result_sign = Select(
        n_is_odd,
        CopySign(vx, Constant(1.0, precision=self.precision)),
        1)

    # managing n == -1
    if self.expand_div:
        ml_division_args = ML_Division.get_default_args(
            precision=self.precision, input_formats=[self.precision] * 2)
        ml_division = ML_Division(ml_division_args)
        self.division_implementation = ml_division.implementation
        self.division_implementation.set_scheme(
            ml_division.generate_scheme())
        ml_division_fct = self.division_implementation.get_function_object()
    else:
        ml_division_fct = Division

    result = ConditionBlock(
        # NaN input, n == 0, or negative input with an even n: result is NaN
        LogicalOr(
            LogicalOr(Test(vx, specifier=Test.IsNaN), Equal(n, 0)),
            LogicalAnd(n_is_even, vx < 0)),
        Return(FP_QNaN(self.precision)),
        Statement(
            ConditionBlock(
                # rootn(x, -1) = 1 / x
                Equal(n, -1, tag="n_is_mone"),
                Return(
                    ml_division_fct(Constant(1, precision=self.precision),
                                    unmodified_vx,
                                    tag="div_res",
                                    precision=self.precision)),
            ),
            ConditionBlock(
                # x is ±∞: for n < 0 the result is a zero with the sign of x,
                # otherwise x itself is returned
                Test(vx, specifier=Test.IsInfty),
                Statement(
                    ConditionBlock(
                        n < 0,
                        Return(
                            Select(Test(vx, specifier=Test.IsPositiveInfty),
                                   Constant(FP_PlusZero(self.precision),
                                            precision=self.precision),
                                   Constant(FP_MinusZero(self.precision),
                                            precision=self.precision),
                                   precision=self.precision)),
                        Return(vx),
                    ),
                ),
            ),
            ConditionBlock(
                # rootn(±0, n) is ±∞ for odd n < 0.
                LogicalAnd(LogicalAnd(n_is_odd, n < 0),
                           Equal(vx, 0),
                           tag="n_is_odd_and_neg"),
                Return(
                    Select(Test(vx, specifier=Test.IsPositiveZero),
                           Constant(FP_PlusInfty(self.precision),
                                    precision=self.precision),
                           Constant(FP_MinusInfty(self.precision),
                                    precision=self.precision),
                           precision=self.precision)),
            ),
            ConditionBlock(
                # rootn(±0, n) is +∞ for even n < 0.
                LogicalAnd(LogicalAnd(n_is_even, n < 0), Equal(vx, 0)),
                Return(FP_PlusInfty(self.precision))),
            ConditionBlock(
                # rootn(±0, n) is +0 for even n > 0: the sign of a -0 input
                # must not be propagated, so +0 is returned explicitly
                LogicalAnd(n_is_even, Equal(vx, 0)),
                Return(FP_PlusZero(self.precision))),
            ConditionBlock(
                # manage n=1 separately to avoid catastrophic propagation of
                # errors between log2 and exp2 to eventually compute the
                # identity function (test-case #3)
                Equal(n, 1),
                Return(unmodified_vx),
                Return(result_sign * exp2_r * exp2_e_n_int * exp2_e_n_frac))))
    return result
def generate_scheme(self):
    """ Build a small fixed-point operation graph and check that
        evaluate_range returns the expected interval for each listed node.

        @return list containing self.implementation """
    int_size = 3
    frac_size = self.width - int_size

    in_format = fixed_point(int_size, frac_size)
    # built like the input format, not used further down
    out_format = fixed_point(int_size, frac_size)

    # node -> interval that evaluate_range is expected to return
    reference = {}

    # input signals, each one annotated with its range
    var_x = self.implementation.add_input_signal("x", in_format)
    x_range = Interval(-10.3, 10.7)
    var_x.set_interval(x_range)
    reference[var_x] = x_range

    var_y = self.implementation.add_input_signal("y", in_format)
    y_range = Interval(-17.9, 17.2)
    var_y.set_interval(y_range)
    reference[var_y] = y_range

    var_z = self.implementation.add_input_signal("z", in_format)
    z_range = Interval(-7.3, 7.7)
    var_z.set_interval(z_range)
    reference[var_z] = z_range

    # constant
    cst = Constant(42.5, tag="cst")
    reference[cst] = Interval(42.5)

    # roundings towards integers
    conv_ceil = Ceil(var_x, tag="ceil")
    reference[conv_ceil] = sollya.ceil(x_range)

    conv_floor = Floor(var_y, tag="floor")
    reference[conv_floor] = sollya.floor(y_range)

    # product
    mult = var_z * var_x
    mult.set_tag("mult")
    mult_range = z_range * x_range
    reference[mult] = mult_range

    # sum and difference
    large_add = (var_x + var_y) - mult
    large_add.set_attributes(tag="large_add")
    large_add_range = (x_range + y_range) - mult_range
    reference[large_add] = large_add_range

    # clamping between 0 and 13
    reduced_result = Max(0, Min(large_add, 13))
    reduced_result.set_tag("reduced_result")
    clamped_range = interval_max(
        Interval(0),
        interval_min(large_add_range, Interval(13)))
    reference[reduced_result] = clamped_range

    # selection between two operands
    select_result = Select(var_x > var_y,
                           reduced_result,
                           var_z,
                           tag="select_result")
    reference[select_result] = interval_union(clamped_range, z_range)

    # comparing the evaluated range of each node with its reference
    checked_nodes = (cst, var_x, var_y, mult, large_add, reduced_result,
                     select_result, conv_ceil, conv_floor)
    for node in checked_nodes:
        computed = evaluate_range(node)
        wanted = reference[node]
        print("{}: {} vs expected {}".format(node.get_tag(), computed,
                                             wanted))
        assert computed is not None
        assert computed == wanted

    return [self.implementation]
def generate_scheme(self):
    """ Generate the implementation scheme for the hyperbolic tangent
        meta-function.

        The input variable "x" is registered on self.implementation, then
        the computation is done on its absolute value and split in three
        ranges:
        - abs(x) < near_zero_bound: scheme built by
          self.generate_approx_poly_near_zero
        - near_zero_bound <= abs(x) < high_bound: piecewise polynomial
          approximation
        - otherwise: the constant 1.0
        The result is negated when x < 0.

        @return Return node wrapping the final selection """
    # registering the single input variable to the function
    vx = self.implementation.add_input_variable("x", self.precision)

    # tanh(x) = sinh(x) / cosh(x)
    #         = (e^x - e^-x) / (e^x + e^-x)
    #         = (e^(2x) - 1) / (e^(2x) + 1)
    # when x -> +inf, tanh(x) -> 1
    # when x -> -inf, tanh(x) -> -1
    # near 0: e^x    ~ 1 + x + x^2 / 2 + x^3 / 6 + ...
    #         e^(-x) ~ 1 - x + x^2 / 2 - x^3 / 6 + ...
    # so tanh(x) ~ (2 (x + x^3/6 + ...)) / (2 + x^2 + ...) ~ x
    # We can divide the input interval into 3 parts
    # positive, around 0, and finally negative

    # Possible argument reduction
    # x = m.2^E = k * log(2) + r
    # (k != 0) => tanh(x) = (2^(2k) * e^(2r) - 1) / (2^(2k) * e^(2r) + 1)
    #                     = (1 - e^(-2r) / 2^(2k)) / (1 + e^(-2r) / 2^(2k))
    #
    # tanh(x) = (e^(2x) - 1) / (e^(2x) + 1)
    #         = (e^(2x) + 1 - 1 - 1) / (e^(2x) + 1)
    #         = 1 - 2 / (e^(2x) + 1)

    # tanh is odd so we reduce the computation to the absolute value of
    # vx
    abs_vx = Abs(vx, precision=self.precision)

    # if p is the expected output precision
    # x > (p+2) * log(2) / 2 => tanh(x) = 1 - eps
    # where eps < 1/2 * 2^-p
    p = self.precision.get_mantissa_size()
    high_bound = (p + 2) * sollya.log(2) / 2
    near_zero_bound = 0.125
    interval_num = 1024

    # the sub-interval size is replaced by a power of two, the number of
    # sub-intervals is doubled and high_bound is recomputed accordingly
    interval_size = (high_bound - near_zero_bound) / interval_num
    new_interval_size = sollya.S2**int(sollya.log2(interval_size))
    interval_num *= 2
    high_bound = new_interval_size * interval_num + near_zero_bound

    # Near 0 approximation (the returned error is not used here)
    near_zero_scheme, _ = self.generate_approx_poly_near_zero(
        sollya.tanh(sollya.x), near_zero_bound, S2**-p, abs_vx)

    # approximation parameters
    poly_degree = 5

    # NOTE: this modifies a global sollya setting
    sollya.settings.points = 117

    approx_scheme, approx_error = piecewise_approximation(
        sollya.tanh,
        abs_vx,
        self.precision,
        bound_low=near_zero_bound,
        bound_high=high_bound,
        num_intervals=interval_num,
        max_degree=poly_degree,
        error_threshold=sollya.S2**-p)
    Log.report(Log.Warning, "approx_error={}".format(approx_error))

    complete_scheme = Select(
        abs_vx < near_zero_bound,
        near_zero_scheme,
        Select(abs_vx < high_bound,
               approx_scheme,
               Constant(1.0, precision=self.precision)))

    Log.report(Log.Info,
               "\033[33;1m generating implementation scheme \033[0m")
    # restoring the sign of the input (tanh is odd)
    scheme = Return(Select(vx < 0, Negation(complete_scheme), complete_scheme),
                    precision=self.precision)
    return scheme
def booth_radix4_multiply(lhs, rhs, pos_bit_heap, neg_bit_heap):
    """ Compute the multiplication @p lhs x @p rhs using radix 4 Booth
        recoding and drop the generated partial products in @p pos_bit_heap
        and @p neg_bit_heap based on their sign.

        @param lhs recoded operand (at least 2-bit wide)
        @param rhs operand from which the partial products are built
        @param pos_bit_heap heap receiving the positively weighted bits
        @param neg_bit_heap heap receiving the negatively weighted bits
        @return None, the result is accumulated in the two bit heaps """
    # booth recoded partial product for n-th digit
    # is based on digit from n-1 to n+1
    #    (n+1) | (n) | (n-1) |  PP  |
    #    ------|-----|-------|------|
    #      0   |  0  |   0   |  +0  |
    #      0   |  0  |   1   |  +X  |
    #      0   |  1  |   0   |  +X  |
    #      0   |  1  |   1   |  +2X |
    #      1   |  0  |   0   |  -2X |
    #      1   |  0  |   1   |  -X  |
    #      1   |  1  |   0   |  -X  |
    #      1   |  1  |   1   |  +0  |
    #    ------|-----|-------|------|
    assert lhs.get_precision().get_bit_size() >= 2

    # lhs is the recoded operand
    # RECODING DIGITS
    # first recoded digit is padded right by 0
    first_digit = Concatenation(SubSignalSelection(
        lhs, 0, 1, precision=ML_StdLogicVectorFormat(2)),
                                Constant(0, precision=ML_StdLogic),
                                precision=ML_StdLogicVectorFormat(3),
                                debug=debug_std,
                                tag="booth_digit_0")
    # list of (3-bit digit node, weight index of the digit)
    digit_list = [(first_digit, 0)]

    for digit_index in range(2, lhs.get_precision().get_bit_size(), 2):
        if digit_index + 1 < lhs.get_precision().get_bit_size():
            # digits exist completely in lhs
            digit = SubSignalSelection(lhs,
                                       digit_index - 1,
                                       digit_index + 1,
                                       tag="booth_digit_%d" % digit_index,
                                       debug=debug_std)
        else:
            # MSB padding required: zero for an unsigned lhs,
            # copy of the MSB of lhs otherwise
            sign_ext = Constant(0, precision=ML_StdLogic) if not (
                lhs.get_precision().get_signed()) else BitSelection(
                    lhs,
                    lhs.get_precision().get_bit_size() - 1)
            digit = Concatenation(sign_ext,
                                  SubSignalSelection(lhs, digit_index - 1,
                                                     digit_index),
                                  precision=ML_StdLogicVectorFormat(3),
                                  debug=debug_std,
                                  tag="booth_digit_%d" % digit_index)
        digit_list.append((digit, digit_index))
    # if lhs size is a multiple of two and it is unsigned
    # then an extra digit must be generated to ensure a positive result
    if lhs.get_precision().get_bit_size() % 2 == 0 and not (
            lhs.get_precision().get_signed()):
        digit_index = lhs.get_precision().get_bit_size() - 1
        digit = Concatenation(Constant(0, precision=ML_StdLogicVectorFormat(2)),
                              BitSelection(lhs, digit_index),
                              precision=ML_StdLogicVectorFormat(3),
                              debug=debug_std,
                              tag="booth_digit_%d" % (digit_index + 1))
        digit_list.append((digit, digit_index + 1))

    def DCV(value):
        """ Digit Constant Value: 3-bit constant to compare a digit with """
        return Constant(value, precision=ML_StdLogicVectorFormat(3))

    # PARTIAL PRODUCT GENERATION
    # Radix-4 booth recoding requires the following Partial Products
    # -2.rhs, -rhs, 0, rhs and 2.rhs
    # Negative PP are obtained by 1's complement of the value correctly shifted
    # adding a positive one to the LSB (inserted separately) and assuming
    # MSB digit has a negative weight
    for digit, index in digit_list:
        # digits 0b000 and 0b111 generate a null partial product
        pp_zero = LogicalOr(Equal(digit, DCV(0), precision=ML_Bool),
                            Equal(digit, DCV(7), precision=ML_Bool),
                            precision=ML_Bool)
        # digits 0b011 and 0b100 generate +/- 2.rhs (rhs shifted by one)
        pp_shifted = LogicalOr(Equal(digit, DCV(3), precision=ML_Bool),
                               Equal(digit, DCV(4), precision=ML_Bool),
                               precision=ML_Bool)
        # excluding zero case
        pp_neg_bit = BitSelection(digit, 2)
        pp_neg = equal_to(pp_neg_bit, 1)
        # the +1 completing the 1's complement of a negative, non-zero PP
        pp_neg_lsb_carryin = Select(LogicalAnd(pp_neg, LogicalNot(pp_zero)),
                                    Constant(1, precision=ML_StdLogic),
                                    Constant(0, precision=ML_StdLogic),
                                    tag="pp_%d_neg_lsb_carryin" % index,
                                    debug=debug_std)

        # LSB digit
        lsb_pp_digit = Select(pp_shifted,
                              Constant(0, precision=ML_StdLogic),
                              BitSelection(rhs, 0),
                              precision=ML_StdLogic)
        lsb_local_pp = Select(pp_zero,
                              Constant(0, precision=ML_StdLogic),
                              Select(pp_neg,
                                     BitLogicNegate(lsb_pp_digit),
                                     lsb_pp_digit,
                                     precision=ML_StdLogic),
                              debug=debug_std,
                              tag="lsb_local_pp_%d" % index,
                              precision=ML_StdLogic)
        pos_bit_heap.insert_bit(index, lsb_local_pp)
        pos_bit_heap.insert_bit(index, pp_neg_lsb_carryin)

        # other digits
        rhs_size = rhs.get_precision().get_bit_size()
        for k in range(1, rhs_size):
            pp_digit = Select(pp_shifted,
                              BitSelection(rhs, k - 1),
                              BitSelection(rhs, k),
                              precision=ML_StdLogic)
            local_pp = Select(pp_zero,
                              Constant(0, precision=ML_StdLogic),
                              Select(pp_neg,
                                     BitLogicNegate(pp_digit),
                                     pp_digit,
                                     precision=ML_StdLogic),
                              debug=debug_std,
                              tag="local_pp_%d_%d" % (index, k),
                              precision=ML_StdLogic)
            pos_bit_heap.insert_bit(index + k, local_pp)
        # MSB digit: top bit of rhs when the PP is shifted, else an
        # extension bit (zero for an unsigned rhs, MSB of rhs otherwise)
        msb_pp_digit = pp_digit = Select(
            pp_shifted,
            BitSelection(rhs, rhs_size - 1),
            # TODO: fix for signed rhs
            Constant(0, precision=ML_StdLogic) if not
            (rhs.get_precision().get_signed()) else BitSelection(
                rhs, rhs_size - 1),
            precision=ML_StdLogic)
        msb_pp = Select(pp_zero,
                        Constant(0, precision=ML_StdLogic),
                        Select(pp_neg,
                               BitLogicNegate(msb_pp_digit),
                               msb_pp_digit,
                               precision=ML_StdLogic),
                        debug=debug_std,
                        tag="msb_pp_%d" % (index),
                        precision=ML_StdLogic)
        if rhs.get_precision().get_signed():
            neg_bit_heap.insert_bit(index + rhs_size, msb_pp)
        else:
            pos_bit_heap.insert_bit(index + rhs_size, msb_pp)
            # MSB negative digit,
            # (rhs_size + index) is the position of the MSB digit of rhs
            # shifted by 1, we add +1 to get to the sign position
            # NOTE(review): nesting reconstructed from a flattened layout;
            # this insertion is kept in the unsigned branch because the
            # signed branch already gives msb_pp a negative weight -
            # confirm against the reference implementation
            neg_bit_heap.insert_bit(index + rhs_size + 1, pp_neg_lsb_carryin)
def generate_scheme(self):
    """ Build a graph mixing fixed-point and floating-point operations and
        check that evaluate_range returns the expected interval for each
        listed node.

        @return list containing self.implementation """
    int_size = 3
    frac_size = self.width - int_size

    in_format = fixed_point(int_size, frac_size)
    # built like the input format, not used further down
    out_format = fixed_point(int_size, frac_size)

    # node -> interval that evaluate_range is expected to return
    reference = {}

    # input signals, each one annotated with its range
    var_x = self.implementation.add_input_signal("x", in_format)
    x_range = Interval(-10.3, 10.7)
    var_x.set_interval(x_range)
    reference[var_x] = x_range

    var_y = self.implementation.add_input_signal("y", in_format)
    y_range = Interval(-17.9, 17.2)
    var_y.set_interval(y_range)
    reference[var_y] = y_range

    var_z = self.implementation.add_input_signal("z", in_format)
    z_range = Interval(-7.3, 7.7)
    var_z.set_interval(z_range)
    reference[var_z] = z_range

    # constant
    cst = Constant(42.5, tag="cst")
    reference[cst] = Interval(42.5)

    # roundings towards integers
    conv_ceil = Ceil(var_x, tag="ceil")
    reference[conv_ceil] = sollya.ceil(x_range)

    conv_floor = Floor(var_y, tag="floor")
    reference[conv_floor] = sollya.floor(y_range)

    # product
    mult = var_z * var_x
    mult.set_tag("mult")
    mult_range = z_range * x_range
    reference[mult] = mult_range

    # sum and difference
    large_add = (var_x + var_y) - mult
    large_add.set_attributes(tag="large_add")
    large_add_range = (x_range + y_range) - mult_range
    reference[large_add] = large_add_range

    # leading zero count, bounded by the input bit size
    var_x_lzc = CountLeadingZeros(var_x, tag="var_x_lzc")
    reference[var_x_lzc] = Interval(0, in_format.get_bit_size())

    # clamping between 0 and 13
    reduced_result = Max(0, Min(large_add, 13))
    reduced_result.set_tag("reduced_result")
    clamped_range = interval_max(
        Interval(0),
        interval_min(large_add_range, Interval(13)))
    reference[reduced_result] = clamped_range

    # selection between two operands
    select_result = Select(var_x > var_y,
                           reduced_result,
                           var_z,
                           tag="select_result")
    reference[select_result] = interval_union(clamped_range, z_range)

    # floating-point operation on mantissa and exponents
    fp_x_range = Interval(-0.01, 100)
    unbound_fp_var = Variable("fp_x",
                              precision=ML_Binary32,
                              interval=fp_x_range)
    mant_fp_x = MantissaExtraction(unbound_fp_var,
                                   tag="mant_fp_x",
                                   precision=ML_Binary32)
    exp_fp_x = ExponentExtraction(unbound_fp_var,
                                  tag="exp_fp_x",
                                  precision=ML_Int32)
    ins_exp_fp_x = ExponentInsertion(exp_fp_x,
                                     tag="ins_exp_fp_x",
                                     precision=ML_Binary32)

    reference[unbound_fp_var] = fp_x_range
    # exponent bounds come from the bounds of the magnitude of the input
    exponent_range = Interval(
        sollya.floor(sollya.log2(sollya.inf(abs(fp_x_range)))),
        sollya.floor(sollya.log2(sollya.sup(abs(fp_x_range)))))
    reference[exp_fp_x] = exponent_range
    reference[mant_fp_x] = Interval(1, 2)
    reference[ins_exp_fp_x] = Interval(
        S2**sollya.inf(exponent_range),
        S2**sollya.sup(exponent_range))

    # comparing the evaluated range of each node with its reference
    checked_nodes = (var_x_lzc, exp_fp_x, unbound_fp_var, mant_fp_x,
                     ins_exp_fp_x, cst, var_x, var_y, mult, large_add,
                     reduced_result, select_result, conv_ceil, conv_floor)
    for node in checked_nodes:
        computed = evaluate_range(node)
        wanted = reference[node]
        print("{}: {}".format(node.get_tag(), computed))
        print(" vs expected {}".format(wanted))
        assert computed is not None
        assert computed == wanted

    return [self.implementation]