def check_dTF(self, tf, margin=0, prec=165):
    """ Check if a transfer function satisfies the Gabarit.
    This is done using Sollya and gabarit.sol
    Parameters:
    - tf: (dTF) transfer function we want to check
    - margin: margin we can tolerate in the check (not in dB)
    - prec: (int) precision in bits given to Sollya.checkModulusFilterInSpecification
    Returns a tuple (isOk, res)
    - isOk: True if the transfer function is in the gabarit
    - res: sollya object embedding the result
    """
    Gabarit.readyToRunWithSollya()
    # get num, den as sollya objects
    num, den = tf.to_Sollya()
    # build the constraints to verify
    constraints = [b.sollyaConstraint(margin) for b in self._bands]
    # run sollya check
    # print("-> calling checkModulusFilterInSpecification")
    res = sollya.parse("checkModulusFilterInSpecification")(num, den, constraints, prec)
    sollya.parse("presentResults")(res)
    return dict(res)["okay"], res
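
# Usage sketch (illustrative, not taken from the original code base): how
# check_dTF is meant to be called. `gab` and `tf` are hypothetical names
# standing for a Gabarit instance and a dTF transfer function built elsewhere;
# the margin value is arbitrary.
def _example_check_dTF(gab, tf):
    is_ok, sollya_res = gab.check_dTF(tf, margin=2**-10, prec=165)
    if not is_ok:
        print("gabarit violated, raw Sollya result:", sollya_res)
    return is_ok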
def parse_gappa_interval(interval_value):
    """ Parse a Gappa interval string and convert it to a sollya Interval """
    # strip spaces and square brackets
    tmp_str = re.sub(r"[ \[\]]", "", interval_value)
    # drop the {...} annotations that may follow each bound
    while "{" in tmp_str:
        start = tmp_str.index("{")
        end = tmp_str.index("}")
        tmp_str = tmp_str[:start] + tmp_str[end + 1:]
    # the remaining "," separates the lower and upper bounds
    v0, v1 = tmp_str.split(",")
    return sollya.Interval(sollya.parse(v0), sollya.parse(v1))
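
# Illustrative sketch (hedged): the interval string below only mimics Gappa
# output with optional {...} annotations after each bound; the exact shape of
# real Gappa output may differ.
def _example_parse_gappa_interval():
    itv = parse_gappa_interval("[-0.125 {-1.25e-1}, 0.125 {1.25e-1}]")
    # itv is now a sollya.Interval spanning [-0.125, 0.125]
    return itv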
def parse_with_error(s):
    """ Parse string s as a SollyaObject, raise an error if the value
        conversion fails """
    v = sollya.parse(s)
    if v == sollya.error:
        Log.report(Log.Error, "not able to parse value {} => {}", s, v)
    return v
class FunctionTemplate(ScalarUnaryFunction):
    function_name = "func_template"

    def __init__(self, args=DefaultArgTemplate):
        # initializing base class
        super().__init__(args)

    @staticmethod
    def get_default_args(**kw):
        """ Return a structure containing the arguments for FunctionTemplate,
            built from a default argument mapping overloaded with @p kw """
        default_args_exp = {
            "output_file": "func_template.c",
            "function_name": "func_template",
            "precision": ML_Binary32,
            "accuracy": ML_Faithful,
            "target": GenericProcessor.get_target_instance()
        }
        default_args_exp.update(kw)
        return DefaultArgTemplate(**default_args_exp)

    def generate_scalar_scheme(self, vx):
        scheme = Statement(Return(vx),)
        return scheme

    def numeric_emulate(self, input_value):
        """ Numeric emulation of the template function (identity) """
        return input_value

    standard_test_cases = [
        (sollya.parse("0x1.1p0"),),
    ]
def standard_test_cases(self):
    generic_list = [
        # test-case #1
        (sollya.parse("0x1.bbe2f2p-1"), sollya.parse("0x1.2d34ep+9")),
        # test-case #2
        (sollya.parse("0x0p+0"), sollya.parse("0x1.a45a2ep-56"), FP_PlusZero(self.precision)),
        # test-case #0
        (sollya.parse("0x1.5d20b8p-115"), sollya.parse("0x1.c20048p+0")),
        # special cases
        (sollya.parse("0x0p+0"), 1),
        (sollya.parse("0x0p+0"), 0),
    ]
    fp64_list = [
        # subnormal output
        (sollya.parse("0x1.21998d0c5039bp-976"), sollya.parse("0x1.bc68e3d0ffd24p+3")),
    ]
    return generic_list + (fp64_list if self.precision.get_bit_size() >= 64 else [])
def numeric_emulate(self, x):
    """ numeric emulation """
    # extracting mantissa from x
    # abs_x = abs(x)
    # mantissa = abs_x / S2**sollya.floor(sollya.log2(abs_x))
    # index = sollya.floor((mantissa - 1.0) * 2**8)
    # result = sollya.round(1/sollya.sqrt(1.0 + index * S2**-8), 9, sollya.RN)
    if x == 0:
        return sollya.parse("infty")
    result = sollya.round(1.0 / x, 9, sollya.RN)
    return result
def parse(s, precision=None):
    """ Parse a numerical value from a string """
    obj = sollya.parse(s)
    if obj == SOLLYA_INFTY:
        return FP_PlusInfty(precision)
    elif obj == -SOLLYA_INFTY:
        return FP_MinusInfty(precision)
    elif obj != obj:
        # by default Sollya's NaNs are assumed to be quiet NaNs
        return FP_QNaN(precision)
    else:
        return NumericValue(obj)
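
# Illustrative sketch (hedged) of the classification performed by parse():
# `precision` stands for a Metalibm format object (e.g. ML_Binary32) supplied
# by the caller.
def _example_parse(precision):
    plus_inf = parse("infty", precision)     # expected: FP_PlusInfty(precision)
    quiet_nan = parse("nan", precision)      # expected: FP_QNaN(precision)
    plain_num = parse("0x1.8p0", precision)  # expected: NumericValue(1.5)
    return plus_inf, quiet_nan, plain_num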
def standard_test_cases(self):
    return [
        (sollya.parse("0xbf50bc3a"),),
        (sollya.parse("0x1.0p-126"),),
        (sollya.parse("0x1.0p-127"),),
        (sollya.parse("-0x1.fffffep126"),),
        (sollya.parse("-infty"),),
        (sollya.parse("infty"),),
        (FP_QNaN(self.precision),),
        # issue in generic newlib implementation
        (sollya.parse("0x1.62e302p+6"),),
    ]
def WCPGmp(self, delta=2**-53):
    """ Compute the WCPG of the state-space system with an absolute error
    bounded by delta.
    The result is given as a list W of sollya objects, which represents a
    p x q WCPG matrix.

    Parameters
    ----------
    delta - bound on the absolute error

    Returns
    -------
    W - a list of sollya objects representing the elements of the WCPG matrix
    """
    import sollya
    # load wcpg.sol
    # sollya.suppressmessage(57, 174, 130, 457)
    sollya.execute("fipogen/LTI/wcpg.sol")
    wcpg = sollya.parse("wcpg")

    # construct the inputs for the wcpg function in SollyaObject format
    A, _, _ = mpf_matrix_to_sollya(self._A)
    B, _, _ = mpf_matrix_to_sollya(self._B)
    C, _, _ = mpf_matrix_to_sollya(self._C)
    D, _, _ = mpf_matrix_to_sollya(self._D)

    # W = sollya.parse("wcpg")(A, B, C, D, self._n, self._p, self._q, eps)
    W = wcpg(A, B, C, D, self._n, self._p, self._q, delta)
    return W
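
# Usage sketch (hedged): `ss` stands for a state-space instance exposing
# WCPGmp(); under the assumption that W is laid out row-major as a p x q
# matrix, element (i, j) would be W[i * ss._q + j]. Both the name `ss` and the
# row-major layout are assumptions made for illustration only.
def _example_WCPGmp(ss):
    W = ss.WCPGmp(delta=2**-53)
    first_row = [W[j] for j in range(ss._q)]
    return first_row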
class FP_Divider(ML_Entity("fp_div")): def __init__( self, arg_template=DefaultEntityArgTemplate, ): # initializing base class ML_EntityBasis.__init__(self, arg_template=arg_template) self.disable_sub_testing = arg_template.disable_sub_testing self.disable_sv_testing = arg_template.disable_sv_testing self.pipelined = arg_template.pipelined ## default argument template generation @staticmethod def get_default_args(**kw): default_dict = { "precision": ML_Binary32, "target": VHDLBackend(), "output_file": "my_fp_div.vhd", "entity_name": "my_fp_div", "language": VHDL_Code, "pipelined": False, } default_dict.update(kw) return DefaultEntityArgTemplate(**default_dict) def generate_scheme(self): def get_virtual_cst(prec, value, language): return prec.get_support_format().get_cst( prec.get_base_format().get_integer_coding(value, language)) ## convert @p value from an input floating-point precision # @p in_precision to an output support format @p out_precision io_precision = VirtualFormat(base_format=self.precision, support_format=ML_StdLogicVectorFormat( self.precision.get_bit_size()), get_cst=get_virtual_cst) # declaring main input variable vx = self.implementation.add_input_signal("x", io_precision) # rounding mode input rnd_mode = self.implementation.add_input_signal( "rnd_mode", rnd_mode_format) if self.pipelined: self.implementation.add_input_signal("reset", ML_StdLogic) vx_precision = self.precision p = vx_precision.get_mantissa_size() exp_size = vx_precision.get_exponent_size() exp_vx_precision = ML_StdLogicVectorFormat( vx_precision.get_exponent_size()) mant_vx_precision = ML_StdLogicVectorFormat(p) # fixed-point precision for operand's exponent exp_fixed_precision = fixed_point(exp_size, 0, signed=False) # mantissa extraction mant_vx = TypeCast(MantissaExtraction(vx, precision=mant_vx_precision, tag="extracted_mantissa"), precision=fixed_point(1, p - 1, signed=False), debug=debug_fixed, tag="mant_vx") # exponent extraction exp_vx = TypeCast(RawExponentExtraction(vx, precision=exp_vx_precision, tag="exp_vx"), precision=exp_fixed_precision) approx_index_size = 8 approx_precision = fixed_point( 2, approx_index_size, ) # selecting table index from input mantissa MSBs tab_index = SubSignalSelection(mant_vx, p - 2 - approx_index_size + 1, p - 2, tag="tab_index") # declaring reciprocal approximation table inv_approx_table = ML_NewTable(dimensions=[2**approx_index_size], storage_precision=approx_precision, tag="inv_approx_table") for i in range(2**approx_index_size): num_input = 1 + i * S2**-approx_index_size table_value = io_precision.get_base_format().round_sollya_object( 1 / num_input) inv_approx_table[i] = table_value # extracting initial reciprocal approximation inv_approx_value = TableLoad(inv_approx_table, tab_index, precision=approx_precision, tag="inv_approx_value", debug=debug_fixed) #inv_approx_value = TypeCast(inv_approx_value, precision = approx_precision) pre_it0_input = zext( SubSignalSelection(mant_vx, p - 1 - approx_index_size, p - 1, tag="it0_input"), 1) it0_input = TypeCast(pre_it0_input, precision=approx_precision, tag="it0_input", debug=debug_fixed) it1_precision = RTL_FixedPointFormat( 2, 2 * approx_index_size, support_format=ML_StdLogicVectorFormat(2 + 2 * approx_index_size)) it1_input = mant_vx final_approx = generate_NR_iteration( mant_vx, inv_approx_value, (2, approx_index_size * 2), # mult precision (-3, 2 * approx_index_size), # error precision (2, approx_index_size * 3), # new-approx mult (2, approx_index_size * 2), # new approx precision self.implementation, 
pipelined=0, #1 if self.pipelined else 0, tag_suffix="_first") # Inserting post-input pipeline stage if self.pipelined: self.implementation.start_new_stage() final_approx = generate_NR_iteration( mant_vx, final_approx, # mult precision (2, approx_index_size * 3), # error precision (-6, approx_index_size * 3), # approx mult precision (2, approx_index_size * 3), # new approx precision (2, approx_index_size * 3), self.implementation, pipelined=1 if self.pipelined else 0, tag_suffix="_second") # Inserting post-input pipeline stage if self.pipelined: self.implementation.start_new_stage() final_approx = generate_NR_iteration( mant_vx, final_approx, # mult-precision (2, 2 * p - 1), # error precision (-(3 * approx_index_size) / 2, approx_index_size * 2 + p - 1), # mult approx mult precision (2, approx_index_size * 2 + p - 1), # approx precision (2, p), self.implementation, pipelined=2 if self.pipelined else 0, tag_suffix="_third") # Inserting post-input pipeline stage if self.pipelined: self.implementation.start_new_stage() final_approx = generate_NR_iteration( mant_vx, final_approx, (2, 2 * p), (-(4 * p) / 5, 2 * p), (2, 2 * p), (2, 2 * p), self.implementation, pipelined=2 if self.pipelined else 0, tag_suffix="_last") # Inserting post-input pipeline stage if self.pipelined: self.implementation.start_new_stage() final_approx.set_attributes(tag="final_approx", debug=debug_hex) last_approx_norm = final_approx offset_bit = BitSelection(last_approx_norm, FixedPointPosition( last_approx_norm, 0, align=FixedPointPosition.FromPointToLSB), tag="offset_bit", debug=debug_std) # extracting bit to determine if result should be left-shifted and # exponent incremented not_decrement = offset_bit final_approx_reduced = SubSignalSelection( final_approx, FixedPointPosition(final_approx, -(p - 1), align=FixedPointPosition.FromPointToLSB), FixedPointPosition(final_approx, 0, align=FixedPointPosition.FromPointToLSB), precision=fixed_point(p, 0, signed=False)) final_approx_reduced_shifted = SubSignalSelection( final_approx, FixedPointPosition(final_approx, -p, align=FixedPointPosition.FromPointToLSB), FixedPointPosition(final_approx, -1, align=FixedPointPosition.FromPointToLSB), precision=fixed_point(p, 0, signed=False)) # unrounded mantissa field excluding leading digit unrounded_mant_field = Select( equal_to(not_decrement, 1), final_approx_reduced, final_approx_reduced_shifted, precision=fixed_point(p, 0, signed=False), tag="unrounded_mant_field", debug=debug_hex, ) def get_bit(optree, bit_index): bit_sel = BitSelection( optree, FixedPointPosition(optree, -bit_index, align=FixedPointPosition.FromPointToLSB)) return bit_sel mant_lsb = Select( equal_to(not_decrement, 1), get_bit(final_approx, p - 1), get_bit(final_approx, p), precision=ML_StdLogic, tag="mant_lsb", debug=debug_std, ) round_bit = Select( equal_to(not_decrement, 1), get_bit(final_approx, p), get_bit(final_approx, p + 1), precision=ML_StdLogic, tag="round_bit", debug=debug_std, ) sticky_bit_input = Select( equal_to(not_decrement, 1), SubSignalSelection(final_approx, 0, FixedPointPosition( final_approx, -(p + 1), align=FixedPointPosition.FromPointToLSB), precision=None, tag="sticky_bit_input"), SubSignalSelection(final_approx, 0, FixedPointPosition( final_approx, -(p + 2), align=FixedPointPosition.FromPointToLSB), precision=None, tag="sticky_bit_input"), ) sticky_bit = Select(Equal(sticky_bit_input, Constant(0, precision=None)), Constant(0, precision=ML_StdLogic), Constant(1, precision=ML_StdLogic), precision=ML_StdLogic, tag="sticky_bit", debug=debug_std) 
# TODO: manage leading digit (in case of subnormal result) pre_result = unrounded_mant_field # real_exp = exp_vx - bias # - real_exp = bias - exp_vx # encoded negated exp = bias - exp_vx + bias = 2 * bias - exp_vx fp_io_precision = io_precision.get_base_format() neg_exp = -2 * fp_io_precision.get_bias() - exp_vx neg_exp.set_attributes(tag="neg_exp", debug=debug_fixed) res_exp = Subtraction(neg_exp, Select(equal_to(not_decrement, 1), Constant(0, precision=exp_fixed_precision), Constant(1, precision=exp_fixed_precision), precision=None, tag="exp_offset", debug=debug_fixed), tag="res_exp", debug=debug_fixed) res_exp_field = SubSignalSelection( res_exp, FixedPointPosition(res_exp, 0, align=FixedPointPosition.FromPointToLSB, tag="res_exp_field LSB"), FixedPointPosition(res_exp, exp_size - 1, align=FixedPointPosition.FromPointToLSB, tag="res_exp_field MSB"), precision=None, tag="res_exp_field", # debug=debug_fixed ) result_sign = CopySign(vx, precision=ML_StdLogic) exp_mant_precision = ML_StdLogicVectorFormat( io_precision.get_bit_size() - 1) rnd_mode_is_rne = Equal(rnd_mode, rnd_rne, precision=ML_Bool) rnd_mode_is_ru = Equal(rnd_mode, rnd_ru, precision=ML_Bool) rnd_mode_is_rd = Equal(rnd_mode, rnd_rd, precision=ML_Bool) rnd_mode_is_rz = Equal(rnd_mode, rnd_rz, precision=ML_Bool) round_incr = Conversion( logical_or_reduce([ logical_and_reduce([ rnd_mode_is_rne, equal_to(round_bit, 1), equal_to(sticky_bit, 1) ]), logical_and_reduce([ rnd_mode_is_rne, equal_to(round_bit, 1), equal_to(sticky_bit, 0), equal_to(mant_lsb, 1) ]), logical_and_reduce([ rnd_mode_is_ru, equal_to(result_sign, 0), LogicalOr(equal_to(round_bit, 1), equal_to(sticky_bit, 1), precision=ML_Bool) ]), logical_and_reduce([ rnd_mode_is_rd, equal_to(result_sign, 1), LogicalOr(equal_to(round_bit, 1), equal_to(sticky_bit, 1), precision=ML_Bool) ]), ]), precision=fixed_point(1, 0, signed=False), tag="round_incr", #debug=debug_fixed ) # Precision for result without sign unsigned_result_prec = fixed_point((p - 1) + exp_size, 0) unrounded_mant_field_nomsb = Conversion( unrounded_mant_field, precision=fixed_point(p - 1, 0, signed=False), tag="unrounded_mant_field_nomsb", debug=debug_hex) pre_rounded_unsigned_result = Concatenation( res_exp_field, unrounded_mant_field_nomsb, precision=unsigned_result_prec, tag="pre_rounded_unsigned_result") unsigned_result_rounded = Addition(pre_rounded_unsigned_result, round_incr, precision=unsigned_result_prec, tag="unsigned_result") vr_out = TypeCast(Concatenation( result_sign, TypeCast(unsigned_result_rounded, precision=ML_StdLogicVectorFormat(p - 1 + exp_size)), precision=ML_StdLogicVectorFormat(io_precision.get_bit_size())), precision=io_precision, debug=debug_hex, tag="vr_out") self.implementation.add_output_signal("vr_out", vr_out) return [self.implementation] def init_test_generator(self): """ Initialize test case generator """ weight_map = { FPRandomGen.Category.SpecialValues: 0.0 if self.disable_sv_testing else 0.1, FPRandomGen.Category.Subnormal: 0.0 if self.disable_sub_testing else 0.2, FPRandomGen.Category.Normal: 0.7, } self.input_generator = FPRandomGen(self.precision, weight_map=weight_map) def generate_test_case(self, input_signals, io_map, index, test_range=None): """ specific test case generation for K1C TCA BLAU """ rnd_mode = random.randrange(4) input_values = { "rnd_mode": rnd_mode, "x": self.input_generator.get_new_value() } return input_values def numeric_emulate(self, io_map): vx = io_map["x"] rnd_mode_i = io_map["rnd_mode"] def div_numeric_emulate(vx): sollya_format = 
self.precision.get_sollya_object() return sollya.round(1.0 / vx, sollya_format, rnd_mode) rnd_mode = { 0: sollya.RN, 1: sollya.RU, 2: sollya.RD, 3: sollya.RZ }[rnd_mode_i] value_mapping = { is_plus_infty: lambda _: 0.0, is_nan: lambda _: FP_QNaN(self.precision), is_minus_infty: lambda _: FP_QNaN(self.precision), is_plus_zero: lambda _: FP_PlusInfty(self.precision), is_minus_zero: lambda _: FP_MinusInfty(self.precision), is_sv_omega: lambda op: lambda _: div_numeric_emulate(op.get_value()), lambda op: not (FP_SpecialValue.is_special_value(op)): div_numeric_emulate, } result = {} for predicate in value_mapping: if predicate(vx): result["vr_out"] = value_mapping[predicate](vx) return result Log.report(Log.Error, "no predicate fits {} in numeric_emulate\n".format(vx)) #standard_test_cases = [({"x": 1.0, "y": (S2**-11 + S2**-17)}, None)] standard_test_cases = [ ({ "x": 2.0, "rnd_mode": 0 }, None), ({ "x": sollya.parse("0x1.24f608p0"), "rnd_mode": 0 }, None), ({ "x": 1.5, "rnd_mode": 0 }, None), ]
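
# Worked check (hedged, plain Python) of the exponent handling used by the
# divider above: for a reciprocal, the biased result exponent before the
# possible renormalisation decrement is 2*bias - biased_exp(x), which is what
# "-2 * get_bias() - exp_vx" computes assuming get_bias() returns -bias
# (e.g. -127 for binary32).
def _check_negated_exponent(biased_exp_x, bias=127):
    real_exp = biased_exp_x - bias       # unbiased exponent of x
    return (-real_exp) + bias            # == 2 * bias - biased_exp_x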
class ML_Exp2(ScalarUnaryFunction): function_name = "ml_exp2" def __init__(self, args=DefaultArgTemplate): # initializing base class super().__init__(args) @staticmethod def get_default_args(**kw): """ Return a structure containing the arguments for ML_Exponential, builtin from a default argument mapping overloaded with @p kw """ default_args_exp2 = { "output_file": "ml_exp2.c", "function_name": "ml_exp2", "precision": ML_Binary32, "accuracy": ML_Faithful, "target": GenericProcessor.get_target_instance() } default_args_exp2.update(kw) return DefaultArgTemplate(**default_args_exp2) def generate_scalar_scheme(self, vx, inline_select=False): Log.set_dump_stdout(True) Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m") if self.debug_flag: Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m") # local overloading of RaiseReturn operation def ExpRaiseReturn(*args, **kwords): kwords["arg_value"] = vx kwords["function_name"] = self.function_name return RaiseReturn(*args, **kwords) # r_interval = Interval(0, 1.0) index_size = 3 r_interval = Interval(-2**(-index_size), 2**-index_size) local_ulp = sup(ulp(2**r_interval, self.precision)) Log.report(Log.Info, "ulp: ", local_ulp) error_goal = S2**-1 * local_ulp Log.report(Log.Info, "error goal: ", error_goal) sollya_precision = { ML_Binary32: sollya.binary32, ML_Binary64: sollya.binary64 }[self.precision] int_precision = { ML_Binary32: ML_Int32, ML_Binary64: ML_Int64 }[self.precision] # Argument Reduction # r = x - floor(x), r >= 0 vx_floor = Floor(vx, precision=self.precision, tag='vx_floor', debug=debug_multi) vx_int = Conversion(vx_floor, precision=int_precision, tag="vx_int", debug=debug_multi) vx_intf = vx_floor # Conversion(vx_int, precision = self.precision) vx_r = vx - vx_intf r_hi = NearestInteger(vx_r * 2**index_size, precision=self.precision, tag="r_hi", debug=debug_multi) # clamping r_hi_int within table-size to make sure # it does not exceeds hi_part_table when used to index it r_hi_int = Max( Min( Conversion(r_hi, precision=int_precision, tag="r_hi_int", debug=debug_multi), 2**index_size + 1), 0) r_lo = vx_r - r_hi * 2**-index_size r_lo.set_attributes(tag="r_lo", debug=debug_multi) vx_r.set_attributes(tag="vx_r", debug=debug_multi) degree = sup(guessdegree(2**(sollya.x), r_interval, error_goal)) + 2 precision_list = [1] + [self.precision] * degree exp_X = ExponentInsertion(vx_int, tag="exp_X", debug=debug_multi, precision=self.precision) #Polynomial Approx polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme poly_object, poly_error = Polynomial.build_from_approximation_with_error( 2**(sollya.x) - 1, degree, precision_list, r_interval, sollya.absolute) Log.report(Log.Info, "Poly : %s" % poly_object) Log.report(Log.Info, "poly_error : ", poly_error) poly = polynomial_scheme_builder(poly_object.sub_poly(start_index=1), r_lo, unified_precision=self.precision) poly.set_attributes(tag="poly", debug=debug_multi) hi_part_table = ML_NewTable(dimensions=[2**index_size + 1], storage_precision=self.precision, tag=self.uniquify_name("exp2_table"), const=True) for i in range(2**index_size + 1): input_value = i * 2**-index_size tab_value = self.precision.round_sollya_object( sollya.SollyaObject(2)**(input_value)) hi_part_table[i] = tab_value hi_part_value = TableLoad(hi_part_table, r_hi_int, precision=self.precision, tag="hi_part_value", debug=debug_multi) #Handling special cases oflow_bound = Constant(self.precision.get_emax() + 1, precision=self.precision) subnormal_bound = 
self.precision.get_emin_subnormal() uflow_bound = self.precision.get_emin_normal() Log.report(Log.Info, "oflow : ", oflow_bound) #print "uflow : ", uflow_bound #print "sub : ", subnormal_bound test_overflow = Comparison(vx, oflow_bound, specifier=Comparison.GreaterOrEqual) test_overflow.set_attributes(tag="oflow_test", debug=debug_multi, likely=False, precision=ML_Bool) test_underflow = Comparison(vx, uflow_bound, specifier=Comparison.Less) test_underflow.set_attributes(tag="uflow_test", debug=debug_multi, likely=False, precision=ML_Bool) test_subnormal = Comparison(vx, subnormal_bound, specifier=Comparison.Greater) test_subnormal.set_attributes(tag="sub_test", debug=debug_multi, likely=False, precision=ML_Bool) subnormal_offset = -(uflow_bound - vx_int) subnormal_offset.set_attributes(tag="offset", debug=debug_multi) exp_offset = ExponentInsertion(subnormal_offset, precision=self.precision, debug=debug_multi, tag="exp_offset") exp_min = ExponentInsertion(uflow_bound, precision=self.precision, debug=debug_multi, tag="exp_min") subnormal_result = hi_part_value * exp_offset * exp_min * poly + hi_part_value * exp_offset * exp_min test_std = LogicalOr(test_overflow, test_underflow, precision=ML_Bool, tag="std_test", likely=False, debug=debug_multi) #Reconstruction result = hi_part_value * exp_X * poly + hi_part_value * exp_X result.set_attributes(tag="result", debug=debug_multi) C0 = Constant(0, precision=self.precision) if inline_select: scheme = Select( test_std, Select(test_overflow, FP_PlusInfty(self.precision), Select( test_subnormal, subnormal_result, C0, )), result, ) return scheme else: return_inf = Return(FP_PlusInfty(self.precision)) return_C0 = Return(C0) return_sub = Return(subnormal_result) return_std = Return(result) non_std_statement = Statement( ConditionBlock( test_overflow, return_inf, ConditionBlock(test_subnormal, return_sub, return_C0))) scheme = Statement( ConditionBlock(test_std, non_std_statement, return_std)) return scheme def generate_emulate(self, result_ternary, result, mpfr_x, mpfr_rnd): """ generate the emulation code for ML_Log2 functions mpfr_x is a mpfr_t variable which should have the right precision mpfr_rnd is the rounding mode """ emulate_func_name = "mpfr_exp" emulate_func_op = FunctionOperator(emulate_func_name, arg_map={ 0: FO_Arg(0), 1: FO_Arg(1), 2: FO_Arg(2) }, require_header=["mpfr.h"]) emulate_func = FunctionObject(emulate_func_name, [ML_Mpfr_t, ML_Mpfr_t, ML_Int32], ML_Int32, emulate_func_op) mpfr_call = Statement( ReferenceAssign(result_ternary, emulate_func(result, mpfr_x, mpfr_rnd))) return mpfr_call def numeric_emulate(self, input_value): return sollya.SollyaObject(2)**(input_value) standard_test_cases = [[sollya.parse(x)] for x in [ "0x1.ffead1bac7ad2p+9", "-0x1.ee9cb4p+1", "-0x1.db0928p+3", "0x1.c3a07c4c711cfp-1", "0x1.e79d45fd647f3p-1", "-infty" ]]
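
# Numeric sanity sketch (hedged) of the exp2 reconstruction implemented above:
# with i = nearestint(frac(x) * 2**k) and r_lo the residual fed to the
# polynomial, 2**x == 2**floor(x) * 2**(i / 2**k) * 2**r_lo.
import math

def _check_exp2_split(x=2.71828, index_size=3):
    frac = x - math.floor(x)
    i = round(frac * 2**index_size)
    r_lo = frac - i * 2.0**-index_size
    recon = 2.0**math.floor(x) * 2.0**(i * 2.0**-index_size) * 2.0**r_lo
    return math.isclose(recon, 2.0**x, rel_tol=1e-12)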
class ML_Erf(ScalarUnaryFunction):
    """ Meta implementation of the error-function """
    function_name = "ml_erf"

    def __init__(self, args):
        super().__init__(args)

    @staticmethod
    def get_default_args(**kw):
        """ Return a structure containing the arguments for ML_Erf,
            built from a default argument mapping overloaded with @p kw """
        default_args_erf = {
            "output_file": "my_erf.c",
            "function_name": "my_erf",
            "precision": ML_Binary32,
            "accuracy": ML_Faithful,
            "target": GenericProcessor.get_target_instance(),
            "passes": [
                ("start:instantiate_abstract_prec"),
                ("start:instantiate_prec"),
                ("start:basic_legalization"),
                ("start:expand_multi_precision")],
        }
        default_args_erf.update(kw)
        return DefaultArgTemplate(**default_args_erf)

    def generate_scalar_scheme(self, vx):
        abs_vx = Abs(vx, precision=self.precision)

        FCT_LIMIT = 1.0

        one_limit = search_bound_threshold(sollya.erf, FCT_LIMIT, 1.0, 10.0,
                                           self.precision)
        one_limit_exp = int(sollya.floor(sollya.log2(one_limit)))
        Log.report(Log.Debug, "erf(x) = 1.0 limit is {}, with exp={}",
                   one_limit, one_limit_exp)

        upper_approx_bound = 10

        # empirical numbers
        eps_exp = {ML_Binary32: -3, ML_Binary64: -5}[self.precision]
        eps = S2**eps_exp

        Log.report(Log.Info, "building mathematical polynomial")
        approx_interval = Interval(0, eps)
        # the function approximated is erf(x) / x;
        # it is an even function: erf(x) / x = erf(-x) / (-x)
        approx_fct = sollya.erf(sollya.x) - (sollya.x)
        poly_degree = int(
            sup(
                guessdegree(
                    approx_fct, approx_interval,
                    S2**-(self.precision.get_field_size() + 5)))) + 1

        poly_degree_list = list(range(1, poly_degree, 2))
        Log.report(Log.Debug, "poly_degree is {} and list {}", poly_degree,
                   poly_degree_list)
        global_poly_object = Polynomial.build_from_approximation(
            approx_fct, poly_degree_list,
            [self.precision] * len(poly_degree_list), approx_interval,
            sollya.relative)
        Log.report(
            Log.Debug, "infnorm is {}",
            dirtyinfnorm(approx_fct - global_poly_object.get_sollya_object(),
                         approx_interval))
        poly_object = global_poly_object.sub_poly(start_index=1, offset=1)

        ext_precision = {
            ML_Binary32: ML_SingleSingle,
            ML_Binary64: ML_DoubleDouble,
        }[self.precision]

        pre_poly = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object, abs_vx, unified_precision=self.precision)

        result = FMA(pre_poly, abs_vx, abs_vx)
        result.set_attributes(tag="result", debug=debug_multi)

        eps_target = S2**-(self.precision.get_field_size() + 5)

        def offset_div_function(fct):
            return lambda offset: fct(sollya.x + offset)

        # empirical numbers
        field_size = {ML_Binary32: 6, ML_Binary64: 8}[self.precision]

        near_indexing = SubFPIndexing(eps_exp, 0, 6, self.precision)
        near_approx = generic_poly_split(offset_div_function(sollya.erf),
                                         near_indexing, eps_target,
                                         self.precision, abs_vx)
        near_approx.set_attributes(tag="near_approx", debug=debug_multi)

        def offset_function(fct):
            return lambda offset: fct(sollya.x + offset)

        medium_indexing = SubFPIndexing(1, one_limit_exp, 7, self.precision)
        medium_approx = generic_poly_split(offset_function(sollya.erf),
                                           medium_indexing, eps_target,
                                           self.precision, abs_vx)
        medium_approx.set_attributes(tag="medium_approx", debug=debug_multi)

        # approximation for positive values
        scheme = ConditionBlock(
            abs_vx < eps,
            Return(result),
            ConditionBlock(
                abs_vx < near_indexing.get_max_bound(),
                Return(near_approx),
                ConditionBlock(abs_vx < medium_indexing.get_max_bound(),
                               Return(medium_approx),
                               Return(Constant(1.0,
                                               precision=self.precision)))))
        return scheme

    def numeric_emulate(self, input_value):
        return sollya.erf(input_value)

    standard_test_cases = [
        (sollya.parse("0x1.4c0d4e9f58p-8"),),
        (1.0, None),
        (4.0, None),
        (0.5, None),
        (1.5, None),
        (1024.0, None),
        (sollya.parse("0x1.13b2c6p-2"), None),
        (sollya.parse("0x1.2cb10ap-5"), None),
        (0.0, None),
        (sollya.parse("0x1.07e08ep+1"), None),
    ]
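
# Quick numeric illustration (hedged) of the small-argument path above: since
# erf is odd, Q(x) = (erf(x) - x) / x is even, which is why only odd-degree
# coefficients are requested for erf(x) - x and the result is rebuilt with an
# FMA as x + x * Q(x).
def _check_erf_parity(x=sollya.parse("0x1p-4")):
    q_pos = (sollya.erf(x) - x) / x
    q_neg = (sollya.erf(-x) - (-x)) / (-x)
    return abs(q_pos - q_neg)   # mathematically 0, numerically ~0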
import sys
# sys.path.append("/home/lauter/pythonsollya-install/lib/python2.7/site-packages")
import sollya

sollya.execute("wcpg.sol")
wcpg = sollya.parse("wcpg")
def function(self, fct_expr="exp(x)", io_format="binary32", vector_size=1, target="generic", registered_pass_list="", sub_vector_size="default", debug=False, language="c", range_nan="false", range_lo="-infty", range_hi="+infty", bench="false", eval_error="false"): total_time = None input_url = ("{localhost}/function?fct_expr={fct_expr}&io_format={io_format}&" +\ "vector_size={vector_size}&target={target}&" +\ "registered_pass_list={registered_pass_list}&" + \ "debug={debug}&language={language}&eval_error={eval_error}").format( localhost=self.mwa.LOCALHOST, fct_expr=fct_expr, io_format=io_format, vector_size=vector_size, target=target, registered_pass_list=registered_pass_list, sub_vector_size=sub_vector_size, debug=debug, language=language, eval_error=eval_error) # generate git commentary (indicating which version of metalibm was # used to generate code) ml_code_configuration.GLOBAL_GET_GIT_COMMENT = custom_get_common_git_comment( self.mwa.LOCALHOST, lambda: input_url) registered_pass_list = [ tag for tag in registered_pass_list.split(",") if tag != "" ] error = None source_code = "" build_cmd = "" report_issue_url = "" # function results max_error = None # checking inputs class KnownError(Exception): """ known error exception which can are raised when a manageable error is detected """ pass try: no_error = False if not ml_function_expr.check_fct_expr(fct_expr): source_code = "invalid function expression \"{}\"".format( fct_expr) elif not all((pass_tag in self.mwa.ALLOWED_PASS_LIST) for pass_tag in registered_pass_list): source_code = "unknown pass in {}".format([ pass_tag for pass_tag in registered_pass_list if not pass_tag in self.mwa.ALLOWED_PASS_LIST ]) print(source_code) # no allowed target list for now elif not io_format in self.mwa.format_list: source_code = ("forbidden format {}".format(io_format)) print(source_code) elif not int(vector_size) in self.mwa.vector_size_list: source_code = ("forbidden vector_size {}".format(vector_size)) print(source_code) elif sub_vector_size != "default" and not int( sub_vector_size) in self.mwa.sub_vector_size_list: source_code = ( "forbidden sub_vector_size {}".format(sub_vector_size)) print(source_code) elif not language in self.mwa.LANGUAGE_MAP: source_code = ("forbidden language {}".format(language)) print(source_code) elif not range_nan.lower() in ["true", "false"]: source_code = ("invalid range NaN flag {}".format(range_nan)) print(source_code) elif not bench.lower() in ["true", "false"]: source_code = ("invalid bench flag {}".format(bench)) print(source_code) elif not eval_error.lower() in ["true", "false"]: source_code = ("invalid eval_error flag {}".format(bench)) print(source_code) else: no_error = True if not no_error: raise KnownError(source_code) except KnownError as e: # stat counter self.stats.num_known_errors += 1 error = e self.log_msg(e, tag="error") except: # stat counter self.stats.num_unknwon_errors += 1 e = sys.exc_info() error = "Exception:\n {}".format("".join( traceback.format_exception(*e))).replace('\n', '<br/>') source_code = "" self.log_msg(error, tag="error") else: # clearing logs ml_log_report.Log.log_stream.log_output = "" try: start_time = time.perf_counter() fct_ctor = ml_function_expr.FunctionExpression arity = ml_function_expr.count_expr_arity(fct_expr) fct_extra_args = {} language_object = self.mwa.LANGUAGE_MAP[language] precision = precision_parser(io_format) vector_size = int(vector_size) sub_vector_size = None if sub_vector_size == "default" else int( sub_vector_size) range_nan = range_nan.lower() in ["true"] 
eval_error = eval_error.lower() in ["true"] bench = bench.lower() in ["true"] if range_nan: input_interval = None else: input_interval = sollya.Interval(sollya.parse(range_lo), sollya.parse(range_hi)) debug = bool(debug) target_class = target_parser(target) target_inst = target_class() passes = [ "beforecodegen:{}".format(pass_tag) for pass_tag in registered_pass_list if pass_tag in self.mwa.ALLOWED_PASS_LIST ] args = fct_ctor.get_default_args( function_expr_str=[fct_expr], precision=precision, input_precisions=(precision, ) * arity, input_intervals=(input_interval, ) * arity, vector_size=vector_size, sub_vector_size=sub_vector_size, passes=passes, language=language_object, debug=debug, bench_test_number=100 if bench else None, compute_max_error=eval_error, execute_trigger=eval_error, bench_test_range=input_interval, target=target_inst, **fct_extra_args) # function instance object fct_instance = fct_ctor(args=args) # principal scheme function_only_group = fct_instance.generate_function_list() function_only_group = fct_instance.transform_function_group( function_only_group) function_only_code_obj = fct_instance.get_new_main_code_object( ) function_only_code_obj = fct_instance.generate_code( function_only_code_obj, function_only_group, language=fct_instance.language) # actual source code source_code = function_only_code_obj.get( fct_instance.main_code_generator) with open("source_code.dump.c", "w") as output_stream: output_stream.write(source_code) if eval_error: fct_instance.main_code_generator.clear_memoization_map() main_pre_statement, main_statement, function_group = fct_instance.instrument_function_group( function_only_group, enable_subexpr_sharing=True) EMBEDDING_BINARY = True fct_instance.main_code_object = fct_instance.get_new_main_code_object( ) bench_source_code_obj = fct_instance.generate_output( EMBEDDING_BINARY, main_pre_statement, main_statement, function_group) execute_result = fct_instance.build_and_execute_source_code( function_group, bench_source_code_obj) max_error = execute_result["max_error"] # constructing build command build_cmd = SourceFile.get_build_command("<source_path>", target_inst, bin_name="ml_bench", shared_object=False, link=True, expand_env_var=False) total_time = time.perf_counter() - start_time except: self.stats.num_gen_errors += 1 e = sys.exc_info() error = "Output: \n{}\nException:\n {}".format( ml_log_report.Log.log_stream.log_output, "".join(traceback.format_exception(*e))).replace( '\n', '<br/>') source_code = "" self.log_msg(error, tag="error") report_issue_url = gen_report_issue_url( MetalibmWebApp.REPORT_ISSUE_BASE_URL, precision=io_format, fct_expr=fct_expr, target=target, vector_size=vector_size, debug=debug, language=language, sub_vector_size=sub_vector_size, registered_pass_list=registered_pass_list, ) else: self.stats.num_generated_function += 1 self.log_msg(input_url, tag="info") return dict(code=source_code, build_cmd=build_cmd, precision=io_format, fct_expr=fct_expr, target=target, vector_size=vector_size, debug=debug, language=language, sub_vector_size=sub_vector_size, registered_pass_list=registered_pass_list, report_issue_url=report_issue_url, error=error, range_lo=range_lo, range_hi=range_hi, range_nan=range_nan, total_time=total_time, max_error=max_error, eval_error=eval_error, **self.mwa.option_dict)
class ML_HyperbolicSine(ScalarUnaryFunction): function_name = "ml_sinh" """ Implementation of hyperbolic sine function """ def __init__(self, args=DefaultArgTemplate): # initializing base class super().__init__(args) @staticmethod def get_default_args(**kw): """ Return a structure containing the arguments for ML_HyperbolicSine, builtin from a default argument mapping overloaded with @p kw """ default_args_sinh = { "output_file": "my_sinh.c", "function_name": "my_sinh", "precision": ML_Binary32, "accuracy": ML_Faithful, "target": GenericProcessor.get_target_instance() } default_args_sinh.update(kw) return DefaultArgTemplate(**default_args_sinh) def generate_scalar_scheme(self, vx): Log.set_dump_stdout(True) Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m") if self.debug_flag: Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m") index_size = 5 comp_lo = (vx < 0) comp_lo.set_attributes(tag = "comp_lo", precision = ML_Bool) sign = Select(comp_lo, -1, 1, precision = self.precision) # as sinh is an odd function, we can simplify the input to its absolute # value once the sign has been extracted vx = Abs(vx) int_precision = self.precision.get_integer_format() # argument reduction arg_reg_value = log(2)/2**index_size inv_log2_value = round(1/arg_reg_value, self.precision.get_sollya_object(), sollya.RN) inv_log2_cst = Constant(inv_log2_value, precision = self.precision, tag = "inv_log2") # for r_hi to be accurate we ensure k * log2_hi_value_cst is exact # by limiting the number of non-zero bits in log2_hi_value_cst # cosh(x) ~ exp(abs(x))/2 for a big enough x # cosh(x) > 2^1023 <=> exp(x) > 2^1024 <=> x > log(2^1024) # k = inv_log2_value * x # -1 for guard max_k_approx = inv_log2_value * log(sollya.SollyaObject(2)**1024) max_k_bitsize = int(ceil(log2(max_k_approx))) Log.report(Log.Info, "max_k_bitsize: %d" % max_k_bitsize) log2_hi_value_precision = self.precision.get_precision() - max_k_bitsize - 1 log2_hi_value = round(arg_reg_value, log2_hi_value_precision, sollya.RN) log2_lo_value = round(arg_reg_value - log2_hi_value, self.precision.get_sollya_object(), sollya.RN) log2_hi_value_cst = Constant(log2_hi_value, tag = "log2_hi_value", precision = self.precision) log2_lo_value_cst = Constant(log2_lo_value, tag = "log2_lo_value", precision = self.precision) k = Trunc(Multiplication(inv_log2_cst, vx), precision = self.precision) k_log2 = Multiplication(k, log2_hi_value_cst, precision = self.precision, exact = True, tag = "k_log2", unbreakable = True) r_hi = vx - k_log2 r_hi.set_attributes(tag = "r_hi", debug = debug_multi, unbreakable = True) r_lo = -k * log2_lo_value_cst # reduced argument r = r_hi + r_lo r.set_attributes(tag = "r", debug = debug_multi) if is_gappa_installed(): r_eval_error = self.get_eval_error(r_hi, variable_copy_map = { vx: Variable("vx", interval = Interval(0, 715), precision = self.precision), k: Variable("k", interval = Interval(0, 1024), precision = self.precision) }) Log.report(Log.Verbose, "r_eval_error: ", r_eval_error) approx_interval = Interval(-arg_reg_value, arg_reg_value) error_goal_approx = 2**-(self.precision.get_precision()) poly_degree = sup(guessdegree(exp(sollya.x), approx_interval, error_goal_approx)) + 3 precision_list = [1] + [self.precision] * (poly_degree) k_integer = Conversion(k, precision = int_precision, tag = "k_integer", debug = debug_multi) k_hi = BitLogicRightShift(k_integer, Constant(index_size, precision=int_precision), tag = "k_int_hi", precision = int_precision, debug = debug_multi) k_lo = Modulo(k_integer, 
2**index_size, tag = "k_int_lo", precision = int_precision, debug = debug_multi) pow_exp = ExponentInsertion(Conversion(k_hi, precision = int_precision), precision = self.precision, tag = "pow_exp", debug = debug_multi) exp_table = ML_NewTable(dimensions = [2 * 2**index_size, 4], storage_precision = self.precision, tag = self.uniquify_name("exp2_table")) for i in range(2 * 2**index_size): input_value = i - 2**index_size if i >= 2**index_size else i reduced_hi_prec = int(self.precision.get_mantissa_size() - 8) # using SollyaObject wrapper to force evaluation by sollya # with higher precision exp_value = sollya.SollyaObject(2)**((input_value)* 2**-index_size) mexp_value = sollya.SollyaObject(2)**((-input_value)* 2**-index_size) pos_value_hi = round(exp_value, reduced_hi_prec, sollya.RN) pos_value_lo = round(exp_value - pos_value_hi, self.precision.get_sollya_object(), sollya.RN) neg_value_hi = round(mexp_value, reduced_hi_prec, sollya.RN) neg_value_lo = round(mexp_value - neg_value_hi, self.precision.get_sollya_object(), sollya.RN) exp_table[i][0] = neg_value_hi exp_table[i][1] = neg_value_lo exp_table[i][2] = pos_value_hi exp_table[i][3] = pos_value_lo # log2_value = log(2) / 2^index_size # sinh(x) = 1/2 * (exp(x) - exp(-x)) # exp(x) = exp(x - k * log2_value + k * log2_value) # # r = x - k * log2_value # exp(x) = exp(r) * 2 ^ (k / 2^index_size) # # k / 2^index_size = h + l * 2^-index_size, with k, h, l integers # exp(x) = exp(r) * 2^h * 2^(l *2^-index_size) # # sinh(x) = exp(r) * 2^(h-1) * 2^(l *2^-index_size) - exp(-r) * 2^(-h-1) * 2^(-l *2^-index_size) # S=2^(h-1), T = 2^(-h-1) # exp(r) = 1 + poly_pos(r) # exp(-r) = 1 + poly_neg(r) # 2^(l / 2^index_size) = pos_value_hi + pos_value_lo # 2^(-l / 2^index_size) = neg_value_hi + neg_value_lo # error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai) poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(exp(sollya.x), poly_degree, precision_list, approx_interval, sollya.absolute, error_function = error_function) Log.report(Log.Verbose, "poly_approx_error: {}, {}".format(poly_approx_error, float(log2(poly_approx_error)))) polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme poly_pos = polynomial_scheme_builder(poly_object.sub_poly(start_index = 1), r, unified_precision = self.precision) poly_pos.set_attributes(tag = "poly_pos", debug = debug_multi) poly_neg = polynomial_scheme_builder(poly_object.sub_poly(start_index = 1), -r, unified_precision = self.precision) poly_neg.set_attributes(tag = "poly_neg", debug = debug_multi) table_index = Addition(k_lo, Constant(2**index_size, precision = int_precision), precision = int_precision, tag = "table_index", debug = debug_multi) neg_value_load_hi = TableLoad(exp_table, table_index, 0, tag = "neg_value_load_hi", debug = debug_multi) neg_value_load_lo = TableLoad(exp_table, table_index, 1, tag = "neg_value_load_lo", debug = debug_multi) pos_value_load_hi = TableLoad(exp_table, table_index, 2, tag = "pos_value_load_hi", debug = debug_multi) pos_value_load_lo = TableLoad(exp_table, table_index, 3, tag = "pos_value_load_lo", debug = debug_multi) k_plus = Max( Subtraction(k_hi, Constant(1, precision = int_precision), precision=int_precision, tag="k_plus", debug=debug_multi), Constant(self.precision.get_emin_normal(), precision = int_precision)) k_neg = Max( Subtraction(-k_hi, Constant(1, precision=int_precision), precision=int_precision, tag="k_neg", debug=debug_multi), Constant(self.precision.get_emin_normal(), precision = int_precision)) # 2^(h-1) 
pow_exp_pos = ExponentInsertion(k_plus, precision = self.precision, tag="pow_exp_pos", debug=debug_multi) # 2^(-h-1) pow_exp_neg = ExponentInsertion(k_neg, precision = self.precision, tag="pow_exp_neg", debug=debug_multi) hi_terms = (pos_value_load_hi * pow_exp_pos - neg_value_load_hi * pow_exp_neg) hi_terms.set_attributes(tag = "hi_terms", debug=debug_multi) pos_exp = (pos_value_load_hi * poly_pos + (pos_value_load_lo + pos_value_load_lo * poly_pos)) * pow_exp_pos pos_exp.set_attributes(tag = "pos_exp", debug = debug_multi) neg_exp = (neg_value_load_hi * poly_neg + (neg_value_load_lo + neg_value_load_lo * poly_neg)) * pow_exp_neg neg_exp.set_attributes(tag = "neg_exp", debug = debug_multi) result = Addition( Subtraction( pos_exp, neg_exp, precision=self.precision, ), hi_terms, precision=self.precision, tag="result", debug=debug_multi ) # ov_value ov_value = round(asinh(self.precision.get_max_value()), self.precision.get_sollya_object(), sollya.RD) ov_flag = Comparison(Abs(vx), Constant(ov_value, precision = self.precision), specifier = Comparison.Greater) # main scheme scheme = Statement( Return( Select( ov_flag, sign*FP_PlusInfty(self.precision), sign*result ))) return scheme def numeric_emulate(self, input_value): return sinh(input_value) standard_test_cases =[[sollya.parse(x)] for x in [ "0x1.8d3694p-5", "0x1.efc2cp-6", "0x1.f55ddap-5"] ]
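
# Numeric sanity sketch (hedged, plain Python) of the sinh reconstruction
# described in the comments above: with x = k * log(2)/2**w + r and
# k = h * 2**w + l,
#   sinh(x) = exp(r)*2**(l/2**w)*2**(h-1) - exp(-r)*2**(-l/2**w)*2**(-h-1).
import math

def _check_sinh_reconstruction(x=3.7, w=5):
    step = math.log(2) / 2**w
    k = math.trunc(x / step)
    r = x - k * step
    h, l = k >> w, k % 2**w
    recon = (math.exp(r) * 2.0**(l / 2.0**w) * 2.0**(h - 1)
             - math.exp(-r) * 2.0**(-l / 2.0**w) * 2.0**(-(h + 1)))
    return math.isclose(recon, math.sinh(x), rel_tol=1e-12)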
# created: Dec 23rd, 2013
# last-modified: Oct 6th, 2015
#
# author(s): Nicolas Brunie ([email protected])
###############################################################################
import sollya

from ..utility.log_report import Log
from ..code_generation.code_constant import *
import re

S2 = sollya.SollyaObject(2)

# numerical floating-point constants
ml_nan = sollya.parse("nan")
ml_infty = sollya.parse("infty")


## class for floating-point exception
class ML_FloatingPointException:
    pass


## class for type of floating-point exceptions
class ML_FloatingPointException_Type(object):
    ## dummy placeholder to generate C constant for FP exception
    #  (should raise an error)
    def get_cst(self, value, language=C_Code):
        return "NONE"

    def is_cst_decl_required(self):
        return False


## ML object for floating-point exception type
ML_FPE_Type = ML_FloatingPointException_Type()
def numeric_emulate(self, x, y):
    if x != 0 and y == 0:
        # multiplication by infinity to get the correctly signed result
        return x * sollya.parse("infty")
    return x / y
class ML_Division(ML_FunctionBasis): function_name = "ml_div" arity = 2 def __init__(self, args=DefaultArgTemplate): # initializing base class ML_FunctionBasis.__init__(self, args=args) self.num_iter = args.num_iter @staticmethod def get_default_args(**args): """ Generate a default argument structure set specifically for the Hyperbolic Cosine """ default_div_args = { "precision": ML_Binary32, "accuracy": ML_CorrectlyRounded, "target": GenericProcessor.get_target_instance(), "output_file": "my_div.c", "function_name": "my_div", "input_intervals": [DefaultArgTemplate.input_intervals[0]] * 2, "auto_test_range": DefaultArgTemplate.auto_test_range * 2, "bench_test_range": DefaultArgTemplate.bench_test_range * 2, "language": C_Code, "num_iter": 3, "passes": [ "typing:basic_legalization", "beforecodegen:expand_multi_precision" ], "vector_size": 1, } default_div_args.update(args) return DefaultArgTemplate(**default_div_args) def generate_scheme(self): # We wish to compute vx / vy vx = self.implementation.add_input_variable( "x", self.precision, interval=self.input_intervals[0]) vy = self.implementation.add_input_variable( "y", self.precision, interval=self.input_intervals[1]) # maximum exponent magnitude (to avoid overflow/ underflow during # intermediary computations int_prec = self.precision.get_integer_format() max_exp_mag = Constant(self.precision.get_emax() - 1, precision=int_prec) exact_ex = ExponentExtraction(vx, tag="exact_ex", precision=int_prec, debug=debug_multi) exact_ey = ExponentExtraction(vy, tag="exact_ey", precision=int_prec, debug=debug_multi) ex = Max(Min(exact_ex, max_exp_mag, precision=int_prec), -max_exp_mag, tag="ex", precision=int_prec) ey = Max(Min(exact_ey, max_exp_mag, precision=int_prec), -max_exp_mag, tag="ey", precision=int_prec) Attributes.set_default_rounding_mode(ML_RoundToNearest) Attributes.set_default_silent(True) # computing the inverse square root init_approx = None scaling_factor_x = ExponentInsertion(-ex, tag="sfx_ei", precision=self.precision, debug=debug_multi) scaling_factor_y = ExponentInsertion(-ey, tag="sfy_ei", precision=self.precision, debug=debug_multi) def test_interval_out_of_bound_risk(x_range, y_range): """ Try to determine from x and y's interval if there is a risk of underflow or overflow """ div_range = abs(x_range / y_range) underflow_risk = sollya.inf(div_range) < S2**( self.precision.get_emin_normal() + 2) overflow_risk = sollya.sup(div_range) > S2**( self.precision.get_emax() - 2) return underflow_risk or overflow_risk out_of_bound_risk = (self.input_intervals[0] is None or self.input_intervals[1] is None ) or test_interval_out_of_bound_risk( self.input_intervals[0], self.input_intervals[1]) Log.report(Log.Debug, "out_of_bound_risk: {}".format(out_of_bound_risk)) # scaled version of vx and vy, to avoid overflow and underflow if out_of_bound_risk: scaled_vx = vx * scaling_factor_x scaled_vy = vy * scaling_factor_y scaled_interval = MetaIntervalList( [MetaInterval(Interval(-2, -1)), MetaInterval(Interval(1, 2))]) scaled_vx.set_attributes(tag="scaled_vx", debug=debug_multi, interval=scaled_interval) scaled_vy.set_attributes(tag="scaled_vy", debug=debug_multi, interval=scaled_interval) seed_interval = 1 / scaled_interval print("seed_interval=1/{}={}".format(scaled_interval, seed_interval)) else: scaled_vx = vx scaled_vy = vy seed_interval = 1 / scaled_vy.get_interval() # We need a first approximation to 1 / scaled_vy dummy_seed = ReciprocalSeed(EmptyOperand(precision=self.precision), precision=self.precision) if 
self.processor.is_supported_operation(dummy_seed, self.language): init_approx = ReciprocalSeed(scaled_vy, precision=self.precision, tag="init_approx", debug=debug_multi) else: # generate tabulated version of seed raise NotImplementedError current_approx_std = init_approx # correctly-rounded inverse computation num_iteration = self.num_iter Attributes.unset_default_rounding_mode() Attributes.unset_default_silent() # check if inputs are zeros x_zero = Test(vx, specifier=Test.IsZero, likely=False, precision=ML_Bool) y_zero = Test(vy, specifier=Test.IsZero, likely=False, precision=ML_Bool) comp_sign = Test(vx, vy, specifier=Test.CompSign, tag="comp_sign", debug=debug_multi) # check if divisor is NaN y_nan = Test(vy, specifier=Test.IsNaN, likely=False, precision=ML_Bool) # check if inputs are signaling NaNs x_snan = Test(vx, specifier=Test.IsSignalingNaN, likely=False, precision=ML_Bool) y_snan = Test(vy, specifier=Test.IsSignalingNaN, likely=False, precision=ML_Bool) # check if inputs are infinities x_inf = Test(vx, specifier=Test.IsInfty, likely=False, tag="x_inf", precision=ML_Bool) y_inf = Test(vy, specifier=Test.IsInfty, likely=False, tag="y_inf", debug=debug_multi, precision=ML_Bool) scheme = None gappa_vx, gappa_vy = None, None # initial reciprocal approximation of 1.0 / scaled_vy inv_iteration_list, recp_approx = compute_reduced_reciprocal( init_approx, scaled_vy, self.num_iter) recp_approx.set_attributes(tag="recp_approx", debug=debug_multi) # approximation of scaled_vx / scaled_vy yerr_last, reduced_div_approx, div_iteration_list = compute_reduced_division( scaled_vx, scaled_vy, recp_approx) eval_error_range, div_eval_error_range = self.solve_eval_error( init_approx, recp_approx, reduced_div_approx, scaled_vx, scaled_vy, inv_iteration_list, div_iteration_list, S2**-7, seed_interval) eval_error = sup(abs(eval_error_range)) recp_interval = 1 / scaled_vy.get_interval() + eval_error_range recp_approx.set_interval(recp_interval) div_interval = scaled_vx.get_interval() / scaled_vy.get_interval( ) + div_eval_error_range reduced_div_approx.set_interval(div_interval) reduced_div_approx.set_tag("reduced_div_approx") if out_of_bound_risk: unscaled_result = scaling_div_result(reduced_div_approx, ex, scaling_factor_y, self.precision) subnormal_result = subnormalize_result(recp_approx, reduced_div_approx, ex, ey, yerr_last, self.precision) else: unscaled_result = reduced_div_approx subnormal_result = reduced_div_approx x_inf_or_nan = Test(vx, specifier=Test.IsInfOrNaN, likely=False) y_inf_or_nan = Test(vy, specifier=Test.IsInfOrNaN, likely=False, tag="y_inf_or_nan", debug=debug_multi) # generate IEEE exception raising only of libm-compliant # mode is enabled enable_raise = self.libm_compliant # managing special cases # x inf and y inf pre_scheme = ConditionBlock( x_inf_or_nan, ConditionBlock( x_inf, ConditionBlock( y_inf_or_nan, Statement( # signaling NaNs raise invalid operation flags ConditionBlock(y_snan, Raise(ML_FPE_Invalid)) if enable_raise else Statement(), Return(FP_QNaN(self.precision)), ), ConditionBlock(comp_sign, Return(FP_MinusInfty(self.precision)), Return(FP_PlusInfty(self.precision)))), Statement( ConditionBlock(x_snan, Raise(ML_FPE_Invalid)) if enable_raise else Statement(), Return(FP_QNaN(self.precision)))), ConditionBlock( x_zero, ConditionBlock( LogicalOr(y_zero, y_nan, precision=ML_Bool), Statement( ConditionBlock(y_snan, Raise(ML_FPE_Invalid)) if enable_raise else Statement(), Return(FP_QNaN(self.precision))), Return(vx)), ConditionBlock( y_inf_or_nan, ConditionBlock( y_inf, 
Return( Select(comp_sign, FP_MinusZero(self.precision), FP_PlusZero(self.precision))), Statement( ConditionBlock(y_snan, Raise(ML_FPE_Invalid)) if enable_raise else Statement(), Return(FP_QNaN(self.precision)))), ConditionBlock( y_zero, Statement( Raise(ML_FPE_DivideByZero) if enable_raise else Statement(), ConditionBlock( comp_sign, Return(FP_MinusInfty(self.precision)), Return(FP_PlusInfty(self.precision)))), # managing numerical value result cases Statement( recp_approx, reduced_div_approx, ConditionBlock( Test(unscaled_result, specifier=Test.IsSubnormal, likely=False), # result is subnormal Statement( # inexact flag should have been raised when computing yerr_last # ConditionBlock( # Comparison( # yerr_last, 0, # specifier=Comparison.NotEqual, likely=True), # Statement(Raise(ML_FPE_Inexact, ML_FPE_Underflow)) #), Return(subnormal_result), ), # result is normal Statement( # inexact flag should have been raised when computing yerr_last #ConditionBlock( # Comparison( # yerr_last, 0, # specifier=Comparison.NotEqual, likely=True), # Raise(ML_FPE_Inexact) #), Return(unscaled_result))), ))))) # managing rounding mode save and restore # to ensure intermediary computations are performed in round-to-nearest # clearing exception before final computation #rnd_mode = GetRndMode() #scheme = Statement( # rnd_mode, # SetRndMode(ML_RoundToNearest), # yerr_last, # SetRndMode(rnd_mode), # unscaled_result, # ClearException(), # pre_scheme #) scheme = pre_scheme return scheme def numeric_emulate(self, x, y): if x != 0 and y == 0: # multiplication to correct the sign return x * sollya.parse("infty") return x / y def solve_eval_error(self, gappa_init_approx, gappa_current_approx, div_approx, gappa_vx, gappa_vy, inv_iteration_list, div_iteration_list, seed_accuracy, seed_interval): """ compute the evaluation error of reciprocal approximation of (1 / gappa_vy) :param seed_accuracy: absolute error for seed value :type seed_accuracy: SollyaObject """ seed_var = Variable("seed", precision=self.precision, interval=seed_interval) cg_eval_error_copy_map = { gappa_init_approx.get_handle().get_node(): seed_var, gappa_vy.get_handle().get_node(): Variable("y", precision=self.precision, interval=Interval(1, 2)), gappa_vx.get_handle().get_node(): Variable("x", precision=self.precision, interval=Interval(1, 2)), } yerr_last = div_iteration_list[-1].yerr # copying cg_eval_error_copy_map to allow mutation during # optimise_scheme while keeping a clean copy for later use optimisation_copy_map = cg_eval_error_copy_map.copy() gappa_current_approx = self.optimise_scheme(gappa_current_approx, copy=optimisation_copy_map) div_approx = self.optimise_scheme(div_approx, copy=optimisation_copy_map) yerr_last = self.optimise_scheme(yerr_last, copy=optimisation_copy_map) yerr_last.get_handle().set_node(yerr_last) G1 = Constant(1, precision=ML_Exact) exact_recp = G1 / gappa_vy exact_recp.set_precision(ML_Exact) exact_recp.set_tag("exact_recp") recp_approx_error_goal = gappa_current_approx - exact_recp recp_approx_error_goal.set_attributes(precision=ML_Exact, tag="recp_approx_error_goal") gappacg = GappaCodeGenerator(self.processor, declare_cst=False, disable_debug=True) gappa_code = GappaCodeObject() exact_div = gappa_vx * exact_recp exact_div.set_attributes(precision=ML_Exact, tag="exact_div") div_approx_error_goal = div_approx - exact_div div_approx_error_goal.set_attributes(precision=ML_Exact, tag="div_approx_error_goal") bound_list = [op for op in cg_eval_error_copy_map] gappacg.add_goal(gappa_code, yerr_last) gappa_code = 
gappacg.get_interval_code( [recp_approx_error_goal, div_approx_error_goal], bound_list, cg_eval_error_copy_map, gappa_code=gappa_code, register_bound_hypothesis=False) for node in bound_list: gappacg.add_hypothesis(gappa_code, cg_eval_error_copy_map[node], cg_eval_error_copy_map[node].get_interval()) new_exact_recp_node = exact_recp.get_handle().get_node() new_exact_div_node = exact_div.get_handle().get_node() # adding specific hints for Newton-Raphson reciprocal iteration for nr in inv_iteration_list: nr.get_hint_rules(gappacg, gappa_code, new_exact_recp_node) for div_iter in div_iteration_list: div_iter.get_hint_rules(gappacg, gappa_code, new_exact_recp_node, new_exact_div_node) seed_wrt_exact = seed_var - new_exact_recp_node seed_wrt_exact.set_attributes(precision=ML_Exact, tag="seed_wrt_exact") gappacg.add_hypothesis(gappa_code, seed_wrt_exact, Interval(-seed_accuracy, seed_accuracy)) try: gappa_results = execute_gappa_script_extract( gappa_code.get(gappacg)) recp_eval_error = gappa_results["recp_approx_error_goal"] div_eval_error = gappa_results["div_approx_error_goal"] print("eval error(s): recp={}, div={}".format( recp_eval_error, div_eval_error)) except: print("error during gappa run") raise recp_eval_error = None div_eval_error = None return recp_eval_error, div_eval_error standard_test_cases = [ (1.0, sollya.parse("0x1.fffffffffffffp+1023"), sollya.parse("0x1p-1024")), (sollya.parse("-0x1.34a246p-2"), sollya.parse("-0x1.26e2e2p-1")), (sollya.parse("0x1.p0"), sollya.parse("0x1.e0ef5ep-49")), (sollya.parse("0x1.7fddbp0"), sollya.parse("0x1.e0ef5ep-49")), (sollya.parse("0x1.7fddbp-126"), sollya.parse("0x1.e0ef5ep-49")), (1.0, sollya.parse("-0x1.fffffffffffffp+1023"), sollya.parse("-0x1p-1024")), ]
class MetaAtan(ScalarUnaryFunction): """ Meta implementation of arctangent function """ function_name = "ml_atan" default_args_atan = { "output_file": "my_atan.c", "function_name": "my_atan", "precision": ML_Binary32, "accuracy": ML_Faithful, "num_sub_intervals": 8, "method": "piecewise", "target": GenericProcessor.get_target_instance() } def __init__(self, args): super().__init__(args) self.method = args.method self.num_sub_intervals = args.num_sub_intervals @classmethod def get_default_args(cls, **kw): """ Return a structure containing the arguments for MetaAtan, builtin from a default argument mapping overloaded with @p kw """ arg_dict = cls.default_args_atan.copy() arg_dict.update(kw) return DefaultArgTemplate(**arg_dict) def generate_scalar_scheme(self, vx): """ Evaluation scheme generation """ return self.generic_atan2_generate(vx) def generic_atan2_generate(self, _vx, vy=None): """ if vy is None, compute atan(_vx), else compute atan2(vy / vx) """ if vy is None: # approximation # if abs_vx <= 1.0 then atan(abx_vx) is directly approximated # if abs_vx > 1.0 then atan(abs_vx) = pi/2 - atan(1 / abs_vx) # # for vx >= 0, atan(vx) = atan(abs_vx) # # for vx < 0, atan(vx) = -atan(abs_vx) for vx < 0 # = -pi/2 + atan(1 / abs_vx) vx = _vx sign_cond = vx < 0 abs_vx = Select(vx < 0, -vx, vx, tag="abs_vx", debug=debug_multi) bound_cond = abs_vx > 1 inv_abs_vx = 1 / abs_vx # condition to select subtraction cond = LogicalOr(LogicalAnd(vx < 0, LogicalNot(bound_cond)), vx > 1, tag="cond", debug=debug_multi) # reduced argument red_vx = Select(bound_cond, inv_abs_vx, abs_vx, tag="red_vx", debug=debug_multi) offset = None else: # bound_cond is True iff Abs(vy / _vx) > 1.0 bound_cond = Abs(vy) > Abs(_vx) bound_cond.set_attributes(tag="bound_cond", debug=debug_multi) # vx and vy are of opposite signs #sign_cond = (_vx * vy) < 0 # using cast to int(signed) and bitwise xor # to determine if _vx and vy are of opposite sign rapidly fast_sign_cond = BitLogicXor( TypeCast(_vx, precision=self.precision.get_integer_format()), TypeCast(vy, precision=self.precision.get_integer_format()), precision=self.precision.get_integer_format()) < 0 # sign_cond = (_vx * vy) < 0 sign_cond = fast_sign_cond sign_cond.set_attributes(tag="sign_cond", debug=debug_multi) # condition to select subtraction # TODO: could be accelerated if LogicalXor existed slow_cond = LogicalOr( LogicalAnd(sign_cond, LogicalNot(bound_cond)), # 1 < (vy / _vx) < 0 LogicalAnd(bound_cond, LogicalNot(sign_cond)), # (vy / _vx) > 1 tag="cond", debug=debug_multi) cond = slow_cond numerator = Select(bound_cond, _vx, vy, tag="numerator", debug=debug_multi) denominator = Select(bound_cond, vy, _vx, tag="denominator", debug=debug_multi) # reduced argument red_vx = Abs(numerator) / Abs(denominator) red_vx.set_attributes(tag="red_vx", debug=debug_multi) offset = Select( _vx > 0, Constant(0, precision=self.precision), # vx < 0 Select( sign_cond, # vy > 0 Constant(sollya.pi, precision=self.precision), Constant(-sollya.pi, precision=self.precision), precision=self.precision), precision=self.precision, tag="offset") approx_fct = sollya.atan(sollya.x) if self.method == "piecewise": sign_vx = Select(cond, -1, 1, precision=self.precision, tag="sign_vx", debug=debug_multi) cst_sign = Select(sign_cond, -1, 1, precision=self.precision, tag="cst_sign", debug=debug_multi) cst = cst_sign * Select( bound_cond, sollya.pi / 2, 0, precision=self.precision) cst.set_attributes(tag="cst", debug=debug_multi) bound_low = 0.0 bound_high = 1.0 num_intervals = self.num_sub_intervals 
error_threshold = S2**-(self.precision.get_mantissa_size() + 8) approx, eval_error = piecewise_approximation( approx_fct, red_vx, self.precision, bound_low=bound_low, bound_high=bound_high, max_degree=None, num_intervals=num_intervals, error_threshold=error_threshold, odd=True) result = cst + sign_vx * approx result.set_attributes(tag="result", precision=self.precision, debug=debug_multi) elif self.method == "single": approx_interval = Interval(0, 1.0) # determining the degree of the polynomial approximation poly_degree_range = sollya.guessdegree( approx_fct / sollya.x, approx_interval, S2**-(self.precision.get_field_size() + 2)) poly_degree = int(sollya.sup(poly_degree_range)) + 4 Log.report(Log.Info, "poly_degree={}".format(poly_degree)) # arctan is an odd function, so only odd coefficient must be non-zero poly_degree_list = list(range(1, poly_degree + 1, 2)) poly_object, poly_error = Polynomial.build_from_approximation_with_error( approx_fct, poly_degree_list, [1] + [self.precision.get_sollya_object()] * (len(poly_degree_list) - 1), approx_interval) odd_predicate = lambda index, _: ((index - 1) % 4 != 0) even_predicate = lambda index, _: (index != 1 and (index - 1) % 4 == 0) poly_odd_object = poly_object.sub_poly_cond(odd_predicate, offset=1) poly_even_object = poly_object.sub_poly_cond(even_predicate, offset=1) sollya.settings.display = sollya.hexadecimal Log.report(Log.Info, "poly_error: {}".format(poly_error)) Log.report(Log.Info, "poly_odd: {}".format(poly_odd_object)) Log.report(Log.Info, "poly_even: {}".format(poly_even_object)) poly_odd = PolynomialSchemeEvaluator.generate_horner_scheme( poly_odd_object, abs_vx) poly_odd.set_attributes(tag="poly_odd", debug=debug_multi) poly_even = PolynomialSchemeEvaluator.generate_horner_scheme( poly_even_object, abs_vx) poly_even.set_attributes(tag="poly_even", debug=debug_multi) exact_sum = poly_odd + poly_even exact_sum.set_attributes(tag="exact_sum", debug=debug_multi) # poly_even should be (1 + poly_even) result = vx + vx * exact_sum result.set_attributes(tag="result", precision=self.precision, debug=debug_multi) else: raise NotImplementedError if not offset is None: result = result + offset std_scheme = Statement(Return(result)) scheme = std_scheme return scheme def numeric_emulate(self, input_value): return sollya.atan(input_value) standard_test_cases = [[sollya.parse(x)] for x in ["0x1.107a78p+0", "0x1.9e75a6p+0"]]
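# Illustrative sketch (not part of the original class): the piecewise scheme
# above only evaluates its approximation on the reduced argument
# red_vx in [0, 1] and then rebuilds atan(x) from the identities
# atan(-x) = -atan(x) and atan(x) = pi/2 - atan(1/x) for x > 1.  The
# standard-library reference below follows the same reduction so that the
# reconstruction logic can be checked independently of the generated
# polynomial.
import math

def atan_via_reduction(x):
    """Reference atan(x) using the argument reduction described above."""
    abs_x = abs(x)
    if abs_x > 1.0:
        red_vx = 1.0 / abs_x                    # reduced argument in (0, 1)
        abs_result = math.pi / 2 - math.atan(red_vx)
    else:
        red_vx = abs_x                          # already in [0, 1]
        abs_result = math.atan(red_vx)
    return -abs_result if x < 0 else abs_result  # odd symmetry

# e.g. atan_via_reduction(3.5) matches math.atan(3.5) up to rounding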
class MetaAtan2(ScalarBinaryFunction, MetaAtan): """ Meta-function for 2-argument arc tangent (atan2) """ arity = 2 function_name = "ml_atan2" def __init__(self, args): ScalarBinaryFunction.__init__(self, args) self.method = args.method @classmethod def get_default_args(cls, **kw): """ Return a structure containing the arguments for MetaAtan, builtin from a default argument mapping overloaded with @p kw """ arg_dict = cls.default_args_atan.copy() arg_dict.update({ "output_file": "my_atan2.c", "function_name": "my_atan2", "input_intervals": [Interval(-5, 5)] * 2, }) arg_dict.update(kw) return DefaultArgTemplate(**arg_dict) def generate_scalar_scheme(self, vy, vx): # as in standard library atan2(y, x), take y as first # parameter and x as second, we inverse vy and vx in method # argument list # extract of atan2 specification from man page # If y is +0 (-0) and x is less than 0, +pi (-pi) is returned. # If y is +0 (-0) and x is greater than 0, +0 (-0) is returned. # If y is less than 0 and x is +0 or -0, -pi/2 is returned. # If y is greater than 0 and x is +0 or -0, pi/2 is returned. # If either x or y is NaN, a NaN is returned. # If y is +0 (-0) and x is -0, +pi (-pi) is returned. # If y is +0 (-0) and x is +0, +0 (-0) is returned. # If y is a finite value greater (less) than 0, and x is negative infinity, +pi (-pi) is # returned. # If y is a finite value greater (less) than 0, and x is positive infinity, +0 (-0) is returned. # If y is positive infinity (negative infinity), and x is finite, pi/2 (-pi/2) is returned. # If y is positive infinity (negative infinity) and x is negative infinity, +3*pi/4 (-3*pi/4) is # returned. # If y is positive infinity (negative infinity) and x is positive infinity, +pi/4 (-pi/4) is # returned. vy.set_attributes(tag="y") vx.set_attributes(tag="x") return self.generic_atan2_generate(vx, vy) def numeric_emulate(self, vy, vx): if vx > 0: return sollya.atan(vy / vx) elif vy < 0: # vy / vx > 0 return -sollya.pi + sollya.atan(vy / vx) else: # vy > 0, vy / vx < 0 return sollya.pi + sollya.atan(vy / vx) standard_test_cases = [ (sollya.parse("0x1.08495cp+2"), sollya.parse("-0x1.88569ep+1")), (sollya.parse("0x1.08495cp+2"), sollya.parse("-0x1.88569ep+1")), (sollya.parse("0x1.08495cp+2"), sollya.parse("-0x1.88569ep+1")), (sollya.parse("0x1.08495cp+2"), sollya.parse("-0x1.88569ep+1")), ]
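# Illustrative sketch (not part of the original class): numeric_emulate above
# rebuilds atan2(y, x) from atan(y / x) plus a +/- pi offset chosen from the
# signs of x and y, which is also what the offset Select in
# generic_atan2_generate implements.  The standalone version below mirrors
# that quadrant correction with the standard library; finite, non-zero x is
# assumed (special values are handled separately by the meta-function).
import math

def atan2_from_atan(y, x):
    """atan2(y, x) rebuilt from atan(y / x) and a quadrant offset."""
    if x > 0:
        return math.atan(y / x)                 # right half-plane: no offset
    elif y < 0:
        return -math.pi + math.atan(y / x)      # third quadrant: offset -pi
    else:
        return math.pi + math.atan(y / x)       # second quadrant: offset +pi

# e.g. atan2_from_atan(1.0, -2.0) matches math.atan2(1.0, -2.0) up to rounding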
class ML_HyperbolicTangent(ScalarUnaryFunction): """ Implementation of hyperbolic tangent function """ function_name = "ml_tanh" def __init__(self, args=DefaultArgTemplate): # initializing base class super().__init__(args) @staticmethod def get_default_args(**kw): """ Return a structure containing the arguments for ML_HyperbolicTangent, builtin from a default argument mapping overloaded with @p kw """ default_args_tanh = { "output_file": "my_tanh.c", "function_name": "my_tanh", "precision": ML_Binary32, "accuracy": ML_Faithful, "target": GenericProcessor.get_target_instance() } default_args_tanh.update(kw) return DefaultArgTemplate(**default_args_tanh) def generate_approx_poly_near_zero(self, function, high_bound, error_bound, variable): """ Generate polynomial approximation scheme """ error_function = lambda p, f, ai, mod, t: sollya.dirtyinfnorm( p - f, ai) # Some issues encountered when 0 is one of the interval bound # so we use a symetric interval around it approx_interval = Interval(2**-100, high_bound) local_function = function / sollya.x degree = sollya.sup( sollya.guessdegree(local_function, approx_interval, error_bound)) degree_list = range(0, int(degree) + 4, 2) poly_object, approx_error = Polynomial.build_from_approximation_with_error( function / sollya.x, degree_list, [1] + [self.precision] * (len(degree_list) - 1), approx_interval, sollya.absolute, error_function=error_function) Log.report( Log.Info, "approximation poly: {}\n with error {}".format( poly_object, approx_error)) poly_scheme = Multiplication( variable, PolynomialSchemeEvaluator.generate_horner_scheme( poly_object, variable, self.precision)) return poly_scheme, approx_error def generate_scalar_scheme(self, vx): """ Generating implementation script for hyperic tangent meta-function """ # tanh(x) = sinh(x) / cosh(x) # = (e^x - e^-x) / (e^x + e^-x) # = (e^(2x) - 1) / (e^(2x) + 1) # when x -> +inf, tanh(x) -> 1 # when x -> -inf, tanh(x) -> -1 # ~0 e^x ~ 1 + x - x^2 / 2 + x^3 / 6 + ... # e^(-x) ~ 1 - x - x^2 / 2- x^3/6 + ... # when x -> 0, tanh(x) ~ (2 (x + x^3/6 + ...)) / (2 - x^2 + ...) 
~ x # We can divide the input interval into 3 parts # positive, around 0, and finally negative # Possible argument reduction # x = m.2^E = k * log(2) + r # (k != 0) => tanh(x) = (2k * e^(2r) - 1) / (2k * e^(2r) + 1) # = (1 - 1 * e^(-2r) / 2k) / (1 + e^(-2r) / 2k) # # tanh(x) = (e^(2x) - 1) / (e^(2x) + 1) # = (e^(2x) + 1 - 1- 1) / (e^(2x) + 1) # = 1 - 2 / (e^(2x) + 1) # tanh is odd so we reduce the computation to the absolute value of # vx abs_vx = Abs(vx, precision=self.precision) # if p is the expected output precision # x > (p+2) * log(2) / 2 => tanh(x) = 1 - eps # where eps < 1/2 * 2^-p p = self.precision.get_mantissa_size() high_bound = (p + 2) * sollya.log(2) / 2 near_zero_bound = 0.125 interval_num = 1024 Log.report(Log.Verbose, "high_bound={}, near_zero_bound={}, interval_num={}", float(high_bound), near_zero_bound, interval_num) interval_size = (high_bound - near_zero_bound) / (1024) new_interval_size = S2**int(sollya.log2(interval_size)) interval_num *= 2 high_bound = new_interval_size * interval_num + near_zero_bound Log.report(Log.Verbose, "high_bound={}, near_zero_bound={}, interval_num={}", float(high_bound), near_zero_bound, interval_num) ERROR_THRESHOLD = S2**-p Log.report(Log.Info, "ERROR_THRESHOLD={}", ERROR_THRESHOLD) # Near 0 approximation near_zero_scheme, near_zero_error = self.generate_approx_poly_near_zero( sollya.tanh(sollya.x), near_zero_bound, S2**-p, abs_vx) # approximation parameters poly_degree = 7 approx_interval = Interval(near_zero_bound, high_bound) sollya.settings.points = 117 approx_scheme, approx_error = piecewise_approximation( sollya.tanh, abs_vx, self.precision, bound_low=near_zero_bound, bound_high=high_bound, num_intervals=interval_num, max_degree=poly_degree, error_threshold=ERROR_THRESHOLD) Log.report(Log.Warning, "approx_error={}".format(approx_error)) comp_near_zero_bound = abs_vx < near_zero_bound comp_near_zero_bound.set_attributes(tag="comp_near_zero_bound", debug=debug_multi) comp_high_bound = abs_vx < high_bound comp_high_bound.set_attributes(tag="comp_high_bound", debug=debug_multi) complete_scheme = Select( comp_near_zero_bound, near_zero_scheme, Select(comp_high_bound, approx_scheme, Constant(1.0, precision=self.precision))) scheme = Return(Select(vx < 0, Negation(complete_scheme), complete_scheme), precision=self.precision) return scheme def numeric_emulate(self, input_value): return tanh(input_value) standard_test_cases = [[sollya.parse(x)] for x in [ "-0x1.572306p+0", "0x1.af0bf2p+1", "-0x1.af0bf2p+1", "-0x1.51b618p-13", "0x1.ffb99ep-1", "0x1.f68b2cp-4" ]]
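# Illustrative sketch (not part of the original class): the scheme above
# returns the constant 1.0 once |x| exceeds high_bound, using the fact that
# for x > (p + 2) * log(2) / 2 the gap 1 - tanh(x) = 2 / (exp(2x) + 1) is
# below 2^-(p+1), i.e. under half an ulp of 1.0 for a p-bit mantissa.  The
# standard-library check below verifies that bound; p = 24 (binary32) is an
# assumption of the example.
import math

def check_tanh_saturation(p=24):
    """Verify 1 - tanh(x) < 2^-(p+1) at the saturation threshold used above."""
    x = (p + 2) * math.log(2) / 2       # high_bound before interval adjustment
    gap = 1.0 - math.tanh(x)            # equals 2 / (exp(2x) + 1) = 2 / (2^(p+2) + 1)
    return gap, gap < 2.0**-(p + 1)

# check_tanh_saturation() -> (~2.98e-08, True), and 2^-25 ~= 2.98e-08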
###############################################################################
# created: Apr 23rd, 2014
# last-modified: Mar 7th, 2018
#
# author(s): Nicolas Brunie ([email protected])
###############################################################################
""" command-line argument templates """

import sys
import os
import argparse
import traceback

from sollya import Interval
import sollya

ml_infty = sollya.parse("infty")

from .arg_utils import extract_option_value, test_flag_option
from .log_report import Log
from ..core.ml_formats import *
from ..core.precisions import *
from ..code_generation.generic_processor import GenericProcessor
from ..core.target import TargetRegister
from ..targets import *
from ..code_generation.code_constant import *
from ..core.passes import Pass
from ..core.ml_hdl_format import (fixed_point, ML_StdLogicVectorFormat,
                                  RTL_FixedPointFormat, HdlVirtualFormat)
class ML_ExponentialM1_Red(ML_FunctionBasis): function_name = "ml_expm1" def __init__(self, args): # initializing base class ML_FunctionBasis.__init__(self, args) @staticmethod def get_default_args(**kw): """ Return a structure containing the arguments for ML_ExponentialM1_Red, builtin from a default argument mapping overloaded with @p kw """ default_args_expm1 = { "output_file": "my_expm1.c", "function_name": "my_expm1", "precision": ML_Binary32, "accuracy": ML_Faithful, "target": GenericProcessor() } default_args_expm1.update(kw) return DefaultArgTemplate(**default_args_expm1) def generate_scheme(self): # declaring target and instantiating optimization engine vx = self.implementation.add_input_variable("x", self.precision) Log.set_dump_stdout(True) Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m") if self.debug_flag: Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m") # local overloading of RaiseReturn operation def ExpRaiseReturn(*args, **kwords): kwords["arg_value"] = vx kwords["function_name"] = self.function_name return RaiseReturn(*args, **kwords) C_m1 = Constant(-1, precision = self.precision) test_NaN_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False, debug = debug_multi, tag = "NaN_or_inf", precision = ML_Bool) test_NaN = Test(vx, specifier = Test.IsNaN, likely = False, debug = debug_multi, tag = "is_NaN", precision = ML_Bool) test_inf = Comparison(vx, 0, specifier = Comparison.Greater, debug = debug_multi, tag = "sign", precision = ML_Bool, likely = False); # Infnty input infty_return = Statement(ConditionBlock(test_inf, Return(FP_PlusInfty(self.precision)), Return(C_m1))) # non-std input (inf/nan) specific_return = ConditionBlock(test_NaN, Return(FP_QNaN(self.precision)), infty_return) # Over/Underflow Tests precision_emax = self.precision.get_emax() precision_max_value = S2**(precision_emax + 1) expm1_overflow_bound = ceil(log(precision_max_value + 1)) overflow_test = Comparison(vx, expm1_overflow_bound, likely = False, specifier = Comparison.Greater, precision = ML_Bool) overflow_return = Statement(Return(FP_PlusInfty(self.precision))) precision_emin = self.precision.get_emin_subnormal() precision_min_value = S2** precision_emin expm1_underflow_bound = floor(log(precision_min_value) + 1) underflow_test = Comparison(vx, expm1_underflow_bound, likely = False, specifier = Comparison.Less, precision = ML_Bool) underflow_return = Statement(Return(C_m1)) sollya_precision = {ML_Binary32: sollya.binary32, ML_Binary64: sollya.binary64}[self.precision] int_precision = {ML_Binary32: ML_Int32, ML_Binary64: ML_Int64}[self.precision] # Constants log_2 = round(log(2), sollya_precision, sollya.RN) invlog2 = round(1/log(2), sollya_precision, sollya.RN) log_2_cst = Constant(log_2, precision = self.precision) interval_vx = Interval(expm1_underflow_bound, expm1_overflow_bound) interval_fk = interval_vx * invlog2 interval_k = Interval(floor(inf(interval_fk)), ceil(sup(interval_fk))) log2_hi_precision = self.precision.get_field_size() - 6 log2_hi = round(log(2), log2_hi_precision, sollya.RN) log2_lo = round(log(2) - log2_hi, sollya_precision, sollya.RN) # Reduction unround_k = vx * invlog2 ik = NearestInteger(unround_k, precision = int_precision, debug = debug_multi, tag = "ik") k = Conversion(ik, precision = self.precision, tag = "k") red_coeff1 = Multiplication(k, log2_hi, precision = self.precision) red_coeff2 = Multiplication(Negation(k, precision = self.precision), log2_lo, precision = self.precision) pre_sub_mul = Subtraction(vx, 
red_coeff1, precision = self.precision) s = Addition(pre_sub_mul, red_coeff2, precision = self.precision) z = Subtraction(s, pre_sub_mul, precision = self.precision) t = Subtraction(red_coeff2, z, precision = self.precision) r = Addition(s, t, precision = self.precision) r.set_attributes(tag = "r", debug = debug_multi) r_interval = Interval(-log_2/S2, log_2/S2) local_ulp = sup(ulp(exp(r_interval), self.precision)) print("ulp: ", local_ulp) error_goal = S2**-1*local_ulp print("error goal: ", error_goal) # Polynomial Approx error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai) Log.report(Log.Info, "\033[33;1m Building polynomial \033[0m\n") poly_degree = sup(guessdegree(expm1(sollya.x), r_interval, error_goal) + 1) polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme poly_degree_list = range(0, poly_degree) precision_list = [self.precision] *(len(poly_degree_list) + 1) poly_object, poly_error = Polynomial.build_from_approximation_with_error(expm1(sollya.x), poly_degree, precision_list, r_interval, sollya.absolute, error_function = error_function) sub_poly = poly_object.sub_poly(start_index = 2) Log.report(Log.Info, "Poly : %s" % sub_poly) Log.report(Log.Info, "poly error : {} / {:d}".format(poly_error, int(sollya.log2(poly_error)))) pre_sub_poly = polynomial_scheme_builder(sub_poly, r, unified_precision = self.precision) poly = r + pre_sub_poly poly.set_attributes(tag = "poly", debug = debug_multi) exp_k = ExponentInsertion(ik, tag = "exp_k", debug = debug_multi, precision = self.precision) exp_mk = ExponentInsertion(-ik, tag = "exp_mk", debug = debug_multi, precision = self.precision) diff = 1 - exp_mk diff.set_attributes(tag = "diff", debug = debug_multi) # Late Tests late_overflow_test = Comparison(ik, self.precision.get_emax(), specifier = Comparison.Greater, likely = False, debug = debug_multi, tag = "late_overflow_test") overflow_exp_offset = (self.precision.get_emax() - self.precision.get_field_size() / 2) diff_k = ik - overflow_exp_offset exp_diff_k = ExponentInsertion(diff_k, precision = self.precision, tag = "exp_diff_k", debug = debug_multi) exp_oflow_offset = ExponentInsertion(overflow_exp_offset, precision = self.precision, tag = "exp_offset", debug = debug_multi) late_overflow_result = (exp_diff_k * (1 + poly)) * exp_oflow_offset - 1.0 late_overflow_return = ConditionBlock( Test(late_overflow_result, specifier = Test.IsInfty, likely = False), ExpRaiseReturn(ML_FPE_Overflow, return_value = FP_PlusInfty(self.precision)), Return(late_overflow_result) ) late_underflow_test = Comparison(k, self.precision.get_emin_normal(), specifier = Comparison.LessOrEqual, likely = False) underflow_exp_offset = 2 * self.precision.get_field_size() corrected_coeff = ik + underflow_exp_offset exp_corrected = ExponentInsertion(corrected_coeff, precision = self.precision) exp_uflow_offset = ExponentInsertion(-underflow_exp_offset, precision = self.precision) late_underflow_result = ( exp_corrected * (1 + poly)) * exp_uflow_offset - 1.0 test_subnormal = Test(late_underflow_result, specifier = Test.IsSubnormal, likely = False) late_underflow_return = Statement( ConditionBlock( test_subnormal, ExpRaiseReturn(ML_FPE_Underflow, return_value = late_underflow_result)), Return(late_underflow_result) ) # Reconstruction std_result = exp_k * ( poly + diff ) std_result.set_attributes(tag = "result", debug = debug_multi) result_scheme = ConditionBlock( late_overflow_test, late_overflow_return, ConditionBlock( late_underflow_test, late_underflow_return, Return(std_result) ) ) 
std_return = ConditionBlock( overflow_test, overflow_return, ConditionBlock( underflow_test, underflow_return, result_scheme) ) scheme = ConditionBlock( test_NaN_or_inf, Statement(specific_return), std_return ) return scheme def numeric_emulate(self, input_value): return expm1(input_value) standard_test_cases = [[sollya.parse(x)] for x in ["0x1.9b3216p-2", "0x1.8c108p-2"]]
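# Illustrative sketch (not part of the original class): the reconstruction
# above evaluates expm1(x) as exp_k * (poly + diff) with poly ~ expm1(r),
# exp_k = 2^k and diff = 1 - 2^-k, after the reduction x = k * log(2) + r.
# The standard-library check below replays the underlying identity
#     expm1(x) = 2^k * (expm1(r) + (1 - 2^-k))
# with a plain single-constant reduction instead of the log2_hi/log2_lo
# split; the sample input 3.7 is an assumption of the example.
import math

def expm1_reconstruction(x=3.7):
    """Compare the reduced/reconstructed value against math.expm1(x)."""
    k = round(x / math.log(2))          # nearest integer, as NearestInteger above
    r = x - k * math.log(2)             # reduced argument, |r| roughly <= log(2)/2
    diff = 1.0 - 2.0**-k                # the 'diff' term of the scheme
    reconstructed = 2.0**k * (math.expm1(r) + diff)
    return reconstructed, math.expm1(x)

# expm1_reconstruction() returns two values that agree up to double rounding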
def standard_test_cases(self): general_list = [ # ERROR: rootn: inf ulp error at {inf, -2}: *0x0p+0 vs. inf (0x7f800000) at index: 1226 (FP_PlusInfty(self.precision), -2, FP_PlusZero(self.precision)), # ERROR: rootn: inf ulp error at {inf, -2147483648}: *0x0.0000000000000p+0 vs. inf (FP_PlusInfty(self.precision), -2147483648, FP_PlusZero(self.precision)), # (FP_PlusZero(self.precision), -1, FP_PlusInfty(self.precision)), (FP_MinusInfty(self.precision), 1, FP_MinusInfty(self.precision)), (FP_MinusInfty(self.precision), -1, FP_MinusZero(self.precision)), # ERROR coucou7: rootn: -inf ulp error at {inf 7f800000, 479638026}: *inf vs. 0x1.000018p+0 (0x3f80000c) at index: 2367 (FP_PlusInfty(self.precision), 479638026, FP_PlusInfty(self.precision)), (FP_MinusInfty(self.precision), 479638026), #(FP_MinusInfty(self.precision), -479638026), #(FP_PlusInfty(self.precision), -479638026), # rootn( ±0, n) is ±∞ for odd n< 0. (FP_PlusZero(self.precision), -1337, FP_PlusInfty(self.precision)), (FP_MinusZero(self.precision), -1337, FP_MinusInfty(self.precision)), # rootn( ±0, n) is +∞ for even n< 0. (FP_PlusZero(self.precision), -1330, FP_PlusInfty(self.precision)), # rootn( ±0, n) is +0 for even n> 0. (FP_PlusZero(self.precision), random.randrange(0, 2**31, 2), FP_PlusZero(self.precision)), (FP_MinusZero(self.precision), random.randrange(0, 2**31, 2), FP_PlusZero(self.precision)), # rootn( ±0, n) is ±0 for odd n> 0. (FP_PlusZero(self.precision), random.randrange(1, 2**31, 2), FP_PlusZero(self.precision)), (FP_MinusZero(self.precision), random.randrange(1, 2**31, 2), FP_MinusZero(self.precision)), # rootn( x, n) returns a NaN for x< 0 and n is even. (-random.random(), 2 * random.randrange(1, 2**30), FP_QNaN(self.precision)), # rootn( x, 0 ) returns a NaN (random.random(), 0, FP_QNaN(self.precision)), # vx=nan (sollya.parse("-nan"), -1811577079, sollya.parse("nan")), (sollya.parse("-nan"), 832501219, sollya.parse("nan")), (sollya.parse("-nan"), -857435762, sollya.parse("nan")), (sollya.parse("-nan"), -1503049611, sollya.parse("nan")), (sollya.parse("-nan"), 2105620996, sollya.parse("nan")), #ERROR: rootn: inf ulp error at {-nan, 832501219}: *-nan vs. -0x1.00000df2bed98p+1 #ERROR: rootn: inf ulp error at {-nan, -857435762}: *-nan vs. 0x1.0000000000000p+1 #ERROR: rootn: inf ulp error at {-nan, -1503049611}: *-nan vs. -0x1.0000000000000p+1 #ERROR: rootn: inf ulp error at {-nan, 2105620996}: *-nan vs. 
0x1.00000583c4b7ap+1 (sollya.parse("-0x1.cd150ap-105"), 105297051), (sollya.parse("0x1.ec3bf8p+71"), -1650769017), # test-case #12 (0.1, 17), # test-case #11, fails in OpenCL CTS (sollya.parse("0x0.000000001d600p-1022"), 14), # test-case #10, fails test with dar(2**-23) (sollya.parse("-0x1.20aadp-114"), 17), # test-case #9 (sollya.parse("0x1.a44d8ep+121"), 7), # test-case #8 (sollya.parse("-0x1.3ef124p+103"), 3), # test-case #7 (sollya.parse("-0x1.01047ep-2"), 39), # test-case #6 (sollya.parse("-0x1.0105bp+67"), 23), # test-case #5 (sollya.parse("0x1.c1f72p+51"), 6), # special cases (sollya.parse("0x0p+0"), 1), (sollya.parse("0x0p+0"), 0), # test-case #3, catastrophic error for n=1 (sollya.parse("0x1.fc61a2p-121"), 1.0), # test-case #4 , k=14 < 0 not supported by bigfloat # (sollya.parse("0x1.ad067ap-66"), -14), ] # NOTE: expected value assumed 32-bit precision output fp_32_only = [ # (sollya.parse("0x1.80bb0ep+70"), 377778829, sollya.parse("0x1.000002p+0")), ] # NOTE: the following test-case are only valid if meta-function supports 64-bit integer # 2nd_input fp_64_only = [ (sollya.parse("0x1.fffffffffffffp+1023"), -1, sollya.parse("0x0.4000000000000p-1022")), (sollya.parse("-0x1.fffffffffffffp1023"), -1, sollya.parse("-0x0.4000000000000p-1022")), #(sollya.parse("-0x1.fffffffffffffp+1023"), 1), #(sollya.parse("0x1.fffffffffffffp+1023"), -1), # ERROR coucou8: rootn: inf ulp error at {-inf, 1854324695}: *-inf vs. -0x1.0000066bfdd60p+0 (FP_MinusInfty(self.precision), 1854324695, FP_MinusInfty(self.precision)), # ERROR: rootn: -60.962402 ulp error at {0x0.000000001d600p-1022, 14}: *0x1.67d4ff97d1fd9p-76 vs. 0x1.67d4ff97d1f9cp-76 (sollya.parse("0x0.000000001d600p-1022"), 14, sollya.parse("0x1.67d4ff97d1fd9p-76")), # ERROR: rootn: -430452000.000000 ulp error at {0x1.ffffffff38c00p-306, 384017876}: *0x1.ffffed870ff01p-1 vs. 0x1.ffffebec8d1d2p-1 (sollya.parse("0x1.ffffffff38c00p-306"), 384017876, sollya.parse("0x1.ffffed870ff01p-1")), # vs. 0x1.ffffebec8d1d2p-1 # ERROR: rootn: 92996584.000000 ulp error at {0x1.ffffffffdae80p-858, -888750231}: *0x1.00000b36b1173p+0 vs. 0x1.00000b8f6155ep+0 (sollya.parse("0x1.ffffffffdae80p-858"), -888750231, sollya.parse("0x1.00000b36b1173p+0")), # ERROR: rootn: 379474.906250 ulp error at {0x0.0000000000022p-1022, -1538297900}: *0x1.00000814a68ffp+0 vs. 
0x1.0000081503352p+0 (sollya.parse("0x0.00000006abfffp-1022"), -1221802473, sollya.parse("0x1.00000a01818a4p+0")), (sollya.parse("0x1.ffffffffd0a00p-260"), 1108043946, sollya.parse("0x1.fffffa9042997p-1")), (sollya.parse("0x1.3fffffffff1c0p-927"), -1997086266, sollya.parse("0x1.0000056564c5ep+0")), (sollya.parse("0x1.ffffffff38c00p-306"), 384017876, sollya.parse("0x1.ffffed870ff01p-1")), (sollya.parse("0x0.15c000000002ap-1022"), 740015941, sollya.parse("0x1.ffffdfc47b57ep-1")), (sollya.parse("0x0.00000000227ffp-1022"), -1859058847, sollya.parse("0x1.0000069c7a01bp+0")), (sollya.parse("0x0.0568000000012p-1022"), -447352599, sollya.parse("0x1.00001ab640c38p+0")), (sollya.parse("0x0.000000000000dp-1022"), 132283432, sollya.parse("0x1.ffff43d1db82ap-1")), (sollya.parse("-0x1.c80000000026ap+1023"), 275148531, sollya.parse("-0x1.00002b45a7314p+0")), (sollya.parse("0x0.022200000000ep-1022"), -1969769414, sollya.parse("0x1.000006130e858p+0")), (sollya.parse("0x0.0000000000011p-1022"), 851990770, sollya.parse("0x1.ffffe2cafaff6p-1")), (sollya.parse("0x1.8fffffffff348p-1010"), 526938360, sollya.parse("0x1.ffffd372e2b81p-1")), (sollya.parse("0x0.0000000000317p-1022"), -1315106194, sollya.parse("0x1.0000096973ac9p+0")), (sollya.parse("0x1.1ffffffff2d20p-971"), 378658008, sollya.parse("0x1.ffffc45e803b2p-1")), # (sollya.parse("0x0.0568000000012p-1022"), -447352599, sollya.parse("0x1.00001ab640c38p+0")), # (sollya.parse("0x1.ffffffffd0a00p-260"), 1108043946, sollya.parse("0x1.fffffa9042997p-1")), (FP_MinusZero(self.precision), -21015979, FP_MinusInfty(self.precision)), (FP_MinusZero(self.precision), -85403731, FP_MinusInfty(self.precision)), (FP_MinusZero(self.precision), -180488973, FP_MinusInfty(self.precision)), (FP_MinusZero(self.precision), -1365227287, FP_MinusInfty(self.precision)), (FP_MinusZero(self.precision), -1802885579, FP_MinusInfty(self.precision)), (FP_MinusZero(self.precision), -1681209663, FP_MinusInfty(self.precision)), (FP_MinusZero(self.precision), -1152797721, FP_MinusInfty(self.precision)), (FP_MinusZero(self.precision), -1614890585, FP_MinusInfty(self.precision)), (FP_MinusZero(self.precision), -812655517, FP_MinusInfty(self.precision)), (FP_MinusZero(self.precision), -628647891, FP_MinusInfty(self.precision)), (sollya.parse("0x1.ffffffffdae80p-858"), -888750231, sollya.parse("0x1.00000b36b1173p+0")), (sollya.parse("0x0.0568000000012p-1022"), -447352599, sollya.parse("0x1.00001ab640c38p+0")), (sollya.parse("0x0.00000006abfffp-1022"), -1221802473, sollya.parse("0x1.00000a01818a4p+0")), (sollya.parse("0x0.0000000000022p-1022"), -1538297900, sollya.parse("0x1.00000814a68ffp+0")), #ERROR: rootn: inf ulp error at {-0x0.0000000000000p+0, -1889147085}: *-inf vs. inf #ERROR: rootn: inf ulp error at {-0x0.0000000000000p+0, -373548013}: *-inf vs. inf (FP_MinusZero(self.precision), -1889147085, FP_MinusInfty(self.precision)), (FP_MinusZero(self.precision), -373548013, FP_MinusInfty(self.precision)), #ERROR: rootn: inf ulp error at {-0x0.0000000000000p+0, -1889147085}: *-inf vs. inf #ERROR: rootn: inf ulp error at {-0x0.0000000000000p+0, -373548013}: *-inf vs. inf # [email protected]: PE 0: error[84]: ml_rootn(-0x1.b1a6765727e72p-902, -7.734955e+08/-773495525), result is -0x1.00000d8cb5b3cp+0 vs expected [nan;nan] (sollya.parse("-0x1.b1a6765727e72p-902"), -773495525), # ERROR: rootn: -40564819207303340847894502572032.000000 ulp error at {-0x0.fffffffffffffp-1022, 1}: *-0x0.fffffffffffffp-1022 vs. 
-0x1.ffffffffffffep-970 (sollya.parse("-0x0.fffffffffffffp-1022 "), 1, sollya.parse("-0x0.fffffffffffffp-1022 ")), # ERROR: rootn: 1125899906842624.000000 ulp error at {-0x1.fffffffffffffp+1023, -1}: *-0x0.4000000000000p-1022 vs. -0x0.0000000000000p+0 (sollya.parse("-0x1.fffffffffffffp+1023"), -1, sollya.parse("-0x0.4000000000000p-1022")), (sollya.parse("0x1.fffffffffffffp+1023"), -1, sollya.parse("0x0.4000000000000p-1022")), ] return (fp_64_only if self.precision.get_bit_size() >= 64 else []) \ + (fp_32_only if self.precision.get_bit_size() == 32 else []) \ + general_list
class ML_Gamma(ScalarUnaryFunction): """ Meta implementation of the error-function """ function_name = "gamma" def __init__(self, args): super().__init__(args) @staticmethod def get_default_args(**kw): """ Return a structure containing the arguments for ML_Gamma, builtin from a default argument mapping overloaded with @p kw """ default_args_erf = { "output_file": "gamma.c", "function_name": "gamma", "precision": ML_Binary32, "accuracy": ML_Faithful, "target": GenericProcessor.get_target_instance(), "passes": [("start:instantiate_abstract_prec"), ("start:instantiate_prec"), ("start:basic_legalization"), ("start:expand_multi_precision")], } default_args_erf.update(kw) return DefaultArgTemplate(**default_args_erf) def generate_scalar_scheme(self, vx): # approximation the gamma function abs_vx = Abs(vx, precision=self.precision) FCT_LIMIT = 1.0 omega_value = self.precision.get_omega() def sollya_wrap_bigfloat_fct(bfct): """ wrap bigfloat's function <bfct> such that is can be used on SollyaObject inputs and returns SollyaObject results """ def fct(x): return sollya.SollyaObject(bfct(SollyaObject(x).bigfloat())) return fct sollya_gamma = sollya_wrap_bigfloat_fct(bigfloat.gamma) sollya_digamma = sollya_wrap_bigfloat_fct(bigfloat.digamma) # first derivative of gamma is digamma * gamma bigfloat_gamma_d0 = lambda x: bigfloat.gamma(x) * bigfloat.digamma(x) sollya_gamma_d0 = sollya_wrap_bigfloat_fct(bigfloat_gamma_d0) # approximating trigamma with straightforward derivatives formulae of digamma U = 2**-64 bigfloat_trigamma = lambda x: ( (bigfloat.digamma(x * (1 + U)) - bigfloat.digamma(x)) / (x * U)) sollya_trigamma = sollya_wrap_bigfloat_fct(bigfloat_trigamma) bigfloat_gamma_d1 = lambda x: (bigfloat_trigamma(x) * bigfloat.gamma( x) + bigfloat_gamma_d0(x) * bigfloat.digamma(x)) sollya_gamma_d1 = sollya_wrap_bigfloat_fct(bigfloat_gamma_d1) def sollya_gamma_fct(x, diff_order, prec): """ wrapper to use bigfloat implementation of exponential rather than sollya's implementation directly. This wrapper implements sollya's function API. 
:param x: numerical input value (may be an Interval) :param diff_order: differential order :param prec: numerical precision expected (min) """ fct = None if diff_order == 0: fct = sollya_gamma elif diff_order == 1: fct = sollya_gamma_d0 elif diff_order == 2: fct = sollya_gamma_d1 else: raise NotImplementedError with bigfloat.precision(prec): if x.is_range(): lo = sollya.inf(x) hi = sollya.sup(x) return sollya.Interval(fct(lo), fct(hi)) else: return fct(x) # search the lower x such that gamma(x) >= omega omega_upper_limit = search_bound_threshold(sollya_gamma, omega_value, 2, 1000.0, self.precision) Log.report(Log.Debug, "gamma(x) = {} limit is {}", omega_value, omega_upper_limit) # evaluate gamma(<min-normal-value>) lower_x_bound = self.precision.get_min_normal_value() value_min = sollya_gamma(lower_x_bound) Log.report(Log.Debug, "gamma({}) = {}(log2={})", lower_x_bound, value_min, int(sollya.log2(value_min))) # evaluate gamma(<min-subnormal-value>) lower_x_bound = self.precision.get_min_subnormal_value() value_min = sollya_gamma(lower_x_bound) Log.report(Log.Debug, "gamma({}) = {}(log2={})", lower_x_bound, value_min, int(sollya.log2(value_min))) # Gamma is defined such that gamma(x+1) = x * gamma(x) # # we approximate gamma over [1, 2] # y in [1, 2] # gamma(y) = (y-1) * gamma(y-1) # gamma(y-1) = gamma(y) / (y-1) Log.report(Log.Info, "building mathematical polynomial") approx_interval = Interval(1, 2) approx_fct = sollya.function(sollya_gamma_fct) poly_degree = int( sup( guessdegree(approx_fct, approx_interval, S2** -(self.precision.get_field_size() + 5)))) + 1 Log.report(Log.Debug, "approximation's poly degree over [1, 2] is {}", poly_degree) sys.exit(1) poly_degree_list = list(range(1, poly_degree, 2)) Log.report(Log.Debug, "poly_degree is {} and list {}", poly_degree, poly_degree_list) global_poly_object = Polynomial.build_from_approximation( approx_fct, poly_degree_list, [self.precision] * len(poly_degree_list), approx_interval, sollya.relative) Log.report( Log.Debug, "inform is {}", dirtyinfnorm(approx_fct - global_poly_object.get_sollya_object(), approx_interval)) poly_object = global_poly_object.sub_poly(start_index=1, offset=1) ext_precision = { ML_Binary32: ML_SingleSingle, ML_Binary64: ML_DoubleDouble, }[self.precision] pre_poly = PolynomialSchemeEvaluator.generate_horner_scheme( poly_object, abs_vx, unified_precision=self.precision) result = FMA(pre_poly, abs_vx, abs_vx) result.set_attributes(tag="result", debug=debug_multi) eps_target = S2**-(self.precision.get_field_size() + 5) def offset_div_function(fct): return lambda offset: fct(sollya.x + offset) # empiral numbers field_size = {ML_Binary32: 6, ML_Binary64: 8}[self.precision] near_indexing = SubFPIndexing(eps_exp, 0, 6, self.precision) near_approx = generic_poly_split(offset_div_function(sollya.erf), near_indexing, eps_target, self.precision, abs_vx) near_approx.set_attributes(tag="near_approx", debug=debug_multi) def offset_function(fct): return lambda offset: fct(sollya.x + offset) medium_indexing = SubFPIndexing(1, one_limit_exp, 7, self.precision) medium_approx = generic_poly_split(offset_function(sollya.erf), medium_indexing, eps_target, self.precision, abs_vx) medium_approx.set_attributes(tag="medium_approx", debug=debug_multi) # approximation for positive values scheme = ConditionBlock( abs_vx < eps, Return(result), ConditionBlock( abs_vx < near_indexing.get_max_bound(), Return(near_approx), ConditionBlock(abs_vx < medium_indexing.get_max_bound(), Return(medium_approx), Return(Constant(1.0, 
precision=self.precision))))) return scheme def numeric_emulate(self, input_value): return bigfloat.gamma(sollya.SollyaObject(input_value).bigfloat()) standard_test_cases = [ (1.0, None), (sollya.parse("0x1.13b2c6p-2"), None), ]
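# Illustrative sketch (not part of the original class): the scheme above only
# approximates gamma on [1, 2] and relies on the recurrence
# gamma(x + 1) = x * gamma(x) to reach other arguments.  The helper below
# performs that reduction with the standard library so the recurrence-based
# reconstruction can be checked against math.gamma directly; x > 0 is
# assumed and math.gamma stands in for the core polynomial on [1, 2].
import math

def gamma_via_recurrence(x):
    """Evaluate gamma(x) for x > 0 from a core evaluation on [1, 2]."""
    factor = 1.0
    while x > 2.0:                  # gamma(x) = (x - 1) * gamma(x - 1)
        x -= 1.0
        factor *= x
    while x < 1.0:                  # gamma(x) = gamma(x + 1) / x
        factor /= x
        x += 1.0
    return factor * math.gamma(x)   # core evaluation restricted to [1, 2]

# e.g. gamma_via_recurrence(5.5) matches math.gamma(5.5) up to rounding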
class ML_Log1p(ML_FunctionBasis): function_name = "ml_log1p" def __init__(self, args): ML_FunctionBasis.__init__(self, args) @staticmethod def get_default_args(**kw): """ Return a structure containing the arguments for ML_Log1p, builtin from a default argument mapping overloaded with @p kw """ default_args_log1p = { "output_file": "my_log1p.c", "function_name": "my_log1pf", "precision": ML_Binary32, "accuracy": ML_Faithful, "target": GenericProcessor.get_target_instance(), "passes": [("start:instantiate_abstract_prec"), ("start:instantiate_prec"), ("start:basic_legalization"), ("start:expand_multi_precision")], } default_args_log1p.update(kw) return DefaultArgTemplate(**default_args_log1p) def generate_scheme(self): vx = self.implementation.add_input_variable("x", self.precision) sollya_precision = self.get_input_precision().sollya_object # local overloading of RaiseReturn operation def ExpRaiseReturn(*args, **kwords): kwords["arg_value"] = vx kwords["function_name"] = self.function_name return RaiseReturn(*args, **kwords) # 2-limb approximation of log(2) # hi part precision is reduced to provide exact operation # when multiplied by an exponent value log2_hi_value = round(log(2), self.precision.get_field_size() - (self.precision.get_exponent_size() + 1), sollya.RN) log2_lo_value = round(log(2) - log2_hi_value, self.precision.sollya_object, sollya.RN) log2_hi = Constant(log2_hi_value, precision=self.precision) log2_lo = Constant(log2_lo_value, precision=self.precision) int_precision = self.precision.get_integer_format() # retrieving processor inverse approximation table dummy_var = Variable("dummy", precision = self.precision) dummy_rcp_seed = ReciprocalSeed(dummy_var, precision = self.precision) inv_approx_table = self.processor.get_recursive_implementation(dummy_rcp_seed, language = None, table_getter = lambda self: self.approx_table_map) # table creation table_index_size = inv_approx_table.index_size log_table = ML_NewTable(dimensions = [2**table_index_size, 2], storage_precision = self.precision) # storing accurate logarithm approximation of value returned # by the fast reciprocal operation for i in range(0, 2**table_index_size): inv_value = inv_approx_table[i] value_high = round(log(inv_value), self.precision.get_field_size() - (self.precision.get_exponent_size() + 1), sollya.RN) value_low = round(log(inv_value) - value_high, sollya_precision, sollya.RN) log_table[i][0] = value_high log_table[i][1] = value_low neg_input = Comparison(vx, -1, likely=False, precision=ML_Bool, specifier=Comparison.Less, debug=debug_multi, tag="neg_input") vx_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False, precision=ML_Bool, debug=debug_multi, tag="nan_or_inf") vx_snan = Test(vx, specifier=Test.IsSignalingNaN, likely=False, debug=debug_multi, tag="snan") vx_inf = Test(vx, specifier=Test.IsInfty, likely=False, debug=debug_multi, tag="inf") vx_subnormal = Test(vx, specifier=Test.IsSubnormal, likely=False, debug=debug_multi, tag="vx_subnormal") # for x = m.2^e, such that e >= 0 # # log(1+x) = log(1 + m.2^e) # = log(2^e . 2^-e + m.2^e) # = log(2^e . (2^-e + m)) # = log(2^e) + log(2^-e + m) # = e . log(2) + log (2^-e + m) # # t = (2^-e + m) # t = m_t . 2^e_t # r ~ 1 / m_t => r.m_t ~ 1 ~ 0 # # t' = t . 2^-e_t # = 2^-e-e_t + m . 2^-e_t # # if e >= 0, then 2^-e <= 1, then 1 <= m + 2^-e <= 3 # r = m_r . 2^e_r # # log(1+x) = e.log(2) + log(r . 2^e_t . 2^-e_t . (2^-e + m) / r) # = e.log(2) + log(r . 2^(-e-e_t) + r.m.2^-e_t) + e_t . log(2)- log(r) # = (e+e_t).log(2) + log(r . 
t') - log(r) # = (e+e_t).log(2) + log(r . t') - log(r) # = (e+e_t).log(2) + P_log1p(r . t' - 1) - log(r) # # # argument reduction m = MantissaExtraction(vx, tag="vx", precision=self.precision, debug=debug_multi) e = ExponentExtraction(vx, tag="e", precision=int_precision, debug=debug_multi) # 2^-e TwoMinusE = ExponentInsertion(-e, tag="Two_minus_e", precision=self.precision, debug=debug_multi) t = Addition(TwoMinusE, m, precision=self.precision, tag="t", debug=debug_multi) m_t = MantissaExtraction(t, tag="m_t", precision=self.precision, debug=debug_multi) e_t = ExponentExtraction(t, tag="e_t", precision=int_precision, debug=debug_multi) # 2^(-e-e_t) TwoMinusEEt = ExponentInsertion(-e-e_t, tag="Two_minus_e_et", precision=self.precision) TwoMinusEt = ExponentInsertion(-e_t, tag="Two_minus_et", precision=self.precision, debug=debug_multi) rcp_mt = ReciprocalSeed(m_t, tag="rcp_mt", precision=self.precision, debug=debug_multi) INDEX_SIZE = table_index_size table_index = generic_mantissa_msb_index_fct(INDEX_SIZE, m_t) table_index.set_attributes(tag="table_index", debug=debug_multi) log_inv_lo = TableLoad(log_table, table_index, 1, tag="log_inv_lo", debug=debug_multi) log_inv_hi = TableLoad(log_table, table_index, 0, tag="log_inv_hi", debug=debug_multi) inv_err = S2**-6 # TODO: link to target DivisionSeed precision Log.report(Log.Info, "building mathematical polynomial") approx_interval = Interval(-inv_err, inv_err) approx_fct = sollya.log1p(sollya.x) / (sollya.x) poly_degree = sup(guessdegree(approx_fct, approx_interval, S2**-(self.precision.get_field_size()+1))) + 1 Log.report(Log.Debug, "poly_degree is {}", poly_degree) global_poly_object = Polynomial.build_from_approximation(approx_fct, poly_degree, [self.precision]*(poly_degree+1), approx_interval, sollya.absolute) poly_object = global_poly_object # .sub_poly(start_index=1) EXT_PRECISION_MAP = { ML_Binary32: ML_SingleSingle, ML_Binary64: ML_DoubleDouble, ML_SingleSingle: ML_TripleSingle, ML_DoubleDouble: ML_TripleDouble } if not self.precision in EXT_PRECISION_MAP: Log.report(Log.Error, "no extended precision available for {}", self.precision) ext_precision = EXT_PRECISION_MAP[self.precision] # pre_rtp = r . 
2^(-e-e_t) + m .2^-e_t pre_rtp = Addition( rcp_mt * TwoMinusEEt, Multiplication( rcp_mt, Multiplication( m, TwoMinusEt, precision=self.precision, tag="pre_mult", debug=debug_multi, ), precision=ext_precision, tag="pre_mult2", debug=debug_multi, ), precision=ext_precision, tag="pre_rtp", debug=debug_multi ) pre_red_vx = Addition( pre_rtp, -1, precision=ext_precision, ) red_vx = Conversion(pre_red_vx, precision=self.precision, tag="red_vx", debug=debug_multi) Log.report(Log.Info, "generating polynomial evaluation scheme") poly = PolynomialSchemeEvaluator.generate_horner_scheme( poly_object, red_vx, unified_precision=self.precision) poly.set_attributes(tag="poly", debug=debug_multi) Log.report(Log.Debug, "{}", global_poly_object.get_sollya_object()) fp_e = Conversion(e + e_t, precision=self.precision, tag="fp_e", debug=debug_multi) ext_poly = Multiplication(red_vx, poly, precision=ext_precision) pre_result = Addition( Addition( fp_e * log2_hi, fp_e * log2_lo, precision=ext_precision ), Addition( Addition( -log_inv_hi, -log_inv_lo, precision=ext_precision ), ext_poly, precision=ext_precision ), precision=ext_precision ) result = Conversion(pre_result, precision=self.precision, tag="result", debug=debug_multi) # main scheme Log.report(Log.Info, "MDL scheme") pre_scheme = ConditionBlock(neg_input, Statement( ClearException(), Raise(ML_FPE_Invalid), Return(FP_QNaN(self.precision)) ), ConditionBlock(vx_nan_or_inf, ConditionBlock(vx_inf, Statement( ClearException(), Return(FP_PlusInfty(self.precision)), ), Statement( ClearException(), ConditionBlock(vx_snan, Raise(ML_FPE_Invalid) ), Return(FP_QNaN(self.precision)) ) ), Return(result) ) ) scheme = pre_scheme return scheme def numeric_emulate(self, input_value): return log1p(input_value) standard_test_cases = [ (1.0, None), (1.0, None), (1.0, None), (1.0, None), ] _ = [ (4.0, None), (1.0, None), (0.5, None), (1.5, None), (1024.0, None), (sollya.parse("0x1.13b2c6p-2"), None), (sollya.parse("0x1.2cb10ap-5"), None), (0.0, None), (sollya.parse("0x1.ce4492p-21"), None), ]
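# Illustrative sketch (not part of the original class): the long derivation in
# the comments above rewrites
#     log(1 + x) = (e + e_t) * log(2) + log(r * t') - log(r)
# with x = m * 2^e, t = 2^-e + m = m_t * 2^e_t, t' = m_t and r an approximate
# reciprocal of m_t.  The standalone check below replays that identity with
# the standard library, substituting an 8-bit rounded reciprocal for
# ReciprocalSeed and math.log1p / math.log for the polynomial plus table;
# x > 0 is assumed.
import math

def log1p_via_reduction(x):
    """Reference log1p(x) for x > 0 following the reduction derived above."""
    m, e = math.frexp(x)
    m, e = 2.0 * m, e - 1                       # x = m * 2^e with m in [1, 2)
    t = 2.0**-e + m                             # 1 + x = 2^e * t
    m_t, e_t = math.frexp(t)
    m_t, e_t = 2.0 * m_t, e_t - 1               # t = m_t * 2^e_t, m_t in [1, 2)
    r = round(256.0 / m_t) / 256.0              # 8-bit reciprocal seed stand-in
    u = r * m_t - 1.0                           # small argument of the polynomial
    return (e + e_t) * math.log(2) + math.log1p(u) - math.log(r)

# e.g. log1p_via_reduction(0.75) matches math.log1p(0.75) up to rounding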
class ML_Log2(ML_Function("ml_log2")): def __init__(self, args=DefaultArgTemplate): # initializing base class ML_FunctionBasis.__init__(self, args) @staticmethod def get_default_args(**kw): """ Return a structure containing the arguments for ML_Log2, builtin from a default argument mapping overloaded with @p kw """ default_args_log2 = { "output_file": "my_log2f.c", "function_name": "my_log2f", "precision": ML_Binary32, "accuracy": ML_Faithful, "target": GenericProcessor() } default_args_log2.update(kw) return DefaultArgTemplate(**default_args_log2) def generate_emulate(self, result, mpfr_x, mpfr_rnd): """ generate the emulation code for ML_Log2 functions mpfr_x is a mpfr_t variable which should have the right precision mpfr_rnd is the rounding mode Deprecated: the new test bench uses numeric_emulate method """ emulate_func_name = "mpfr_log2" emulate_func_op = FunctionOperator(emulate_func_name, arg_map={ 0: FO_Result(0), 1: FO_Arg(0), 2: FO_Arg(1) }, require_header=["mpfr.h"]) emulate_func = FunctionObject(emulate_func_name, [ML_Mpfr_t, ML_Int32], ML_Mpfr_t, emulate_func_op) mpfr_call = Statement( ReferenceAssign(result, emulate_func(mpfr_x, mpfr_rnd))) return mpfr_call def generate_scheme(self): vx = self.implementation.add_input_variable("x", self.get_input_precision()) sollya_precision = self.get_input_precision().get_sollya_object() # local overloading of RaiseReturn operation def ExpRaiseReturn(*args, **kwords): kwords["arg_value"] = vx kwords["function_name"] = self.function_name return RaiseReturn(*args, **kwords) # testing special value inputs test_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False, debug=True, tag="nan_or_inf") test_nan = Test(vx, specifier=Test.IsNaN, debug=True, tag="is_nan_test") test_positive = Comparison(vx, 0, specifier=Comparison.GreaterOrEqual, debug=True, tag="inf_sign") test_signaling_nan = Test(vx, specifier=Test.IsSignalingNaN, debug=True, tag="is_signaling_nan") # if input is a signaling NaN, raise an invalid exception and returns # a quiet NaN return_snan = Statement( ExpRaiseReturn(ML_FPE_Invalid, return_value=FP_QNaN(self.precision))) vx_exp = ExponentExtraction(vx, tag="vx_exp", debug=debugd) int_precision = self.precision.get_integer_format() # log2(vx) # r = vx_mant # e = vx_exp # vx reduced to r in [1, 2[ # log2(vx) = log2(r * 2^e) # = log2(r) + e # ## log2(r) is approximated by # log2(r) = log2(inv_seed(r) * r / inv_seed(r) # = log2(inv_seed(r) * r) - log2(inv_seed(r)) # inv_seed(r) in ]1/2, 1] => log2(inv_seed(r)) in ]-1, 0] # # inv_seed(r) * r ~ 1 # we can easily tabulate -log2(inv_seed(r)) # # retrieving processor inverse approximation table dummy_var = Variable("dummy", precision=self.precision) dummy_div_seed = DivisionSeed(dummy_var, precision=self.precision) inv_approx_table = self.processor.get_recursive_implementation( dummy_div_seed, language=None, table_getter=lambda self: self.approx_table_map) # table creation table_index_size = 7 log_table = ML_NewTable(dimensions=[2**table_index_size, 2], storage_precision=self.precision, tag=self.uniquify_name("inv_table")) # value for index 0 is set to 0.0 log_table[0][0] = 0.0 log_table[0][1] = 0.0 for i in range(1, 2**table_index_size): #inv_value = (1.0 + (self.processor.inv_approx_table[i] / S2**9) + S2**-52) * S2**-1 #inv_value = (1.0 + (inv_approx_table[i][0] / S2**9) ) * S2**-1 #print inv_approx_table[i][0], inv_value inv_value = inv_approx_table[i][0] value_high_bitsize = self.precision.get_field_size() - ( self.precision.get_exponent_size() + 1) value_high = 
round(log2(inv_value), value_high_bitsize, sollya.RN) value_low = round( log2(inv_value) - value_high, sollya_precision, sollya.RN) log_table[i][0] = value_high log_table[i][1] = value_low def compute_log(_vx, exp_corr_factor=None): _vx_mant = MantissaExtraction(_vx, tag="_vx_mant", precision=self.precision, debug=debug_lftolx) _vx_exp = ExponentExtraction(_vx, tag="_vx_exp", debug=debugd) # The main table is indexed by the 7 most significant bits # of the mantissa table_index = inv_approx_table.index_function(_vx_mant) table_index.set_attributes(tag="table_index", debug=debuglld) # argument reduction # Using AND -2 to exclude LSB set to 1 for Newton-Raphson convergence # TODO: detect if single operand inverse seed is supported by the targeted architecture pre_arg_red_index = TypeCast(BitLogicAnd( TypeCast(DivisionSeed(_vx_mant, precision=self.precision, tag="seed", debug=debug_lftolx, silent=True), precision=ML_UInt64), Constant(-2, precision=ML_UInt64), precision=ML_UInt64), precision=self.precision, tag="pre_arg_red_index", debug=debug_lftolx) arg_red_index = Select(Equal(table_index, 0), 1.0, pre_arg_red_index, tag="arg_red_index", debug=debug_lftolx) _red_vx = FMA(arg_red_index, _vx_mant, -1.0) _red_vx.set_attributes(tag="_red_vx", debug=debug_lftolx) inv_err = S2**-inv_approx_table.index_size red_interval = Interval(1 - inv_err, 1 + inv_err) # return in case of standard (non-special) input _log_inv_lo = TableLoad(log_table, table_index, 1, tag="log_inv_lo", debug=debug_lftolx) _log_inv_hi = TableLoad(log_table, table_index, 0, tag="log_inv_hi", debug=debug_lftolx) Log.report(Log.Verbose, "building mathematical polynomial") approx_interval = Interval(-inv_err, inv_err) poly_degree = sup( guessdegree( log2(1 + sollya.x) / sollya.x, approx_interval, S2** -(self.precision.get_field_size() * 1.1))) + 1 sollya.settings.display = sollya.hexadecimal global_poly_object, approx_error = Polynomial.build_from_approximation_with_error( log2(1 + sollya.x) / sollya.x, poly_degree, [self.precision] * (poly_degree + 1), approx_interval, sollya.absolute, error_function=lambda p, f, ai, mod, t: sollya.dirtyinfnorm( p - f, ai)) Log.report( Log.Info, "poly_degree={}, approx_error={}".format( poly_degree, approx_error)) poly_object = global_poly_object.sub_poly(start_index=1, offset=1) #poly_object = global_poly_object.sub_poly(start_index=0,offset=0) Attributes.set_default_silent(True) Attributes.set_default_rounding_mode(ML_RoundToNearest) Log.report(Log.Verbose, "generating polynomial evaluation scheme") pre_poly = PolynomialSchemeEvaluator.generate_horner_scheme( poly_object, _red_vx, unified_precision=self.precision) _poly = FMA(pre_poly, _red_vx, global_poly_object.get_cst_coeff(0, self.precision)) _poly.set_attributes(tag="poly", debug=debug_lftolx) Log.report( Log.Verbose, "sollya global_poly_object: {}".format( global_poly_object.get_sollya_object())) Log.report( Log.Verbose, "sollya poly_object: {}".format( poly_object.get_sollya_object())) corr_exp = _vx_exp if exp_corr_factor == None else _vx_exp + exp_corr_factor Attributes.unset_default_rounding_mode() Attributes.unset_default_silent() pre_result = -_log_inv_hi + (_red_vx * _poly + (-_log_inv_lo)) pre_result.set_attributes(tag="pre_result", debug=debug_lftolx) exact_log2_hi_exp = Conversion(corr_exp, precision=self.precision) exact_log2_hi_exp.set_attributes(tag="exact_log2_hi_hex", debug=debug_lftolx) _result = exact_log2_hi_exp + pre_result return _result, _poly, _log_inv_lo, _log_inv_hi, _red_vx result, poly, log_inv_lo, log_inv_hi, 
red_vx = compute_log(vx) result.set_attributes(tag="result", debug=debug_lftolx) # specific input value predicate neg_input = Comparison(vx, 0, likely=False, specifier=Comparison.Less, debug=debugd, tag="neg_input") vx_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False, debug=debugd, tag="nan_or_inf") vx_snan = Test(vx, specifier=Test.IsSignalingNaN, likely=False, debug=debugd, tag="vx_snan") vx_inf = Test(vx, specifier=Test.IsInfty, likely=False, debug=debugd, tag="vx_inf") vx_subnormal = Test(vx, specifier=Test.IsSubnormal, likely=False, debug=debugd, tag="vx_subnormal") vx_zero = Test(vx, specifier=Test.IsZero, likely=False, debug=debugd, tag="vx_zero") exp_mone = Equal(vx_exp, -1, tag="exp_minus_one", debug=debugd, likely=False) vx_one = Equal(vx, 1.0, tag="vx_one", likely=False, debug=debugd) # Specific specific for the case exp == -1 # log2(x) = log2(m) - 1 # # as m in [1, 2[, log2(m) in [0, 1[ # if r is close to 2, a catastrophic cancellation can occur # # r = seed(m) # log2(x) = log2(seed(m) * m / seed(m)) - 1 # = log2(seed(m) * m) - log2(seed(m)) - 1 # # for m really close to 2 => seed(m) = 0.5 # => log2(x) = log2(0.5 * m) # = result_exp_m1 = (-log_inv_hi - 1.0) + FMA(poly, red_vx, -log_inv_lo) result_exp_m1.set_attributes(tag="result_exp_m1", debug=debug_lftolx) m100 = -100 S2100 = Constant(S2**100, precision=self.precision) result_subnormal, _, _, _, _ = compute_log(vx * S2100, exp_corr_factor=m100) result_subnormal.set_attributes(tag="result_subnormal", debug=debug_lftolx) one_err = S2**-7 approx_interval_one = Interval(-one_err, one_err) red_vx_one = vx - 1.0 poly_degree_one = sup( guessdegree( log(1 + x) / x, approx_interval_one, S2** -(self.precision.get_field_size() + 1))) + 1 poly_object_one = Polynomial.build_from_approximation( log(1 + sollya.x) / sollya.x, poly_degree_one, [self.precision] * (poly_degree_one + 1), approx_interval_one, absolute).sub_poly(start_index=1) poly_one = PolynomialSchemeEvaluator.generate_horner_scheme( poly_object_one, red_vx_one, unified_precision=self.precision) poly_one.set_attributes(tag="poly_one", debug=debug_lftolx) result_one = red_vx_one + red_vx_one * poly_one cond_one = (vx < (1 + one_err)) & (vx > (1 - one_err)) cond_one.set_attributes(tag="cond_one", debug=debugd, likely=False) # main scheme pre_scheme = ConditionBlock( neg_input, Statement(ClearException(), Raise(ML_FPE_Invalid), Return(FP_QNaN(self.precision))), ConditionBlock( vx_nan_or_inf, ConditionBlock( vx_inf, Statement( ClearException(), Return(FP_PlusInfty(self.precision)), ), Statement(ClearException(), ConditionBlock(vx_snan, Raise(ML_FPE_Invalid)), Return(FP_QNaN(self.precision)))), ConditionBlock( vx_subnormal, ConditionBlock( vx_zero, Statement( ClearException(), Raise(ML_FPE_DivideByZero), Return(FP_MinusInfty(self.precision)), ), Statement(ClearException(), result_subnormal, Return(result_subnormal))), ConditionBlock( vx_one, Statement( ClearException(), Return(FP_PlusZero(self.precision)), ), ConditionBlock(exp_mone, Return(result_exp_m1), Return(result)))))) scheme = Statement(result, pre_scheme) return scheme standard_test_cases = [(sollya.parse("0x1.ffd6906acffc7p-1"), )] def numeric_emulate(self, input_value): """ Numeric emulation to generate expected value corresponding to input_value input """ return log2(input_value)
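# Illustrative sketch (not part of the original class): compute_log above
# rewrites log2(x) as e + log2(seed(m) * m) - log2(seed(m)), where seed(m) is
# a low-accuracy reciprocal of the mantissa, -log2(seed(m)) is read from
# log_table and log2(seed(m) * m) is the polynomial part evaluated on a tiny
# argument.  The standalone check below replays that decomposition with the
# standard library, using a 7-bit rounded reciprocal (mirroring
# table_index_size) in place of the hardware seed; x > 0 is assumed.
import math

def log2_via_reduction(x):
    """Reference log2(x) for x > 0 following the decomposition used above."""
    m, e = math.frexp(x)
    m, e = 2.0 * m, e - 1                       # x = m * 2^e with m in [1, 2)
    seed = round(128.0 / m) / 128.0             # 7-bit reciprocal seed stand-in
    red_vx = seed * m - 1.0                     # small polynomial argument
    # log2(m) = log2(seed * m) - log2(seed); the second term is tabulated
    return e + math.log2(1.0 + red_vx) - math.log2(seed)

# e.g. log2_via_reduction(10.0) matches math.log2(10.0) up to rounding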
class ML_Log(ML_Function("ml_log")): def __init__(self, args): # initializing base class ML_FunctionBasis.__init__(self, args) @staticmethod def get_default_args(**kw): """ Return a structure containing the arguments for ML_Log, builtin from a default argument mapping overloaded with @p kw """ default_args_log = { "output_file": "my_logf.c", "function_name": "my_log", "precision": ML_Binary32, "accuracy": ML_Faithful, "target": GenericProcessor() } default_args_log.update(kw) return DefaultArgTemplate(**default_args_log) def generate_emulate(self, result_ternary, result, mpfr_x, mpfr_rnd): """ generate the emulation code for ML_Log2 functions mpfr_x is a mpfr_t variable which should have the right precision mpfr_rnd is the rounding mode """ emulate_func_name = "mpfr_log" emulate_func_op = FunctionOperator(emulate_func_name, arg_map={ 0: FO_Arg(0), 1: FO_Arg(1), 2: FO_Arg(2) }, require_header=["mpfr.h"]) emulate_func = FunctionObject(emulate_func_name, [ML_Mpfr_t, ML_Mpfr_t, ML_Int32], ML_Int32, emulate_func_op) mpfr_call = Statement( ReferenceAssign(result_ternary, emulate_func(result, mpfr_x, mpfr_rnd))) return mpfr_call def generate_scheme(self): vx = self.implementation.add_input_variable("x", self.precision) sollya_precision = self.precision.sollya_object # constant computation invlog2 = round(1 / log(2), sollya_precision, sollya.RN) invlog2_cst = Constant(invlog2, precision=self.precision) #v_log2_hi = round(log(2), 16, sollya.RN) #v_log2_lo = round(log(2) - v_log2_hi, sollya_precision, sollya.RN) #log2_hi = Constant(v_log2_hi, precision = self.precision, tag = "log2_hi") #log2_lo = Constant(v_log2_lo, precision = self.precision, tag = "log2_lo") # local overloading of RaiseReturn operation def ExpRaiseReturn(*args, **kwords): kwords["arg_value"] = vx kwords["function_name"] = self.function_name return RaiseReturn(*args, **kwords) test_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False, debug=True, tag="nan_or_inf") test_nan = Test(vx, specifier=Test.IsNaN, debug=True, tag="is_nan_test") test_positive = Comparison(vx, 0, specifier=Comparison.GreaterOrEqual, debug=True, tag="inf_sign") test_signaling_nan = Test(vx, specifier=Test.IsSignalingNaN, debug=True, tag="is_signaling_nan") return_snan = Statement( ExpRaiseReturn(ML_FPE_Invalid, return_value=FP_QNaN(self.precision))) v_log2_hi = round( log(2), self.precision.get_field_size() - (self.precision.get_exponent_size() + 1), sollya.RN) v_log2_lo = round( log(2) - v_log2_hi, self.precision.sollya_object, sollya.RN) log2_hi = Constant(v_log2_hi, precision=self.precision, tag="log2_hi") log2_lo = Constant(v_log2_lo, precision=self.precision, tag="log2_lo") vx_exp = ExponentExtraction(vx, tag="vx_exp", debug=debug_multi) int_precision = self.precision.get_integer_format() # table creation table_index_size = 7 log_table = ML_NewTable(dimensions=[2**table_index_size, 2], storage_precision=self.precision, tag=self.uniquify_name("inv_table")) log_table[0][0] = 0.0 log_table[0][1] = 0.0 # retrieving processor inverse approximation table dummy_var = Variable("dummy", precision=self.precision) dummy_div_seed = ReciprocalSeed(dummy_var, precision=self.precision) inv_approx_table = self.processor.get_recursive_implementation( dummy_div_seed, language=None, table_getter=lambda self: self.approx_table_map) integer_precision = { ML_Binary32: ML_UInt32, ML_Binary64: ML_UInt64 }[self.precision] for i in range(1, 2**table_index_size): #inv_value = (1.0 + (self.processor.inv_approx_table[i] / S2**9) + S2**-52) * S2**-1 inv_value = 
inv_approx_table[ i] # (1.0 + (inv_approx_table[i][0] / S2**9) ) * S2**-1 value_high = round( log(inv_value), self.precision.get_field_size() - (self.precision.get_exponent_size() + 1), sollya.RN) value_low = round( log(inv_value) - value_high, sollya_precision, sollya.RN) log_table[i][0] = value_high log_table[i][1] = value_low def compute_log(_vx, exp_corr_factor=None): _vx_mant = MantissaExtraction(_vx, tag="_vx_mant", debug=debug_multi, precision=self.precision) _vx_exp = ExponentExtraction(_vx, tag="_vx_exp", debug=debug_multi) table_index = BitLogicAnd(BitLogicRightShift( TypeCast(_vx_mant, precision=int_precision, debug=debug_multi), self.precision.get_field_size() - 7, debug=debug_multi), 0x7f, tag="table_index", debug=debug_multi) # argument reduction # TODO: detect if single operand inverse seed is supported by the targeted architecture pre_arg_red_index = TypeCast(BitLogicAnd( TypeCast(ReciprocalSeed(_vx_mant, precision=self.precision, tag="seed", debug=debug_multi, silent=True), precision=integer_precision), Constant(-2, precision=integer_precision), precision=integer_precision), precision=self.precision, tag="pre_arg_red_index", debug=debug_multi) arg_red_index = Select(Equal(table_index, 0), 1.0, pre_arg_red_index) #_red_vx = arg_red_index * _vx_mant - 1.0 _red_vx = FusedMultiplyAdd(arg_red_index, _vx_mant, 1.0, specifier=FusedMultiplyAdd.Subtract) _red_vx.set_attributes(tag="_red_vx", debug=debug_multi) inv_err = S2**-7 red_interval = Interval(1 - inv_err, 1 + inv_err) # return in case of standard (non-special) input _log_inv_lo = TableLoad(log_table, table_index, 1, tag="log_inv_lo", debug=debug_multi) _log_inv_hi = TableLoad(log_table, table_index, 0, tag="log_inv_hi", debug=debug_multi) Log.report(Log.Verbose, "building mathematical polynomial") approx_interval = Interval(-inv_err, inv_err) poly_degree = sup( guessdegree( log(1 + sollya.x) / sollya.x, approx_interval, S2** -(self.precision.get_field_size() + 1))) + 1 global_poly_object = Polynomial.build_from_approximation( log(1 + sollya.x) / sollya.x, poly_degree, [1] + [self.precision] * (poly_degree), approx_interval, sollya.absolute) poly_object = global_poly_object.sub_poly(start_index=1) Log.report(Log.Verbose, "generating polynomial evaluation scheme") #_poly = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, _red_vx, unified_precision = self.precision) _poly = PolynomialSchemeEvaluator.generate_estrin_scheme( poly_object, _red_vx, unified_precision=self.precision) _poly.set_attributes(tag="poly", debug=debug_multi) corr_exp = Conversion( _vx_exp if exp_corr_factor == None else _vx_exp + exp_corr_factor, precision=self.precision) split_red_vx = Split(_red_vx, precision=ML_DoubleDouble, tag="split_red_vx", debug=debug_multi) red_vx_hi = split_red_vx.hi red_vx_lo = split_red_vx.lo # result = _red_vx * poly - log_inv_hi - log_inv_lo + _vx_exp * log2_hi + _vx_exp * log2_lo pre_result = -_log_inv_hi + (_red_vx + (_red_vx * _poly + (corr_exp * log2_lo - _log_inv_lo))) pre_result.set_attributes(tag="pre_result", debug=debug_multi) exact_log2_hi_exp = corr_exp * log2_hi exact_log2_hi_exp.set_attributes(tag="exact_log2_hi_exp", debug=debug_multi) cancel_part = (corr_exp * log2_hi - _log_inv_hi) cancel_part.set_attributes(tag="cancel_part", debug=debug_multi) sub_part = red_vx_hi + cancel_part sub_part.set_attributes(tag="sub_part", debug=debug_multi) #result_one_low_part = (red_vx_hi * _poly + (red_vx_lo + (red_vx_lo * _poly + (corr_exp * log2_lo - _log_inv_lo)))) result_one_low_part = ((red_vx_lo + (red_vx_lo 
* _poly + (corr_exp * log2_lo - _log_inv_lo)))) result_one_low_part.set_attributes(tag="result_one_low_part", debug=debug_multi) _result_one = ( (sub_part) + red_vx_hi * _poly) + result_one_low_part return exact_log2_hi_exp + pre_result, _poly, _log_inv_lo, _log_inv_hi, _red_vx, _result_one result, poly, log_inv_lo, log_inv_hi, red_vx, new_result_one = compute_log( vx) result.set_attributes(tag="result", debug=debug_multi) new_result_one.set_attributes(tag="new_result_one", debug=debug_multi) neg_input = Comparison(vx, 0, likely=False, specifier=Comparison.Less, debug=debug_multi, tag="neg_input") vx_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False, debug=debug_multi, tag="nan_or_inf") vx_snan = Test(vx, specifier=Test.IsSignalingNaN, likely=False, debug=debug_multi, tag="snan") vx_inf = Test(vx, specifier=Test.IsInfty, likely=False, debug=debug_multi, tag="inf") vx_subnormal = Test(vx, specifier=Test.IsSubnormal, likely=False, debug=debug_multi, tag="vx_subnormal") vx_zero = Test(vx, specifier=Test.IsZero, likely=False, debug=debug_multi, tag="vx_zero") exp_mone = Equal(vx_exp, -1, tag="exp_minus_one", debug=debug_multi, likely=False) vx_one = Equal(vx, 1.0, tag="vx_one", likely=False, debug=debug_multi) # exp=-1 case Log.report(Log.Verbose, "managing exp=-1 case") result2 = (-log_inv_hi - log2_hi) + ( (red_vx + poly * red_vx) - log2_lo - log_inv_lo) result2.set_attributes(tag="result2", debug=debug_multi) m100 = -100 S2100 = Constant(S2**100, precision=self.precision) result_subnormal, _, _, _, _, _ = compute_log(vx * S2100, exp_corr_factor=m100) Log.report(Log.Verbose, "managing close to 1.0 cases") one_err = S2**-7 approx_interval_one = Interval(-one_err, one_err) red_vx_one = vx - 1.0 poly_degree_one = sup( guessdegree( log(1 + sollya.x) / sollya.x, approx_interval_one, S2** -(self.precision.get_field_size() + 1))) + 1 poly_object_one = Polynomial.build_from_approximation( log(1 + sollya.x) / sollya.x, poly_degree_one, [self.precision] * (poly_degree_one + 1), approx_interval_one, sollya.absolute).sub_poly(start_index=1) poly_one = PolynomialSchemeEvaluator.generate_horner_scheme( poly_object_one, red_vx_one, unified_precision=self.precision) poly_one.set_attributes(tag="poly_one", debug=debug_multi) result_one = red_vx_one + red_vx_one * poly_one cond_one = (vx < (1 + one_err)) & (vx > (1 - one_err)) cond_one.set_attributes(tag="cond_one", debug=debug_multi, likely=False) # main scheme pre_scheme = ConditionBlock( neg_input, Statement(ClearException(), Raise(ML_FPE_Invalid), Return(FP_QNaN(self.precision))), ConditionBlock( vx_nan_or_inf, ConditionBlock( vx_inf, Statement( ClearException(), Return(FP_PlusInfty(self.precision)), ), Statement(ClearException(), ConditionBlock(vx_snan, Raise(ML_FPE_Invalid)), Return(FP_QNaN(self.precision)))), ConditionBlock( vx_subnormal, ConditionBlock( vx_zero, Statement( ClearException(), Raise(ML_FPE_DivideByZero), Return(FP_MinusInfty(self.precision)), ), Return(result_subnormal)), ConditionBlock( vx_one, Statement( ClearException(), Return(FP_PlusZero(self.precision)), ), ConditionBlock(exp_mone, Return(result2), Return(result)) #ConditionBlock(cond_one, #Return(new_result_one), #ConditionBlock(exp_mone, #Return(result2), #Return(result) #) #) )))) scheme = pre_scheme return scheme standard_test_cases = [(sollya.parse("0x1.fe9a5p-1"), ), (sollya.parse("0x1.fe9a5p-1"), )] def numeric_emulate(self, input_value): return log(input_value)
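# Illustration (not part of the generated scheme): a minimal, self-contained
# sketch of the argument reduction performed by compute_log() above, assuming
# a 7-bit reciprocal seed as suggested by the 0x7f table-index mask and the
# inv_err = S2**-7 bound. With x = m * 2**e, m in [1, 2), and r an approximate
# reciprocal of m, log(x) = e*log(2) - log(r) + log(1 + u) with u = m*r - 1,
# and u stays small (|u| < 2**-6 for the truncated 7-bit seed used here), so a
# short polynomial of log(1+u)/u suffices. The helpers seed_reciprocal() and
# reference_log_reduction() are hypothetical names for this sketch only.
import math


def seed_reciprocal(m, bits=7):
    """Truncate 1/m to `bits` fractional bits, mimicking a coarse seed table."""
    scale = 2 ** bits
    return math.floor(scale / m) / scale


def reference_log_reduction(x):
    """Recombine log(x) from the reduced argument u = m*r - 1."""
    m, e = math.frexp(x)          # x = m * 2**e with m in [0.5, 1)
    m, e = 2.0 * m, e - 1         # renormalize so that m is in [1, 2)
    r = seed_reciprocal(m)
    u = m * r - 1.0               # small reduced argument
    assert abs(u) <= 2 ** -6      # loose check on the reduction interval
    return e * math.log(2.0) - math.log(r) + math.log1p(u)


if __name__ == "__main__":
    # the reduction is an exact identity up to rounding of the recombination
    for x in (0.7, 1.0, 3.14159, 1e-3, 1e12):
        assert abs(reference_log_reduction(x) - math.log(x)) < 1e-12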
# float step = 0x1.p-11; # unsigned index_size = 11; # for (int i = 0; i < (1<<index_size); ++i) { # float input = 1.0f + i * step; # float approx = 0.0f; # _mm_store_ss(&approx, _mm_rcp_ss (_mm_set_ss(input))); # printf("\"%a\", ", approx); # if (i % 5 == 4) printf("\n"); # } # return 0; # } x86_rcp_table = ML_ApproxTable( dimensions = [2**11], index_size=11, storage_precision = ML_Binary32, init_data = [sollya.parse(v) for v in [ "0x1.ffep-1", "0x1.ffap-1", "0x1.ff6p-1", "0x1.ff2p-1", "0x1.feep-1", "0x1.feap-1", "0x1.fe6p-1", "0x1.fe2p-1", "0x1.fdep-1", "0x1.fdap-1", "0x1.fd6p-1", "0x1.fd2p-1", "0x1.fcep-1", "0x1.fcap-1", "0x1.fc6p-1", "0x1.fc2p-1", "0x1.fbfp-1", "0x1.fbbp-1", "0x1.fb7p-1", "0x1.fb3p-1", "0x1.fafp-1", "0x1.fabp-1", "0x1.fa7p-1", "0x1.fa3p-1", "0x1.f9fp-1", "0x1.f9bp-1", "0x1.f97p-1", "0x1.f93p-1", "0x1.f9p-1", "0x1.f8cp-1", "0x1.f88p-1", "0x1.f84p-1", "0x1.f8p-1", "0x1.f7cp-1", "0x1.f78p-1", "0x1.f74p-1", "0x1.f71p-1", "0x1.f6dp-1", "0x1.f69p-1", "0x1.f65p-1", "0x1.f61p-1", "0x1.f5dp-1", "0x1.f59p-1", "0x1.f56p-1", "0x1.f52p-1", "0x1.f4ep-1", "0x1.f4ap-1", "0x1.f46p-1", "0x1.f42p-1", "0x1.f3fp-1", "0x1.f3bp-1", "0x1.f37p-1", "0x1.f33p-1", "0x1.f2fp-1", "0x1.f2cp-1", "0x1.f28p-1", "0x1.f24p-1", "0x1.f2p-1", "0x1.f1cp-1", "0x1.f19p-1", "0x1.f15p-1", "0x1.f11p-1", "0x1.f0dp-1", "0x1.f0ap-1", "0x1.f06p-1", "0x1.f02p-1", "0x1.efep-1", "0x1.efbp-1", "0x1.ef7p-1", "0x1.ef3p-1", "0x1.eefp-1", "0x1.eecp-1", "0x1.ee8p-1", "0x1.ee4p-1", "0x1.eep-1",
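# Illustration (hedged): how an entry of x86_rcp_table is meant to be looked
# up, assuming the indexing implied by the generation loop in the comments
# above (input = 1.0 + i * step with step = 2**-11), i.e. the index is the top
# index_size = 11 fraction bits of a binary32 mantissa in [1, 2).
# rcp_table_index() is a hypothetical helper for this sketch only; the actual
# lookup in the generated scheme goes through a TableLoad node.
import struct


def rcp_table_index(m, index_size=11):
    """Return the table index for a mantissa m in [1, 2): its top fraction bits."""
    assert 1.0 <= m < 2.0
    encoding = struct.unpack("<I", struct.pack("<f", m))[0]  # binary32 bit pattern
    return (encoding >> (23 - index_size)) & ((1 << index_size) - 1)


# Every input of the form 1.0 + i * 2**-11 maps back to index i.
assert rcp_table_index(1.0) == 0
assert rcp_table_index(1.0 + 5 * 2.0 ** -11) == 5
assert rcp_table_index(1.0 + 2047 * 2.0 ** -11) == 2047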
class ML_Exp2(ML_FunctionBasis): function_name = "ml_exp2" def __init__(self, args=DefaultArgTemplate): # initializing base class ML_FunctionBasis.__init__(self, args) @staticmethod def get_default_args(**kw): """ Return a structure containing the arguments for ML_Exponential, builtin from a default argument mapping overloaded with @p kw """ default_args_exp2 = { "output_file": "ml_exp2.c", "function_name": "ml_exp2", "precision": ML_Binary32, "accuracy": ML_Faithful, "target": GenericProcessor() } default_args_exp2.update(kw) return DefaultArgTemplate(**default_args_exp2) def generate_scheme(self): # declaring target and instantiating optimization engine vx = self.implementation.add_input_variable("x", self.precision) Log.set_dump_stdout(True) Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m") if self.debug_flag: Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m") # local overloading of RaiseReturn operation def ExpRaiseReturn(*args, **kwords): kwords["arg_value"] = vx kwords["function_name"] = self.function_name return RaiseReturn(*args, **kwords) r_interval = Interval(-0.5, 0.5) local_ulp = sup(ulp(2**r_interval, self.precision)) Log.report(Log.Info, "ulp: ", local_ulp) error_goal = S2**-1 * local_ulp Log.report(Log.Info, "error goal: ", error_goal) sollya_precision = { ML_Binary32: sollya.binary32, ML_Binary64: sollya.binary64 }[self.precision] int_precision = { ML_Binary32: ML_Int32, ML_Binary64: ML_Int64 }[self.precision] #Argument Reduction vx_int = NearestInteger(vx, precision=int_precision, tag='vx_int', debug=debug_multi) vx_intf = Conversion(vx_int, precision=self.precision) vx_r = vx - vx_intf vx_r.set_attributes(tag="vx_r", debug=debug_multi) degree = sup(guessdegree(2**(sollya.x), r_interval, error_goal)) + 2 precision_list = [1] + [self.precision] * degree exp_X = ExponentInsertion(vx_int, tag="exp_X", debug=debug_multi, precision=self.precision) #Polynomial Approx polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme poly_object, poly_error = Polynomial.build_from_approximation_with_error( 2**(sollya.x) - 1, degree, precision_list, r_interval, sollya.absolute) Log.report(Log.Info, "Poly : %s" % poly_object) Log.report(Log.Info, "poly_error : ", poly_error) poly = polynomial_scheme_builder(poly_object.sub_poly(start_index=1), vx_r, unified_precision=self.precision) poly.set_attributes(tag="poly", debug=debug_multi) #Handling special cases oflow_bound = Constant(self.precision.get_emax() + 1, precision=self.precision) subnormal_bound = self.precision.get_emin_subnormal() uflow_bound = self.precision.get_emin_normal() Log.report(Log.Info, "oflow : ", oflow_bound) #print "uflow : ", uflow_bound #print "sub : ", subnormal_bound test_overflow = Comparison(vx, oflow_bound, specifier=Comparison.GreaterOrEqual) test_overflow.set_attributes(tag="oflow_test", debug=debug_multi, likely=False, precision=ML_Bool) test_underflow = Comparison(vx, uflow_bound, specifier=Comparison.Less) test_underflow.set_attributes(tag="uflow_test", debug=debug_multi, likely=False, precision=ML_Bool) test_subnormal = Comparison(vx, subnormal_bound, specifier=Comparison.Greater) test_subnormal.set_attributes(tag="sub_test", debug=debug_multi, likely=False, precision=ML_Bool) subnormal_offset = -(uflow_bound - vx_int) subnormal_offset.set_attributes(tag="offset", debug=debug_multi) exp_offset = ExponentInsertion(subnormal_offset, precision=self.precision, debug=debug_multi, tag="exp_offset") exp_min = ExponentInsertion(uflow_bound, 
precision=self.precision,
                                    debug=debug_multi,
                                    tag="exp_min")

        subnormal_result = exp_offset * exp_min * poly + exp_offset * exp_min

        test_std = LogicalOr(test_overflow,
                             test_underflow,
                             precision=ML_Bool,
                             tag="std_test",
                             likely=False)

        # Reconstruction
        result = exp_X * poly + exp_X
        result.set_attributes(tag="result", debug=debug_multi)

        C0 = Constant(0, precision=self.precision)

        return_inf = Return(FP_PlusInfty(self.precision))
        return_C0 = Return(C0)
        return_sub = Return(subnormal_result)
        return_std = Return(result)

        non_std_statement = Statement(
            ConditionBlock(
                test_overflow, return_inf,
                ConditionBlock(test_subnormal, return_sub, return_C0)))

        scheme = Statement(
            ConditionBlock(test_std, non_std_statement, return_std))

        return scheme

    def generate_emulate(self, result_ternary, result, mpfr_x, mpfr_rnd):
        """ generate the emulation code for ML_Exp2 functions
            mpfr_x is an mpfr_t variable which should have the right precision
            mpfr_rnd is the rounding mode
        """
        # MPFR's base-2 exponential matches numeric_emulate (2**x)
        emulate_func_name = "mpfr_exp2"
        emulate_func_op = FunctionOperator(emulate_func_name,
                                           arg_map={
                                               0: FO_Arg(0),
                                               1: FO_Arg(1),
                                               2: FO_Arg(2)
                                           },
                                           require_header=["mpfr.h"])
        emulate_func = FunctionObject(emulate_func_name,
                                      [ML_Mpfr_t, ML_Mpfr_t, ML_Int32],
                                      ML_Int32, emulate_func_op)
        mpfr_call = Statement(
            ReferenceAssign(result_ternary,
                            emulate_func(result, mpfr_x, mpfr_rnd)))

        return mpfr_call

    def numeric_emulate(self, input_value):
        return sollya.SollyaObject(2)**(input_value)

    standard_test_cases = [
        [sollya.parse(x)]
        for x in ["0x1.ffead1bac7ad2p+9", "-0x1.ee9cb4p+1", "-0x1.db0928p+3"]
    ]
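# Illustration (hedged): the reduction/reconstruction used by
# ML_Exp2.generate_scheme() above, checked with a plain Taylor stand-in.
# With k = nearest_int(x) and r = x - k in [-0.5, 0.5], we have
# 2**x = 2**k * (2**r - 1) + 2**k, which is exactly the
# "result = exp_X * poly + exp_X" reconstruction once poly approximates
# 2**r - 1. poly_2r_minus_1() below is a hypothetical stand-in (a truncated
# series), not the polynomial built by build_from_approximation_with_error.
import math

LOG2 = math.log(2.0)


def poly_2r_minus_1(r, degree=6):
    """Crude stand-in for the generated polynomial: truncated series of 2**r - 1."""
    acc, term = 0.0, 1.0
    for n in range(1, degree + 1):
        term *= r * LOG2 / n          # term becomes (r*ln2)**n / n!
        acc += term
    return acc


def exp2_reconstruction(x):
    k = round(x)                      # plays the role of NearestInteger(vx)
    r = x - k                         # reduced argument in [-0.5, 0.5]
    exp_k = math.ldexp(1.0, k)        # ExponentInsertion(vx_int): exact power of two
    return exp_k * poly_2r_minus_1(r) + exp_k


if __name__ == "__main__":
    for x in (-3.7, -0.25, 0.0, 0.5, 4.2, 10.9):
        assert abs(exp2_reconstruction(x) - 2.0 ** x) / 2.0 ** x < 1e-6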