def generate_approx_poly_near_zero(self, function, high_bound, error_bound, variable):
    """Build an evaluation scheme approximating ``function`` near zero.

    The quotient ``function / x`` is approximated on
    ``[2**-100, high_bound]`` by a polynomial restricted to even
    monomial degrees; the Horner scheme of that polynomial is then
    multiplied by ``variable``.

    :param function: sollya expression to approximate
    :param high_bound: upper bound of the approximation interval
    :param error_bound: error target handed to ``sollya.guessdegree``
    :param variable: graph node at which the scheme is evaluated
    :return: pair (evaluation scheme, approximation error)
    """
    def error_function(p, f, ai, mod, t):
        # only the polynomial, the function and the interval are used
        return sollya.dirtyinfnorm(p - f, ai)

    # 0 as an interval bound caused issues, so the lower bound is a tiny
    # positive value rather than 0 itself
    approx_interval = Interval(2**-100, high_bound)

    quotient = function / sollya.x
    guessed_degree = sollya.guessdegree(quotient, approx_interval, error_bound)
    top_degree = int(sollya.sup(guessed_degree))
    # even degrees only, with some margin above the guessed degree
    degree_list = range(0, top_degree + 4, 2)

    # the degree-0 coefficient gets format 1, all others self.precision
    coeff_formats = [1] + [self.precision] * (len(degree_list) - 1)

    poly_object, approx_error = Polynomial.build_from_approximation_with_error(
        quotient,
        degree_list,
        coeff_formats,
        approx_interval,
        sollya.absolute,
        error_function=error_function)

    message = "approximation poly: {}\n with error {}".format(
        poly_object, approx_error)
    Log.report(Log.Info, message)

    horner_scheme = PolynomialSchemeEvaluator.generate_horner_scheme(
        poly_object, variable, self.precision)
    # multiply back by the variable to undo the division by x
    return Multiplication(variable, horner_scheme), approx_error
def generate_scheme(self):
    """Build the operation graph of the exponential implementation.

    The scheme is assembled as follows:

    * special inputs (NaN / infinities) are dispatched first;
    * inputs beyond the early overflow / underflow bounds return
      +infinity / +0 directly;
    * otherwise ``k = nearestint(x / log(2))`` is computed and the
      reduced argument ``r = (x - k * log2_hi) - k * log2_lo`` is built;
    * a polynomial of ``expm1`` is generated, its degree being increased
      until the combined approximation + evaluation error (the latter
      measured through gappa) is below the accuracy goal;
    * the result is rebuilt as ``2^k * poly`` with dedicated paths for
      late overflow and late underflow.

    :return: root node of the generated scheme
    """
    # declaring target and instantiating optimization engine
    vx = self.implementation.add_input_variable("x", self.precision)

    Log.set_dump_stdout(True)

    Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m")
    if self.debug_flag:
        Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

    # local overloading of RaiseReturn operation
    def ExpRaiseReturn(*args, **kwords):
        kwords["arg_value"] = vx
        kwords["function_name"] = self.function_name
        if self.libm_compliant:
            return RaiseReturn(*args, precision=self.precision, **kwords)
        else:
            # exception flags are dropped when libm compliance is not required
            return Return(kwords["return_value"], precision=self.precision)

    test_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False,
                           debug=debug_multi, tag="nan_or_inf")
    test_nan = Test(vx, specifier=Test.IsNaN, debug=debug_multi,
                    tag="is_nan_test")
    test_positive = Comparison(vx, 0, specifier=Comparison.GreaterOrEqual,
                               debug=debug_multi, tag="inf_sign")

    test_signaling_nan = Test(vx, specifier=Test.IsSignalingNaN,
                              debug=debug_multi, tag="is_signaling_nan")
    return_snan = Statement(
        ExpRaiseReturn(ML_FPE_Invalid,
                       return_value=FP_QNaN(self.precision)))

    # return in case of infinity input
    infty_return = Statement(
        ConditionBlock(
            test_positive,
            Return(FP_PlusInfty(self.precision), precision=self.precision),
            Return(FP_PlusZero(self.precision), precision=self.precision)))
    # return in case of specific value input (NaN or inf)
    specific_return = ConditionBlock(
        test_nan,
        ConditionBlock(
            test_signaling_nan,
            return_snan,
            Return(FP_QNaN(self.precision), precision=self.precision)),
        infty_return)

    # return in case of standard (non-special) input

    # exclusion of early overflow and underflow cases
    precision_emax = self.precision.get_emax()
    precision_max_value = S2 * S2**precision_emax
    exp_overflow_bound = sollya.ceil(log(precision_max_value))
    early_overflow_test = Comparison(vx, exp_overflow_bound, likely=False,
                                     specifier=Comparison.Greater)
    early_overflow_return = Statement(
        ClearException() if self.libm_compliant else Statement(),
        ExpRaiseReturn(ML_FPE_Inexact, ML_FPE_Overflow,
                       return_value=FP_PlusInfty(self.precision)))

    precision_emin = self.precision.get_emin_subnormal()
    precision_min_value = S2**precision_emin
    exp_underflow_bound = floor(log(precision_min_value))

    early_underflow_test = Comparison(vx, exp_underflow_bound, likely=False,
                                      specifier=Comparison.Less)
    early_underflow_return = Statement(
        ClearException() if self.libm_compliant else Statement(),
        ExpRaiseReturn(ML_FPE_Inexact, ML_FPE_Underflow,
                       return_value=FP_PlusZero(self.precision)))

    # constant computation
    invlog2 = self.precision.round_sollya_object(1 / log(2), sollya.RN)

    interval_vx = Interval(exp_underflow_bound, exp_overflow_bound)
    interval_fk = interval_vx * invlog2
    interval_k = Interval(floor(inf(interval_fk)),
                          sollya.ceil(sup(interval_fk)))

    # log2_hi keeps few enough bits for k * log2_hi to be flagged exact below
    log2_hi_precision = self.precision.get_field_size() - (
        sollya.ceil(log2(sup(abs(interval_k)))) + 2)
    Log.report(Log.Info, "log2_hi_precision: %d" % log2_hi_precision)
    invlog2_cst = Constant(invlog2, precision=self.precision)
    log2_hi = round(log(2), log2_hi_precision, sollya.RN)
    log2_lo = self.precision.round_sollya_object(
        log(2) - log2_hi, sollya.RN)

    # argument reduction
    unround_k = vx * invlog2
    unround_k.set_attributes(tag="unround_k", debug=debug_multi)
    k = NearestInteger(unround_k, precision=self.precision,
                       debug=debug_multi)
    ik = NearestInteger(unround_k,
                        precision=self.precision.get_integer_format(),
                        debug=debug_multi, tag="ik")
    ik.set_tag("ik")
    k.set_tag("k")
    exact_pre_mul = (k * log2_hi)
    exact_pre_mul.set_attributes(exact=True)
    exact_hi_part = vx - exact_pre_mul
    exact_hi_part.set_attributes(exact=True, tag="exact_hi",
                                 debug=debug_multi,
                                 prevent_optimization=True)
    exact_lo_part = -k * log2_lo
    exact_lo_part.set_attributes(tag="exact_lo", debug=debug_multi,
                                 prevent_optimization=True)
    r = exact_hi_part + exact_lo_part
    r.set_tag("r")
    r.set_attributes(debug=debug_multi)

    approx_interval = Interval(-log(2) / 2, log(2) / 2)

    # three-way split of the reduced interval, used as gappa dichotomy
    approx_interval_half = approx_interval / 2
    approx_interval_split = [
        Interval(-log(2) / 2, inf(approx_interval_half)),
        approx_interval_half,
        Interval(sup(approx_interval_half), log(2) / 2)
    ]

    # TODO: should be computed automatically
    exact_hi_interval = approx_interval
    exact_lo_interval = -interval_k * log2_lo

    opt_r = self.optimise_scheme(r, copy={})

    tag_map = {}
    self.opt_engine.register_nodes_by_tag(opt_r, tag_map)

    cg_eval_error_copy_map = {
        vx: Variable("x", precision=self.precision, interval=interval_vx),
        tag_map["k"]: Variable("k", interval=interval_k,
                               precision=self.precision)
    }

    if is_gappa_installed():
        eval_error = self.gappa_engine.get_eval_error_v2(
            self.opt_engine, opt_r, cg_eval_error_copy_map,
            gappa_filename="red_arg.g")
    else:
        eval_error = 0.0
        Log.report(Log.Warning,
                   "gappa is not installed in this environnement")
    Log.report(Log.Info, "eval error: %s" % eval_error)

    local_ulp = sup(ulp(sollya.exp(approx_interval), self.precision))
    # FIXME refactor error_goal from accuracy
    Log.report(Log.Info, "accuracy: %s" % self.accuracy)
    if isinstance(self.accuracy, ML_Faithful):
        error_goal = local_ulp
    elif isinstance(self.accuracy, ML_CorrectlyRounded):
        error_goal = S2**-1 * local_ulp
    elif isinstance(self.accuracy, ML_DegradedAccuracyAbsolute):
        error_goal = self.accuracy.goal
    elif isinstance(self.accuracy, ML_DegradedAccuracyRelative):
        error_goal = self.accuracy.goal
    else:
        # NOTE(review): presumably Log.Error aborts generation; if it
        # returns, error_goal is unbound on the next statement
        Log.report(Log.Error, "unknown accuracy: %s" % self.accuracy)

    error_goal_approx = S2**-1 * error_goal

    Log.report(Log.Info,
               "\033[33;1m building mathematical polynomial \033[0m\n")
    poly_degree = max(
        sup(
            guessdegree(
                expm1(sollya.x) / sollya.x, approx_interval,
                error_goal_approx)) - 1, 2)
    init_poly_degree = poly_degree

    error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

    polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme

    # degree search: increase the degree until the error goal is met
    while True:
        Log.report(Log.Info, "attempting poly degree: %d" % poly_degree)
        precision_list = [1] + [self.precision] * (poly_degree)
        poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
            expm1(sollya.x),
            poly_degree,
            precision_list,
            approx_interval,
            sollya.absolute,
            error_function=error_function)
        Log.report(Log.Info, "polynomial: %s " % poly_object)
        sub_poly = poly_object.sub_poly(start_index=2)
        Log.report(Log.Info, "polynomial: %s " % sub_poly)

        Log.report(Log.Info, "poly approx error: %s" % poly_approx_error)

        Log.report(
            Log.Info,
            "\033[33;1m generating polynomial evaluation scheme \033[0m")
        pre_poly = polynomial_scheme_builder(
            poly_object, r, unified_precision=self.precision)
        pre_poly.set_attributes(tag="pre_poly", debug=debug_multi)

        pre_sub_poly = polynomial_scheme_builder(
            sub_poly, r, unified_precision=self.precision)
        pre_sub_poly.set_attributes(tag="pre_sub_poly", debug=debug_multi)

        # 1 + r + (terms of degree >= 2), with r kept as its hi/lo parts
        poly = 1 + (exact_hi_part + (exact_lo_part + pre_sub_poly))
        poly.set_tag("poly")

        # optimizing poly before evaluation error computation
        opt_poly = self.optimise_scheme(poly)
        opt_sub_poly = self.optimise_scheme(pre_sub_poly)

        # evaluating error of the polynomial approximation
        r_gappa_var = Variable("r", precision=self.precision,
                               interval=approx_interval)
        exact_hi_gappa_var = Variable("exact_hi", precision=self.precision,
                                      interval=exact_hi_interval)
        exact_lo_gappa_var = Variable("exact_lo", precision=self.precision,
                                      interval=exact_lo_interval)
        vx_gappa_var = Variable("x", precision=self.precision,
                                interval=interval_vx)
        k_gappa_var = Variable("k", interval=interval_k,
                               precision=self.precision)

        sub_poly_error_copy_map = {
            exact_hi_part.get_handle().get_node(): exact_hi_gappa_var,
            exact_lo_part.get_handle().get_node(): exact_lo_gappa_var,
        }

        poly_error_copy_map = {
            exact_hi_part.get_handle().get_node(): exact_hi_gappa_var,
            exact_lo_part.get_handle().get_node(): exact_lo_gappa_var,
        }

        if is_gappa_installed():
            sub_poly_eval_error = self.gappa_engine.get_eval_error_v2(
                self.opt_engine,
                opt_sub_poly,
                sub_poly_error_copy_map,
                gappa_filename="%s_gappa_sub_poly.g" % self.function_name)

            dichotomy_map = [
                {
                    exact_hi_part.get_handle().get_node():
                        approx_interval_split[0],
                },
                {
                    exact_hi_part.get_handle().get_node():
                        approx_interval_split[1],
                },
                {
                    exact_hi_part.get_handle().get_node():
                        approx_interval_split[2],
                },
            ]
            poly_eval_error_dico = self.gappa_engine.get_eval_error_v3(
                self.opt_engine,
                opt_poly,
                poly_error_copy_map,
                gappa_filename="gappa_poly.g",
                dichotomy=dichotomy_map)

            poly_eval_error = max(
                sup(abs(err)) for err in poly_eval_error_dico)
        else:
            poly_eval_error = 0.0
            sub_poly_eval_error = 0.0
            Log.report(Log.Warning,
                       "gappa is not installed in this environnement")
            Log.report(Log.Info, "stopping autonomous degree research")
            # incrementing polynomial degree to counteract initial decrementation effect
            # NOTE(review): the polynomial built above is kept as is; only
            # the poly_degree variable is changed before leaving the loop
            poly_degree += 1
            break
        Log.report(Log.Info, "poly evaluation error: %s" % poly_eval_error)
        Log.report(Log.Info,
                   "sub poly evaluation error: %s" % sub_poly_eval_error)

        global_poly_error = None
        global_rel_poly_error = None

        # keep the worst relative error over the three sub-intervals
        for case_index in range(3):
            poly_error = poly_approx_error + poly_eval_error_dico[
                case_index]
            rel_poly_error = sup(
                abs(poly_error /
                    sollya.exp(approx_interval_split[case_index])))
            if global_rel_poly_error is None or rel_poly_error > global_rel_poly_error:
                global_rel_poly_error = rel_poly_error
                global_poly_error = poly_error
        flag = error_goal > global_rel_poly_error

        if flag:
            break
        else:
            poly_degree += 1

    late_overflow_test = Comparison(ik, self.precision.get_emax(),
                                    specifier=Comparison.Greater,
                                    likely=False, debug=debug_multi,
                                    tag="late_overflow_test")
    # floor division keeps the offset an integer: it is used both as an
    # integer-format Constant and as an exponent (true division would
    # produce a float under Python 3)
    overflow_exp_offset = (self.precision.get_emax() -
                           self.precision.get_field_size() // 2)
    diff_k = Subtraction(
        ik,
        Constant(overflow_exp_offset,
                 precision=self.precision.get_integer_format()),
        precision=self.precision.get_integer_format(),
        debug=debug_multi,
        tag="diff_k",
    )
    # 2^k is split in two factors to delay overflow to the last product
    late_overflow_result = (ExponentInsertion(
        diff_k, precision=self.precision) * poly) * ExponentInsertion(
            overflow_exp_offset, precision=self.precision)
    late_overflow_result.set_attributes(silent=False,
                                        tag="late_overflow_result",
                                        debug=debug_multi,
                                        precision=self.precision)
    late_overflow_return = ConditionBlock(
        Test(late_overflow_result, specifier=Test.IsInfty, likely=False),
        ExpRaiseReturn(ML_FPE_Overflow,
                       return_value=FP_PlusInfty(self.precision)),
        Return(late_overflow_result, precision=self.precision))

    late_underflow_test = Comparison(k, self.precision.get_emin_normal(),
                                     specifier=Comparison.LessOrEqual,
                                     likely=False)
    underflow_exp_offset = 2 * self.precision.get_field_size()
    corrected_exp = Addition(
        ik,
        Constant(underflow_exp_offset,
                 precision=self.precision.get_integer_format()),
        precision=self.precision.get_integer_format(),
        tag="corrected_exp")
    late_underflow_result = (
        ExponentInsertion(corrected_exp, precision=self.precision) *
        poly) * ExponentInsertion(-underflow_exp_offset,
                                  precision=self.precision)
    late_underflow_result.set_attributes(debug=debug_multi,
                                         tag="late_underflow_result",
                                         silent=False)
    test_subnormal = Test(late_underflow_result,
                          specifier=Test.IsSubnormal)
    late_underflow_return = Statement(
        ConditionBlock(
            test_subnormal,
            ExpRaiseReturn(ML_FPE_Underflow,
                           return_value=late_underflow_result)),
        Return(late_underflow_result, precision=self.precision))

    twok = ExponentInsertion(ik, tag="exp_ik", debug=debug_multi,
                             precision=self.precision)
    std_result = twok * poly
    std_result.set_attributes(tag="std_result", debug=debug_multi)
    result_scheme = ConditionBlock(
        late_overflow_test, late_overflow_return,
        ConditionBlock(late_underflow_test, late_underflow_return,
                       Return(std_result, precision=self.precision)))
    std_return = ConditionBlock(
        early_overflow_test, early_overflow_return,
        ConditionBlock(early_underflow_test, early_underflow_return,
                       result_scheme))

    # main scheme
    Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
    scheme = ConditionBlock(
        test_nan_or_inf,
        Statement(ClearException() if self.libm_compliant else Statement(),
                  specific_return), std_return)

    return scheme
def compute_log(_vx, exp_corr_factor=None):
    """Build the graph evaluating a base-2 logarithm of ``_vx``.

    The mantissa of ``_vx`` is multiplied by an approximate inverse
    (division seed with its least significant bit cleared) to obtain a
    reduced argument ``_red_vx``; a polynomial of ``log2(1 + x) / x`` is
    evaluated on it and combined with the values loaded from
    ``log_table`` and with the (optionally corrected) exponent.

    :param _vx: input node
    :param exp_corr_factor: optional term added to the extracted exponent
        (``None`` means no correction)
    :return: tuple ``(_result, _poly, _log_inv_lo, _log_inv_hi, _red_vx)``
    """
    _vx_mant = MantissaExtraction(_vx, tag="_vx_mant",
                                  precision=self.precision,
                                  debug=debug_lftolx)
    _vx_exp = ExponentExtraction(_vx, tag="_vx_exp", debug=debugd)

    # The main table is indexed by the 7 most significant bits
    # of the mantissa
    table_index = inv_approx_table.index_function(_vx_mant)
    table_index.set_attributes(tag="table_index", debug=debuglld)

    # argument reduction
    # Using AND -2 to exclude LSB set to 1 for Newton-Raphson convergence
    # TODO: detect if single operand inverse seed is supported by the targeted architecture
    pre_arg_red_index = TypeCast(
        BitLogicAnd(
            TypeCast(
                DivisionSeed(_vx_mant, precision=self.precision,
                             tag="seed", debug=debug_lftolx, silent=True),
                precision=ML_UInt64),
            Constant(-2, precision=ML_UInt64),
            precision=ML_UInt64),
        precision=self.precision,
        tag="pre_arg_red_index",
        debug=debug_lftolx)
    # table index 0 uses 1.0 instead of the seed
    arg_red_index = Select(Equal(table_index, 0), 1.0, pre_arg_red_index,
                           tag="arg_red_index", debug=debug_lftolx)
    _red_vx = FMA(arg_red_index, _vx_mant, -1.0)
    _red_vx.set_attributes(tag="_red_vx", debug=debug_lftolx)
    inv_err = S2**-inv_approx_table.index_size
    red_interval = Interval(1 - inv_err, 1 + inv_err)

    # return in case of standard (non-special) input
    _log_inv_lo = TableLoad(log_table, table_index, 1, tag="log_inv_lo",
                            debug=debug_lftolx)
    _log_inv_hi = TableLoad(log_table, table_index, 0, tag="log_inv_hi",
                            debug=debug_lftolx)

    Log.report(Log.Verbose, "building mathematical polynomial")
    approx_interval = Interval(-inv_err, inv_err)
    poly_degree = sup(
        guessdegree(
            log2(1 + sollya.x) / sollya.x, approx_interval,
            S2**-(self.precision.get_field_size() * 1.1))) + 1
    sollya.settings.display = sollya.hexadecimal
    global_poly_object, approx_error = Polynomial.build_from_approximation_with_error(
        log2(1 + sollya.x) / sollya.x,
        poly_degree,
        [self.precision] * (poly_degree + 1),
        approx_interval,
        sollya.absolute,
        error_function=lambda p, f, ai, mod, t: sollya.dirtyinfnorm(
            p - f, ai))
    Log.report(
        Log.Info,
        "poly_degree={}, approx_error={}".format(
            poly_degree, approx_error))
    poly_object = global_poly_object.sub_poly(start_index=1, offset=1)

    Attributes.set_default_silent(True)
    Attributes.set_default_rounding_mode(ML_RoundToNearest)
    try:
        Log.report(Log.Verbose, "generating polynomial evaluation scheme")
        pre_poly = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object, _red_vx, unified_precision=self.precision)
        _poly = FMA(pre_poly, _red_vx,
                    global_poly_object.get_cst_coeff(0, self.precision))
        _poly.set_attributes(tag="poly", debug=debug_lftolx)
        Log.report(
            Log.Verbose,
            "sollya global_poly_object: {}".format(
                global_poly_object.get_sollya_object()))
        Log.report(
            Log.Verbose,
            "sollya poly_object: {}".format(
                poly_object.get_sollya_object()))

        # identity test: exp_corr_factor may be a graph node, so it must
        # not go through an equality operator
        if exp_corr_factor is None:
            corr_exp = _vx_exp
        else:
            corr_exp = _vx_exp + exp_corr_factor
    finally:
        # always restore the global defaults, even if scheme generation
        # raised, so that later node creation is not affected
        Attributes.unset_default_rounding_mode()
        Attributes.unset_default_silent()

    pre_result = -_log_inv_hi + (_red_vx * _poly + (-_log_inv_lo))
    pre_result.set_attributes(tag="pre_result", debug=debug_lftolx)
    exact_log2_hi_exp = Conversion(corr_exp, precision=self.precision)
    exact_log2_hi_exp.set_attributes(tag="exact_log2_hi_hex",
                                     debug=debug_lftolx)
    _result = exact_log2_hi_exp + pre_result
    return _result, _poly, _log_inv_lo, _log_inv_hi, _red_vx
def generate_scheme(self):
    """Build the operation graph of the expm1 implementation.

    Special inputs (NaN, infinities) and inputs beyond the overflow /
    underflow bounds are handled first (returning +infinity or -1).
    Otherwise ``k = nearestint(x / log(2))`` is computed, the reduced
    argument ``r`` is obtained with a compensated subtraction of
    ``k * log(2)`` (hi / lo split), a polynomial of ``expm1`` is
    evaluated on ``r`` and the result is rebuilt as
    ``2^k * (poly + (1 - 2^-k))``, with dedicated paths for late
    overflow and late underflow.

    :return: root node of the generated scheme
    """
    # declaring target and instantiating optimization engine
    vx = self.implementation.add_input_variable("x", self.precision)

    Log.set_dump_stdout(True)

    Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m")
    if self.debug_flag:
        Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

    # local overloading of RaiseReturn operation
    def ExpRaiseReturn(*args, **kwords):
        kwords["arg_value"] = vx
        kwords["function_name"] = self.function_name
        return RaiseReturn(*args, **kwords)

    C_m1 = Constant(-1, precision=self.precision)

    test_NaN_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False,
                           debug=debug_multi, tag="NaN_or_inf",
                           precision=ML_Bool)
    test_NaN = Test(vx, specifier=Test.IsNaN, likely=False,
                    debug=debug_multi, tag="is_NaN", precision=ML_Bool)
    test_inf = Comparison(vx, 0, specifier=Comparison.Greater,
                          debug=debug_multi, tag="sign",
                          precision=ML_Bool, likely=False)

    # Infnty input: +inf -> +inf, -inf -> -1
    infty_return = Statement(
        ConditionBlock(test_inf, Return(FP_PlusInfty(self.precision)),
                       Return(C_m1)))
    # non-std input (inf/nan)
    specific_return = ConditionBlock(
        test_NaN, Return(FP_QNaN(self.precision)), infty_return)

    # Over/Underflow Tests
    precision_emax = self.precision.get_emax()
    precision_max_value = S2**(precision_emax + 1)
    expm1_overflow_bound = ceil(log(precision_max_value + 1))
    overflow_test = Comparison(vx, expm1_overflow_bound, likely=False,
                               specifier=Comparison.Greater,
                               precision=ML_Bool)
    overflow_return = Statement(Return(FP_PlusInfty(self.precision)))

    precision_emin = self.precision.get_emin_subnormal()
    precision_min_value = S2**precision_emin
    expm1_underflow_bound = floor(log(precision_min_value) + 1)
    underflow_test = Comparison(vx, expm1_underflow_bound, likely=False,
                                specifier=Comparison.Less,
                                precision=ML_Bool)
    underflow_return = Statement(Return(C_m1))

    # only binary32 and binary64 are supported (KeyError otherwise)
    sollya_precision = {
        ML_Binary32: sollya.binary32,
        ML_Binary64: sollya.binary64
    }[self.precision]
    int_precision = {
        ML_Binary32: ML_Int32,
        ML_Binary64: ML_Int64
    }[self.precision]

    # Constants
    log_2 = round(log(2), sollya_precision, sollya.RN)
    invlog2 = round(1 / log(2), sollya_precision, sollya.RN)
    log_2_cst = Constant(log_2, precision=self.precision)

    interval_vx = Interval(expm1_underflow_bound, expm1_overflow_bound)
    interval_fk = interval_vx * invlog2
    interval_k = Interval(floor(inf(interval_fk)), ceil(sup(interval_fk)))

    log2_hi_precision = self.precision.get_field_size() - 6
    log2_hi = round(log(2), log2_hi_precision, sollya.RN)
    log2_lo = round(log(2) - log2_hi, sollya_precision, sollya.RN)

    # Reduction
    unround_k = vx * invlog2
    ik = NearestInteger(unround_k, precision=int_precision,
                        debug=debug_multi, tag="ik")
    k = Conversion(ik, precision=self.precision, tag="k")

    red_coeff1 = Multiplication(k, log2_hi, precision=self.precision)
    red_coeff2 = Multiplication(Negation(k, precision=self.precision),
                                log2_lo, precision=self.precision)

    pre_sub_mul = Subtraction(vx, red_coeff1, precision=self.precision)

    # compensated sum of pre_sub_mul and red_coeff2: s is the rounded
    # sum, t the part of red_coeff2 not absorbed by it
    s = Addition(pre_sub_mul, red_coeff2, precision=self.precision)
    z = Subtraction(s, pre_sub_mul, precision=self.precision)
    t = Subtraction(red_coeff2, z, precision=self.precision)
    r = Addition(s, t, precision=self.precision)

    r.set_attributes(tag="r", debug=debug_multi)

    r_interval = Interval(-log_2 / S2, log_2 / S2)

    local_ulp = sup(ulp(exp(r_interval), self.precision))

    # reported through Log like every other message of this function
    Log.report(Log.Info, "ulp: {}".format(local_ulp))
    error_goal = S2**-1 * local_ulp
    Log.report(Log.Info, "error goal: {}".format(error_goal))

    # Polynomial Approx
    error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)
    Log.report(Log.Info, "\033[33;1m Building polynomial \033[0m\n")

    poly_degree = sup(
        guessdegree(expm1(sollya.x), r_interval, error_goal) + 1)

    polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme
    poly_degree_list = range(0, poly_degree)

    precision_list = [self.precision] * (len(poly_degree_list) + 1)
    poly_object, poly_error = Polynomial.build_from_approximation_with_error(
        expm1(sollya.x),
        poly_degree,
        precision_list,
        r_interval,
        sollya.absolute,
        error_function=error_function)
    # terms of degree >= 2; the degree-1 term is added back as r itself
    sub_poly = poly_object.sub_poly(start_index=2)
    Log.report(Log.Info, "Poly : %s" % sub_poly)
    Log.report(
        Log.Info,
        "poly error : {} / {:d}".format(poly_error,
                                        int(sollya.log2(poly_error))))
    pre_sub_poly = polynomial_scheme_builder(
        sub_poly, r, unified_precision=self.precision)
    poly = r + pre_sub_poly
    poly.set_attributes(tag="poly", debug=debug_multi)

    exp_k = ExponentInsertion(ik, tag="exp_k", debug=debug_multi,
                              precision=self.precision)
    exp_mk = ExponentInsertion(-ik, tag="exp_mk", debug=debug_multi,
                               precision=self.precision)

    diff = 1 - exp_mk
    diff.set_attributes(tag="diff", debug=debug_multi)

    # Late Tests
    late_overflow_test = Comparison(ik, self.precision.get_emax(),
                                    specifier=Comparison.Greater,
                                    likely=False, debug=debug_multi,
                                    tag="late_overflow_test")

    # floor division keeps the offset an integer: it is subtracted from
    # the integer exponent ik and inserted as an exponent (true division
    # would produce a float under Python 3)
    overflow_exp_offset = (self.precision.get_emax() -
                           self.precision.get_field_size() // 2)
    diff_k = ik - overflow_exp_offset

    exp_diff_k = ExponentInsertion(diff_k, precision=self.precision,
                                   tag="exp_diff_k", debug=debug_multi)
    exp_oflow_offset = ExponentInsertion(overflow_exp_offset,
                                         precision=self.precision,
                                         tag="exp_offset",
                                         debug=debug_multi)

    # 2^k is split in two factors to delay overflow to the last product
    late_overflow_result = (exp_diff_k *
                            (1 + poly)) * exp_oflow_offset - 1.0

    late_overflow_return = ConditionBlock(
        Test(late_overflow_result, specifier=Test.IsInfty, likely=False),
        ExpRaiseReturn(ML_FPE_Overflow,
                       return_value=FP_PlusInfty(self.precision)),
        Return(late_overflow_result))

    late_underflow_test = Comparison(k, self.precision.get_emin_normal(),
                                     specifier=Comparison.LessOrEqual,
                                     likely=False)

    underflow_exp_offset = 2 * self.precision.get_field_size()
    corrected_coeff = ik + underflow_exp_offset

    exp_corrected = ExponentInsertion(corrected_coeff,
                                      precision=self.precision)
    exp_uflow_offset = ExponentInsertion(-underflow_exp_offset,
                                         precision=self.precision)

    late_underflow_result = (exp_corrected *
                             (1 + poly)) * exp_uflow_offset - 1.0

    test_subnormal = Test(late_underflow_result,
                          specifier=Test.IsSubnormal, likely=False)

    late_underflow_return = Statement(
        ConditionBlock(
            test_subnormal,
            ExpRaiseReturn(ML_FPE_Underflow,
                           return_value=late_underflow_result)),
        Return(late_underflow_result))

    # Reconstruction
    std_result = exp_k * (poly + diff)
    std_result.set_attributes(tag="result", debug=debug_multi)

    result_scheme = ConditionBlock(
        late_overflow_test,
        late_overflow_return,
        ConditionBlock(
            late_underflow_test,
            late_underflow_return,
            Return(std_result)))

    std_return = ConditionBlock(
        overflow_test,
        overflow_return,
        ConditionBlock(
            underflow_test,
            underflow_return,
            result_scheme))

    scheme = ConditionBlock(
        test_NaN_or_inf,
        Statement(specific_return),
        std_return)

    return scheme
def piecewise_approximation(function,
                            variable,
                            precision,
                            bound_low=-1.0,
                            bound_high=1.0,
                            num_intervals=16,
                            max_degree=2,
                            error_threshold=S2**-24,
                            odd=False,
                            even=False):
    """ Generate a piecewise approximation

        :param function: function to be approximated
        :type function: SollyaObject
        :param variable: input variable
        :type variable: Variable
        :param precision: variable's format
        :type precision: ML_Format
        :param bound_low: lower bound for the approximation interval
        :param bound_high: upper bound for the approximation interval
        :param num_intervals: number of sub-interval / sub-division of the main interval
        :param max_degree: maximum degree for an approximation on any sub-interval
            (when None, the largest degree suggested for a sub-interval is used)
        :param error_threshold: error bound for an approximation on any sub-interval
        :param odd: only used on a sub-interval whose lower bound is a zero
            of function; with odd or even set, the monomial degrees of the
            polynomial approximating function(x) / x are stepped by 2
        :param even: same restriction as odd, and the monomial degrees
            additionally start at 1 instead of 0

        :return: pair (scheme, error) where scheme is a graph node for an
            approximation scheme of function evaluated at variable, and error
            is the maximum approximation error encountered
        :rtype tuple(ML_Operation, SollyaObject): """

    degree_generator = piecewise_approximation_degree_generator(
        function,
        bound_low,
        bound_high,
        num_intervals=num_intervals,
        error_threshold=error_threshold,
    )
    degree_list = list(degree_generator)

    # if max_degree is None then we determine it locally
    if max_degree is None:
        max_degree = max(degree_list)
    # table to store coefficients of the approximation on each segment
    coeff_table = ML_NewTable(
        dimensions=[num_intervals, max_degree + 1],
        storage_precision=precision,
        tag="coeff_table",
        const=True  # by default all approximation coeff table are const
    )

    error_function = lambda p, f, ai, mod, t: sollya.dirtyinfnorm(p - f, ai)
    max_approx_error = 0.0
    interval_size = (bound_high - bound_low) / num_intervals

    for i in range(num_intervals):
        subint_low = bound_low + i * interval_size
        subint_high = bound_low + (i + 1) * interval_size

        # function translated so that the sub-interval starts at 0
        local_function = function(sollya.x + subint_low)
        local_interval = Interval(-interval_size, interval_size)

        local_degree = degree_list[i]
        if local_degree > max_degree:
            Log.report(
                Log.Warning,
                "local_degree {} exceeds max_degree bound ({}) in piecewise_approximation",
                local_degree, max_degree)
        # as max_degree defines the size of the table we can use
        # it as the degree for each sub-interval polynomial
        # as there is nothing to gain (yet) by using a smaller polynomial
        degree = max_degree  # min(max_degree, local_degree)

        if function(subint_low) == 0.0:
            # if the lower bound is a zero to the function, we
            # need to force value=0 for the constant coefficient
            # and extend the approximation interval
            local_poly_degree_list = list(
                range(1 if even else 0, degree + 1,
                      2 if odd or even else 1))
            poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                function(sollya.x) / sollya.x,
                local_poly_degree_list,
                [precision] * len(local_poly_degree_list),
                Interval(-subint_high * 0.95, subint_high),
                sollya.absolute,
                error_function=error_function)
            # multiply by sollya.x
            poly_object = poly_object.sub_poly(offset=-1)
        else:
            try:
                poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                    local_function,
                    degree,
                    [precision] * (degree + 1),
                    local_interval,
                    sollya.absolute,
                    error_function=error_function)
            except SollyaError as err:
                # try to see if function is constant on the interval (possible
                # failure cause for fpminmax)
                cst_value = precision.round_sollya_object(
                    function(subint_low), sollya.RN)
                accuracy = error_threshold
                diff_with_cst_range = sollya.supnorm(
                    cst_value, local_function, local_interval,
                    sollya.absolute, accuracy)
                diff_with_cst = sup(abs(diff_with_cst_range))
                if diff_with_cst < error_threshold:
                    Log.report(Log.Info, "constant polynomial detected")
                    poly_object = Polynomial([function(subint_low)] +
                                             [0] * degree)
                    approx_error = diff_with_cst
                else:
                    # Log.Error is the level name used for error reports;
                    # the lowercase spelling would raise AttributeError here
                    # and mask the original sollya failure
                    Log.report(
                        Log.Error,
                        "degree: {} for index {}, diff_with_cst={} (vs error_threshold={}) ",
                        degree,
                        i,
                        diff_with_cst,
                        error_threshold,
                        error=err)
        # fill row i of the table, missing monomials being stored as 0.0
        for ci in range(max_degree + 1):
            if ci in poly_object.coeff_map:
                coeff_table[i][ci] = poly_object.coeff_map[ci]
            else:
                coeff_table[i][ci] = 0.0

        if approx_error > error_threshold:
            Log.report(
                Log.Warning,
                "piecewise_approximation on index {} exceeds error threshold: {} > {}",
                i, approx_error, error_threshold)
        max_approx_error = max(max_approx_error, abs(approx_error))

    # computing offset
    diff = Subtraction(variable,
                       Constant(bound_low, precision=precision),
                       tag="diff",
                       debug=debug_multi,
                       precision=precision)
    int_prec = precision.get_integer_format()

    # delta = bound_high - bound_low
    delta_ratio = Constant(num_intervals / (bound_high - bound_low),
                           precision=precision)
    # computing table index
    # index = nearestint(diff / delta * <num_intervals>)
    # clamped to the valid row range [0, num_intervals - 1]
    index = Max(0,
                Min(
                    NearestInteger(
                        Multiplication(diff, delta_ratio,
                                       precision=precision),
                        precision=int_prec,
                    ), num_intervals - 1),
                tag="index",
                debug=debug_multi,
                precision=int_prec)
    # offset of the input with respect to the selected sub-interval
    poly_var = Subtraction(diff,
                           Multiplication(
                               Conversion(index, precision=precision),
                               Constant(interval_size,
                                        precision=precision)),
                           precision=precision,
                           tag="poly_var",
                           debug=debug_multi)
    # generating indexed polynomial
    coeffs = [(ci, TableLoad(coeff_table, index, ci))
              for ci in range(max_degree + 1)][::-1]
    poly_scheme = PolynomialSchemeEvaluator.generate_horner_scheme2(
        coeffs, poly_var, precision, {}, precision)
    return poly_scheme, max_approx_error
def generate_scalar_scheme(self, vx):
    """Build an approximation scheme evaluated at abs(vx).

    NOTE(review): this function looks unfinished. It calls
    ``sys.exit(1)`` right after the polynomial degree has been
    estimated, and the statements that follow reference ``eps_exp``,
    ``one_limit_exp`` and ``eps`` which are not defined in this
    function, and approximate ``sollya.erf`` rather than gamma.
    Confirm the intent before relying on anything past that call.

    :param vx: input node
    :return: the scheme node (only reachable if ``sys.exit`` is removed)
    """
    # approximation the gamma function
    abs_vx = Abs(vx, precision=self.precision)
    FCT_LIMIT = 1.0

    omega_value = self.precision.get_omega()

    def sollya_wrap_bigfloat_fct(bfct):
        """ wrap bigfloat's function <bfct> such that is can be used
            on SollyaObject inputs and returns SollyaObject results """
        def fct(x):
            return sollya.SollyaObject(bfct(SollyaObject(x).bigfloat()))
        return fct

    sollya_gamma = sollya_wrap_bigfloat_fct(bigfloat.gamma)
    sollya_digamma = sollya_wrap_bigfloat_fct(bigfloat.digamma)
    # first derivative of gamma is digamma * gamma
    bigfloat_gamma_d0 = lambda x: bigfloat.gamma(x) * bigfloat.digamma(x)
    sollya_gamma_d0 = sollya_wrap_bigfloat_fct(bigfloat_gamma_d0)
    # approximating trigamma with straightforward derivatives formulae of digamma
    # (forward finite difference with relative step U)
    U = 2**-64
    bigfloat_trigamma = lambda x: (
        (bigfloat.digamma(x * (1 + U)) - bigfloat.digamma(x)) / (x * U))
    sollya_trigamma = sollya_wrap_bigfloat_fct(bigfloat_trigamma)
    # second derivative of gamma: trigamma * gamma + gamma' * digamma
    bigfloat_gamma_d1 = lambda x: (bigfloat_trigamma(x) * bigfloat.gamma(
        x) + bigfloat_gamma_d0(x) * bigfloat.digamma(x))
    sollya_gamma_d1 = sollya_wrap_bigfloat_fct(bigfloat_gamma_d1)

    def sollya_gamma_fct(x, diff_order, prec):
        """ wrapper to use bigfloat implementation of gamma (and of its
            first two derivatives) rather than sollya's implementation
            directly.
            This wrapper implements sollya's function API.

            :param x: numerical input value (may be an Interval)
            :param diff_order: differential order (0, 1 or 2)
            :param prec: numerical precision expected (min)
            :raises NotImplementedError: for any other differential order
        """
        fct = None
        if diff_order == 0:
            fct = sollya_gamma
        elif diff_order == 1:
            fct = sollya_gamma_d0
        elif diff_order == 2:
            fct = sollya_gamma_d1
        else:
            raise NotImplementedError
        with bigfloat.precision(prec):
            if x.is_range():
                # NOTE(review): only the two end points are evaluated;
                # this encloses the image only where fct is monotonic
                # on x -- confirm
                lo = sollya.inf(x)
                hi = sollya.sup(x)
                return sollya.Interval(fct(lo), fct(hi))
            else:
                return fct(x)

    # search the lower x such that gamma(x) >= omega
    omega_upper_limit = search_bound_threshold(sollya_gamma, omega_value, 2,
                                               1000.0, self.precision)
    Log.report(Log.Debug, "gamma(x) = {} limit is {}", omega_value,
               omega_upper_limit)

    # evaluate gamma(<min-normal-value>)
    lower_x_bound = self.precision.get_min_normal_value()
    value_min = sollya_gamma(lower_x_bound)
    Log.report(Log.Debug, "gamma({}) = {}(log2={})", lower_x_bound,
               value_min, int(sollya.log2(value_min)))

    # evaluate gamma(<min-subnormal-value>)
    lower_x_bound = self.precision.get_min_subnormal_value()
    value_min = sollya_gamma(lower_x_bound)
    Log.report(Log.Debug, "gamma({}) = {}(log2={})", lower_x_bound,
               value_min, int(sollya.log2(value_min)))

    # Gamma is defined such that gamma(x+1) = x * gamma(x)
    #
    # we approximate gamma over [1, 2]
    # y in [1, 2]
    # gamma(y) = (y-1) * gamma(y-1)
    # gamma(y-1) = gamma(y) / (y-1)
    Log.report(Log.Info, "building mathematical polynomial")
    approx_interval = Interval(1, 2)
    approx_fct = sollya.function(sollya_gamma_fct)
    poly_degree = int(
        sup(
            guessdegree(approx_fct, approx_interval, S2**
                        -(self.precision.get_field_size() + 5)))) + 1
    Log.report(Log.Debug, "approximation's poly degree over [1, 2] is {}",
               poly_degree)
    # NOTE(review): exits the interpreter here (assuming sys is the
    # standard module); nothing below this line is executed
    sys.exit(1)

    poly_degree_list = list(range(1, poly_degree, 2))
    Log.report(Log.Debug, "poly_degree is {} and list {}", poly_degree,
               poly_degree_list)
    global_poly_object = Polynomial.build_from_approximation(
        approx_fct, poly_degree_list,
        [self.precision] * len(poly_degree_list), approx_interval,
        sollya.relative)
    Log.report(
        Log.Debug, "inform is {}",
        dirtyinfnorm(approx_fct - global_poly_object.get_sollya_object(),
                     approx_interval))
    poly_object = global_poly_object.sub_poly(start_index=1, offset=1)

    ext_precision = {
        ML_Binary32: ML_SingleSingle,
        ML_Binary64: ML_DoubleDouble,
    }[self.precision]

    pre_poly = PolynomialSchemeEvaluator.generate_horner_scheme(
        poly_object, abs_vx, unified_precision=self.precision)

    result = FMA(pre_poly, abs_vx, abs_vx)
    result.set_attributes(tag="result", debug=debug_multi)

    eps_target = S2**-(self.precision.get_field_size() + 5)

    def offset_div_function(fct):
        # NOTE(review): despite its name, no division is performed; the
        # body is identical to offset_function below
        return lambda offset: fct(sollya.x + offset)

    # empiral numbers
    field_size = {ML_Binary32: 6, ML_Binary64: 8}[self.precision]

    # NOTE(review): eps_exp is not defined in this function -- verify
    near_indexing = SubFPIndexing(eps_exp, 0, 6, self.precision)
    near_approx = generic_poly_split(offset_div_function(sollya.erf),
                                     near_indexing, eps_target,
                                     self.precision, abs_vx)
    near_approx.set_attributes(tag="near_approx", debug=debug_multi)

    def offset_function(fct):
        return lambda offset: fct(sollya.x + offset)

    # NOTE(review): one_limit_exp is not defined in this function -- verify
    medium_indexing = SubFPIndexing(1, one_limit_exp, 7, self.precision)

    medium_approx = generic_poly_split(offset_function(sollya.erf),
                                       medium_indexing, eps_target,
                                       self.precision, abs_vx)
    medium_approx.set_attributes(tag="medium_approx", debug=debug_multi)

    # approximation for positive values
    # NOTE(review): eps is not defined in this function -- verify
    scheme = ConditionBlock(
        abs_vx < eps, Return(result),
        ConditionBlock(
            abs_vx < near_indexing.get_max_bound(), Return(near_approx),
            ConditionBlock(abs_vx < medium_indexing.get_max_bound(),
                           Return(medium_approx),
                           Return(Constant(1.0,
                                           precision=self.precision)))))
    return scheme
def generate_scalar_scheme(self, vx):
    """Build the operation graph of a scalar sinh(x) implementation.

    The sign of the input is extracted first, then the computation is done
    on |x| using the identity sinh(x) = 1/2 * (exp(x) - exp(-x)):

    * argument reduction: k = trunc(|x| * 2^index_size / log(2)) and
      r = |x| - k * log(2) / 2^index_size (constant split in hi/lo parts);
    * k is split into k_hi (exponent part) and k_lo (table index);
    * 2^(+/- k_lo / 2^index_size) is read from a 4-column table
      (neg_hi, neg_lo, pos_hi, pos_lo);
    * exp(r) and exp(-r) are evaluated with the same polynomial applied to
      r and -r.

    :param vx: input operation node
    :return: a Statement returning sign * result, or sign * +infinity when
             |x| is greater than the overflow bound
    """
    Log.set_dump_stdout(True)

    Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m")
    if self.debug_flag:
        Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

    index_size = 5

    comp_lo = (vx < 0)
    comp_lo.set_attributes(tag = "comp_lo", precision = ML_Bool)
    sign = Select(comp_lo, -1, 1, precision = self.precision)

    # as sinh is an odd function, we can simplify the input to its absolute
    # value once the sign has been extracted
    vx = Abs(vx)
    int_precision = self.precision.get_integer_format()

    # argument reduction
    arg_reg_value = log(2)/2**index_size
    inv_log2_value = round(1/arg_reg_value, self.precision.get_sollya_object(), sollya.RN)
    inv_log2_cst = Constant(inv_log2_value, precision = self.precision, tag = "inv_log2")

    # for r_hi to be accurate we ensure k * log2_hi_value_cst is exact
    # by limiting the number of non-zero bits in log2_hi_value_cst
    # sinh(x) ~ exp(x)/2 for a big enough x (x is non-negative here)
    # sinh(x) > 2^1023 <=> exp(x) > 2^1024 <=> x > log(2^1024)
    # k = inv_log2_value * x
    # -1 for guard
    # NOTE(review): the 2^1024 bound looks binary64-specific while
    # self.precision may be another format -- confirm.
    max_k_approx = inv_log2_value * log(sollya.SollyaObject(2)**1024)
    max_k_bitsize = int(ceil(log2(max_k_approx)))
    Log.report(Log.Info, "max_k_bitsize: %d" % max_k_bitsize)
    log2_hi_value_precision = self.precision.get_precision() - max_k_bitsize - 1

    log2_hi_value = round(arg_reg_value, log2_hi_value_precision, sollya.RN)
    log2_lo_value = round(arg_reg_value - log2_hi_value, self.precision.get_sollya_object(), sollya.RN)
    log2_hi_value_cst = Constant(log2_hi_value, tag = "log2_hi_value", precision = self.precision)
    log2_lo_value_cst = Constant(log2_lo_value, tag = "log2_lo_value", precision = self.precision)

    k = Trunc(Multiplication(inv_log2_cst, vx), precision = self.precision)
    k_log2 = Multiplication(k, log2_hi_value_cst, precision = self.precision,
                            exact = True, tag = "k_log2", unbreakable = True)
    r_hi = vx - k_log2
    r_hi.set_attributes(tag = "r_hi", debug = debug_multi, unbreakable = True)
    r_lo = -k * log2_lo_value_cst
    # reduced argument
    r = r_hi + r_lo
    r.set_attributes(tag = "r", debug = debug_multi)

    if is_gappa_installed():
        r_eval_error = self.get_eval_error(r_hi, variable_copy_map = {
            vx: Variable("vx", interval = Interval(0, 715), precision = self.precision),
            k: Variable("k", interval = Interval(0, 1024), precision = self.precision)
        })
        # the template needs a placeholder, otherwise the value passed as
        # extra argument never reaches the message
        Log.report(Log.Verbose, "r_eval_error: {}", r_eval_error)

    approx_interval = Interval(-arg_reg_value, arg_reg_value)
    error_goal_approx = 2**-(self.precision.get_precision())

    poly_degree = sup(guessdegree(exp(sollya.x), approx_interval, error_goal_approx)) + 3
    # constant coefficient limited to 1 bit, others in the working precision
    precision_list = [1] + [self.precision] * (poly_degree)

    k_integer = Conversion(k, precision = int_precision, tag = "k_integer", debug = debug_multi)
    k_hi = BitLogicRightShift(k_integer, Constant(index_size, precision=int_precision),
                              tag = "k_int_hi", precision = int_precision, debug = debug_multi)
    k_lo = Modulo(k_integer, 2**index_size, tag = "k_int_lo",
                  precision = int_precision, debug = debug_multi)
    # NOTE(review): pow_exp is not referenced by the rest of this function
    pow_exp = ExponentInsertion(Conversion(k_hi, precision = int_precision),
                                precision = self.precision, tag = "pow_exp", debug = debug_multi)

    exp_table = ML_NewTable(dimensions = [2 * 2**index_size, 4],
                            storage_precision = self.precision,
                            tag = self.uniquify_name("exp2_table"))

    for i in range(2 * 2**index_size):
        # rows of the second half of the table map back to i - 2^index_size
        input_value = i - 2**index_size if i >= 2**index_size else i

        reduced_hi_prec = int(self.precision.get_mantissa_size() - 8)
        # using SollyaObject wrapper to force evaluation by sollya
        # with higher precision
        exp_value = sollya.SollyaObject(2)**((input_value)* 2**-index_size)
        mexp_value = sollya.SollyaObject(2)**((-input_value)* 2**-index_size)
        pos_value_hi = round(exp_value, reduced_hi_prec, sollya.RN)
        pos_value_lo = round(exp_value - pos_value_hi, self.precision.get_sollya_object(), sollya.RN)
        neg_value_hi = round(mexp_value, reduced_hi_prec, sollya.RN)
        neg_value_lo = round(mexp_value - neg_value_hi, self.precision.get_sollya_object(), sollya.RN)
        exp_table[i][0] = neg_value_hi
        exp_table[i][1] = neg_value_lo
        exp_table[i][2] = pos_value_hi
        exp_table[i][3] = pos_value_lo

    # log2_value = log(2) / 2^index_size
    # sinh(x) = 1/2 * (exp(x) - exp(-x))
    # exp(x) = exp(x - k * log2_value + k * log2_value)
    #
    # r = x - k * log2_value
    # exp(x) = exp(r) * 2 ^ (k / 2^index_size)
    #
    # k / 2^index_size = h + l * 2^-index_size, with k, h, l integers
    # exp(x) = exp(r) * 2^h * 2^(l *2^-index_size)
    #
    # sinh(x) = exp(r) * 2^(h-1) * 2^(l *2^-index_size)
    #           - exp(-r) * 2^(-h-1) * 2^(-l *2^-index_size)
    # S=2^(h-1), T = 2^(-h-1)
    # exp(r)  = 1 + poly_pos(r)
    # exp(-r) = 1 + poly_neg(r)
    # 2^(l / 2^index_size)  = pos_value_hi + pos_value_lo
    # 2^(-l / 2^index_size) = neg_value_hi + neg_value_lo
    #
    error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

    poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
        exp(sollya.x), poly_degree, precision_list, approx_interval,
        sollya.absolute, error_function = error_function)

    Log.report(Log.Verbose, "poly_approx_error: {}, {}".format(
        poly_approx_error, float(log2(poly_approx_error))))

    polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme
    poly_pos = polynomial_scheme_builder(poly_object.sub_poly(start_index = 1), r,
                                         unified_precision = self.precision)
    poly_pos.set_attributes(tag = "poly_pos", debug = debug_multi)

    poly_neg = polynomial_scheme_builder(poly_object.sub_poly(start_index = 1), -r,
                                         unified_precision = self.precision)
    poly_neg.set_attributes(tag = "poly_neg", debug = debug_multi)

    # k_lo is offset by 2^index_size: the second half of the table is used
    table_index = Addition(k_lo, Constant(2**index_size, precision = int_precision),
                           precision = int_precision, tag = "table_index", debug = debug_multi)

    neg_value_load_hi = TableLoad(exp_table, table_index, 0, tag = "neg_value_load_hi", debug = debug_multi)
    neg_value_load_lo = TableLoad(exp_table, table_index, 1, tag = "neg_value_load_lo", debug = debug_multi)
    pos_value_load_hi = TableLoad(exp_table, table_index, 2, tag = "pos_value_load_hi", debug = debug_multi)
    pos_value_load_lo = TableLoad(exp_table, table_index, 3, tag = "pos_value_load_lo", debug = debug_multi)

    # exponents are clamped from below by the minimal normal exponent
    k_plus = Max(
        Subtraction(k_hi, Constant(1, precision = int_precision),
                    precision=int_precision, tag="k_plus", debug=debug_multi),
        Constant(self.precision.get_emin_normal(), precision = int_precision))
    k_neg = Max(
        Subtraction(-k_hi, Constant(1, precision=int_precision),
                    precision=int_precision, tag="k_neg", debug=debug_multi),
        Constant(self.precision.get_emin_normal(), precision = int_precision))

    # 2^(h-1)
    pow_exp_pos = ExponentInsertion(k_plus, precision = self.precision,
                                    tag="pow_exp_pos", debug=debug_multi)
    # 2^(-h-1)
    pow_exp_neg = ExponentInsertion(k_neg, precision = self.precision,
                                    tag="pow_exp_neg", debug=debug_multi)

    hi_terms = (pos_value_load_hi * pow_exp_pos - neg_value_load_hi * pow_exp_neg)
    hi_terms.set_attributes(tag = "hi_terms", debug=debug_multi)

    pos_exp = (pos_value_load_hi * poly_pos
               + (pos_value_load_lo + pos_value_load_lo * poly_pos)) * pow_exp_pos
    pos_exp.set_attributes(tag = "pos_exp", debug = debug_multi)

    neg_exp = (neg_value_load_hi * poly_neg
               + (neg_value_load_lo + neg_value_load_lo * poly_neg)) * pow_exp_neg
    neg_exp.set_attributes(tag = "neg_exp", debug = debug_multi)

    result = Addition(
        Subtraction(
            pos_exp,
            neg_exp,
            precision=self.precision,
        ),
        hi_terms,
        precision=self.precision,
        tag="result",
        debug=debug_multi
    )

    # overflow bound: asinh of the largest finite value, rounded down
    ov_value = round(asinh(self.precision.get_max_value()),
                     self.precision.get_sollya_object(), sollya.RD)
    ov_flag = Comparison(Abs(vx), Constant(ov_value, precision = self.precision),
                         specifier = Comparison.Greater)

    # main scheme
    scheme = Statement(
        Return(
            Select(
                ov_flag,
                sign*FP_PlusInfty(self.precision),
                sign*result
            )))

    return scheme
def generate_scheme(self):
    """Build the operation graph computing sin(x) or cos(x).

    The output function is selected by self.sin_output. The scheme is:

    * NaN or infinite input: exceptions are cleared and a quiet NaN is
      returned;
    * vx >= ph_bound: Payne-Hanek reduction (generate_payne_hanek);
    * otherwise: k = nearestint(x * 2^m / pi) and a Cody-Waite style
      reduction with a Fast2Sum compensation;
    * in both cases the reduced argument red_vx and the table index modk
      are assigned to local variables, then the result is rebuilt from a
      cosine table and two polynomials.

    :return: the ConditionBlock implementing the whole function
    """
    # declaring CodeFunction and retrieving input variable
    vx = self.implementation.add_input_variable("x", self.precision)

    Log.report(Log.Info, "generating implementation scheme")
    if self.debug_flag:
        Log.report(Log.Info, "debug has been enabled")

    # local overloading of RaiseReturn operation
    # NOTE(review): this helper is not called in the rest of the function
    def SincosRaiseReturn(*args, **kwords):
        kwords["arg_value"] = vx
        kwords["function_name"] = self.function_name
        return RaiseReturn(*args, **kwords)

    sollya_precision = self.precision.get_sollya_object()
    # NOTE(review): hi_precision and ext_precision are not used below
    hi_precision = self.precision.get_field_size() - 8
    # number of bits kept in the high part of the reduction constants
    cw_hi_precision = self.precision.get_field_size() - 4

    ext_precision = {
        ML_Binary32: ML_Binary64,
        ML_Binary64: ML_Binary64
    }[self.precision]

    int_precision = {
        ML_Binary32: ML_Int32,
        ML_Binary64: ML_Int64
    }[self.precision]

    # threshold from which the large-argument reduction is used
    if self.precision is ML_Binary32:
        ph_bound = S2**10
    else:
        ph_bound = S2**33

    # NOTE(review): the comparison is done on vx itself, not on its absolute
    # value -- confirm how large negative inputs are expected to be handled
    test_ph_bound = Comparison(vx, ph_bound, specifier=Comparison.GreaterOrEqual,
                               precision=ML_Bool, likely=False)

    # argument reduction
    # m
    frac_pi_index = {ML_Binary32: 10, ML_Binary64: 14}[self.precision]

    C0 = Constant(0, precision=int_precision)
    # NOTE(review): C1 is not used below
    C1 = Constant(1, precision=int_precision)
    C_offset = Constant(3 * S2**(frac_pi_index - 1), precision=int_precision)

    # 2^m / pi, high and low parts
    frac_pi = round(S2**frac_pi_index / pi, cw_hi_precision, sollya.RN)
    frac_pi_lo = round(S2**frac_pi_index / pi - frac_pi, sollya_precision, sollya.RN)
    # pi / 2^m, high part
    inv_frac_pi = round(pi / S2**frac_pi_index, cw_hi_precision, sollya.RN)
    # pi / 2^m, low part
    inv_frac_pi_lo = round(pi / S2**frac_pi_index - inv_frac_pi, sollya_precision, sollya.RN)

    # computing k
    vx.set_attributes(tag="vx", debug=debug_multi)

    vx_pi = Addition(Multiplication(vx, Constant(frac_pi, precision=self.precision),
                                    precision=self.precision),
                     Multiplication(vx, Constant(frac_pi_lo, precision=self.precision),
                                    precision=self.precision),
                     precision=self.precision, tag="vx_pi", debug=debug_multi)

    k = NearestInteger(vx_pi, precision=int_precision, tag="k", debug=debug_multi)
    # k in floating-point precision
    fk = Conversion(k, precision=self.precision, tag="fk", debug=debug_multi)

    inv_frac_pi_cst = Constant(inv_frac_pi, tag="inv_frac_pi",
                               precision=self.precision, debug=debug_multi)
    inv_frac_pi_lo_cst = Constant(inv_frac_pi_lo, tag="inv_frac_pi_lo",
                                  precision=self.precision, debug=debug_multi)

    # Cody-Waite reduction
    red_coeff1 = Multiplication(fk, inv_frac_pi_cst, precision=self.precision, exact=True)
    red_coeff2 = Multiplication(Negation(fk, precision=self.precision), inv_frac_pi_lo_cst,
                                precision=self.precision, exact=True)

    # Should be exact / Sterbenz' Lemma
    pre_sub_mul = Subtraction(vx, red_coeff1, precision=self.precision, exact=True)

    # Fast2Sum: s is the rounded sum, t the recovered rounding error
    s = Addition(pre_sub_mul, red_coeff2, precision=self.precision,
                 unbreakable=True, tag="s", debug=debug_multi)
    z = Subtraction(s, pre_sub_mul, precision=self.precision,
                    unbreakable=True, tag="z", debug=debug_multi)
    t = Subtraction(red_coeff2, z, precision=self.precision,
                    unbreakable=True, tag="t", debug=debug_multi)
    red_vx_std = Addition(s, t, precision=self.precision)
    red_vx_std.set_attributes(tag="red_vx_std", debug=debug_multi)

    # To compute sine we offset x by 3pi/2
    # which means add 3 * S2^(frac_pi_index-1) to k
    if self.sin_output:
        Log.report(Log.Info, "Computing Sin")
        offset_k = Addition(k, C_offset, precision=int_precision, tag="offset_k")
    else:
        Log.report(Log.Info, "Computing Cos")
        offset_k = k

    # local variables assigned by whichever reduction path is taken
    modk = Variable("modk", precision=int_precision, var_type=Variable.Local)
    red_vx = Variable("red_vx", precision=self.precision, var_type=Variable.Local)

    # Faster modulo using bitwise logic
    modk_std = BitLogicAnd(offset_k, 2**(frac_pi_index + 1) - 1,
                           precision=int_precision, tag="modk", debug=debug_multi)

    approx_interval = Interval(-pi / (S2**(frac_pi_index + 1)),
                               pi / S2**(frac_pi_index + 1))

    red_vx.set_interval(approx_interval)

    Log.report(Log.Info, "approx interval: %s\n" % approx_interval)

    Log.report(Log.Info, "building tabulated approximation for sin and cos")

    error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

    # generate_estrin_scheme is a possible alternative builder
    polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

    # single-column table holding cos(i * pi / 2^m)
    table_index_size = frac_pi_index + 1
    cos_table = ML_NewTable(dimensions=[2**table_index_size, 1],
                            storage_precision=self.precision,
                            tag=self.uniquify_name("cos_table"))

    for i in range(2**(frac_pi_index + 1)):
        local_x = i * pi / S2**frac_pi_index
        cos_local = round(cos(local_x), self.precision.get_sollya_object(), sollya.RN)
        cos_table[i][0] = cos_local

    # the sine value is read from the cosine table at a shifted index and
    # negated
    sin_index = Modulo(modk + 2**(frac_pi_index - 1), 2**(frac_pi_index + 1),
                       precision=int_precision, tag="sin_index")
    tabulated_cos = TableLoad(cos_table, modk, C0, precision=self.precision,
                              tag="tab_cos", debug=debug_multi)
    tabulated_sin = -TableLoad(cos_table, sin_index, C0, precision=self.precision,
                               tag="tab_sin", debug=debug_multi)

    poly_degree_cos = sup(
        guessdegree(cos(sollya.x), approx_interval,
                    S2**-self.precision.get_precision()) + 2)
    poly_degree_sin = sup(
        guessdegree(sin(sollya.x) / sollya.x, approx_interval,
                    S2**-self.precision.get_precision()) + 2)

    poly_degree_cos_list = range(0, int(poly_degree_cos) + 3)
    poly_degree_sin_list = range(0, int(poly_degree_sin) + 3)

    # cosine polynomial: every coefficient uses self.precision
    # NOTE(review): an earlier comment mentioned limiting the leading
    # coefficients to 1 bit; this is not what the lists below do
    poly_cos_prec_list = [self.precision] * len(poly_degree_cos_list)
    # sine polynomial: every coefficient uses self.precision
    poly_sin_prec_list = [self.precision] * len(poly_degree_sin_list)

    # NOTE(review): same definition as the error_function assigned above
    error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)
    Log.report(Log.Info, "building mathematical polynomials for sin and cos")
    # Polynomial approximations
    Log.report(Log.Info, "cos")
    poly_object_cos, poly_error_cos = Polynomial.build_from_approximation_with_error(
        cos(sollya.x), poly_degree_cos_list, poly_cos_prec_list,
        approx_interval, sollya.absolute, error_function=error_function)
    Log.report(Log.Info, "sin")
    poly_object_sin, poly_error_sin = Polynomial.build_from_approximation_with_error(
        sin(sollya.x), poly_degree_sin_list, poly_sin_prec_list,
        approx_interval, sollya.absolute, error_function=error_function)

    Log.report(
        Log.Info, "poly error cos: {} / {:d}".format(
            poly_error_cos, int(sollya.log2(poly_error_cos))))
    Log.report(
        Log.Info, "poly error sin: {0} / {1:d}".format(
            poly_error_sin, int(sollya.log2(poly_error_sin))))
    Log.report(Log.Info, "poly cos : %s" % poly_object_cos)
    Log.report(Log.Info, "poly sin : %s" % poly_object_sin)

    # Polynomial evaluation scheme: the cosine polynomial is evaluated from
    # index 1 and the sine polynomial from index 2; the corresponding
    # leading terms are added back explicitly in cos_eval_d_1 below
    poly_cos = polynomial_scheme_builder(
        poly_object_cos.sub_poly(start_index=1), red_vx,
        unified_precision=self.precision)
    poly_sin = polynomial_scheme_builder(
        poly_object_sin.sub_poly(start_index=2), red_vx,
        unified_precision=self.precision)
    poly_cos.set_attributes(tag="poly_cos", debug=debug_multi)
    poly_sin.set_attributes(tag="poly_sin", debug=debug_multi, unbreakable=True)

    # TwoProductFMA: each product is paired with the FMA computing its
    # rounding error
    mul_cos_x = tabulated_cos * poly_cos
    mul_cos_y = FusedMultiplyAdd(tabulated_cos, poly_cos, -mul_cos_x,
                                 precision=self.precision)

    mul_sin_x = tabulated_sin * poly_sin
    mul_sin_y = FusedMultiplyAdd(tabulated_sin, poly_sin, -mul_sin_x,
                                 precision=self.precision)

    mul_coeff_sin_hi = tabulated_sin * red_vx
    # NOTE(review): unlike the two FMAs above, no precision is given here
    mul_coeff_sin_lo = FusedMultiplyAdd(tabulated_sin, red_vx, -mul_coeff_sin_hi)

    mul_cos = Addition(mul_cos_x, mul_cos_y, precision=self.precision, tag="mul_cos")
    mul_sin = Negation(Addition(mul_sin_x, mul_sin_y, precision=self.precision),
                       precision=self.precision, tag="mul_sin")

    mul_coeff_sin = Negation(Addition(mul_coeff_sin_hi, mul_coeff_sin_lo,
                                      precision=self.precision),
                             precision=self.precision, tag="mul_coeff_sin")

    mul_cos_x.set_attributes(tag="mul_cos_x", precision=self.precision)
    mul_cos_y.set_attributes(tag="mul_cos_y", precision=self.precision)
    mul_sin_x.set_attributes(tag="mul_sin_x", precision=self.precision)
    mul_sin_y.set_attributes(tag="mul_sin_y", precision=self.precision)

    # final reconstruction, smallest terms summed first
    cos_eval_d_1 = (((mul_cos + mul_sin) + mul_coeff_sin) + tabulated_cos)

    cos_eval_d_1.set_attributes(tag="cos_eval_d_1", precision=self.precision,
                                debug=debug_multi)

    result_1 = Statement(Return(cos_eval_d_1))

    #######################################################################
    #                    LARGE ARGUMENT MANAGEMENT                        #
    #                 (lar: Large Argument Reduction)                     #
    #######################################################################
    # payne and hanek argument reduction for large arguments
    ph_k = frac_pi_index
    ph_frac_pi = round(S2**ph_k / pi, 1500, sollya.RN)
    # NOTE(review): ph_inv_frac_pi is not used below
    ph_inv_frac_pi = pi / S2**ph_k

    ph_statement, ph_acc, ph_acc_int = generate_payne_hanek(vx, ph_frac_pi,
                                                            self.precision,
                                                            n=100, k=ph_k)

    # assigning Large Argument Reduction reduced variable
    lar_vx = Variable("lar_vx", precision=self.precision, var_type=Variable.Local)

    lar_red_vx = Addition(Multiplication(lar_vx, inv_frac_pi, precision=self.precision),
                          Multiplication(lar_vx, inv_frac_pi_lo, precision=self.precision),
                          precision=self.precision, tag="lar_red_vx", debug=debug_multi)

    # negative accumulator values are wrapped by adding 2^(ph_k + 1)
    C32 = Constant(2**(ph_k + 1), precision=int_precision, tag="C32")
    ph_acc_int_red = Select(ph_acc_int < C0, C32 + ph_acc_int, ph_acc_int,
                            precision=int_precision, tag="ph_acc_int_red")
    if self.sin_output:
        lar_offset_k = Addition(ph_acc_int_red, C_offset,
                                precision=int_precision, tag="lar_offset_k")
    else:
        lar_offset_k = ph_acc_int_red

    ph_acc_int_red.set_attributes(tag="ph_acc_int_red", debug=debug_multi)
    lar_modk = BitLogicAnd(lar_offset_k, 2**(frac_pi_index + 1) - 1,
                           precision=int_precision, tag="lar_modk", debug=debug_multi)

    lar_statement = Statement(ph_statement,
                              ReferenceAssign(lar_vx, ph_acc, debug=debug_multi),
                              ReferenceAssign(red_vx, lar_red_vx, debug=debug_multi),
                              ReferenceAssign(modk, lar_modk),
                              prevent_optimization=True)

    test_NaN_or_Inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False,
                           tag="NaN_or_Inf", debug=debug_multi)
    return_NaN_or_Inf = Statement(Return(FP_QNaN(self.precision)))

    # modk and red_vx are listed first in the statement, ahead of the
    # branch which assigns them
    scheme = ConditionBlock(
        test_NaN_or_Inf,
        Statement(ClearException(), return_NaN_or_Inf),
        Statement(
            modk, red_vx,
            ConditionBlock(
                test_ph_bound, lar_statement,
                Statement(
                    ReferenceAssign(modk, modk_std),
                    ReferenceAssign(red_vx, red_vx_std),
                )),
            result_1))

    return scheme
def generate_scalar_scheme(self, vx):
    """Build the scalar evaluation scheme of erf applied to |vx|.

    Four ranges of |vx| are handled, from the smallest to the largest:

    * |vx| < eps: odd polynomial approximating erf(x) - x, evaluated as
      poly(|vx|) * |vx| + |vx|;
    * |vx| below the upper bound of the "near" indexing: piecewise
      polynomials built by generic_poly_split;
    * |vx| below the upper bound of the "medium" indexing: same technique
      with a second indexing;
    * otherwise: the constant 1.0.

    Only the absolute value of the input is used; the sign of vx is not
    reapplied in this function.

    :param vx: input operation node
    :return: the nested ConditionBlock selecting between the four cases
    """
    fmt = self.precision
    magnitude = Abs(vx, precision=fmt)

    # bound searched between 1.0 and 10.0; presumably the input from which
    # erf evaluates to 1.0 in the working precision
    saturation_value = 1.0
    saturation_bound = search_bound_threshold(sollya.erf, saturation_value,
                                              1.0, 10.0, fmt)
    saturation_exp = int(sollya.floor(sollya.log2(saturation_bound)))
    Log.report(Log.Debug, "erf(x) = 1.0 limit is {}, with exp={}",
               saturation_bound, saturation_exp)

    # empirical exponent of the upper bound of the interval around zero
    tiny_exp = {ML_Binary32: -3, ML_Binary64: -5}[fmt]
    tiny_bound = S2**tiny_exp
    # accuracy target shared by every approximation below
    accuracy_goal = S2**-(fmt.get_field_size() + 5)

    Log.report(Log.Info, "building mathematical polynomial")
    zero_interval = Interval(0, tiny_bound)
    # erf(x) - x is odd, so only odd monomials are requested
    target_fct = sollya.erf(sollya.x) - (sollya.x)
    degree_bound = int(sup(guessdegree(target_fct, zero_interval, accuracy_goal))) + 1
    odd_degrees = list(range(1, degree_bound, 2))
    Log.report(Log.Debug, "poly_degree is {} and list {}", degree_bound, odd_degrees)

    coeff_formats = [fmt] * len(odd_degrees)
    full_poly = Polynomial.build_from_approximation(
        target_fct, odd_degrees, coeff_formats, zero_interval, sollya.relative)
    approx_residual = dirtyinfnorm(target_fct - full_poly.get_sollya_object(),
                                   zero_interval)
    Log.report(Log.Debug, "inform is {}", approx_residual)

    # the polynomial is evaluated with its indexes shifted down by one,
    # the missing factor |vx| being reintroduced by the FMA
    shifted_poly = full_poly.sub_poly(start_index=1, offset=1)
    horner = PolynomialSchemeEvaluator.generate_horner_scheme(
        shifted_poly, magnitude, unified_precision=fmt)
    tiny_result = FMA(horner, magnitude, magnitude)
    tiny_result.set_attributes(tag="result", debug=debug_multi)

    def recentered(fct):
        """Return a builder mapping an offset to x -> fct(x + offset)."""
        return lambda offset: fct(sollya.x + offset)

    # empirical indexing parameters
    near_indexing = SubFPIndexing(tiny_exp, 0, 6, fmt)
    near_result = generic_poly_split(recentered(sollya.erf), near_indexing,
                                     accuracy_goal, fmt, magnitude)
    near_result.set_attributes(tag="near_approx", debug=debug_multi)

    medium_indexing = SubFPIndexing(1, saturation_exp, 7, fmt)
    medium_result = generic_poly_split(recentered(sollya.erf), medium_indexing,
                                       accuracy_goal, fmt, magnitude)
    medium_result.set_attributes(tag="medium_approx", debug=debug_multi)

    # approximation for positive values, assembled from the outermost test
    # to the innermost fallback
    below_tiny = magnitude < tiny_bound
    return_tiny = Return(tiny_result)
    below_near = magnitude < near_indexing.get_max_bound()
    return_near = Return(near_result)
    below_medium = magnitude < medium_indexing.get_max_bound()
    return_medium = Return(medium_result)
    return_one = Return(Constant(1.0, precision=fmt))

    medium_or_one = ConditionBlock(below_medium, return_medium, return_one)
    near_or_beyond = ConditionBlock(below_near, return_near, medium_or_one)
    return ConditionBlock(below_tiny, return_tiny, near_or_beyond)
def generate_scheme(self):
    """Build the fixed-point operation graph computing cos(x) or sin(x).

    The input, bounded by self.input_intervals[0], is converted to a signed
    fixed-point format and split into a high part (used as the index of a
    fused cosine/sine table) and a low part. Degree-2 approximations of
    cos(x) and sin(x)/x over the low-part interval provide the coefficients
    handed to self.generate_cos_scheme or self.generate_sin_scheme,
    depending on self.cos_output.

    :return: a Statement returning the result converted to the output
             precision
    """
    # declaring CodeFunction and retrieving input variable
    vx = self.implementation.add_input_variable("x", self.precision)

    table_size_log = self.table_size_log
    integer_size = 31
    # NOTE(review): integer_precision is not used below
    integer_precision = ML_Int32

    max_bound = sup(abs(self.input_intervals[0]))
    max_bound_log = int(ceil(log2(max_bound)))
    Log.report(Log.Info, "max_bound_log=%s " % max_bound_log)
    scaling_power = integer_size - max_bound_log
    Log.report(Log.Info, "scaling power: %s " % scaling_power)

    storage_precision = ML_Custom_FixedPoint_Format(1, 30, signed=True)

    Log.report(Log.Info, "tabulating cosine and sine")
    # cosine and sine fused table (fixed tag rather than a uniquified name)
    fused_table = ML_NewTable(
        dimensions=[2**table_size_log, 2],
        storage_precision=storage_precision,
        tag="fast_lib_shared_table")
    # filling table: column 0 is the cosine, column 1 the sine
    for i in range(2**table_size_log):
        local_x = i / S2**table_size_log * S2**max_bound_log

        cos_local = cos(local_x)
        sin_local = sin(local_x)

        fused_table[i][0] = cos_local
        fused_table[i][1] = sin_local

    # argument reduction evaluation scheme: the conversion to fixed-point
    # replaces an explicit scaling followed by a rounding to integer
    red_vx_precision = ML_Custom_FixedPoint_Format(31 - scaling_power,
                                                   scaling_power, signed=True)
    Log.report(
        Log.Verbose, "red_vx_precision.get_c_bit_size()=%d" %
        red_vx_precision.get_c_bit_size())
    red_vx = Conversion(vx, precision=red_vx_precision, tag="red_vx",
                        debug=debug_fixed32)

    computation_precision = red_vx_precision
    output_precision = self.get_output_precision()
    Log.report(Log.Info, "computation_precision is %s" % computation_precision)
    Log.report(Log.Info, "storage_precision     is %s" % storage_precision)
    Log.report(Log.Info, "output_precision      is %s" % output_precision)

    # mask keeping the (table_size_log + 1) most significant bits of a
    # 32-bit word
    hi_mask_value = 2**32 - 2**(32 - table_size_log - 1)
    hi_mask = Constant(hi_mask_value, precision=ML_Int32)
    Log.report(Log.Info, "hi_mask=0x%x" % hi_mask_value)

    red_vx_hi_int = BitLogicAnd(TypeCast(red_vx, precision=ML_Int32), hi_mask,
                                precision=ML_Int32, tag="red_vx_hi_int",
                                debug=debugd)
    red_vx_hi = TypeCast(red_vx_hi_int, precision=red_vx_precision,
                         tag="red_vx_hi", debug=debug_fixed32)
    red_vx_lo = red_vx - red_vx_hi
    red_vx_lo.set_attributes(precision=red_vx_precision, tag="red_vx_lo",
                             debug=debug_fixed32)
    table_index = BitLogicRightShift(TypeCast(red_vx, precision=ML_Int32),
                                     scaling_power - (table_size_log - max_bound_log),
                                     precision=ML_Int32, tag="table_index",
                                     debug=debugd)

    tabulated_cos = TableLoad(fused_table, table_index, 0, tag="tab_cos",
                              precision=storage_precision, debug=debug_fixed32)
    tabulated_sin = TableLoad(fused_table, table_index, 1, tag="tab_sin",
                              precision=storage_precision, debug=debug_fixed32)

    error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

    Log.report(Log.Info, "building polynomial approximation for cosine")
    # cosine polynomial approximation
    poly_interval = Interval(0, S2**(max_bound_log - table_size_log))
    Log.report(Log.Info, "poly_interval=%s " % poly_interval)
    # degree is fixed rather than derived from guessdegree
    # NOTE(review): cos_poly_degree is not used below
    cos_poly_degree = 2

    Log.report(Log.Verbose, "cosine polynomial approximation")
    cos_poly_object, cos_approx_error = Polynomial.build_from_approximation_with_error(
        cos(sollya.x), [0, 2],
        [0] + [computation_precision.get_bit_size()],
        poly_interval, sollya.absolute, error_function=error_function)
    Log.report(Log.Info, "cos_approx_error=%e" % cos_approx_error)
    cos_coeff_list = cos_poly_object.get_ordered_coeff_list()
    # NOTE(review): coeff_C0 is not used below
    coeff_C0 = cos_coeff_list[0][1]
    coeff_C2 = Constant(cos_coeff_list[1][1],
                        precision=ML_Custom_FixedPoint_Format(-1, 32, signed=True))

    Log.report(Log.Info, "building polynomial approximation for sine")

    # sine polynomial approximation (degree fixed as for the cosine)
    sin_poly_degree = 2
    # the degree is an integer: %d avoids the scientific notation of %e
    Log.report(Log.Info, "sine poly degree: %d" % sin_poly_degree)
    Log.report(Log.Verbose, "sine polynomial approximation")
    # NOTE(review): this precision list has sin_poly_degree + 2 entries for
    # two monomials, whereas the cosine list has exactly two -- confirm
    sin_poly_object, sin_approx_error = Polynomial.build_from_approximation_with_error(
        sin(sollya.x) / sollya.x, [0, 2],
        [0] + [computation_precision.get_bit_size()] * (sin_poly_degree + 1),
        poly_interval, sollya.absolute, error_function=error_function)
    sin_coeff_list = sin_poly_object.get_ordered_coeff_list()
    # NOTE(review): coeff_S0 is not used below
    coeff_S0 = sin_coeff_list[0][1]
    coeff_S2 = Constant(sin_coeff_list[1][1],
                        precision=ML_Custom_FixedPoint_Format(-1, 32, signed=True))

    # scheme selection between sine and cosine
    if self.cos_output:
        scheme = self.generate_cos_scheme(computation_precision, tabulated_cos,
                                          tabulated_sin, coeff_S2, coeff_C2,
                                          red_vx_lo)
    else:
        scheme = self.generate_sin_scheme(computation_precision, tabulated_cos,
                                          tabulated_sin, coeff_S2, coeff_C2,
                                          red_vx_lo)

    result = Conversion(scheme, precision=self.get_output_precision())

    Log.report(
        Log.Verbose, "result operation tree :\n %s " % result.get_str(
            display_precision=True, depth=None, memoization_map={}))
    scheme = Statement(Return(result))

    return scheme
def generate_scheme(self):
    """Build per-sub-interval polynomial schemes for a cosine implementation.

    The input is replaced by its absolute value, reduced with
    k = nearestint(|x| * 2^3 / pi), and one polynomial approximating
    cos(x + i * pi / 2^3) is generated for each of the 2^4 values of i.

    NOTE(review): the visible body stops inside the gappa branch of the
    last loop: nothing is returned and several values built here
    (specific_return, modk, poly_scheme_vector, ...) are never consumed.
    The end of the function appears to be missing -- confirm against the
    original source.
    """
    # declaring CodeFunction and retrieving input variable
    vx = Abs(self.implementation.add_input_variable("x", self.precision), tag = "vx")

    Log.report(Log.Info, "generating implementation scheme")
    if self.debug_flag:
        Log.report(Log.Info, "debug has been enabled")

    # local overloading of RaiseReturn operation
    def ExpRaiseReturn(*args, **kwords):
        kwords["arg_value"] = vx
        kwords["function_name"] = self.function_name
        return RaiseReturn(*args, **kwords)

    # debug attribute matching the working precision
    debug_precision = {ML_Binary32: debug_ftox, ML_Binary64: debug_lftolx}[self.precision]

    test_nan_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False,
                           debug = True, tag = "nan_or_inf")
    test_nan = Test(vx, specifier = Test.IsNaN, debug = True, tag = "is_nan_test")
    test_positive = Comparison(vx, 0, specifier = Comparison.GreaterOrEqual,
                               debug = True, tag = "inf_sign")

    test_signaling_nan = Test(vx, specifier = Test.IsSignalingNaN, debug = True,
                              tag = "is_signaling_nan")
    return_snan = Statement(ExpRaiseReturn(ML_FPE_Invalid,
                                           return_value = FP_QNaN(self.precision)))

    # return in case of infinity input
    infty_return = Statement(ConditionBlock(test_positive,
                                            Return(FP_PlusInfty(self.precision)),
                                            Return(FP_PlusZero(self.precision))))
    # return in case of specific value input (NaN or inf)
    specific_return = ConditionBlock(test_nan,
                                     ConditionBlock(test_signaling_nan, return_snan,
                                                    Return(FP_QNaN(self.precision))),
                                     infty_return)
    # return in case of standard (non-special) input

    sollya_precision = self.precision.get_sollya_object()
    # number of bits kept in the high part of pi / 2^frac_pi_index
    hi_precision = self.precision.get_field_size() - 3

    # argument reduction
    frac_pi_index = 3
    frac_pi = round(S2**frac_pi_index / pi, sollya_precision, sollya.RN)
    inv_frac_pi = round(pi / S2**frac_pi_index, hi_precision, sollya.RN)
    inv_frac_pi_lo = round(pi / S2**frac_pi_index - inv_frac_pi, sollya_precision, sollya.RN)
    # computing k = E(x * frac_pi)
    vx_pi = Multiplication(vx, frac_pi, precision = self.precision)
    k = NearestInteger(vx_pi, precision = ML_Int32, tag = "k", debug = True)
    fk = Conversion(k, precision = self.precision, tag = "fk")

    inv_frac_pi_cst = Constant(inv_frac_pi, tag = "inv_frac_pi", precision = self.precision)
    inv_frac_pi_lo_cst = Constant(inv_frac_pi_lo, tag = "inv_frac_pi_lo", precision = self.precision)

    # reduced argument in the working precision
    red_vx_hi = (vx - inv_frac_pi_cst * fk)
    red_vx_hi.set_attributes(tag = "red_vx_hi", debug = debug_precision, precision = self.precision)
    # NOTE(review): red_vx_lo_sub is not used below; the same product is
    # rebuilt inline in pre_red_vx
    red_vx_lo_sub = inv_frac_pi_lo_cst * fk
    red_vx_lo_sub.set_attributes(tag = "red_vx_lo_sub", debug = debug_precision,
                                 unbreakable = True, precision = self.precision)
    vx_d = Conversion(vx, precision = ML_Binary64, tag = "vx_d")
    pre_red_vx = red_vx_hi - inv_frac_pi_lo_cst * fk
    # same reduction carried out in binary64
    pre_red_vx_d_hi = (vx_d - inv_frac_pi_cst * fk)
    pre_red_vx_d_hi.set_attributes(tag = "pre_red_vx_d_hi", precision = ML_Binary64,
                                   debug = debug_lftolx)
    pre_red_vx_d = pre_red_vx_d_hi - inv_frac_pi_lo_cst * fk
    pre_red_vx_d.set_attributes(tag = "pre_red_vx_d", debug = debug_lftolx,
                                precision = ML_Binary64)

    modk = Modulo(k, 2**(frac_pi_index+1), precision = ML_Int32,
                  tag = "switch_value", debug = True)

    # the reduced argument is negated when bit (frac_pi_index - 1) of modk
    # is set
    sel_c = Equal(BitLogicAnd(modk, 2**(frac_pi_index-1)), 2**(frac_pi_index-1))
    red_vx = Select(sel_c, -pre_red_vx, pre_red_vx)
    red_vx.set_attributes(tag = "red_vx", debug = debug_precision, precision = self.precision)

    red_vx_d = Select(sel_c, -pre_red_vx_d, pre_red_vx_d)
    red_vx_d.set_attributes(tag = "red_vx_d", debug = debug_lftolx, precision = ML_Binary64)

    approx_interval = Interval(-pi/(S2**(frac_pi_index+1)), pi / S2**(frac_pi_index+1))

    Log.report(Log.Info, "approx interval: %s\n" % approx_interval)

    error_goal_approx = S2**-self.precision.get_precision()

    Log.report(Log.Info, "building mathematical polynomial")
    poly_degree_vector = [None] * 2**(frac_pi_index+1)

    error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

    # generate_estrin_scheme is a possible alternative builder
    polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

    # indexes approximated with a relative (rather than absolute) constraint
    index_relative = []

    poly_object_vector = [None] * 2**(frac_pi_index+1)
    for i in range(2**(frac_pi_index+1)):
        sub_func = cos(sollya.x+i*pi/S2**frac_pi_index)
        degree = int(sup(guessdegree(sub_func, approx_interval, error_goal_approx))) + 1

        degree_list = range(degree+1)
        a_interval = approx_interval
        if i == 0:
            # ad-hoc, TODO: to be cleaned
            # even monomials only, with a fixed degree
            degree = 6
            degree_list = range(0, degree+1, 2)
        elif i % 2**(frac_pi_index) == 2**(frac_pi_index-1):
            # for pi/2 and 3pi/2, an approx to sin=cos(pi/2+x)
            # must be generated (odd monomials only)
            degree_list = range(1, degree+1, 2)

        # NOTE(review): coefficient formats are hard-coded to
        # binary32/binary64 whatever self.precision is
        if i == 3 or i == 5 or i == 7 or i == 9:
            precision_list = [sollya.binary64] + [sollya.binary32] *(degree)
        else:
            precision_list = [sollya.binary32] * (degree+1)

        poly_degree_vector[i] = degree

        constraint = sollya.absolute
        delta = (2**(frac_pi_index - 3))
        centered_i = (i % 2**(frac_pi_index)) - 2**(frac_pi_index-1)
        if centered_i < delta and centered_i > -delta and centered_i != 0:
            constraint = sollya.relative
            index_relative.append(i)
        Log.report(Log.Info, "generating approximation for %d/%d" % (i, 2**(frac_pi_index+1)))
        poly_object_vector[i], _ = Polynomial.build_from_approximation_with_error(
            sub_func, degree_list, precision_list, a_interval, constraint,
            error_function = error_function)

    # unified power map for red_sx^n, shared by every scheme built below
    upm = {}
    rel_error_list = []

    poly_scheme_vector = [None] * (2**(frac_pi_index+1))

    for i in range(2**(frac_pi_index+1)):
        poly_object = poly_object_vector[i]
        poly_precision = self.precision
        if i == 3 or i == 5 or i == 7 or i == 9:
            # the two lowest-degree terms are evaluated apart, in binary64
            poly_precision = ML_Binary64
            c0 = Constant(coeff(poly_object.get_sollya_object(), 0), precision = ML_Binary64)
            c1 = Constant(coeff(poly_object.get_sollya_object(), 1), precision = self.precision)
            poly_hi = (c0 + c1 * red_vx)
            poly_hi.set_precision(ML_Binary64)
            red_vx_d_2 = red_vx_d * red_vx_d
            poly_scheme = poly_hi + red_vx_d_2 * polynomial_scheme_builder(
                poly_object.sub_poly(start_index = 2, offset = 2), red_vx,
                unified_precision = self.precision, power_map_ = upm)
            poly_scheme.set_attributes(unbreakable = True)
        elif i == 4:
            # the degree-1 term is evaluated apart, in binary64
            c1 = Constant(coeff(poly_object.get_sollya_object(), 1), precision = ML_Binary64)
            poly_scheme = c1 * red_vx_d + polynomial_scheme_builder(
                poly_object.sub_poly(start_index = 2), red_vx,
                unified_precision = self.precision, power_map_ = upm)
            poly_scheme.set_precision(ML_Binary64)
        else:
            poly_scheme = polynomial_scheme_builder(poly_object, red_vx,
                                                    unified_precision = poly_precision,
                                                    power_map_ = upm)
        poly_scheme.set_attributes(tag = "poly_cos%dpi%d" % (i, 2**(frac_pi_index)),
                                   debug = debug_precision)
        poly_scheme_vector[i] = poly_scheme

        # evaluation error study, limited to index 3 and to hosts where
        # gappa is available
        if is_gappa_installed() and i == 3:
            opt_scheme = self.opt_engine.optimization_process(poly_scheme, self.precision,
                                                              copy = True,
                                                              fuse_fma = self.fuse_fma)

            tag_map = {}
            self.opt_engine.register_nodes_by_tag(opt_scheme, tag_map)

            gappa_vx = Variable("red_vx", precision = self.precision,
                                interval = approx_interval)

            # NOTE(review): this map is built but not used in the visible
            # part of the function
            cg_eval_error_copy_map = {
                tag_map["red_vx"]: gappa_vx,
                tag_map["red_vx_d"]: gappa_vx,
            }
def generate_scheme(self):
    """Build the operation graph of a table-driven exp2(x) evaluation.

    The input is decomposed as ``x = vx_int * 2**-index_size + vx_frac`` with
    ``vx_int = floor(x * 2**index_size)``.  The integer part is split again:

    * ``vx_int_hi`` (``vx_int`` shifted right by ``index_size``) is inserted
      as an exponent (``pow_exp``);
    * ``vx_int_lo`` (``vx_int`` modulo ``2**index_size``) indexes a table of
      ``2**(j * 2**-index_size)`` values stored as (lo, hi) pairs.

    A polynomial approximating ``2**t`` over ``[0, 2**-index_size]`` is
    evaluated on ``vx_frac`` and combined with the table value.

    Returns:
        A ``Statement`` returning ``+inf`` when ``vx_int_hi`` is greater
        than the format's ``emax`` and the reconstructed value otherwise.
    """
    # declaring target and instantiating optimization engine
    vx = self.implementation.add_input_variable("x", self.precision)

    Log.set_dump_stdout(True)

    Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m")
    if self.debug_flag:
        Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

    # number of fractional bits of x resolved by the table lookup
    index_size = 3

    approx_interval = Interval(0.0, 2**-index_size)
    error_goal_approx = 2**-(self.precision.get_precision())
    int_precision = {
        ML_Binary32: ML_Int32,
        ML_Binary64: ML_Int64
    }[self.precision]

    # vx_int = floor(x * 2^index_size), vx_frac = x - vx_int * 2^-index_size
    vx_int = Floor(vx * 2**index_size,
                   precision=self.precision,
                   tag="vx_int",
                   debug=debug_multi)
    vx_frac = vx - (vx_int * 2**-index_size)
    vx_frac.set_attributes(tag="vx_frac", debug=debug_multi, unbreakable=True)

    poly_degree = sup(
        guessdegree(2**(sollya.x), approx_interval, error_goal_approx)) + 1
    precision_list = [1] + [self.precision] * (poly_degree)

    vx_integer = Conversion(vx_int,
                            precision=int_precision,
                            tag="vx_integer",
                            debug=debug_multi)
    vx_int_hi = BitLogicRightShift(vx_integer,
                                   Constant(index_size),
                                   tag="vx_int_hi",
                                   debug=debug_multi)
    vx_int_lo = Modulo(vx_integer,
                       2**index_size,
                       tag="vx_int_lo",
                       debug=debug_multi)
    pow_exp = ExponentInsertion(Conversion(vx_int_hi,
                                           precision=int_precision),
                                precision=self.precision,
                                tag="pow_exp",
                                debug=debug_multi)

    # The table has 2 * 2^index_size rows; rows i and i + 2^index_size hold
    # the same value and lookups are offset by 2^index_size (see table_index).
    # NOTE(review): presumably this keeps a negative Modulo remainder inside
    # the table -- confirm against the Modulo semantics of the target.
    exp2_table = ML_Table(dimensions=[2 * 2**index_size, 2],
                          storage_precision=self.precision,
                          tag=self.uniquify_name("exp2_table"))
    for i in range(2 * 2**index_size):
        input_value = i - 2**index_size if i >= 2**index_size else i
        exp2_value = SollyaObject(2)**((input_value) * 2**-index_size)
        hi_value = round(exp2_value, self.precision.get_sollya_object(), RN)
        lo_value = round(exp2_value - hi_value,
                         self.precision.get_sollya_object(), RN)
        # column 0 stores the low part, column 1 the high part
        exp2_table[i][0] = lo_value
        exp2_table[i][1] = hi_value

    error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

    poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
        2**(sollya.x),
        poly_degree,
        precision_list,
        approx_interval,
        sollya.absolute,
        error_function=error_function)

    # reported through the logger (the former print statement was only
    # valid Python 2 syntax)
    Log.report(
        Log.Info, "poly_approx_error: {} {}".format(
            poly_approx_error, float(log2(poly_approx_error))))

    polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme
    # NOTE(review): sub_poly(start_index=1) presumably drops the degree-0
    # term, which is re-introduced by the "hi + hi * poly" form below.
    poly = polynomial_scheme_builder(poly_object.sub_poly(start_index=1),
                                     vx_frac,
                                     unified_precision=self.precision)
    poly.set_attributes(tag="poly", debug=debug_multi)

    table_index = Addition(vx_int_lo,
                           Constant(2**index_size, precision=int_precision),
                           precision=int_precision,
                           tag="table_index",
                           debug=debug_multi)

    lo_value_load = TableLoad(exp2_table,
                              table_index,
                              0,
                              tag="lo_value_load",
                              debug=debug_multi)
    hi_value_load = TableLoad(exp2_table,
                              table_index,
                              1,
                              tag="hi_value_load",
                              debug=debug_multi)

    # (hi + lo) * (1 + poly) * 2^vx_int_hi, summed from the smallest terms up
    result = (hi_value_load +
              (hi_value_load * poly +
               (lo_value_load + lo_value_load * poly))) * pow_exp
    ov_flag = Comparison(vx_int_hi,
                         Constant(self.precision.get_emax(),
                                  precision=self.precision),
                         specifier=Comparison.Greater)

    # main scheme
    Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
    scheme = Statement(
        Return(Select(ov_flag, FP_PlusInfty(self.precision), result)))

    return scheme
def generate_scheme(self):
    """Build the operation graph of a cosine implementation.

    The scheme works on ``|x|`` and has two paths:

    * standard path: ``k = nearestint(|x| * 2**frac_pi_index / pi)`` and the
      reduced argument ``|x| - k * pi / 2**frac_pi_index`` (the constant
      being split into a high and a low part) are computed; ``k`` modulo
      ``2**(frac_pi_index + 1)`` selects, through a ``SwitchBlock``, one
      polynomial approximating ``cos(t + i * pi / 2**frac_pi_index)``;
    * large-argument path (``|x| >= 2**20``): the reduction is delegated to
      the external function ``payne_hanek_fp32_asm`` and a second set of
      polynomials is selected the same way.

    Returns:
        A ``Statement`` containing the reduction nodes and the
        ``ConditionBlock`` choosing between the two paths.
    """
    # declaring CodeFunction and retrieving input variable
    vx = Abs(self.implementation.add_input_variable("x", self.precision),
             tag="vx")

    Log.report(Log.Info, "generating implementation scheme")
    if self.debug_flag:
        Log.report(Log.Info, "debug has been enabled")

    # local overloading of RaiseReturn operation
    def ExpRaiseReturn(*args, **kwords):
        kwords["arg_value"] = vx
        kwords["function_name"] = self.function_name
        return RaiseReturn(*args, **kwords)

    debug_precision = {
        ML_Binary32: debug_ftox,
        ML_Binary64: debug_lftolx
    }[self.precision]

    # NOTE(review): the special-value handling below (test_nan_or_inf,
    # specific_return, ...) is built but never referenced by the returned
    # scheme; it is kept as-is.
    test_nan_or_inf = Test(vx,
                           specifier=Test.IsInfOrNaN,
                           likely=False,
                           debug=True,
                           tag="nan_or_inf")
    test_nan = Test(vx, specifier=Test.IsNaN, debug=True, tag="is_nan_test")
    test_positive = Comparison(vx,
                               0,
                               specifier=Comparison.GreaterOrEqual,
                               debug=True,
                               tag="inf_sign")

    test_signaling_nan = Test(vx,
                              specifier=Test.IsSignalingNaN,
                              debug=True,
                              tag="is_signaling_nan")
    return_snan = Statement(
        ExpRaiseReturn(ML_FPE_Invalid, return_value=FP_QNaN(self.precision)))

    # return in case of infinity input
    infty_return = Statement(
        ConditionBlock(test_positive, Return(FP_PlusInfty(self.precision)),
                       Return(FP_PlusZero(self.precision))))
    # return in case of specific value input (NaN or inf)
    specific_return = ConditionBlock(
        test_nan,
        ConditionBlock(test_signaling_nan, return_snan,
                       Return(FP_QNaN(self.precision))), infty_return)
    # return in case of standard (non-special) input

    sollya_precision = self.precision.get_sollya_object()
    # the high part of pi / 2^frac_pi_index is rounded to fewer bits
    hi_precision = self.precision.get_field_size() - 3

    # argument reduction
    frac_pi_index = 3
    frac_pi = round(S2**frac_pi_index / pi, sollya_precision, sollya.RN)
    inv_frac_pi = round(pi / S2**frac_pi_index, hi_precision, sollya.RN)
    inv_frac_pi_lo = round(pi / S2**frac_pi_index - inv_frac_pi,
                           sollya_precision, sollya.RN)
    # computing k = E(x * frac_pi)
    vx_pi = Multiplication(vx, frac_pi, precision=self.precision)
    k = NearestInteger(vx_pi, precision=ML_Int32, tag="k", debug=True)
    fk = Conversion(k, precision=self.precision, tag="fk")

    inv_frac_pi_cst = Constant(inv_frac_pi,
                               tag="inv_frac_pi",
                               precision=self.precision)
    inv_frac_pi_lo_cst = Constant(inv_frac_pi_lo,
                                  tag="inv_frac_pi_lo",
                                  precision=self.precision)

    # reduced argument in the working precision ...
    red_vx_hi = (vx - inv_frac_pi_cst * fk)
    red_vx_hi.set_attributes(tag="red_vx_hi",
                             debug=debug_precision,
                             precision=self.precision)
    red_vx_lo_sub = inv_frac_pi_lo_cst * fk
    red_vx_lo_sub.set_attributes(tag="red_vx_lo_sub",
                                 debug=debug_precision,
                                 unbreakable=True,
                                 precision=self.precision)
    vx_d = Conversion(vx, precision=ML_Binary64, tag="vx_d")
    pre_red_vx = red_vx_hi - inv_frac_pi_lo_cst * fk
    # ... and the same reduction carried out in binary64
    pre_red_vx_d_hi = (vx_d - inv_frac_pi_cst * fk)
    pre_red_vx_d_hi.set_attributes(tag="pre_red_vx_d_hi",
                                   precision=ML_Binary64,
                                   debug=debug_lftolx)
    pre_red_vx_d = pre_red_vx_d_hi - inv_frac_pi_lo_cst * fk
    pre_red_vx_d.set_attributes(tag="pre_red_vx_d",
                                debug=debug_lftolx,
                                precision=ML_Binary64)

    modk = Modulo(k,
                  2**(frac_pi_index + 1),
                  precision=ML_Int32,
                  tag="switch_value",
                  debug=True)

    # the reduced argument is negated when bit (frac_pi_index - 1) of modk
    # is set
    sel_c = Equal(BitLogicAnd(modk, 2**(frac_pi_index - 1)),
                  2**(frac_pi_index - 1))
    red_vx = Select(sel_c, -pre_red_vx, pre_red_vx)
    red_vx.set_attributes(tag="red_vx",
                          debug=debug_precision,
                          precision=self.precision)

    red_vx_d = Select(sel_c, -pre_red_vx_d, pre_red_vx_d)
    red_vx_d.set_attributes(tag="red_vx_d",
                            debug=debug_lftolx,
                            precision=ML_Binary64)

    approx_interval = Interval(-pi / (S2**(frac_pi_index + 1)),
                               pi / S2**(frac_pi_index + 1))

    Log.report(Log.Info, "approx interval: %s\n" % approx_interval)

    error_goal_approx = S2**-self.precision.get_precision()

    Log.report(Log.Info, "building mathematical polynomial")
    poly_degree_vector = [None] * 2**(frac_pi_index + 1)

    error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

    polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

    index_relative = []

    # one polynomial per value of k modulo 2^(frac_pi_index + 1)
    poly_object_vector = [None] * 2**(frac_pi_index + 1)
    for i in range(2**(frac_pi_index + 1)):
        sub_func = cos(sollya.x + i * pi / S2**frac_pi_index)
        degree = int(
            sup(guessdegree(sub_func, approx_interval, error_goal_approx))) + 1

        degree_list = range(degree + 1)
        a_interval = approx_interval
        if i == 0:
            # ad-hoc, TODO: to be cleaned
            degree = 6
            degree_list = range(0, degree + 1, 2)
        elif i % 2**(frac_pi_index) == 2**(frac_pi_index - 1):
            # for pi/2 and 3pi/2, an approx to  sin=cos(pi/2+x)
            # must be generated
            degree_list = range(1, degree + 1, 2)

        if i in (3, 5, 7, 9):
            # constant coefficient kept in binary64 for these indices
            precision_list = [sollya.binary64] + [sollya.binary32] * (degree)
        else:
            precision_list = [sollya.binary32] * (degree + 1)

        poly_degree_vector[i] = degree

        constraint = sollya.absolute
        delta = (2**(frac_pi_index - 3))
        centered_i = (i % 2**(frac_pi_index)) - 2**(frac_pi_index - 1)
        if centered_i < delta and centered_i > -delta and centered_i != 0:
            constraint = sollya.relative
            index_relative.append(i)
        Log.report(
            Log.Info, "generating approximation for %d/%d" %
            (i, 2**(frac_pi_index + 1)))
        poly_object_vector[i], _ = Polynomial.build_from_approximation_with_error(
            sub_func,
            degree_list,
            precision_list,
            a_interval,
            constraint,
            error_function=error_function)

    # unified power map for red_sx^n
    upm = {}
    rel_error_list = []

    poly_scheme_vector = [None] * (2**(frac_pi_index + 1))

    for i in range(2**(frac_pi_index + 1)):
        poly_object = poly_object_vector[i]
        poly_precision = self.precision
        if i in (3, 5, 7, 9):
            # degree 0 and 1 terms are accumulated in binary64, the tail is
            # evaluated in the working precision
            poly_precision = ML_Binary64
            c0 = Constant(coeff(poly_object.get_sollya_object(), 0),
                          precision=ML_Binary64)
            c1 = Constant(coeff(poly_object.get_sollya_object(), 1),
                          precision=self.precision)
            poly_hi = (c0 + c1 * red_vx)
            poly_hi.set_precision(ML_Binary64)
            red_vx_d_2 = red_vx_d * red_vx_d
            poly_scheme = poly_hi + red_vx_d_2 * polynomial_scheme_builder(
                poly_object.sub_poly(start_index=2, offset=2),
                red_vx,
                unified_precision=self.precision,
                power_map_=upm)
            poly_scheme.set_attributes(unbreakable=True)
        elif i == 4:
            c1 = Constant(coeff(poly_object.get_sollya_object(), 1),
                          precision=ML_Binary64)
            poly_scheme = c1 * red_vx_d + polynomial_scheme_builder(
                poly_object.sub_poly(start_index=2),
                red_vx,
                unified_precision=self.precision,
                power_map_=upm)
            poly_scheme.set_precision(ML_Binary64)
        else:
            poly_scheme = polynomial_scheme_builder(
                poly_object,
                red_vx,
                unified_precision=poly_precision,
                power_map_=upm)
        poly_scheme.set_attributes(tag="poly_cos%dpi%d" %
                                   (i, 2**(frac_pi_index)),
                                   debug=debug_precision)
        poly_scheme_vector[i] = poly_scheme

        # evaluation error of polynomial #3 is measured when gappa is available
        if is_gappa_installed() and i == 3:
            opt_scheme = self.opt_engine.optimization_process(
                poly_scheme,
                self.precision,
                copy=True,
                fuse_fma=self.fuse_fma)

            tag_map = {}
            self.opt_engine.register_nodes_by_tag(opt_scheme, tag_map)

            gappa_vx = Variable("red_vx",
                                precision=self.precision,
                                interval=approx_interval)

            cg_eval_error_copy_map = {
                tag_map["red_vx"]: gappa_vx,
                tag_map["red_vx_d"]: gappa_vx,
            }

            # reported through the logger (the former print statements were
            # only valid Python 2 syntax)
            Log.report(Log.Info, "opt_scheme")
            Log.report(
                Log.Info,
                opt_scheme.get_str(depth=None,
                                   display_precision=True,
                                   memoization_map={}))

            eval_error = self.gappa_engine.get_eval_error_v2(
                self.opt_engine,
                opt_scheme,
                cg_eval_error_copy_map,
                gappa_filename="red_arg_%d.g" % i)
            poly_range = cos(approx_interval + i * pi / S2**frac_pi_index)
            rel_error_list.append(eval_error / poly_range)

    half = 2**frac_pi_index
    sub_half = 2**(frac_pi_index - 1)

    # determine if the reduced input is within the second and third quarter (not first nor fourth)
    # to negate the cosine output
    factor_cond = BitLogicAnd(BitLogicXor(
        BitLogicRightShift(modk, frac_pi_index),
        BitLogicRightShift(modk, frac_pi_index - 1)),
                              1,
                              tag="factor_cond",
                              debug=True)

    CM1 = Constant(-1, precision=self.precision)
    C1 = Constant(1, precision=self.precision)
    factor = Select(factor_cond, CM1, C1, tag="factor", debug=debug_precision)
    factor2 = Select(Equal(modk, Constant(sub_half)),
                     CM1,
                     C1,
                     tag="factor2",
                     debug=debug_precision)

    # Polynomial i serves the switch values i, half - i and (for i != 0)
    # half + i, 2 * half - i; the sign is supplied by `factor`.
    switch_map = {}
    for i in range(2**(frac_pi_index - 1)):
        switch_case = (i, half - i)
        if i != 0:
            switch_case = switch_case + (half + i, 2 * half - i)
        if poly_scheme_vector[i].get_precision() != self.precision:
            poly_result = Conversion(poly_scheme_vector[i],
                                     precision=self.precision)
        else:
            poly_result = poly_scheme_vector[i]
        switch_map[switch_case] = Return(factor * poly_result)
    switch_map[(sub_half, half + sub_half)] = Return(
        factor2 * poly_scheme_vector[sub_half])

    result = SwitchBlock(modk, switch_map)

    #######################################################################
    #                    LARGE ARGUMENT MANAGEMENT                        #
    #                 (lar: Large Argument Reduction)                     #
    #######################################################################

    # payne and hanek argument reduction for large arguments
    red_func_name = "payne_hanek_fp32_asm"
    payne_hanek_func_op = FunctionOperator(
        red_func_name,
        arg_map={0: FO_Arg(0)},
        require_header=["support_lib/ml_red_arg.h"])
    payne_hanek_func = FunctionObject(red_func_name, [ML_Binary32],
                                      ML_Binary64, payne_hanek_func_op)
    payne_hanek_func_op.declare_prototype = payne_hanek_func
    large_arg_red = payne_hanek_func(vx)
    red_bound = S2**20

    cond = Abs(vx) >= red_bound
    cond.set_attributes(tag="cond", likely=False)

    lar_neark = NearestInteger(large_arg_red, precision=ML_Int64)
    lar_modk = Modulo(lar_neark,
                      Constant(16, precision=ML_Int64),
                      tag="lar_modk",
                      debug=True)
    # Modulo is supposed to be already performed (by payne_hanek_cosfp32)
    pre_lar_red_vx = large_arg_red - Conversion(lar_neark,
                                                precision=ML_Binary64)
    pre_lar_red_vx.set_attributes(precision=ML_Binary64,
                                  debug=debug_lftolx,
                                  tag="pre_lar_red_vx")
    lar_red_vx = Conversion(pre_lar_red_vx,
                            precision=self.precision,
                            debug=debug_precision,
                            tag="lar_red_vx")
    # part of the binary64 reduced argument lost by the conversion above
    lar_red_vx_lo = Conversion(
        pre_lar_red_vx - Conversion(lar_red_vx, precision=ML_Binary64),
        precision=self.precision)
    lar_red_vx_lo.set_attributes(tag="lar_red_vx_lo",
                                 precision=self.precision)

    lar_k = 3
    # large arg reduction Universal Power Map
    lar_upm = {}
    lar_switch_map = {}
    approx_interval = Interval(-0.5, 0.5)
    # loop-invariant scaling of the reduced argument (pi / 2^lar_k)
    lar_frac_pi = pi / S2**lar_k
    for i in range(2**(lar_k + 1)):
        func = cos(lar_frac_pi * i + lar_frac_pi * sollya.x)

        degree = 6
        error_mode = sollya.absolute
        if i % 2**(lar_k) == 2**(lar_k - 1):
            # close to sin(x) cases: func / x is approximated with even
            # degrees only, then shifted back with sub_poly(offset=-1)
            func = (-sin(lar_frac_pi * sollya.x) if i == 2**(lar_k - 1) else
                    sin(lar_frac_pi * sollya.x))
            degree_list = range(0, degree + 1, 2)
            precision_list = [sollya.binary32] * len(degree_list)
            poly_object, _ = Polynomial.build_from_approximation_with_error(
                func / sollya.x, degree_list, precision_list, approx_interval,
                error_mode)
            poly_object = poly_object.sub_poly(offset=-1)
        else:
            degree_list = range(degree + 1)
            precision_list = [sollya.binary32] * len(degree_list)
            poly_object, _ = Polynomial.build_from_approximation_with_error(
                func, degree_list, precision_list, approx_interval, error_mode)

        if i in (3, 5, 7, 9, 11, 13):
            poly_precision = ML_Binary64
            c0 = Constant(coeff(poly_object.get_sollya_object(), 0),
                          precision=ML_Binary64)
            c1 = Constant(coeff(poly_object.get_sollya_object(), 1),
                          precision=self.precision)
            poly_hi = (c0 + c1 * lar_red_vx)
            poly_hi.set_precision(ML_Binary64)
            pre_poly_scheme = poly_hi + polynomial_scheme_builder(
                poly_object.sub_poly(start_index=2),
                lar_red_vx,
                unified_precision=self.precision,
                power_map_=lar_upm)
            pre_poly_scheme.set_attributes(precision=ML_Binary64)
            poly_scheme = Conversion(pre_poly_scheme,
                                     precision=self.precision)
        elif i == 4 or i == 12:
            c1 = Constant(coeff(poly_object.get_sollya_object(), 1),
                          precision=self.precision)
            c3 = Constant(coeff(poly_object.get_sollya_object(), 3),
                          precision=self.precision)
            c5 = Constant(coeff(poly_object.get_sollya_object(), 5),
                          precision=self.precision)
            poly_hi = polynomial_scheme_builder(
                poly_object.sub_poly(start_index=3),
                lar_red_vx,
                unified_precision=self.precision,
                power_map_=lar_upm)
            poly_hi.set_attributes(tag="poly_lar_%d_hi" % i,
                                   precision=ML_Binary64)
            # c1 * (hi + lo) + tail, accumulated in binary64
            poly_scheme = Conversion(FusedMultiplyAdd(
                c1, lar_red_vx, poly_hi, precision=ML_Binary64) +
                                     c1 * lar_red_vx_lo,
                                     precision=self.precision)
        else:
            poly_scheme = polynomial_scheme_builder(
                poly_object,
                lar_red_vx,
                unified_precision=self.precision,
                power_map_=lar_upm)
        poly_scheme.set_attributes(tag="lar_poly_%d" % i,
                                   debug=debug_precision)
        lar_switch_map[(i, )] = Return(poly_scheme)

    lar_result = SwitchBlock(lar_modk, lar_switch_map)

    # main scheme
    Log.report(Log.Info, "Construction of the initial MDL scheme")
    scheme = Statement(pre_red_vx_d, red_vx_lo_sub,
                       ConditionBlock(cond, lar_result, result))

    return scheme
def piecewise_approximation(function,
                            variable,
                            precision,
                            bound_low=-1.0,
                            bound_high=1.0,
                            num_intervals=16,
                            max_degree=2,
                            error_threshold=sollya.S2**-24):
    """Generate a table-driven piecewise polynomial approximation scheme.

    The range ``[bound_low, bound_high]`` is cut into ``num_intervals``
    segments of equal size.  On each segment a polynomial of degree at most
    ``max_degree`` is built and its coefficients are stored in one row of a
    coefficient table.  The returned scheme computes the segment index from
    ``variable``, loads that row and evaluates the polynomial with a Horner
    scheme on the offset of ``variable`` inside the segment.

    Args:
        function: callable applied to sollya expressions and to numeric
            values (it is called on ``sollya.x + offset`` and on bounds).
        variable: operation node holding the input of the approximation.
        precision: format used for the table, the constants and the nodes.
        bound_low: lower bound of the approximation range.
        bound_high: upper bound of the approximation range.
        num_intervals: number of segments (and of table rows).
        max_degree: maximal polynomial degree (table rows have
            ``max_degree + 1`` columns).
        error_threshold: error target given to ``sollya.guessdegree``.

    Returns:
        A pair ``(poly_scheme, max_approx_error)``: the evaluation scheme and
        the largest absolute approximation error over all segments.

    Raises:
        SollyaError: re-raised when a segment polynomial cannot be built.
    """
    # table to store coefficients of the approximation on each segment
    coeff_table = ML_NewTable(dimensions=[num_intervals, max_degree + 1],
                              storage_precision=precision,
                              tag="coeff_table")

    error_function = lambda p, f, ai, mod, t: sollya.dirtyinfnorm(p - f, ai)
    max_approx_error = 0.0
    interval_size = (bound_high - bound_low) / num_intervals

    for i in range(num_intervals):
        subint_low = bound_low + i * interval_size
        subint_high = bound_low + (i + 1) * interval_size

        # the function is translated so that each segment starts at 0
        local_function = function(sollya.x + subint_low)
        local_interval = Interval(-interval_size, interval_size)

        # guessdegree yields an interval: use its upper bound, capped by
        # max_degree *before* the int conversion so that an unbounded guess
        # never reaches int()
        degree_guess = sollya.sup(
            sollya.guessdegree(local_function, local_interval,
                               error_threshold))
        degree = max_degree if degree_guess > max_degree else int(degree_guess)

        if function(subint_low) == 0.0:
            # if the lower bound is a zero to the function, we
            # need to force value=0 for the constant coefficient
            # and extend the approximation interval
            # NOTE(review): this branch approximates the untranslated
            # function, which matches the evaluation variable only when
            # subint_low is 0 -- confirm for other zeros.
            degree_list = range(1, degree + 1)
            poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                function(sollya.x),
                degree_list, [precision] * len(degree_list),
                Interval(-subint_high, subint_high),
                sollya.absolute,
                error_function=error_function)
        else:
            try:
                poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                    local_function,
                    degree, [precision] * (degree + 1),
                    local_interval,
                    sollya.absolute,
                    error_function=error_function)
            except SollyaError:
                print("degree: {}".format(degree))
                # bare raise keeps the original traceback
                raise

        # Every column of the row is written: segments whose local degree is
        # below max_degree get explicit zeros for the missing coefficients,
        # because the evaluation scheme below reads all max_degree + 1
        # columns whatever the segment.
        for ci in range(max_degree + 1):
            if ci in poly_object.coeff_map:
                coeff_table[i][ci] = poly_object.coeff_map[ci]
            else:
                coeff_table[i][ci] = 0.0

        max_approx_error = max(max_approx_error, abs(approx_error))

    # computing offset
    diff = Subtraction(variable,
                       Constant(bound_low, precision=precision),
                       tag="diff",
                       precision=precision)
    # delta = bound_high - bound_low
    delta_ratio = Constant(num_intervals / (bound_high - bound_low),
                           precision=precision)
    # computing table index
    # index = nearestint(diff / delta * <num_intervals>), clamped to the
    # valid row range [0, num_intervals - 1]
    index = Max(0,
                Min(
                    NearestInteger(Multiplication(diff,
                                                  delta_ratio,
                                                  precision=precision),
                                   precision=ML_Int32), num_intervals - 1),
                tag="index",
                debug=True,
                precision=ML_Int32)
    poly_var = Subtraction(diff,
                           Multiplication(
                               Conversion(index, precision=precision),
                               Constant(interval_size, precision=precision)),
                           precision=precision,
                           tag="poly_var",
                           debug=True)
    # generating indexed polynomial: the coefficient count is fixed by the
    # table width, not by the degree of the last segment processed
    coeffs = [(ci, TableLoad(coeff_table, index, ci))
              for ci in range(max_degree + 1)][::-1]
    poly_scheme = PolynomialSchemeEvaluator.generate_horner_scheme2(
        coeffs, poly_var, precision, {}, precision)
    return poly_scheme, max_approx_error
def generate_scheme(self):
    """Build the operation graph of a table-driven cosh(x) evaluation.

    With ``log2_value = log(2) / 2**index_size`` the scheme computes, on
    ``|x|``:

    * ``k = trunc(|x| / log2_value)`` and ``r = |x| - k * log2_value``, the
      constant being split into a high part (short enough for
      ``k * log2_hi`` to be exact) and a low part;
    * ``k = k_hi * 2**index_size + k_lo``; ``k_lo`` indexes a table of
      ``2**(+/- j * 2**-index_size)`` values stored as hi/lo pairs;
    * ``exp(r)`` and ``exp(-r)`` through one polynomial evaluated on ``r``
      and on ``-r``;
    * ``cosh(x)`` as the sum of both exponential terms, each scaled by an
      inserted exponent (``k_hi - 1`` and ``-k_hi - 1``, clamped below by
      the format's minimal normal exponent).

    Returns:
        A ``Statement`` returning ``+inf`` when ``|x|`` is greater than
        ``acosh(max_value)`` rounded down, and the computed sum otherwise.
    """
    # declaring target and instantiating optimization engine
    vx = self.implementation.add_input_variable("x", self.precision)

    Log.set_dump_stdout(True)

    Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m")
    if self.debug_flag:
        Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

    index_size = 3

    # the whole scheme works on the absolute value of the input
    vx = Abs(vx)
    int_precision = {
        ML_Binary32: ML_Int32,
        ML_Binary64: ML_Int64
    }[self.precision]

    # argument reduction
    arg_reg_value = log(2) / 2**index_size
    inv_log2_value = round(1 / arg_reg_value,
                           self.precision.get_sollya_object(), RN)
    inv_log2_cst = Constant(inv_log2_value,
                            precision=self.precision,
                            tag="inv_log2")

    # for r_hi to be accurate we ensure k * log2_hi_value_cst is exact
    # by limiting the number of non-zero bits in log2_hi_value_cst
    # cosh(x) ~ exp(abs(x))/2  for a big enough x
    # cosh(x) > 2^1023 <=> exp(x) > 2^1024 <=> x > log(2^1024)
    # k = inv_log2_value * x
    # -1 for guard
    max_k_approx = inv_log2_value * log(sollya.SollyaObject(2)**1024)
    max_k_bitsize = int(ceil(log2(max_k_approx)))
    Log.report(Log.Info, "max_k_bitsize: %d" % max_k_bitsize)
    log2_hi_value_precision = self.precision.get_precision(
    ) - max_k_bitsize - 1

    log2_hi_value = round(arg_reg_value, log2_hi_value_precision, RN)
    log2_lo_value = round(arg_reg_value - log2_hi_value,
                          self.precision.get_sollya_object(), RN)
    log2_hi_value_cst = Constant(log2_hi_value,
                                 tag="log2_hi_value",
                                 precision=self.precision)
    log2_lo_value_cst = Constant(log2_lo_value,
                                 tag="log2_lo_value",
                                 precision=self.precision)

    k = Trunc(Multiplication(inv_log2_cst, vx), precision=self.precision)
    k_log2 = Multiplication(k,
                            log2_hi_value_cst,
                            precision=self.precision,
                            exact=True,
                            tag="k_log2",
                            unbreakable=True)
    r_hi = vx - k_log2
    r_hi.set_attributes(tag="r_hi", debug=debug_multi, unbreakable=True)
    r_lo = -k * log2_lo_value_cst
    # reduced argument
    r = r_hi + r_lo
    r.set_attributes(tag="r", debug=debug_multi)

    r_eval_error = self.get_eval_error(
        r_hi,
        variable_copy_map={
            vx:
            Variable("vx",
                     interval=Interval(0, 715),
                     precision=self.precision),
            k:
            Variable("k", interval=Interval(0, 1024), precision=int_precision)
        })
    # reported through the logger (the former print statement was only
    # valid Python 2 syntax)
    Log.report(Log.Info, "r_eval_error: {}".format(r_eval_error))

    approx_interval = Interval(-arg_reg_value, arg_reg_value)
    error_goal_approx = 2**-(self.precision.get_precision())

    poly_degree = sup(
        guessdegree(exp(sollya.x), approx_interval, error_goal_approx))
    precision_list = [1] + [self.precision] * (poly_degree)

    k_integer = Conversion(k,
                           precision=int_precision,
                           tag="k_integer",
                           debug=debug_multi)
    k_hi = BitLogicRightShift(k_integer,
                              Constant(index_size),
                              tag="k_int_hi",
                              precision=int_precision,
                              debug=debug_multi)
    k_lo = Modulo(k_integer,
                  2**index_size,
                  tag="k_int_lo",
                  precision=int_precision,
                  debug=debug_multi)
    # NOTE(review): pow_exp is not referenced by the returned scheme
    # (pow_exp_pos / pow_exp_neg are used instead); kept as-is.
    pow_exp = ExponentInsertion(Conversion(k_hi, precision=int_precision),
                                precision=self.precision,
                                tag="pow_exp",
                                debug=debug_multi)

    # columns: 0/1 = hi/lo of 2^(-j * 2^-index_size),
    #          2/3 = hi/lo of 2^(+j * 2^-index_size)
    exp_table = ML_Table(dimensions=[2 * 2**index_size, 4],
                         storage_precision=self.precision,
                         tag=self.uniquify_name("exp2_table"))
    for i in range(2 * 2**index_size):
        input_value = i - 2**index_size if i >= 2**index_size else i
        # using SollyaObject wrapper to force evaluation by sollya
        # with higher precision
        exp_value = sollya.SollyaObject(2)**((input_value) * 2**-index_size)
        mexp_value = sollya.SollyaObject(2)**((-input_value) * 2**-index_size)
        pos_value_hi = round(exp_value, self.precision.get_sollya_object(),
                             RN)
        pos_value_lo = round(exp_value - pos_value_hi,
                             self.precision.get_sollya_object(), RN)
        neg_value_hi = round(mexp_value, self.precision.get_sollya_object(),
                             RN)
        neg_value_lo = round(mexp_value - neg_value_hi,
                             self.precision.get_sollya_object(), RN)
        exp_table[i][0] = neg_value_hi
        exp_table[i][1] = neg_value_lo
        exp_table[i][2] = pos_value_hi
        exp_table[i][3] = pos_value_lo

    # log2_value = log(2) / 2^index_size
    # cosh(x) = 1/2 * (exp(x) + exp(-x))
    # exp(x) = exp(x - k * log2_value + k * log2_value)
    #
    # r = x - k * log2_value
    # exp(x) = exp(r) * 2 ^ (k / 2^index_size)
    #
    # k / 2^index_size = h + l * 2^-index_size
    # exp(x) = exp(r) * 2^h * 2^(l *2^-index_size)
    #
    # cosh(x) = exp(r) * 2^(h-1) 2^(l *2^-index_size) + exp(-r) * 2^(-h-1) * 2^(-l *2^-index_size)
    #

    error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

    poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
        exp(sollya.x),
        poly_degree,
        precision_list,
        approx_interval,
        sollya.absolute,
        error_function=error_function)

    Log.report(
        Log.Info, "poly_approx_error: {} {}".format(
            poly_approx_error, float(log2(poly_approx_error))))

    polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme
    # the same polynomial is evaluated on r (for exp(r)) and -r (for exp(-r))
    poly_pos = polynomial_scheme_builder(poly_object.sub_poly(start_index=1),
                                         r,
                                         unified_precision=self.precision)
    poly_pos.set_attributes(tag="poly_pos", debug=debug_multi)

    poly_neg = polynomial_scheme_builder(poly_object.sub_poly(start_index=1),
                                         -r,
                                         unified_precision=self.precision)
    poly_neg.set_attributes(tag="poly_neg", debug=debug_multi)

    table_index = Addition(k_lo,
                           Constant(2**index_size, precision=int_precision),
                           precision=int_precision,
                           tag="table_index",
                           debug=debug_multi)

    neg_value_load_hi = TableLoad(exp_table,
                                  table_index,
                                  0,
                                  tag="neg_value_load_hi",
                                  debug=debug_multi)
    neg_value_load_lo = TableLoad(exp_table,
                                  table_index,
                                  1,
                                  tag="neg_value_load_lo",
                                  debug=debug_multi)
    pos_value_load_hi = TableLoad(exp_table,
                                  table_index,
                                  2,
                                  tag="pos_value_load_hi",
                                  debug=debug_multi)
    pos_value_load_lo = TableLoad(exp_table,
                                  table_index,
                                  3,
                                  tag="pos_value_load_lo",
                                  debug=debug_multi)

    # exponents h - 1 and -h - 1, clamped below by the minimal normal exponent
    k_plus = Max(
        Subtraction(k_hi,
                    Constant(1, precision=int_precision),
                    precision=int_precision,
                    tag="k_plus",
                    debug=debug_multi),
        Constant(self.precision.get_emin_normal(), precision=int_precision))
    k_neg = Max(
        Subtraction(-k_hi,
                    Constant(1, precision=int_precision),
                    precision=int_precision,
                    tag="k_neg",
                    debug=debug_multi),
        Constant(self.precision.get_emin_normal(), precision=int_precision))

    pow_exp_pos = ExponentInsertion(k_plus, precision=self.precision)
    pow_exp_neg = ExponentInsertion(k_neg, precision=self.precision)

    # (hi + lo) * (1 + poly) * 2^exponent, summed from the smallest terms up
    pos_exp = (
        pos_value_load_hi +
        (pos_value_load_hi * poly_pos +
         (pos_value_load_lo + pos_value_load_lo * poly_pos))) * pow_exp_pos
    pos_exp.set_attributes(tag="pos_exp", debug=debug_multi)

    neg_exp = (
        neg_value_load_hi +
        (neg_value_load_hi * poly_neg +
         (neg_value_load_lo + neg_value_load_lo * poly_neg))) * pow_exp_neg
    neg_exp.set_attributes(tag="neg_exp", debug=debug_multi)

    result = Addition(pos_exp,
                      neg_exp,
                      precision=self.precision,
                      tag="result",
                      debug=debug_multi)

    # ov_value: largest input whose cosh does not exceed the format maximum
    ov_value = round(acosh(self.precision.get_max_value()),
                     self.precision.get_sollya_object(), RD)
    ov_flag = Comparison(Abs(vx),
                         Constant(ov_value, precision=self.precision),
                         specifier=Comparison.Greater)

    # main scheme
    Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
    scheme = Statement(
        Return(Select(ov_flag, FP_PlusInfty(self.precision), result)))

    return scheme