class ML_ExponentialM1_Red(ML_Function("ml_expm1")):
    """Meta-function describing an implementation of expm1(x) = exp(x) - 1.

    The generated scheme reduces the input to r = x - k * log(2), with
    k = nearestint(x / log(2)), approximates expm1(r) with a polynomial and
    rebuilds the result as 2^k * (poly(r) + (1 - 2^-k)).
    """

    def __init__(self, args):
        # initializing base class
        ML_FunctionBasis.__init__(self, args)
        self.accuracy = args.accuracy

    @staticmethod
    def get_default_args(**kw):
        """ Return a structure containing the arguments for ML_ExponentialM1_Red,
            builtin from a default argument mapping overloaded with @p kw """
        default_args_expm1 = {
            "output_file": "my_expm1.c",
            "function_name": "my_expm1",
            "precision": ML_Binary32,
            "accuracy": ML_Faithful,
            "target": GenericProcessor()
        }
        default_args_expm1.update(kw)
        return DefaultArgTemplate(**default_args_expm1)

    def generate_scheme(self):
        """ Build and return the operation graph implementing expm1 for
            an input of format self.precision """
        # declaring the function input variable
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.set_dump_stdout(True)

        Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        C_m1 = Constant(-1, precision=self.precision)

        test_NaN_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False,
                               debug=debug_multi, tag="NaN_or_inf",
                               precision=ML_Bool)
        test_NaN = Test(vx, specifier=Test.IsNaN, likely=False,
                        debug=debug_multi, tag="is_NaN", precision=ML_Bool)
        test_inf = Comparison(vx, 0, specifier=Comparison.Greater,
                              debug=debug_multi, tag="sign",
                              precision=ML_Bool, likely=False)

        # Infnty input: +inf -> +inf, -inf -> -1
        infty_return = Statement(
            ConditionBlock(test_inf,
                           Return(FP_PlusInfty(self.precision)),
                           Return(C_m1)))
        # non-std input (inf/nan)
        specific_return = ConditionBlock(test_NaN,
                                         Return(FP_QNaN(self.precision)),
                                         infty_return)

        # Over/Underflow Tests
        precision_emax = self.precision.get_emax()
        precision_max_value = S2**(precision_emax + 1)
        expm1_overflow_bound = ceil(log(precision_max_value + 1))
        overflow_test = Comparison(vx, expm1_overflow_bound, likely=False,
                                   specifier=Comparison.Greater,
                                   precision=ML_Bool)
        overflow_return = Statement(Return(FP_PlusInfty(self.precision)))

        precision_emin = self.precision.get_emin_subnormal()
        precision_min_value = S2**precision_emin
        expm1_underflow_bound = floor(log(precision_min_value) + 1)
        underflow_test = Comparison(vx, expm1_underflow_bound, likely=False,
                                    specifier=Comparison.Less,
                                    precision=ML_Bool)
        # below the underflow bound the result is returned as -1
        underflow_return = Statement(Return(C_m1))

        # only binary32 and binary64 are supported by these mappings
        sollya_precision = {
            ML_Binary32: sollya.binary32,
            ML_Binary64: sollya.binary64
        }[self.precision]
        int_precision = {
            ML_Binary32: ML_Int32,
            ML_Binary64: ML_Int64
        }[self.precision]

        # Constants
        log_2 = round(log(2), sollya_precision, sollya.RN)
        invlog2 = round(1/log(2), sollya_precision, sollya.RN)
        log_2_cst = Constant(log_2, precision=self.precision)

        interval_vx = Interval(expm1_underflow_bound, expm1_overflow_bound)
        interval_fk = interval_vx * invlog2
        interval_k = Interval(floor(inf(interval_fk)), ceil(sup(interval_fk)))

        # log(2) is split into a high part with a shortened mantissa and a
        # low part holding the remainder
        log2_hi_precision = self.precision.get_field_size() - 6
        log2_hi = round(log(2), log2_hi_precision, sollya.RN)
        log2_lo = round(log(2) - log2_hi, sollya_precision, sollya.RN)

        # Reduction
        unround_k = vx * invlog2
        ik = NearestInteger(unround_k, precision=int_precision,
                            debug=debug_multi, tag="ik")
        k = Conversion(ik, precision=self.precision, tag="k")

        red_coeff1 = Multiplication(k, log2_hi, precision=self.precision)
        red_coeff2 = Multiplication(Negation(k, precision=self.precision),
                                    log2_lo, precision=self.precision)

        pre_sub_mul = Subtraction(vx, red_coeff1, precision=self.precision)

        # s is the rounded sum, t = red_coeff2 - (s - pre_sub_mul) is the part
        # of red_coeff2 which was not captured by s; both are summed into r
        s = Addition(pre_sub_mul, red_coeff2, precision=self.precision)
        z = Subtraction(s, pre_sub_mul, precision=self.precision)
        t = Subtraction(red_coeff2, z, precision=self.precision)
        r = Addition(s, t, precision=self.precision)

        r.set_attributes(tag="r", debug=debug_multi)

        r_interval = Interval(-log_2/S2, log_2/S2)

        local_ulp = sup(ulp(exp(r_interval), self.precision))

        print("ulp: ", local_ulp)
        error_goal = S2**-1*local_ulp
        print("error goal: ", error_goal)

        # Polynomial Approx
        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)
        Log.report(Log.Info, "\033[33;1m Building polynomial \033[0m\n")

        poly_degree = sup(guessdegree(expm1(sollya.x), r_interval, error_goal) + 1)

        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme
        poly_degree_list = range(0, poly_degree)

        precision_list = [self.precision] * (len(poly_degree_list) + 1)
        poly_object, poly_error = Polynomial.build_from_approximation_with_error(
            expm1(sollya.x), poly_degree, precision_list, r_interval,
            sollya.absolute, error_function=error_function)
        # the degree-0 and degree-1 terms are dropped from the evaluated
        # polynomial; r itself is added back explicitly below
        sub_poly = poly_object.sub_poly(start_index=2)
        Log.report(Log.Info, "Poly : %s" % sub_poly)
        Log.report(Log.Info, "poly error : {} / {:d}".format(
            poly_error, int(sollya.log2(poly_error))))
        pre_sub_poly = polynomial_scheme_builder(
            sub_poly, r, unified_precision=self.precision)
        poly = r + pre_sub_poly
        poly.set_attributes(tag="poly", debug=debug_multi)

        exp_k = ExponentInsertion(ik, tag="exp_k", debug=debug_multi,
                                  precision=self.precision)
        exp_mk = ExponentInsertion(-ik, tag="exp_mk", debug=debug_multi,
                                   precision=self.precision)

        diff = 1 - exp_mk
        diff.set_attributes(tag="diff", debug=debug_multi)

        # Late Tests
        late_overflow_test = Comparison(ik, self.precision.get_emax(),
                                        specifier=Comparison.Greater,
                                        likely=False, debug=debug_multi,
                                        tag="late_overflow_test")

        # Integer (floor) division is required here: the offset is combined
        # with the integer ik and used as an exponent, and a plain "/" would
        # produce a float under Python 3.
        overflow_exp_offset = (self.precision.get_emax()
                               - self.precision.get_field_size() // 2)
        diff_k = ik - overflow_exp_offset

        # the scaling by 2^ik is applied in two steps:
        # 2^(ik - offset) first, then 2^offset
        exp_diff_k = ExponentInsertion(diff_k, precision=self.precision,
                                       tag="exp_diff_k", debug=debug_multi)
        exp_oflow_offset = ExponentInsertion(overflow_exp_offset,
                                             precision=self.precision,
                                             tag="exp_offset",
                                             debug=debug_multi)

        late_overflow_result = (exp_diff_k * (1 + poly)) * exp_oflow_offset - 1.0

        late_overflow_return = ConditionBlock(
            Test(late_overflow_result, specifier=Test.IsInfty, likely=False),
            ExpRaiseReturn(ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)),
            Return(late_overflow_result)
        )

        late_underflow_test = Comparison(k, self.precision.get_emin_normal(),
                                         specifier=Comparison.LessOrEqual,
                                         likely=False)

        underflow_exp_offset = 2 * self.precision.get_field_size()
        corrected_coeff = ik + underflow_exp_offset

        # same two-step scaling for small exponents:
        # 2^(ik + offset) first, then 2^-offset
        exp_corrected = ExponentInsertion(corrected_coeff,
                                          precision=self.precision)
        exp_uflow_offset = ExponentInsertion(-underflow_exp_offset,
                                             precision=self.precision)

        late_underflow_result = (exp_corrected * (1 + poly)) * exp_uflow_offset - 1.0

        test_subnormal = Test(late_underflow_result,
                              specifier=Test.IsSubnormal, likely=False)

        late_underflow_return = Statement(
            ConditionBlock(
                test_subnormal,
                ExpRaiseReturn(ML_FPE_Underflow,
                               return_value=late_underflow_result)),
            Return(late_underflow_result)
        )

        # Reconstruction: 2^k * (poly + 1 - 2^-k)
        std_result = exp_k * (poly + diff)
        std_result.set_attributes(tag="result", debug=debug_multi)

        result_scheme = ConditionBlock(
            late_overflow_test,
            late_overflow_return,
            ConditionBlock(
                late_underflow_test,
                late_underflow_return,
                Return(std_result)
            )
        )

        std_return = ConditionBlock(
            overflow_test,
            overflow_return,
            ConditionBlock(
                underflow_test,
                underflow_return,
                result_scheme)
        )

        scheme = ConditionBlock(
            test_NaN_or_inf,
            Statement(specific_return),
            std_return
        )

        return scheme

    def numeric_emulate(self, input_value):
        """ Return the reference value expm1(input_value) """
        return expm1(input_value)

    standard_test_cases = [
        [sollya.parse(x)] for x in ["0x1.9b3216p-2", "0x1.8c108p-2"]
    ]
class ML_UT_M128_Debug(ML_Function("ml_ut_m128_debug")):
    """Unit-test meta-function mixing floating-point, integer and table
    operations, generated with the "m128_promotion" pass enabled and debug
    attributes set on several nodes."""

    def __init__(self, args=DefaultArgTemplate):
        # initializing base class
        ML_FunctionBasis.__init__(self, args)

    @staticmethod
    def get_default_args(**kw):
        """ Return a structure containing the arguments for current class,
            builtin from a default argument mapping overloaded with @p kw """
        default_args = {
            "output_file": "ut_m128_conversion.c",
            "function_name": "ut_m128_conversion",
            "precision": ML_Binary32,
            "target": X86_AVX2_Processor(),
            "fast_path_extract": True,
            "fuse_fma": True,
            "debug": True,
            "libm_compliant": True,
            "pre_gen_passes": ["m128_promotion"],
        }
        default_args.update(kw)
        return DefaultArgTemplate(**default_args)

    def generate_scheme(self):
        """ Build and return the test operation graph:
            table[index] * ((cst + index * (index + 7)) * mult - add_xx)
            with index = (nearestint(x) & 15) >> 1 """
        # declaring function input variable
        vx = self.implementation.add_input_variable("x", self.precision)

        add_xx = Addition(vx, vx, precision=self.precision)
        mult = Multiplication(add_xx, vx, precision=self.precision)
        cst = Constant(1.1, precision=self.precision)

        index_size = 4
        table_size = 2**index_size

        # identity table: table[i] == i
        table = ML_NewTable(
            dimensions=[table_size],
            storage_precision=self.precision
        )
        for i in range(table_size):
            table[i] = i

        index = NearestInteger(
            vx,
            precision=ML_Int32
        )
        # index = index % table_size = index & (2**index_size - 1)
        index = BitLogicAnd(
            TypeCast(index, precision=ML_UInt32),
            Constant(2**index_size - 1, precision=ML_UInt32),
            precision=ML_UInt32,
            tag="uindex",
            debug=debug_multi
        )
        index = BitLogicRightShift(
            TypeCast(index, precision=ML_Int32),
            Constant(1, precision=ML_Int32),
            tag="index",
            debug=debug_multi,
            precision=ML_Int32
        )

        table_value = TableLoad(table, index, precision=self.precision)

        int_tree = Multiplication(
            index,
            Addition(
                index,
                Constant(7, precision=ML_Int32),
                precision=ML_Int32
            ),
            precision=ML_Int32
        )

        result = Multiplication(
            table_value,
            FusedMultiplyAdd(
                Addition(
                    cst,
                    Conversion(int_tree, precision=self.precision),
                    precision=self.precision,
                    debug=debug_multi,
                    tag="fadd"
                ),
                mult,
                add_xx,
                specifier=FusedMultiplyAdd.Subtract,
                precision=self.precision,
                tag="fused",
                debug=debug_multi
            ),
            precision=self.precision,
            debug=debug_multi,
            tag="result"
        )

        scheme = Return(result, precision=self.precision, debug=debug_multi)

        return scheme

    def numeric_emulate(self, x):
        """ Return the value expected from the scheme of generate_scheme
            for input @p x, rounding each operation to self.precision """
        sollya_format = self.precision.get_sollya_object()

        def rnd(value):
            # round-to-nearest into the function's floating-point format
            return sollya.round(value, sollya_format, sollya.RN)

        # (nearestint(x) & 15) >> 1; Python's % yields a non-negative
        # result, matching the unsigned mask of the scheme
        index = (int(sollya.nearestint(x)) % 16) >> 1
        # the table is the identity, so the loaded value is the index itself
        table_value = index
        int_tree = index * (index + 7)

        add_xx = rnd(x + x)
        mult = rnd(add_xx * x)
        cst = rnd(1.1)
        # "fadd" node: cst + (float) int_tree
        fadd = rnd(cst + int_tree)
        # "fused" node: fadd * mult - add_xx
        # NOTE(review): assumes the fused node rounds once - confirm
        fused = rnd(fadd * mult - add_xx)
        return rnd(table_value * fused)
class ML_HyperbolicCosine(ML_Function("ml_cosh")):
    """Meta-function describing a table-based implementation of cosh(x)."""

    def __init__(self, args=DefaultArgTemplate):
        # initializing base class
        ML_FunctionBasis.__init__(self, args=args)

    @staticmethod
    def get_default_args(**args):
        """ Build the default argument structure of the hyperbolic cosine,
            overloaded with the entries of @p args """
        cosh_defaults = {
            "precision": ML_Binary32,
            "accuracy": ML_Faithful,
            "target": GenericProcessor(),
            "output_file": "my_cosh.c",
            "function_name": "my_cosh",
            "language": C_Code,
            "vector_size": 1
        }
        cosh_defaults.update(args)
        return DefaultArgTemplate(**cosh_defaults)

    def generate_scheme(self):
        """ Build and return the operation graph implementing cosh for
            an input of format self.precision """
        # function input
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        index_size = 3
        half_table = 2**index_size
        fp_format = self.precision.get_sollya_object()

        # from here on the scheme works on |x|
        vx = Abs(vx)
        int_precision = self.precision.get_integer_format()

        # argument reduction step: log(2) / 2^index_size, and its inverse
        red_step = log(2) / 2**index_size
        inv_step = round(1 / red_step, fp_format, RN)
        inv_step_cst = Constant(inv_step, precision=self.precision,
                                tag="inv_log2")

        # The high part of the reduction step keeps few enough non-zero bits
        # for k * step_hi to be exact, which makes r_hi accurate.
        # For a big enough x, cosh(x) ~ exp(abs(x)) / 2, hence
        #   cosh(x) > 2^1023 <=> exp(x) > 2^1024 <=> x > log(2^1024)
        # and k = inv_step * x; one more bit is removed as a guard.
        k_bound = inv_step * log(sollya.SollyaObject(2)**1024)
        max_k_bitsize = int(ceil(log2(k_bound)))
        Log.report(Log.Info, "max_k_bitsize: %d" % max_k_bitsize)
        step_hi_precision = self.precision.get_precision() - max_k_bitsize - 1

        step_hi = round(red_step, step_hi_precision, RN)
        step_lo = round(red_step - step_hi, fp_format, RN)
        step_hi_cst = Constant(step_hi, tag="log2_hi_value",
                               precision=self.precision)
        step_lo_cst = Constant(step_lo, tag="log2_lo_value",
                               precision=self.precision)

        scaled_vx = Multiplication(inv_step_cst, vx)
        k = Trunc(scaled_vx, precision=self.precision)
        k_log2 = Multiplication(k, step_hi_cst, precision=self.precision,
                                exact=True, tag="k_log2", unbreakable=True)
        r_hi = vx - k_log2
        r_hi.set_attributes(tag="r_hi", debug=debug_multi, unbreakable=True)
        r_lo = -k * step_lo_cst
        # reduced argument
        r = r_hi + r_lo
        r.set_attributes(tag="r", debug=debug_multi)

        # evaluation error of r_hi, with vx and k replaced by bounded variables
        copy_map = {
            vx: Variable("vx", interval=Interval(0, 715),
                         precision=self.precision),
            k: Variable("k", interval=Interval(0, 1024),
                        precision=self.precision)
        }
        r_eval_error = self.get_eval_error(r_hi, variable_copy_map=copy_map)
        print("r_eval_error: ", r_eval_error)

        approx_interval = Interval(-red_step, red_step)
        error_goal_approx = 2**-(self.precision.get_precision())

        poly_degree = sup(
            guessdegree(exp(sollya.x), approx_interval, error_goal_approx))
        precision_list = [1] + [self.precision] * (poly_degree)

        # k is split into k_hi (bits above index_size) and k_lo (table index)
        k_integer = Conversion(k, precision=int_precision, tag="k_integer",
                               debug=debug_multi)
        k_hi = BitLogicRightShift(k_integer, Constant(index_size),
                                  tag="k_int_hi", precision=int_precision,
                                  debug=debug_multi)
        k_lo = Modulo(k_integer, half_table, tag="k_int_lo",
                      precision=int_precision, debug=debug_multi)
        pow_exp = ExponentInsertion(
            Conversion(k_hi, precision=int_precision),
            precision=self.precision, tag="pow_exp", debug=debug_multi)

        # table row layout: [neg_hi, neg_lo, pos_hi, pos_lo] where
        # pos = 2^(n / 2^index_size) and neg = 2^(-n / 2^index_size)
        exp_table = ML_NewTable(dimensions=[2 * 2**index_size, 4],
                                storage_precision=self.precision,
                                tag=self.uniquify_name("exp2_table"))
        # mantissa width kept for the high part of each table entry
        reduced_hi_prec = int(self.precision.get_mantissa_size() * 2 / 3.0)
        for row in range(2 * half_table):
            if row >= half_table:
                input_value = row - half_table
            else:
                input_value = row

            # using SollyaObject wrapper to force evaluation by sollya
            # with higher precision
            exp_value = sollya.SollyaObject(2)**(
                (input_value) * 2**-index_size)
            mexp_value = sollya.SollyaObject(2)**(
                (-input_value) * 2**-index_size)
            pos_value_hi = round(exp_value, reduced_hi_prec, RN)
            pos_value_lo = round(exp_value - pos_value_hi, fp_format, RN)
            neg_value_hi = round(mexp_value, reduced_hi_prec, RN)
            neg_value_lo = round(mexp_value - neg_value_hi, fp_format, RN)
            entry = exp_table[row]
            entry[0] = neg_value_hi
            entry[1] = neg_value_lo
            entry[2] = pos_value_hi
            entry[3] = pos_value_lo

        # With step = log(2) / 2^index_size and r = x - k * step:
        #   exp(x) = exp(r) * 2^(k / 2^index_size)
        # k is written k = h * 2^index_size + l (k, h, l integers), so
        #   exp(x) = exp(r) * 2^h * 2^(l * 2^-index_size)
        # and, since cosh(x) = 1/2 * (exp(x) + exp(-x)),
        #   cosh(x) = exp(r)  * 2^(h-1)  * 2^(l * 2^-index_size)
        #           + exp(-r) * 2^(-h-1) * 2^(-l * 2^-index_size)
        # where
        #   exp(r)  = 1 + poly_pos(r)
        #   exp(-r) = 1 + poly_neg(r)
        #   2^(l / 2^index_size)  = pos_value_hi + pos_value_lo
        #   2^(-l / 2^index_size) = neg_value_hi + neg_value_lo
        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
            exp(sollya.x),
            poly_degree,
            precision_list,
            approx_interval,
            sollya.absolute,
            error_function=error_function)

        print("poly_approx_error: ", poly_approx_error,
              float(log2(poly_approx_error)))

        build_horner = PolynomialSchemeEvaluator.generate_horner_scheme
        # the constant term is dropped: poly(r) approximates exp(r) - 1
        poly_pos = build_horner(poly_object.sub_poly(start_index=1),
                                r, unified_precision=self.precision)
        poly_pos.set_attributes(tag="poly_pos", debug=debug_multi)

        poly_neg = build_horner(poly_object.sub_poly(start_index=1),
                                -r, unified_precision=self.precision)
        poly_neg.set_attributes(tag="poly_neg", debug=debug_multi)

        # only the second half of the table is addressed
        table_index = Addition(k_lo,
                               Constant(2**index_size,
                                        precision=int_precision),
                               precision=int_precision, tag="table_index",
                               debug=debug_multi)

        neg_value_load_hi = TableLoad(exp_table, table_index, 0,
                                      tag="neg_value_load_hi",
                                      debug=debug_multi)
        neg_value_load_lo = TableLoad(exp_table, table_index, 1,
                                      tag="neg_value_load_lo",
                                      debug=debug_multi)
        pos_value_load_hi = TableLoad(exp_table, table_index, 2,
                                      tag="pos_value_load_hi",
                                      debug=debug_multi)
        pos_value_load_lo = TableLoad(exp_table, table_index, 3,
                                      tag="pos_value_load_lo",
                                      debug=debug_multi)

        # exponents h - 1 and -h - 1, clamped from below to emin_normal
        k_plus = Max(
            Subtraction(k_hi, Constant(1, precision=int_precision),
                        precision=int_precision, tag="k_plus",
                        debug=debug_multi),
            Constant(self.precision.get_emin_normal(),
                     precision=int_precision))
        k_neg = Max(
            Subtraction(-k_hi, Constant(1, precision=int_precision),
                        precision=int_precision, tag="k_neg",
                        debug=debug_multi),
            Constant(self.precision.get_emin_normal(),
                     precision=int_precision))

        pow_exp_pos = ExponentInsertion(k_plus, precision=self.precision)
        pow_exp_neg = ExponentInsertion(k_neg, precision=self.precision)

        # leading terms: table high parts scaled by the powers of two
        scaled_pos_hi = pos_value_load_hi * pow_exp_pos
        scaled_neg_hi = neg_value_load_hi * pow_exp_neg
        hi_terms = scaled_pos_hi + scaled_neg_hi
        hi_terms.set_attributes(tag="hi_terms")

        # correction terms for the exp(r) branch
        pos_hi_poly = pos_value_load_hi * poly_pos
        pos_lo_poly = pos_value_load_lo * poly_pos
        pos_lo_sum = pos_value_load_lo + pos_lo_poly
        pos_exp = (pos_hi_poly + pos_lo_sum) * pow_exp_pos
        pos_exp.set_attributes(tag="pos_exp", debug=debug_multi)

        # correction terms for the exp(-r) branch
        neg_hi_poly = neg_value_load_hi * poly_neg
        neg_lo_poly = neg_value_load_lo * poly_neg
        neg_lo_sum = neg_value_load_lo + neg_lo_poly
        neg_exp = (neg_hi_poly + neg_lo_sum) * pow_exp_neg
        neg_exp.set_attributes(tag="neg_exp", debug=debug_multi)

        correction = Addition(pos_exp, neg_exp, precision=self.precision)
        result = Addition(correction, hi_terms, precision=self.precision,
                          tag="result", debug=debug_multi)

        # overflow threshold: acosh of the largest value of the format,
        # rounded down
        ov_value = round(acosh(self.precision.get_max_value()), fp_format, RD)
        ov_flag = Comparison(Abs(vx),
                             Constant(ov_value, precision=self.precision),
                             specifier=Comparison.Greater)

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        selected = Select(ov_flag, FP_PlusInfty(self.precision), result)
        scheme = Statement(Return(selected))

        return scheme

    def generate_emulate(self, result_ternary, result, mpfr_x, mpfr_rnd):
        """ Generate the emulation code for the hyperbolic cosine: a call to
            mpfr_cosh(result, mpfr_x, mpfr_rnd) whose return value is
            assigned to @p result_ternary.
            mpfr_x is a mpfr_t variable which should have the right precision
            mpfr_rnd is the rounding mode
        """
        emulate_func_name = "mpfr_cosh"
        arg_mapping = {0: FO_Arg(0), 1: FO_Arg(1), 2: FO_Arg(2)}
        emulate_func_op = FunctionOperator(emulate_func_name,
                                           arg_map=arg_mapping,
                                           require_header=["mpfr.h"])
        emulate_func = FunctionObject(emulate_func_name,
                                      [ML_Mpfr_t, ML_Mpfr_t, ML_Int32],
                                      ML_Int32, emulate_func_op)
        emulate_call = emulate_func(result, mpfr_x, mpfr_rnd)
        mpfr_call = Statement(ReferenceAssign(result_ternary, emulate_call))

        return mpfr_call

    def numeric_emulate(self, input_value):
        """ Return the reference value cosh(input_value) """
        return cosh(input_value)

    standard_test_cases = [
        sollya_parse(x) for x in [
            "1.705527",
            "0.935715",
            "-0x1.e45322ap-1",
            "0x1.b8ef9f54p-1",
            "-0x1.b8ef9f54p-1",
            "0x1.b6fdb8a8p-1",
        ]
    ]
class ML_Exp2(ML_Function("ml_exp2")):
    """Meta-function describing a table-based implementation of
    exp2(x) = 2^x."""

    def __init__(self,
                 arg_template=DefaultArgTemplate,
                 precision=ML_Binary32,
                 accuracy=ML_Faithful,
                 libm_compliant=True,
                 debug_flag=False,
                 fuse_fma=True,
                 fast_path_extract=True,
                 target=GenericProcessor(),
                 output_file="my_exp2.c",
                 function_name="my_exp2",
                 language=C_Code,
                 vector_size=1):
        # initializing I/O precision
        precision = ArgDefault.select_value(
            [arg_template.precision, precision])
        io_precisions = [precision] * 2

        # initializing base class
        ML_FunctionBasis.__init__(self,
                                  base_name="exp2",
                                  function_name=function_name,
                                  output_file=output_file,
                                  io_precisions=io_precisions,
                                  abs_accuracy=None,
                                  libm_compliant=libm_compliant,
                                  processor=target,
                                  fuse_fma=fuse_fma,
                                  fast_path_extract=fast_path_extract,
                                  debug_flag=debug_flag,
                                  language=language,
                                  vector_size=vector_size,
                                  arg_template=arg_template)

        self.accuracy = accuracy
        self.precision = precision

    def generate_scheme(self):
        """ Build and return the operation graph implementing exp2 for
            an input of format self.precision """
        # declaring the function input variable
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        index_size = 3

        approx_interval = Interval(0.0, 2**-index_size)
        error_goal_approx = 2**-(self.precision.get_precision())
        int_precision = self.precision.get_integer_format()

        # x is split as vx_int * 2^-index_size + vx_frac,
        # with vx_int = floor(x * 2^index_size)
        vx_int = Floor(vx * 2**index_size, precision=self.precision,
                       tag="vx_int", debug=debug_multi)
        vx_frac = vx - (vx_int * 2**-index_size)
        vx_frac.set_attributes(tag="vx_frac", debug=debug_multi,
                               unbreakable=True)
        poly_degree = sup(
            guessdegree(2**(sollya.x), approx_interval,
                        error_goal_approx)) + 1
        precision_list = [1] + [self.precision] * (poly_degree)

        # vx_int is further split into its index_size low bits (table index)
        # and the remaining high bits (exponent of the result)
        vx_integer = Conversion(vx_int, precision=int_precision,
                                tag="vx_integer", debug=debug_multi)
        vx_int_hi = BitLogicRightShift(vx_integer, Constant(index_size),
                                       tag="vx_int_hi", debug=debug_multi)
        vx_int_lo = Modulo(vx_integer, 2**index_size, tag="vx_int_lo",
                           debug=debug_multi)
        pow_exp = ExponentInsertion(
            Conversion(vx_int_hi, precision=int_precision),
            precision=self.precision, tag="pow_exp", debug=debug_multi)

        # table row layout: [lo, hi] with hi + lo ~ 2^(n / 2^index_size)
        exp2_table = ML_NewTable(dimensions=[2 * 2**index_size, 2],
                                 storage_precision=self.precision,
                                 tag=self.uniquify_name("exp2_table"))
        for i in range(2 * 2**index_size):
            input_value = i - 2**index_size if i >= 2**index_size else i
            exp2_value = SollyaObject(2)**((input_value) * 2**-index_size)
            hi_value = round(exp2_value,
                             self.precision.get_sollya_object(), RN)
            lo_value = round(exp2_value - hi_value,
                             self.precision.get_sollya_object(), RN)
            exp2_table[i][0] = lo_value
            exp2_table[i][1] = hi_value

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
            2**(sollya.x),
            poly_degree,
            precision_list,
            approx_interval,
            sollya.absolute,
            error_function=error_function)

        # print() call form: the former print statement is a syntax error
        # under Python 3
        print("poly_approx_error: ", poly_approx_error,
              float(log2(poly_approx_error)))

        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme
        # the constant term is dropped: poly approximates 2^vx_frac - 1
        poly = polynomial_scheme_builder(
            poly_object.sub_poly(start_index=1),
            vx_frac,
            unified_precision=self.precision)
        poly.set_attributes(tag="poly", debug=debug_multi)

        # only the second half of the table is addressed
        table_index = Addition(vx_int_lo,
                               Constant(2**index_size,
                                        precision=int_precision),
                               precision=int_precision, tag="table_index",
                               debug=debug_multi)

        lo_value_load = TableLoad(exp2_table, table_index, 0,
                                  tag="lo_value_load", debug=debug_multi)
        hi_value_load = TableLoad(exp2_table, table_index, 1,
                                  tag="hi_value_load", debug=debug_multi)

        # (hi + lo) * (1 + poly) * 2^vx_int_hi, accumulating the small
        # terms first
        result = (hi_value_load +
                  (hi_value_load * poly +
                   (lo_value_load + lo_value_load * poly))) * pow_exp
        # NOTE(review): vx_int_hi is derived from an integer conversion while
        # the bound constant is declared with the floating-point precision
        ov_flag = Comparison(vx_int_hi,
                             Constant(self.precision.get_emax(),
                                      precision=self.precision),
                             specifier=Comparison.Greater)

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = Statement(
            Return(Select(ov_flag, FP_PlusInfty(self.precision), result)))

        return scheme

    def generate_emulate(self, result_ternary, result, mpfr_x, mpfr_rnd):
        """ Generate the emulation code for exp2: a call to
            mpfr_exp2(result, mpfr_x, mpfr_rnd) whose return value is
            assigned to @p result_ternary.
            mpfr_x is a mpfr_t variable which should have the right precision
            mpfr_rnd is the rounding mode
        """
        # base-2 exponential, consistent with numeric_emulate
        # (mpfr_exp would compute e^x)
        emulate_func_name = "mpfr_exp2"
        emulate_func_op = FunctionOperator(emulate_func_name,
                                           arg_map={
                                               0: FO_Arg(0),
                                               1: FO_Arg(1),
                                               2: FO_Arg(2)
                                           },
                                           require_header=["mpfr.h"])
        emulate_func = FunctionObject(emulate_func_name,
                                      [ML_Mpfr_t, ML_Mpfr_t, ML_Int32],
                                      ML_Int32, emulate_func_op)
        mpfr_call = Statement(
            ReferenceAssign(result_ternary,
                            emulate_func(result, mpfr_x, mpfr_rnd)))

        return mpfr_call

    def numeric_emulate(self, input_value):
        """ Return the reference value 2^input_value """
        return sollya.SollyaObject(2)**(input_value)
class ML_Log10(ML_Function("log10")):
    """Meta-function describing a table-based implementation of log10(x)."""

    def __init__(self, args):
        # initializing base class
        ML_FunctionBasis.__init__(self, args)

    @staticmethod
    def get_default_args(**kw):
        """ Return a structure containing the arguments for ML_Log10,
            builtin from a default argument mapping overloaded with @p kw """
        default_args_log10 = {
            "output_file": "my_log10f.c",
            "function_name": "my_log10f",
            "precision": ML_Binary32,
            "accuracy": ML_Faithful,
            "target": GenericProcessor()
        }
        default_args_log10.update(kw)
        return DefaultArgTemplate(**default_args_log10)

    def generate_emulate(self, result, mpfr_x, mpfr_rnd):
        """ Generate the emulation code for log10: a call to mpfr_log10
            with @p result mapped as the destination operand.
            mpfr_x is a mpfr_t variable which should have the right precision
            mpfr_rnd is the rounding mode
        """
        emulate_func_name = "mpfr_log10"
        emulate_func_op = FunctionOperator(emulate_func_name,
                                           arg_map={
                                               0: FO_Result(0),
                                               1: FO_Arg(0),
                                               2: FO_Arg(1)
                                           },
                                           require_header=["mpfr.h"])
        emulate_func = FunctionObject(emulate_func_name,
                                      [ML_Mpfr_t, ML_Int32], ML_Mpfr_t,
                                      emulate_func_op)
        mpfr_call = Statement(
            ReferenceAssign(result, emulate_func(mpfr_x, mpfr_rnd)))

        return mpfr_call

    def generate_scheme(self):
        """ Build and return the operation graph implementing log10 for
            an input of format self.get_input_precision() """
        vx = self.implementation.add_input_variable(
            "x", self.get_input_precision())

        sollya_precision = self.get_input_precision().get_sollya_object()

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        # NOTE: the five nodes below are built but not referenced by the
        # returned scheme
        test_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False,
                               debug=True, tag="nan_or_inf")
        test_nan = Test(vx, specifier=Test.IsNaN, debug=True,
                        tag="is_nan_test")
        test_positive = Comparison(vx, 0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=True, tag="inf_sign")

        test_signaling_nan = Test(vx, specifier=Test.IsSignalingNaN,
                                  debug=True, tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        # log10(2) is split into a high part with a shortened mantissa and
        # a low part holding the remainder
        log2_hi_value = round(
            log10(2),
            self.precision.get_field_size() -
            (self.precision.get_exponent_size() + 1), RN)
        log2_lo_value = round(
            log10(2) - log2_hi_value,
            self.precision.get_sollya_object(), RN)

        log2_hi = Constant(log2_hi_value, precision=self.precision)
        log2_lo = Constant(log2_lo_value, precision=self.precision)

        vx_exp = ExponentExtraction(vx, tag="vx_exp", debug=debugd)

        int_precision = self.precision.get_integer_format()

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision=self.precision)
        dummy_div_seed = DivisionSeed(dummy_var, precision=self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed,
            language=None,
            table_getter=lambda self: self.approx_table_map)

        # table creation: row i holds log10(inv_approx_table[i][0]) as a
        # (high, low) pair; row 0 is zero
        table_index_size = 7
        table_index_range = range(1, 2**table_index_size)
        log_table = ML_NewTable(dimensions=[2**table_index_size, 2],
                                storage_precision=self.precision)
        log_table[0][0] = 0.0
        log_table[0][1] = 0.0
        for i in table_index_range:
            inv_value = inv_approx_table[i][0]
            value_high = round(
                log10(inv_value),
                self.precision.get_field_size() -
                (self.precision.get_exponent_size() + 1), sollya.RN)
            value_low = round(
                log10(inv_value) - value_high, sollya_precision, sollya.RN)
            log_table[i][0] = value_high
            log_table[i][1] = value_low

        # determining log_table range
        high_index_function = lambda table, i: table[i][0]
        low_index_function = lambda table, i: table[i][1]
        table_high_interval = log_table.get_subset_interval(
            high_index_function, table_index_range)
        table_low_interval = log_table.get_subset_interval(
            low_index_function, table_index_range)

        def compute_log(_vx, exp_corr_factor=None):
            """ Build the log10 evaluation of @p _vx; when given,
                @p exp_corr_factor is added to the extracted exponent.
                Return the tuple (result, poly, log_inv_lo, log_inv_hi,
                red_vx, result_one, corr_exp) """
            _vx_mant = MantissaExtraction(_vx, tag="_vx_mant",
                                          debug=debug_lftolx)
            _vx_exp = ExponentExtraction(_vx, tag="_vx_exp", debug=debugd)

            # table index: 7 bits taken right below bit position
            # get_field_size() of the mantissa encoding
            table_index = BitLogicAnd(
                BitLogicRightShift(
                    TypeCast(_vx_mant, precision=int_precision,
                             debug=debuglx),
                    self.precision.get_field_size() - 7,
                    debug=debuglx),
                0x7f,
                tag="table_index",
                debug=debuglld)

            # argument reduction
            # TODO: detect if single operand inverse seed is supported by
            # the targeted architecture
            # the seed's least significant bit is cleared (mask with -2)
            pre_arg_red_index = TypeCast(
                BitLogicAnd(
                    TypeCast(
                        DivisionSeed(_vx_mant, precision=self.precision,
                                     tag="seed", debug=debug_lftolx,
                                     silent=True),
                        precision=ML_UInt64),
                    Constant(-2, precision=ML_UInt64),
                    precision=ML_UInt64),
                precision=self.precision,
                tag="pre_arg_red_index",
                debug=debug_lftolx)
            arg_red_index = Select(Equal(table_index, 0), 1.0,
                                   pre_arg_red_index, tag="arg_red_index",
                                   debug=debug_lftolx)
            _red_vx = arg_red_index * _vx_mant - 1.0
            inv_err = S2**-7
            # NOTE(review): this interval is centered on 1 although _red_vx
            # has 1.0 subtracted (the polynomial below uses an interval
            # centered on 0) - confirm which one is intended
            red_interval = Interval(1 - inv_err, 1 + inv_err)
            _red_vx.set_attributes(tag="_red_vx", debug=debug_lftolx,
                                   interval=red_interval)

            # return in case of standard (non-special) input
            _log_inv_lo = TableLoad(log_table, table_index, 1,
                                    tag="log_inv_lo", debug=debug_lftolx)
            _log_inv_hi = TableLoad(log_table, table_index, 0,
                                    tag="log_inv_hi", debug=debug_lftolx)

            print("building mathematical polynomial")
            approx_interval = Interval(-inv_err, inv_err)
            poly_degree = sup(
                guessdegree(
                    log10(1 + sollya.x) / sollya.x, approx_interval,
                    S2**-(self.precision.get_field_size() + 1))) + 1
            # sollya.x is the free variable (same as in guessdegree above)
            global_poly_object = Polynomial.build_from_approximation(
                log10(1 + sollya.x) / sollya.x, poly_degree,
                [self.precision] * (poly_degree + 1), approx_interval,
                sollya.absolute)
            poly_object = global_poly_object

            print("generating polynomial evaluation scheme")
            _poly = PolynomialSchemeEvaluator.generate_horner_scheme(
                poly_object, _red_vx, unified_precision=self.precision)
            _poly.set_attributes(tag="poly", debug=debug_lftolx)
            print(global_poly_object.get_sollya_object())

            corr_exp = Conversion(
                _vx_exp if exp_corr_factor is None
                else _vx_exp + exp_corr_factor,
                precision=self.precision)
            split_red_vx = Split(_red_vx, precision=ML_DoubleDouble,
                                 tag="split_red_vx", debug=debug_ddtolx)
            red_vx_hi = split_red_vx.hi
            red_vx_lo = split_red_vx.lo

            # result = _red_vx * poly - log_inv_hi - log_inv_lo
            #          + _vx_exp * log2_hi + _vx_exp * log2_lo
            pre_result = -_log_inv_hi + (
                (_red_vx * _poly + (corr_exp * log2_lo - _log_inv_lo)))
            pre_result.set_attributes(tag="pre_result", debug=debug_lftolx)
            exact_log2_hi_exp = corr_exp * log2_hi
            exact_log2_hi_exp.set_attributes(tag="exact_log2_hi_hex",
                                             debug=debug_lftolx)
            cancel_part = (corr_exp * log2_hi - _log_inv_hi)
            cancel_part.set_attributes(tag="cancel_part",
                                       debug=debug_lftolx)
            sub_part = red_vx_hi + cancel_part
            sub_part.set_attributes(tag="sub_part", debug=debug_lftolx)
            result_one_low_part = (
                (red_vx_lo + (red_vx_lo * _poly +
                              (corr_exp * log2_lo - _log_inv_lo))))
            result_one_low_part.set_attributes(tag="result_one_low_part",
                                               debug=debug_lftolx)
            _result_one = (
                (sub_part) + red_vx_hi * _poly) + result_one_low_part
            _result = exact_log2_hi_exp + pre_result
            return (_result, _poly, _log_inv_lo, _log_inv_hi, _red_vx,
                    _result_one, corr_exp)

        result, poly, log_inv_lo, log_inv_hi, red_vx, new_result_one, corr_exp = compute_log(
            vx)
        result.set_attributes(tag="result", debug=debug_lftolx)
        new_result_one.set_attributes(tag="new_result_one",
                                      debug=debug_lftolx)

        # building eval error map
        eval_error_map = {
            red_vx:
            Variable("red_vx", precision=self.precision,
                     interval=red_vx.get_interval()),
            log_inv_hi:
            Variable("log_inv_hi", precision=self.precision,
                     interval=table_high_interval),
            log_inv_lo:
            Variable("log_inv_lo", precision=self.precision,
                     interval=table_low_interval),
            corr_exp:
            Variable("corr_exp_g", precision=self.precision,
                     interval=self.precision.get_exponent_interval()),
        }
        # computing gappa error
        if is_gappa_installed():
            poly_eval_error = self.get_eval_error(result, eval_error_map)
            print("poly_eval_error: ", poly_eval_error)

        neg_input = Comparison(vx, 0, likely=False,
                               specifier=Comparison.Less, debug=debugd,
                               tag="neg_input")
        vx_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False,
                             debug=debugd, tag="nan_or_inf")
        vx_snan = Test(vx, specifier=Test.IsSignalingNaN, likely=False,
                       debug=debugd, tag="snan")
        vx_inf = Test(vx, specifier=Test.IsInfty, likely=False,
                      debug=debugd, tag="inf")
        vx_subnormal = Test(vx, specifier=Test.IsSubnormal, likely=False,
                            debug=debugd, tag="vx_subnormal")
        vx_zero = Test(vx, specifier=Test.IsZero, likely=False,
                       debug=debugd, tag="vx_zero")
        exp_mone = Equal(vx_exp, -1, tag="exp_minus_one", debug=debugd,
                         likely=False)
        vx_one = Equal(vx, 1.0, tag="vx_one", likely=False, debug=debugd)

        # exp=-1 case: the exponent contribution (-1 * log10(2)) is folded
        # into the table terms
        print("managing exp=-1 case")

        log_subtract = -log_inv_hi - log2_hi
        log_subtract.set_attributes(tag="log_subtract", debug=debug_lftolx)
        result2 = (log_subtract) + ((poly * red_vx) - (log_inv_lo + log2_lo))
        result2.set_attributes(tag="result2", debug=debug_lftolx)

        # subnormal input: x is scaled by 2^100 and the exponent is
        # corrected by -100
        m100 = -100
        S2100 = Constant(S2**100, precision=self.precision)
        result_subnormal, _, _, _, _, _, _ = compute_log(
            vx * S2100, exp_corr_factor=m100)

        # NOTE: the close-to-1.0 path (result_one / cond_one) is built below
        # but is not referenced by the returned scheme
        print("managing close to 1.0 cases")
        one_err = S2**-7
        approx_interval_one = Interval(-one_err, one_err)
        red_vx_one = vx - 1.0
        poly_degree_one = sup(
            guessdegree(
                log10(1 + sollya.x) / sollya.x, approx_interval_one,
                S2**-(self.precision.get_field_size() + 1))) + 1
        poly_object_one = Polynomial.build_from_approximation(
            log10(1 + sollya.x) / sollya.x, poly_degree_one,
            [self.precision] * (poly_degree_one + 1), approx_interval_one,
            sollya.absolute).sub_poly(start_index=1)
        poly_one = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object_one, red_vx_one, unified_precision=self.precision)
        poly_one.set_attributes(tag="poly_one", debug=debug_lftolx)
        result_one = red_vx_one + red_vx_one * poly_one
        cond_one = (vx < (1 + one_err)) & (vx > (1 - one_err))
        cond_one.set_attributes(tag="cond_one", debug=debugd, likely=False)

        # main scheme
        print("MDL scheme")
        pre_scheme = ConditionBlock(
            # x < 0: invalid
            neg_input,
            Statement(ClearException(), Raise(ML_FPE_Invalid),
                      Return(FP_QNaN(self.precision))),
            ConditionBlock(
                vx_nan_or_inf,
                ConditionBlock(
                    vx_inf,
                    Statement(
                        ClearException(),
                        Return(FP_PlusInfty(self.precision)),
                    ),
                    Statement(ClearException(),
                              ConditionBlock(vx_snan,
                                             Raise(ML_FPE_Invalid)),
                              Return(FP_QNaN(self.precision)))),
                ConditionBlock(
                    vx_subnormal,
                    ConditionBlock(
                        vx_zero,
                        Statement(
                            ClearException(),
                            Raise(ML_FPE_DivideByZero),
                            Return(FP_MinusInfty(self.precision)),
                        ),
                        Return(result_subnormal)),
                    ConditionBlock(
                        vx_one,
                        Statement(
                            ClearException(),
                            Return(FP_PlusZero(self.precision)),
                        ),
                        ConditionBlock(exp_mone, Return(result2),
                                       Return(result))
                    ))))
        scheme = pre_scheme
        return scheme

    def numeric_emulate(self, input_value):
        """ Return the reference value log10(input_value) """
        return log10(input_value)
class ML_Log2(ML_Function("ml_log2")):
    """Meta-function generating an implementation of the base-2 logarithm.

    The generated scheme is table based: a division-seed approximation of
    the mantissa inverse is used as argument reduction, the matching
    log2 value is loaded from a (hi, lo) table and a polynomial
    approximating log2(1 + x) / x is evaluated on the reduced argument.
    """

    def __init__(self,
                 arg_template=DefaultArgTemplate,
                 precision=ML_Binary32,
                 abs_accuracy=S2**-24,
                 libm_compliant=True,
                 debug_flag=False,
                 fuse_fma=True,
                 fast_path_extract=True,
                 target=GenericProcessor(),
                 output_file="log2f.c",
                 function_name="log2f"):
        """Build the meta-function.

        The I/O precision is selected between ``arg_template.precision``
        and ``precision`` through ``ArgDefault.select_value``.

        NOTE(review): ``abs_accuracy`` is accepted but ``None`` is what is
        forwarded to the base class -- confirm this is intended.
        """
        # extracting precision argument from command line
        precision = ArgDefault.select_value(
            [arg_template.precision, precision])
        io_precisions = [precision] * 2

        # initializing base class
        ML_FunctionBasis.__init__(self,
                                  base_name="log2",
                                  function_name=function_name,
                                  output_file=output_file,
                                  io_precisions=io_precisions,
                                  abs_accuracy=None,
                                  libm_compliant=libm_compliant,
                                  processor=target,
                                  fuse_fma=fuse_fma,
                                  fast_path_extract=fast_path_extract,
                                  debug_flag=debug_flag,
                                  arg_template=arg_template)

        self.precision = precision

    def generate_emulate(self, result, mpfr_x, mpfr_rnd):
        """Generate the emulation code for ML_Log2 functions.

        mpfr_x is a mpfr_t variable which should have the right precision,
        mpfr_rnd is the rounding mode.  The returned Statement assigns
        ``mpfr_log2(mpfr_x, mpfr_rnd)`` to ``result``.
        """
        emulate_func_name = "mpfr_log2"
        emulate_func_op = FunctionOperator(emulate_func_name,
                                           arg_map={
                                               0: FO_Result(0),
                                               1: FO_Arg(0),
                                               2: FO_Arg(1)
                                           },
                                           require_header=["mpfr.h"])
        emulate_func = FunctionObject(emulate_func_name,
                                      [ML_Mpfr_t, ML_Int32], ML_Mpfr_t,
                                      emulate_func_op)
        mpfr_call = Statement(
            ReferenceAssign(result, emulate_func(mpfr_x, mpfr_rnd)))

        return mpfr_call

    def generate_scheme(self):
        """Build and return the operation graph implementing log2(x)."""
        vx = self.implementation.add_input_variable(
            "x", self.get_input_precision())

        sollya_precision = self.get_input_precision().get_sollya_object()

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        # NOTE(review): the five nodes below are built but never attached
        # to the returned scheme; kept as in the original.
        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=True,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=True,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=True,
                                   tag="inf_sign")
        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=True,
                                  tag="is_signaling_nan")
        # the NaN is built in the function precision rather than a
        # hard-coded ML_Binary32, like every other special value below
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        vx_exp = ExponentExtraction(vx, tag="vx_exp", debug=debugd)

        int_precision = (ML_Int64
                         if self.precision is ML_Binary64 else ML_Int32)

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision=self.precision)
        dummy_div_seed = DivisionSeed(dummy_var, precision=self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed,
            language=None,
            table_getter=lambda self: self.approx_table_map)

        # table creation: entry i holds log2 of the i-th inverse seed,
        # split into a high part and a low correction part
        table_index_size = 7
        log_table = ML_Table(dimensions=[2**table_index_size, 2],
                             storage_precision=self.precision,
                             tag=self.uniquify_name("inv_table"))
        log_table[0][0] = 0.0
        log_table[0][1] = 0.0
        for i in range(1, 2**table_index_size):
            inv_value = inv_approx_table[i][0]
            value_high = round(
                log2(inv_value),
                self.precision.get_field_size() -
                (self.precision.get_exponent_size() + 1), sollya.RN)
            value_low = round(
                log2(inv_value) - value_high, sollya_precision, sollya.RN)
            log_table[i][0] = value_high
            log_table[i][1] = value_low

        def compute_log(_vx, exp_corr_factor=None):
            """Build the log2 evaluation of ``_vx``.

            ``exp_corr_factor``, when given, is added to the extracted
            exponent (used to undo a pre-scaling of the input).
            Returns (result, poly, log_inv_lo, log_inv_hi, red_vx).
            """
            _vx_mant = MantissaExtraction(_vx,
                                          tag="_vx_mant",
                                          debug=debug_lftolx)
            _vx_exp = ExponentExtraction(_vx, tag="_vx_exp", debug=debugd)

            table_index = BitLogicAnd(BitLogicRightShift(
                TypeCast(_vx_mant, precision=int_precision, debug=debuglx),
                self.precision.get_field_size() - 7,
                debug=debuglx),
                                      0x7f,
                                      tag="table_index",
                                      debug=debuglld)

            # argument reduction
            # TODO: detect if single operand inverse seed is supported by
            # the targeted architecture
            # NOTE(review): the seed is cast through ML_UInt64 whatever
            # self.precision is -- confirm for 32-bit precisions.
            pre_arg_red_index = TypeCast(BitLogicAnd(
                TypeCast(DivisionSeed(_vx_mant,
                                      precision=self.precision,
                                      tag="seed",
                                      debug=debug_lftolx,
                                      silent=True),
                         precision=ML_UInt64),
                Constant(-2, precision=ML_UInt64),
                precision=ML_UInt64),
                                         precision=self.precision,
                                         tag="pre_arg_red_index",
                                         debug=debug_lftolx)
            arg_red_index = Select(Equal(table_index, 0),
                                   1.0,
                                   pre_arg_red_index,
                                   tag="arg_red_index",
                                   debug=debug_lftolx)
            _red_vx = arg_red_index * _vx_mant - 1.0
            _red_vx.set_attributes(tag="_red_vx", debug=debug_lftolx)
            inv_err = S2**-7

            # return in case of standard (non-special) input
            _log_inv_lo = TableLoad(log_table,
                                    table_index,
                                    1,
                                    tag="log_inv_lo",
                                    debug=debug_lftolx)
            _log_inv_hi = TableLoad(log_table,
                                    table_index,
                                    0,
                                    tag="log_inv_hi",
                                    debug=debug_lftolx)

            print("building mathematical polynomial")
            approx_interval = Interval(-inv_err, inv_err)
            poly_degree = sup(
                guessdegree(
                    log2(1 + sollya.x) / sollya.x, approx_interval,
                    S2**-(self.precision.get_field_size() + 1))) + 1
            global_poly_object = Polynomial.build_from_approximation(
                log2(1 + sollya.x) / sollya.x, poly_degree,
                [self.precision] * (poly_degree + 1), approx_interval,
                sollya.absolute)
            poly_object = global_poly_object.sub_poly(start_index=0)

            Attributes.set_default_silent(True)
            Attributes.set_default_rounding_mode(ML_RoundToNearest)

            print("generating polynomial evaluation scheme")
            _poly = PolynomialSchemeEvaluator.generate_horner_scheme(
                poly_object, _red_vx, unified_precision=self.precision)
            _poly.set_attributes(tag="poly", debug=debug_lftolx)
            print("sollya global_poly_object")
            print(global_poly_object.get_sollya_object())
            print("sollya poly_object")
            print(poly_object.get_sollya_object())

            # identity test: exp_corr_factor is either None or a number
            corr_exp = (_vx_exp if exp_corr_factor is None else
                        _vx_exp + exp_corr_factor)
            split_red_vx = Split(_red_vx,
                                 precision=ML_DoubleDouble,
                                 tag="split_red_vx",
                                 debug=debug_ddtolx)
            red_vx_hi = split_red_vx.hi
            red_vx_lo = split_red_vx.lo

            Attributes.unset_default_rounding_mode()
            Attributes.unset_default_silent()

            # result = _red_vx * poly - log_inv_hi - log_inv_lo + _vx_exp
            pre_result = -_log_inv_hi + (_red_vx * _poly + (-_log_inv_lo))
            pre_result.set_attributes(tag="pre_result", debug=debug_lftolx)
            # same object as corr_exp: this call re-tags corr_exp itself
            exact_log2_hi_exp = corr_exp
            exact_log2_hi_exp.set_attributes(tag="exact_log2_hi_hex",
                                             debug=debug_lftolx)
            _result = corr_exp + pre_result
            return _result, _poly, _log_inv_lo, _log_inv_hi, _red_vx

        result, poly, log_inv_lo, log_inv_hi, red_vx = compute_log(vx)
        result.set_attributes(tag="result", debug=debug_lftolx)

        neg_input = Comparison(vx,
                               0,
                               likely=False,
                               specifier=Comparison.Less,
                               debug=debugd,
                               tag="neg_input")
        vx_nan_or_inf = Test(vx,
                             specifier=Test.IsInfOrNaN,
                             likely=False,
                             debug=debugd,
                             tag="nan_or_inf")
        vx_snan = Test(vx,
                       specifier=Test.IsSignalingNaN,
                       likely=False,
                       debug=debugd,
                       tag="snan")
        vx_inf = Test(vx,
                      specifier=Test.IsInfty,
                      likely=False,
                      debug=debugd,
                      tag="inf")
        vx_subnormal = Test(vx,
                            specifier=Test.IsSubnormal,
                            likely=False,
                            debug=debugd,
                            tag="vx_subnormal")
        vx_zero = Test(vx,
                       specifier=Test.IsZero,
                       likely=False,
                       debug=debugd,
                       tag="vx_zero")
        exp_mone = Equal(vx_exp,
                         -1,
                         tag="exp_minus_one",
                         debug=debugd,
                         likely=False)
        vx_one = Equal(vx, 1.0, tag="vx_one", likely=False, debug=debugd)

        # exp=-1 case
        print("managing exp=-1 case")
        result2 = (-log_inv_hi - 1.0) + ((poly * red_vx) - log_inv_lo)
        result2.set_attributes(tag="result2", debug=debug_lftolx)

        # subnormal inputs are pre-scaled by 2**100 and the exponent is
        # corrected by -100 inside compute_log
        m100 = -100
        S2100 = Constant(S2**100, precision=self.precision)
        result_subnormal, _, _, _, _ = compute_log(vx * S2100,
                                                   exp_corr_factor=m100)
        result_subnormal.set_attributes(tag="result_subnormal",
                                        debug=debug_lftolx)

        print("managing close to 1.0 cases")
        # NOTE(review): result_one / cond_one are built but not attached
        # to the returned scheme, and the approximated function is the
        # natural log while this class targets log2 -- confirm before
        # enabling this path.
        one_err = S2**-7
        approx_interval_one = Interval(-one_err, one_err)
        red_vx_one = vx - 1.0
        # sollya.x / sollya.absolute are spelled as in compute_log above
        # (the bare names x / absolute were used here)
        poly_degree_one = sup(
            guessdegree(
                log(1 + sollya.x) / sollya.x, approx_interval_one,
                S2**-(self.precision.get_field_size() + 1))) + 1
        poly_object_one = Polynomial.build_from_approximation(
            log(1 + sollya.x) / sollya.x, poly_degree_one,
            [self.precision] * (poly_degree_one + 1), approx_interval_one,
            sollya.absolute).sub_poly(start_index=1)
        poly_one = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object_one, red_vx_one, unified_precision=self.precision)
        poly_one.set_attributes(tag="poly_one", debug=debug_lftolx)
        result_one = red_vx_one + red_vx_one * poly_one
        cond_one = (vx < (1 + one_err)) & (vx > (1 - one_err))
        cond_one.set_attributes(tag="cond_one", debug=debugd, likely=False)

        # main scheme
        print("MDL scheme")
        pre_scheme = ConditionBlock(
            neg_input,
            # negative input: invalid operation, quiet NaN
            Statement(ClearException(), Raise(ML_FPE_Invalid),
                      Return(FP_QNaN(self.precision))),
            ConditionBlock(
                vx_nan_or_inf,
                ConditionBlock(
                    vx_inf,
                    Statement(
                        ClearException(),
                        Return(FP_PlusInfty(self.precision)),
                    ),
                    Statement(ClearException(),
                              ConditionBlock(vx_snan, Raise(ML_FPE_Invalid)),
                              Return(FP_QNaN(self.precision)))),
                ConditionBlock(
                    vx_subnormal,
                    ConditionBlock(
                        vx_zero,
                        # zero input: divide-by-zero, -infinity
                        Statement(
                            ClearException(),
                            Raise(ML_FPE_DivideByZero),
                            Return(FP_MinusInfty(self.precision)),
                        ),
                        Statement(ClearException(), result_subnormal,
                                  Return(result_subnormal))),
                    ConditionBlock(
                        vx_one,
                        Statement(
                            ClearException(),
                            Return(FP_PlusZero(self.precision)),
                        ),
                        ConditionBlock(exp_mone, Return(result2),
                                       Return(result))))))
        scheme = Statement(result, pre_scheme)
        return scheme

    def numeric_emulate(self, input_value):
        """Return the reference value log2(input_value)."""
        return log2(input_value)
class ML_Log(ML_Function("ml_log")):
    """Experimental fixed-point natural logarithm meta-function.

    The scheme relies on a two-step table based argument reduction whose
    output intervals are obtained from Gappa, followed by a polynomial
    evaluation on the reduced argument.
    """

    def __init__(self,
                 precision=ML_Binary64,
                 abs_accuracy=S2**-24,
                 libm_compliant=True,
                 debug_flag=False,
                 fuse_fma=True,
                 fast_path_extract=True,
                 target=GenericProcessor(),
                 output_file="log_fixed.c",
                 function_name="log_fixed"):
        """Build the meta-function.

        NOTE(review): ``abs_accuracy`` is accepted but ``None`` is what is
        forwarded to the base class -- confirm this is intended.
        """
        # initializing I/O precision
        io_precisions = [precision] * 2

        # initializing base class
        ML_FunctionBasis.__init__(self,
                                  base_name="log",
                                  function_name=function_name,
                                  output_file=output_file,
                                  io_precisions=io_precisions,
                                  abs_accuracy=None,
                                  libm_compliant=libm_compliant,
                                  processor=target,
                                  fuse_fma=fuse_fma,
                                  fast_path_extract=fast_path_extract,
                                  debug_flag=debug_flag)

        self.precision = precision

    def generate_emulate(self, result, mpfr_x, mpfr_rnd):
        """Generate the emulation code for ML_Log functions.

        mpfr_x is a mpfr_t variable which should have the right precision,
        mpfr_rnd is the rounding mode.  The returned Statement assigns
        ``mpfr_log(mpfr_x, mpfr_rnd)`` to ``result``.
        """
        emulate_func_name = "mpfr_log"
        emulate_func_op = FunctionOperator(emulate_func_name,
                                           arg_map={
                                               0: FO_Result(0),
                                               1: FO_Arg(0),
                                               2: FO_Arg(1)
                                           },
                                           require_header=["mpfr.h"])
        emulate_func = FunctionObject(emulate_func_name,
                                      [ML_Mpfr_t, ML_Int32], ML_Mpfr_t,
                                      emulate_func_op)
        mpfr_call = Statement(
            ReferenceAssign(result, emulate_func(mpfr_x, mpfr_rnd)))

        return mpfr_call

    def evaluate_argument_reduction(self, in_interval, in_prec, inv_size,
                                    inv_prec):
        """Evaluate one argument reduction (Tang).

        given:
          an input variable of type Fixed(0, in_prec, False) lying in
          in_interval,
          inv_size, the number of bits read from this variable for the
          argument reduction,
          inv_prec, the precision of its inverse
        it returns a dict with:
          out_interval: the output interval of the variable
          length_table: the number of elements in the table
          sizeof_table: the size in bytes of the table used
        """
        one = Constant(1, precision=ML_Exact, tag="one")
        dx = Variable("dx",
                      precision=ML_Custom_FixedPoint_Format(
                          0, in_prec, False),
                      interval=in_interval)

        # do the argument reduction
        x = Addition(dx, one, tag="x", precision=ML_Exact)
        x1 = Conversion(x,
                        tag="x1",
                        precision=ML_Custom_FixedPoint_Format(
                            0, inv_size, False),
                        rounding_mode=ML_RoundTowardMinusInfty)
        # table index node; only referenced by the disabled goal below
        s = Multiplication(dx,
                           Constant(S2**inv_size, precision=ML_Exact),
                           precision=ML_Exact,
                           tag="interval_index_table")
        inv_x1 = Division(one, x1, tag="ix1", precision=ML_Exact)
        inv_x = Conversion(inv_x1,
                           tag="ix",
                           precision=ML_Custom_FixedPoint_Format(
                               1, inv_prec, False),
                           rounding_mode=ML_RoundTowardPlusInfty)
        y = Multiplication(x, inv_x, tag="y", precision=ML_Exact)
        dy = Subtraction(y, one, tag="dy", precision=ML_Exact)

        # add the necessary goals and hints
        dx_gappa = Variable("dx_gappa",
                            interval=dx.get_interval(),
                            precision=dx.get_precision())
        swap_map = {dx: dx_gappa}

        # goal: dy (result of the argument reduction)
        gappa_code = self.gappa_engine.get_interval_code_no_copy(
            dy.copy(swap_map), bound_list=[swap_map[dx]])
        #self.gappa_engine.add_goal(gappa_code, s.copy(swap_map)) # range of index of table

        # hints. are the ones with isApprox=True really necessary ?
        self.gappa_engine.add_hint(gappa_code,
                                   x.copy(swap_map),
                                   x1.copy(swap_map),
                                   isApprox=True)
        self.gappa_engine.add_hint(gappa_code,
                                   inv_x1.copy(swap_map),
                                   inv_x.copy(swap_map),
                                   isApprox=True)
        # swap_map[inv_x1] is read after inv_x1.copy(swap_map) was called
        # above; statement order is kept for that reason
        self.gappa_engine.add_hint(
            gappa_code,
            Multiplication(x1, inv_x1, precision=ML_Exact).copy(swap_map),
            one,
            Comparison(swap_map[inv_x1],
                       Constant(0, precision=ML_Exact),
                       specifier=Comparison.NotEqual,
                       precision=ML_Bool))

        # execute and parse the result
        result = execute_gappa_script_extract(
            gappa_code.get(self.gappa_engine))
        out_interval = result['goal']
        length_table = 1 + floor(
            sup(in_interval) * S2**inv_size).getConstantAsInt()
        # floor division keeps the byte count an integer
        sizeof_table = length_table * (16 + ML_Custom_FixedPoint_Format(
            1, inv_prec, False).get_c_bit_size() // 8)
        return {
            'out_interval': out_interval,
            'length_table': length_table,
            'sizeof_table': sizeof_table,
        }

    # explore the parameters of the argument reduction
    # get the fastest code possible with some memory constraint :
    # for all possible parameters of the arg red:
    #  - get the final interval and the tables sizes proven by gappa
    #  - eliminate the ones that do not fit in the memory constraints
    #  - get the smallest degree of the polynomial that achieves 2^-53 relative precision
    #    (or 2**-(self.precision.get_field_size()+1) depending on self.precision)
    #  - get the smallest degree that achieves 2^-~128 absolute precision
    #    (TODO: get exact limit with worst cases. should be around 2^-114)
    # of all the parameters that achieved those degrees, choose the one that has the smallest table size

    def eval_argument_reduction(self, size1, prec1, size2, prec2):
        """Evaluate a two-step argument reduction with Gappa.

        Return a dict holding the arguments, the length and size in bytes
        of both tables, and the input ('in_interval'), intermediate
        ('mid_interval') and output ('out_interval') intervals of the
        reduction.
        """
        one = Constant(1, precision=ML_Exact, tag="one")
        dx = Variable("dx",
                      precision=ML_Custom_FixedPoint_Format(0, 52, False),
                      interval=Interval(0, 1 - S2**-52))

        # do the argument reduction
        x = Addition(dx, one, tag="x", precision=ML_Exact)
        x1 = Conversion(x,
                        tag="x1",
                        precision=ML_Custom_FixedPoint_Format(
                            0, size1, False),
                        rounding_mode=ML_RoundTowardMinusInfty)
        s = Multiplication(Subtraction(x1, one, precision=ML_Exact),
                           Constant(S2**size1, precision=ML_Exact),
                           precision=ML_Exact,
                           tag="indexTableX")
        inv_x1 = Division(one, x1, tag="ix1", precision=ML_Exact)
        inv_x = Conversion(inv_x1,
                           tag="ix",
                           precision=ML_Custom_FixedPoint_Format(
                               1, prec1, False),
                           rounding_mode=ML_RoundTowardPlusInfty)
        y = Multiplication(x, inv_x, tag="y", precision=ML_Exact)
        dy = Subtraction(y, one, tag="dy", precision=ML_Exact)
        # NOTE(review): y1 reuses the tag "y" already given to y above;
        # kept unchanged -- confirm it is not meant to be "y1".
        y1 = Conversion(y,
                        tag="y",
                        precision=ML_Custom_FixedPoint_Format(
                            0, size2, False),
                        rounding_mode=ML_RoundTowardMinusInfty)
        t = Multiplication(Subtraction(y1, one, precision=ML_Exact),
                           Constant(S2**size2, precision=ML_Exact),
                           precision=ML_Exact,
                           tag="indexTableY")
        inv_y1 = Division(one, y1, tag="iy1", precision=ML_Exact)
        inv_y = Conversion(inv_y1,
                           tag="iy",
                           precision=ML_Custom_FixedPoint_Format(
                               1, prec2, False),
                           rounding_mode=ML_RoundTowardPlusInfty)
        z = Multiplication(y, inv_y, tag="z", precision=ML_Exact)
        dz = Subtraction(z, one, tag="dz", precision=ML_Exact)

        # add the necessary goals and hints
        dx_gappa = Variable("dx_gappa",
                            interval=dx.get_interval(),
                            precision=dx.get_precision())
        swap_map = {dx: dx_gappa}

        # goals (main goal: dz, the result of the argument reduction)
        gappa_code = self.gappa_engine.get_interval_code_no_copy(
            dz.copy(swap_map), bound_list=[dx_gappa])
        self.gappa_engine.add_goal(gappa_code, dy.copy(swap_map))
        # range of index of table 1
        self.gappa_engine.add_goal(gappa_code, s.copy(swap_map))
        # range of index of table 2
        self.gappa_engine.add_goal(gappa_code, t.copy(swap_map))

        # hints. are the ones with isApprox=True really necessary ?
        self.gappa_engine.add_hint(gappa_code,
                                   x.copy(swap_map),
                                   x1.copy(swap_map),
                                   isApprox=True)
        self.gappa_engine.add_hint(gappa_code,
                                   y.copy(swap_map),
                                   y1.copy(swap_map),
                                   isApprox=True)
        self.gappa_engine.add_hint(gappa_code,
                                   inv_x1.copy(swap_map),
                                   inv_x.copy(swap_map),
                                   isApprox=True)
        self.gappa_engine.add_hint(gappa_code,
                                   inv_y1.copy(swap_map),
                                   inv_y.copy(swap_map),
                                   isApprox=True)
        self.gappa_engine.add_hint(
            gappa_code,
            Multiplication(x1, inv_x1, precision=ML_Exact).copy(swap_map),
            one,
            Comparison(swap_map[inv_x1],
                       Constant(0, precision=ML_Exact),
                       specifier=Comparison.NotEqual,
                       precision=ML_Bool))
        self.gappa_engine.add_hint(
            gappa_code,
            Multiplication(y1, inv_y1, precision=ML_Exact).copy(swap_map),
            one,
            Comparison(swap_map[inv_y1],
                       Constant(0, precision=ML_Exact),
                       specifier=Comparison.NotEqual,
                       precision=ML_Bool))
        # NOTE(review): hypothesis on a variable that appears nowhere
        # else in this method; kept as in the original.
        toto = Variable("toto", precision=ML_Binary64)
        self.gappa_engine.add_hypothesis(gappa_code, toto,
                                         Interval(0, S2**-52))

        # execute and parse the result
        result = execute_gappa_script_extract(
            gappa_code.get(self.gappa_engine))
        self.gappa_engine.clear_memoization_map()  # avoid memory leak
        length_table1 = 1 + floor(sup(
            result['indexTableX'])).getConstantAsInt()
        length_table2 = 1 + floor(sup(
            result['indexTableY'])).getConstantAsInt()

        return {
            # arguments
            'size1': size1,
            'prec1': prec1,
            'size2': size2,
            'prec2': prec2,
            # size of the tables (floor division keeps byte counts integer)
            'length_table1': length_table1,
            'length_table2': length_table2,
            'sizeof_table1': length_table1 *
            (16 + ML_Custom_FixedPoint_Format(
                1, prec1, False).get_c_bit_size() // 8),
            'sizeof_table2': length_table2 *
            (16 + ML_Custom_FixedPoint_Format(
                1, prec2, False).get_c_bit_size() // 8),
            # intervals
            'in_interval': dx.get_interval(),
            'mid_interval': result['dy'],
            'out_interval': result['goal'],
        }

    def generate_argument_reduction(self, memory_limit):
        """Return the argument reduction used by generate_scheme.

        The parameters are currently fixed to (6, 10, 12, 13) with
        polynomial degrees 4 and 8; ``memory_limit`` is not consulted.
        The exhaustive search that used to sit, unreachable, after the
        return statement lives in _search_argument_reduction.
        """
        best_arg_reduc = self.eval_argument_reduction(6, 10, 12, 13)
        best_arg_reduc['sizeof_tables'] = (best_arg_reduc['sizeof_table1'] +
                                           best_arg_reduc['sizeof_table2'])
        best_arg_reduc['degree_poly1'] = 4
        best_arg_reduc['degree_poly2'] = 8
        return best_arg_reduc

    def _search_argument_reduction(self, memory_limit):
        """Search all parameters for the best argument reduction.

        Not called by generate_argument_reduction.  Returns the best
        dict found (same layout as eval_argument_reduction plus
        'degree_poly1', 'degree_poly2' and 'sizeof_tables'), or None.
        """
        # iterate through all possible parameters, and return the best argument reduction
        # the order of importance of the characteristics of a good argument reduction is:
        #   1- the argument reduction is valid
        #   2- the degrees of the polynomials obtained are minimal
        #   3- the memory used is minimal
        # An argument reduction is valid iff:
        #   - the memory used is less than memory_limit
        #   - y-1 and z-1 fit into a uint64_t
        #   - the second argument reduction is useful (ie: it adds at least 1 bit to the argument reduction)
        # From those validity constraints we deduce some bounds on the parameters to reduce the space of values searched:
        # (note that those bounds are implied by, but not equivalent to, the constraints)
        #   size1 <= log2(memory_limit/17)                                       (memory_limit on the first table)
        #   prec1 < 13 + size1                                                   (y-1 fits into a uint64_t)
        #   size2 <= log2((memory_limit - sizeof_table1)/17/midinterval)         (memory_limit on both tables)
        #   size2 >= 1 - log2(midinterval)                                       (second arg red should be useful)
        #   prec2 < 12 - prec1 - log2((y-y1)/y1), for all possible y             (z-1 fits into a uint64_t)
        # note: it is hard to deduce a tight bound on prec2 from the last inequality
        # a good approximation is size2 ~= max[for y]( - log2((y-y1)/y1)), but using it may eliminate valid arg reduc
        best_arg_reduc = None
        min_size1 = 1
        max_size1 = floor(log(memory_limit / 17) / log(2)).getConstantAsInt()
        for size1 in range(max_size1, min_size1 - 1, -1):

            min_prec1 = size1
            max_prec1 = 12 + size1
            for prec1 in range(min_prec1, max_prec1 + 1):

                # we need sizeof_table1 and mid_interval for the bound on size2 and prec2
                first_arg_reduc = self.eval_argument_reduction(
                    size1, prec1, prec1, prec1)
                mid_interval = first_arg_reduc['mid_interval']
                sizeof_table1 = first_arg_reduc['sizeof_table1']

                if not (0 <= inf(mid_interval)
                        and sup(mid_interval) < S2**(64 - 52 - prec1)):
                    continue
                if not (first_arg_reduc['sizeof_table1'] < memory_limit):
                    continue

                min_size2 = 1 - ceil(
                    log(sup(mid_interval)) / log(2)).getConstantAsInt()
                max_size2 = floor(
                    log((memory_limit - sizeof_table1) /
                        (17 * sup(mid_interval))) /
                    log(2)).getConstantAsInt()
                # the prec2 loop may shrink the interval of valid values
                # for prec2, so min_prec2 and max_prec2 are set here and
                # not right before the prec2 loop (they are modified
                # inside its body, for the next iteration of size2)
                min_prec2 = 0
                max_prec2 = 12 + max_size2 - prec1
                for size2 in range(max_size2, min_size2 - 1, -1):

                    max_prec2 = min(max_prec2, 12 + size2 - prec1)
                    for prec2 in range(max_prec2, min_prec2 - 1, -1):

                        arg_reduc = self.eval_argument_reduction(
                            size1, prec1, size2, prec2)
                        mid_interval = arg_reduc['mid_interval']
                        out_interval = arg_reduc['out_interval']
                        sizeof_tables = (arg_reduc['sizeof_table1'] +
                                         arg_reduc['sizeof_table2'])
                        if not (0 <= inf(out_interval)
                                and sup(out_interval) < S2**
                                (64 - 52 - prec1 - prec2)):
                            max_prec2 = prec2 - 1
                            continue
                        if memory_limit < sizeof_tables:
                            continue

                        # guess the degree of the two polynomials (relative error <= 2^-52 and absolute error <= 2^-120)
                        # note: we exclude zero from out_interval to not perturb sollya (log(1+x)/x is not well defined on 0)
                        sollya_out_interval = Interval(
                            S2**(-52 - prec1 - prec2), sup(out_interval))
                        guess_degree_poly1 = guessdegree(
                            log(1 + sollya.x) / sollya.x,
                            sollya_out_interval, S2**-52)
                        guess_degree_poly2 = guessdegree(
                            log(1 + sollya.x), sollya_out_interval,
                            S2**-120)
                        # TODO: detect when guessdegree returns multiple possible degrees, and find the right one
                        degree_poly1 = sup(
                            guess_degree_poly1).getConstantAsInt() + 1
                        degree_poly2 = sup(
                            guess_degree_poly2).getConstantAsInt()

                        if ((best_arg_reduc is not None) and
                            (best_arg_reduc['degree_poly1'] < degree_poly1
                             or
                             best_arg_reduc['degree_poly2'] < degree_poly2)):
                            min_prec2 = prec2 + 1
                            break

                        if ((best_arg_reduc is None) or
                            (best_arg_reduc['degree_poly1'] > degree_poly1)
                                or
                            (best_arg_reduc['degree_poly1'] == degree_poly1
                             and
                             best_arg_reduc['degree_poly2'] > degree_poly2)
                                or
                            (best_arg_reduc['degree_poly1'] == degree_poly1
                             and
                             best_arg_reduc['degree_poly2'] == degree_poly2
                             and
                             best_arg_reduc['sizeof_tables'] > sizeof_tables)):
                            arg_reduc['degree_poly1'] = degree_poly1
                            arg_reduc['degree_poly2'] = degree_poly2
                            arg_reduc['sizeof_tables'] = sizeof_tables
                            best_arg_reduc = arg_reduc
        return best_arg_reduc

    def generate_scheme(self):
        """Build and return the (work in progress) evaluation scheme.

        The returned ConditionBlock dispatches between the special-case
        handling and the first polynomial evaluated on the reduced
        argument.
        """
        memory_limit = 2500

        # local overloading of RaiseReturn operation
        # (input_var is bound further down, before any call)
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = input_var
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        ### Constants computations ###

        # log(2) rounded to a multiple of 2**-52; the exponents were
        # swapped (log(2) * 2**-52 rounds to 0, which made the high part
        # always null)
        v_log2_hi = nearestint(log(2) * S2**52) * S2**-52
        v_log2_lo = round(log(2) - v_log2_hi, 64 + 53, sollya.RN)
        log2_hi = Constant(v_log2_hi, precision=self.precision,
                           tag="log2_hi")
        log2_lo = Constant(v_log2_lo, precision=self.precision,
                           tag="log2_lo")

        print(
            "\n\033[1mSearch parameters for the argument reduction:\033[0m (this can take a while)"
        )
        arg_reduc = self.generate_argument_reduction(memory_limit)

        print(
            "\n\033[1mArgument reduction found:\033[0m [({},{}),({},{})] -> polynomials of degree {},{}, using {} bytes of memory"
            .format(arg_reduc['size1'], arg_reduc['prec1'],
                    arg_reduc['size2'], arg_reduc['prec2'],
                    arg_reduc['degree_poly1'], arg_reduc['degree_poly2'],
                    arg_reduc['sizeof_tables']))

        print(
            "\n\033[1mGenerate the first logarithm table:\033[0m containing {} elements, using {} bytes of memory"
            .format(arg_reduc['length_table1'], arg_reduc['sizeof_table1']))
        inv_table_1 = ML_NewTable(
            dimensions=[arg_reduc['length_table1']],
            storage_precision=ML_Custom_FixedPoint_Format(
                1, arg_reduc['prec1'], False),
            tag=self.uniquify_name("inv_table_1"))
        log_table_1 = ML_NewTable(
            dimensions=[arg_reduc['length_table1']],
            storage_precision=ML_Custom_FixedPoint_Format(
                11, 128 - 11, False),
            tag=self.uniquify_name("log_table_1"))
        # NOTE(review): the loop stops at length_table1 - 2, leaving the
        # last table entry unset -- confirm this is intended.
        for i in range(0, arg_reduc['length_table1'] - 1):
            # i-th table input: 1 + i * 2**-size1
            # (was "i / S2 * size1", a missing "**")
            x1 = 1 + i / S2**arg_reduc['size1']
            # inverse rounded up on prec1 fractional bits, scaled back
            # by 2**-prec1 as modelled in eval_argument_reduction
            # (was scaled by 2**+prec1)
            inv_x1 = ceil(
                S2**arg_reduc['prec1'] / x1) * S2**(-arg_reduc['prec1'])
            log_x1 = floor(log(x1) * S2**(128 - 11)) * S2**(11 - 128)
            inv_table_1[i] = inv_x1
            log_table_1[i] = log_x1

        print(
            "\n\033[1mGenerate the second logarithm table:\033[0m containing {} elements, using {} bytes of memory"
            .format(arg_reduc['length_table2'], arg_reduc['sizeof_table2']))
        inv_table_2 = ML_NewTable(
            dimensions=[arg_reduc['length_table2']],
            storage_precision=ML_Custom_FixedPoint_Format(
                1, arg_reduc['prec2'], False),
            tag=self.uniquify_name("inv_table_2"))
        log_table_2 = ML_NewTable(
            dimensions=[arg_reduc['length_table2']],
            storage_precision=ML_Custom_FixedPoint_Format(
                11, 128 - 11, False),
            tag=self.uniquify_name("log_table_2"))
        # NOTE(review): same loop bound remark as for the first table.
        for i in range(0, arg_reduc['length_table2'] - 1):
            y1 = 1 + i / S2**arg_reduc['size2']
            # inverse of y1 (the stale x1 of the previous loop was used),
            # scaled back by 2**-prec2
            inv_y1 = ceil(
                S2**arg_reduc['prec2'] / y1) * S2**(-arg_reduc['prec2'])
            # NOTE(review): table 1 stores log(x1) while this one stores
            # log(inv_y1), which is <= 0 for an unsigned storage format;
            # left untouched -- confirm which value the scheme expects.
            log_y1 = floor(log(inv_y1) * S2**(128 - 11)) * S2**(11 - 128)
            inv_table_2[i] = inv_y1
            log_table_2[i] = log_y1

        ### Evaluation Scheme ###

        print("\n\033[1mGenerate the evaluation scheme:\033[0m")
        input_var = self.implementation.add_input_variable(
            "input_var", self.precision)
        ve = ExponentExtraction(input_var, tag="x_exponent", debug=debugd)
        vx = MantissaExtraction(input_var,
                                tag="x_mantissa",
                                precision=ML_Custom_FixedPoint_Format(
                                    0, 52, False),
                                debug=debug_lftolx)

        print("filtering and handling special cases")
        test_is_special_cases = LogicalNot(
            Test(input_var,
                 specifier=Test.IsIEEENormalPositive,
                 likely=True,
                 debug=debugd,
                 tag="is_special_cases"))
        handling_special_cases = Statement(
            ConditionBlock(
                Test(input_var, specifier=Test.IsSignalingNaN, debug=True),
                ExpRaiseReturn(ML_FPE_Invalid,
                               return_value=FP_QNaN(self.precision))),
            ConditionBlock(
                Test(input_var, specifier=Test.IsNaN, debug=True),
                Return(input_var))
            # TODO: add tests for x == 0 (raise DivideByZero, return -Inf), x < 0 (raise InvalidOperation, return qNaN)
            # all that remains is x is a subnormal positive
            #Statement(
            #  ReferenceAssign(Dereference(ve), Subtraction(ve, Subtraction(CountLeadingZeros(input_var, tag = 'subnormal_clz', precision = ve.get_precision()), Constant(12, precision = ve.get_precision())))),
            #  ReferenceAssign(Dereference(vx), BitLogicLeftShift(vx, Addition(CountLeadingZeros(input_var, tag = 'subnormal_clz', precision = ve.get_precision()), Constant(1, precision = ve.get_precision()))))
            #)
        )

        print("doing the argument reduction")
        v_dx = vx
        v_x1 = Conversion(v_dx,
                          tag='x1',
                          precision=ML_Custom_FixedPoint_Format(
                              0, arg_reduc['size1'], False),
                          rounding_mode=ML_RoundTowardMinusInfty)
        v_index_x = TypeCast(v_x1, tag='index_x', precision=ML_Int32)
        v_inv_x = TableLoad(inv_table_1, v_index_x, tag='inv_x')
        v_x = Addition(v_dx,
                       1,
                       tag='x',
                       precision=ML_Custom_FixedPoint_Format(1, 52, False))
        v_dy = Multiplication(v_x,
                              v_inv_x,
                              tag='dy',
                              precision=ML_Custom_FixedPoint_Format(
                                  0, 52 + arg_reduc['prec1'], False))
        v_y1 = Conversion(v_dy,
                          tag='y1',
                          precision=ML_Custom_FixedPoint_Format(
                              0, arg_reduc['size2'], False),
                          rounding_mode=ML_RoundTowardMinusInfty)
        v_index_y = TypeCast(v_y1, tag='index_y', precision=ML_Int32)
        v_inv_y = TableLoad(inv_table_2, v_index_y, tag='inv_y')
        # NOTE(review): v_dy carries 52 + prec1 fractional bits while
        # v_y is declared with 52 + prec2 -- confirm.
        v_y = Addition(v_dy,
                       1,
                       tag='y',
                       precision=ML_Custom_FixedPoint_Format(
                           1, 52 + arg_reduc['prec2'], False))
        # note that we limit the number of bits used to represent dz to 64.
        # we proved during the arg reduction that we can do that (sup(out_interval) < 2^(64-52-prec1-prec2))
        v_dz = Multiplication(
            v_y,
            v_inv_y,
            tag='z',
            precision=ML_Custom_FixedPoint_Format(
                64 - 52 - arg_reduc['prec1'] - arg_reduc['prec2'],
                52 + arg_reduc['prec1'] + arg_reduc['prec2'], False))

        print("doing the first polynomial evaluation")
        global_poly1_object = Polynomial.build_from_approximation(
            log(1 + sollya.x) / sollya.x, arg_reduc['degree_poly1'] - 1,
            [64] * (arg_reduc['degree_poly1']), arg_reduc['out_interval'],
            fixed, sollya.absolute)
        poly1_object = global_poly1_object.sub_poly(start_index=1)
        print(global_poly1_object)
        print(poly1_object)
        poly1 = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly1_object, v_dz, unified_precision=v_dz.get_precision())

        return ConditionBlock(test_is_special_cases, handling_special_cases,
                              Return(poly1))

        #approx_interval = Interval(0, 27021597764222975*S2**-61)

        #poly_degree = 1+sup(guessdegree(log(1+x)/x, approx_interval, S2**-(self.precision.get_field_size())))
        #global_poly_object = Polynomial.build_from_approximation(log(1+x)/x, poly_degree, [1] + [self.precision]*(poly_degree), approx_interval, sollya.absolute)
        #poly_object = global_poly_object.sub_poly(start_index = 1)

        #_poly = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, _red_vx, unified_precision = self.precision)
        #_poly.set_attributes(tag = "poly", debug = debug_lftolx)
class ML_UT_GappaCode(ML_Function("ml_ut_gappa_code")):
    """Unit test exercising gappa script generation and execution."""

    def __init__(self, args=DefaultArgTemplate):
        # all argument handling is delegated to the generic function basis
        ML_FunctionBasis.__init__(self, args)

    @staticmethod
    def get_default_args(**kw):
        """Build the argument template of this unit test.

        The built-in defaults below are overloaded by the entries of @p kw
        before being wrapped into a DefaultArgTemplate.
        """
        merged_args = dict(
            {
                "output_file": "ut_gappa_code.c",
                "function_name": "ut_gappa_code",
                "precision": ML_Binary32,
                "target": MPFRProcessor(),
                "fast_path_extract": True,
                "fuse_fma": True,
                "libm_compliant": True
            },
            **kw)
        return DefaultArgTemplate(**merged_args)

    def generate_scheme(self):
        """Generate a gappa script for two small expressions and run it.

        The gappa results for both goals are printed; the returned scheme is
        a dummy one (it simply returns the input) so that code generation
        still has something to emit.
        """
        # input variable <x>, constrained to [-1, 1]
        vx = self.implementation.add_input_variable("x", ML_Binary32)
        vx.set_interval(Interval(-1, 1))

        # free (exact) variable <y>, only used to build the hint below
        vy = Variable("y", precision=ML_Exact)

        # the two expressions whose ranges gappa must bound
        first_expr = vx * vx - vx * 2
        second_expr = vx * vx - vx

        # optimisation pass: every unknown precision becomes the default one
        # (plus extra optimisations such as FMA merging when enabled)
        opt_first = self.optimise_scheme(first_expr)
        opt_second = self.optimise_scheme(second_expr)

        # tags are the keys under which results are extracted from gappa
        opt_first.set_tag("goal")
        opt_second.set_tag("new_goal")

        # default goal of the gappa execution
        gappa_goal = opt_first

        # EXACT expression used as a hint in the gappa script
        hint_expr = self.opt_engine.exactify(vy * (1 / vy))

        # stop_map limits the part of the DAG explored while generating the
        # gappa script: each (key, value) pair gives a node to stop at and
        # the Variable (with a defined interval) standing in for it.
        # vx.get_handle().get_node() retrieves the node instantiating the
        # abstract node <vx> after the call to self.optimise_scheme
        vx_node = vx.get_handle().get_node()
        stop_map = {
            vx_node: Variable("x",
                              precision=ML_Binary32,
                              interval=vx.get_interval())
        }

        # gappa code determining the interval of the default goal
        script = self.gappa_engine.get_interval_code(gappa_goal, stop_map)

        # manual hint stating that vy * (1 / vy) -> 1 { vy <> 0 }
        non_zero_y = Comparison(vy,
                                Constant(0, precision=ML_Integer),
                                specifier=Comparison.NotEqual,
                                precision=ML_Bool)
        self.gappa_engine.add_hint(script, hint_expr,
                                   Constant(1, precision=ML_Exact),
                                   non_zero_y)

        # the second expression is an extra goal of the same script
        self.gappa_engine.add_goal(script, opt_second)

        # run gappa; the result is a dict indexed by the goals' tags
        gappa_result = execute_gappa_script_extract(
            script.get(self.gappa_engine))

        print("eval error: ", gappa_result["goal"], gappa_result["new_goal"])

        # dummy scheme to keep code generation functional
        return Statement(Return(vx))
class ML_Exponential(ML_Function("ml_exp")):
    """Meta-implementation of the exponential function.

    The generated scheme reduces the input as x = k * log(2) + r, evaluates a
    polynomial approximation on r and rebuilds the result by inserting k into
    the exponent, with dedicated paths for special inputs and for early/late
    overflow and underflow.
    """

    def __init__(self, args=DefaultArgTemplate):
        # initializing base class
        ML_FunctionBasis.__init__(self, args)
        # accuracy goal driving the polynomial degree search
        self.accuracy = args.accuracy

    @staticmethod
    def get_default_args(**kw):
        """ Return a structure containing the arguments for ML_Exponential,
            builtin from a default argument mapping overloaded with @p kw """
        default_args_exp = {
            "output_file": "my_exp.c",
            "function_name": "my_exp",
            "precision": ML_Binary32,
            "accuracy": ML_Faithful,
            "target": GenericProcessor()
        }
        default_args_exp.update(kw)
        return DefaultArgTemplate(**default_args_exp)

    def generate_scheme(self):
        """Build and return the operation graph implementing exp(x).

        Returns:
            the root ConditionBlock dispatching between the special-input
            path (NaN / infinity) and the standard path.
        """
        # declaring target and instantiating optimization engine
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=debug_multi,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=debug_multi,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=debug_multi,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=debug_multi,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        # return in case of infinity input
        infty_return = Statement(
            ConditionBlock(test_positive,
                           Return(FP_PlusInfty(self.precision)),
                           Return(FP_PlusZero(self.precision))))
        # return in case of specific value input (NaN or inf)
        specific_return = ConditionBlock(
            test_nan,
            ConditionBlock(test_signaling_nan, return_snan,
                           Return(FP_QNaN(self.precision))),
            infty_return)
        # return in case of standard (non-special) input

        # exclusion of early overflow and underflow cases
        precision_emax = self.precision.get_emax()
        precision_max_value = S2 * S2**precision_emax
        exp_overflow_bound = sollya.ceil(log(precision_max_value))
        early_overflow_test = Comparison(vx,
                                         exp_overflow_bound,
                                         likely=False,
                                         specifier=Comparison.Greater)
        early_overflow_return = Statement(
            ClearException(),
            ExpRaiseReturn(ML_FPE_Inexact,
                           ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)))

        precision_emin = self.precision.get_emin_subnormal()
        precision_min_value = S2**precision_emin
        exp_underflow_bound = floor(log(precision_min_value))

        early_underflow_test = Comparison(vx,
                                          exp_underflow_bound,
                                          likely=False,
                                          specifier=Comparison.Less)
        early_underflow_return = Statement(
            ClearException(),
            ExpRaiseReturn(ML_FPE_Inexact,
                           ML_FPE_Underflow,
                           return_value=FP_PlusZero(self.precision)))

        # constant computation
        invlog2 = self.precision.round_sollya_object(1 / log(2), sollya.RN)

        interval_vx = Interval(exp_underflow_bound, exp_overflow_bound)
        interval_fk = interval_vx * invlog2
        interval_k = Interval(floor(inf(interval_fk)),
                              sollya.ceil(sup(interval_fk)))

        # log(2) is split into a short high part and a low correction; the
        # high part width is derived from the magnitude k can reach
        log2_hi_precision = self.precision.get_field_size() - (
            sollya.ceil(log2(sup(abs(interval_k)))) + 2)
        Log.report(Log.Info, "log2_hi_precision: %d" % log2_hi_precision)
        invlog2_cst = Constant(invlog2, precision=self.precision)
        log2_hi = round(log(2), log2_hi_precision, sollya.RN)
        log2_lo = self.precision.round_sollya_object(
            log(2) - log2_hi, sollya.RN)

        # argument reduction
        unround_k = vx * invlog2
        unround_k.set_attributes(tag="unround_k", debug=debug_multi)
        # k is kept both in floating-point (k) and integer (ik) formats
        k = NearestInteger(unround_k,
                           precision=self.precision,
                           debug=debug_multi)
        ik = NearestInteger(unround_k,
                            precision=ML_Int32,
                            debug=debug_multi,
                            tag="ik")
        ik.set_tag("ik")
        k.set_tag("k")
        exact_pre_mul = (k * log2_hi)
        exact_pre_mul.set_attributes(exact=True)
        exact_hi_part = vx - exact_pre_mul
        exact_hi_part.set_attributes(exact=True,
                                     tag="exact_hi",
                                     debug=debug_multi,
                                     prevent_optimization=True)
        exact_lo_part = -k * log2_lo
        exact_lo_part.set_attributes(tag="exact_lo",
                                     debug=debug_multi,
                                     prevent_optimization=True)
        r = exact_hi_part + exact_lo_part
        r.set_tag("r")
        r.set_attributes(debug=debug_multi)

        approx_interval = Interval(-log(2) / 2, log(2) / 2)

        # three-way split of the approximation interval, used as dichotomy
        # cases for the gappa evaluation error below
        approx_interval_half = approx_interval / 2
        approx_interval_split = [
            Interval(-log(2) / 2, inf(approx_interval_half)),
            approx_interval_half,
            Interval(sup(approx_interval_half),
                     log(2) / 2)
        ]

        # TODO: should be computed automatically
        exact_hi_interval = approx_interval
        exact_lo_interval = -interval_k * log2_lo

        opt_r = self.optimise_scheme(r, copy={})

        tag_map = {}
        self.opt_engine.register_nodes_by_tag(opt_r, tag_map)

        cg_eval_error_copy_map = {
            vx:
            Variable("x", precision=self.precision, interval=interval_vx),
            tag_map["k"]:
            Variable("k", interval=interval_k, precision=self.precision)
        }

        if is_gappa_installed():
            eval_error = self.gappa_engine.get_eval_error_v2(
                self.opt_engine,
                opt_r,
                cg_eval_error_copy_map,
                gappa_filename="red_arg.g")
        else:
            eval_error = 0.0
            Log.report(Log.Warning,
                       "gappa is not installed in this environnement")
        Log.report(Log.Info, "eval error: %s" % eval_error)

        local_ulp = sup(ulp(sollya.exp(approx_interval), self.precision))
        # FIXME refactor error_goal from accuracy
        Log.report(Log.Info, "accuracy: %s" % self.accuracy)
        if self.accuracy is ML_Faithful:
            error_goal = local_ulp
        elif self.accuracy is ML_CorrectlyRounded:
            error_goal = S2**-1 * local_ulp
        elif isinstance(self.accuracy, ML_DegradedAccuracyAbsolute):
            error_goal = self.accuracy.goal
        elif isinstance(self.accuracy, ML_DegradedAccuracyRelative):
            error_goal = self.accuracy.goal
        else:
            Log.report(Log.Error, "unknown accuracy: %s" % self.accuracy)

        # half of the error budget goes to the approximation
        error_goal_approx = S2**-1 * error_goal

        Log.report(Log.Info,
                   "\033[33;1m building mathematical polynomial \033[0m\n")
        poly_degree = max(
            sup(
                guessdegree(
                    expm1(sollya.x) / sollya.x, approx_interval,
                    error_goal_approx)) - 1, 2)

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme

        # degree search: the degree is increased until the combined
        # approximation + evaluation error meets error_goal
        while 1:
            Log.report(Log.Info, "attempting poly degree: %d" % poly_degree)
            precision_list = [1] + [self.precision] * (poly_degree)
            poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
                expm1(sollya.x),
                poly_degree,
                precision_list,
                approx_interval,
                sollya.absolute,
                error_function=error_function)
            Log.report(Log.Info, "polynomial: %s " % poly_object)
            sub_poly = poly_object.sub_poly(start_index=2)
            Log.report(Log.Info, "polynomial: %s " % sub_poly)

            Log.report(Log.Info, "poly approx error: %s" % poly_approx_error)

            Log.report(
                Log.Info,
                "\033[33;1m generating polynomial evaluation scheme \033[0m")
            pre_poly = polynomial_scheme_builder(
                poly_object, r, unified_precision=self.precision)
            pre_poly.set_attributes(tag="pre_poly", debug=debug_multi)

            pre_sub_poly = polynomial_scheme_builder(
                sub_poly, r, unified_precision=self.precision)
            pre_sub_poly.set_attributes(tag="pre_sub_poly", debug=debug_multi)

            poly = 1 + (exact_hi_part + (exact_lo_part + pre_sub_poly))
            poly.set_tag("poly")

            # optimizing poly before evaluation error computation
            opt_poly = self.optimise_scheme(poly)
            opt_sub_poly = self.optimise_scheme(pre_sub_poly)

            # evaluating error of the polynomial approximation
            r_gappa_var = Variable("r",
                                   precision=self.precision,
                                   interval=approx_interval)
            exact_hi_gappa_var = Variable("exact_hi",
                                          precision=self.precision,
                                          interval=exact_hi_interval)
            exact_lo_gappa_var = Variable("exact_lo",
                                          precision=self.precision,
                                          interval=exact_lo_interval)
            vx_gappa_var = Variable("x",
                                    precision=self.precision,
                                    interval=interval_vx)
            k_gappa_var = Variable("k",
                                   interval=interval_k,
                                   precision=self.precision)

            sub_poly_error_copy_map = {
                exact_hi_part.get_handle().get_node(): exact_hi_gappa_var,
                exact_lo_part.get_handle().get_node(): exact_lo_gappa_var,
            }

            poly_error_copy_map = {
                exact_hi_part.get_handle().get_node(): exact_hi_gappa_var,
                exact_lo_part.get_handle().get_node(): exact_lo_gappa_var,
            }

            if is_gappa_installed():
                sub_poly_eval_error = self.gappa_engine.get_eval_error_v2(
                    self.opt_engine,
                    opt_sub_poly,
                    sub_poly_error_copy_map,
                    gappa_filename="%s_gappa_sub_poly.g" % self.function_name)

                dichotomy_map = [
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[0],
                    },
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[1],
                    },
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[2],
                    },
                ]
                poly_eval_error_dico = self.gappa_engine.get_eval_error_v3(
                    self.opt_engine,
                    opt_poly,
                    poly_error_copy_map,
                    gappa_filename="gappa_poly.g",
                    dichotomy=dichotomy_map)

                poly_eval_error = max(
                    [sup(abs(err)) for err in poly_eval_error_dico])
            else:
                poly_eval_error = 0.0
                sub_poly_eval_error = 0.0
                Log.report(Log.Warning,
                           "gappa is not installed in this environnement")
                Log.report(Log.Info, "stopping autonomous degree research")
                # incrementing polynomial degree to counteract initial decrementation effect
                poly_degree += 1
                break
            Log.report(Log.Info,
                       "poly evaluation error: %s" % poly_eval_error)
            Log.report(Log.Info,
                       "sub poly evaluation error: %s" % sub_poly_eval_error)

            global_poly_error = None
            global_rel_poly_error = None

            # keep the worst relative error among the dichotomy cases
            for case_index in range(3):
                poly_error = poly_approx_error + poly_eval_error_dico[
                    case_index]
                rel_poly_error = sup(
                    abs(poly_error /
                        sollya.exp(approx_interval_split[case_index])))
                # identity test: "== None" would invoke the overloaded
                # equality of the stored numerical value
                if global_rel_poly_error is None or rel_poly_error > global_rel_poly_error:
                    global_rel_poly_error = rel_poly_error
                    global_poly_error = poly_error
            flag = error_goal > global_rel_poly_error

            if flag:
                break
            else:
                poly_degree += 1

        late_overflow_test = Comparison(ik,
                                        self.precision.get_emax(),
                                        specifier=Comparison.Greater,
                                        likely=False,
                                        debug=debug_multi,
                                        tag="late_overflow_test")
        # floor division keeps the offset an integer on every Python version
        # (it feeds an integer subtraction and an ExponentInsertion)
        overflow_exp_offset = (self.precision.get_emax() -
                               self.precision.get_field_size() // 2)
        diff_k = ik - overflow_exp_offset
        diff_k.set_attributes(debug=debug_multi,
                              tag="diff_k",
                              precision=ML_Int32)
        # the scaling by 2^ik is applied in two steps:
        # 2^(ik - offset) then 2^offset
        late_overflow_result = (ExponentInsertion(
            diff_k, precision=self.precision) * poly) * ExponentInsertion(
                overflow_exp_offset, precision=self.precision)
        late_overflow_result.set_attributes(silent=False,
                                            tag="late_overflow_result",
                                            debug=debug_multi,
                                            precision=self.precision)
        late_overflow_return = ConditionBlock(
            Test(late_overflow_result, specifier=Test.IsInfty, likely=False),
            ExpRaiseReturn(ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)),
            Return(late_overflow_result))

        late_underflow_test = Comparison(k,
                                         self.precision.get_emin_normal(),
                                         specifier=Comparison.LessOrEqual,
                                         likely=False)
        underflow_exp_offset = 2 * self.precision.get_field_size()
        corrected_exp = Addition(ik,
                                 underflow_exp_offset,
                                 precision=ML_Int32)
        # same two-step scaling for the underflow side:
        # 2^(ik + offset) then 2^(-offset)
        late_underflow_result = (
            ExponentInsertion(corrected_exp, precision=self.precision) *
            poly) * ExponentInsertion(-underflow_exp_offset,
                                      precision=self.precision)
        late_underflow_result.set_attributes(debug=debug_multi,
                                             tag="late_underflow_result",
                                             silent=False)
        test_subnormal = Test(late_underflow_result,
                              specifier=Test.IsSubnormal)
        late_underflow_return = Statement(
            ConditionBlock(
                test_subnormal,
                ExpRaiseReturn(ML_FPE_Underflow,
                               return_value=late_underflow_result)),
            Return(late_underflow_result))

        twok = ExponentInsertion(ik,
                                 tag="exp_ik",
                                 debug=debug_multi,
                                 precision=self.precision)
        std_result = twok * poly
        std_result.set_attributes(tag="std_result", debug=debug_multi)
        result_scheme = ConditionBlock(
            late_overflow_test, late_overflow_return,
            ConditionBlock(late_underflow_test, late_underflow_return,
                           Return(std_result)))
        std_return = ConditionBlock(
            early_overflow_test, early_overflow_return,
            ConditionBlock(early_underflow_test, early_underflow_return,
                           result_scheme))

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = ConditionBlock(test_nan_or_inf,
                                Statement(ClearException(), specific_return),
                                std_return)

        return scheme

    def generate_emulate(self, result_ternary, result, mpfr_x, mpfr_rnd):
        """ generate the emulation code for the exponential function,
            as a call to mpfr_exp whose return value is stored in
            @p result_ternary
            mpfr_x is a mpfr_t variable which should have the right precision
            mpfr_rnd is the rounding mode
        """
        emulate_func_name = "mpfr_exp"
        emulate_func_op = FunctionOperator(emulate_func_name,
                                           arg_map={
                                               0: FO_Arg(0),
                                               1: FO_Arg(1),
                                               2: FO_Arg(2)
                                           },
                                           require_header=["mpfr.h"])
        emulate_func = FunctionObject(emulate_func_name,
                                      [ML_Mpfr_t, ML_Mpfr_t, ML_Int32],
                                      ML_Int32, emulate_func_op)
        mpfr_call = Statement(
            ReferenceAssign(result_ternary,
                            emulate_func(result, mpfr_x, mpfr_rnd)))

        return mpfr_call

    def numeric_emulate(self, input_value):
        """ Numeric emulation of exponential """
        return sollya.exp(input_value)
class ML_UT_ImplicitIntervalEval(ML_Function("ml_ut_implicit_interval_eval")):
    """Unit test comparing node intervals against hand-computed ones."""

    def __init__(self, args=DefaultArgTemplate):
        # all argument handling is delegated to the generic function basis
        ML_FunctionBasis.__init__(self, args)

    @staticmethod
    def get_default_args(**kw):
        """Build the argument template of this unit test.

        The built-in defaults below are overloaded by the entries of @p kw
        before being wrapped into a DefaultArgTemplate.
        """
        merged_args = dict(
            {
                "output_file": "ut_implicit_interval_eval.c",
                "function_name": "ut_implicit_interval_eval",
                "precision": ML_Binary32,
                "accuracy": ML_Faithful,
                "target": MPFRProcessor()
            },
            **kw)
        return DefaultArgTemplate(**merged_args)

    def generate_scheme(self):
        """Check the interval of a few nodes after range evaluation.

        An error is reported for every node whose interval differs from the
        expected one; an empty Statement is returned.
        """
        # input variable and its declared range
        x_range = Interval(-1, 1)
        vx = self.implementation.add_input_variable("x",
                                                    self.precision,
                                                    interval=x_range)

        # constant node
        cst = Constant(7, tag="cst")
        cst_range = Interval(7)

        # shift node, built with an explicit interval
        shl = BitLogicLeftShift(NearestInteger(vx),
                                2,
                                interval=2 * x_range,
                                tag="shl")
        shl_range = 2 * x_range

        # compound expression mixing the three nodes above
        r = vx + cst * vx + shl - cst
        r.set_attributes(tag="r")
        r_range = x_range + cst_range * x_range + shl_range - cst_range

        # map of expected interval values, indexed by node
        expected = {
            vx: x_range,
            cst: cst_range,
            shl: shl_range,
            r: r_range,
        }

        # NOTES: implicit interval eval is no longer enforced: explicit call
        #        to evaluate_range is required
        evaluate_range(r, update_interval=True)

        for node in (vx, cst, r, shl):
            wanted = expected[node]
            if node.get_interval() != wanted:
                Log.report(
                    Log.Error,
                    "unexpected interval for {}: got {}, expected {}".format(
                        node.get_str(display_precision=True),
                        node.get_interval(), wanted))
                continue
            Log.report(
                Log.Info,
                "node {}: {} vs {}".format(node.get_tag(),
                                           node.get_interval(), wanted))

        return Statement()

    def numeric_emulate(self, input_value):
        """No numerical emulation exists for this unit test."""
        raise NotImplementedError
class ML_Cosine(ML_Function("ml_cos")):
    """ Implementation of cosine function """

    def __init__(self,
                 precision=ML_Binary32,
                 accuracy=ML_Faithful,
                 libm_compliant=True,
                 debug_flag=False,
                 fuse_fma=True,
                 fast_path_extract=True,
                 target=GenericProcessor(),
                 output_file="cosf.c",
                 function_name="cosf"):
        """ Forward the configuration to the function basis; the same
            precision is used for both entries of io_precisions """
        # initializing I/O precision
        io_precisions = [precision] * 2

        # initializing base class
        ML_FunctionBasis.__init__(self,
                                  base_name="cos",
                                  function_name=function_name,
                                  output_file=output_file,
                                  io_precisions=io_precisions,
                                  abs_accuracy=None,
                                  libm_compliant=libm_compliant,
                                  processor=target,
                                  fuse_fma=fuse_fma,
                                  fast_path_extract=fast_path_extract,
                                  debug_flag=debug_flag)

        self.precision = precision

    def generate_emulate(self, result, mpfr_x, mpfr_rnd):
        """ generate the emulation code for the cosine function,
            as a call to mpfr_cos assigned to @p result
            mpfr_x is a mpfr_t variable which should have the right precision
            mpfr_rnd is the rounding mode
        """
        emulate_func_name = "mpfr_cos"
        emulate_func_op = FunctionOperator(emulate_func_name,
                                           arg_map={
                                               0: FO_Result(0),
                                               1: FO_Arg(0),
                                               2: FO_Arg(1)
                                           },
                                           require_header=["mpfr.h"])
        emulate_func = FunctionObject(emulate_func_name,
                                      [ML_Mpfr_t, ML_Int32], ML_Mpfr_t,
                                      emulate_func_op)
        mpfr_call = Statement(
            ReferenceAssign(result, emulate_func(mpfr_x, mpfr_rnd)))

        return mpfr_call

    def generate_scheme(self):
        """ Build and return the operation graph implementing cos(x).

            The scheme works on |x| and selects between two paths:
            a table of polynomials indexed by k mod 16 where
            k = nearestint(|x| * 8 / pi) for standard arguments, and an
            external Payne-Hanek reduction followed by a second polynomial
            table for arguments whose magnitude reaches 2^20.
        """
        # declaring CodeFunction and retrieving input variable
        vx = Abs(self.implementation.add_input_variable("x", self.precision),
                 tag="vx")

        Log.report(Log.Info, "generating implementation scheme")
        if self.debug_flag:
            Log.report(Log.Info, "debug has been enabled")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        debug_precision = {
            ML_Binary32: debug_ftox,
            ML_Binary64: debug_lftolx
        }[self.precision]

        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=True,
                               tag="nan_or_inf")
        test_nan = Test(vx, specifier=Test.IsNaN, debug=True,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=True,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=True,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        # return in case of infinity input
        infty_return = Statement(
            ConditionBlock(test_positive,
                           Return(FP_PlusInfty(self.precision)),
                           Return(FP_PlusZero(self.precision))))
        # return in case of specific value input (NaN or inf)
        # NOTE(review): specific_return is built here but is not referenced
        # by the scheme returned at the end of this method
        specific_return = ConditionBlock(
            test_nan,
            ConditionBlock(test_signaling_nan, return_snan,
                           Return(FP_QNaN(self.precision))),
            infty_return)
        # return in case of standard (non-special) input

        sollya_precision = self.precision.get_sollya_object()
        hi_precision = self.precision.get_field_size() - 3

        # argument reduction
        frac_pi_index = 3
        # frac_pi ~ 2^3 / pi, and pi / 2^3 split into a short high part
        # (inv_frac_pi) plus a low correction (inv_frac_pi_lo)
        frac_pi = round(S2**frac_pi_index / pi, sollya_precision, sollya.RN)
        inv_frac_pi = round(pi / S2**frac_pi_index, hi_precision, sollya.RN)
        inv_frac_pi_lo = round(pi / S2**frac_pi_index - inv_frac_pi,
                               sollya_precision, sollya.RN)
        # computing k = E(x * frac_pi)
        vx_pi = Multiplication(vx, frac_pi, precision=self.precision)
        k = NearestInteger(vx_pi, precision=ML_Int32, tag="k", debug=True)
        fk = Conversion(k, precision=self.precision, tag="fk")

        inv_frac_pi_cst = Constant(inv_frac_pi,
                                   tag="inv_frac_pi",
                                   precision=self.precision)
        inv_frac_pi_lo_cst = Constant(inv_frac_pi_lo,
                                      tag="inv_frac_pi_lo",
                                      precision=self.precision)

        red_vx_hi = (vx - inv_frac_pi_cst * fk)
        red_vx_hi.set_attributes(tag="red_vx_hi",
                                 debug=debug_precision,
                                 precision=self.precision)
        red_vx_lo_sub = inv_frac_pi_lo_cst * fk
        red_vx_lo_sub.set_attributes(tag="red_vx_lo_sub",
                                     debug=debug_precision,
                                     unbreakable=True,
                                     precision=self.precision)
        vx_d = Conversion(vx, precision=ML_Binary64, tag="vx_d")
        pre_red_vx = red_vx_hi - inv_frac_pi_lo_cst * fk
        # same reduction, carried out from the binary64 copy of the input
        pre_red_vx_d_hi = (vx_d - inv_frac_pi_cst * fk)
        pre_red_vx_d_hi.set_attributes(tag="pre_red_vx_d_hi",
                                       precision=ML_Binary64,
                                       debug=debug_lftolx)
        pre_red_vx_d = pre_red_vx_d_hi - inv_frac_pi_lo_cst * fk
        pre_red_vx_d.set_attributes(tag="pre_red_vx_d",
                                    debug=debug_lftolx,
                                    precision=ML_Binary64)

        modk = Modulo(k,
                      2**(frac_pi_index + 1),
                      precision=ML_Int32,
                      tag="switch_value",
                      debug=True)

        # the reduced argument is negated when bit (frac_pi_index - 1)
        # of modk is set
        sel_c = Equal(BitLogicAnd(modk, 2**(frac_pi_index - 1)),
                      2**(frac_pi_index - 1))
        red_vx = Select(sel_c, -pre_red_vx, pre_red_vx)
        red_vx.set_attributes(tag="red_vx",
                              debug=debug_precision,
                              precision=self.precision)

        red_vx_d = Select(sel_c, -pre_red_vx_d, pre_red_vx_d)
        red_vx_d.set_attributes(tag="red_vx_d",
                                debug=debug_lftolx,
                                precision=ML_Binary64)

        approx_interval = Interval(-pi / (S2**(frac_pi_index + 1)),
                                   pi / S2**(frac_pi_index + 1))

        Log.report(Log.Info, "approx interval: %s\n" % approx_interval)

        error_goal_approx = S2**-self.precision.get_precision()

        Log.report(Log.Info, "building mathematical polynomial")
        poly_degree_vector = [None] * 2**(frac_pi_index + 1)

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

        index_relative = []

        poly_object_vector = [None] * 2**(frac_pi_index + 1)
        # one approximation of cos(x + i * pi / 2^frac_pi_index) per index;
        # range() is used (xrange does not exist on Python 3) and degree
        # lists are materialised so callees always receive real lists
        for i in range(2**(frac_pi_index + 1)):
            sub_func = cos(sollya.x + i * pi / S2**frac_pi_index)
            degree = int(
                sup(guessdegree(sub_func, approx_interval,
                                error_goal_approx))) + 1

            degree_list = list(range(degree + 1))
            a_interval = approx_interval
            if i == 0:
                # ad-hoc, TODO: to be cleaned
                degree = 6
                degree_list = list(range(0, degree + 1, 2))
            elif i % 2**(frac_pi_index) == 2**(frac_pi_index - 1):
                # for pi/2 and 3pi/2, an approx to  sin=cos(pi/2+x)
                # must be generated
                degree_list = list(range(1, degree + 1, 2))

            if i == 3 or i == 5 or i == 7 or i == 9:
                precision_list = [sollya.binary64
                                  ] + [sollya.binary32] * (degree)
            else:
                precision_list = [sollya.binary32] * (degree + 1)

            poly_degree_vector[i] = degree

            constraint = sollya.absolute
            delta = (2**(frac_pi_index - 3))
            centered_i = (i % 2**(frac_pi_index)) - 2**(frac_pi_index - 1)
            if centered_i < delta and centered_i > -delta and centered_i != 0:
                constraint = sollya.relative
                index_relative.append(i)
            Log.report(
                Log.Info, "generating approximation for %d/%d" %
                (i, 2**(frac_pi_index + 1)))
            poly_object_vector[
                i], _ = Polynomial.build_from_approximation_with_error(
                    sub_func,
                    degree_list,
                    precision_list,
                    a_interval,
                    constraint,
                    error_function=error_function)

        # unified power map for red_sx^n
        upm = {}
        rel_error_list = []

        poly_scheme_vector = [None] * (2**(frac_pi_index + 1))

        for i in range(2**(frac_pi_index + 1)):
            poly_object = poly_object_vector[i]
            poly_precision = self.precision
            if i == 3 or i == 5 or i == 7 or i == 9:
                # first two terms evaluated in binary64
                poly_precision = ML_Binary64
                c0 = Constant(coeff(poly_object.get_sollya_object(), 0),
                              precision=ML_Binary64)
                c1 = Constant(coeff(poly_object.get_sollya_object(), 1),
                              precision=self.precision)
                poly_hi = (c0 + c1 * red_vx)
                poly_hi.set_precision(ML_Binary64)
                red_vx_d_2 = red_vx_d * red_vx_d
                poly_scheme = poly_hi + red_vx_d_2 * polynomial_scheme_builder(
                    poly_object.sub_poly(start_index=2, offset=2),
                    red_vx,
                    unified_precision=self.precision,
                    power_map_=upm)
                poly_scheme.set_attributes(unbreakable=True)
            elif i == 4:
                c1 = Constant(coeff(poly_object.get_sollya_object(), 1),
                              precision=ML_Binary64)
                poly_scheme = c1 * red_vx_d + polynomial_scheme_builder(
                    poly_object.sub_poly(start_index=2),
                    red_vx,
                    unified_precision=self.precision,
                    power_map_=upm)
                poly_scheme.set_precision(ML_Binary64)
            else:
                poly_scheme = polynomial_scheme_builder(
                    poly_object,
                    red_vx,
                    unified_precision=poly_precision,
                    power_map_=upm)
            poly_scheme.set_attributes(tag="poly_cos%dpi%d" %
                                       (i, 2**(frac_pi_index)),
                                       debug=debug_precision)
            poly_scheme_vector[i] = poly_scheme

            # evaluation error is only computed for index 3
            if is_gappa_installed() and i == 3:
                opt_scheme = self.opt_engine.optimization_process(
                    poly_scheme,
                    self.precision,
                    copy=True,
                    fuse_fma=self.fuse_fma)

                tag_map = {}
                self.opt_engine.register_nodes_by_tag(opt_scheme, tag_map)

                gappa_vx = Variable("red_vx",
                                    precision=self.precision,
                                    interval=approx_interval)

                cg_eval_error_copy_map = {
                    tag_map["red_vx"]: gappa_vx,
                    tag_map["red_vx_d"]: gappa_vx,
                }

                eval_error = self.gappa_engine.get_eval_error_v2(
                    self.opt_engine,
                    opt_scheme,
                    cg_eval_error_copy_map,
                    gappa_filename="red_arg_%d.g" % i)
                poly_range = cos(approx_interval +
                                 i * pi / S2**frac_pi_index)
                rel_error_list.append(eval_error / poly_range)

        half = 2**frac_pi_index
        sub_half = 2**(frac_pi_index - 1)

        # determine if the reduced input is within the second and third quarter (not first nor fourth)
        # to negate the cosine output
        factor_cond = BitLogicAnd(BitLogicXor(
            BitLogicRightShift(modk, frac_pi_index),
            BitLogicRightShift(modk, frac_pi_index - 1)),
                                  1,
                                  tag="factor_cond",
                                  debug=True)

        CM1 = Constant(-1, precision=self.precision)
        C1 = Constant(1, precision=self.precision)
        factor = Select(factor_cond,
                        CM1,
                        C1,
                        tag="factor",
                        debug=debug_precision)
        factor2 = Select(Equal(modk, Constant(sub_half)),
                         CM1,
                         C1,
                         tag="factor2",
                         debug=debug_precision)

        # switch cases are grouped: indexes i, half - i (and half + i,
        # 2 * half - i when i != 0) share polynomial i, the sign being
        # carried by <factor>
        switch_map = {}
        for i in range(2**(frac_pi_index - 1)):
            switch_case = (i, half - i)
            if i != 0:
                switch_case = switch_case + (half + i, 2 * half - i)
            if poly_scheme_vector[i].get_precision() != self.precision:
                poly_result = Conversion(poly_scheme_vector[i],
                                         precision=self.precision)
            else:
                poly_result = poly_scheme_vector[i]
            switch_map[switch_case] = Return(factor * poly_result)
        switch_map[(sub_half, half + sub_half)] = Return(
            factor2 * poly_scheme_vector[sub_half])

        result = SwitchBlock(modk, switch_map)

        #######################################################################
        #                    LARGE ARGUMENT MANAGEMENT                        #
        #                 (lar: Large Argument Reduction)                     #
        #######################################################################

        # payne and hanek argument reduction for large arguments
        red_func_name = "payne_hanek_fp32_asm"
        payne_hanek_func_op = FunctionOperator(
            red_func_name,
            arg_map={0: FO_Arg(0)},
            require_header=["support_lib/ml_red_arg.h"])
        payne_hanek_func = FunctionObject(red_func_name, [ML_Binary32],
                                          ML_Binary64, payne_hanek_func_op)
        payne_hanek_func_op.declare_prototype = payne_hanek_func
        large_arg_red = payne_hanek_func(vx)
        red_bound = S2**20

        cond = Abs(vx) >= red_bound
        cond.set_attributes(tag="cond", likely=False)

        lar_neark = NearestInteger(large_arg_red, precision=ML_Int64)
        lar_modk = Modulo(lar_neark,
                          Constant(16, precision=ML_Int64),
                          tag="lar_modk",
                          debug=True)
        # Modulo is supposed to be already performed (by payne_hanek_cosfp32)
        pre_lar_red_vx = large_arg_red - Conversion(lar_neark,
                                                    precision=ML_Binary64)
        pre_lar_red_vx.set_attributes(precision=ML_Binary64,
                                      debug=debug_lftolx,
                                      tag="pre_lar_red_vx")
        lar_red_vx = Conversion(pre_lar_red_vx,
                                precision=self.precision,
                                debug=debug_precision,
                                tag="lar_red_vx")
        lar_red_vx_lo = Conversion(
            pre_lar_red_vx - Conversion(lar_red_vx, precision=ML_Binary64),
            precision=self.precision)
        lar_red_vx_lo.set_attributes(tag="lar_red_vx_lo",
                                     precision=self.precision)

        lar_k = 3
        # large arg reduction Universal Power Map
        lar_upm = {}
        lar_switch_map = {}
        approx_interval = Interval(-0.5, 0.5)
        for i in range(2**(lar_k + 1)):
            frac_pi = pi / S2**lar_k
            # NOTE(review): <x> is not defined in this method; presumably the
            # module-level sollya free variable - confirm against imports
            func = cos(frac_pi * i + frac_pi * x)

            degree = 6
            error_mode = sollya.absolute
            if i % 2**(lar_k) == 2**(lar_k - 1):
                # close to sin(x) cases
                func = -sin(frac_pi * x) if i == 2**(lar_k - 1) else sin(
                    frac_pi * x)
                degree_list = list(range(0, degree + 1, 2))
                precision_list = [sollya.binary32] * len(degree_list)
                poly_object, _ = Polynomial.build_from_approximation_with_error(
                    func / x, degree_list, precision_list, approx_interval,
                    error_mode)
                poly_object = poly_object.sub_poly(offset=-1)
            else:
                degree_list = list(range(degree + 1))
                precision_list = [sollya.binary32] * len(degree_list)
                poly_object, _ = Polynomial.build_from_approximation_with_error(
                    func, degree_list, precision_list, approx_interval,
                    error_mode)

            if i == 3 or i == 5 or i == 7 or i == 9 or i == 11 or i == 13:
                poly_precision = ML_Binary64
                c0 = Constant(coeff(poly_object.get_sollya_object(), 0),
                              precision=ML_Binary64)
                c1 = Constant(coeff(poly_object.get_sollya_object(), 1),
                              precision=self.precision)
                poly_hi = (c0 + c1 * lar_red_vx)
                poly_hi.set_precision(ML_Binary64)
                pre_poly_scheme = poly_hi + polynomial_scheme_builder(
                    poly_object.sub_poly(start_index=2),
                    lar_red_vx,
                    unified_precision=self.precision,
                    power_map_=lar_upm)
                pre_poly_scheme.set_attributes(precision=ML_Binary64)
                poly_scheme = Conversion(pre_poly_scheme,
                                         precision=self.precision)
            elif i == 4 or i == 12:
                c1 = Constant(coeff(poly_object.get_sollya_object(), 1),
                              precision=self.precision)
                c3 = Constant(coeff(poly_object.get_sollya_object(), 3),
                              precision=self.precision)
                c5 = Constant(coeff(poly_object.get_sollya_object(), 5),
                              precision=self.precision)
                poly_hi = polynomial_scheme_builder(
                    poly_object.sub_poly(start_index=3),
                    lar_red_vx,
                    unified_precision=self.precision,
                    power_map_=lar_upm)
                poly_hi.set_attributes(tag="poly_lar_%d_hi" % i,
                                       precision=ML_Binary64)
                poly_scheme = Conversion(FusedMultiplyAdd(
                    c1, lar_red_vx, poly_hi, precision=ML_Binary64) +
                                         c1 * lar_red_vx_lo,
                                         precision=self.precision)
            else:
                poly_scheme = polynomial_scheme_builder(
                    poly_object,
                    lar_red_vx,
                    unified_precision=self.precision,
                    power_map_=lar_upm)
            poly_scheme.set_attributes(tag="lar_poly_%d" % i,
                                       debug=debug_precision)
            lar_switch_map[(i, )] = Return(poly_scheme)

        lar_result = SwitchBlock(lar_modk, lar_switch_map)

        # main scheme
        Log.report(Log.Info, "Construction of the initial MDL scheme")
        scheme = Statement(pre_red_vx_d, red_vx_lo_sub,
                           ConditionBlock(cond, lar_result, result))

        return scheme
class ML_FastSinCos(ML_Function("ml_fast_cos")):
    """ Fixed-point, table-based implementation of cosine (or sine).

        The input is converted to a fixed-point format and split into a high
        part, used to index a fused (cos, sin) table, and a low part u, used
        in short polynomial corrections.  The attribute cos_output selects
        whether the generated scheme evaluates cos or sin.
    """

    # NOTE: default values such as GenericProcessor() and Interval(0, 1) are
    # built once, when the class body is executed, and are therefore shared
    # by every instance relying on the defaults.
    def __init__(self,
                 precision=ML_Binary32,
                 accuracy=ML_Faithful,
                 libm_compliant=True,
                 debug_flag=False,
                 fuse_fma=True,
                 fast_path_extract=True,
                 processor=GenericProcessor(),
                 output_file="cosf.c",
                 function_name="cosf",
                 input_interval=Interval(0, 1),
                 result_precision=ML_Binary32,
                 table_size_log=8,
                 cos_output=True):
        """ Store the generation parameters and initialize the base class.

            @param precision format of the input variable
            @param result_precision format of the returned value
            @param input_interval interval used to size the fixed-point
                   format and the table step
            @param table_size_log log2 of the number of table rows
            @param cos_output True to generate cosine, False for sine
            The remaining parameters are forwarded unchanged to
            ML_FunctionBasis.__init__.
        """
        # initializing I/O precision
        io_precisions = [result_precision, precision]

        # initializing base class
        ML_FunctionBasis.__init__(self,
                                  base_name="cos",
                                  function_name=function_name,
                                  output_file=output_file,
                                  io_precisions=io_precisions,
                                  abs_accuracy=None,
                                  libm_compliant=libm_compliant,
                                  processor=processor,
                                  fuse_fma=fuse_fma,
                                  fast_path_extract=fast_path_extract,
                                  debug_flag=debug_flag)
        self.precision = precision
        self.cos_output = cos_output
        self.accuracy = accuracy
        self.input_interval = input_interval
        self.table_size_log = table_size_log

    def generate_emulate(self, result_ternary, result, mpfr_x, mpfr_rnd):
        """ generate the emulation code for ML_FastSinCos functions

            mpfr_x is a mpfr_t variable which should have the right precision
            mpfr_rnd is the rounding mode
            The emitted statement assigns to result_ternary the value
            returned by mpfr_cos or mpfr_sin (depending on self.cos_output)
            called on (result, mpfr_x, mpfr_rnd).
        """
        emulate_func_name = "mpfr_cos" if self.cos_output else "mpfr_sin"
        emulate_func_op = FunctionOperator(emulate_func_name,
                                           arg_map={
                                               0: FO_Arg(0),
                                               1: FO_Arg(1),
                                               2: FO_Arg(2)
                                           },
                                           require_header=["mpfr.h"])
        emulate_func = FunctionObject(emulate_func_name,
                                      [ML_Mpfr_t, ML_Mpfr_t, ML_Int32],
                                      ML_Int32, emulate_func_op)
        mpfr_call = Statement(
            ReferenceAssign(result_ternary,
                            emulate_func(result, mpfr_x, mpfr_rnd)))
        return mpfr_call

    def generate_scheme(self):
        """ Build and return the operation scheme: Statement(Return(result))
            where result is the cos/sin scheme converted to the output
            precision (self.io_precisions[0]).
        """
        # declaring CodeFunction and retrieving input variable
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.report(Log.Info, "target: %s " % self.processor.target_name)

        # display parameter information
        Log.report(Log.Info, "accuracy      : %s " % self.accuracy)
        Log.report(Log.Info, "input interval: %s " % self.input_interval)

        accuracy_goal = self.accuracy.get_goal()
        Log.report(Log.Info, "accuracy_goal=%f" % accuracy_goal)

        table_size_log = self.table_size_log
        integer_size = 31
        integer_precision = ML_Int32

        # the largest input magnitude fixes how many bits are left for the
        # fractional part of the 32-bit fixed-point representation
        max_bound = sup(abs(self.input_interval))
        max_bound_log = int(ceil(log2(max_bound)))
        Log.report(Log.Info, "max_bound_log=%s " % max_bound_log)
        scaling_power = integer_size - max_bound_log
        Log.report(Log.Info, "scaling power: %s " % scaling_power)

        storage_precision = ML_Custom_FixedPoint_Format(1, 30, signed=True)

        Log.report(Log.Info, "tabulating cosine and sine")
        # cosine and sine fused table: row i holds (cos, sin) of the i-th
        # sample point
        fused_table = ML_Table(
            dimensions=[2**table_size_log, 2],
            storage_precision=storage_precision,
            tag="fast_lib_shared_table")  # self.uniquify_name("cossin_table"))
        # filling table
        # `range` (not the Python-2-only `xrange`) so that the loop also runs
        # under Python 3; the table is small enough for either to be fine
        for i in range(2**table_size_log):
            local_x = i / S2**table_size_log * S2**max_bound_log

            cos_local = cos(local_x)
            sin_local = sin(local_x)

            fused_table[i][0] = cos_local
            fused_table[i][1] = sin_local

        # argument reduction evaluation scheme
        # 31 - scaling_power equals max_bound_log, scaling_power is the
        # remaining bit budget
        red_vx_precision = ML_Custom_FixedPoint_Format(31 - scaling_power,
                                                       scaling_power,
                                                       signed=True)
        Log.report(
            Log.Verbose, "red_vx_precision.get_c_bit_size()=%d" %
            red_vx_precision.get_c_bit_size())
        red_vx = Conversion(vx,
                            precision=red_vx_precision,
                            tag="red_vx",
                            debug=debug_fixed32)

        computation_precision = red_vx_precision  # self.precision
        output_precision = self.io_precisions[0]
        Log.report(Log.Info,
                   "computation_precision is %s" % computation_precision)
        Log.report(Log.Info, "storage_precision     is %s" % storage_precision)
        Log.report(Log.Info, "output_precision      is %s" % output_precision)

        # mask with the (table_size_log + 1) most significant bits of a
        # 32-bit word set: used to isolate the high part of red_vx
        hi_mask_value = 2**32 - 2**(32 - table_size_log - 1)
        hi_mask = Constant(hi_mask_value, precision=ML_Int32)
        Log.report(Log.Info, "hi_mask=0x%x" % hi_mask_value)

        red_vx_hi_int = BitLogicAnd(TypeCast(red_vx, precision=ML_Int32),
                                    hi_mask,
                                    precision=ML_Int32,
                                    tag="red_vx_hi_int",
                                    debug=debugd)
        red_vx_hi = TypeCast(red_vx_hi_int,
                             precision=red_vx_precision,
                             tag="red_vx_hi",
                             debug=debug_fixed32)
        # low part u = red_vx - high part
        red_vx_lo = red_vx - red_vx_hi
        red_vx_lo.set_attributes(precision=red_vx_precision,
                                 tag="red_vx_lo",
                                 debug=debug_fixed32)
        table_index = BitLogicRightShift(
            TypeCast(red_vx, precision=ML_Int32),
            scaling_power - (table_size_log - max_bound_log),
            precision=ML_Int32,
            tag="table_index",
            debug=debugd)

        tabulated_cos = TableLoad(fused_table,
                                  table_index,
                                  0,
                                  tag="tab_cos",
                                  precision=storage_precision,
                                  debug=debug_fixed32)
        tabulated_sin = TableLoad(fused_table,
                                  table_index,
                                  1,
                                  tag="tab_sin",
                                  precision=storage_precision,
                                  debug=debug_fixed32)

        # approximation error measured as the sup norm of (f - p)
        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        Log.report(Log.Info, "building polynomial approximation for cosine")
        # cosine polynomial approximation (monomials of degree 0 and 2)
        poly_interval = Interval(0, S2**(max_bound_log - table_size_log))
        Log.report(Log.Info, "poly_interval=%s " % poly_interval)
        cos_poly_degree = 2  # int(sup(guessdegree(cos(x), poly_interval, accuracy_goal)))

        Log.report(Log.Verbose, "cosine polynomial approximation")
        cos_poly_object, cos_approx_error = Polynomial.build_from_approximation_with_error(
            cos(x), [0, 2], [0] + [computation_precision.get_bit_size()],
            poly_interval,
            sollya.absolute,
            error_function=error_function)
        Log.report(Log.Info, "cos_approx_error=%e" % cos_approx_error)
        cos_coeff_list = cos_poly_object.get_ordered_coeff_list()
        coeff_C0 = cos_coeff_list[0][1]
        coeff_C2 = Constant(cos_coeff_list[1][1],
                            precision=ML_Custom_FixedPoint_Format(-1,
                                                                  32,
                                                                  signed=True))

        Log.report(Log.Info, "building polynomial approximation for sine")

        # sine polynomial approximation: sin(x)/x on monomials 0 and 2
        sin_poly_degree = 2  # int(sup(guessdegree(sin(x)/x, poly_interval, accuracy_goal)))
        Log.report(Log.Info, "sine poly degree: %e" % sin_poly_degree)
        Log.report(Log.Verbose, "sine polynomial approximation")
        sin_poly_object, sin_approx_error = Polynomial.build_from_approximation_with_error(
            sin(sollya.x) / sollya.x, [0, 2],
            [0] + [computation_precision.get_bit_size()] *
            (sin_poly_degree + 1),
            poly_interval,
            sollya.absolute,
            error_function=error_function)
        sin_coeff_list = sin_poly_object.get_ordered_coeff_list()
        coeff_S0 = sin_coeff_list[0][1]
        coeff_S2 = Constant(sin_coeff_list[1][1],
                            precision=ML_Custom_FixedPoint_Format(-1,
                                                                  32,
                                                                  signed=True))

        # scheme selection between sine and cosine
        if self.cos_output:
            scheme = self.generate_cos_scheme(computation_precision,
                                              tabulated_cos, tabulated_sin,
                                              coeff_S2, coeff_C2, red_vx_lo)
        else:
            scheme = self.generate_sin_scheme(computation_precision,
                                              tabulated_cos, tabulated_sin,
                                              coeff_S2, coeff_C2, red_vx_lo)

        result = Conversion(scheme, precision=self.io_precisions[0])

        Log.report(
            Log.Verbose, "result operation tree :\n %s " % result.get_str(
                display_precision=True, depth=None, memoization_map={}))

        scheme = Statement(Return(result))

        return scheme

    ## generate scheme for cosine approximation of cos(X = x + u)
    #  evaluated as cos(x) + cos(x).C2.u^2 - sin(x).u - S2.u^2.(sin(x).u)
    #  @param computation_precision ML_Format used as default precision for
    #         scheme evaluation
    #  @param tabulated_cos tabulated value of cosine(high part of vx)
    #  @param tabulated_sin tabulated value of sine(high part of vx)
    #  @param sin_C2 polynomial coefficient of sine approximation for u^3
    #  @param cos_C2 polynomial coefficient of cosine approximation for u^2
    #  @param red_vx_lo low part of the reduced input variable (i.e. u)
    #  @return operation node computing the approximation
    def generate_cos_scheme(self, computation_precision, tabulated_cos,
                            tabulated_sin, sin_C2, cos_C2, red_vx_lo):
        # from here on cos_C2 denotes cos(x) * C2
        cos_C2 = Multiplication(tabulated_cos,
                                cos_C2,
                                precision=ML_Custom_FixedPoint_Format(
                                    -1, 32, signed=True),
                                tag="cos_C2")
        u2 = Multiplication(red_vx_lo,
                            red_vx_lo,
                            precision=computation_precision,
                            tag="u2")
        sin_u = Multiplication(tabulated_sin,
                               red_vx_lo,
                               precision=computation_precision,
                               tag="sin_u")

        cos_C2_u2 = Multiplication(cos_C2,
                                   u2,
                                   precision=computation_precision,
                                   tag="cos_C2_u2")

        S2_u2 = Multiplication(sin_C2,
                               u2,
                               precision=ML_Custom_FixedPoint_Format(
                                   -1, 32, signed=True),
                               tag="S2_u2")

        S2_u3_sin = Multiplication(S2_u2,
                                   sin_u,
                                   precision=computation_precision,
                                   tag="S2_u3_sin")

        cos_C2_u2_P_cos = Addition(tabulated_cos,
                                   cos_C2_u2,
                                   precision=computation_precision,
                                   tag="cos_C2_u2_P_cos")

        cos_C2_u2_P_cos_M_sin_u = Subtraction(cos_C2_u2_P_cos,
                                              sin_u,
                                              precision=computation_precision)

        scheme = Subtraction(cos_C2_u2_P_cos_M_sin_u,
                             S2_u3_sin,
                             precision=computation_precision)

        return scheme

    ## generate scheme for sine approximation of sin(X = x + u)
    #  evaluated as sin(x) + cos(x).u + sin(x).C2.u^2 + S2.u^2.(cos(x).u)
    #  @param computation_precision ML_Format used as default precision for
    #         scheme evaluation
    #  @param tabulated_cos tabulated value of cosine(high part of vx)
    #  @param tabulated_sin tabulated value of sine(high part of vx)
    #  @param coeff_S2 polynomial coefficient of sine approximation for u^3
    #  @param coeff_C2 polynomial coefficient of cosine approximation for u^2
    #  @param red_vx_lo low part of the reduced input variable (i.e. u)
    #  @return operation node computing the approximation
    def generate_sin_scheme(self, computation_precision, tabulated_cos,
                            tabulated_sin, coeff_S2, coeff_C2, red_vx_lo):
        sin_C2 = Multiplication(tabulated_sin,
                                coeff_C2,
                                precision=ML_Custom_FixedPoint_Format(
                                    -1, 32, signed=True),
                                tag="sin_C2")
        u2 = Multiplication(red_vx_lo,
                            red_vx_lo,
                            precision=computation_precision,
                            tag="u2")
        cos_u = Multiplication(tabulated_cos,
                               red_vx_lo,
                               precision=computation_precision,
                               tag="cos_u")

        S2_u2 = Multiplication(coeff_S2,
                               u2,
                               precision=ML_Custom_FixedPoint_Format(
                                   -1, 32, signed=True),
                               tag="S2_u2")

        sin_C2_u2 = Multiplication(sin_C2,
                                   u2,
                                   precision=computation_precision,
                                   tag="sin_C2_u2")

        S2_u3_cos = Multiplication(S2_u2,
                                   cos_u,
                                   precision=computation_precision,
                                   tag="S2_u3_cos")

        sin_P_cos_u = Addition(tabulated_sin,
                               cos_u,
                               precision=computation_precision,
                               tag="sin_P_cos_u")

        sin_P_cos_u_P_C2_u2_sin = Addition(sin_P_cos_u,
                                           sin_C2_u2,
                                           precision=computation_precision,
                                           tag="sin_P_cos_u_P_C2_u2_sin")

        scheme = Addition(sin_P_cos_u_P_C2_u2_sin,
                          S2_u3_cos,
                          precision=computation_precision)

        return scheme
class ML_Exp2(ML_Function("ml_exp2")):
    """ Meta-implementation of the base-2 exponential, 2^x.

        The scheme splits x into its nearest integer n and a remainder r,
        approximates 2^r - 1 by a polynomial and reconstructs the result as
        2^n * poly(r) + 2^n, with dedicated paths for overflow, underflow
        and subnormal results.
    """

    def __init__(self, args=DefaultArgTemplate):
        # initializing base class
        ML_FunctionBasis.__init__(self, args)
        self.accuracy = args.accuracy

    @staticmethod
    def get_default_args(**kw):
        """ Return a structure containing the arguments for ML_Exp2,
            builtin from a default argument mapping overloaded with @p kw """
        default_args_exp2 = {
            "output_file": "my_exp2.c",
            "function_name": "exp2f",
            "precision": ML_Binary32,
            "accuracy": ML_Faithful,
            "target": GenericProcessor()
        }
        default_args_exp2.update(kw)
        return DefaultArgTemplate(**default_args_exp2)

    def generate_scheme(self):
        """ Build and return the operation scheme (a Statement) for 2^x """
        # declaring target and instantiating optimization engine
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        # the reduced argument lies in [-0.5, 0.5]; the error goal is half
        # of the largest ulp of 2^r over that interval
        r_interval = Interval(-0.5, 0.5)

        local_ulp = sup(ulp(2**r_interval, self.precision))
        print("ulp: ", local_ulp)
        error_goal = S2**-1 * local_ulp
        print("error goal: ", error_goal)

        sollya_precision = {
            ML_Binary32: sollya.binary32,
            ML_Binary64: sollya.binary64
        }[self.precision]
        int_precision = {
            ML_Binary32: ML_Int32,
            ML_Binary64: ML_Int64
        }[self.precision]

        #Argument Reduction: vx = vx_int + vx_r
        vx_int = NearestInteger(vx,
                                precision=int_precision,
                                tag='vx_int',
                                debug=debug_multi)
        vx_intf = Conversion(vx_int, precision=self.precision)
        vx_r = vx - vx_intf
        vx_r.set_attributes(tag="vx_r", debug=debug_multi)
        degree = sup(guessdegree(2**(sollya.x), r_interval, error_goal)) + 2
        precision_list = [1] + [self.precision] * degree

        # 2^vx_int built by writing vx_int into the exponent field
        exp_X = ExponentInsertion(vx_int,
                                  tag="exp_X",
                                  debug=debug_multi,
                                  precision=self.precision)

        #Polynomial Approx of 2^r - 1 (constant term dropped via sub_poly)
        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

        poly_object, poly_error = Polynomial.build_from_approximation_with_error(
            2**(sollya.x) - 1, degree, precision_list, r_interval,
            sollya.absolute)
        Log.report(Log.Info, "Poly : %s" % poly_object)
        print("poly_error : ", poly_error)
        poly = polynomial_scheme_builder(poly_object.sub_poly(start_index=1),
                                         vx_r,
                                         unified_precision=self.precision)
        poly.set_attributes(tag="poly", debug=debug_multi)

        #Handling special cases
        oflow_bound = Constant(self.precision.get_emax() + 1,
                               precision=self.precision)
        subnormal_bound = self.precision.get_emin_subnormal()
        uflow_bound = self.precision.get_emin_normal()
        print("oflow : ", oflow_bound)
        #print "uflow : ", uflow_bound
        #print "sub : ", subnormal_bound
        test_overflow = Comparison(vx,
                                   oflow_bound,
                                   specifier=Comparison.GreaterOrEqual)
        test_overflow.set_attributes(tag="oflow_test",
                                     debug=debug_multi,
                                     likely=False,
                                     precision=ML_Bool)

        test_underflow = Comparison(vx, uflow_bound, specifier=Comparison.Less)
        test_underflow.set_attributes(tag="uflow_test",
                                      debug=debug_multi,
                                      likely=False,
                                      precision=ML_Bool)

        test_subnormal = Comparison(vx,
                                    subnormal_bound,
                                    specifier=Comparison.Greater)
        test_subnormal.set_attributes(tag="sub_test",
                                      debug=debug_multi,
                                      likely=False,
                                      precision=ML_Bool)

        # subnormal result: the scale 2^vx_int is split into
        # 2^(vx_int - uflow_bound) * 2^uflow_bound
        subnormal_offset = -(uflow_bound - vx_int)
        subnormal_offset.set_attributes(tag="offset", debug=debug_multi)
        exp_offset = ExponentInsertion(subnormal_offset,
                                       precision=self.precision,
                                       debug=debug_multi,
                                       tag="exp_offset")
        exp_min = ExponentInsertion(uflow_bound,
                                    precision=self.precision,
                                    debug=debug_multi,
                                    tag="exp_min")
        subnormal_result = exp_offset * exp_min * poly + exp_offset * exp_min

        # true when the input is outside the standard range (overflow or
        # underflow), i.e. when the non-standard statement must be taken
        test_std = LogicalOr(test_overflow,
                             test_underflow,
                             precision=ML_Bool,
                             tag="std_test",
                             likely=False)

        #Reconstruction: 2^x = 2^n * (2^r - 1) + 2^n
        result = exp_X * poly + exp_X
        result.set_attributes(tag="result", debug=debug_multi)

        C0 = Constant(0, precision=self.precision)

        return_inf = Return(FP_PlusInfty(self.precision))
        return_C0 = Return(C0)
        return_sub = Return(subnormal_result)
        return_std = Return(result)

        non_std_statement = Statement(
            ConditionBlock(
                test_overflow, return_inf,
                ConditionBlock(test_subnormal, return_sub, return_C0)))

        scheme = Statement(
            ConditionBlock(test_std, non_std_statement, return_std))

        return scheme

    def generate_emulate(self, result_ternary, result, mpfr_x, mpfr_rnd):
        """ generate the emulation code for ML_Exp2 functions

            mpfr_x is a mpfr_t variable which should have the right precision
            mpfr_rnd is the rounding mode
            The emitted statement assigns to result_ternary the value
            returned by mpfr_exp2(result, mpfr_x, mpfr_rnd).
        """
        # the reference must be the base-2 exponential (mpfr_exp2), not the
        # natural exponential (mpfr_exp), to agree with numeric_emulate
        emulate_func_name = "mpfr_exp2"
        emulate_func_op = FunctionOperator(emulate_func_name,
                                           arg_map={
                                               0: FO_Arg(0),
                                               1: FO_Arg(1),
                                               2: FO_Arg(2)
                                           },
                                           require_header=["mpfr.h"])
        emulate_func = FunctionObject(emulate_func_name,
                                      [ML_Mpfr_t, ML_Mpfr_t, ML_Int32],
                                      ML_Int32, emulate_func_op)
        mpfr_call = Statement(
            ReferenceAssign(result_ternary,
                            emulate_func(result, mpfr_x, mpfr_rnd)))

        return mpfr_call

    def numeric_emulate(self, input_value):
        """ Return the reference value 2^input_value as a sollya object """
        return sollya.SollyaObject(2)**(input_value)

    standard_test_cases = [[
        sollya.parse(x)
    ] for x in ["0x1.ffead1bac7ad2p+9", "-0x1.ee9cb4p+1", "-0x1.db0928p+3"]]
class ML_Log1p(ML_Function("ml_log1p")):
    """ Meta-implementation of log1p(x) = log(1 + x).

        Inputs with a small exponent are handled by a direct polynomial in x;
        other inputs go through a table-driven argument reduction based on a
        reciprocal seed of (1 + x).
    """

    def __init__(self, args):
        # initializing base class
        ML_FunctionBasis.__init__(self, args)

    @staticmethod
    def get_default_args(**kw):
        """ Return a structure containing the arguments for ML_Log1p,
            builtin from a default argument mapping overloaded with @p kw """
        default_args_log1p = {
            "output_file": "my_log1p.c",
            "function_name": "my_log1pf",
            "precision": ML_Binary32,
            "accuracy": ML_Faithful,
            "target": GenericProcessor()
        }
        default_args_log1p.update(kw)
        return DefaultArgTemplate(**default_args_log1p)

    def generate_scheme(self):
        """ Build and return the operation scheme (nested ConditionBlock)
            implementing log1p on the input variable "x" """
        vx = self.implementation.add_input_variable("x", self.precision)

        sollya_precision = self.get_input_precision().sollya_object

        # local overloading of RaiseReturn operation
        # NOTE(review): not referenced again within this method
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        # log(2) split into a high part rounded on a reduced number of bits
        # and a low part holding the rounding remainder
        log2_hi_value = round(log(2), self.precision.get_field_size() - (self.precision.get_exponent_size() + 1), sollya.RN)
        log2_lo_value = round(log(2) - log2_hi_value, self.precision.sollya_object, sollya.RN)

        log2_hi = Constant(log2_hi_value, precision = self.precision)
        log2_lo = Constant(log2_lo_value, precision = self.precision)

        # NOTE(review): vx_exp is rebound to an identical node further down,
        # before its first use
        vx_exp = ExponentExtraction(vx, tag = "vx_exp", debug = debugd)

        int_precision = self.precision.get_integer_format()

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision = self.precision)
        dummy_div_seed = ReciprocalSeed(dummy_var, precision = self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(dummy_div_seed, language = None, table_getter = lambda self: self.approx_table_map)

        # table creation: row i stores (high, low) parts of
        # log(inv_approx_table[i]); row 0 is forced to zero
        table_index_size = 7
        log_table = ML_NewTable(dimensions = [2**table_index_size, 2], storage_precision = self.precision)
        log_table[0][0] = 0.0
        log_table[0][1] = 0.0
        for i in range(1, 2**table_index_size):
            #inv_value = (1.0 + (self.processor.inv_approx_table[i] / S2**9) + S2**-52) * S2**-1
            inv_value = inv_approx_table[i] # (1.0 + (inv_approx_table[i] / S2**9) ) * S2**-1
            value_high = round(log(inv_value), self.precision.get_field_size() - (self.precision.get_exponent_size() + 1), sollya.RN)
            value_low = round(log(inv_value) - value_high, sollya_precision, sollya.RN)
            log_table[i][0] = value_high
            log_table[i][1] = value_low

        vx_exp = ExponentExtraction(vx, tag = "vx_exp", debug = debugd)

        # case close to 0: ctz
        # taken when the exponent of vx is below ctz_exp_limit; the result is
        # vx * P(vx) with P approximating log1p(x)/x
        ctz_exp_limit = -7
        ctz_cond = vx_exp < ctz_exp_limit
        ctz_interval = Interval(-S2**ctz_exp_limit, S2**ctz_exp_limit)

        ctz_poly_degree = sup(guessdegree(log1p(sollya.x)/sollya.x, ctz_interval, S2**-(self.precision.get_field_size()+1))) + 1
        ctz_poly_object = Polynomial.build_from_approximation(log1p(sollya.x)/sollya.x, ctz_poly_degree, [self.precision]*(ctz_poly_degree+1), ctz_interval, sollya.absolute)

        Log.report(Log.Info, "generating polynomial evaluation scheme")
        ctz_poly = PolynomialSchemeEvaluator.generate_horner_scheme(ctz_poly_object, vx, unified_precision = self.precision)
        ctz_poly.set_attributes(tag = "ctz_poly", debug = debug_lftolx)

        ctz_result = vx * ctz_poly

        # special-case predicates
        # NOTE(review): the comparison is strict, so x == -1 is not caught
        # here and falls through to the generic paths -- confirm intended
        neg_input = Comparison(vx, -1, likely = False, specifier = Comparison.Less, debug = debugd, tag = "neg_input")
        vx_nan_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False, debug = debugd, tag = "nan_or_inf")
        vx_snan = Test(vx, specifier = Test.IsSignalingNaN, likely = False, debug = debugd, tag = "snan")
        vx_inf = Test(vx, specifier = Test.IsInfty, likely = False, debug = debugd, tag = "inf")
        vx_subnormal = Test(vx, specifier = Test.IsSubnormal, likely = False, debug = debugd, tag = "vx_subnormal")

        # NOTE(review): the three objects below are not referenced again
        # within this method
        log_function_code = CodeFunction("new_log", [Variable("x", precision = ML_Binary64)], output_format = ML_Binary64)
        log_call_generator = FunctionOperator(log_function_code.get_name(), arity = 1, output_precision = ML_Binary64, declare_prototype = log_function_code)
        newlog_function = FunctionObject(log_function_code.get_name(), (ML_Binary64,), ML_Binary64, log_call_generator)

        # case away from 0.0
        pre_vxp1 = vx + 1.0
        pre_vxp1.set_attributes(tag = "pre_vxp1", debug = debug_lftolx)
        pre_vxp1_exp = ExponentExtraction(pre_vxp1, tag = "pre_vxp1_exp", debug = debugd)
        # when the exponent of (1 + x) exceeds the threshold, (1 + x) is
        # multiplied by 2^-500 before the reduction, otherwise by 2^0
        cm500 = Constant(-500, precision = ML_Int32)
        c0 = Constant(0, precision = ML_Int32)
        cond_scaling = pre_vxp1_exp > 2**(self.precision.get_exponent_size()-2)
        scaling_factor_exp = Select(cond_scaling, cm500, c0)
        scaling_factor = ExponentInsertion(scaling_factor_exp, precision = self.precision, tag = "scaling_factor")

        vxp1 = pre_vxp1 * scaling_factor
        vxp1.set_attributes(tag = "vxp1", debug = debug_lftolx)
        vxp1_exp = ExponentExtraction(vxp1, tag = "vxp1_exp", debug = debugd)

        vxp1_inv = ReciprocalSeed(vxp1, precision = self.precision, tag = "vxp1_inv", debug = debug_lftolx, silent = True)

        # 2^(-exponent of vxp1), used instead of the seed when the table
        # index is 0
        vxp1_dirty_inv = ExponentInsertion(-vxp1_exp, precision = self.precision, tag = "vxp1_dirty_inv", debug = debug_lftolx)

        # 7-bit index (mask 0x7f) taken from the bit pattern of vxp1 shifted
        # right by (field_size - 7)
        table_index = BitLogicAnd(BitLogicRightShift(TypeCast(vxp1, precision = int_precision, debug = debuglx), self.precision.get_field_size() - 7, debug = debuglx), 0x7f, tag = "table_index", debug = debuglx)

        # argument reduction
        # TODO: detect if single operand inverse seed is supported by the targeted architecture
        # the seed is and-ed with the constant -2 on its integer image
        # NOTE(review): the cast is hard-wired to ML_UInt64 whatever
        # self.precision is -- confirm for non 64-bit formats
        pre_arg_red_index = TypeCast(BitLogicAnd(TypeCast(vxp1_inv, precision = ML_UInt64), Constant(-2, precision = ML_UInt64), precision = ML_UInt64), precision = self.precision, tag = "pre_arg_red_index", debug = debug_lftolx)
        arg_red_index = Select(Equal(table_index, 0), vxp1_dirty_inv, pre_arg_red_index, tag = "arg_red_index", debug = debug_lftolx)

        # reduced argument: inv * (1 + x) - 1, computed from vxp1 when
        # scaling is active, and as (inv * x - 1) + inv otherwise
        red_vxp1 = Select(cond_scaling, arg_red_index * vxp1 - 1.0, (arg_red_index * vx - 1.0) + arg_red_index)
        #red_vxp1 = arg_red_index * vxp1 - 1.0
        red_vxp1.set_attributes(tag = "red_vxp1", debug = debug_lftolx)

        log_inv_lo = TableLoad(log_table, table_index, 1, tag = "log_inv_lo", debug = debug_lftolx)
        log_inv_hi = TableLoad(log_table, table_index, 0, tag = "log_inv_hi", debug = debug_lftolx)

        inv_err = S2**-6 # TODO: link to target DivisionSeed precision

        Log.report(Log.Info, "building mathematical polynomial")
        approx_interval = Interval(-inv_err, inv_err)
        poly_degree = sup(guessdegree(log(1+sollya.x)/sollya.x, approx_interval, S2**-(self.precision.get_field_size()+1))) + 1
        global_poly_object = Polynomial.build_from_approximation(log(1+sollya.x)/sollya.x, poly_degree, [self.precision]*(poly_degree+1), approx_interval, sollya.absolute)
        # the degree-0 term is dropped here: red_vxp1 itself is added back
        # separately in step1 below
        poly_object = global_poly_object.sub_poly(start_index = 1)

        Log.report(Log.Info, "generating polynomial evaluation scheme")
        _poly = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, red_vxp1, unified_precision = self.precision)
        _poly.set_attributes(tag = "poly", debug = debug_lftolx)
        Log.report(Log.Info, global_poly_object.get_sollya_object())

        # NOTE(review): vxp1_inv_exp is not referenced again in this method
        vxp1_inv_exp = ExponentExtraction(vxp1_inv, tag = "vxp1_inv_exp", debug = debugd)
        corr_exp = Conversion(-vxp1_exp + scaling_factor_exp, precision = self.precision)# vxp1_inv_exp

        #poly = (red_vxp1) * (1 + _poly)
        #poly.set_attributes(tag = "poly", debug = debug_lftolx, prevent_optimization = True)

        # NOTE(review): pre_result is only tagged, the returned value is
        # std_result built step by step below
        pre_result = -log_inv_hi + (red_vxp1 + red_vxp1 * _poly + (-corr_exp * log2_lo - log_inv_lo))
        pre_result.set_attributes(tag = "pre_result", debug = debug_lftolx)

        exact_log2_hi_exp = - corr_exp * log2_hi
        exact_log2_hi_exp.set_attributes(tag = "exact_log2_hi_exp", debug = debug_lftolx, prevent_optimization = True)

        #std_result =  exact_log2_hi_exp + pre_result

        exact_log2_lo_exp = - corr_exp * log2_lo
        exact_log2_lo_exp.set_attributes(tag = "exact_log2_lo_exp", debug = debug_lftolx)#, prevent_optimization = True)

        # the result is accumulated term by term in a fixed order:
        # low-order terms first, high parts last
        init = exact_log2_lo_exp - log_inv_lo
        init.set_attributes(tag = "init", debug = debug_lftolx, prevent_optimization = True)
        fma0 = (red_vxp1 * _poly + init) # - log_inv_lo)
        fma0.set_attributes(tag = "fma0", debug = debug_lftolx)
        # step0 is the very same node object as fma0, so the call below
        # applies to that node as well
        step0 = fma0
        step0.set_attributes(tag = "step0", debug = debug_lftolx) #, prevent_optimization = True)
        step1 = step0 + red_vxp1
        step1.set_attributes(tag = "step1", debug = debug_lftolx, prevent_optimization = True)
        step2 = -log_inv_hi + step1
        step2.set_attributes(tag = "step2", debug = debug_lftolx, prevent_optimization = True)
        std_result = exact_log2_hi_exp + step2
        std_result.set_attributes(tag = "std_result", debug = debug_lftolx, prevent_optimization = True)

        # main scheme
        # decision order: x < -1 -> invalid + qNaN; inf -> +inf;
        # NaN -> qNaN (invalid raised for sNaN); subnormal -> x;
        # small exponent -> ctz_result; otherwise std_result
        Log.report(Log.Info, "MDL scheme")
        pre_scheme = ConditionBlock(neg_input,
            Statement(
                ClearException(),
                Raise(ML_FPE_Invalid),
                Return(FP_QNaN(self.precision))
            ),
            ConditionBlock(vx_nan_or_inf,
                ConditionBlock(vx_inf,
                    Statement(
                        ClearException(),
                        Return(FP_PlusInfty(self.precision)),
                    ),
                    Statement(
                        ClearException(),
                        ConditionBlock(vx_snan,
                            Raise(ML_FPE_Invalid)
                        ),
                        Return(FP_QNaN(self.precision))
                    )
                ),
                ConditionBlock(vx_subnormal,
                    Return(vx),
                    ConditionBlock(ctz_cond,
                        Statement(
                            Return(ctz_result),
                        ),
                        Statement(
                            Return(std_result)
                        )
                    )
                )
            )
        )
        scheme = pre_scheme
        return scheme

    def numeric_emulate(self, input_value):
        # reference value used for numerical emulation
        return log1p(input_value)
class ML_HyperbolicTangent(ML_Function("ml_tanh")):
    """ Meta-implementation of the hyperbolic tangent function """

    def __init__(self, args=DefaultArgTemplate):
        # base class set-up only, no extra state is kept
        ML_FunctionBasis.__init__(self, args)

    @staticmethod
    def get_default_args(**kw):
        """ Build the argument structure for ML_HyperbolicTangent: the
            defaults below, overloaded by the entries of @p kw """
        default_args_tanh = {
            "output_file": "my_tanh.c",
            "function_name": "my_tanh",
            "precision": ML_Binary32,
            "accuracy": ML_Faithful,
            "target": GenericProcessor()
        }
        default_args_tanh.update(kw)
        return DefaultArgTemplate(**default_args_tanh)

    def generate_approx_poly_near_zero(self, function, high_bound,
                                       error_bound, variable):
        """ Approximate @p function around zero.

            function / x is approximated by a polynomial on
            [-high_bound, high_bound] and the evaluation scheme is
            multiplied back by @p variable.
            Returns the pair (scheme, approximation error).
        """

        def error_function(p, f, ai, mod, t):
            return sollya.dirtyinfnorm(p - f, ai)

        # an interval bound equal to 0 proved troublesome, hence the
        # interval is taken symmetric around 0
        approx_interval = Interval(-high_bound, high_bound)
        local_function = function / sollya.x

        guessed_degree = sollya.guessdegree(local_function, approx_interval,
                                            error_bound)
        degree = sollya.sup(guessed_degree)
        degree_list = range(int(degree) + 1)
        coeff_formats = [1] + [self.precision] * (len(degree_list) - 1)

        poly_object, approx_error = Polynomial.build_from_approximation_with_error(
            local_function,
            degree_list,
            coeff_formats,
            approx_interval,
            sollya.absolute,
            error_function=error_function)
        Log.report(
            Log.Info, "approximation poly: {}\n  with error {}".format(
                poly_object, approx_error))

        horner_scheme = PolynomialSchemeEvaluator.generate_horner_scheme(
            poly_object, variable, self.precision)
        return Multiplication(variable, horner_scheme), approx_error

    def generate_scheme(self):
        """ Build the implementation scheme of the hyperbolic tangent
            meta-function """
        # the function has a single input variable
        vx = self.implementation.add_input_variable("x", self.precision)

        # Background:
        #   tanh(x) = sinh(x) / cosh(x)
        #           = (e^x - e^-x) / (e^x + e^-x)
        #           = (e^(2x) - 1) / (e^(2x) + 1)
        #           = 1 - 2 / (e^(2x) + 1)
        #   x -> +inf gives tanh(x) -> 1, x -> -inf gives tanh(x) -> -1
        #   around 0:  e^x  ~ 1 + x + x^2/2 + x^3/6 + ...
        #              e^-x ~ 1 - x + x^2/2 - x^3/6 + ...
        #   so that tanh(x) ~ x when x -> 0
        # The input range is thus handled in three parts: around zero,
        # an intermediate range, and a saturated range.
        # A possible (unused) argument reduction is x = k.log(2) + r,
        # giving e^(2x) = 2^(2k).e^(2r).

        # tanh is odd: everything is computed on |vx| and the sign is
        # restored at the very end
        abs_vx = Abs(vx, precision=self.precision)

        # with p the mantissa size of the output format,
        # x > (p+2).log(2)/2 implies tanh(x) = 1 - eps, eps < 1/2 . 2^-p
        p = self.precision.get_mantissa_size()
        raw_high_bound = (p + 2) * sollya.log(2) / 2
        near_zero_bound = 0.125

        # sub-interval width rounded to a power of two; the number of
        # sub-intervals is doubled and the upper bound recomputed to match
        interval_size = (raw_high_bound - near_zero_bound) / (1024)
        new_interval_size = sollya.S2**int(sollya.log2(interval_size))
        interval_num = 1024 * 2
        high_bound = new_interval_size * interval_num + near_zero_bound

        # approximation used on [0, near_zero_bound[
        near_zero_scheme, near_zero_error = self.generate_approx_poly_near_zero(
            sollya.tanh(sollya.x), near_zero_bound, S2**-p, abs_vx)

        # piecewise approximation on [near_zero_bound, high_bound[
        poly_degree = 5
        # not consumed below, kept to mirror the approximated range
        approx_interval = Interval(near_zero_bound, high_bound)

        sollya.settings.points = 117

        approx_scheme, approx_error = piecewise_approximation(
            sollya.tanh,
            abs_vx,
            self.precision,
            bound_low=near_zero_bound,
            bound_high=high_bound,
            num_intervals=interval_num,
            max_degree=poly_degree,
            error_threshold=sollya.S2**-p)
        Log.report(Log.Warning, "approx_error={}".format(approx_error))

        # range selection on |vx|; beyond high_bound the result is 1.0
        is_near_zero = abs_vx < near_zero_bound
        is_below_high = abs_vx < high_bound
        saturated_value = Constant(1.0, precision=self.precision)
        away_from_zero_scheme = Select(is_below_high, approx_scheme,
                                       saturated_value)
        complete_scheme = Select(is_near_zero, near_zero_scheme,
                                 away_from_zero_scheme)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")

        # sign restoration
        is_negative = vx < 0
        negated_scheme = Negation(complete_scheme)
        signed_scheme = Select(is_negative, negated_scheme, complete_scheme)
        return Return(signed_scheme, precision=self.precision)

    def numeric_emulate(self, input_value):
        # reference value used for numerical emulation
        return tanh(input_value)

    standard_test_cases = [[sollya.parse(x)] for x in [
        "-0x1.572306p+0", "0x1.af0bf2p+1", "-0x1.af0bf2p+1",
        "-0x1.51b618p-13", "0x1.ffb99ep-1"
    ]]
class ML_Log10(ML_Function("log10")):
    """Meta-function generating a table-based logarithm in base ``basis``.

    The generated scheme evaluates a natural logarithm from a reciprocal-seed
    table plus a polynomial, then scales the result by ``1 / log(basis)``
    (``basis`` defaults to 10, see :meth:`get_default_args`).
    """

    def __init__(self, args):
        """Initialise the meta-function from the argument structure @p args.

        Reads ``args.basis`` (the logarithm base) in addition to whatever
        the base-class initialiser consumes.
        """
        # initializing base class
        ML_FunctionBasis.__init__(self, args)
        self.basis = args.basis

    @staticmethod
    def get_default_args(**kw):
        """Return the default argument structure for ML_Log10.

        The defaults below are overloaded with the entries of @p kw and
        wrapped into a DefaultArgTemplate.
        """
        default_args_log10 = {
            "output_file": "ml_log10f.c",
            "function_name": "ml_log10f",
            "basis": 10,
            "precision": ML_Binary32,
            "accuracy": ML_Faithful,
            "target": GenericProcessor()
        }
        default_args_log10.update(kw)
        return DefaultArgTemplate(**default_args_log10)

    def generate_scheme(self):
        """Build and return the operation graph implementing the function.

        The returned scheme is a tree of ConditionBlock nodes which first
        dispatches negative, infinite, NaN, zero and subnormal inputs, and
        otherwise returns the value built by the local ``compute_log``
        helper.
        """
        vx = self.implementation.add_input_variable(
            "x", self.get_input_precision())

        sollya_precision = self.get_input_precision().get_sollya_object()

        # natural logarithm; the conversion to base self.basis is applied
        # at the very end of compute_log
        log_f = sollya.log(sollya.x)

        # log(2) split into a high part with a shortened mantissa and a low
        # correction part
        log2_hi_value = round(
            log_f(2),
            self.precision.get_field_size() -
            (self.precision.get_exponent_size() + 1),
            RN)
        log2_lo_value = round(
            log_f(2) - log2_hi_value, self.precision.sollya_object, RN)

        log2_hi = Constant(log2_hi_value, precision=self.precision)
        log2_lo = Constant(log2_lo_value, precision=self.precision)

        #---------------------
        # Approximation scheme
        #---------------------
        # log(x) = log(m.2^e) = log(m.2^(e-t+t))
        #        = log(m.2^-t) + (e+t) log(2)
        # t = (m > sqrt(2)) ? 1 : 0 is used to avoid catastrophic
        # cancellation when e = -1 and m ~ 2
        #
        # log(m.2^-t) = log(m.r/r.2^-t) = log(m.r) + log(2^-t/r)
        #             = log(m.r) - log(r.2^t)
        # where r = rcp(m) is an approximation of 1/m such that r.m ~ 1
        #
        # The final result is log(x) / log(basis).

        # retrieving processor inverse approximation table
        dummy_var = Variable("dummy", precision=self.precision)
        dummy_div_seed = ReciprocalSeed(dummy_var, precision=self.precision)
        inv_approx_table = self.processor.get_recursive_implementation(
            dummy_div_seed, language=None,
            table_getter=lambda self: self.approx_table_map)

        # table creation
        # log_table[i]     = (hi, lo) split of log(r_i)
        # log_table_tho[i] = (hi, lo) split of log(2 * r_i), used when t = 1
        table_index_size = inv_approx_table.index_size
        table_index_range = range(1, 2**table_index_size)
        log_table = ML_NewTable(dimensions=[2**table_index_size, 2],
                                storage_precision=self.precision)
        log_table_tho = ML_NewTable(dimensions=[2**table_index_size, 2],
                                    storage_precision=self.precision)
        log_table[0][0] = 0.0
        log_table[0][1] = 0.0
        log_table_tho[0][0] = 0.0
        log_table_tho[0][1] = 0.0
        hi_size = self.precision.get_field_size() - (
            self.precision.get_exponent_size() + 1)
        for i in table_index_range:
            inv_value = inv_approx_table[i]
            value_high = round(log_f(inv_value), hi_size, sollya.RN)
            value_low = round(
                log_f(inv_value) - value_high, sollya_precision, sollya.RN)
            log_table[i][0] = value_high
            log_table[i][1] = value_low

            inv_value_tho = S2 * inv_approx_table[i]
            value_high_tho = round(log_f(inv_value_tho), hi_size, sollya.RN)
            value_low_tho = round(
                log_f(inv_value_tho) - value_high_tho,
                sollya_precision, sollya.RN)
            log_table_tho[i][0] = value_high_tho
            log_table_tho[i][1] = value_low_tho

        def compute_log(_vx, exp_corr_factor=None):
            """Return the node computing the logarithm of a regular input.

            @p _vx is the input node. @p exp_corr_factor, when not None, is
            added to the extracted exponent; it is used to compensate for a
            pre-scaling applied to @p _vx by the caller.
            """
            _vx_mant = MantissaExtraction(
                _vx, tag="_vx_mant", precision=self.precision,
                debug=debug_multi)
            _vx_exp = ExponentExtraction(_vx, tag="_vx_exp", debug=debugd)

            table_index = inv_approx_table.index_function(_vx_mant)
            table_index.set_attributes(tag="table_index", debug=debug_multi)

            # t = 1 when the mantissa exceeds sqrt(2), 0 otherwise
            tho_cond = _vx_mant > Constant(sollya.sqrt(2),
                                           precision=self.precision)
            tho = Select(tho_cond,
                         Constant(1.0, precision=self.precision),
                         Constant(0.0, precision=self.precision),
                         precision=self.precision,
                         tag="tho", debug=debug_multi)

            # argument reduction
            # TODO: detect if single operand inverse seed is supported by
            #       the targeted architecture
            # NOTE(review): the seed is masked through ML_UInt64 whatever
            # self.precision is -- confirm this is intended for formats
            # that are not 64-bit wide.
            pre_arg_red_index = TypeCast(
                BitLogicAnd(
                    TypeCast(
                        ReciprocalSeed(_vx_mant, precision=self.precision,
                                       tag="seed", debug=debug_multi,
                                       silent=True),
                        precision=ML_UInt64),
                    Constant(-2, precision=ML_UInt64),
                    precision=ML_UInt64),
                precision=self.precision,
                tag="pre_arg_red_index", debug=debug_multi)
            # table index 0 selects a reciprocal of exactly 1.0
            arg_red_index = Select(Equal(table_index, 0), 1.0,
                                   pre_arg_red_index,
                                   tag="arg_red_index", debug=debug_multi)
            _red_vx = arg_red_index * _vx_mant - 1.0
            inv_err = S2**-6
            # NOTE(review): this interval is centred on 1 whereas _red_vx
            # is r.m - 1 -- verify which quantity it is meant to describe.
            red_interval = Interval(1 - inv_err, 1 + inv_err)
            _red_vx.set_attributes(tag="_red_vx", debug=debug_multi,
                                   interval=red_interval)

            # table values of log(r.2^t), selected according to t
            _log_inv_lo = Select(tho_cond,
                                 TableLoad(log_table_tho, table_index, 1),
                                 TableLoad(log_table, table_index, 1),
                                 tag="log_inv_lo", debug=debug_multi)
            _log_inv_hi = Select(tho_cond,
                                 TableLoad(log_table_tho, table_index, 0),
                                 TableLoad(log_table, table_index, 0),
                                 tag="log_inv_hi", debug=debug_multi)

            Log.report(Log.Info, "building mathematical polynomial")
            approx_interval = Interval(-inv_err, inv_err)
            # function approximated over the reduced argument; built once so
            # the degree guess and the approximation use the same expression
            approx_fct = log(1 + sollya.x) / sollya.x
            poly_degree = sup(
                guessdegree(
                    approx_fct, approx_interval,
                    S2**-(self.precision.get_field_size() + 1))) + 1
            global_poly_object = Polynomial.build_from_approximation(
                approx_fct, poly_degree,
                [self.precision] * (poly_degree + 1),
                approx_interval, sollya.absolute)
            # presumably drops the degree-0 coefficient; the missing term is
            # restored below by adding _red_vx to _red_vx * _poly
            poly_object = global_poly_object.sub_poly(start_index=1)

            Log.report(Log.Info, "generating polynomial evaluation scheme")
            _poly = PolynomialSchemeEvaluator.generate_horner_scheme(
                poly_object, _red_vx, unified_precision=self.precision)
            _poly.set_attributes(tag="poly", debug=debug_multi)
            Log.report(Log.Info, poly_object.get_sollya_object())

            # identity test: exp_corr_factor is either None or a correction
            # term, and `==` must not be relied upon to detect None
            if exp_corr_factor is None:
                exp_term = _vx_exp
            else:
                exp_term = _vx_exp + exp_corr_factor
            corr_exp = Conversion(exp_term, precision=self.precision) + tho
            corr_exp.set_attributes(tag="corr_exp", debug=debug_multi)

            # (m0h, m0l) = _red_vx + _red_vx * _poly, the log(m.r) term
            m0h, m0l = Mul211(_red_vx, _poly)
            m0h, m0l = Add212(_red_vx, m0h, m0l)
            m0h.set_attributes(tag="m0h", debug=debug_multi)
            m0l.set_attributes(tag="m0l")

            # (e + t) * log(2), using the hi/lo split of log(2)
            l0_h = corr_exp * log2_hi
            l0_l = corr_exp * log2_lo
            l0_h.set_attributes(tag="l0_h")
            l0_l.set_attributes(tag="l0_l")

            rh, rl = Add222(l0_h, l0_l, m0h, m0l)
            rh.set_attributes(tag="rh0", debug=debug_multi)
            rl.set_attributes(tag="rl0", debug=debug_multi)

            # subtract the table value log(r.2^t)
            rh, rl = Add222(-_log_inv_hi, -_log_inv_lo, rh, rl)
            rh.set_attributes(tag="rh", debug=debug_multi)
            rl.set_attributes(tag="rl", debug=debug_multi)

            # change of base, skipped when log(basis) is exactly 1
            if sollya.log(self.basis) != 1.0:
                lbh = self.precision.round_sollya_object(
                    1 / sollya.log(self.basis))
                lbl = self.precision.round_sollya_object(
                    1 / sollya.log(self.basis) - lbh)
                rh, rl = Mul222(rh, rl, lbh, lbl)
            # only the high part of the result is returned
            return rh

        result = compute_log(vx)
        result.set_attributes(tag="result", debug=debug_multi)

        # special-case predicates
        neg_input = Comparison(vx, 0, likely=False,
                               specifier=Comparison.Less,
                               debug=debugd, tag="neg_input")
        vx_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False,
                             debug=debugd, tag="nan_or_inf")
        vx_snan = Test(vx, specifier=Test.IsSignalingNaN, likely=False,
                       debug=debugd, tag="snan")
        vx_inf = Test(vx, specifier=Test.IsInfty, likely=False,
                      debug=debugd, tag="inf")
        vx_subnormal = Test(vx, specifier=Test.IsSubnormal, likely=False,
                            debug=debugd, tag="vx_subnormal")
        vx_zero = Test(vx, specifier=Test.IsZero, likely=False,
                       debug=debugd, tag="vx_zero")

        # exp=-1 case
        # No dedicated path is generated here: the t = (m > sqrt(2))
        # correction in compute_log is what avoids the cancellation.
        Log.report(Log.Info, "managing exp=-1 case")

        # subnormal inputs are scaled by 2^100 before the computation and
        # the exponent is corrected by -100 accordingly
        m100 = -100
        S2100 = Constant(S2**100, precision=self.precision)
        result_subnormal = compute_log(vx * S2100, exp_corr_factor=m100)

        # main scheme
        Log.report(Log.Info, "MDL scheme")
        scheme = ConditionBlock(
            neg_input,
            # x < 0: invalid operation, return a quiet NaN
            Statement(
                ClearException(),
                Raise(ML_FPE_Invalid),
                Return(FP_QNaN(self.precision))),
            ConditionBlock(
                vx_nan_or_inf,
                ConditionBlock(
                    vx_inf,
                    # +inf (negative inputs were handled above)
                    Statement(
                        ClearException(),
                        Return(FP_PlusInfty(self.precision)),
                    ),
                    # NaN: only a signaling NaN raises invalid
                    Statement(
                        ClearException(),
                        ConditionBlock(vx_snan, Raise(ML_FPE_Invalid)),
                        Return(FP_QNaN(self.precision)))),
                ConditionBlock(
                    vx_subnormal,
                    ConditionBlock(
                        vx_zero,
                        # zero: divide-by-zero exception, return -inf
                        Statement(
                            ClearException(),
                            Raise(ML_FPE_DivideByZero),
                            Return(FP_MinusInfty(self.precision)),
                        ),
                        Return(result_subnormal)),
                    Return(result))))
        return scheme

    def numeric_emulate(self, input_value):
        """Return the reference value log(input_value) / log(self.basis)."""
        return sollya.log(input_value) / sollya.log(self.basis)

    standard_test_cases = [(sollya.parse("0x1.42af3ap-1"), None)]