def __init__(self, args=DefaultArgTemplate):
    # initializing base class
    ML_FunctionBasis.__init__(self, args)

    # function specific arguments
    self.cgpe_index = args.cgpe_index
    self.tbl_index_size = args.tbl_index_size
    self.no_subnormal = args.no_subnormal
    self.no_fma = args.no_fma
    self.no_rcp = args.no_rcp
    self.log_radix = {
        "e": EXP_1,
        "2": S2,
        "10": S10,
    }[args.log_radix]
    self.force_division = args.force_division

    # TODO: beware, dict indexing relies on Python matching numerical values
    # to keys which may have different construction methods (we try to make
    # sure every value is constructed through sollya.parse, but this is
    # not really safe): to be IMPROVED
    LOG_EMUL_FCT_MAP = {
        S2: sollya.log2,
        EXP_1: sollya.log,
        S10: sollya.log10,
    }
    if self.log_radix in LOG_EMUL_FCT_MAP:
        Log.report(Log.Info, "radix {} is part of standard radices",
                   self.log_radix)
        self.log_emulation_function = LOG_EMUL_FCT_MAP[self.log_radix]
    else:
        Log.report(Log.Info,
                   "radix {} is not part of standard radices {{2, e, 10}}",
                   self.log_radix)
        self.log_emulation_function = \
            lambda v: sollya.log(v) / sollya.log(self.log_radix)

    # .. update output and function name
    self.function_name = "LOG{}".format(args.log_radix)
    self.output_file = "{}.c".format(self.function_name)
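# Illustrative sketch (not part of the class above): the non-standard radix
# fallback selected in __init__ is the change-of-base identity
# log_b(v) = ln(v) / ln(b). A plain-Python analogue, assuming the standard
# `math` module instead of sollya, would be:
import math

def make_log_emulation(radix):
    """Hypothetical helper returning a log function for the given radix."""
    standard = {2.0: math.log2, math.e: math.log, 10.0: math.log10}
    if radix in standard:
        return standard[radix]
    # generic fallback, mirroring the lambda used in __init__ above
    return lambda v: math.log(v) / math.log(radix)

# example: log base 3 of 81 is 4
assert abs(make_log_emulation(3.0)(81.0) - 4.0) < 1e-12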
def generate_scheme(self):
    x = self.implementation.add_input_variable("x", self.precision)

    n_log2 = self.precision.round_sollya_object(sollya.log(2), sollya.RN)

    if not self.skip_reduction:
        n_invlog2 = self.precision.round_sollya_object(
            1 / sollya.log(2), sollya.RN)
        invlog2 = Constant(n_invlog2, tag="invlog2")
        unround_k = Multiplication(x, invlog2, tag="unround_k")
        k = Floor(unround_k, precision=self.precision, tag="k")
        log2 = Constant(n_log2, tag="log2")
        whole = Multiplication(k, log2, tag="whole")
        r = Subtraction(x, whole, tag="r")
    else:
        r = x

    approx_interval = sollya.Interval(-2**-10, n_log2 + 2**-10)
    approx_func = sollya.exp(sollya.x)

    builder = Polynomial.build_from_approximation
    sollya.settings.prec = 2**10
    poly_object = builder(approx_func, self.poly_degree,
                          [self.precision] * (self.poly_degree + 1),
                          approx_interval, sollya.relative)
    self.poly_object = poly_object

    schemer = PolynomialSchemeEvaluator.generate_horner_scheme
    poly = schemer(poly_object, r)

    if not self.skip_reduction:
        ik = Conversion(k, precision=ML_Binary32.get_integer_format(), tag="ik")
        twok = ExponentInsertion(ik, precision=self.precision, tag="twok")
        retval = Multiplication(poly, twok, tag="retval")
    else:
        retval = poly

    scheme = Return(retval)
    return scheme
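# Illustrative sketch (plain Python, not metalibm nodes): the reduction built
# above is x = k*log(2) + r with k = floor(x / log(2)), so that
# exp(x) = 2**k * exp(r) and r stays in roughly [0, log(2)).
import math

def exp_via_reduction(x, degree=12):
    ln2 = math.log(2.0)
    k = math.floor(x / ln2)
    r = x - k * ln2                      # reduced argument, ~[0, ln 2)
    # Horner evaluation of the truncated Taylor series for exp(r); the real
    # scheme uses a sollya minimax polynomial instead.
    poly = 0.0
    for i in range(degree, -1, -1):
        poly = poly * r + 1.0 / math.factorial(i)
    return math.ldexp(poly, k)           # poly * 2**k

# quick check against the libm exponential
assert abs(exp_via_reduction(3.7) - math.exp(3.7)) / math.exp(3.7) < 1e-10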
def generate_scheme(self):
    x = self.implementation.add_input_variable("x", self.precision)

    if not self.skip_reduction:
        x_exp_int = ExponentExtraction(x, tag="x_exp_int")
        x_exp = Conversion(x_exp_int, precision=self.precision, tag="x_exp")
        x_man = MantissaExtraction(x, tag="x_man")
        r = Multiplication(x_man, 0.5, tag="r")
    else:
        r = x

    approx_interval = sollya.Interval(0.5 - 2**-10, 1 + 2**-10)
    approx_func = sollya.log(sollya.x)

    builder = Polynomial.build_from_approximation
    sollya.settings.prec = 2**10
    poly_object = builder(approx_func, self.poly_degree,
                          [self.precision] * (self.poly_degree + 1),
                          approx_interval, sollya.relative)
    self.poly_object = poly_object

    schemer = PolynomialSchemeEvaluator.generate_horner_scheme
    poly = schemer(poly_object, r, unified_precision=self.precision)

    if not self.skip_reduction:
        n_log2 = self.precision.round_sollya_object(sollya.log(2), sollya.RN)
        log2 = Constant(n_log2, tag="log2")
        x_mul = Addition(x_exp, 1, tag="x_mul")
        offset = Multiplication(x_mul, log2, tag="offset")
        retval = Addition(offset, poly, tag="retval")
    else:
        retval = poly

    scheme = Return(retval)
    return scheme
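# Illustrative sketch (plain Python): the scheme above rewrites x = m * 2**e
# with m in [1, 2), evaluates the polynomial on r = m/2 in [0.5, 1), and adds
# back (e + 1) * log(2). math.frexp gives the [0.5, 1) mantissa directly; its
# exponent corresponds to x_exp + 1 in the scheme above.
import math

def log_via_reduction(x):
    r, e = math.frexp(x)          # x = r * 2**e, r in [0.5, 1)
    return e * math.log(2.0) + math.log(r)

assert abs(log_via_reduction(123.456) - math.log(123.456)) < 1e-12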
def determine_error(self): sollya.settings.display = sollya.hexadecimal n_log2 = self.precision.round_sollya_object(sollya.log(2), sollya.RN) poly_expr = str(sollya.horner(self.poly_object.get_sollya_object())) poly_expr = poly_expr.replace("_x_", "z") poly_expr = poly_expr.replace("z^0x1p1", "z*z") config = fptaylor.CHECK_CONFIG.copy() del config["--abs-error"] config["--opt"] = "gelpia" config["--rel-error-threshold"] = "0.0" config["--intermediate-opt"] = "false" config["--uncertainty"] = "false" config["--opt-timeout"] = 100000 # log is returning inf config["--opt-f-rel-tol"] = "1e0" config["--opt-f-abs-tol"] = "0.0" config["--opt-x-rel-tol"] = "0.0" config["--opt-x-abs-tol"] = "0.0" def generate_fptaylor(x): x_low = sollya.inf(x) x_high = sollya.sup(x) query = "\n".join( ["Variables", " real z in [{},{}];".format(x_low, x_high), "Definitions", " retval rnd64= {};".format(poly_expr), "Expressions", " retval;"]) rnd_rel_err = None rnd_abs_err = None try: res = fptaylor.Result(query, {**config, "--rel-error": "true", "--abs-error": "true"}) rnd_rel_err = float(res.result["relative_errors"]["final_total"]["value"]) rnd_abs_err = float(res.result["absolute_errors"]["final_total"]["value"]) except AssertionError: pass except KeyError: try: rnd_abs_err = float(res.result["absolute_errors"]["final_total"]["value"]) except KeyError: pass if rnd_abs_err is None: try: res = fptaylor.Result(query, {**config, "--rel-error": "false", "--abs-error": "true"}) rnd_abs_err = float(res.result["absolute_errors"]["final_total"]["value"]) except AssertionError: pass err_int = sollya.supnorm(self.poly_object.get_sollya_object(), sollya.log(sollya.x), x, sollya.relative, 2**-100) algo_rel_err = sollya.sup(err_int) err_int = sollya.supnorm(self.poly_object.get_sollya_object(), sollya.log(sollya.x), x, sollya.absolute, 2**-100) algo_abs_err = sollya.sup(err_int) if rnd_rel_err is None or str(algo_rel_err) == "error": rel_err = float("inf") else: rel_err = rnd_rel_err + algo_rel_err abs_err = rnd_abs_err + algo_abs_err return rel_err, abs_err def generate_reduction_fptaylor(x): unround_e = sollya.log2(I) e_low = sollya.floor(sollya.inf(unround_e)) e_high = sollya.floor(sollya.sup(unround_e)) if e_low != e_high: assert False, "Interval must not stradle a binade" e = int(e_low) + 1 z = x / (2**e) * 0.5 query = "\n".join( ["Variables", " real z in [{},{}];".format(sollya.inf(z), sollya.sup(z)), "Definitions", " poly rnd64= {};".format(poly_expr), " retval rnd64= {}*{} + poly;".format(e, n_log2), "Expressions", " retval;"]) rnd_rel_err = None rnd_abs_err = None try: res = fptaylor.Result(query, {**config, "--rel-error": "true", "--abs-error": "true"}) rnd_rel_err = float(res.result["relative_errors"]["final_total"]["value"]) rnd_abs_err = float(res.result["absolute_errors"]["final_total"]["value"]) except AssertionError: pass except KeyError: try: rnd_abs_err = float(res.result["absolute_errors"]["final_total"]["value"]) except KeyError: pass if rnd_abs_err is None: try: res = fptaylor.Result(query, {**config, "--rel-error": "false", "--abs-error": "true"}) rnd_abs_err = float(res.result["absolute_errors"]["final_total"]["value"]) except AssertionError: pass err_int = sollya.supnorm(self.poly_object.get_sollya_object(), sollya.log(sollya.x), z, sollya.relative, 2**-100) algo_rel_err = sollya.sup(err_int) err_int = sollya.supnorm(self.poly_object.get_sollya_object(), sollya.log(sollya.x), z, sollya.absolute, 2**-100) algo_abs_err = sollya.sup(err_int) if rnd_rel_err is None or str(algo_rel_err) == "error": rel_err = 
float("inf") else: rel_err = rnd_rel_err + algo_rel_err abs_err = rnd_abs_err + algo_abs_err return rel_err, abs_err def split_domain(starting_domain, slivers): in_domains = [starting_domain] out_domains = list() while len(in_domains) > 0: I = in_domains.pop() unround_e = sollya.log2(I) e_low = sollya.floor(sollya.inf(unround_e)) e_high = sollya.floor(sollya.sup(unround_e)) #print("in: [{}, {}] ({}, {})".format(float(sollya.inf(I)), float(sollya.sup(I)), int(e_low), int(e_high))) if e_low == e_high: #print(" accepted") out_domains.append(I) continue e_range = sollya.Interval(e_low, e_low+1) I_range = 2**e_range for _ in range(100): mid = sollya.mid(I_range) e = sollya.floor(sollya.log2(mid)) if e == e_low: I_range = sollya.Interval(mid, sollya.sup(I_range)) else: I_range = sollya.Interval(sollya.inf(I_range), mid) divider_high = sollya.sup(I_range) divider_low = sollya.inf(I_range) lower_part = sollya.Interval(sollya.inf(I), divider_low) upper_part = sollya.Interval(divider_high, sollya.sup(I)) #print(" -> [{}, {}]".format(float(sollya.inf(lower_part)), float(sollya.sup(lower_part)))) #print(" -> [{}, {}]".format(float(sollya.inf(upper_part)), float(sollya.sup(upper_part)))) in_domains.append(upper_part) in_domains.append(lower_part) in_domains = out_domains # subdivide each section into 2**subd sections for _ in range(slivers): out_domains = list() for I in in_domains: mid = sollya.mid(I) out_domains.append(sollya.Interval(sollya.inf(I), mid)) out_domains.append(sollya.Interval(mid, sollya.sup(I))) in_domains = out_domains in_domains = set(in_domains) in_domains = sorted(in_domains, key=lambda x:float(sollya.inf(x))) in_domains = [d for d in in_domains if sollya.inf(d) != sollya.sup(d)] return in_domains if self.skip_reduction: starting_domain = sollya.Interval(0.5, 1.0) else: reduction_e = 12 starting_domain = sollya.Interval(2**(-reduction_e), 2**reduction_e) # analyse each piece in_domains = split_domain(starting_domain, self.slivers) errors = list() for I in in_domains: if self.skip_reduction: rel_err, abs_err = generate_fptaylor(I) else: rel_err, abs_err = generate_reduction_fptaylor(I) print("{}\t{}\t{}\t{}".format(float(sollya.inf(I)), float(sollya.sup(I)), float(abs_err), float(rel_err))) errors.append((I, abs_err, rel_err)) def generate_json(errors, domain): errors = [err for err in errors if err[0] in domain] errors.sort(key=lambda err: err[2]) epsilon = errors[0][2] delta = max(err[1] for err in errors) d = { "cname": self.function_name, "delta": float(delta), "domain": [float(sollya.inf(domain)), float(sollya.sup(domain)),], "epsilon": float(epsilon), "operation": "log" } return d if self.skip_reduction: d = generate_json(errors, sollya.Interval(0.5, 1.0)) json_str = json.dumps(d, sort_keys=True, indent=4) json_str = "spec: " + json_str.replace("\n", "\nspec: ") print(json_str) else: specs = list() for e in range(1, reduction_e): d = generate_json(errors, sollya.Interval(2**(-e), 2**e)) specs.append(d) for i in range(len(specs)): d = specs[i] if i == len(specs)-1: json_str = json.dumps(d, sort_keys=True, indent=4) json_str = "spec: " + json_str.replace("\n", "\nspec: ") print(json_str) break nd = specs[i+1] if d["epsilon"] == nd["epsilon"] and d["delta"] == nd["delta"]: continue json_str = json.dumps(d, sort_keys=True, indent=4) json_str = "spec: " + json_str.replace("\n", "\nspec: ") print(json_str)
def __init__(self, precision=ML_Binary32, abs_accuracy=S2**-24, libm_compliant=True, debug_flag=False, fuse_fma=True, fast_path_extract=True, target=GenericProcessor(), output_file="log1pf.c", function_name="log1pf"): # declaring CodeFunction and retrieving input variable self.function_name = function_name self.precision = precision self.processor = target func_implementation = CodeFunction(self.function_name, output_format=self.precision) vx = func_implementation.add_input_variable("x", self.precision) sollya_precision = self.precision.sollya_object # debug utilities debugf = ML_Debug(display_format="%f") debuglf = ML_Debug(display_format="%lf") debugx = ML_Debug(display_format="%x") debuglx = ML_Debug(display_format="%\"PRIx64\"", ) debugd = ML_Debug(display_format="%d", pre_process=lambda v: "(int) %s" % v) debugld = ML_Debug(display_format="%ld") #debug_lftolx = ML_Debug(display_format = "%\"PRIx64\"", pre_process = lambda v: "double_to_64b_encoding(%s)" % v) debug_lftolx = ML_Debug( display_format="%\"PRIx64\" ev=%x", pre_process=lambda v: "double_to_64b_encoding(%s), __k1_fpu_get_exceptions()" % v) debug_ddtolx = ML_Debug( display_format="%\"PRIx64\" %\"PRIx64\"", pre_process=lambda v: "double_to_64b_encoding(%s.hi), double_to_64b_encoding(%s.lo)" % (v, v)) debug_dd = ML_Debug(display_format="{.hi=%lf, .lo=%lf}", pre_process=lambda v: "%s.hi, %s.lo" % (v, v)) # local overloading of RaiseReturn operation def ExpRaiseReturn(*args, **kwords): kwords["arg_value"] = vx kwords["function_name"] = self.function_name return RaiseReturn(*args, **kwords) log2_hi_value = round( log(2), self.precision.get_field_size() - (self.precision.get_exponent_size() + 1), sollya.RN) log2_lo_value = round( log(2) - log2_hi_value, self.precision.sollya_object, sollya.RN) log2_hi = Constant(log2_hi_value, precision=self.precision) log2_lo = Constant(log2_lo_value, precision=self.precision) vx_exp = ExponentExtraction(vx, tag="vx_exp", debug=debugd) int_precision = ML_Int64 if self.precision is ML_Binary64 else ML_Int32 # retrieving processor inverse approximation table dummy_var = Variable("dummy", precision=self.precision) dummy_div_seed = DivisionSeed(dummy_var, precision=self.precision) inv_approx_table = self.processor.get_recursive_implementation( dummy_div_seed, language=None, table_getter=lambda self: self.approx_table_map) # table creation table_index_size = 7 log_table = ML_Table(dimensions=[2**table_index_size, 2], storage_precision=self.precision) log_table[0][0] = 0.0 log_table[0][1] = 0.0 for i in xrange(1, 2**table_index_size): #inv_value = (1.0 + (self.processor.inv_approx_table[i] / S2**9) + S2**-52) * S2**-1 inv_value = (1.0 + (inv_approx_table[i][0] / S2**9)) * S2**-1 value_high = round( log(inv_value), self.precision.get_field_size() - (self.precision.get_exponent_size() + 1), sollya.RN) value_low = round( log(inv_value) - value_high, sollya_precision, sollya.RN) log_table[i][0] = value_high log_table[i][1] = value_low vx_exp = ExponentExtraction(vx, tag="vx_exp", debug=debugd) # case close to 0: ctz ctz_exp_limit = -7 ctz_cond = vx_exp < ctz_exp_limit ctz_interval = Interval(-S2**ctz_exp_limit, S2**ctz_exp_limit) ctz_poly_degree = sup( guessdegree( log1p(sollya.x) / sollya.x, ctz_interval, S2** -(self.precision.get_field_size() + 1))) + 1 ctz_poly_object = Polynomial.build_from_approximation( log1p(sollya.x) / sollya.x, ctz_poly_degree, [self.precision] * (ctz_poly_degree + 1), ctz_interval, sollya.absolute) print "generating polynomial evaluation scheme" ctz_poly = 
PolynomialSchemeEvaluator.generate_horner_scheme( ctz_poly_object, vx, unified_precision=self.precision) ctz_poly.set_attributes(tag="ctz_poly", debug=debug_lftolx) ctz_result = vx * ctz_poly neg_input = Comparison(vx, -1, likely=False, specifier=Comparison.Less, debug=debugd, tag="neg_input") vx_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False, debug=debugd, tag="nan_or_inf") vx_snan = Test(vx, specifier=Test.IsSignalingNaN, likely=False, debug=debugd, tag="snan") vx_inf = Test(vx, specifier=Test.IsInfty, likely=False, debug=debugd, tag="inf") vx_subnormal = Test(vx, specifier=Test.IsSubnormal, likely=False, debug=debugd, tag="vx_subnormal") log_function_code = CodeFunction( "new_log", [Variable("x", precision=ML_Binary64)], output_format=ML_Binary64) log_call_generator = FunctionOperator( log_function_code.get_name(), arity=1, output_precision=ML_Binary64, declare_prototype=log_function_code) newlog_function = FunctionObject(log_function_code.get_name(), (ML_Binary64, ), ML_Binary64, log_call_generator) # case away from 0.0 pre_vxp1 = vx + 1.0 pre_vxp1.set_attributes(tag="pre_vxp1", debug=debug_lftolx) pre_vxp1_exp = ExponentExtraction(pre_vxp1, tag="pre_vxp1_exp", debug=debugd) cm500 = Constant(-500, precision=ML_Int32) c0 = Constant(0, precision=ML_Int32) cond_scaling = pre_vxp1_exp > 2**(self.precision.get_exponent_size() - 2) scaling_factor_exp = Select(cond_scaling, cm500, c0) scaling_factor = ExponentInsertion(scaling_factor_exp, precision=self.precision, tag="scaling_factor") vxp1 = pre_vxp1 * scaling_factor vxp1.set_attributes(tag="vxp1", debug=debug_lftolx) vxp1_exp = ExponentExtraction(vxp1, tag="vxp1_exp", debug=debugd) vxp1_inv = DivisionSeed(vxp1, precision=self.precision, tag="vxp1_inv", debug=debug_lftolx, silent=True) vxp1_dirty_inv = ExponentInsertion(-vxp1_exp, precision=self.precision, tag="vxp1_dirty_inv", debug=debug_lftolx) table_index = BitLogicAnd(BitLogicRightShift( TypeCast(vxp1, precision=int_precision, debug=debuglx), self.precision.get_field_size() - 7, debug=debuglx), 0x7f, tag="table_index", debug=debuglx) # argument reduction # TODO: detect if single operand inverse seed is supported by the targeted architecture pre_arg_red_index = TypeCast(BitLogicAnd(TypeCast(vxp1_inv, precision=ML_UInt64), Constant(-2, precision=ML_UInt64), precision=ML_UInt64), precision=self.precision, tag="pre_arg_red_index", debug=debug_lftolx) arg_red_index = Select(Equal(table_index, 0), vxp1_dirty_inv, pre_arg_red_index, tag="arg_red_index", debug=debug_lftolx) red_vxp1 = Select(cond_scaling, arg_red_index * vxp1 - 1.0, (arg_red_index * vx - 1.0) + arg_red_index) #red_vxp1 = arg_red_index * vxp1 - 1.0 red_vxp1.set_attributes(tag="red_vxp1", debug=debug_lftolx) log_inv_lo = TableLoad(log_table, table_index, 1, tag="log_inv_lo", debug=debug_lftolx) log_inv_hi = TableLoad(log_table, table_index, 0, tag="log_inv_hi", debug=debug_lftolx) inv_err = S2**-6 # TODO: link to target DivisionSeed precision print "building mathematical polynomial" approx_interval = Interval(-inv_err, inv_err) poly_degree = sup( guessdegree( log(1 + sollya.x) / sollya.x, approx_interval, S2** -(self.precision.get_field_size() + 1))) + 1 global_poly_object = Polynomial.build_from_approximation( log(1 + sollya.x) / sollya.x, poly_degree, [self.precision] * (poly_degree + 1), approx_interval, sollya.absolute) poly_object = global_poly_object.sub_poly(start_index=1) print "generating polynomial evaluation scheme" _poly = PolynomialSchemeEvaluator.generate_horner_scheme( poly_object, red_vxp1, 
unified_precision=self.precision) _poly.set_attributes(tag="poly", debug=debug_lftolx) print global_poly_object.get_sollya_object() vxp1_inv_exp = ExponentExtraction(vxp1_inv, tag="vxp1_inv_exp", debug=debugd) corr_exp = -vxp1_exp + scaling_factor_exp # vxp1_inv_exp #poly = (red_vxp1) * (1 + _poly) #poly.set_attributes(tag = "poly", debug = debug_lftolx, prevent_optimization = True) pre_result = -log_inv_hi + (red_vxp1 + red_vxp1 * _poly + (-corr_exp * log2_lo - log_inv_lo)) pre_result.set_attributes(tag="pre_result", debug=debug_lftolx) exact_log2_hi_exp = -corr_exp * log2_hi exact_log2_hi_exp.set_attributes(tag="exact_log2_hi_exp", debug=debug_lftolx, prevent_optimization=True) #std_result = exact_log2_hi_exp + pre_result exact_log2_lo_exp = -corr_exp * log2_lo exact_log2_lo_exp.set_attributes( tag="exact_log2_lo_exp", debug=debug_lftolx) #, prevent_optimization = True) init = exact_log2_lo_exp - log_inv_lo init.set_attributes(tag="init", debug=debug_lftolx, prevent_optimization=True) fma0 = (red_vxp1 * _poly + init) # - log_inv_lo) fma0.set_attributes(tag="fma0", debug=debug_lftolx) step0 = fma0 step0.set_attributes( tag="step0", debug=debug_lftolx) #, prevent_optimization = True) step1 = step0 + red_vxp1 step1.set_attributes(tag="step1", debug=debug_lftolx, prevent_optimization=True) step2 = -log_inv_hi + step1 step2.set_attributes(tag="step2", debug=debug_lftolx, prevent_optimization=True) std_result = exact_log2_hi_exp + step2 std_result.set_attributes(tag="std_result", debug=debug_lftolx, prevent_optimization=True) # main scheme print "MDL scheme" pre_scheme = ConditionBlock( neg_input, Statement(ClearException(), Raise(ML_FPE_Invalid), Return(FP_QNaN(self.precision))), ConditionBlock( vx_nan_or_inf, ConditionBlock( vx_inf, Statement( ClearException(), Return(FP_PlusInfty(self.precision)), ), Statement(ClearException(), ConditionBlock(vx_snan, Raise(ML_FPE_Invalid)), Return(FP_QNaN(self.precision)))), ConditionBlock( vx_subnormal, Return(vx), ConditionBlock(ctz_cond, Statement(Return(ctz_result), ), Statement(Return(std_result)))))) scheme = pre_scheme #print scheme.get_str(depth = None, display_precision = True) opt_eng = OptimizationEngine(self.processor) # fusing FMA print "MDL fusing FMA" scheme = opt_eng.fuse_multiply_add(scheme, silence=True) print "MDL abstract scheme" opt_eng.instantiate_abstract_precision(scheme, None) #print scheme.get_str(depth = None, display_precision = True) print "MDL instantiated scheme" opt_eng.instantiate_precision(scheme, default_precision=ML_Binary32) print "subexpression sharing" opt_eng.subexpression_sharing(scheme) print "silencing operation" opt_eng.silence_fp_operations(scheme) # registering scheme as function implementation func_implementation.set_scheme(scheme) # check processor support opt_eng.check_processor_support(scheme) # factorizing fast path opt_eng.factorize_fast_path(scheme) #print scheme.get_str(depth = None, display_precision = True) cg = CCodeGenerator(self.processor, declare_cst=False, disable_debug=not debug_flag, libm_compliant=libm_compliant) self.result = func_implementation.get_definition(cg, C_Code, static_cst=True) self.result.add_header("support_lib/ml_special_values.h") self.result.add_header("math.h") self.result.add_header("stdio.h") self.result.add_header("inttypes.h") #print self.result.get(cg) output_stream = open("%s.c" % func_implementation.get_name(), "w") output_stream.write(self.result.get(cg)) output_stream.close()
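# Illustrative sketch (plain Python): the log1p scheme above splits the domain.
# Close to zero (vx_exp < -7, i.e. |x| < 2**-7) it evaluates a polynomial for
# log1p(x)/x directly, which avoids the cancellation in computing (1 + x);
# larger inputs go through the scaled, reciprocal-table reduction of 1 + x.
import math

def log1p_sketch(x):
    if abs(x) < 2.0**-7:
        # low-order Taylor series of log(1+x)/x, multiplied back by x;
        # the real scheme uses a sollya minimax polynomial of higher degree
        p = 1.0 - x/2 + x*x/3 - x*x*x/4
        return x * p
    return math.log(1.0 + x)      # stands in for the table-driven path

assert abs(log1p_sketch(1e-3) - math.log1p(1e-3)) < 1e-15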
def generate_scheme(self): # declaring target and instantiating optimization engine vx = self.implementation.add_input_variable("x", self.precision) Log.set_dump_stdout(True) Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m") if self.debug_flag: Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m") # local overloading of RaiseReturn operation def ExpRaiseReturn(*args, **kwords): kwords["arg_value"] = vx kwords["function_name"] = self.function_name return RaiseReturn(*args, **kwords) C_m1 = Constant(-1, precision = self.precision) test_NaN_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False, debug = debug_multi, tag = "NaN_or_inf", precision = ML_Bool) test_NaN = Test(vx, specifier = Test.IsNaN, likely = False, debug = debug_multi, tag = "is_NaN", precision = ML_Bool) test_inf = Comparison(vx, 0, specifier = Comparison.Greater, debug = debug_multi, tag = "sign", precision = ML_Bool, likely = False); # Infnty input infty_return = Statement(ConditionBlock(test_inf, Return(FP_PlusInfty(self.precision)), Return(C_m1))) # non-std input (inf/nan) specific_return = ConditionBlock(test_NaN, Return(FP_QNaN(self.precision)), infty_return) # Over/Underflow Tests precision_emax = self.precision.get_emax() precision_max_value = S2**(precision_emax + 1) expm1_overflow_bound = ceil(log(precision_max_value + 1)) overflow_test = Comparison(vx, expm1_overflow_bound, likely = False, specifier = Comparison.Greater, precision = ML_Bool) overflow_return = Statement(Return(FP_PlusInfty(self.precision))) precision_emin = self.precision.get_emin_subnormal() precision_min_value = S2** precision_emin expm1_underflow_bound = floor(log(precision_min_value) + 1) underflow_test = Comparison(vx, expm1_underflow_bound, likely = False, specifier = Comparison.Less, precision = ML_Bool) underflow_return = Statement(Return(C_m1)) sollya_precision = {ML_Binary32: sollya.binary32, ML_Binary64: sollya.binary64}[self.precision] int_precision = {ML_Binary32: ML_Int32, ML_Binary64: ML_Int64}[self.precision] # Constants log_2 = round(log(2), sollya_precision, sollya.RN) invlog2 = round(1/log(2), sollya_precision, sollya.RN) log_2_cst = Constant(log_2, precision = self.precision) interval_vx = Interval(expm1_underflow_bound, expm1_overflow_bound) interval_fk = interval_vx * invlog2 interval_k = Interval(floor(inf(interval_fk)), ceil(sup(interval_fk))) log2_hi_precision = self.precision.get_field_size() - 6 log2_hi = round(log(2), log2_hi_precision, sollya.RN) log2_lo = round(log(2) - log2_hi, sollya_precision, sollya.RN) # Reduction unround_k = vx * invlog2 ik = NearestInteger(unround_k, precision = int_precision, debug = debug_multi, tag = "ik") k = Conversion(ik, precision = self.precision, tag = "k") red_coeff1 = Multiplication(k, log2_hi, precision = self.precision) red_coeff2 = Multiplication(Negation(k, precision = self.precision), log2_lo, precision = self.precision) pre_sub_mul = Subtraction(vx, red_coeff1, precision = self.precision) s = Addition(pre_sub_mul, red_coeff2, precision = self.precision) z = Subtraction(s, pre_sub_mul, precision = self.precision) t = Subtraction(red_coeff2, z, precision = self.precision) r = Addition(s, t, precision = self.precision) r.set_attributes(tag = "r", debug = debug_multi) r_interval = Interval(-log_2/S2, log_2/S2) local_ulp = sup(ulp(exp(r_interval), self.precision)) print("ulp: ", local_ulp) error_goal = S2**-1*local_ulp print("error goal: ", error_goal) # Polynomial Approx error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai) 
Log.report(Log.Info, "\033[33;1m Building polynomial \033[0m\n") poly_degree = sup(guessdegree(expm1(sollya.x), r_interval, error_goal) + 1) polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme poly_degree_list = range(0, poly_degree) precision_list = [self.precision] *(len(poly_degree_list) + 1) poly_object, poly_error = Polynomial.build_from_approximation_with_error(expm1(sollya.x), poly_degree, precision_list, r_interval, sollya.absolute, error_function = error_function) sub_poly = poly_object.sub_poly(start_index = 2) Log.report(Log.Info, "Poly : %s" % sub_poly) Log.report(Log.Info, "poly error : {} / {:d}".format(poly_error, int(sollya.log2(poly_error)))) pre_sub_poly = polynomial_scheme_builder(sub_poly, r, unified_precision = self.precision) poly = r + pre_sub_poly poly.set_attributes(tag = "poly", debug = debug_multi) exp_k = ExponentInsertion(ik, tag = "exp_k", debug = debug_multi, precision = self.precision) exp_mk = ExponentInsertion(-ik, tag = "exp_mk", debug = debug_multi, precision = self.precision) diff = 1 - exp_mk diff.set_attributes(tag = "diff", debug = debug_multi) # Late Tests late_overflow_test = Comparison(ik, self.precision.get_emax(), specifier = Comparison.Greater, likely = False, debug = debug_multi, tag = "late_overflow_test") overflow_exp_offset = (self.precision.get_emax() - self.precision.get_field_size() / 2) diff_k = ik - overflow_exp_offset exp_diff_k = ExponentInsertion(diff_k, precision = self.precision, tag = "exp_diff_k", debug = debug_multi) exp_oflow_offset = ExponentInsertion(overflow_exp_offset, precision = self.precision, tag = "exp_offset", debug = debug_multi) late_overflow_result = (exp_diff_k * (1 + poly)) * exp_oflow_offset - 1.0 late_overflow_return = ConditionBlock( Test(late_overflow_result, specifier = Test.IsInfty, likely = False), ExpRaiseReturn(ML_FPE_Overflow, return_value = FP_PlusInfty(self.precision)), Return(late_overflow_result) ) late_underflow_test = Comparison(k, self.precision.get_emin_normal(), specifier = Comparison.LessOrEqual, likely = False) underflow_exp_offset = 2 * self.precision.get_field_size() corrected_coeff = ik + underflow_exp_offset exp_corrected = ExponentInsertion(corrected_coeff, precision = self.precision) exp_uflow_offset = ExponentInsertion(-underflow_exp_offset, precision = self.precision) late_underflow_result = ( exp_corrected * (1 + poly)) * exp_uflow_offset - 1.0 test_subnormal = Test(late_underflow_result, specifier = Test.IsSubnormal, likely = False) late_underflow_return = Statement( ConditionBlock( test_subnormal, ExpRaiseReturn(ML_FPE_Underflow, return_value = late_underflow_result)), Return(late_underflow_result) ) # Reconstruction std_result = exp_k * ( poly + diff ) std_result.set_attributes(tag = "result", debug = debug_multi) result_scheme = ConditionBlock( late_overflow_test, late_overflow_return, ConditionBlock( late_underflow_test, late_underflow_return, Return(std_result) ) ) std_return = ConditionBlock( overflow_test, overflow_return, ConditionBlock( underflow_test, underflow_return, result_scheme) ) scheme = ConditionBlock( test_NaN_or_inf, Statement(specific_return), std_return ) return scheme
def generate_scheme(self): vx = self.implementation.add_input_variable("x", self.precision) sollya_precision = self.precision.sollya_object # constant computation invlog2 = round(1 / log(2), sollya_precision, sollya.RN) invlog2_cst = Constant(invlog2, precision=self.precision) #v_log2_hi = round(log(2), 16, sollya.RN) #v_log2_lo = round(log(2) - v_log2_hi, sollya_precision, sollya.RN) #log2_hi = Constant(v_log2_hi, precision = self.precision, tag = "log2_hi") #log2_lo = Constant(v_log2_lo, precision = self.precision, tag = "log2_lo") # local overloading of RaiseReturn operation def ExpRaiseReturn(*args, **kwords): kwords["arg_value"] = vx kwords["function_name"] = self.function_name return RaiseReturn(*args, **kwords) test_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False, debug=True, tag="nan_or_inf") test_nan = Test(vx, specifier=Test.IsNaN, debug=True, tag="is_nan_test") test_positive = Comparison(vx, 0, specifier=Comparison.GreaterOrEqual, debug=True, tag="inf_sign") test_signaling_nan = Test(vx, specifier=Test.IsSignalingNaN, debug=True, tag="is_signaling_nan") return_snan = Statement( ExpRaiseReturn(ML_FPE_Invalid, return_value=FP_QNaN(self.precision))) v_log2_hi = round( log(2), self.precision.get_field_size() - (self.precision.get_exponent_size() + 1), sollya.RN) v_log2_lo = round( log(2) - v_log2_hi, self.precision.sollya_object, sollya.RN) log2_hi = Constant(v_log2_hi, precision=self.precision, tag="log2_hi") log2_lo = Constant(v_log2_lo, precision=self.precision, tag="log2_lo") vx_exp = ExponentExtraction(vx, tag="vx_exp", debug=debug_multi) int_precision = self.precision.get_integer_format() # table creation table_index_size = 7 log_table = ML_NewTable(dimensions=[2**table_index_size, 2], storage_precision=self.precision, tag=self.uniquify_name("inv_table")) log_table[0][0] = 0.0 log_table[0][1] = 0.0 # retrieving processor inverse approximation table dummy_var = Variable("dummy", precision=self.precision) dummy_div_seed = ReciprocalSeed(dummy_var, precision=self.precision) inv_approx_table = self.processor.get_recursive_implementation( dummy_div_seed, language=None, table_getter=lambda self: self.approx_table_map) integer_precision = { ML_Binary32: ML_UInt32, ML_Binary64: ML_UInt64 }[self.precision] for i in range(1, 2**table_index_size): #inv_value = (1.0 + (self.processor.inv_approx_table[i] / S2**9) + S2**-52) * S2**-1 inv_value = inv_approx_table[ i] # (1.0 + (inv_approx_table[i][0] / S2**9) ) * S2**-1 value_high = round( log(inv_value), self.precision.get_field_size() - (self.precision.get_exponent_size() + 1), sollya.RN) value_low = round( log(inv_value) - value_high, sollya_precision, sollya.RN) log_table[i][0] = value_high log_table[i][1] = value_low def compute_log(_vx, exp_corr_factor=None): _vx_mant = MantissaExtraction(_vx, tag="_vx_mant", debug=debug_multi, precision=self.precision) _vx_exp = ExponentExtraction(_vx, tag="_vx_exp", debug=debug_multi) table_index = BitLogicAnd(BitLogicRightShift( TypeCast(_vx_mant, precision=int_precision, debug=debug_multi), self.precision.get_field_size() - 7, debug=debug_multi), 0x7f, tag="table_index", debug=debug_multi) # argument reduction # TODO: detect if single operand inverse seed is supported by the targeted architecture pre_arg_red_index = TypeCast(BitLogicAnd( TypeCast(ReciprocalSeed(_vx_mant, precision=self.precision, tag="seed", debug=debug_multi, silent=True), precision=integer_precision), Constant(-2, precision=integer_precision), precision=integer_precision), precision=self.precision, 
tag="pre_arg_red_index", debug=debug_multi) arg_red_index = Select(Equal(table_index, 0), 1.0, pre_arg_red_index) #_red_vx = arg_red_index * _vx_mant - 1.0 _red_vx = FusedMultiplyAdd(arg_red_index, _vx_mant, 1.0, specifier=FusedMultiplyAdd.Subtract) _red_vx.set_attributes(tag="_red_vx", debug=debug_multi) inv_err = S2**-7 red_interval = Interval(1 - inv_err, 1 + inv_err) # return in case of standard (non-special) input _log_inv_lo = TableLoad(log_table, table_index, 1, tag="log_inv_lo", debug=debug_multi) _log_inv_hi = TableLoad(log_table, table_index, 0, tag="log_inv_hi", debug=debug_multi) Log.report(Log.Verbose, "building mathematical polynomial") approx_interval = Interval(-inv_err, inv_err) poly_degree = sup( guessdegree( log(1 + sollya.x) / sollya.x, approx_interval, S2** -(self.precision.get_field_size() + 1))) + 1 global_poly_object = Polynomial.build_from_approximation( log(1 + sollya.x) / sollya.x, poly_degree, [1] + [self.precision] * (poly_degree), approx_interval, sollya.absolute) poly_object = global_poly_object.sub_poly(start_index=1) Log.report(Log.Verbose, "generating polynomial evaluation scheme") #_poly = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, _red_vx, unified_precision = self.precision) _poly = PolynomialSchemeEvaluator.generate_estrin_scheme( poly_object, _red_vx, unified_precision=self.precision) _poly.set_attributes(tag="poly", debug=debug_multi) corr_exp = Conversion( _vx_exp if exp_corr_factor == None else _vx_exp + exp_corr_factor, precision=self.precision) split_red_vx = Split(_red_vx, precision=ML_DoubleDouble, tag="split_red_vx", debug=debug_multi) red_vx_hi = split_red_vx.hi red_vx_lo = split_red_vx.lo # result = _red_vx * poly - log_inv_hi - log_inv_lo + _vx_exp * log2_hi + _vx_exp * log2_lo pre_result = -_log_inv_hi + (_red_vx + (_red_vx * _poly + (corr_exp * log2_lo - _log_inv_lo))) pre_result.set_attributes(tag="pre_result", debug=debug_multi) exact_log2_hi_exp = corr_exp * log2_hi exact_log2_hi_exp.set_attributes(tag="exact_log2_hi_exp", debug=debug_multi) cancel_part = (corr_exp * log2_hi - _log_inv_hi) cancel_part.set_attributes(tag="cancel_part", debug=debug_multi) sub_part = red_vx_hi + cancel_part sub_part.set_attributes(tag="sub_part", debug=debug_multi) #result_one_low_part = (red_vx_hi * _poly + (red_vx_lo + (red_vx_lo * _poly + (corr_exp * log2_lo - _log_inv_lo)))) result_one_low_part = ((red_vx_lo + (red_vx_lo * _poly + (corr_exp * log2_lo - _log_inv_lo)))) result_one_low_part.set_attributes(tag="result_one_low_part", debug=debug_multi) _result_one = ( (sub_part) + red_vx_hi * _poly) + result_one_low_part return exact_log2_hi_exp + pre_result, _poly, _log_inv_lo, _log_inv_hi, _red_vx, _result_one result, poly, log_inv_lo, log_inv_hi, red_vx, new_result_one = compute_log( vx) result.set_attributes(tag="result", debug=debug_multi) new_result_one.set_attributes(tag="new_result_one", debug=debug_multi) neg_input = Comparison(vx, 0, likely=False, specifier=Comparison.Less, debug=debug_multi, tag="neg_input") vx_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False, debug=debug_multi, tag="nan_or_inf") vx_snan = Test(vx, specifier=Test.IsSignalingNaN, likely=False, debug=debug_multi, tag="snan") vx_inf = Test(vx, specifier=Test.IsInfty, likely=False, debug=debug_multi, tag="inf") vx_subnormal = Test(vx, specifier=Test.IsSubnormal, likely=False, debug=debug_multi, tag="vx_subnormal") vx_zero = Test(vx, specifier=Test.IsZero, likely=False, debug=debug_multi, tag="vx_zero") exp_mone = Equal(vx_exp, -1, tag="exp_minus_one", 
debug=debug_multi, likely=False) vx_one = Equal(vx, 1.0, tag="vx_one", likely=False, debug=debug_multi) # exp=-1 case Log.report(Log.Verbose, "managing exp=-1 case") result2 = (-log_inv_hi - log2_hi) + ( (red_vx + poly * red_vx) - log2_lo - log_inv_lo) result2.set_attributes(tag="result2", debug=debug_multi) m100 = -100 S2100 = Constant(S2**100, precision=self.precision) result_subnormal, _, _, _, _, _ = compute_log(vx * S2100, exp_corr_factor=m100) Log.report(Log.Verbose, "managing close to 1.0 cases") one_err = S2**-7 approx_interval_one = Interval(-one_err, one_err) red_vx_one = vx - 1.0 poly_degree_one = sup( guessdegree( log(1 + sollya.x) / sollya.x, approx_interval_one, S2** -(self.precision.get_field_size() + 1))) + 1 poly_object_one = Polynomial.build_from_approximation( log(1 + sollya.x) / sollya.x, poly_degree_one, [self.precision] * (poly_degree_one + 1), approx_interval_one, sollya.absolute).sub_poly(start_index=1) poly_one = PolynomialSchemeEvaluator.generate_horner_scheme( poly_object_one, red_vx_one, unified_precision=self.precision) poly_one.set_attributes(tag="poly_one", debug=debug_multi) result_one = red_vx_one + red_vx_one * poly_one cond_one = (vx < (1 + one_err)) & (vx > (1 - one_err)) cond_one.set_attributes(tag="cond_one", debug=debug_multi, likely=False) # main scheme pre_scheme = ConditionBlock( neg_input, Statement(ClearException(), Raise(ML_FPE_Invalid), Return(FP_QNaN(self.precision))), ConditionBlock( vx_nan_or_inf, ConditionBlock( vx_inf, Statement( ClearException(), Return(FP_PlusInfty(self.precision)), ), Statement(ClearException(), ConditionBlock(vx_snan, Raise(ML_FPE_Invalid)), Return(FP_QNaN(self.precision)))), ConditionBlock( vx_subnormal, ConditionBlock( vx_zero, Statement( ClearException(), Raise(ML_FPE_DivideByZero), Return(FP_MinusInfty(self.precision)), ), Return(result_subnormal)), ConditionBlock( vx_one, Statement( ClearException(), Return(FP_PlusZero(self.precision)), ), ConditionBlock(exp_mone, Return(result2), Return(result)) #ConditionBlock(cond_one, #Return(new_result_one), #ConditionBlock(exp_mone, #Return(result2), #Return(result) #) #) )))) scheme = pre_scheme return scheme
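# Illustrative sketch (plain Python): compute_log above is a classic
# table-driven reduction. With x = m * 2**e, m in [1, 2), a table indexed by
# the top mantissa bits provides an approximate reciprocal ri ~ 1/m; then
#   log(x) = e*log(2) - log(ri) + log(1 + r),   r = ri*m - 1,
# and |r| is small enough for a short polynomial. In the real scheme -log(ri)
# comes from a precomputed hi/lo table; this toy recomputes it with math.log.
import math

TABLE_BITS = 7

def table_log(x):
    m, e = math.frexp(x)                  # m in [0.5, 1)
    m, e = 2.0 * m, e - 1                 # renormalize to m in [1, 2)
    idx = int((m - 1.0) * 2**TABLE_BITS)  # top 7 bits of the mantissa
    ri = 1.0 / (1.0 + (idx + 0.5) / 2**TABLE_BITS)   # tabulated reciprocal seed
    r = ri * m - 1.0                      # |r| <~ 2**-8
    # degree-4 Taylor series for log(1 + r); the real scheme uses a sollya
    # minimax polynomial evaluated in an Estrin scheme
    log1pr = r - r**2/2 + r**3/3 - r**4/4
    return e * math.log(2.0) - math.log(ri) + log1pr

assert abs(table_log(123.456) - math.log(123.456)) < 1e-10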
def generate_argument_reduction(self, memory_limit):
    best_arg_reduc = None

    best_arg_reduc = self.eval_argument_reduction(6, 10, 12, 13)
    best_arg_reduc['sizeof_tables'] = best_arg_reduc['sizeof_table1'] + best_arg_reduc['sizeof_table2']
    best_arg_reduc['degree_poly1'] = 4
    best_arg_reduc['degree_poly2'] = 8
    return best_arg_reduc

    # iterate through all possible parameters, and return the best argument reduction
    # the order of importance of the characteristics of a good argument reduction is:
    #   1- the argument reduction is valid
    #   2- the degrees of the polynomials obtained are minimal
    #   3- the memory used is minimal
    # An argument reduction is valid iff:
    #   - the memory used is less than memory_limit
    #   - y-1 and z-1 fit into a uint64_t
    #   - the second argument reduction is useful (ie: it adds at least 1 bit to the argument reduction)
    # From those validity constraints we deduce some bounds on the parameters to reduce the space of values searched:
    # (note that those bounds are implied by, but not equivalent to, the constraints)
    #   size1 <= log2(memory_limit/17)   (memory_limit on the first table)
    #   prec1 < 13 + size1               (y-1 fits into a uint64_t)
    #   size2 <= log2((memory_limit - sizeof_table1)/17/midinterval)   (memory_limit on both tables)
    #   size2 >= 1 - log2(midinterval)   (second arg red should be useful)
    #   prec2 < 12 - prec1 - log2((y-y1)/y1), for all possible y   (z-1 fits into a uint64_t)
    # note: it is hard to deduce a tight bound on prec2 from the last inequality;
    # a good approximation is size2 ~= max[for y](-log2((y-y1)/y1)), but using it may eliminate valid argument reductions
    #self.eval_argument_reduction(12, 20, 22, 14)

    min_size1 = 1
    max_size1 = floor(log(memory_limit/17)/log(2)).getConstantAsInt()
    for size1 in range(max_size1, min_size1-1, -1):
        min_prec1 = size1
        max_prec1 = 12 + size1
        for prec1 in range(min_prec1, max_prec1+1):
            # we need sizeof_table1 and mid_interval for the bounds on size2 and prec2
            first_arg_reduc = self.eval_argument_reduction(size1, prec1, prec1, prec1)
            mid_interval = first_arg_reduc['mid_interval']
            sizeof_table1 = first_arg_reduc['sizeof_table1']

            if not(0 <= inf(mid_interval) and sup(mid_interval) < S2**(64 - 52 - prec1)):
                continue
            if not(first_arg_reduc['sizeof_table1'] < memory_limit):
                continue

            min_size2 = 1 - ceil(log(sup(mid_interval))/log(2)).getConstantAsInt()
            max_size2 = floor(log((memory_limit - sizeof_table1)/(17 * sup(mid_interval)))/log(2)).getConstantAsInt()

            # during execution of the prec2 loop, it can reduce the interval of valid values for prec2,
            # so min_prec2 and max_prec2 are set here and not before the prec2 loop
            # (because they are modified inside the body of the loop, for the next iteration of size2)
            min_prec2 = 0
            max_prec2 = 12 + max_size2 - prec1
            for size2 in range(max_size2, min_size2-1, -1):
                max_prec2 = min(max_prec2, 12 + size2 - prec1)
                for prec2 in range(max_prec2, min_prec2-1, -1):
                    #print '=====\t\033[1m{}\033[0m({}/{}),\t\033[1m{}\033[0m({}/{}),\t\033[1m{}\033[0m({}/{}),\t\033[1m{}\033[0m({}/{})\t====='.format(size1,min_size1,max_size1,prec1,min_prec1,max_prec1,size2,min_size2,max_size2,prec2,min_prec2,max_prec2)
                    #print resource.getrusage(resource.RUSAGE_SELF).ru_maxrss  # memory used by the program

                    arg_reduc = self.eval_argument_reduction(size1, prec1, size2, prec2)
                    mid_interval = arg_reduc['mid_interval']
                    out_interval = arg_reduc['out_interval']
                    sizeof_tables = arg_reduc['sizeof_table1'] + arg_reduc['sizeof_table2']
                    if not(0 <= inf(out_interval) and sup(out_interval) < S2**(64-52-prec1-prec2)):
                        max_prec2 = prec2 - 1
                        continue
                    if memory_limit < sizeof_tables:
                        continue
                    #assert(prec2 < 12 + size2 - prec1)  # test the approximation size2 ~= max[for y](-log2((y-y1)/y1))

                    # guess the degree of the two polynomials (relative error <= 2^-52 and absolute error <= 2^-120)
                    # note: we exclude zero from out_interval to not perturb sollya (log(1+x)/x is not well defined on 0)
                    sollya_out_interval = Interval(S2**(-52-prec1-prec2), sup(out_interval))
                    guess_degree_poly1 = guessdegree(log(1+sollya.x)/sollya.x, sollya_out_interval, S2**-52)
                    guess_degree_poly2 = guessdegree(log(1+sollya.x), sollya_out_interval, S2**-120)
                    # TODO: detect when guessdegree returns multiple possible degrees, and find the right one
                    if False and inf(guess_degree_poly1) != sup(guess_degree_poly1):
                        print("improvable guess_degree_poly1:", guess_degree_poly1)
                    if False and inf(guess_degree_poly2) != sup(guess_degree_poly2):
                        print("improvable guess_degree_poly2:", guess_degree_poly2)
                    degree_poly1 = sup(guess_degree_poly1).getConstantAsInt() + 1
                    degree_poly2 = sup(guess_degree_poly2).getConstantAsInt()

                    if ((best_arg_reduc is not None)
                            and (best_arg_reduc['degree_poly1'] < degree_poly1
                                 or best_arg_reduc['degree_poly2'] < degree_poly2)):
                        min_prec2 = prec2 + 1
                        break

                    if ((best_arg_reduc is None)
                            or (best_arg_reduc['degree_poly1'] > degree_poly1)
                            or (best_arg_reduc['degree_poly1'] == degree_poly1 and best_arg_reduc['degree_poly2'] > degree_poly2)
                            or (best_arg_reduc['degree_poly1'] == degree_poly1 and best_arg_reduc['degree_poly2'] == degree_poly2 and best_arg_reduc['sizeof_tables'] > sizeof_tables)):
                        arg_reduc['degree_poly1'] = degree_poly1
                        arg_reduc['degree_poly2'] = degree_poly2
                        arg_reduc['sizeof_tables'] = sizeof_tables
                        best_arg_reduc = arg_reduc
                        #print "\n --new best-- \n", arg_reduc, "\n"
    #print "\nBest arg reduc: \n", best_arg_reduc, "\n"
    return best_arg_reduc
def generate_scalar_scheme(self, vx):
    """ Generating implementation script for the hyperbolic tangent meta-function """
    # tanh(x) = sinh(x) / cosh(x)
    #         = (e^x - e^-x) / (e^x + e^-x)
    #         = (e^(2x) - 1) / (e^(2x) + 1)
    #   when x -> +inf, tanh(x) -> 1
    #   when x -> -inf, tanh(x) -> -1
    # around 0: e^x    ~ 1 + x + x^2 / 2 + x^3 / 6 + ...
    #           e^(-x) ~ 1 - x + x^2 / 2 - x^3 / 6 + ...
    #   so when x -> 0, tanh(x) ~ (2 (x + x^3/6 + ...)) / (2 + x^2 + ...) ~ x
    # We can divide the input interval into 3 parts:
    # positive, around 0, and finally negative.
    #
    # Possible argument reduction
    # x = m.2^E = k * log(2) + r
    # (k != 0) => tanh(x) = (2^(2k) * e^(2r) - 1) / (2^(2k) * e^(2r) + 1)
    #                     = (1 - e^(-2r) / 2^(2k)) / (1 + e^(-2r) / 2^(2k))
    #
    # tanh(x) = (e^(2x) - 1) / (e^(2x) + 1)
    #         = (e^(2x) + 1 - 1 - 1) / (e^(2x) + 1)
    #         = 1 - 2 / (e^(2x) + 1)

    # tanh is odd so we reduce the computation to the absolute value of vx
    abs_vx = Abs(vx, precision=self.precision)

    # if p is the expected output precision,
    # x > (p+2) * log(2) / 2 => tanh(x) = 1 - eps
    #   where eps < 1/2 * 2^-p
    p = self.precision.get_mantissa_size()
    high_bound = (p + 2) * sollya.log(2) / 2
    near_zero_bound = 0.125
    interval_num = 1024

    Log.report(Log.Verbose, "high_bound={}, near_zero_bound={}, interval_num={}",
               float(high_bound), near_zero_bound, interval_num)
    interval_size = (high_bound - near_zero_bound) / (1024)
    new_interval_size = S2**int(sollya.log2(interval_size))
    interval_num *= 2
    high_bound = new_interval_size * interval_num + near_zero_bound
    Log.report(Log.Verbose, "high_bound={}, near_zero_bound={}, interval_num={}",
               float(high_bound), near_zero_bound, interval_num)

    ERROR_THRESHOLD = S2**-p
    Log.report(Log.Info, "ERROR_THRESHOLD={}", ERROR_THRESHOLD)

    # Near 0 approximation
    near_zero_scheme, near_zero_error = self.generate_approx_poly_near_zero(
        sollya.tanh(sollya.x), near_zero_bound, S2**-p, abs_vx)

    # approximation parameters
    poly_degree = 7
    approx_interval = Interval(near_zero_bound, high_bound)
    sollya.settings.points = 117

    approx_scheme, approx_error = piecewise_approximation(
        sollya.tanh, abs_vx, self.precision,
        bound_low=near_zero_bound,
        bound_high=high_bound,
        num_intervals=interval_num,
        max_degree=poly_degree,
        error_threshold=ERROR_THRESHOLD)
    Log.report(Log.Warning, "approx_error={}".format(approx_error))

    comp_near_zero_bound = abs_vx < near_zero_bound
    comp_near_zero_bound.set_attributes(tag="comp_near_zero_bound", debug=debug_multi)
    comp_high_bound = abs_vx < high_bound
    comp_high_bound.set_attributes(tag="comp_high_bound", debug=debug_multi)

    complete_scheme = Select(
        comp_near_zero_bound,
        near_zero_scheme,
        Select(comp_high_bound, approx_scheme,
               Constant(1.0, precision=self.precision)))

    scheme = Return(Select(vx < 0, Negation(complete_scheme), complete_scheme),
                    precision=self.precision)
    return scheme
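# Illustrative sketch (plain Python): the scheme above exploits that tanh is
# odd, works on |x|, and saturates to 1.0 once |x| exceeds (p + 2)*log(2)/2,
# since 1 - tanh(|x|) is then below half an ulp of 1.0 (p = mantissa size,
# 53 for binary64).
import math

def tanh_sketch(x, p=53):
    ax = abs(x)
    high_bound = (p + 2) * math.log(2.0) / 2
    if ax >= high_bound:
        y = 1.0
    else:
        # tanh(t) = 1 - 2/(exp(2t) + 1); the real scheme uses piecewise
        # polynomials instead of calling exp directly.
        y = 1.0 - 2.0 / (math.exp(2.0 * ax) + 1.0)
    return -y if x < 0 else y

for t in (-3.0, -0.01, 0.25, 20.0, 40.0):
    assert abs(tanh_sketch(t) - math.tanh(t)) < 1e-15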
def __init__(self, precision = ML_Binary32, abs_accuracy = S2**-24, libm_compliant = True, debug_flag = False, fuse_fma = True, fast_path_extract = True, target = GenericProcessor(), output_file = "expf.c", function_name = "expf"): # declaring target and instantiating optimization engine processor = target self.precision = precision opt_eng = OptimizationEngine(processor) gappacg = GappaCodeGenerator(processor, declare_cst = True, disable_debug = True) # declaring CodeFunction and retrieving input variable self.function_name = function_name exp_implementation = CodeFunction(self.function_name, output_format = self.precision) vx = exp_implementation.add_input_variable("x", self.precision) Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m") # local overloading of RaiseReturn operation def ExpRaiseReturn(*args, **kwords): kwords["arg_value"] = vx kwords["function_name"] = self.function_name return RaiseReturn(*args, **kwords) test_nan_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False, debug = True, tag = "nan_or_inf") test_nan = Test(vx, specifier = Test.IsNaN, debug = True, tag = "is_nan_test") test_positive = Comparison(vx, 0, specifier = Comparison.GreaterOrEqual, debug = True, tag = "inf_sign") test_signaling_nan = Test(vx, specifier = Test.IsSignalingNaN, debug = True, tag = "is_signaling_nan") return_snan = Statement(ExpRaiseReturn(ML_FPE_Invalid, return_value = FP_QNaN(self.precision))) # return in case of infinity input infty_return = Statement(ConditionBlock(test_positive, Return(FP_PlusInfty(self.precision)), Return(FP_PlusZero(self.precision)))) # return in case of specific value input (NaN or inf) specific_return = ConditionBlock(test_nan, ConditionBlock(test_signaling_nan, return_snan, Return(FP_QNaN(self.precision))), infty_return) # return in case of standard (non-special) input # exclusion of early overflow and underflow cases precision_emax = self.precision.get_emax() precision_max_value = S2 * S2**precision_emax exp_overflow_bound = ceil(log(precision_max_value)) early_overflow_test = Comparison(vx, exp_overflow_bound, likely = False, specifier = Comparison.Greater) early_overflow_return = Statement(ClearException(), ExpRaiseReturn(ML_FPE_Inexact, ML_FPE_Overflow, return_value = FP_PlusInfty(self.precision))) precision_emin = self.precision.get_emin_subnormal() precision_min_value = S2 ** precision_emin exp_underflow_bound = floor(log(precision_min_value)) early_underflow_test = Comparison(vx, exp_underflow_bound, likely = False, specifier = Comparison.Less) early_underflow_return = Statement(ClearException(), ExpRaiseReturn(ML_FPE_Inexact, ML_FPE_Underflow, return_value = FP_PlusZero(self.precision))) sollya_prec_map = {ML_Binary32: sollya.binary32, ML_Binary64: sollya.binary64} # constant computation invlog2 = round(1/log(2), sollya_prec_map[self.precision], RN) interval_vx = Interval(exp_underflow_bound, exp_overflow_bound) interval_fk = interval_vx * invlog2 interval_k = Interval(floor(inf(interval_fk)), ceil(sup(interval_fk))) log2_hi_precision = self.precision.get_field_size() - (ceil(log2(sup(abs(interval_k)))) + 2) Log.report(Log.Info, "log2_hi_precision: "), log2_hi_precision invlog2_cst = Constant(invlog2, precision = self.precision) log2_hi = round(log(2), log2_hi_precision, sollya.RN) log2_lo = round(log(2) - log2_hi, sollya_prec_map[self.precision], sollya.RN) # argument reduction unround_k = vx * invlog2 unround_k.set_attributes(tag = "unround_k", debug = ML_Debug(display_format = "%f")) k = NearestInteger(unround_k, 
precision = self.precision, debug = ML_Debug(display_format = "%f")) ik = NearestInteger(unround_k, precision = ML_Int32, debug = ML_Debug(display_format = "%d"), tag = "ik") ik.set_tag("ik") k.set_tag("k") exact_pre_mul = (k * log2_hi) exact_pre_mul.set_attributes(exact= True) exact_hi_part = vx - exact_pre_mul exact_hi_part.set_attributes(exact = True) r = exact_hi_part - k * log2_lo r.set_tag("r") r.set_attributes(debug = ML_Debug(display_format = "%f")) opt_r = opt_eng.optimization_process(r, self.precision, copy = True, fuse_fma = fuse_fma) tag_map = {} opt_eng.register_nodes_by_tag(opt_r, tag_map) cg_eval_error_copy_map = { vx: Variable("x", precision = self.precision, interval = interval_vx), tag_map["k"]: Variable("k", interval = interval_k, precision = self.precision) } #try: if 1: #eval_error = gappacg.get_eval_error(opt_r, cg_eval_error_copy_map, gappa_filename = "red_arg.g") eval_error = gappacg.get_eval_error_v2(opt_eng, opt_r, cg_eval_error_copy_map, gappa_filename = "red_arg.g") Log.report(Log.Info, "eval error: %s" % eval_error) #except: # Log.report(Log.Info, "gappa error evaluation failed") print r.get_str(depth = None, display_precision = True, display_attribute = True) print opt_r.get_str(depth = None, display_precision = True, display_attribute = True) approx_interval = Interval(-log(2)/2, log(2)/2) local_ulp = sup(ulp(exp(approx_interval), self.precision)) print "ulp: ", local_ulp error_goal = local_ulp #S2**-(self.precision.get_field_size()+1) error_goal_approx = S2**-1 * error_goal Log.report(Log.Info, "\033[33;1m building mathematical polynomial \033[0m\n") poly_degree = sup(guessdegree(exp(x), approx_interval, error_goal_approx)) #- 1 init_poly_degree = poly_degree return while 1: Log.report(Log.Info, "attempting poly degree: %d" % poly_degree) poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(exp(x), poly_degree, [self.precision]*(poly_degree+1), approx_interval, absolute) Log.report(Log.Info, "poly approx error: %s" % poly_approx_error) Log.report(Log.Info, "\033[33;1m generating polynomial evaluation scheme \033[0m") poly = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, r, unified_precision = self.precision) poly.set_tag("poly") # optimizing poly before evaluation error computation opt_poly = opt_eng.optimization_process(poly, self.precision) #print "poly: ", poly.get_str(depth = None, display_precision = True) #print "opt_poly: ", opt_poly.get_str(depth = None, display_precision = True) # evaluating error of the polynomial approximation r_gappa_var = Variable("r", precision = self.precision, interval = approx_interval) poly_error_copy_map = { r.get_handle().get_node(): r_gappa_var } gappacg = GappaCodeGenerator(target, declare_cst = False, disable_debug = True) poly_eval_error = gappacg.get_eval_error_v2(opt_eng, poly.get_handle().get_node(), poly_error_copy_map, gappa_filename = "gappa_poly.g") Log.report(Log.Info, "poly evaluation error: %s" % poly_eval_error) global_poly_error = poly_eval_error + poly_approx_error global_rel_poly_error = global_poly_error / exp(approx_interval) print "global_poly_error: ", global_poly_error, global_rel_poly_error flag = local_ulp > sup(abs(global_rel_poly_error)) print "test: ", flag if flag: break else: if poly_degree > init_poly_degree + 5: Log.report(Log.Error, "poly degree search did not converge") poly_degree += 1 late_overflow_test = Comparison(ik, self.precision.get_emax(), specifier = Comparison.Greater, likely = False, debug = True, tag = "late_overflow_test") 
overflow_exp_offset = (self.precision.get_emax() - self.precision.get_field_size() / 2) diff_k = ik - overflow_exp_offset diff_k.set_attributes(debug = ML_Debug(display_format = "%d"), tag = "diff_k") late_overflow_result = (ExponentInsertion(diff_k) * poly) * ExponentInsertion(overflow_exp_offset) late_overflow_result.set_attributes(silent = False, tag = "late_overflow_result", debug = debugf) late_overflow_return = ConditionBlock(Test(late_overflow_result, specifier = Test.IsInfty, likely = False), ExpRaiseReturn(ML_FPE_Overflow, return_value = FP_PlusInfty(self.precision)), Return(late_overflow_result)) late_underflow_test = Comparison(k, self.precision.get_emin_normal(), specifier = Comparison.LessOrEqual, likely = False) underflow_exp_offset = 2 * self.precision.get_field_size() late_underflow_result = (ExponentInsertion(ik + underflow_exp_offset) * poly) * ExponentInsertion(-underflow_exp_offset) late_underflow_result.set_attributes(debug = ML_Debug(display_format = "%e"), tag = "late_underflow_result", silent = False) test_subnormal = Test(late_underflow_result, specifier = Test.IsSubnormal) late_underflow_return = Statement(ConditionBlock(test_subnormal, ExpRaiseReturn(ML_FPE_Underflow, return_value = late_underflow_result)), Return(late_underflow_result)) std_result = poly * ExponentInsertion(ik, tag = "exp_ik", debug = debug_lftolx) std_result.set_attributes(tag = "std_result", debug = debug_lftolx) result_scheme = ConditionBlock(late_overflow_test, late_overflow_return, ConditionBlock(late_underflow_test, late_underflow_return, Return(std_result))) std_return = ConditionBlock(early_overflow_test, early_overflow_return, ConditionBlock(early_underflow_test, early_underflow_return, result_scheme)) # main scheme Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m") scheme = ConditionBlock(test_nan_or_inf, Statement(ClearException(), specific_return), std_return) #print scheme.get_str(depth = None, display_precision = True) # fusing FMA if fuse_fma: Log.report(Log.Info, "\033[33;1m MDL fusing FMA \033[0m") scheme = opt_eng.fuse_multiply_add(scheme, silence = True) Log.report(Log.Info, "\033[33;1m MDL abstract scheme \033[0m") opt_eng.instantiate_abstract_precision(scheme, None) Log.report(Log.Info, "\033[33;1m MDL instantiated scheme \033[0m") opt_eng.instantiate_precision(scheme, default_precision = self.precision) Log.report(Log.Info, "\033[33;1m subexpression sharing \033[0m") opt_eng.subexpression_sharing(scheme) Log.report(Log.Info, "\033[33;1m silencing operation \033[0m") opt_eng.silence_fp_operations(scheme) # registering scheme as function implementation exp_implementation.set_scheme(scheme) # check processor support Log.report(Log.Info, "\033[33;1m checking processor support \033[0m") opt_eng.check_processor_support(scheme) # factorizing fast path if fast_path_extract: Log.report(Log.Info, "\033[33;1m factorizing fast path\033[0m") opt_eng.factorize_fast_path(scheme) Log.report(Log.Info, "\033[33;1m generating source code \033[0m") cg = CCodeGenerator(processor, declare_cst = False, disable_debug = not debug_flag, libm_compliant = libm_compliant) self.result = exp_implementation.get_definition(cg, C_Code, static_cst = True) #self.result.add_header("support_lib/ml_types.h") self.result.add_header("support_lib/ml_special_values.h") self.result.add_header_comment("polynomial degree for exp(x): %d" % poly_degree) self.result.add_header_comment("sollya polynomial for exp(x): %s" % poly_object.get_sollya_object()) if debug_flag: self.result.add_header("stdio.h") 
self.result.add_header("inttypes.h") output_stream = open(output_file, "w")#"%s.c" % exp_implementation.get_name(), "w") output_stream.write(self.result.get(cg)) output_stream.close()
def determine_error(self): sollya.settings.display = sollya.hexadecimal n_log2 = self.precision.round_sollya_object(sollya.log(2), sollya.RN) n_invlog2 = self.precision.round_sollya_object(1 / sollya.log(2), sollya.RN) poly_expr = str(sollya.horner(self.poly_object.get_sollya_object())) poly_expr = poly_expr.replace("_x_", "r") poly_expr = poly_expr.replace("r^0x1p1", "r*r") config = fptaylor.CHECK_CONFIG.copy() del config["--abs-error"] config["--opt"] = "bb-eval" config["--rel-error-threshold"] = "0.0" config["--intermediate-opt"] = "false" config["--uncertainty"] = "false" def generate_fptaylor(x): x_low = sollya.inf(x) x_high = sollya.sup(x) query = "\n".join([ "Variables", " real x in [{},{}];".format(x_low, x_high), "Definitions", " r rnd64= x;", " retval rnd64= {};".format(poly_expr), "Expressions", " retval;" ]) rnd_rel_err = None rnd_abs_err = None try: res = fptaylor.Result(query, { **config, "--rel-error": "true", "--abs-error": "true" }) rnd_rel_err = float( res.result["relative_errors"]["final_total"]["value"]) rnd_abs_err = float( res.result["absolute_errors"]["final_total"]["value"]) except AssertionError: pass except KeyError: try: rnd_abs_err = float( res.result["absolute_errors"]["final_total"]["value"]) except KeyError: pass if rnd_abs_err is None: try: res = fptaylor.Result(query, { **config, "--rel-error": "false", "--abs-error": "true" }) rnd_abs_err = float( res.result["absolute_errors"]["final_total"]["value"]) except AssertionError: pass err_int = sollya.supnorm(self.poly_object.get_sollya_object(), sollya.exp(sollya.x), x, sollya.relative, 2**-100) algo_rel_err = sollya.sup(err_int) err_int = sollya.supnorm(self.poly_object.get_sollya_object(), sollya.exp(sollya.x), x, sollya.absolute, 2**-100) algo_abs_err = sollya.sup(err_int) if rnd_rel_err is None or str(algo_rel_err) == "error": rel_err = float("inf") else: rel_err = rnd_rel_err + algo_rel_err abs_err = rnd_abs_err + algo_abs_err return rel_err, abs_err def generate_reduction_fptaylor(x): # get k, must be the same at endpoints unround_k = x * n_invlog2 k_low = sollya.floor(sollya.inf(unround_k)) k_high = sollya.floor(sollya.sup(unround_k)) if not (k_low == k_high or (k_low == -1 and sollya.sup(x) == 0)): assert False, "Interval must not straddle multples of log(2)" k = int(k_low) r = x - k * n_log2 twok = 2**k x_low = sollya.inf(x) x_high = sollya.sup(x) query = "\n".join([ "Variables", " real x in [{},{}];".format(x_low, x_high), "Definitions", " whole rnd64= {} * {};".format(k, n_log2), " r rnd64= x - whole;", " poly rnd64= {};".format(poly_expr), " retval rnd64= poly*{};".format(twok), "Expressions", " retval;" ]) rnd_rel_err = None rnd_abs_err = None try: res = fptaylor.Result(query, { **config, "--rel-error": "true", "--abs-error": "true" }) rnd_rel_err = float( res.result["relative_errors"]["final_total"]["value"]) rnd_abs_err = float( res.result["absolute_errors"]["final_total"]["value"]) except AssertionError: pass except KeyError: try: rnd_abs_err = float( res.result["absolute_errors"]["final_total"]["value"]) except KeyError: pass if rnd_abs_err is None: try: res = fptaylor.Result(query, { **config, "--rel-error": "false", "--abs-error": "true" }) rnd_abs_err = float( res.result["absolute_errors"]["final_total"]["value"]) except AssertionError: pass err_int = sollya.supnorm(self.poly_object.get_sollya_object(), sollya.exp(sollya.x), r, sollya.relative, 2**-100) algo_rel_err = sollya.sup(err_int) err_int = sollya.supnorm(self.poly_object.get_sollya_object(), sollya.exp(sollya.x), r, sollya.absolute, 
2**-100) algo_abs_err = sollya.sup(err_int) if rnd_rel_err is None or str(algo_rel_err) == "error": rel_err = float("inf") else: rel_err = rnd_rel_err + algo_rel_err abs_err = rnd_abs_err + algo_abs_err return rel_err, abs_err def split_domain(starting_domain, slivers): in_domains = [starting_domain] # abs out_domains = list() for I in in_domains: if sollya.inf(I) < 0 and sollya.sup(I) > 0: out_domains.append(sollya.Interval(sollya.inf(I), 0)) out_domains.append(sollya.Interval(0, sollya.sup(I))) else: out_domains.append(I) in_domains = out_domains # k out_domains = list() while len(in_domains) > 0: I = in_domains.pop() unround_mult = I * n_invlog2 mult_low = sollya.floor(sollya.inf(unround_mult)) mult_high = sollya.floor(sollya.sup(unround_mult)) #print("in: [{}, {}] ({}, {})".format(float(sollya.inf(I)), float(sollya.sup(I)), int(mult_low), int(mult_high))) if mult_low == mult_high or (mult_low == -1 and mult_high == 0): #print(" accepted") out_domains.append(I) continue k_range = sollya.Interval(mult_low, mult_low + 1.5) I_range = k_range * n_log2 for _ in range(100): mid = sollya.mid(I_range) k = sollya.floor(mid * n_invlog2) if k == mult_low: I_range = sollya.Interval(mid, sollya.sup(I_range)) else: I_range = sollya.Interval(sollya.inf(I_range), mid) divider_high = sollya.sup(I_range) divider_low = sollya.inf(I_range) lower_part = sollya.Interval(sollya.inf(I), divider_low) upper_part = sollya.Interval(divider_high, sollya.sup(I)) #print(" -> [{}, {}]".format(float(sollya.inf(lower_part)), float(sollya.sup(lower_part)))) #print(" -> [{}, {}]".format(float(sollya.inf(upper_part)), float(sollya.sup(upper_part)))) in_domains.append(upper_part) in_domains.append(lower_part) in_domains = out_domains # subdivide each section into 2**subd sections for _ in range(slivers): out_domains = list() for I in in_domains: mid = sollya.mid(I) out_domains.append(sollya.Interval(sollya.inf(I), mid)) out_domains.append(sollya.Interval(mid, sollya.sup(I))) in_domains = out_domains in_domains = set(in_domains) in_domains = sorted(in_domains, key=lambda x: float(sollya.inf(x))) in_domains = [ d for d in in_domains if sollya.inf(d) != sollya.sup(d) ] return in_domains if self.skip_reduction: starting_domain = sollya.Interval(0, n_log2) else: reduction_k = 40 starting_domain = sollya.Interval(-reduction_k * n_log2, reduction_k * n_log2) # analyse each piece in_domains = split_domain(starting_domain, self.slivers) errors = list() for I in in_domains: if self.skip_reduction: rel_err, abs_err = generate_fptaylor(I) else: rel_err, abs_err = generate_reduction_fptaylor(I) print("{}\t{}\t{}\t{}".format(float(sollya.inf(I)), float(sollya.sup(I)), float(abs_err), float(rel_err))) errors.append((I, abs_err, rel_err)) def generate_json(errors, domain): errors = [err for err in errors if err[0] in domain] errors.sort(key=lambda err: err[2]) epsilon = errors[0][2] delta = max(err[1] for err in errors) d = { "cname": self.function_name, "delta": float(delta), "domain": [ float(sollya.inf(domain)), float(sollya.sup(domain)), ], "epsilon": float(epsilon), "operation": "exp" } return d if self.skip_reduction: d = generate_json(errors, sollya.Interval(0, n_log2)) json_str = json.dumps(d, sort_keys=True, indent=4) json_str = "spec: " + json_str.replace("\n", "\nspec: ") print(json_str) else: specs = list() for k in range(1, reduction_k): d = generate_json(errors, sollya.Interval(-k * n_log2, k * n_log2)) specs.append(d) for i in range(len(specs)): d = specs[i] if i == len(specs) - 1: json_str = json.dumps(d, sort_keys=True, 
                                       indent=4)
                json_str = "spec: " + json_str.replace("\n", "\nspec: ")
                print(json_str)
                break
            nd = specs[i + 1]
            if d["epsilon"] == nd["epsilon"] and d["delta"] == nd["delta"]:
                continue
            json_str = json.dumps(d, sort_keys=True, indent=4)
            json_str = "spec: " + json_str.replace("\n", "\nspec: ")
            print(json_str)
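# Hedged helper sketch (not part of the generated code): the loop above prints
# each specification as JSON prefixed with "spec: ", one block per domain.
# Assuming the captured stdout is available as a string, the blocks can be
# recovered as follows; the field names (cname, delta, domain, epsilon,
# operation) mirror the dict built by generate_json.
import json

def parse_specs(text):
    blocks, current = [], []
    for line in text.splitlines():
        if not line.startswith("spec: "):
            continue
        current.append(line[len("spec: "):])
        if line.rstrip() == "spec: }":        # indent=4 output ends with "}"
            blocks.append(json.loads("\n".join(current)))
            current = []
    return blocks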
def generate_scheme(self): vx = self.implementation.add_input_variable("x", self.get_input_precision()) sollya_precision = self.get_input_precision().get_sollya_object() # local overloading of RaiseReturn operation def ExpRaiseReturn(*args, **kwords): kwords["arg_value"] = vx kwords["function_name"] = self.function_name return RaiseReturn(*args, **kwords) test_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False, debug=True, tag="nan_or_inf") test_nan = Test(vx, specifier=Test.IsNaN, debug=True, tag="is_nan_test") test_positive = Comparison(vx, 0, specifier=Comparison.GreaterOrEqual, debug=True, tag="inf_sign") test_signaling_nan = Test(vx, specifier=Test.IsSignalingNaN, debug=True, tag="is_signaling_nan") return_snan = Statement( ExpRaiseReturn(ML_FPE_Invalid, return_value=FP_QNaN(ML_Binary32))) vx_exp = ExponentExtraction(vx, tag="vx_exp", debug=debugd) int_precision = ML_Int64 if self.precision is ML_Binary64 else ML_Int32 # retrieving processor inverse approximation table dummy_var = Variable("dummy", precision=self.precision) dummy_div_seed = DivisionSeed(dummy_var, precision=self.precision) inv_approx_table = self.processor.get_recursive_implementation( dummy_div_seed, language=None, table_getter=lambda self: self.approx_table_map) # table creation table_index_size = 7 log_table = ML_Table(dimensions=[2**table_index_size, 2], storage_precision=self.precision, tag=self.uniquify_name("inv_table")) log_table[0][0] = 0.0 log_table[0][1] = 0.0 for i in xrange(1, 2**table_index_size): #inv_value = (1.0 + (self.processor.inv_approx_table[i] / S2**9) + S2**-52) * S2**-1 #inv_value = (1.0 + (inv_approx_table[i][0] / S2**9) ) * S2**-1 #print inv_approx_table[i][0], inv_value inv_value = inv_approx_table[i][0] value_high = round( log2(inv_value), self.precision.get_field_size() - (self.precision.get_exponent_size() + 1), sollya.RN) value_low = round( log2(inv_value) - value_high, sollya_precision, sollya.RN) log_table[i][0] = value_high log_table[i][1] = value_low def compute_log(_vx, exp_corr_factor=None): _vx_mant = MantissaExtraction(_vx, tag="_vx_mant", debug=debug_lftolx) _vx_exp = ExponentExtraction(_vx, tag="_vx_exp", debug=debugd) table_index = BitLogicAnd(BitLogicRightShift( TypeCast(_vx_mant, precision=int_precision, debug=debuglx), self.precision.get_field_size() - 7, debug=debuglx), 0x7f, tag="table_index", debug=debuglld) # argument reduction # TODO: detect if single operand inverse seed is supported by the targeted architecture pre_arg_red_index = TypeCast(BitLogicAnd( TypeCast(DivisionSeed(_vx_mant, precision=self.precision, tag="seed", debug=debug_lftolx, silent=True), precision=ML_UInt64), Constant(-2, precision=ML_UInt64), precision=ML_UInt64), precision=self.precision, tag="pre_arg_red_index", debug=debug_lftolx) arg_red_index = Select(Equal(table_index, 0), 1.0, pre_arg_red_index, tag="arg_red_index", debug=debug_lftolx) #if not processor.is_supported_operation(arg_red_index): # if self.precision != ML_Binary32: # arg_red_index = DivisionSeed(Conversion(_vx_mant, precision = ML_Binary32), precision = ML_Binary32, _red_vx = arg_red_index * _vx_mant - 1.0 _red_vx.set_attributes(tag="_red_vx", debug=debug_lftolx) inv_err = S2**-7 red_interval = Interval(1 - inv_err, 1 + inv_err) # return in case of standard (non-special) input _log_inv_lo = TableLoad(log_table, table_index, 1, tag="log_inv_lo", debug=debug_lftolx) _log_inv_hi = TableLoad(log_table, table_index, 0, tag="log_inv_hi", debug=debug_lftolx) print "building mathematical polynomial" approx_interval = 
Interval(-inv_err, inv_err) poly_degree = sup( guessdegree( log2(1 + sollya.x) / sollya.x, approx_interval, S2** -(self.precision.get_field_size() + 1))) + 1 global_poly_object = Polynomial.build_from_approximation( log2(1 + sollya.x) / sollya.x, poly_degree, [self.precision] * (poly_degree + 1), approx_interval, sollya.absolute) poly_object = global_poly_object.sub_poly(start_index=0) Attributes.set_default_silent(True) Attributes.set_default_rounding_mode(ML_RoundToNearest) print "generating polynomial evaluation scheme" _poly = PolynomialSchemeEvaluator.generate_horner_scheme( poly_object, _red_vx, unified_precision=self.precision) _poly.set_attributes(tag="poly", debug=debug_lftolx) print "sollya global_poly_object" print global_poly_object.get_sollya_object() print "sollya poly_object" print poly_object.get_sollya_object() corr_exp = _vx_exp if exp_corr_factor == None else _vx_exp + exp_corr_factor split_red_vx = Split(_red_vx, precision=ML_DoubleDouble, tag="split_red_vx", debug=debug_ddtolx) red_vx_hi = split_red_vx.hi red_vx_lo = split_red_vx.lo Attributes.unset_default_rounding_mode() Attributes.unset_default_silent() # result = _red_vx * poly - log_inv_hi - log_inv_lo + _vx_exp * log2_hi + _vx_exp * log2_lo #pre_result = -_log_inv_hi + (_red_vx + (_red_vx * _poly + (- _log_inv_lo))) pre_result = -_log_inv_hi + (_red_vx * _poly + (-_log_inv_lo)) pre_result.set_attributes(tag="pre_result", debug=debug_lftolx) exact_log2_hi_exp = corr_exp exact_log2_hi_exp.set_attributes(tag="exact_log2_hi_hex", debug=debug_lftolx) _result = corr_exp + pre_result return _result, _poly, _log_inv_lo, _log_inv_hi, _red_vx result, poly, log_inv_lo, log_inv_hi, red_vx = compute_log(vx) result.set_attributes(tag="result", debug=debug_lftolx) neg_input = Comparison(vx, 0, likely=False, specifier=Comparison.Less, debug=debugd, tag="neg_input") vx_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False, debug=debugd, tag="nan_or_inf") vx_snan = Test(vx, specifier=Test.IsSignalingNaN, likely=False, debug=debugd, tag="snan") vx_inf = Test(vx, specifier=Test.IsInfty, likely=False, debug=debugd, tag="inf") vx_subnormal = Test(vx, specifier=Test.IsSubnormal, likely=False, debug=debugd, tag="vx_subnormal") vx_zero = Test(vx, specifier=Test.IsZero, likely=False, debug=debugd, tag="vx_zero") exp_mone = Equal(vx_exp, -1, tag="exp_minus_one", debug=debugd, likely=False) vx_one = Equal(vx, 1.0, tag="vx_one", likely=False, debug=debugd) # exp=-1 case print "managing exp=-1 case" #red_vx_2 = arg_red_index * vx_mant * 0.5 #approx_interval2 = Interval(0.5 - inv_err, 0.5 + inv_err) #poly_degree2 = sup(guessdegree(log(x), approx_interval2, S2**-(self.precision.get_field_size()+1))) + 1 #poly_object2 = Polynomial.build_from_approximation(log(sollya.x), poly_degree, [self.precision]*(poly_degree+1), approx_interval2, sollya.absolute) #print "poly_object2: ", poly_object2.get_sollya_object() #poly2 = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object2, red_vx_2, unified_precision = self.precision) #poly2.set_attributes(tag = "poly2", debug = debug_lftolx) #result2 = (poly2 - log_inv_hi - log_inv_lo) result2 = (-log_inv_hi - 1.0) + ((poly * red_vx) - log_inv_lo) result2.set_attributes(tag="result2", debug=debug_lftolx) m100 = -100 S2100 = Constant(S2**100, precision=self.precision) result_subnormal, _, _, _, _ = compute_log(vx * S2100, exp_corr_factor=m100) result_subnormal.set_attributes(tag="result_subnormal", debug=debug_lftolx) print "managing close to 1.0 cases" one_err = S2**-7 approx_interval_one = 
Interval(-one_err, one_err) red_vx_one = vx - 1.0 poly_degree_one = sup( guessdegree( log(1 + x) / x, approx_interval_one, S2** -(self.precision.get_field_size() + 1))) + 1 poly_object_one = Polynomial.build_from_approximation( log(1 + sollya.x) / sollya.x, poly_degree_one, [self.precision] * (poly_degree_one + 1), approx_interval_one, absolute).sub_poly(start_index=1) poly_one = PolynomialSchemeEvaluator.generate_horner_scheme( poly_object_one, red_vx_one, unified_precision=self.precision) poly_one.set_attributes(tag="poly_one", debug=debug_lftolx) result_one = red_vx_one + red_vx_one * poly_one cond_one = (vx < (1 + one_err)) & (vx > (1 - one_err)) cond_one.set_attributes(tag="cond_one", debug=debugd, likely=False) # main scheme print "MDL scheme" pre_scheme = ConditionBlock( neg_input, Statement(ClearException(), Raise(ML_FPE_Invalid), Return(FP_QNaN(self.precision))), ConditionBlock( vx_nan_or_inf, ConditionBlock( vx_inf, Statement( ClearException(), Return(FP_PlusInfty(self.precision)), ), Statement(ClearException(), ConditionBlock(vx_snan, Raise(ML_FPE_Invalid)), Return(FP_QNaN(self.precision)))), ConditionBlock( vx_subnormal, ConditionBlock( vx_zero, Statement( ClearException(), Raise(ML_FPE_DivideByZero), Return(FP_MinusInfty(self.precision)), ), Statement(ClearException(), result_subnormal, Return(result_subnormal))), ConditionBlock( vx_one, Statement( ClearException(), Return(FP_PlusZero(self.precision)), ), ConditionBlock(exp_mone, Return(result2), Return(result)) #ConditionBlock(cond_one, #Return(new_result_one), #ConditionBlock(exp_mone, #Return(result2), #Return(result) #) #) )))) scheme = Statement(result, pre_scheme) return scheme
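# Standalone numeric sketch (plain Python, not the generated scheme) of the
# reduction used by compute_log above: with r an approximate reciprocal of the
# mantissa m (emulated here by rounding 1/m to 7 bits, standing in for the
# hardware DivisionSeed), u = r*m - 1 is small and
#     log2(x) = e + log2(m) = e - log2(r) + log2(1 + u),
# where log2(r) is the value the table stores as log_inv_hi/log_inv_lo and
# log2(1 + u)/u is what the polynomial approximates.
import math

def log2_via_reduction(x):
    m, e = math.frexp(x)                   # x = m * 2**e, m in [0.5, 1)
    m *= 2.0
    e -= 1                                 # renormalize to m in [1, 2)
    r = round((1.0 / m) * 128.0) / 128.0   # 7-bit reciprocal approximation
    u = r * m - 1.0                        # reduced argument, |u| ~< 2**-7
    return e - math.log2(r) + math.log2(1.0 + u)

if __name__ == "__main__":
    for x in (0.7, 3.0, 12345.678):
        print(x, log2_via_reduction(x), math.log2(x))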
def generate_reduction_fptaylor(x):
    unround_e = sollya.log2(x)
    e_low = sollya.floor(sollya.inf(unround_e))
    e_high = sollya.floor(sollya.sup(unround_e))
    if e_low != e_high:
        assert False, "Interval must not straddle a binade"
    e = int(e_low) + 1
    z = x / (2**e) * 0.5
    query = "\n".join(
        ["Variables",
         " real z in [{},{}];".format(sollya.inf(z), sollya.sup(z)),
         "Definitions",
         " poly rnd64= {};".format(poly_expr),
         " retval rnd64= {}*{} + poly;".format(e, n_log2),
         "Expressions",
         " retval;"])

    rnd_rel_err = None
    rnd_abs_err = None
    try:
        res = fptaylor.Result(query, {**config,
                                      "--rel-error": "true",
                                      "--abs-error": "true"})
        rnd_rel_err = float(res.result["relative_errors"]["final_total"]["value"])
        rnd_abs_err = float(res.result["absolute_errors"]["final_total"]["value"])
    except AssertionError:
        pass
    except KeyError:
        try:
            rnd_abs_err = float(res.result["absolute_errors"]["final_total"]["value"])
        except KeyError:
            pass

    if rnd_abs_err is None:
        try:
            res = fptaylor.Result(query, {**config,
                                          "--rel-error": "false",
                                          "--abs-error": "true"})
            rnd_abs_err = float(res.result["absolute_errors"]["final_total"]["value"])
        except AssertionError:
            pass

    err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                             sollya.log(sollya.x), z, sollya.relative, 2**-100)
    algo_rel_err = sollya.sup(err_int)

    err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                             sollya.log(sollya.x), z, sollya.absolute, 2**-100)
    algo_abs_err = sollya.sup(err_int)

    if rnd_rel_err is None or str(algo_rel_err) == "error":
        rel_err = float("inf")
    else:
        rel_err = rnd_rel_err + algo_rel_err

    abs_err = rnd_abs_err + algo_abs_err

    return rel_err, abs_err
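# Plain-Python illustration of the precondition asserted above: the interval
# passed to generate_reduction_fptaylor must stay inside one binade, so that
# e = floor(log2(x)) is a single constant and one FPTaylor query with a fixed
# exponent covers the whole piece.
import math

def same_binade(lo, hi):
    """True when floor(log2(x)) is identical for every x in [lo, hi] (0 < lo <= hi)."""
    return math.floor(math.log2(lo)) == math.floor(math.log2(hi))

if __name__ == "__main__":
    print(same_binade(1.1, 1.9))   # True: both endpoints lie in [1, 2)
    print(same_binade(1.9, 2.1))   # False: the interval straddles 2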
def generate_scalar_scheme(self, vx, n): # fixing inputs' node tag vx.set_attributes(tag="x") n.set_attributes(tag="n") int_precision = self.precision.get_integer_format() # assuming x = m.2^e (m in [1, 2[) # n, positive or null integers # # rootn(x, n) = x^(1/n) # = exp(1/n * log(x)) # = 2^(1/n * log2(x)) # = 2^(1/n * (log2(m) + e)) # # approximation log2(m) # retrieving processor inverse approximation table dummy_var = Variable("dummy", precision=self.precision) dummy_div_seed = ReciprocalSeed(dummy_var, precision=self.precision) inv_approx_table = self.processor.get_recursive_implementation( dummy_div_seed, language=None, table_getter=lambda self: self.approx_table_map) log_f = sollya.log(sollya.x) # /sollya.log(self.basis) use_reciprocal = False # non-scaled vx used to compute vx^1 unmodified_vx = vx is_subnormal = Test(vx, specifier=Test.IsSubnormal, tag="is_subnormal") exp_correction_factor = self.precision.get_mantissa_size() mantissa_factor = Constant(2**exp_correction_factor, tag="mantissa_factor") vx = Select(is_subnormal, vx * mantissa_factor, vx, tag="corrected_vx") m = MantissaExtraction(vx, tag="m", precision=self.precision) e = ExponentExtraction(vx, tag="e", precision=int_precision) e = Select(is_subnormal, e - exp_correction_factor, e, tag="corrected_e") ml_log_args = ML_GenericLog.get_default_args(precision=self.precision, basis=2) ml_log = ML_GenericLog(ml_log_args) log_table, log_table_tho, table_index_range = ml_log.generate_log_table( log_f, inv_approx_table) log_approx = ml_log.generate_reduced_log_split( Abs(m, precision=self.precision), log_f, inv_approx_table, log_table) # floating-point version of n n_f = Conversion(n, precision=self.precision, tag="n_f") inv_n = Division(Constant(1, precision=self.precision), n_f) log_approx = Select(Equal(vx, 0), FP_MinusInfty(self.precision), log_approx) log_approx.set_attributes(tag="log_approx", debug=debug_multi) if use_reciprocal: r = Multiplication(log_approx, inv_n, tag="r", debug=debug_multi) else: r = Division(log_approx, n_f, tag="r", debug=debug_multi) # e_n ~ e / n e_f = Conversion(e, precision=self.precision, tag="e_f") if use_reciprocal: e_n = Multiplication(e_f, inv_n, tag="e_n") else: e_n = Division(e_f, n_f, tag="e_n") error_e_n = FMA(e_n, -n_f, e_f, tag="error_e_n") e_n_int = NearestInteger(e_n, precision=self.precision, tag="e_n_int") pre_e_n_frac = e_n - e_n_int pre_e_n_frac.set_attributes(tag="pre_e_n_frac") e_n_frac = pre_e_n_frac + error_e_n * inv_n e_n_frac.set_attributes(tag="e_n_frac") ml_exp2_args = ML_Exp2.get_default_args(precision=self.precision) ml_exp2 = ML_Exp2(ml_exp2_args) exp2_r = ml_exp2.generate_scalar_scheme(r, inline_select=True) exp2_r.set_attributes(tag="exp2_r", debug=debug_multi) exp2_e_n_frac = ml_exp2.generate_scalar_scheme(e_n_frac, inline_select=True) exp2_e_n_frac.set_attributes(tag="exp2_e_n_frac", debug=debug_multi) exp2_e_n_int = ExponentInsertion(Conversion(e_n_int, precision=int_precision), precision=self.precision, tag="exp2_e_n_int") n_is_even = Equal(Modulo(n, 2), 0, tag="n_is_even", debug=debug_multi) n_is_odd = LogicalNot(n_is_even, tag="n_is_odd") result_sign = Select( n_is_odd, CopySign(vx, Constant(1.0, precision=self.precision)), 1) # managing n == -1 if self.expand_div: ml_division_args = ML_Division.get_default_args( precision=self.precision, input_formats=[self.precision] * 2) ml_division = ML_Division(ml_division_args) self.division_implementation = ml_division.implementation self.division_implementation.set_scheme( ml_division.generate_scheme()) ml_division_fct 
= self.division_implementation.get_function_object( ) else: ml_division_fct = Division # manage n=1 separately to avoid catastrophic propagation of errors # between log2 and exp2 to eventually compute the identity function # test-case #3 result = ConditionBlock( LogicalOr(LogicalOr(Test(vx, specifier=Test.IsNaN), Equal(n, 0)), LogicalAnd(n_is_even, vx < 0)), Return(FP_QNaN(self.precision)), Statement( ConditionBlock( Equal(n, -1, tag="n_is_mone"), #Return(Division(Constant(1, precision=self.precision), unmodified_vx, tag="div_res", precision=self.precision)), Return( ml_division_fct(Constant(1, precision=self.precision), unmodified_vx, tag="div_res", precision=self.precision)), ), ConditionBlock( # rootn( ±inf, n) is +∞ for even n< 0. Test(vx, specifier=Test.IsInfty), Statement( ConditionBlock( n < 0, #LogicalAnd(n_is_odd, n < 0), Return( Select(Test(vx, specifier=Test.IsPositiveInfty), Constant(FP_PlusZero(self.precision), precision=self.precision), Constant(FP_MinusZero(self.precision), precision=self.precision), precision=self.precision)), Return(vx), ), ), ), ConditionBlock( # rootn(±0, n) is ±∞ for odd n < 0. LogicalAnd(LogicalAnd(n_is_odd, n < 0), Equal(vx, 0), tag="n_is_odd_and_neg"), Return( Select(Test(vx, specifier=Test.IsPositiveZero), Constant(FP_PlusInfty(self.precision), precision=self.precision), Constant(FP_MinusInfty(self.precision), precision=self.precision), precision=self.precision)), ), ConditionBlock( # rootn( ±0, n) is +∞ for even n< 0. LogicalAnd(LogicalAnd(n_is_even, n < 0), Equal(vx, 0)), Return(FP_PlusInfty(self.precision))), ConditionBlock( # rootn(±0, n) is +0 for even n > 0. LogicalAnd(n_is_even, Equal(vx, 0)), Return(vx)), ConditionBlock( Equal(n, 1), Return(unmodified_vx), Return(result_sign * exp2_r * exp2_e_n_int * exp2_e_n_frac)))) return result
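# Plain-Python sketch (doubles only, x > 0, integer n > 0; not the generated
# scheme) of the decomposition used above:
#     rootn(x, n) = 2**((log2(m) + e) / n)
#                 = 2**e_n_int * 2**e_n_frac * 2**(log2(m) / n),
# where e / n is split into a rounded integer part and a compensated
# fractional part so that the final scaling by 2**e_n_int is exact.
import math

def rootn_sketch(x, n):
    m, e = math.frexp(x)               # x = m * 2**e, m in [0.5, 1)
    e_n = e / n
    e_n_int = round(e_n)
    e_n_frac = (e - e_n_int * n) / n   # exact here since e and n are small ints
    r = math.log2(m) / n
    return math.ldexp(2.0**e_n_frac * 2.0**r, e_n_int)

if __name__ == "__main__":
    for x, n in ((27.0, 3), (1e300, 7), (3e-4, 5)):
        print(x, n, rootn_sketch(x, n), x**(1.0 / n))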
def generate_scheme(self): vx = self.implementation.add_input_variable("x", self.precision) sollya_precision = self.get_input_precision().sollya_object # local overloading of RaiseReturn operation def ExpRaiseReturn(*args, **kwords): kwords["arg_value"] = vx kwords["function_name"] = self.function_name return RaiseReturn(*args, **kwords) # 2-limb approximation of log(2) # hi part precision is reduced to provide exact operation # when multiplied by an exponent value log2_hi_value = round(log(2), self.precision.get_field_size() - (self.precision.get_exponent_size() + 1), sollya.RN) log2_lo_value = round(log(2) - log2_hi_value, self.precision.sollya_object, sollya.RN) log2_hi = Constant(log2_hi_value, precision=self.precision) log2_lo = Constant(log2_lo_value, precision=self.precision) int_precision = self.precision.get_integer_format() # retrieving processor inverse approximation table dummy_var = Variable("dummy", precision = self.precision) dummy_rcp_seed = ReciprocalSeed(dummy_var, precision = self.precision) inv_approx_table = self.processor.get_recursive_implementation(dummy_rcp_seed, language = None, table_getter = lambda self: self.approx_table_map) # table creation table_index_size = inv_approx_table.index_size log_table = ML_NewTable(dimensions = [2**table_index_size, 2], storage_precision = self.precision) # storing accurate logarithm approximation of value returned # by the fast reciprocal operation for i in range(0, 2**table_index_size): inv_value = inv_approx_table[i] value_high = round(log(inv_value), self.precision.get_field_size() - (self.precision.get_exponent_size() + 1), sollya.RN) value_low = round(log(inv_value) - value_high, sollya_precision, sollya.RN) log_table[i][0] = value_high log_table[i][1] = value_low neg_input = Comparison(vx, -1, likely=False, precision=ML_Bool, specifier=Comparison.Less, debug=debug_multi, tag="neg_input") vx_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False, precision=ML_Bool, debug=debug_multi, tag="nan_or_inf") vx_snan = Test(vx, specifier=Test.IsSignalingNaN, likely=False, debug=debug_multi, tag="snan") vx_inf = Test(vx, specifier=Test.IsInfty, likely=False, debug=debug_multi, tag="inf") vx_subnormal = Test(vx, specifier=Test.IsSubnormal, likely=False, debug=debug_multi, tag="vx_subnormal") # for x = m.2^e, such that e >= 0 # # log(1+x) = log(1 + m.2^e) # = log(2^e . 2^-e + m.2^e) # = log(2^e . (2^-e + m)) # = log(2^e) + log(2^-e + m) # = e . log(2) + log (2^-e + m) # # t = (2^-e + m) # t = m_t . 2^e_t # r ~ 1 / m_t => r.m_t ~ 1 ~ 0 # # t' = t . 2^-e_t # = 2^-e-e_t + m . 2^-e_t # # if e >= 0, then 2^-e <= 1, then 1 <= m + 2^-e <= 3 # r = m_r . 2^e_r # # log(1+x) = e.log(2) + log(r . 2^e_t . 2^-e_t . (2^-e + m) / r) # = e.log(2) + log(r . 2^(-e-e_t) + r.m.2^-e_t) + e_t . log(2)- log(r) # = (e+e_t).log(2) + log(r . t') - log(r) # = (e+e_t).log(2) + log(r . t') - log(r) # = (e+e_t).log(2) + P_log1p(r . 
t' - 1) - log(r) # # # argument reduction m = MantissaExtraction(vx, tag="vx", precision=self.precision, debug=debug_multi) e = ExponentExtraction(vx, tag="e", precision=int_precision, debug=debug_multi) # 2^-e TwoMinusE = ExponentInsertion(-e, tag="Two_minus_e", precision=self.precision, debug=debug_multi) t = Addition(TwoMinusE, m, precision=self.precision, tag="t", debug=debug_multi) m_t = MantissaExtraction(t, tag="m_t", precision=self.precision, debug=debug_multi) e_t = ExponentExtraction(t, tag="e_t", precision=int_precision, debug=debug_multi) # 2^(-e-e_t) TwoMinusEEt = ExponentInsertion(-e-e_t, tag="Two_minus_e_et", precision=self.precision) TwoMinusEt = ExponentInsertion(-e_t, tag="Two_minus_et", precision=self.precision, debug=debug_multi) rcp_mt = ReciprocalSeed(m_t, tag="rcp_mt", precision=self.precision, debug=debug_multi) INDEX_SIZE = table_index_size table_index = generic_mantissa_msb_index_fct(INDEX_SIZE, m_t) table_index.set_attributes(tag="table_index", debug=debug_multi) log_inv_lo = TableLoad(log_table, table_index, 1, tag="log_inv_lo", debug=debug_multi) log_inv_hi = TableLoad(log_table, table_index, 0, tag="log_inv_hi", debug=debug_multi) inv_err = S2**-6 # TODO: link to target DivisionSeed precision Log.report(Log.Info, "building mathematical polynomial") approx_interval = Interval(-inv_err, inv_err) approx_fct = sollya.log1p(sollya.x) / (sollya.x) poly_degree = sup(guessdegree(approx_fct, approx_interval, S2**-(self.precision.get_field_size()+1))) + 1 Log.report(Log.Debug, "poly_degree is {}", poly_degree) global_poly_object = Polynomial.build_from_approximation(approx_fct, poly_degree, [self.precision]*(poly_degree+1), approx_interval, sollya.absolute) poly_object = global_poly_object # .sub_poly(start_index=1) EXT_PRECISION_MAP = { ML_Binary32: ML_SingleSingle, ML_Binary64: ML_DoubleDouble, ML_SingleSingle: ML_TripleSingle, ML_DoubleDouble: ML_TripleDouble } if not self.precision in EXT_PRECISION_MAP: Log.report(Log.Error, "no extended precision available for {}", self.precision) ext_precision = EXT_PRECISION_MAP[self.precision] # pre_rtp = r . 
2^(-e-e_t) + m .2^-e_t pre_rtp = Addition( rcp_mt * TwoMinusEEt, Multiplication( rcp_mt, Multiplication( m, TwoMinusEt, precision=self.precision, tag="pre_mult", debug=debug_multi, ), precision=ext_precision, tag="pre_mult2", debug=debug_multi, ), precision=ext_precision, tag="pre_rtp", debug=debug_multi ) pre_red_vx = Addition( pre_rtp, -1, precision=ext_precision, ) red_vx = Conversion(pre_red_vx, precision=self.precision, tag="red_vx", debug=debug_multi) Log.report(Log.Info, "generating polynomial evaluation scheme") poly = PolynomialSchemeEvaluator.generate_horner_scheme( poly_object, red_vx, unified_precision=self.precision) poly.set_attributes(tag="poly", debug=debug_multi) Log.report(Log.Debug, "{}", global_poly_object.get_sollya_object()) fp_e = Conversion(e + e_t, precision=self.precision, tag="fp_e", debug=debug_multi) ext_poly = Multiplication(red_vx, poly, precision=ext_precision) pre_result = Addition( Addition( fp_e * log2_hi, fp_e * log2_lo, precision=ext_precision ), Addition( Addition( -log_inv_hi, -log_inv_lo, precision=ext_precision ), ext_poly, precision=ext_precision ), precision=ext_precision ) result = Conversion(pre_result, precision=self.precision, tag="result", debug=debug_multi) # main scheme Log.report(Log.Info, "MDL scheme") pre_scheme = ConditionBlock(neg_input, Statement( ClearException(), Raise(ML_FPE_Invalid), Return(FP_QNaN(self.precision)) ), ConditionBlock(vx_nan_or_inf, ConditionBlock(vx_inf, Statement( ClearException(), Return(FP_PlusInfty(self.precision)), ), Statement( ClearException(), ConditionBlock(vx_snan, Raise(ML_FPE_Invalid) ), Return(FP_QNaN(self.precision)) ) ), Return(result) ) ) scheme = pre_scheme return scheme
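# Standalone numeric check (plain Python) of the identity derived in the
# comment block above: writing x = m * 2**e and t = 2**-e + m, with
# t = m_t * 2**e_t and r an approximate reciprocal of m_t,
#     log(1 + x) = (e + e_t) * log(2) + log(r * t * 2**-e_t) - log(r).
# The reciprocal seed is emulated by rounding 1/m_t to 6 bits, mirroring the
# inv_err = S2**-6 accuracy assumption made by the scheme.
import math

def log1p_via_reduction(x):
    m, e = math.frexp(x)                   # x = m * 2**e, m in [0.5, 1)
    m *= 2.0
    e -= 1                                 # m in [1, 2)
    t = 2.0**-e + m                        # so that 1 + x = t * 2**e
    m_t, e_t = math.frexp(t)
    m_t *= 2.0
    e_t -= 1
    r = round((1.0 / m_t) * 64.0) / 64.0   # 6-bit reciprocal approximation
    red = r * t * 2.0**-e_t - 1.0          # small reduced argument
    return (e + e_t) * math.log(2.0) + math.log1p(red) - math.log(r)

if __name__ == "__main__":
    for x in (2.0**-10, 3.5, 1.0e5):
        print(x, log1p_via_reduction(x), math.log1p(x))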
def generate_reduced_log_split(self, _vx_mant, log_f, inv_approx_table, log_table, log_table_tho=None, corr_exp=None, tho_cond=None): """ Generate a logarithm approximation (log_f(_vx_mant) + corr_exp) for a reduced argument _vx_mant which is assumed to be within [1, 2[ (i.e. an extracted mantissa) Addiing exponent correction (optionnal) """ log2_hi_value = round( log_f(2), self.precision.get_field_size() - (self.precision.get_exponent_size() + 1), RN) log2_lo_value = round( log_f(2) - log2_hi_value, self.precision.sollya_object, RN) log2_hi = Constant(log2_hi_value, precision=self.precision) log2_lo = Constant(log2_lo_value, precision=self.precision) table_index = inv_approx_table.index_function(_vx_mant) table_index.set_attributes(tag="table_index", debug=debug_multi) rcp = ReciprocalSeed(_vx_mant, precision=self.precision, tag="rcp") r = Multiplication(rcp, _vx_mant, precision=self.precision, tag="r") int_format = self.precision.get_integer_format() # argument reduction # TODO: detect if single operand inverse seed is supported by the targeted architecture pre_arg_red_index = TypeCast(BitLogicAnd( TypeCast(ReciprocalSeed(_vx_mant, precision=self.precision, tag="seed", debug=debug_multi, silent=True), precision=int_format), Constant(-2, precision=int_format), precision=int_format), precision=self.precision, tag="pre_arg_red_index", debug=debug_multi) C0 = Constant(0, precision=table_index.get_precision()) index_comp_0 = Equal(table_index, C0, tag="index_comp_0", debug=debug_multi) arg_red_index = Select(index_comp_0, 1.0, pre_arg_red_index, tag="arg_red_index", debug=debug_multi) #_red_vx = arg_red_index * _vx_mant - 1.0 _red_vx = FMA(arg_red_index, _vx_mant, -1.0) inv_err = S2**-6 red_interval = Interval(1 - inv_err, 1 + inv_err) _red_vx.set_attributes(tag="_red_vx", debug=debug_multi, interval=red_interval) # return in case of standard (non-special) input if not tho_cond is None: assert not log_table_tho is None _log_inv_lo = Select(tho_cond, TableLoad(log_table_tho, table_index, 1), TableLoad(log_table, table_index, 1), tag="log_inv_lo", debug=debug_multi) _log_inv_hi = Select(tho_cond, TableLoad(log_table_tho, table_index, 0), TableLoad(log_table, table_index, 0), tag="log_inv_hi", debug=debug_multi) else: assert log_table_tho is None _log_inv_lo = TableLoad(log_table, table_index, 1) _log_inv_hi = TableLoad(log_table, table_index, 0) Log.report(Log.Info, "building mathematical polynomial") approx_interval = Interval(-inv_err, inv_err) poly_degree = sup( guessdegree( log(1 + sollya.x) / sollya.x, approx_interval, S2** -(self.precision.get_field_size() + 1))) + 1 global_poly_object = Polynomial.build_from_approximation( log(1 + x) / x, poly_degree, [self.precision] * (poly_degree + 1), approx_interval, sollya.absolute) poly_object = global_poly_object.sub_poly(start_index=1) Log.report(Log.Info, "generating polynomial evaluation scheme") _poly = PolynomialSchemeEvaluator.generate_horner_scheme( poly_object, _red_vx, unified_precision=self.precision) _poly.set_attributes(tag="poly", debug=debug_multi) Log.report(Log.Info, "{}", poly_object.get_sollya_object()) # _poly approximates log10(1+r)/r # _poly * red_vx approximates log10(x) m0h, m0l = Mul211(_red_vx, _poly) m0h, m0l = Add212(_red_vx, m0h, m0l) m0h.set_attributes(tag="m0h", debug=debug_multi) m0l.set_attributes(tag="m0l") if not corr_exp is None: l0_h = corr_exp * log2_hi l0_l = corr_exp * log2_lo l0_h.set_attributes(tag="l0_h") l0_l.set_attributes(tag="l0_l") rh, rl = Add222(l0_h, l0_l, m0h, m0l) else: # bypass exponent 
            # addition when exponent correction is disabled (corr_exp is None)
            rh, rl = m0h, m0l
        rh.set_attributes(tag="rh0", debug=debug_multi)
        rl.set_attributes(tag="rl0", debug=debug_multi)
        rh, rl = Add222(-_log_inv_hi, -_log_inv_lo, rh, rl)
        rh.set_attributes(tag="rh", debug=debug_multi)
        rl.set_attributes(tag="rl", debug=debug_multi)
        # FIXME: log<self.basis>(vx) is computed as log(vx) / log(self.basis),
        # which could be optimized for some values of self.basis (e.g. 2)
        if sollya.log(self.basis) != 1.0:
            lbh = self.precision.round_sollya_object(1 / sollya.log(self.basis))
            lbl = self.precision.round_sollya_object(1 / sollya.log(self.basis) - lbh)
            rh, rl = Mul222(rh, rl, lbh, lbl)
            return rh
        else:
            return rh
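# Generic textbook sketches (explicitly not the metalibm Mul211/Add212/Add222
# primitives) of the building blocks the hi/lo two-limb arithmetic above relies
# on: error-free sums (Fast2Sum / TwoSum) and a double-double plus double
# addition with renormalization, in the spirit of Add212.
def fast2sum(a, b):
    """(hi, lo) with hi + lo == a + b exactly, assuming |a| >= |b|."""
    hi = a + b
    lo = b - (hi - a)
    return hi, lo

def two_sum(a, b):
    """Knuth TwoSum: (hi, lo) with hi + lo == a + b exactly, no ordering assumption."""
    hi = a + b
    bb = hi - a
    lo = (a - (hi - bb)) + (b - bb)
    return hi, lo

def add_dd_d(ah, al, b):
    """Double-double (ah, al) plus double b, renormalized (sketch only)."""
    sh, sl = two_sum(ah, b)
    sl += al
    return fast2sum(sh, sl)

if __name__ == "__main__":
    hi, lo = two_sum(1.0, 2.0**-60)
    print(hi, lo)   # 1.0 and 2**-60: the part lost by the rounded sum is recovered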
def generate_scalar_scheme(self, vx, vy): # fixing inputs' node tag vx.set_attributes(tag="x") vy.set_attributes(tag="y") int_precision = self.precision.get_integer_format() # assuming x = m.2^e (m in [1, 2[) # n, positive or null integers # # pow(x, n) = x^(y) # = exp(y * log(x)) # = 2^(y * log2(x)) # = 2^(y * (log2(m) + e)) # e = ExponentExtraction(vx, tag="e", precision=int_precision) m = MantissaExtraction(vx, tag="m", precision=self.precision) # approximation log2(m) # retrieving processor inverse approximation table dummy_var = Variable("dummy", precision = self.precision) dummy_div_seed = ReciprocalSeed(dummy_var, precision = self.precision) inv_approx_table = self.processor.get_recursive_implementation( dummy_div_seed, language=None, table_getter= lambda self: self.approx_table_map) log_f = sollya.log(sollya.x) # /sollya.log(self.basis) ml_log_args = ML_GenericLog.get_default_args(precision=self.precision, basis=2) ml_log = ML_GenericLog(ml_log_args) log_table, log_table_tho, table_index_range = ml_log.generate_log_table(log_f, inv_approx_table) log_approx = ml_log.generate_reduced_log_split(Abs(m, precision=self.precision), log_f, inv_approx_table, log_table) log_approx = Select(Equal(vx, 0), FP_MinusInfty(self.precision), log_approx) log_approx.set_attributes(tag="log_approx", debug=debug_multi) r = Multiplication(log_approx, vy, tag="r", debug=debug_multi) # 2^(y * (log2(m) + e)) = 2^(y * log2(m)) * 2^(y * e) # # log_approx = log2(Abs(m)) # r = y * log_approx ~ y * log2(m) # # NOTES: manage cases where e is negative and # (y * log2(m)) AND (y * e) could cancel out # if e positive, whichever the sign of y (y * log2(m)) and (y * e) CANNOT # be of opposite signs # log2(m) in [0, 1[ so cancellation can occur only if e == -1 # we split 2^x in 2^x = 2^t0 * 2^t1 # if e < 0: t0 = y * (log2(m) + e), t1=0 # else: t0 = y * log2(m), t1 = y * e t_cond = e < 0 # e_y ~ e * y e_f = Conversion(e, precision=self.precision) #t0 = Select(t_cond, (e_f + log_approx) * vy, Multiplication(e_f, vy), tag="t0") #NearestInteger(t0, precision=self.precision, tag="t0_int") EY = NearestInteger(e_f * vy, tag="EY", precision=self.precision) LY = NearestInteger(log_approx * vy, tag="LY", precision=self.precision) t0_int = Select(t_cond, EY + LY, EY, tag="t0_int") t0_frac = Select(t_cond, FMA(e_f, vy, -EY) + FMA(log_approx, vy, -LY) ,EY - t0_int, tag="t0_frac") #t0_frac.set_attributes(tag="t0_frac") ml_exp2_args = ML_Exp2.get_default_args(precision=self.precision) ml_exp2 = ML_Exp2(ml_exp2_args) exp2_t0_frac = ml_exp2.generate_scalar_scheme(t0_frac, inline_select=True) exp2_t0_frac.set_attributes(tag="exp2_t0_frac", debug=debug_multi) exp2_t0_int = ExponentInsertion(Conversion(t0_int, precision=int_precision), precision=self.precision, tag="exp2_t0_int") t1 = Select(t_cond, Constant(0, precision=self.precision), r) exp2_t1 = ml_exp2.generate_scalar_scheme(t1, inline_select=True) exp2_t1.set_attributes(tag="exp2_t1", debug=debug_multi) result_sign = Constant(1.0, precision=self.precision) # Select(n_is_odd, CopySign(vx, Constant(1.0, precision=self.precision)), 1) y_int = NearestInteger(vy, precision=self.precision) y_is_integer = Equal(y_int, vy) y_is_even = LogicalOr( # if y is a number (exc. 
inf) greater than 2**mantissa_size * 2, # then it is an integer multiple of 2 => even Abs(vy) >= 2**(self.precision.get_mantissa_size()+1), LogicalAnd( y_is_integer and Abs(vy) < 2**(self.precision.get_mantissa_size()+1), # we want to limit the modulo computation to an integer input Equal(Modulo(Conversion(y_int, precision=int_precision), 2), 0) ) ) y_is_odd = LogicalAnd( LogicalAnd( Abs(vy) < 2**(self.precision.get_mantissa_size()+1), y_is_integer ), Equal(Modulo(Conversion(y_int, precision=int_precision), 2), 1) ) # special cases management special_case_results = Statement( # x is sNaN OR y is sNaN ConditionBlock( LogicalOr(Test(vx, specifier=Test.IsSignalingNaN), Test(vy, specifier=Test.IsSignalingNaN)), Return(FP_QNaN(self.precision)) ), # pow(x, ±0) is 1 if x is not a signaling NaN ConditionBlock( Test(vy, specifier=Test.IsZero), Return(Constant(1.0, precision=self.precision)) ), # pow(±0, y) is ±∞ and signals the divideByZero exception for y an odd integer <0 ConditionBlock( LogicalAnd(Test(vx, specifier=Test.IsZero), LogicalAnd(y_is_odd, vy < 0)), Return(Select(Test(vx, specifier=Test.IsPositiveZero), FP_PlusInfty(self.precision), FP_MinusInfty(self.precision))), ), # pow(±0, −∞) is +∞ with no exception ConditionBlock( LogicalAnd(Test(vx, specifier=Test.IsZero), Test(vy, specifier=Test.IsNegativeInfty)), Return(FP_MinusInfty(self.precision)), ), # pow(±0, +∞) is +0 with no exception ConditionBlock( LogicalAnd(Test(vx, specifier=Test.IsZero), Test(vy, specifier=Test.IsPositiveInfty)), Return(FP_PlusInfty(self.precision)), ), # pow(±0, y) is ±0 for finite y>0 an odd integer ConditionBlock( LogicalAnd(Test(vx, specifier=Test.IsZero), LogicalAnd(y_is_odd, vy > 0)), Return(vx), ), # pow(−1, ±∞) is 1 with no exception ConditionBlock( LogicalAnd(Equal(vx, -1), Test(vy, specifier=Test.IsInfty)), Return(Constant(1.0, precision=self.precision)), ), # pow(+1, y) is 1 for any y (even a quiet NaN) ConditionBlock( vx == 1, Return(Constant(1.0, precision=self.precision)), ), # pow(x, +∞) is +0 for −1<x<1 ConditionBlock( LogicalAnd(Abs(vx) < 1, Test(vy, specifier=Test.IsPositiveInfty)), Return(FP_PlusZero(self.precision)) ), # pow(x, +∞) is +∞ for x<−1 or for 1<x (including ±∞) ConditionBlock( LogicalAnd(Abs(vx) > 1, Test(vy, specifier=Test.IsPositiveInfty)), Return(FP_PlusInfty(self.precision)) ), # pow(x, −∞) is +∞ for −1<x<1 ConditionBlock( LogicalAnd(Abs(vx) < 1, Test(vy, specifier=Test.IsNegativeInfty)), Return(FP_PlusInfty(self.precision)) ), # pow(x, −∞) is +0 for x<−1 or for 1<x (including ±∞) ConditionBlock( LogicalAnd(Abs(vx) > 1, Test(vy, specifier=Test.IsNegativeInfty)), Return(FP_PlusZero(self.precision)) ), # pow(+∞, y) is +0 for a number y < 0 ConditionBlock( LogicalAnd(Test(vx, specifier=Test.IsPositiveInfty), vy < 0), Return(FP_PlusZero(self.precision)) ), # pow(+∞, y) is +∞ for a number y > 0 ConditionBlock( LogicalAnd(Test(vx, specifier=Test.IsPositiveInfty), vy > 0), Return(FP_PlusInfty(self.precision)) ), # pow(−∞, y) is −0 for finite y < 0 an odd integer # TODO: check y is finite ConditionBlock( LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty), LogicalAnd(y_is_odd, vy < 0)), Return(FP_MinusZero(self.precision)), ), # pow(−∞, y) is −∞ for finite y > 0 an odd integer # TODO: check y is finite ConditionBlock( LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty), LogicalAnd(y_is_odd, vy > 0)), Return(FP_MinusInfty(self.precision)), ), # pow(−∞, y) is +0 for finite y < 0 and not an odd integer # TODO: check y is finite ConditionBlock( LogicalAnd(Test(vx, 
specifier=Test.IsNegativeInfty), LogicalAnd(LogicalNot(y_is_odd), vy < 0)), Return(FP_PlusZero(self.precision)), ), # pow(−∞, y) is +∞ for finite y > 0 and not an odd integer # TODO: check y is finite ConditionBlock( LogicalAnd(Test(vx, specifier=Test.IsNegativeInfty), LogicalAnd(LogicalNot(y_is_odd), vy > 0)), Return(FP_PlusInfty(self.precision)), ), # pow(±0, y) is +∞ and signals the divideByZero exception for finite y<0 and not an odd integer # TODO: signal divideByZero exception ConditionBlock( LogicalAnd(Test(vx, specifier=Test.IsZero), LogicalAnd(LogicalNot(y_is_odd), vy < 0)), Return(FP_PlusInfty(self.precision)), ), # pow(±0, y) is +0 for finite y>0 and not an odd integer ConditionBlock( LogicalAnd(Test(vx, specifier=Test.IsZero), LogicalAnd(LogicalNot(y_is_odd), vy > 0)), Return(FP_PlusZero(self.precision)), ), ) # manage n=1 separately to avoid catastrophic propagation of errors # between log2 and exp2 to eventually compute the identity function # test-case #3 result = Statement( special_case_results, # fallback default cases Return(result_sign * exp2_t1 * exp2_t0_int * exp2_t0_frac)) return result
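# Plain-Python sketch of the parity classification used above for binary64 y:
# any finite |y| >= 2**(mantissa_size + 1) = 2**53 is necessarily an even
# integer (the spacing between representable values is at least 2 there);
# below that threshold the integer test and the modulo-2 test are meaningful.
import math

def classify_parity(y):
    """Return 'even', 'odd', or 'non-integer' for a finite double y."""
    if abs(y) >= 2.0**53:
        return "even"
    if y != math.floor(y):
        return "non-integer"
    return "odd" if int(y) % 2 else "even"

if __name__ == "__main__":
    for y in (3.0, 4.0, 2.5, 2.0**53, 2.0**53 + 2.0):
        print(y, classify_parity(y))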
def generate_scalar_scheme(self, vx): # local overloading of RaiseReturn operation def ExpRaiseReturn(*args, **kwords): kwords["arg_value"] = vx kwords["function_name"] = self.function_name return RaiseReturn(*args, **kwords) test_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False, debug=True, tag="nan_or_inf") test_nan = Test(vx, specifier=Test.IsNaN, debug=True, tag="is_nan_test") test_positive = Comparison(vx, 0, specifier=Comparison.GreaterOrEqual, debug=True, tag="inf_sign") test_signaling_nan = Test(vx, specifier=Test.IsSignalingNaN, debug=True, tag="is_signaling_nan") return_snan = Statement( ExpRaiseReturn(ML_FPE_Invalid, return_value=FP_QNaN(self.precision))) int_precision = self.precision.get_integer_format() vx_exp = ExponentExtraction(vx, tag="vx_exp", precision=int_precision, debug=debug_multi) #--------------------- # Approximation scheme #--------------------- # log(x) = log(m.2^e) = log(m.2^(e-tho+tho)) # = log(m.2^-tho) + (e+tho) log(2) # tho = (m > sqrt(2)) ? 1 : 0 is used to avoid catastrophic cancellation # when e = -1 and m ~ 2 # # # log(m.2^-tho) = log(m.r/r.2^-tho) = log(m.r) + log(2^-tho/r) # = log(m.r) - log(r.2^tho) # where r = rcp(m) an approximation of 1/m such that r.m ~ 1 # retrieving processor inverse approximation table dummy_var = Variable("dummy", precision=self.precision) dummy_div_seed = ReciprocalSeed(dummy_var, precision=self.precision) # table of the reciprocal approximation of the targeted processor inv_approx_table = self.processor.get_recursive_implementation( dummy_div_seed, language=None, table_getter=lambda self: self.approx_table_map) log_f = sollya.log(sollya.x) # /sollya.log(self.basis) log_table, log_table_tho, table_index_range = self.generate_log_table( log_f, inv_approx_table) # determining log_table range high_index_function = lambda table, i: table[i][0] low_index_function = lambda table, i: table[i][1] table_high_interval = log_table.get_subset_interval( high_index_function, table_index_range) table_low_interval = log_table.get_subset_interval( low_index_function, table_index_range) result = self.generate_reduced_log(vx, log_f, inv_approx_table, log_table, log_table_tho) result.set_attributes(tag="result", debug=debug_multi) if False: # building eval error map eval_error_map = { red_vx: Variable("red_vx", precision=self.precision, interval=red_vx.get_interval()), log_inv_hi: Variable("log_inv_hi", precision=self.precision, interval=table_high_interval), log_inv_lo: Variable("log_inv_lo", precision=self.precision, interval=table_low_interval), corr_exp: Variable("corr_exp_g", precision=self.precision, interval=self.precision.get_exponent_interval()), } # computing gappa error if is_gappa_installed(): poly_eval_error = self.get_eval_error(result, eval_error_map) Log.report(Log.Info, "poly_eval_error: ", poly_eval_error) neg_input = Comparison(vx, 0, likely=False, specifier=Comparison.Less, debug=debug_multi, tag="neg_input") vx_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False, debug=debug_multi, tag="nan_or_inf") vx_snan = Test(vx, specifier=Test.IsSignalingNaN, likely=False, debug=debug_multi, tag="snan") vx_inf = Test(vx, specifier=Test.IsInfty, likely=False, debug=debug_multi, tag="inf") vx_subnormal = Test(vx, specifier=Test.IsSubnormal, likely=False, debug=debug_multi, tag="vx_subnormal") vx_zero = Test(vx, specifier=Test.IsZero, likely=False, debug=debug_multi, tag="vx_zero") exp_mone = Equal(vx_exp, -1, tag="exp_minus_one", debug=debug_multi, likely=False) # exp=-1 case Log.report(Log.Info, "managing exp=-1 
case") #red_vx_2 = arg_red_index * vx_mant * 0.5 #approx_interval2 = Interval(0.5 - inv_err, 0.5 + inv_err) #poly_degree2 = sup(guessdegree(log(x), approx_interval2, S2**-(self.precision.get_field_size()+1))) + 1 #poly_object2 = Polynomial.build_from_approximation(log(sollya.x), poly_degree, [self.precision]*(poly_degree+1), approx_interval2, sollya.absolute) #print "poly_object2: ", poly_object2.get_sollya_object() #poly2 = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object2, red_vx_2, unified_precision = self.precision) #poly2.set_attributes(tag = "poly2", debug = debug_multi) #result2 = (poly2 - log_inv_hi - log_inv_lo) m100 = Constant(-100, precision=int_precision) S2100 = Constant(S2**100, precision=self.precision) result_subnormal = self.generate_reduced_log(vx * S2100, log_f, inv_approx_table, log_table, log_table_tho, exp_corr_factor=m100) # main scheme Log.report(Log.Info, "MDL scheme") pre_scheme = ConditionBlock( neg_input, Statement( ClearException(), Raise(ML_FPE_Invalid), Return(FP_QNaN(self.precision), precision=self.precision)), ConditionBlock( vx_nan_or_inf, ConditionBlock( vx_inf, Statement( ClearException(), Return(FP_PlusInfty(self.precision), precision=self.precision), ), Statement( ClearException(), ConditionBlock(vx_snan, Raise(ML_FPE_Invalid)), Return(FP_QNaN(self.precision), precision=self.precision))), ConditionBlock( vx_subnormal, ConditionBlock( vx_zero, Statement( ClearException(), Raise(ML_FPE_DivideByZero), Return(FP_MinusInfty(self.precision), precision=self.precision), ), Return(result_subnormal)), Return(result)))) scheme = pre_scheme return scheme
def generate_scheme(self): memory_limit = 2500 # local overloading of RaiseReturn operation def ExpRaiseReturn(*args, **kwords): kwords["arg_value"] = input_var kwords["function_name"] = self.function_name return RaiseReturn(*args, **kwords) ### Constants computations ### v_log2_hi = nearestint(log(2) * 2**-52) * 2**52 v_log2_lo = round(log(2) - v_log2_hi, 64+53, sollya.RN) log2_hi = Constant(v_log2_hi, precision = self.precision, tag = "log2_hi") log2_lo = Constant(v_log2_lo, precision = self.precision, tag = "log2_lo") print "\n\033[1mSearch parameters for the argument reduction:\033[0m (this can take a while)" arg_reduc = self.generate_argument_reduction(memory_limit) print "\n\033[1mArgument reduction found:\033[0m [({},{}),({},{})] -> polynomials of degree {},{}, using {} bytes of memory".format(arg_reduc['size1'],arg_reduc['prec1'],arg_reduc['size2'],arg_reduc['prec2'],arg_reduc['degree_poly1'],arg_reduc['degree_poly2'],arg_reduc['sizeof_tables']) print "\n\033[1mGenerate the first logarithm table:\033[0m containing {} elements, using {} bytes of memory".format(arg_reduc['length_table1'], arg_reduc['sizeof_table1']) inv_table_1 = ML_Table(dimensions = [arg_reduc['length_table1']], storage_precision = ML_Custom_FixedPoint_Format(1, arg_reduc['prec1'], False), tag = self.uniquify_name("inv_table_1")) log_table_1 = ML_Table(dimensions = [arg_reduc['length_table1']], storage_precision = ML_Custom_FixedPoint_Format(11, 128-11, False), tag = self.uniquify_name("log_table_1")) for i in xrange(0, arg_reduc['length_table1']-1): x1 = 1 + i/S2*arg_reduc['size1'] inv_x1 = ceil(S2**arg_reduc['prec1']/x1)*S2**arg_reduc['prec1'] log_x1 = floor(log(x1) * S2**(128-11))*S2**(11-128) inv_table_1[i] = inv_x1 #Constant(inv_x1, precision = ML_Custom_FixedPoint_Format(1, arg_reduc['prec1'], False)) log_table_1[i] = log_x1 #Constant(log_x1, precision = ML_Custom_FixedPoint_Format(11, 128-11, False)) print "\n\033[1mGenerate the second logarithm table:\033[0m containing {} elements, using {} bytes of memory".format(arg_reduc['length_table2'], arg_reduc['sizeof_table2']) inv_table_2 = ML_Table(dimensions = [arg_reduc['length_table2']], storage_precision = ML_Custom_FixedPoint_Format(1, arg_reduc['prec2'], False), tag = self.uniquify_name("inv_table_2")) log_table_2 = ML_Table(dimensions = [arg_reduc['length_table2']], storage_precision = ML_Custom_FixedPoint_Format(11, 128-11, False), tag = self.uniquify_name("log_table_2")) for i in xrange(0, arg_reduc['length_table2']-1): y1 = 1 + i/S2**arg_reduc['size2'] inv_y1 = ceil(S2**arg_reduc['prec2']/x1) * S2**arg_reduc['prec2'] log_y1 = floor(log(inv_y1) * S2**(128-11))*S2**(11-128) inv_table_2[i] = inv_y1 #Constant(inv_y1, precision = ML_Custom_FixedPoint_Format(1, arg_reduc['prec2'], False)) log_table_2[i] = log_y1 #Constant(log_y1, precision = ML_Custom_FixedPoint_Format(11, 128-11, False)) ### Evaluation Scheme ### print "\n\033[1mGenerate the evaluation scheme:\033[0m" input_var = self.implementation.add_input_variable("input_var", self.precision) ve = ExponentExtraction(input_var, tag = "x_exponent", debug = debugd) vx = MantissaExtraction(input_var, tag = "x_mantissa", precision = ML_Custom_FixedPoint_Format(0,52,False), debug = debug_lftolx) #vx = MantissaExtraction(input_var, tag = "x_mantissa", precision = self.precision, debug = debug_lftolx) print "filtering and handling special cases" test_is_special_cases = LogicalNot(Test(input_var, specifier = Test.IsIEEENormalPositive, likely = True, debug = debugd, tag = "is_special_cases")) handling_special_cases 
= Statement( ConditionBlock( Test(input_var, specifier = Test.IsSignalingNaN, debug = True), ExpRaiseReturn(ML_FPE_Invalid, return_value = FP_QNaN(self.precision)) ), ConditionBlock( Test(input_var, specifier = Test.IsNaN, debug = True), Return(input_var) )#, # TODO: add tests for x == 0 (raise DivideByZero, return -Inf), x < 0 (raise InvalidOperation, return qNaN) # all that remains is x is a subnormal positive #Statement( # ReferenceAssign(Dereference(ve), Subtraction(ve, Subtraction(CountLeadingZeros(input_var, tag = 'subnormal_clz', precision = ve.get_precision()), Constant(12, precision = ve.get_precision())))), # ReferenceAssign(Dereference(vx), BitLogicLeftShift(vx, Addition(CountLeadingZeros(input_var, tag = 'subnormal_clz', precision = ve.get_precision()), Constant(1, precision = ve.get_precision())))) #) ) print "doing the argument reduction" v_dx = vx v_x1 = Conversion(v_dx, tag = 'x1', precision = ML_Custom_FixedPoint_Format(0,arg_reduc['size1'],False), rounding_mode = ML_RoundTowardMinusInfty) v_index_x = TypeCast(v_x1, tag = 'index_x', precision = ML_Int32) #ML_Custom_FixedPoint_Format(v_x1.get_precision().get_c_bit_size(), 0, False)) v_inv_x = TableLoad(inv_table_1, v_index_x, tag = 'inv_x') v_x = Addition(v_dx, 1, tag = 'x', precision = ML_Custom_FixedPoint_Format(1,52,False)) v_dy = Multiplication(v_x, v_inv_x, tag = 'dy', precision = ML_Custom_FixedPoint_Format(0,52+arg_reduc['prec1'],False)) v_y1 = Conversion(v_dy, tag = 'y1', precision = ML_Custom_FixedPoint_Format(0,arg_reduc['size2'],False), rounding_mode = ML_RoundTowardMinusInfty) v_index_y = TypeCast(v_y1, tag = 'index_y', precision = ML_Int32) #ML_Custom_FixedPoint_Format(v_y1.get_precision().get_c_bit_size(), 0, False)) v_inv_y = TableLoad(inv_table_2, v_index_y, tag = 'inv_y') v_y = Addition(v_dy, 1, tag = 'y', precision = ML_Custom_FixedPoint_Format(1,52+arg_reduc['prec2'],False)) # note that we limit the number of bits used to represent dz to 64. # we proved during the arg reduction that we can do that (sup(out_interval) < 2^(64-52-prec1-prec2)) v_dz = Multiplication(v_y, v_inv_y, tag = 'z', precision = ML_Custom_FixedPoint_Format(64-52-arg_reduc['prec1']-arg_reduc['prec2'],52+arg_reduc['prec1']+arg_reduc['prec2'],False)) # reduce the number of bits used to represent dz. we can do that print "doing the first polynomial evaluation" global_poly1_object = Polynomial.build_from_approximation(log(1+sollya.x)/sollya.x, arg_reduc['degree_poly1']-1, [64] * (arg_reduc['degree_poly1']), arg_reduc['out_interval'], fixed, sollya.absolute) poly1_object = global_poly1_object.sub_poly(start_index = 1) print global_poly1_object print poly1_object poly1 = PolynomialSchemeEvaluator.generate_horner_scheme(poly1_object, v_dz, unified_precision = v_dz.get_precision()) return ConditionBlock(test_is_special_cases, handling_special_cases, Return(poly1)) #approx_interval = Interval(0, 27021597764222975*S2**-61) #poly_degree = 1+sup(guessdegree(log(1+x)/x, approx_interval, S2**-(self.precision.get_field_size()))) #global_poly_object = Polynomial.build_from_approximation(log(1+x)/x, poly_degree, [1] + [self.precision]*(poly_degree), approx_interval, sollya.absolute) #poly_object = global_poly_object.sub_poly(start_index = 1) #_poly = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, _red_vx, unified_precision = self.precision) #_poly.set_attributes(tag = "poly", debug = debug_lftolx) """
def compute_log(_vx, exp_corr_factor=None): _vx_mant = MantissaExtraction(_vx, tag="_vx_mant", precision=self.precision, debug=debug_multi) _vx_exp = ExponentExtraction(_vx, tag="_vx_exp", debug=debug_multi) table_index = inv_approx_table.index_function(_vx_mant) table_index.set_attributes(tag="table_index", debug=debug_multi) tho_cond = _vx_mant > Constant(sollya.sqrt(2), precision=self.precision) tho = Select(tho_cond, Constant(1.0, precision=self.precision), Constant(0.0, precision=self.precision), precision=self.precision, tag="tho", debug=debug_multi) rcp = ReciprocalSeed(_vx_mant, precision=self.precision, tag="rcp") r = Multiplication(rcp, _vx_mant, precision=self.precision, tag="r") int_format = self.precision.get_integer_format() # argument reduction # TODO: detect if single operand inverse seed is supported by the targeted architecture pre_arg_red_index = TypeCast(BitLogicAnd( TypeCast(ReciprocalSeed(_vx_mant, precision=self.precision, tag="seed", debug=debug_multi, silent=True), precision=int_format), Constant(-2, precision=int_format), precision=int_format), precision=self.precision, tag="pre_arg_red_index", debug=debug_multi) arg_red_index = Select(Equal(table_index, 0), 1.0, pre_arg_red_index, tag="arg_red_index", debug=debug_multi) _red_vx = arg_red_index * _vx_mant - 1.0 inv_err = S2**-6 red_interval = Interval(1 - inv_err, 1 + inv_err) _red_vx.set_attributes(tag="_red_vx", debug=debug_multi, interval=red_interval) # return in case of standard (non-special) input _log_inv_lo = Select(tho_cond, TableLoad(log_table_tho, table_index, 1), TableLoad(log_table, table_index, 1), tag="log_inv_lo", debug=debug_multi) _log_inv_hi = Select(tho_cond, TableLoad(log_table_tho, table_index, 0), TableLoad(log_table, table_index, 0), tag="log_inv_hi", debug=debug_multi) Log.report(Log.Info, "building mathematical polynomial") approx_interval = Interval(-inv_err, inv_err) poly_degree = sup( guessdegree( log(1 + sollya.x) / sollya.x, approx_interval, S2** -(self.precision.get_field_size() + 1))) + 1 global_poly_object = Polynomial.build_from_approximation( log(1 + x) / x, poly_degree, [self.precision] * (poly_degree + 1), approx_interval, sollya.absolute) poly_object = global_poly_object.sub_poly(start_index=1) Log.report(Log.Info, "generating polynomial evaluation scheme") _poly = PolynomialSchemeEvaluator.generate_horner_scheme( poly_object, _red_vx, unified_precision=self.precision) _poly.set_attributes(tag="poly", debug=debug_multi) Log.report(Log.Info, poly_object.get_sollya_object()) corr_exp = Conversion(_vx_exp if exp_corr_factor == None else _vx_exp + exp_corr_factor, precision=self.precision) + tho corr_exp.set_attributes(tag="corr_exp", debug=debug_multi) # _poly approximates log10(1+r)/r # _poly * red_vx approximates log10(x) m0h, m0l = Mul211(_red_vx, _poly) m0h, m0l = Add212(_red_vx, m0h, m0l) m0h.set_attributes(tag="m0h", debug=debug_multi) m0l.set_attributes(tag="m0l") l0_h = corr_exp * log2_hi l0_l = corr_exp * log2_lo l0_h.set_attributes(tag="l0_h") l0_l.set_attributes(tag="l0_l") rh, rl = Add222(l0_h, l0_l, m0h, m0l) rh.set_attributes(tag="rh0", debug=debug_multi) rl.set_attributes(tag="rl0", debug=debug_multi) rh, rl = Add222(-_log_inv_hi, -_log_inv_lo, rh, rl) rh.set_attributes(tag="rh", debug=debug_multi) rl.set_attributes(tag="rl", debug=debug_multi) if sollya.log(self.basis) != 1.0: lbh = self.precision.round_sollya_object( 1 / sollya.log(self.basis)) lbl = self.precision.round_sollya_object( 1 / sollya.log(self.basis) - lbh) rh, rl = Mul222(rh, rl, lbh, lbl) return rh 
else: return rh
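# Illustrative sketch (standalone, not generated code): the two-level table-based
# argument reduction built above can be prototyped with plain Python floats.
# The table widths below (size1, size2), the second-level indexing and the use of
# math.log/math.log1p in place of the fixed-point tables and the polynomial are
# assumptions made for this example only.
import math

def _sketch_two_level_log_reduction(m, size1=7, size2=7):
    """Approximate log(m) for a mantissa m in [1, 2) with two reductions."""
    # first level: x1 = 1 + i/2^size1, r1 plays the role of inv_table_1[i]
    i = int((m - 1.0) * 2**size1)
    x1 = 1.0 + i / 2.0**size1
    r1 = 1.0 / x1
    dy = m * r1 - 1.0                          # dy in [0, ~2^-size1)
    # second level: y1 = 1 + j/2^(size1+size2), r2 plays the role of inv_table_2[j]
    j = int(dy * 2**(size1 + size2))
    y1 = 1.0 + j / 2.0**(size1 + size2)
    r2 = 1.0 / y1
    dz = (1.0 + dy) * r2 - 1.0                 # |dz| ~ 2^-(size1+size2)
    # log(m) = log(x1) + log(y1) + log(1 + dz); the last term is what the
    # low-degree polynomial evaluates in the real scheme
    return math.log(x1) + math.log(y1) + math.log1p(dz)

# the reduction identity holds to within a few ulps for any mantissa in [1, 2)
assert abs(_sketch_two_level_log_reduction(1.2345) - math.log(1.2345)) < 1e-12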
def numeric_emulate(self, input_value):
    """Return a high-accuracy reference value for the natural logarithm."""
    return log(input_value)
def numeric_emulate(self, input_value):
    """Return a high-accuracy reference value for the logarithm in base self.basis."""
    return sollya.log(input_value) / sollya.log(self.basis)
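# Minimal standalone sketch of what these numeric_emulate variants provide: a
# reference value based on the change-of-base identity log_b(x) = ln(x) / ln(b).
# math.log stands in for sollya.log so the snippet has no external dependency;
# the tolerances are assumptions of the example.
import math

def _reference_log(x, basis=math.e):
    """Reference logarithm of x in the given basis."""
    if basis == math.e:
        return math.log(x)
    return math.log(x) / math.log(basis)

# log2 and log10 fall out of the same identity
assert abs(_reference_log(8.0, 2) - 3.0) < 1e-12
assert abs(_reference_log(1000.0, 10) - 3.0) < 1e-12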
def generate_scalar_scheme(self, vx): Log.set_dump_stdout(True) Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m") if self.debug_flag: Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m") index_size = 5 comp_lo = (vx < 0) comp_lo.set_attributes(tag = "comp_lo", precision = ML_Bool) sign = Select(comp_lo, -1, 1, precision = self.precision) # as sinh is an odd function, we can simplify the input to its absolute # value once the sign has been extracted vx = Abs(vx) int_precision = self.precision.get_integer_format() # argument reduction arg_reg_value = log(2)/2**index_size inv_log2_value = round(1/arg_reg_value, self.precision.get_sollya_object(), sollya.RN) inv_log2_cst = Constant(inv_log2_value, precision = self.precision, tag = "inv_log2") # for r_hi to be accurate we ensure k * log2_hi_value_cst is exact # by limiting the number of non-zero bits in log2_hi_value_cst # cosh(x) ~ exp(abs(x))/2 for a big enough x # cosh(x) > 2^1023 <=> exp(x) > 2^1024 <=> x > log(2^1024) # k = inv_log2_value * x # -1 for guard max_k_approx = inv_log2_value * log(sollya.SollyaObject(2)**1024) max_k_bitsize = int(ceil(log2(max_k_approx))) Log.report(Log.Info, "max_k_bitsize: %d" % max_k_bitsize) log2_hi_value_precision = self.precision.get_precision() - max_k_bitsize - 1 log2_hi_value = round(arg_reg_value, log2_hi_value_precision, sollya.RN) log2_lo_value = round(arg_reg_value - log2_hi_value, self.precision.get_sollya_object(), sollya.RN) log2_hi_value_cst = Constant(log2_hi_value, tag = "log2_hi_value", precision = self.precision) log2_lo_value_cst = Constant(log2_lo_value, tag = "log2_lo_value", precision = self.precision) k = Trunc(Multiplication(inv_log2_cst, vx), precision = self.precision) k_log2 = Multiplication(k, log2_hi_value_cst, precision = self.precision, exact = True, tag = "k_log2", unbreakable = True) r_hi = vx - k_log2 r_hi.set_attributes(tag = "r_hi", debug = debug_multi, unbreakable = True) r_lo = -k * log2_lo_value_cst # reduced argument r = r_hi + r_lo r.set_attributes(tag = "r", debug = debug_multi) if is_gappa_installed(): r_eval_error = self.get_eval_error(r_hi, variable_copy_map = { vx: Variable("vx", interval = Interval(0, 715), precision = self.precision), k: Variable("k", interval = Interval(0, 1024), precision = self.precision) }) Log.report(Log.Verbose, "r_eval_error: ", r_eval_error) approx_interval = Interval(-arg_reg_value, arg_reg_value) error_goal_approx = 2**-(self.precision.get_precision()) poly_degree = sup(guessdegree(exp(sollya.x), approx_interval, error_goal_approx)) + 3 precision_list = [1] + [self.precision] * (poly_degree) k_integer = Conversion(k, precision = int_precision, tag = "k_integer", debug = debug_multi) k_hi = BitLogicRightShift(k_integer, Constant(index_size, precision=int_precision), tag = "k_int_hi", precision = int_precision, debug = debug_multi) k_lo = Modulo(k_integer, 2**index_size, tag = "k_int_lo", precision = int_precision, debug = debug_multi) pow_exp = ExponentInsertion(Conversion(k_hi, precision = int_precision), precision = self.precision, tag = "pow_exp", debug = debug_multi) exp_table = ML_NewTable(dimensions = [2 * 2**index_size, 4], storage_precision = self.precision, tag = self.uniquify_name("exp2_table")) for i in range(2 * 2**index_size): input_value = i - 2**index_size if i >= 2**index_size else i reduced_hi_prec = int(self.precision.get_mantissa_size() - 8) # using SollyaObject wrapper to force evaluation by sollya # with higher precision exp_value = sollya.SollyaObject(2)**((input_value)* 
2**-index_size) mexp_value = sollya.SollyaObject(2)**((-input_value)* 2**-index_size) pos_value_hi = round(exp_value, reduced_hi_prec, sollya.RN) pos_value_lo = round(exp_value - pos_value_hi, self.precision.get_sollya_object(), sollya.RN) neg_value_hi = round(mexp_value, reduced_hi_prec, sollya.RN) neg_value_lo = round(mexp_value - neg_value_hi, self.precision.get_sollya_object(), sollya.RN) exp_table[i][0] = neg_value_hi exp_table[i][1] = neg_value_lo exp_table[i][2] = pos_value_hi exp_table[i][3] = pos_value_lo # log2_value = log(2) / 2^index_size # sinh(x) = 1/2 * (exp(x) - exp(-x)) # exp(x) = exp(x - k * log2_value + k * log2_value) # # r = x - k * log2_value # exp(x) = exp(r) * 2 ^ (k / 2^index_size) # # k / 2^index_size = h + l * 2^-index_size, with k, h, l integers # exp(x) = exp(r) * 2^h * 2^(l *2^-index_size) # # sinh(x) = exp(r) * 2^(h-1) * 2^(l *2^-index_size) - exp(-r) * 2^(-h-1) * 2^(-l *2^-index_size) # S=2^(h-1), T = 2^(-h-1) # exp(r) = 1 + poly_pos(r) # exp(-r) = 1 + poly_neg(r) # 2^(l / 2^index_size) = pos_value_hi + pos_value_lo # 2^(-l / 2^index_size) = neg_value_hi + neg_value_lo # error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai) poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(exp(sollya.x), poly_degree, precision_list, approx_interval, sollya.absolute, error_function = error_function) Log.report(Log.Verbose, "poly_approx_error: {}, {}".format(poly_approx_error, float(log2(poly_approx_error)))) polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme poly_pos = polynomial_scheme_builder(poly_object.sub_poly(start_index = 1), r, unified_precision = self.precision) poly_pos.set_attributes(tag = "poly_pos", debug = debug_multi) poly_neg = polynomial_scheme_builder(poly_object.sub_poly(start_index = 1), -r, unified_precision = self.precision) poly_neg.set_attributes(tag = "poly_neg", debug = debug_multi) table_index = Addition(k_lo, Constant(2**index_size, precision = int_precision), precision = int_precision, tag = "table_index", debug = debug_multi) neg_value_load_hi = TableLoad(exp_table, table_index, 0, tag = "neg_value_load_hi", debug = debug_multi) neg_value_load_lo = TableLoad(exp_table, table_index, 1, tag = "neg_value_load_lo", debug = debug_multi) pos_value_load_hi = TableLoad(exp_table, table_index, 2, tag = "pos_value_load_hi", debug = debug_multi) pos_value_load_lo = TableLoad(exp_table, table_index, 3, tag = "pos_value_load_lo", debug = debug_multi) k_plus = Max( Subtraction(k_hi, Constant(1, precision = int_precision), precision=int_precision, tag="k_plus", debug=debug_multi), Constant(self.precision.get_emin_normal(), precision = int_precision)) k_neg = Max( Subtraction(-k_hi, Constant(1, precision=int_precision), precision=int_precision, tag="k_neg", debug=debug_multi), Constant(self.precision.get_emin_normal(), precision = int_precision)) # 2^(h-1) pow_exp_pos = ExponentInsertion(k_plus, precision = self.precision, tag="pow_exp_pos", debug=debug_multi) # 2^(-h-1) pow_exp_neg = ExponentInsertion(k_neg, precision = self.precision, tag="pow_exp_neg", debug=debug_multi) hi_terms = (pos_value_load_hi * pow_exp_pos - neg_value_load_hi * pow_exp_neg) hi_terms.set_attributes(tag = "hi_terms", debug=debug_multi) pos_exp = (pos_value_load_hi * poly_pos + (pos_value_load_lo + pos_value_load_lo * poly_pos)) * pow_exp_pos pos_exp.set_attributes(tag = "pos_exp", debug = debug_multi) neg_exp = (neg_value_load_hi * poly_neg + (neg_value_load_lo + neg_value_load_lo * poly_neg)) * pow_exp_neg 
neg_exp.set_attributes(tag = "neg_exp", debug = debug_multi) result = Addition( Subtraction( pos_exp, neg_exp, precision=self.precision, ), hi_terms, precision=self.precision, tag="result", debug=debug_multi ) # ov_value ov_value = round(asinh(self.precision.get_max_value()), self.precision.get_sollya_object(), sollya.RD) ov_flag = Comparison(Abs(vx), Constant(ov_value, precision = self.precision), specifier = Comparison.Greater) # main scheme scheme = Statement( Return( Select( ov_flag, sign*FP_PlusInfty(self.precision), sign*result ))) return scheme
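# Standalone sketch of the reconstruction performed above, in plain Python floats
# and for x >= 0 (the scheme extracts the sign first). index_size and the use of
# math.exp instead of the table plus polynomial are assumptions of the example.
import math

def _sketch_sinh(x, index_size=5):
    step = math.log(2) / 2**index_size         # log(2) / 2^index_size
    k = int(x / step)                          # truncated, as with Trunc above
    r = x - k * step                           # reduced argument
    h = k >> index_size                        # k = h * 2^index_size + l
    l = k & (2**index_size - 1)
    # sinh(x) = exp(r)*2^(l/2^w)*2^(h-1) - exp(-r)*2^(-l/2^w)*2^(-h-1)
    pos = math.exp(r) * 2.0**(l / 2.0**index_size) * 2.0**(h - 1)
    neg = math.exp(-r) * 2.0**(-l / 2.0**index_size) * 2.0**(-h - 1)
    return pos - neg

assert abs(_sketch_sinh(3.7) - math.sinh(3.7)) < 1e-12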
def generate_scheme(self): vx = self.implementation.add_input_variable("x", self.get_input_precision()) sollya_precision = self.get_input_precision().get_sollya_object() log_f = sollya.log(sollya.x) # /sollya.log(self.basis) # local overloading of RaiseReturn operation def ExpRaiseReturn(*args, **kwords): kwords["arg_value"] = vx kwords["function_name"] = self.function_name return RaiseReturn(*args, **kwords) test_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False, debug=True, tag="nan_or_inf") test_nan = Test(vx, specifier=Test.IsNaN, debug=True, tag="is_nan_test") test_positive = Comparison(vx, 0, specifier=Comparison.GreaterOrEqual, debug=True, tag="inf_sign") test_signaling_nan = Test(vx, specifier=Test.IsSignalingNaN, debug=True, tag="is_signaling_nan") return_snan = Statement( ExpRaiseReturn(ML_FPE_Invalid, return_value=FP_QNaN(self.precision))) log2_hi_value = round( log_f(2), self.precision.get_field_size() - (self.precision.get_exponent_size() + 1), RN) log2_lo_value = round( log_f(2) - log2_hi_value, self.precision.sollya_object, RN) log2_hi = Constant(log2_hi_value, precision=self.precision) log2_lo = Constant(log2_lo_value, precision=self.precision) int_precision = self.precision.get_integer_format() vx_exp = ExponentExtraction(vx, tag="vx_exp", debug=debug_multi) #--------------------- # Approximation scheme #--------------------- # log10(x) = log10(m.2^e) = log10(m.2^(e-t+t)) # = log10(m.2^-t) + (e+t) log10(2) # t = (m > sqrt(2)) ? 1 : 0 is used to avoid catastrophic cancellation # when e = -1 and m ~ 2 # # # log10(m.2^-t) = log10(m.r/r.2^-t) = log10(m.r) + log10(2^-t/r) # = log10(m.r) - log10(r.2^t) # where r = rcp(m) an approximation of 1/m such that r.m ~ 1 # retrieving processor inverse approximation table dummy_var = Variable("dummy", precision=self.precision) dummy_div_seed = ReciprocalSeed(dummy_var, precision=self.precision) inv_approx_table = self.processor.get_recursive_implementation( dummy_div_seed, language=None, table_getter=lambda self: self.approx_table_map) # table creation table_index_size = inv_approx_table.index_size table_index_range = range(1, 2**table_index_size) log_table = ML_NewTable(dimensions=[2**table_index_size, 2], storage_precision=self.precision) log_table_tho = ML_NewTable(dimensions=[2**table_index_size, 2], storage_precision=self.precision) log_table[0][0] = 0.0 log_table[0][1] = 0.0 log_table_tho[0][0] = 0.0 log_table_tho[0][1] = 0.0 hi_size = self.precision.get_field_size() - ( self.precision.get_exponent_size() + 1) for i in table_index_range: #inv_value = (1.0 + (self.processor.inv_approx_table[i] / S2**9) + S2**-52) * S2**-1 #inv_value = (1.0 + (inv_approx_table[i][0] / S2**9) ) * S2**-1 inv_value = inv_approx_table[i] value_high = round(log_f(inv_value), hi_size, sollya.RN) value_low = round( log_f(inv_value) - value_high, sollya_precision, sollya.RN) log_table[i][0] = value_high log_table[i][1] = value_low inv_value_tho = S2 * inv_approx_table[i] value_high_tho = round(log_f(inv_value_tho), hi_size, sollya.RN) value_low_tho = round( log_f(inv_value_tho) - value_high_tho, sollya_precision, sollya.RN) log_table_tho[i][0] = value_high_tho log_table_tho[i][1] = value_low_tho # determining log_table range high_index_function = lambda table, i: table[i][0] low_index_function = lambda table, i: table[i][1] table_high_interval = log_table.get_subset_interval( high_index_function, table_index_range) table_low_interval = log_table.get_subset_interval( low_index_function, table_index_range) def compute_log(_vx, exp_corr_factor=None): 
_vx_mant = MantissaExtraction(_vx, tag="_vx_mant", precision=self.precision, debug=debug_multi) _vx_exp = ExponentExtraction(_vx, tag="_vx_exp", debug=debug_multi) table_index = inv_approx_table.index_function(_vx_mant) table_index.set_attributes(tag="table_index", debug=debug_multi) tho_cond = _vx_mant > Constant(sollya.sqrt(2), precision=self.precision) tho = Select(tho_cond, Constant(1.0, precision=self.precision), Constant(0.0, precision=self.precision), precision=self.precision, tag="tho", debug=debug_multi) rcp = ReciprocalSeed(_vx_mant, precision=self.precision, tag="rcp") r = Multiplication(rcp, _vx_mant, precision=self.precision, tag="r") int_format = self.precision.get_integer_format() # argument reduction # TODO: detect if single operand inverse seed is supported by the targeted architecture pre_arg_red_index = TypeCast(BitLogicAnd( TypeCast(ReciprocalSeed(_vx_mant, precision=self.precision, tag="seed", debug=debug_multi, silent=True), precision=int_format), Constant(-2, precision=int_format), precision=int_format), precision=self.precision, tag="pre_arg_red_index", debug=debug_multi) arg_red_index = Select(Equal(table_index, 0), 1.0, pre_arg_red_index, tag="arg_red_index", debug=debug_multi) _red_vx = arg_red_index * _vx_mant - 1.0 inv_err = S2**-6 red_interval = Interval(1 - inv_err, 1 + inv_err) _red_vx.set_attributes(tag="_red_vx", debug=debug_multi, interval=red_interval) # return in case of standard (non-special) input _log_inv_lo = Select(tho_cond, TableLoad(log_table_tho, table_index, 1), TableLoad(log_table, table_index, 1), tag="log_inv_lo", debug=debug_multi) _log_inv_hi = Select(tho_cond, TableLoad(log_table_tho, table_index, 0), TableLoad(log_table, table_index, 0), tag="log_inv_hi", debug=debug_multi) Log.report(Log.Info, "building mathematical polynomial") approx_interval = Interval(-inv_err, inv_err) poly_degree = sup( guessdegree( log(1 + sollya.x) / sollya.x, approx_interval, S2** -(self.precision.get_field_size() + 1))) + 1 global_poly_object = Polynomial.build_from_approximation( log(1 + x) / x, poly_degree, [self.precision] * (poly_degree + 1), approx_interval, sollya.absolute) poly_object = global_poly_object.sub_poly(start_index=1) Log.report(Log.Info, "generating polynomial evaluation scheme") _poly = PolynomialSchemeEvaluator.generate_horner_scheme( poly_object, _red_vx, unified_precision=self.precision) _poly.set_attributes(tag="poly", debug=debug_multi) Log.report(Log.Info, poly_object.get_sollya_object()) corr_exp = Conversion(_vx_exp if exp_corr_factor == None else _vx_exp + exp_corr_factor, precision=self.precision) + tho corr_exp.set_attributes(tag="corr_exp", debug=debug_multi) # _poly approximates log10(1+r)/r # _poly * red_vx approximates log10(x) m0h, m0l = Mul211(_red_vx, _poly) m0h, m0l = Add212(_red_vx, m0h, m0l) m0h.set_attributes(tag="m0h", debug=debug_multi) m0l.set_attributes(tag="m0l") l0_h = corr_exp * log2_hi l0_l = corr_exp * log2_lo l0_h.set_attributes(tag="l0_h") l0_l.set_attributes(tag="l0_l") rh, rl = Add222(l0_h, l0_l, m0h, m0l) rh.set_attributes(tag="rh0", debug=debug_multi) rl.set_attributes(tag="rl0", debug=debug_multi) rh, rl = Add222(-_log_inv_hi, -_log_inv_lo, rh, rl) rh.set_attributes(tag="rh", debug=debug_multi) rl.set_attributes(tag="rl", debug=debug_multi) if sollya.log(self.basis) != 1.0: lbh = self.precision.round_sollya_object( 1 / sollya.log(self.basis)) lbl = self.precision.round_sollya_object( 1 / sollya.log(self.basis) - lbh) rh, rl = Mul222(rh, rl, lbh, lbl) return rh else: return rh result = compute_log(vx) 
result.set_attributes(tag="result", debug=debug_multi) if False: # building eval error map eval_error_map = { red_vx: Variable("red_vx", precision=self.precision, interval=red_vx.get_interval()), log_inv_hi: Variable("log_inv_hi", precision=self.precision, interval=table_high_interval), log_inv_lo: Variable("log_inv_lo", precision=self.precision, interval=table_low_interval), corr_exp: Variable("corr_exp_g", precision=self.precision, interval=self.precision.get_exponent_interval()), } # computing gappa error if is_gappa_installed(): poly_eval_error = self.get_eval_error(result, eval_error_map) Log.report(Log.Info, "poly_eval_error: ", poly_eval_error) neg_input = Comparison(vx, 0, likely=False, specifier=Comparison.Less, debug=debug_multi, tag="neg_input") vx_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False, debug=debug_multi, tag="nan_or_inf") vx_snan = Test(vx, specifier=Test.IsSignalingNaN, likely=False, debug=debug_multi, tag="snan") vx_inf = Test(vx, specifier=Test.IsInfty, likely=False, debug=debug_multi, tag="inf") vx_subnormal = Test(vx, specifier=Test.IsSubnormal, likely=False, debug=debug_multi, tag="vx_subnormal") vx_zero = Test(vx, specifier=Test.IsZero, likely=False, debug=debug_multi, tag="vx_zero") exp_mone = Equal(vx_exp, -1, tag="exp_minus_one", debug=debug_multi, likely=False) # exp=-1 case Log.report(Log.Info, "managing exp=-1 case") #red_vx_2 = arg_red_index * vx_mant * 0.5 #approx_interval2 = Interval(0.5 - inv_err, 0.5 + inv_err) #poly_degree2 = sup(guessdegree(log(x), approx_interval2, S2**-(self.precision.get_field_size()+1))) + 1 #poly_object2 = Polynomial.build_from_approximation(log(sollya.x), poly_degree, [self.precision]*(poly_degree+1), approx_interval2, sollya.absolute) #print "poly_object2: ", poly_object2.get_sollya_object() #poly2 = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object2, red_vx_2, unified_precision = self.precision) #poly2.set_attributes(tag = "poly2", debug = debug_multi) #result2 = (poly2 - log_inv_hi - log_inv_lo) m100 = -100 S2100 = Constant(S2**100, precision=self.precision) result_subnormal = compute_log(vx * S2100, exp_corr_factor=m100) # main scheme Log.report(Log.Info, "MDL scheme") pre_scheme = ConditionBlock( neg_input, Statement(ClearException(), Raise(ML_FPE_Invalid), Return(FP_QNaN(self.precision))), ConditionBlock( vx_nan_or_inf, ConditionBlock( vx_inf, Statement( ClearException(), Return(FP_PlusInfty(self.precision)), ), Statement(ClearException(), ConditionBlock(vx_snan, Raise(ML_FPE_Invalid)), Return(FP_QNaN(self.precision)))), ConditionBlock( vx_subnormal, ConditionBlock( vx_zero, Statement( ClearException(), Raise(ML_FPE_DivideByZero), Return(FP_MinusInfty(self.precision)), ), Return(result_subnormal)), Return(result)))) scheme = pre_scheme return scheme
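# Standalone sketch of the 'tho' correction used by compute_log above: writing
# log(x) = log(m * 2^-t) + (e + t) * log(2), with t = 1 whenever the mantissa m
# exceeds sqrt(2), keeps the tabulated term away from the cancellation that
# occurs for e = -1 and m close to 2 (i.e. x slightly below 1). math.frexp and
# math.log stand in for the scheme's operators in this example.
import math

def _sketch_log_with_tho(x):
    half_m, e2 = math.frexp(x)        # x = half_m * 2^e2 with half_m in [0.5, 1)
    m, e = 2.0 * half_m, e2 - 1       # mantissa in [1, 2), unbiased exponent
    t = 1 if m > math.sqrt(2.0) else 0
    return math.log(m * 2.0**-t) + (e + t) * math.log(2.0)

# the identity holds both in the generic case and in the e = -1, m ~ 2 case
assert abs(_sketch_log_with_tho(12.345) - math.log(12.345)) < 1e-12
assert abs(_sketch_log_with_tho(0.9999999) - math.log(0.9999999)) < 1e-12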
def generate_scheme(self): vx = self.implementation.add_input_variable("x", self.get_input_precision()) sollya_precision = self.get_input_precision().get_sollya_object() # local overloading of RaiseReturn operation def ExpRaiseReturn(*args, **kwords): kwords["arg_value"] = vx kwords["function_name"] = self.function_name return RaiseReturn(*args, **kwords) # testing special value inputs test_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False, debug=True, tag="nan_or_inf") test_nan = Test(vx, specifier=Test.IsNaN, debug=True, tag="is_nan_test") test_positive = Comparison(vx, 0, specifier=Comparison.GreaterOrEqual, debug=True, tag="inf_sign") test_signaling_nan = Test(vx, specifier=Test.IsSignalingNaN, debug=True, tag="is_signaling_nan") # if input is a signaling NaN, raise an invalid exception and returns # a quiet NaN return_snan = Statement( ExpRaiseReturn(ML_FPE_Invalid, return_value=FP_QNaN(self.precision))) vx_exp = ExponentExtraction(vx, tag="vx_exp", debug=debugd) int_precision = self.precision.get_integer_format() # log2(vx) # r = vx_mant # e = vx_exp # vx reduced to r in [1, 2[ # log2(vx) = log2(r * 2^e) # = log2(r) + e # ## log2(r) is approximated by # log2(r) = log2(inv_seed(r) * r / inv_seed(r) # = log2(inv_seed(r) * r) - log2(inv_seed(r)) # inv_seed(r) in ]1/2, 1] => log2(inv_seed(r)) in ]-1, 0] # # inv_seed(r) * r ~ 1 # we can easily tabulate -log2(inv_seed(r)) # # retrieving processor inverse approximation table dummy_var = Variable("dummy", precision=self.precision) dummy_div_seed = DivisionSeed(dummy_var, precision=self.precision) inv_approx_table = self.processor.get_recursive_implementation( dummy_div_seed, language=None, table_getter=lambda self: self.approx_table_map) # table creation table_index_size = 7 log_table = ML_NewTable(dimensions=[2**table_index_size, 2], storage_precision=self.precision, tag=self.uniquify_name("inv_table")) # value for index 0 is set to 0.0 log_table[0][0] = 0.0 log_table[0][1] = 0.0 for i in range(1, 2**table_index_size): #inv_value = (1.0 + (self.processor.inv_approx_table[i] / S2**9) + S2**-52) * S2**-1 #inv_value = (1.0 + (inv_approx_table[i][0] / S2**9) ) * S2**-1 #print inv_approx_table[i][0], inv_value inv_value = inv_approx_table[i][0] value_high_bitsize = self.precision.get_field_size() - ( self.precision.get_exponent_size() + 1) value_high = round(log2(inv_value), value_high_bitsize, sollya.RN) value_low = round( log2(inv_value) - value_high, sollya_precision, sollya.RN) log_table[i][0] = value_high log_table[i][1] = value_low def compute_log(_vx, exp_corr_factor=None): _vx_mant = MantissaExtraction(_vx, tag="_vx_mant", precision=self.precision, debug=debug_lftolx) _vx_exp = ExponentExtraction(_vx, tag="_vx_exp", debug=debugd) # The main table is indexed by the 7 most significant bits # of the mantissa table_index = inv_approx_table.index_function(_vx_mant) table_index.set_attributes(tag="table_index", debug=debuglld) # argument reduction # Using AND -2 to exclude LSB set to 1 for Newton-Raphson convergence # TODO: detect if single operand inverse seed is supported by the targeted architecture pre_arg_red_index = TypeCast(BitLogicAnd( TypeCast(DivisionSeed(_vx_mant, precision=self.precision, tag="seed", debug=debug_lftolx, silent=True), precision=ML_UInt64), Constant(-2, precision=ML_UInt64), precision=ML_UInt64), precision=self.precision, tag="pre_arg_red_index", debug=debug_lftolx) arg_red_index = Select(Equal(table_index, 0), 1.0, pre_arg_red_index, tag="arg_red_index", debug=debug_lftolx) _red_vx = FMA(arg_red_index, 
_vx_mant, -1.0) _red_vx.set_attributes(tag="_red_vx", debug=debug_lftolx) inv_err = S2**-inv_approx_table.index_size red_interval = Interval(1 - inv_err, 1 + inv_err) # return in case of standard (non-special) input _log_inv_lo = TableLoad(log_table, table_index, 1, tag="log_inv_lo", debug=debug_lftolx) _log_inv_hi = TableLoad(log_table, table_index, 0, tag="log_inv_hi", debug=debug_lftolx) Log.report(Log.Verbose, "building mathematical polynomial") approx_interval = Interval(-inv_err, inv_err) poly_degree = sup( guessdegree( log2(1 + sollya.x) / sollya.x, approx_interval, S2** -(self.precision.get_field_size() * 1.1))) + 1 sollya.settings.display = sollya.hexadecimal global_poly_object, approx_error = Polynomial.build_from_approximation_with_error( log2(1 + sollya.x) / sollya.x, poly_degree, [self.precision] * (poly_degree + 1), approx_interval, sollya.absolute, error_function=lambda p, f, ai, mod, t: sollya.dirtyinfnorm( p - f, ai)) Log.report( Log.Info, "poly_degree={}, approx_error={}".format( poly_degree, approx_error)) poly_object = global_poly_object.sub_poly(start_index=1, offset=1) #poly_object = global_poly_object.sub_poly(start_index=0,offset=0) Attributes.set_default_silent(True) Attributes.set_default_rounding_mode(ML_RoundToNearest) Log.report(Log.Verbose, "generating polynomial evaluation scheme") pre_poly = PolynomialSchemeEvaluator.generate_horner_scheme( poly_object, _red_vx, unified_precision=self.precision) _poly = FMA(pre_poly, _red_vx, global_poly_object.get_cst_coeff(0, self.precision)) _poly.set_attributes(tag="poly", debug=debug_lftolx) Log.report( Log.Verbose, "sollya global_poly_object: {}".format( global_poly_object.get_sollya_object())) Log.report( Log.Verbose, "sollya poly_object: {}".format( poly_object.get_sollya_object())) corr_exp = _vx_exp if exp_corr_factor == None else _vx_exp + exp_corr_factor Attributes.unset_default_rounding_mode() Attributes.unset_default_silent() pre_result = -_log_inv_hi + (_red_vx * _poly + (-_log_inv_lo)) pre_result.set_attributes(tag="pre_result", debug=debug_lftolx) exact_log2_hi_exp = Conversion(corr_exp, precision=self.precision) exact_log2_hi_exp.set_attributes(tag="exact_log2_hi_hex", debug=debug_lftolx) _result = exact_log2_hi_exp + pre_result return _result, _poly, _log_inv_lo, _log_inv_hi, _red_vx result, poly, log_inv_lo, log_inv_hi, red_vx = compute_log(vx) result.set_attributes(tag="result", debug=debug_lftolx) # specific input value predicate neg_input = Comparison(vx, 0, likely=False, specifier=Comparison.Less, debug=debugd, tag="neg_input") vx_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False, debug=debugd, tag="nan_or_inf") vx_snan = Test(vx, specifier=Test.IsSignalingNaN, likely=False, debug=debugd, tag="vx_snan") vx_inf = Test(vx, specifier=Test.IsInfty, likely=False, debug=debugd, tag="vx_inf") vx_subnormal = Test(vx, specifier=Test.IsSubnormal, likely=False, debug=debugd, tag="vx_subnormal") vx_zero = Test(vx, specifier=Test.IsZero, likely=False, debug=debugd, tag="vx_zero") exp_mone = Equal(vx_exp, -1, tag="exp_minus_one", debug=debugd, likely=False) vx_one = Equal(vx, 1.0, tag="vx_one", likely=False, debug=debugd) # Specific specific for the case exp == -1 # log2(x) = log2(m) - 1 # # as m in [1, 2[, log2(m) in [0, 1[ # if r is close to 2, a catastrophic cancellation can occur # # r = seed(m) # log2(x) = log2(seed(m) * m / seed(m)) - 1 # = log2(seed(m) * m) - log2(seed(m)) - 1 # # for m really close to 2 => seed(m) = 0.5 # => log2(x) = log2(0.5 * m) # = result_exp_m1 = (-log_inv_hi - 1.0) + 
FMA(poly, red_vx, -log_inv_lo) result_exp_m1.set_attributes(tag="result_exp_m1", debug=debug_lftolx) m100 = -100 S2100 = Constant(S2**100, precision=self.precision) result_subnormal, _, _, _, _ = compute_log(vx * S2100, exp_corr_factor=m100) result_subnormal.set_attributes(tag="result_subnormal", debug=debug_lftolx) one_err = S2**-7 approx_interval_one = Interval(-one_err, one_err) red_vx_one = vx - 1.0 poly_degree_one = sup( guessdegree( log(1 + x) / x, approx_interval_one, S2** -(self.precision.get_field_size() + 1))) + 1 poly_object_one = Polynomial.build_from_approximation( log(1 + sollya.x) / sollya.x, poly_degree_one, [self.precision] * (poly_degree_one + 1), approx_interval_one, absolute).sub_poly(start_index=1) poly_one = PolynomialSchemeEvaluator.generate_horner_scheme( poly_object_one, red_vx_one, unified_precision=self.precision) poly_one.set_attributes(tag="poly_one", debug=debug_lftolx) result_one = red_vx_one + red_vx_one * poly_one cond_one = (vx < (1 + one_err)) & (vx > (1 - one_err)) cond_one.set_attributes(tag="cond_one", debug=debugd, likely=False) # main scheme pre_scheme = ConditionBlock( neg_input, Statement(ClearException(), Raise(ML_FPE_Invalid), Return(FP_QNaN(self.precision))), ConditionBlock( vx_nan_or_inf, ConditionBlock( vx_inf, Statement( ClearException(), Return(FP_PlusInfty(self.precision)), ), Statement(ClearException(), ConditionBlock(vx_snan, Raise(ML_FPE_Invalid)), Return(FP_QNaN(self.precision)))), ConditionBlock( vx_subnormal, ConditionBlock( vx_zero, Statement( ClearException(), Raise(ML_FPE_DivideByZero), Return(FP_MinusInfty(self.precision)), ), Statement(ClearException(), result_subnormal, Return(result_subnormal))), ConditionBlock( vx_one, Statement( ClearException(), Return(FP_PlusZero(self.precision)), ), ConditionBlock(exp_mone, Return(result_exp_m1), Return(result)))))) scheme = Statement(result, pre_scheme) return scheme
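# Standalone sketch of the dedicated near-1 path built above: for |x - 1| < 2^-7
# the reduction u = x - 1 is exact (Sterbenz) and log(x) is evaluated as
# u + u * p(u), where p approximates log(1+u)/u - 1. The Taylor coefficients
# below stand in for the sollya-generated polynomial, so this toy version is
# much less accurate than the real one.
import math

def _sketch_log_near_one(x):
    u = x - 1.0                                   # exact for x in [1 - 2^-7, 1 + 2^-7]
    # p(u) ~ log(1+u)/u - 1 = -u/2 + u^2/3 - u^3/4 + ...
    p = u * (-0.5 + u * (1.0 / 3.0 + u * -0.25))  # Horner evaluation, as in the scheme
    return u + u * p

x = 1.0 + 2.0**-9
assert abs(_sketch_log_near_one(x) - math.log(x)) < 1e-13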
def generate_scheme(self): """Produce an abstract scheme for the logarithm. This abstract scheme will be used by the code generation backend. """ if self.precision not in [ML_Binary32, ML_Binary64]: Log.report(Log.Error, "The demanded precision is not supported") vx = self.implementation.add_input_variable("x", self.precision) def default_bool_convert(optree, precision=None, **kw): return bool_convert(optree, precision, -1, 0, **kw) \ if isinstance(self.processor, VectorBackend) \ else bool_convert(optree, precision, 1, 0, **kw) precision = self.precision.sollya_object int_prec = self.precision.get_integer_format() Log.report(Log.Info, "int_prec is %s" % int_prec) uint_prec = self.precision.get_unsigned_integer_format() Log.report(Log.Info, "MDL constants") cgpe_scheme_idx = int(self.cgpe_index) table_index_size = int(self.tbl_index_size) # table_nb_elements = 2**(table_index_size) table_dimensions = [2*table_nb_elements] # two values are stored for each element field_size = Constant(self.precision.get_field_size(), precision = int_prec, tag = 'field_size') if self.log_radix == EXP_1: log2_hi = Constant( round(log(2), precision, sollya.RN), precision = self.precision, tag = 'log2_hi') log2_lo = Constant( round(log(2) - round(log(2), precision, sollya.RN), precision, sollya.RN), precision = self.precision, tag = 'log2_lo') elif self.log_radix == 10: log2_hi = Constant( round(log10(2), precision, sollya.RN), precision = self.precision, tag = 'log2_hi') log2_lo = Constant( round(log10(2) - round(log10(2), precision, sollya.RN), precision, sollya.RN), precision = self.precision, tag = 'log2_lo') # ... if log_radix == '2' then log2(2) == 1 # subnormal_mask aims at trapping positive subnormals except zero. # That's why we will subtract 1 to the integer bitstring of the input, and # then compare for Less (strict) the resulting integer bitstring to this # mask, e.g. 0x7fffff for binary32. if self.no_subnormal == False: subnormal_mask = Constant((1 << self.precision.get_field_size()) - 1, precision = int_prec, tag = 'subnormal_mask') fp_one = Constant(1.0, precision = self.precision, tag = 'fp_one') fp_one_as_uint = TypeCast(fp_one, precision = uint_prec, tag = 'fp_one_as_uint') int_zero = Constant(0, precision = int_prec, tag = 'int_zero') int_one = Constant(1, precision = int_prec, tag = 'int_one') table_mantissa_half_ulp = Constant( 1 << (self.precision.field_size - table_index_size - 1), precision = int_prec ) table_s_exp_index_mask = Constant( ~((table_mantissa_half_ulp.get_value() << 1) - 1), precision = uint_prec ) Log.report(Log.Info, "MDL table") # The table holds approximations of -log(2^tau * r_i) so we first compute # the index value for which tau changes from 1 to 0. cut = sqrt(2.) tau_index_limit = floor(table_nb_elements * (2./cut - 1)) sollya_logtbl = [ (-log1p(float(i) / table_nb_elements) + (0 if i <= tau_index_limit else log(2.))) / log(self.log_radix) for i in range(table_nb_elements) ] # ... init_logtbl_hi = [ round(sollya_logtbl[i], self.precision.get_mantissa_size(), sollya.RN) for i in range(table_nb_elements) ] init_logtbl_lo = [ round(sollya_logtbl[i] - init_logtbl_hi[i], self.precision.get_mantissa_size(), sollya.RN) for i in range(table_nb_elements) ] init_logtbl = [tmp[i] for i in range(len(init_logtbl_hi)) for tmp in [init_logtbl_hi, init_logtbl_lo]] log1p_table = ML_NewTable(dimensions = table_dimensions, storage_precision = self.precision, init_data = init_logtbl, tag = 'ml_log1p_table') # ... 
if self.no_rcp: sollya_rcptbl = [ (1/((1+float(i)/table_nb_elements)+2**(-1-int(self.tbl_index_size)))) for i in range(table_nb_elements) ] init_rcptbl = [ round(sollya_rcptbl[i], int(self.tbl_index_size)+1, # self.precision.get_mantissa_size(), sollya.RN) for i in range(table_nb_elements) ] rcp_table = ML_NewTable(dimensions = [table_nb_elements], storage_precision = self.precision, init_data = init_rcptbl, tag = 'ml_rcp_table') # ... Log.report(Log.Info, 'MDL unified subnormal handling') vx_as_int = TypeCast(vx, precision = int_prec, tag = 'vx_as_int') if self.no_subnormal == False: vx_as_uint = TypeCast(vx, precision = uint_prec, tag = 'vx_as_uint') # Avoid the 0.0 case by subtracting 1 from vx_as_int tmp = Comparison(vx_as_int - 1, subnormal_mask, specifier = Comparison.Less) is_subnormal = default_bool_convert( tmp, # Will catch negative values as well as NaNs with sign bit set precision = int_prec) is_subnormal.set_attributes(tag = "is_subnormal") if not(isinstance(self.processor, VectorBackend)): is_subnormal = Subtraction(Constant(0, precision = int_prec), is_subnormal, precision = int_prec) ################################################# # Vectorizable integer based subnormal handling # ################################################# # 1. lzcnt # custom lzcount-like for subnormal numbers using FPU (see draft article) Zi = BitLogicOr(vx_as_uint, fp_one_as_uint, precision = uint_prec, tag="Zi") Zf = Subtraction( TypeCast(Zi, precision = self.precision), fp_one, precision = self.precision, tag="Zf") # Zf exponent is -(nlz(x) - exponent_size). # 2. compute shift value # Vectorial comparison on x86+sse/avx is going to look like # '|0x00|0xff|0x00|0x00|' and that's why we use Negate. # But for scalar code generation, comparison will rather be either 0 or 1 # in C. Thus mask below won't be correct for a scalar implementation. # FIXME: Can we know the backend that will be called and choose in # consequence? Should we make something arch-agnostic instead? # n_value = BitLogicAnd( Addition( DirtyExponentExtraction(Zf, self.precision), Constant( self.precision.get_bias(), precision = int_prec), precision = int_prec), is_subnormal, precision = int_prec, tag = "n_value") alpha = Negation(n_value, tag="alpha") # # 3. shift left # renormalized_mantissa = BitLogicLeftShift(vx_as_int, value) normal_vx_as_int = BitLogicLeftShift(vx_as_int, alpha) # 4. set exponent to the right value # Compute the exponent to add : (p-1)-(value) + 1 = p-1-value # The final "+ 1" comes from the fact that once renormalized, the # floating-point datum has a biased exponent of 1 #tmp0 = Subtraction( # field_size, # value, # precision = int_prec, # tag="tmp0") # Set the value to 0 if the number is not subnormal #tmp1 = BitLogicAnd(tmp0, is_subnormal) #renormalized_exponent = BitLogicLeftShift( # tmp1, # field_size # ) else: # no_subnormal == True normal_vx_as_int = vx_as_int #normal_vx_as_int = renormalized_mantissa + renormalized_exponent normal_vx = TypeCast(normal_vx_as_int, precision = self.precision, tag = 'normal_vx') # alpha = BitLogicAnd(field_size, is_subnormal, tag = 'alpha') # XXX Extract the mantissa, see if this is supported in the x86 vector # backend or if it still uses the support_lib. 
vx_mantissa = MantissaExtraction(normal_vx, precision = self.precision) Log.report(Log.Info, "MDL scheme") if self.force_division == True: rcp_m = Division(fp_one, vx_mantissa, precision = self.precision) elif self.no_rcp == False: rcp_m = ReciprocalSeed(vx_mantissa, precision = self.precision) if not self.processor.is_supported_operation(rcp_m): if self.precision == ML_Binary64: # Try using a binary32 FastReciprocal binary32_m = Conversion(vx_mantissa, precision = ML_Binary32) rcp_m = ReciprocalSeed(binary32_m, precision = ML_Binary32) rcp_m = Conversion(rcp_m, precision = ML_Binary64) if not self.processor.is_supported_operation(rcp_m): # FIXME An approximation table could be used instead but for vector # implementations another GATHER would be required. # However this may well be better than a division... rcp_m = Division(fp_one, vx_mantissa, precision = self.precision) else: # ... use a look-up table rcp_shift = BitLogicLeftShift(normal_vx_as_int, self.precision.get_exponent_size() + 1) rcp_idx = BitLogicRightShift(rcp_shift, self.precision.get_exponent_size() + 1 + self.precision.get_field_size() - int(self.tbl_index_size)) rcp_m = TableLoad(rcp_table, rcp_idx, tag = 'rcp_idx', debug = debug_multi) # rcp_m.set_attributes(tag = 'rcp_m') # exponent is normally either 0 or -1, since m is in [1, 2). Possible # optimization? # exponent = ExponentExtraction(rcp_m, precision = self.precision, # tag = 'exponent') ri_round = TypeCast( Addition( TypeCast(rcp_m, precision = int_prec), table_mantissa_half_ulp, precision = int_prec ), precision = uint_prec ) ri_fast_rndn = BitLogicAnd( ri_round, table_s_exp_index_mask, tag = 'ri_fast_rndn', precision = uint_prec ) # u = m * ri - 1 ul = None if self.no_rcp == True: # ... u does not fit on a single word tmp_u, tmp_ul = Mul211(vx_mantissa, TypeCast(ri_fast_rndn, precision = self.precision), fma = (self.no_fma == False)) fp_minus_one = Constant(-1.0, precision = self.precision, tag = 'fp_minus_one') u, ul = Add212(fp_minus_one, tmp_u, tmp_ul) u.set_attributes(tag='uh') ul.set_attributes(tag='ul') elif self.no_fma == False: u = FusedMultiplyAdd( vx_mantissa, TypeCast(ri_fast_rndn, precision = self.precision), fp_one, specifier = FusedMultiplyAdd.Subtract, tag = 'u') else: # disable FMA # tmph + tmpl = m * ri, where tmph ~ 1 tmph, tmpl = Mul211(vx_mantissa, TypeCast(ri_fast_rndn, precision = self.precision), fma = False) # u_tmp = tmph - 1 ... exact due to Sterbenz u_tmp = Subtraction(tmph, fp_one, precision = self.precision) # u = u_tmp - tmpl ... exact since the result u is representable as a single word u = Addition(u_tmp, tmpl, precision = self.precision, tag = 'u') unneeded_bits = Constant( self.precision.field_size - table_index_size, precision=uint_prec, tag="unneeded_bits" ) assert self.precision.field_size - table_index_size >= 0 ri_bits = BitLogicRightShift( ri_fast_rndn, unneeded_bits, precision = uint_prec, tag = "ri_bits" ) # Retrieve mantissa's MSBs + first bit of exponent, for tau computation in case # exponent is 0 (i.e. biased 127, i.e. first bit of exponent is set.). # In this particular case, i = 0 but tau is 1 # table_index does not need to be as long as uint_prec might be, # try and keep it the size of size_t. size_t_prec = ML_UInt32 signed_size_t_prec = ML_Int32 table_index_mask = Constant( (1 << (table_index_size + 1)) - 1, precision = size_t_prec ) table_index = BitLogicAnd( Conversion(ri_bits, precision = size_t_prec), table_index_mask, tag = 'table_index', precision = size_t_prec ) # Compute tau using the tau_index_limit value. 
tmp = default_bool_convert( Comparison( TypeCast(table_index, precision = signed_size_t_prec), Constant(tau_index_limit, precision = signed_size_t_prec), specifier = Comparison.Greater if isinstance(self.processor, VectorBackend) else Comparison.LessOrEqual ), precision = signed_size_t_prec, tag="tmp" ) # A true tmp will typically be -1 for VectorBackends, but 1 for standard C. tau = Conversion( Addition(tmp, Constant(1, precision=signed_size_t_prec), precision = signed_size_t_prec, tag="pre_add") if isinstance(self.processor, VectorBackend) else tmp, precision=int_prec, tag="pre_tau" ) tau.set_attributes(tag = 'tau') # Update table_index: keep only table_index_size bits table_index_hi = BitLogicAnd( table_index, Constant((1 << table_index_size) - 1, precision = size_t_prec), precision = size_t_prec ) # table_index_hi = table_index_hi << 1 table_index_hi = BitLogicLeftShift( table_index_hi, Constant(1, precision = size_t_prec), precision = size_t_prec, tag = "table_index_hi" ) # table_index_lo = table_index_hi + 1 table_index_lo = Addition( table_index_hi, Constant(1, precision = size_t_prec), precision = size_t_prec, tag = "table_index_lo" ) tbl_hi = TableLoad(log1p_table, table_index_hi, tag = 'tbl_hi', debug = debug_multi) tbl_lo = TableLoad(log1p_table, table_index_lo, tag = 'tbl_lo', debug = debug_multi) # Compute exponent e + tau - alpha, but first subtract the bias. if self.no_subnormal == False: tmp_eptau = Addition( Addition( BitLogicRightShift( normal_vx_as_int, field_size, tag = 'exponent', interval = self.precision.get_exponent_interval(), precision = int_prec), Constant( self.precision.get_bias(), precision = int_prec)), tau, tag = 'tmp_eptau', precision = int_prec) exponent = Subtraction(tmp_eptau, alpha, precision = int_prec) else: exponent = Addition( Addition( BitLogicRightShift( normal_vx_as_int, field_size, tag = 'exponent', interval = self.precision.get_exponent_interval(), precision = int_prec), Constant( self.precision.get_bias(), precision = int_prec)), tau, tag = 'tmp_eptau', precision = int_prec) # fp_exponent = Conversion(exponent, precision = self.precision, tag = 'fp_exponent') Log.report(Log.Info, 'MDL polynomial approximation') if self.log_radix == EXP_1: sollya_function = log(1 + sollya.x) elif self.log_radix == 2: sollya_function = log2(1 + sollya.x) elif self.log_radix == 10: sollya_function = log10(1 + sollya.x) # ... if self.force_division == True: # rcp accuracy is 2^(-p) boundrcp = 2**(-self.precision.get_precision()) else: boundrcp = 1.5 * 2**(-12) # ... see Intel intrinsics guide if self.precision in [ML_Binary64]: if not self.processor.is_supported_operation(rcp_m): boundrcp = (1+boundrcp)*(1+2**(-24)) - 1 else: boundrcp = 2**(-14) # ... see Intel intrinsics guide arg_red_mag = boundrcp + 2**(-table_index_size-1) + boundrcp * 2**(-table_index_size-1) if self.no_rcp == False: approx_interval = Interval(-arg_red_mag, arg_red_mag) else: approx_interval = Interval(-2**(-int(self.tbl_index_size)+1),2**(-int(self.tbl_index_size)+1)) max_eps = 2**-(2*(self.precision.get_field_size())) Log.report(Log.Info, "max acceptable error for polynomial = {}".format(float.hex(max_eps))) poly_degree = sup( guessdegree( sollya_function, approx_interval, max_eps, ) ) Log.report(Log.Info, "poly degree is ", poly_degree) if self.log_radix == EXP_1: poly_object = Polynomial.build_from_approximation( sollya_function, range(2, int(poly_degree) + 1), # Force 1st 2 coeffs to 0 and 1, resp. 
# Emulate double-self.precision coefficient formats [self.precision.get_mantissa_size()*2 + 1]*(poly_degree - 1), approx_interval, sollya.absolute, 0 + sollya._x_) # Force the first 2 coefficients to 0 and 1, resp. else: # ... == '2' or '10' poly_object = Polynomial.build_from_approximation( sollya_function, range(1, int(poly_degree) + 1), # Force 1st coeff to 0 # Emulate double-self.precision coefficient formats [self.precision.get_mantissa_size()*2 + 1]*(poly_degree), approx_interval, sollya.absolute, 0) # Force the first coefficients to 0 Log.report(Log.Info, str(poly_object)) constant_precision = ML_SingleSingle if self.precision == ML_Binary32 \ else ML_DoubleDouble if self.precision == ML_Binary64 \ else None if is_cgpe_available(): log1pu_poly = PolynomialSchemeEvaluator.generate_cgpe_scheme( poly_object, u, unified_precision = self.precision, constant_precision = constant_precision, scheme_id = cgpe_scheme_idx ) else: Log.report(Log.Warning, "CGPE not available, falling back to std poly evaluator") log1pu_poly = PolynomialSchemeEvaluator.generate_horner_scheme( poly_object, u, unified_precision = self.precision, constant_precision = constant_precision ) # XXX Dirty implementation of double-(self.precision) poly def dirty_poly_node_conversion(node, variable_h, variable_l, use_fma): return dirty_multi_node_expand( node, self.precision, mem_map={variable_h: (variable_h, variable_l)}, fma=use_fma) log1pu_poly_hi, log1pu_poly_lo = dirty_poly_node_conversion(log1pu_poly, u, ul, use_fma=(self.no_fma == False)) log1pu_poly_hi.set_attributes(tag = 'log1pu_poly_hi') log1pu_poly_lo.set_attributes(tag = 'log1pu_poly_lo') # Compute log(2) * (e + tau - alpha) if self.log_radix != 2: # 'e' or '10' log2e_hi, log2e_lo = Mul212(fp_exponent, log2_hi, log2_lo, fma = (self.no_fma == False)) # Add log1p(u) if self.log_radix != 2: # 'e' or '10' tmp_res_hi, tmp_res_lo = Add222(log2e_hi, log2e_lo, log1pu_poly_hi, log1pu_poly_lo) else: tmp_res_hi, tmp_res_lo = Add212(fp_exponent, log1pu_poly_hi, log1pu_poly_lo) # Add -log(2^(tau)/m) approximation retrieved by two table lookups logx_hi = Add122(tmp_res_hi, tmp_res_lo, tbl_hi, tbl_lo)[0] logx_hi.set_attributes(tag = 'logx_hi') scheme = Return(logx_hi, precision = self.precision) return scheme
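# Standalone sketch of the ri_fast_rndn computation above: the reciprocal seed is
# rounded to the table granularity by adding half a table-ulp to its integer
# encoding and masking the low mantissa bits; a carry into the exponent field
# (mantissa all ones) still yields the correctly rounded value. The sketch works
# on binary64 via struct; the table index size and the exact division used as a
# stand-in for ReciprocalSeed are assumptions of the example.
import struct

TABLE_INDEX_SIZE = 7
FIELD_SIZE = 52                                    # binary64 mantissa field

def _round_seed_to_table(r):
    half_ulp = 1 << (FIELD_SIZE - TABLE_INDEX_SIZE - 1)
    mask = ~((half_ulp << 1) - 1) & (2**64 - 1)
    bits = struct.unpack("<Q", struct.pack("<d", r))[0]
    rounded_bits = (bits + half_ulp) & mask        # round to the nearest table entry
    return struct.unpack("<d", struct.pack("<Q", rounded_bits))[0]

m = 1.2345                                         # a mantissa value in [1, 2)
ri = _round_seed_to_table(1.0 / m)                 # seed ~ 1/m
u = m * ri - 1.0                                   # reduced argument u = m*ri - 1
assert abs(u) < 2.0**-TABLE_INDEX_SIZE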
def generate_scheme(self): # declaring target and instantiating optimization engine vx = self.implementation.add_input_variable("x", self.precision) Log.set_dump_stdout(True) Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m") if self.debug_flag: Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m") # local overloading of RaiseReturn operation def ExpRaiseReturn(*args, **kwords): kwords["arg_value"] = vx kwords["function_name"] = self.function_name if self.libm_compliant: return RaiseReturn(*args, precision=self.precision, **kwords) else: return Return(kwords["return_value"], precision=self.precision) test_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False, debug=debug_multi, tag="nan_or_inf") test_nan = Test(vx, specifier=Test.IsNaN, debug=debug_multi, tag="is_nan_test") test_positive = Comparison(vx, 0, specifier=Comparison.GreaterOrEqual, debug=debug_multi, tag="inf_sign") test_signaling_nan = Test(vx, specifier=Test.IsSignalingNaN, debug=debug_multi, tag="is_signaling_nan") return_snan = Statement( ExpRaiseReturn(ML_FPE_Invalid, return_value=FP_QNaN(self.precision))) # return in case of infinity input infty_return = Statement( ConditionBlock( test_positive, Return(FP_PlusInfty(self.precision), precision=self.precision), Return(FP_PlusZero(self.precision), precision=self.precision))) # return in case of specific value input (NaN or inf) specific_return = ConditionBlock( test_nan, ConditionBlock( test_signaling_nan, return_snan, Return(FP_QNaN(self.precision), precision=self.precision)), infty_return) # return in case of standard (non-special) input # exclusion of early overflow and underflow cases precision_emax = self.precision.get_emax() precision_max_value = S2 * S2**precision_emax exp_overflow_bound = sollya.ceil(log(precision_max_value)) early_overflow_test = Comparison(vx, exp_overflow_bound, likely=False, specifier=Comparison.Greater) early_overflow_return = Statement( ClearException() if self.libm_compliant else Statement(), ExpRaiseReturn(ML_FPE_Inexact, ML_FPE_Overflow, return_value=FP_PlusInfty(self.precision))) precision_emin = self.precision.get_emin_subnormal() precision_min_value = S2**precision_emin exp_underflow_bound = floor(log(precision_min_value)) early_underflow_test = Comparison(vx, exp_underflow_bound, likely=False, specifier=Comparison.Less) early_underflow_return = Statement( ClearException() if self.libm_compliant else Statement(), ExpRaiseReturn(ML_FPE_Inexact, ML_FPE_Underflow, return_value=FP_PlusZero(self.precision))) # constant computation invlog2 = self.precision.round_sollya_object(1 / log(2), sollya.RN) interval_vx = Interval(exp_underflow_bound, exp_overflow_bound) interval_fk = interval_vx * invlog2 interval_k = Interval(floor(inf(interval_fk)), sollya.ceil(sup(interval_fk))) log2_hi_precision = self.precision.get_field_size() - ( sollya.ceil(log2(sup(abs(interval_k)))) + 2) Log.report(Log.Info, "log2_hi_precision: %d" % log2_hi_precision) invlog2_cst = Constant(invlog2, precision=self.precision) log2_hi = round(log(2), log2_hi_precision, sollya.RN) log2_lo = self.precision.round_sollya_object( log(2) - log2_hi, sollya.RN) # argument reduction unround_k = vx * invlog2 unround_k.set_attributes(tag="unround_k", debug=debug_multi) k = NearestInteger(unround_k, precision=self.precision, debug=debug_multi) ik = NearestInteger(unround_k, precision=self.precision.get_integer_format(), debug=debug_multi, tag="ik") ik.set_tag("ik") k.set_tag("k") exact_pre_mul = (k * log2_hi) 
exact_pre_mul.set_attributes(exact=True) exact_hi_part = vx - exact_pre_mul exact_hi_part.set_attributes(exact=True, tag="exact_hi", debug=debug_multi, prevent_optimization=True) exact_lo_part = -k * log2_lo exact_lo_part.set_attributes(tag="exact_lo", debug=debug_multi, prevent_optimization=True) r = exact_hi_part + exact_lo_part r.set_tag("r") r.set_attributes(debug=debug_multi) approx_interval = Interval(-log(2) / 2, log(2) / 2) approx_interval_half = approx_interval / 2 approx_interval_split = [ Interval(-log(2) / 2, inf(approx_interval_half)), approx_interval_half, Interval(sup(approx_interval_half), log(2) / 2) ] # TODO: should be computed automatically exact_hi_interval = approx_interval exact_lo_interval = -interval_k * log2_lo opt_r = self.optimise_scheme(r, copy={}) tag_map = {} self.opt_engine.register_nodes_by_tag(opt_r, tag_map) cg_eval_error_copy_map = { vx: Variable("x", precision=self.precision, interval=interval_vx), tag_map["k"]: Variable("k", interval=interval_k, precision=self.precision) } #try: if is_gappa_installed(): eval_error = self.gappa_engine.get_eval_error_v2( self.opt_engine, opt_r, cg_eval_error_copy_map, gappa_filename="red_arg.g") else: eval_error = 0.0 Log.report(Log.Warning, "gappa is not installed in this environnement") Log.report(Log.Info, "eval error: %s" % eval_error) local_ulp = sup(ulp(sollya.exp(approx_interval), self.precision)) # FIXME refactor error_goal from accuracy Log.report(Log.Info, "accuracy: %s" % self.accuracy) if isinstance(self.accuracy, ML_Faithful): error_goal = local_ulp elif isinstance(self.accuracy, ML_CorrectlyRounded): error_goal = S2**-1 * local_ulp elif isinstance(self.accuracy, ML_DegradedAccuracyAbsolute): error_goal = self.accuracy.goal elif isinstance(self.accuracy, ML_DegradedAccuracyRelative): error_goal = self.accuracy.goal else: Log.report(Log.Error, "unknown accuracy: %s" % self.accuracy) # error_goal = local_ulp #S2**-(self.precision.get_field_size()+1) error_goal_approx = S2**-1 * error_goal Log.report(Log.Info, "\033[33;1m building mathematical polynomial \033[0m\n") poly_degree = max( sup( guessdegree( expm1(sollya.x) / sollya.x, approx_interval, error_goal_approx)) - 1, 2) init_poly_degree = poly_degree error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai) polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme #polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme while 1: Log.report(Log.Info, "attempting poly degree: %d" % poly_degree) precision_list = [1] + [self.precision] * (poly_degree) poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error( expm1(sollya.x), poly_degree, precision_list, approx_interval, sollya.absolute, error_function=error_function) Log.report(Log.Info, "polynomial: %s " % poly_object) sub_poly = poly_object.sub_poly(start_index=2) Log.report(Log.Info, "polynomial: %s " % sub_poly) Log.report(Log.Info, "poly approx error: %s" % poly_approx_error) Log.report( Log.Info, "\033[33;1m generating polynomial evaluation scheme \033[0m") pre_poly = polynomial_scheme_builder( poly_object, r, unified_precision=self.precision) pre_poly.set_attributes(tag="pre_poly", debug=debug_multi) pre_sub_poly = polynomial_scheme_builder( sub_poly, r, unified_precision=self.precision) pre_sub_poly.set_attributes(tag="pre_sub_poly", debug=debug_multi) poly = 1 + (exact_hi_part + (exact_lo_part + pre_sub_poly)) poly.set_tag("poly") # optimizing poly before evaluation error computation #opt_poly = 
self.opt_engine.optimization_process(poly, self.precision, fuse_fma = fuse_fma) #opt_sub_poly = self.opt_engine.optimization_process(pre_sub_poly, self.precision, fuse_fma = fuse_fma) opt_poly = self.optimise_scheme(poly) opt_sub_poly = self.optimise_scheme(pre_sub_poly) # evaluating error of the polynomial approximation r_gappa_var = Variable("r", precision=self.precision, interval=approx_interval) exact_hi_gappa_var = Variable("exact_hi", precision=self.precision, interval=exact_hi_interval) exact_lo_gappa_var = Variable("exact_lo", precision=self.precision, interval=exact_lo_interval) vx_gappa_var = Variable("x", precision=self.precision, interval=interval_vx) k_gappa_var = Variable("k", interval=interval_k, precision=self.precision) #print "exact_hi interval: ", exact_hi_interval sub_poly_error_copy_map = { #r.get_handle().get_node(): r_gappa_var, #vx.get_handle().get_node(): vx_gappa_var, exact_hi_part.get_handle().get_node(): exact_hi_gappa_var, exact_lo_part.get_handle().get_node(): exact_lo_gappa_var, #k.get_handle().get_node(): k_gappa_var, } poly_error_copy_map = { exact_hi_part.get_handle().get_node(): exact_hi_gappa_var, exact_lo_part.get_handle().get_node(): exact_lo_gappa_var, } if is_gappa_installed(): sub_poly_eval_error = -1.0 sub_poly_eval_error = self.gappa_engine.get_eval_error_v2( self.opt_engine, opt_sub_poly, sub_poly_error_copy_map, gappa_filename="%s_gappa_sub_poly.g" % self.function_name) dichotomy_map = [ { exact_hi_part.get_handle().get_node(): approx_interval_split[0], }, { exact_hi_part.get_handle().get_node(): approx_interval_split[1], }, { exact_hi_part.get_handle().get_node(): approx_interval_split[2], }, ] poly_eval_error_dico = self.gappa_engine.get_eval_error_v3( self.opt_engine, opt_poly, poly_error_copy_map, gappa_filename="gappa_poly.g", dichotomy=dichotomy_map) poly_eval_error = max( [sup(abs(err)) for err in poly_eval_error_dico]) else: poly_eval_error = 0.0 sub_poly_eval_error = 0.0 Log.report(Log.Warning, "gappa is not installed in this environnement") Log.report(Log.Info, "stopping autonomous degree research") # incrementing polynomial degree to counteract initial decrementation effect poly_degree += 1 break Log.report(Log.Info, "poly evaluation error: %s" % poly_eval_error) Log.report(Log.Info, "sub poly evaluation error: %s" % sub_poly_eval_error) global_poly_error = None global_rel_poly_error = None for case_index in range(3): poly_error = poly_approx_error + poly_eval_error_dico[ case_index] rel_poly_error = sup( abs(poly_error / sollya.exp(approx_interval_split[case_index]))) if global_rel_poly_error == None or rel_poly_error > global_rel_poly_error: global_rel_poly_error = rel_poly_error global_poly_error = poly_error flag = error_goal > global_rel_poly_error if flag: break else: poly_degree += 1 late_overflow_test = Comparison(ik, self.precision.get_emax(), specifier=Comparison.Greater, likely=False, debug=debug_multi, tag="late_overflow_test") overflow_exp_offset = (self.precision.get_emax() - self.precision.get_field_size() / 2) diff_k = Subtraction( ik, Constant(overflow_exp_offset, precision=self.precision.get_integer_format()), precision=self.precision.get_integer_format(), debug=debug_multi, tag="diff_k", ) late_overflow_result = (ExponentInsertion( diff_k, precision=self.precision) * poly) * ExponentInsertion( overflow_exp_offset, precision=self.precision) late_overflow_result.set_attributes(silent=False, tag="late_overflow_result", debug=debug_multi, precision=self.precision) late_overflow_return = ConditionBlock( 
Test(late_overflow_result, specifier=Test.IsInfty, likely=False), ExpRaiseReturn(ML_FPE_Overflow, return_value=FP_PlusInfty(self.precision)), Return(late_overflow_result, precision=self.precision)) late_underflow_test = Comparison(k, self.precision.get_emin_normal(), specifier=Comparison.LessOrEqual, likely=False) underflow_exp_offset = 2 * self.precision.get_field_size() corrected_exp = Addition( ik, Constant(underflow_exp_offset, precision=self.precision.get_integer_format()), precision=self.precision.get_integer_format(), tag="corrected_exp") late_underflow_result = ( ExponentInsertion(corrected_exp, precision=self.precision) * poly) * ExponentInsertion(-underflow_exp_offset, precision=self.precision) late_underflow_result.set_attributes(debug=debug_multi, tag="late_underflow_result", silent=False) test_subnormal = Test(late_underflow_result, specifier=Test.IsSubnormal) late_underflow_return = Statement( ConditionBlock( test_subnormal, ExpRaiseReturn(ML_FPE_Underflow, return_value=late_underflow_result)), Return(late_underflow_result, precision=self.precision)) twok = ExponentInsertion(ik, tag="exp_ik", debug=debug_multi, precision=self.precision) #std_result = twok * ((1 + exact_hi_part * pre_poly) + exact_lo_part * pre_poly) std_result = twok * poly std_result.set_attributes(tag="std_result", debug=debug_multi) result_scheme = ConditionBlock( late_overflow_test, late_overflow_return, ConditionBlock(late_underflow_test, late_underflow_return, Return(std_result, precision=self.precision))) std_return = ConditionBlock( early_overflow_test, early_overflow_return, ConditionBlock(early_underflow_test, early_underflow_return, result_scheme)) # main scheme Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m") scheme = ConditionBlock( test_nan_or_inf, Statement(ClearException() if self.libm_compliant else Statement(), specific_return), std_return) return scheme
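# Standalone sketch of the argument reduction used by this exponential scheme:
# k = nearbyint(x / log 2), r = (x - k*log2_hi) - k*log2_lo and
# exp(x) = 2^k * exp(r). Splitting log(2) into a short high part plus a low
# correction makes k*log2_hi and x - k*log2_hi exact as long as k stays small
# enough (the scheme above sizes the split from the maximum k). The 44-bit split
# below and the use of math.exp for the polynomial part are assumptions of the
# example.
import math

LOG2_HI = float.fromhex("0x1.62e42fefa39p-1")      # log(2) truncated to 44 fraction bits
LOG2_LO = math.log(2.0) - LOG2_HI                  # low-order correction

def _sketch_exp(x):
    k = round(x / math.log(2.0))
    r = (x - k * LOG2_HI) - k * LOG2_LO            # |r| <= log(2)/2 plus a few ulps
    return math.ldexp(math.exp(r), k)              # exp(r) * 2^k

assert abs(_sketch_exp(10.0) / math.exp(10.0) - 1.0) < 1e-14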
def generate_scheme(self):
    vx = self.implementation.add_input_variable("x", self.precision)
    sollya_precision = self.get_input_precision().sollya_object

    # local overloading of RaiseReturn operation
    def ExpRaiseReturn(*args, **kwords):
        kwords["arg_value"] = vx
        kwords["function_name"] = self.function_name
        return RaiseReturn(*args, **kwords)

    log2_hi_value = round(log(2), self.precision.get_field_size() - (self.precision.get_exponent_size() + 1), sollya.RN)
    log2_lo_value = round(log(2) - log2_hi_value, self.precision.sollya_object, sollya.RN)

    log2_hi = Constant(log2_hi_value, precision=self.precision)
    log2_lo = Constant(log2_lo_value, precision=self.precision)

    vx_exp = ExponentExtraction(vx, tag="vx_exp", debug=debugd)

    int_precision = self.precision.get_integer_format()

    # retrieving processor inverse approximation table
    dummy_var = Variable("dummy", precision=self.precision)
    dummy_div_seed = ReciprocalSeed(dummy_var, precision=self.precision)
    inv_approx_table = self.processor.get_recursive_implementation(
        dummy_div_seed, language=None,
        table_getter=lambda self: self.approx_table_map)

    # table creation: for each reciprocal seed value, store log(seed) as a hi/lo pair
    table_index_size = 7
    log_table = ML_NewTable(dimensions=[2**table_index_size, 2], storage_precision=self.precision)
    log_table[0][0] = 0.0
    log_table[0][1] = 0.0
    for i in range(1, 2**table_index_size):
        #inv_value = (1.0 + (self.processor.inv_approx_table[i] / S2**9) + S2**-52) * S2**-1
        inv_value = inv_approx_table[i]  # (1.0 + (inv_approx_table[i] / S2**9) ) * S2**-1
        value_high = round(log(inv_value), self.precision.get_field_size() - (self.precision.get_exponent_size() + 1), sollya.RN)
        value_low = round(log(inv_value) - value_high, sollya_precision, sollya.RN)
        log_table[i][0] = value_high
        log_table[i][1] = value_low

    # case close to 0: ctz
    ctz_exp_limit = -7
    ctz_cond = vx_exp < ctz_exp_limit
    ctz_interval = Interval(-S2**ctz_exp_limit, S2**ctz_exp_limit)

    ctz_poly_degree = sup(guessdegree(log1p(sollya.x)/sollya.x, ctz_interval, S2**-(self.precision.get_field_size()+1))) + 1
    ctz_poly_object = Polynomial.build_from_approximation(
        log1p(sollya.x)/sollya.x, ctz_poly_degree,
        [self.precision]*(ctz_poly_degree+1), ctz_interval, sollya.absolute)

    Log.report(Log.Info, "generating polynomial evaluation scheme")
    ctz_poly = PolynomialSchemeEvaluator.generate_horner_scheme(ctz_poly_object, vx, unified_precision=self.precision)
    ctz_poly.set_attributes(tag="ctz_poly", debug=debug_lftolx)

    ctz_result = vx * ctz_poly

    neg_input = Comparison(vx, -1, likely=False, specifier=Comparison.Less, debug=debugd, tag="neg_input")
    vx_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False, debug=debugd, tag="nan_or_inf")
    vx_snan = Test(vx, specifier=Test.IsSignalingNaN, likely=False, debug=debugd, tag="snan")
    vx_inf = Test(vx, specifier=Test.IsInfty, likely=False, debug=debugd, tag="inf")
    vx_subnormal = Test(vx, specifier=Test.IsSubnormal, likely=False, debug=debugd, tag="vx_subnormal")

    log_function_code = CodeFunction("new_log", [Variable("x", precision=ML_Binary64)], output_format=ML_Binary64)
    log_call_generator = FunctionOperator(log_function_code.get_name(), arity=1, output_precision=ML_Binary64, declare_prototype=log_function_code)
    newlog_function = FunctionObject(log_function_code.get_name(), (ML_Binary64,), ML_Binary64, log_call_generator)

    # case away from 0.0
    pre_vxp1 = vx + 1.0
    pre_vxp1.set_attributes(tag="pre_vxp1", debug=debug_lftolx)
    pre_vxp1_exp = ExponentExtraction(pre_vxp1, tag="pre_vxp1_exp", debug=debugd)
    cm500 = Constant(-500, precision=ML_Int32)
    c0 = Constant(0, precision=ML_Int32)
    cond_scaling = pre_vxp1_exp > 2**(self.precision.get_exponent_size()-2)
    scaling_factor_exp = Select(cond_scaling, cm500, c0)
    scaling_factor = ExponentInsertion(scaling_factor_exp, precision=self.precision, tag="scaling_factor")

    vxp1 = pre_vxp1 * scaling_factor
    vxp1.set_attributes(tag="vxp1", debug=debug_lftolx)
    vxp1_exp = ExponentExtraction(vxp1, tag="vxp1_exp", debug=debugd)

    vxp1_inv = ReciprocalSeed(vxp1, precision=self.precision, tag="vxp1_inv", debug=debug_lftolx, silent=True)

    vxp1_dirty_inv = ExponentInsertion(-vxp1_exp, precision=self.precision, tag="vxp1_dirty_inv", debug=debug_lftolx)

    table_index = BitLogicAnd(
        BitLogicRightShift(
            TypeCast(vxp1, precision=int_precision, debug=debuglx),
            self.precision.get_field_size() - 7, debug=debuglx),
        0x7f, tag="table_index", debug=debuglx)

    # argument reduction
    # TODO: detect if single operand inverse seed is supported by the targeted architecture
    pre_arg_red_index = TypeCast(
        BitLogicAnd(TypeCast(vxp1_inv, precision=ML_UInt64),
                    Constant(-2, precision=ML_UInt64),
                    precision=ML_UInt64),
        precision=self.precision, tag="pre_arg_red_index", debug=debug_lftolx)
    arg_red_index = Select(Equal(table_index, 0), vxp1_dirty_inv, pre_arg_red_index, tag="arg_red_index", debug=debug_lftolx)
    # when no scaling is applied, expand r*(1+vx)-1 as (r*vx - 1) + r so the
    # reduction works on the exact input vx rather than the rounded pre_vxp1
    red_vxp1 = Select(cond_scaling, arg_red_index * vxp1 - 1.0, (arg_red_index * vx - 1.0) + arg_red_index)
    #red_vxp1 = arg_red_index * vxp1 - 1.0
    red_vxp1.set_attributes(tag="red_vxp1", debug=debug_lftolx)

    log_inv_lo = TableLoad(log_table, table_index, 1, tag="log_inv_lo", debug=debug_lftolx)
    log_inv_hi = TableLoad(log_table, table_index, 0, tag="log_inv_hi", debug=debug_lftolx)

    inv_err = S2**-6  # TODO: link to target DivisionSeed precision

    Log.report(Log.Info, "building mathematical polynomial")
    approx_interval = Interval(-inv_err, inv_err)
    poly_degree = sup(guessdegree(log(1+sollya.x)/sollya.x, approx_interval, S2**-(self.precision.get_field_size()+1))) + 1
    global_poly_object = Polynomial.build_from_approximation(
        log(1+sollya.x)/sollya.x, poly_degree,
        [self.precision]*(poly_degree+1), approx_interval, sollya.absolute)
    poly_object = global_poly_object.sub_poly(start_index=1)

    Log.report(Log.Info, "generating polynomial evaluation scheme")
    _poly = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, red_vxp1, unified_precision=self.precision)
    _poly.set_attributes(tag="poly", debug=debug_lftolx)
    Log.report(Log.Info, global_poly_object.get_sollya_object())

    vxp1_inv_exp = ExponentExtraction(vxp1_inv, tag="vxp1_inv_exp", debug=debugd)
    corr_exp = Conversion(-vxp1_exp + scaling_factor_exp, precision=self.precision)  # vxp1_inv_exp

    #poly = (red_vxp1) * (1 + _poly)
    #poly.set_attributes(tag="poly", debug=debug_lftolx, prevent_optimization=True)

    # reconstruction: combine the exponent contribution (log(2) split in hi/lo
    # parts), the tabulated hi/lo logarithm of the reciprocal seed and the
    # polynomial in the reduced argument, accumulating low-order terms first
    pre_result = -log_inv_hi + (red_vxp1 + red_vxp1 * _poly + (-corr_exp * log2_lo - log_inv_lo))
    pre_result.set_attributes(tag="pre_result", debug=debug_lftolx)
    exact_log2_hi_exp = -corr_exp * log2_hi
    exact_log2_hi_exp.set_attributes(tag="exact_log2_hi_exp", debug=debug_lftolx, prevent_optimization=True)
    #std_result = exact_log2_hi_exp + pre_result

    exact_log2_lo_exp = -corr_exp * log2_lo
    exact_log2_lo_exp.set_attributes(tag="exact_log2_lo_exp", debug=debug_lftolx)  #, prevent_optimization=True)

    init = exact_log2_lo_exp - log_inv_lo
    init.set_attributes(tag="init", debug=debug_lftolx, prevent_optimization=True)
    fma0 = (red_vxp1 * _poly + init)  # - log_inv_lo)
    fma0.set_attributes(tag="fma0", debug=debug_lftolx)
    step0 = fma0
    step0.set_attributes(tag="step0", debug=debug_lftolx)  #, prevent_optimization=True)
    step1 = step0 + red_vxp1
    step1.set_attributes(tag="step1", debug=debug_lftolx, prevent_optimization=True)
    step2 = -log_inv_hi + step1
    step2.set_attributes(tag="step2", debug=debug_lftolx, prevent_optimization=True)
    std_result = exact_log2_hi_exp + step2
    std_result.set_attributes(tag="std_result", debug=debug_lftolx, prevent_optimization=True)

    # main scheme
    Log.report(Log.Info, "MDL scheme")
    pre_scheme = ConditionBlock(neg_input,
        Statement(
            ClearException(),
            Raise(ML_FPE_Invalid),
            Return(FP_QNaN(self.precision))
        ),
        ConditionBlock(vx_nan_or_inf,
            ConditionBlock(vx_inf,
                Statement(
                    ClearException(),
                    Return(FP_PlusInfty(self.precision)),
                ),
                Statement(
                    ClearException(),
                    ConditionBlock(vx_snan,
                        Raise(ML_FPE_Invalid)
                    ),
                    Return(FP_QNaN(self.precision))
                )
            ),
            ConditionBlock(vx_subnormal,
                Return(vx),
                ConditionBlock(ctz_cond,
                    Statement(
                        Return(ctz_result),
                    ),
                    Statement(
                        Return(std_result)
                    )
                )
            )
        )
    )
    scheme = pre_scheme
    return scheme
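
# Illustrative sketch (local to this comment, not tied to the operation graph
# above): the "away from 0" path follows the classic identity
#     log(1 + x) = e*log(2) + log1p(u) - log(r),   with 1 + x = m * 2**e
# where r is a coarse reciprocal approximation of m, so that u = m*r - 1 stays
# roughly within +/- 2**-6 and log1p(u) can be evaluated by a short polynomial.
# The plain-float helper below uses math.frexp and a reciprocal rounded to
# TABLE_BITS fractional bits in place of the hardware seed and log_table;
# the names reduce_log1p and TABLE_BITS are specific to this sketch, and
# special inputs (x <= -1, NaN, infinities) are not handled here.
import math

TABLE_BITS = 7  # mirrors the 2**7-entry log_table above

def reduce_log1p(x):
    """Recombine log1p(x) from the reduction pieces; returns (result, u)."""
    vxp1 = 1.0 + x
    m, e = math.frexp(vxp1)                                 # vxp1 = m * 2**e, m in [0.5, 1)
    r = round((1.0 / m) * 2**TABLE_BITS) / 2**TABLE_BITS    # coarse reciprocal seed
    u = m * r - 1.0                                         # reduced argument, |u| <= ~2**-7
    # the real scheme reads log(r) as a hi/lo pair from log_table and uses a
    # Horner polynomial for log1p(u); math.log / math.log1p stand in here
    return e * math.log(2.0) - math.log(r) + math.log1p(u), u

# e.g. reduce_log1p(3.5)[0] is close to math.log1p(3.5) while the reduced
# argument reduce_log1p(3.5)[1] is about 2**-9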