def generate_fptaylor(x):
    x_low = sollya.inf(x)
    x_high = sollya.sup(x)
    # build the FPTaylor query for the polynomial evaluated on [x_low, x_high]
    query = "\n".join([
        "Variables",
        " real x in [{},{}];".format(x_low, x_high),
        "Definitions",
        " r rnd64= x;",
        " retval rnd64= {};".format(poly_expr),
        "Expressions",
        " retval;"
    ])

    # rounding error of the evaluation scheme, as measured by FPTaylor
    rnd_rel_err = None
    rnd_abs_err = None
    try:
        res = fptaylor.Result(query, {
            **config,
            "--rel-error": "true",
            "--abs-error": "true"
        })
        rnd_rel_err = float(
            res.result["relative_errors"]["final_total"]["value"])
        rnd_abs_err = float(
            res.result["absolute_errors"]["final_total"]["value"])
    except AssertionError:
        pass
    except KeyError:
        try:
            rnd_abs_err = float(
                res.result["absolute_errors"]["final_total"]["value"])
        except KeyError:
            pass

    # fall back to an absolute-error-only run if no absolute error was obtained
    if rnd_abs_err is None:
        try:
            res = fptaylor.Result(query, {
                **config,
                "--rel-error": "false",
                "--abs-error": "true"
            })
            rnd_abs_err = float(
                res.result["absolute_errors"]["final_total"]["value"])
        except AssertionError:
            pass

    # approximation error of the polynomial against exp, measured by sollya
    err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                             sollya.exp(sollya.x), x, sollya.relative,
                             2**-100)
    algo_rel_err = sollya.sup(err_int)
    err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                             sollya.exp(sollya.x), x, sollya.absolute,
                             2**-100)
    algo_abs_err = sollya.sup(err_int)

    if rnd_rel_err is None or str(algo_rel_err) == "error":
        rel_err = float("inf")
    else:
        rel_err = rnd_rel_err + algo_rel_err
    abs_err = rnd_abs_err + algo_abs_err

    return rel_err, abs_err
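# For reference, a minimal sketch of what the query string built above looks
# like once assembled, assuming a hypothetical degree-2 poly_expr and the
# interval [0, 0.25] (the FPTaylor section keywords are taken verbatim from
# the template above):
_example_query = "\n".join([
    "Variables",
    " real x in [0,0.25];",
    "Definitions",
    " r rnd64= x;",
    " retval rnd64= 1.0 + x * (1.0 + x * 0.5);",
    "Expressions",
    " retval;"
])
print(_example_query)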
def get_value_str(self, value):
    if value is Gappa_Unknown:
        return "?"
    elif isinstance(value, sollya.SollyaObject) and value.is_range():
        return "[%s, %s]" % (sollya.inf(value), sollya.sup(value))
    else:
        return str(value)
def generate_approx_poly_near_zero(self, function, high_bound, error_bound, variable):
    """ Generate a polynomial approximation scheme for @p function near zero """
    error_function = lambda p, f, ai, mod, t: sollya.dirtyinfnorm(p - f, ai)
    # Some issues were encountered when 0 is one of the interval bounds,
    # so we approximate on an interval whose lower bound is slightly above zero
    approx_interval = Interval(2**-100, high_bound)
    local_function = function / sollya.x
    degree = sollya.sup(
        sollya.guessdegree(local_function, approx_interval, error_bound))
    degree_list = range(0, int(degree) + 4, 2)

    poly_object, approx_error = Polynomial.build_from_approximation_with_error(
        function / sollya.x,
        degree_list,
        [1] + [self.precision] * (len(degree_list) - 1),
        approx_interval,
        sollya.absolute,
        error_function=error_function)
    Log.report(
        Log.Info,
        "approximation poly: {}\n with error {}".format(poly_object, approx_error))

    poly_scheme = Multiplication(
        variable,
        PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, variable,
                                                         self.precision))
    return poly_scheme, approx_error
def sollya_gamma_fct(x, diff_order, prec):
    """ wrapper to use the bigfloat implementation of the gamma function
        rather than sollya's implementation directly.
        This wrapper implements sollya's function API.

        :param x: numerical input value (may be an Interval)
        :param diff_order: differentiation order
        :param prec: numerical precision expected (min)
    """
    fct = None
    if diff_order == 0:
        fct = sollya_gamma
    elif diff_order == 1:
        fct = sollya_gamma_d0
    elif diff_order == 2:
        fct = sollya_gamma_d1
    else:
        raise NotImplementedError
    with bigfloat.precision(prec):
        if x.is_range():
            lo = sollya.inf(x)
            hi = sollya.sup(x)
            return sollya.Interval(fct(lo), fct(hi))
        else:
            return fct(x)
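# Illustrative use of the wrapper above (assumes pythonsollya and bigfloat are
# installed and that the sollya_gamma* helpers referenced above are in scope):
# evaluate gamma at differentiation order 0, over an interval, with at least
# 64 bits of working precision.
box = sollya_gamma_fct(sollya.Interval(1, 2), 0, 64)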
def generate_test_case(self, input_signals, io_map, index, test_range=Interval(-1.0, 1.0)):
    """ generic test case generation: generate a random input
        with index @p index

        Args:
            index (int): integer index of the test case

        Returns:
            dict: mapping (input tag -> numeric value)
    """
    # extracting test interval boundaries
    low_input = sollya.inf(test_range)
    high_input = sollya.sup(test_range)
    input_values = {}
    for input_tag in input_signals:
        input_signal = io_map[input_tag]
        # FIXME: correct value generation depending on signal precision
        input_precision = input_signal.get_precision().get_base_format()
        if isinstance(input_precision, ML_FP_Format):
            input_value = generate_random_fp_value(input_precision, low_input,
                                                   high_input)
        elif isinstance(input_precision, ML_Fixed_Format):
            # TODO: does not depend on low and high range bounds
            input_value = generate_random_fixed_value(input_precision)
        else:
            input_value = random.randrange(2**input_precision.get_bit_size())
        # registering input value
        input_values[input_tag] = input_value
    return input_values
def sup(obj):
    """ generic getter for an interval's upper bound """
    if isinstance(obj, SollyaObject) and obj.is_range():
        return sollya.sup(obj)
    elif isinstance(obj, (MetaInterval, MetaIntervalList)):
        return obj.sup
    else:
        raise NotImplementedError
def get_integer_format(backend, optree):
    """ return integer format to use for optree """
    int_range = optree.get_interval()
    if int_range is None:
        return backend.default_integer_format
    elif inf(int_range) < 0:
        # signed
        if sup(int_range) > 2**31 - 1 or inf(int_range) < -2**31:
            return ML_Int64
        else:
            return ML_Int32
    else:
        # unsigned
        if sup(int_range) >= 2**32 - 1:
            return ML_UInt64
        else:
            return ML_UInt32
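# Worked examples of the selection rule above, as a standalone sketch that
# mirrors the same thresholds with plain Python integers (the format names
# are reused purely as labels; no metalibm objects are involved).
def _select_int_format(lo, hi):
    if lo < 0:
        # signed
        return "ML_Int64" if (hi > 2**31 - 1 or lo < -2**31) else "ML_Int32"
    # unsigned
    return "ML_UInt64" if hi >= 2**32 - 1 else "ML_UInt32"

assert _select_int_format(-3, 100) == "ML_Int32"
assert _select_int_format(0, 2**34) == "ML_UInt64"
assert _select_int_format(-2**40, 0) == "ML_Int64"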
def test_interval_out_of_bound_risk(x_range, y_range):
    """ Try to determine from x's and y's intervals whether there is
        a risk of underflow or overflow """
    div_range = abs(x_range / y_range)
    underflow_risk = sollya.inf(div_range) < S2**(
        self.precision.get_emin_normal() + 2)
    overflow_risk = sollya.sup(div_range) > S2**(
        self.precision.get_emax() - 2)
    return underflow_risk or overflow_risk
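# Numeric illustration of the risk test above, written out with plain floats
# and the IEEE-754 binary64 bounds (emin_normal = -1022, emax = 1023; no
# metalibm objects involved): a quotient near 1e-310 trips the underflow
# guard, while 1e-290 stays clear of the overflow one.
emin_normal, emax = -1022, 1023
quotient_lo, quotient_hi = 1e-310, 1e-290
underflow_risk = quotient_lo < 2.0**(emin_normal + 2)
overflow_risk = quotient_hi > 2.0**(emax - 2)
assert underflow_risk and not overflow_risk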
def is_simplifiable_to_cst(node):
    """ node can be simplified to a constant """
    node_interval = node.get_interval()
    if node_interval is None or isinstance(node, Constant):
        return False
    elif isinstance(node_interval, SollyaObject) and node_interval.is_range():
        return sollya.inf(node_interval) == sollya.sup(node_interval)
    elif isinstance(node_interval, (MetaInterval, MetaIntervalList)):
        return not node_interval.is_empty and (node_interval.inf == node_interval.sup)
    else:
        return False
def split_domain(starting_domain, slivers):
    in_domains = [starting_domain]

    # split at 0 so that no sub-interval straddles it (abs is monotonic on each side)
    out_domains = list()
    for I in in_domains:
        if sollya.inf(I) < 0 and sollya.sup(I) > 0:
            out_domains.append(sollya.Interval(sollya.inf(I), 0))
            out_domains.append(sollya.Interval(0, sollya.sup(I)))
        else:
            out_domains.append(I)
    in_domains = out_domains

    # split at multiples of pi so that k is constant on each sub-interval
    out_domains = list()
    while len(in_domains) > 0:
        I = in_domains.pop()
        #print("in: [{}, {}]".format(float(sollya.inf(I)), float(sollya.sup(I))))
        unround_mult = I * n_invpi
        mult_low = sollya.floor(sollya.inf(unround_mult))
        mult_high = sollya.floor(sollya.sup(unround_mult))
        if mult_low == mult_high or (mult_low == -1 and mult_high == 0):
            #print("  accepted")
            out_domains.append(I)
            continue
        if sollya.sup(I) <= 0:
            divider_low = (mult_low + 1) * n_pi
            divider_high = divider_low - divider_low * 2**-53
        else:
            divider_high = (mult_low + 1) * n_pi
            divider_low = divider_high - divider_high * 2**-53
        lower_part = sollya.Interval(sollya.inf(I), divider_low)
        upper_part = sollya.Interval(divider_high, sollya.sup(I))
        #print("  -> [{}, {}]".format(float(sollya.inf(lower_part)), float(sollya.sup(lower_part))))
        #print("  -> [{}, {}]".format(float(sollya.inf(upper_part)), float(sollya.sup(upper_part))))
        in_domains.append(lower_part)
        in_domains.append(upper_part)
    in_domains = out_domains

    # subdivide each section into 2**slivers sections
    for _ in range(slivers):
        out_domains = list()
        for I in in_domains:
            mid = sollya.mid(I)
            out_domains.append(sollya.Interval(sollya.inf(I), mid))
            out_domains.append(sollya.Interval(mid, sollya.sup(I)))
        in_domains = out_domains

    in_domains = set(in_domains)
    in_domains = sorted(in_domains, key=lambda x: float(sollya.inf(x)))
    in_domains = [d for d in in_domains if sollya.inf(d) != sollya.sup(d)]
    return in_domains
def get_value_str(self, value):
    if value is Gappa_Unknown:
        return "?"
    elif isinstance(value, MetaInterval):
        return self.get_value_str(value.interval)
    elif isinstance(value, MetaIntervalList):
        # a MetaIntervalList should have been caught earlier and
        # should have generated a disjunction of cases
        raise NotImplementedError
    elif isinstance(value, sollya.SollyaObject) and value.is_range():
        return "[%s, %s]" % (sollya.inf(value), sollya.sup(value))
    else:
        return str(value)
def get_add_error_budget(lhs, rhs, eps_target):
    """ How accurate must the addition lhs + rhs be so that the
        overall relative error stays within eps_target """
    # lhs_eps = lhs.epsilon
    # rhs_eps = rhs.epsilon
    # real result = (lhs (1 + lhs_eps) + rhs (1 + rhs_eps)) (1 + add_eps)
    #             = (lhs + rhs + lhs . lhs_eps + rhs . rhs_eps) (1 + add_eps)
    #             = exact result (1 + (lhs . lhs_eps + rhs . rhs_eps) / exact result) (1 + add_eps)
    # eps_in = (lhs . lhs_eps + rhs . rhs_eps) / exact result
    # real result = exact result * (1 + eps_in + add_eps + eps_in * add_eps)
    #
    # objective: (eps_in + add_eps + eps_in * add_eps) <= eps_target
    #   |eps_in| + |add_eps| + |eps_in| * |add_eps| <= eps_target
    #   |add_eps| (1 + |eps_in|) <= |eps_target| - |eps_in|
    #   |add_eps| <= (|eps_target| - |eps_in|) / (1 + |eps_in|)
    # assuming |eps_target| > |eps_in|
    eps_in = (sup(abs(lhs.interval)) * lhs.epsilon +
              sup(abs(rhs.interval)) * rhs.epsilon) / inf(abs(lhs.interval + rhs.interval))
    assert eps_in > 0
    assert eps_in < eps_target
    add_eps_bound = (eps_target - eps_in) / (1 + eps_in)
    return add_eps_bound
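# Numeric sketch of the budget formula derived above, with plain floats in
# place of the interval/epsilon objects (all magnitudes are made-up
# illustration values): suppose |lhs| <= 1.5, |rhs| <= 0.5, |lhs + rhs| >= 1.0
# and both inputs already carry a 2^-40 relative error.
lhs_mag, rhs_mag, sum_min = 1.5, 0.5, 1.0
lhs_eps = rhs_eps = 2.0**-40
eps_target = 2.0**-35
eps_in = (lhs_mag * lhs_eps + rhs_mag * rhs_eps) / sum_min
add_eps_bound = (eps_target - eps_in) / (1 + eps_in)
# add_eps_bound is the largest relative error the addition itself may commit
# while keeping the end-to-end relative error below eps_target
assert 0 < add_eps_bound < eps_target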
def evaluate_argument_reduction(self, in_interval, in_prec, inv_size, inv_prec):
    one = Constant(1, precision=ML_Exact, tag="one")
    dx = Variable("dx",
                  precision=ML_Custom_FixedPoint_Format(0, in_prec, False),
                  interval=in_interval)

    # do the argument reduction
    x = Addition(dx, one, tag="x", precision=ML_Exact)
    x1 = Conversion(x, tag="x1",
                    precision=ML_Custom_FixedPoint_Format(0, inv_size, False),
                    rounding_mode=ML_RoundTowardMinusInfty)
    s = Multiplication(dx, Constant(S2**inv_size, precision=ML_Exact),
                       precision=ML_Exact, tag="interval_index_table")
    inv_x1 = Division(one, x1, tag="ix1", precision=ML_Exact)
    inv_x = Conversion(inv_x1, tag="ix",
                       precision=ML_Custom_FixedPoint_Format(1, inv_prec, False),
                       rounding_mode=ML_RoundTowardPlusInfty)
    y = Multiplication(x, inv_x, tag="y", precision=ML_Exact)
    dy = Subtraction(y, one, tag="dy", precision=ML_Exact)

    # add the necessary goals and hints
    dx_gappa = Variable("dx_gappa", interval=dx.get_interval(),
                        precision=dx.get_precision())
    swap_map = {dx: dx_gappa}

    # goal: dy (result of the argument reduction)
    gappa_code = self.gappa_engine.get_interval_code_no_copy(
        dy.copy(swap_map), bound_list=[swap_map[dx]])
    #self.gappa_engine.add_goal(gappa_code, s.copy(swap_map)) # range of index of table

    # hints. are the ones with isApprox=True really necessary?
    self.gappa_engine.add_hint(gappa_code, x.copy(swap_map), x1.copy(swap_map),
                               isApprox=True)
    self.gappa_engine.add_hint(gappa_code, inv_x1.copy(swap_map),
                               inv_x.copy(swap_map), isApprox=True)
    self.gappa_engine.add_hint(
        gappa_code,
        Multiplication(x1, inv_x1, precision=ML_Exact).copy(swap_map), one,
        Comparison(swap_map[inv_x1], Constant(0, precision=ML_Exact),
                   specifier=Comparison.NotEqual, precision=ML_Bool))

    # execute and parse the result
    result = execute_gappa_script_extract(gappa_code.get(self.gappa_engine))
    out_interval = result['goal']
    length_table = 1 + floor(sup(in_interval) * S2**inv_size).getConstantAsInt()
    sizeof_table = length_table * (16 + ML_Custom_FixedPoint_Format(1, inv_prec, False).get_c_bit_size() / 8)
    return {
        'out_interval': out_interval,
        'length_table': length_table,
        'sizeof_table': sizeof_table,
    }
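# Loose plain-float sketch of the reduction analysed above (not the Gappa
# proof itself): x1 truncates x to `inv_size` fractional bits, inv_x is an
# upward-rounded reciprocal on `inv_prec` bits, and dy = x * inv_x - 1 is the
# quantity whose range Gappa is asked to bound.
import math

def _sketch_reduction(x, inv_size=7, inv_prec=9):
    x1 = math.floor(x * 2.0**inv_size) / 2.0**inv_size             # round toward -inf
    inv_x = math.ceil((1.0 / x1) * 2.0**inv_prec) / 2.0**inv_prec  # round toward +inf
    return x * inv_x - 1.0                                         # dy

assert abs(_sketch_reduction(1.3)) < 2.0**-6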
def generate_json(errors, domain):
    errors = [err for err in errors if err[0] in domain]
    errors.sort(key=lambda err: err[2])
    epsilon = errors[0][2]
    delta = max(err[1] for err in errors)
    d = {
        "cname": self.function_name,
        "delta": float(delta),
        "domain": [float(sollya.inf(domain)), float(sollya.sup(domain))],
        "epsilon": float(epsilon),
        "operation": "log"
    }
    return d
def piecewise_approximation_degree_generator(function,
                                             bound_low=-1.0,
                                             bound_high=1.0,
                                             num_intervals=16,
                                             max_degree=2,
                                             error_threshold=S2**-24):
    """ yield, for each of the @p num_intervals sub-intervals of
        [@p bound_low, @p bound_high], the polynomial degree guessed by
        sollya to approximate @p function within @p error_threshold """
    interval_size = (bound_high - bound_low) / num_intervals
    for i in range(num_intervals):
        subint_low = bound_low + i * interval_size
        subint_high = bound_low + (i + 1) * interval_size
        local_function = function(sollya.x + subint_low)
        local_interval = Interval(-interval_size, interval_size)
        local_degree = sollya.guessdegree(local_function, local_interval,
                                          error_threshold)
        yield int(sollya.sup(local_degree))
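# Illustrative call of the generator above (pythonsollya assumed in scope):
# list the guessed degree for each of 8 sub-intervals of [0, 1] when
# approximating exp to within 2^-24.
degrees = list(piecewise_approximation_degree_generator(
    sollya.exp, bound_low=0.0, bound_high=1.0,
    num_intervals=8, error_threshold=S2**-24))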
def get_precision_rng(precision, value_range=None):
    if value_range is None:
        # default full-range value generation
        base_format = precision.get_base_format()
        if isinstance(base_format, ML_FP_MultiElementFormat):
            return MPFPRandomGen(precision)
        elif isinstance(base_format, ML_FP_Format):
            return FPRandomGen(precision, include_snan=False)
        elif isinstance(base_format, ML_Fixed_Format):
            return FixedPointRandomGen(precision)
        else:
            Log.report(Log.Error,
                       "unsupported format {}/{} in get_precision_rng",
                       precision, base_format)
    else:
        low_bound = sollya.inf(value_range)
        high_bound = sollya.sup(value_range)
        return get_precision_rng_with_defined_range(precision, low_bound, high_bound)
def split_domain(starting_domain, slivers):
    in_domains = [starting_domain]

    # split so that floor(log2) is constant on each sub-interval
    out_domains = list()
    while len(in_domains) > 0:
        I = in_domains.pop()
        unround_e = sollya.log2(I)
        e_low = sollya.floor(sollya.inf(unround_e))
        e_high = sollya.floor(sollya.sup(unround_e))
        #print("in: [{}, {}] ({}, {})".format(float(sollya.inf(I)), float(sollya.sup(I)), int(e_low), int(e_high)))
        if e_low == e_high:
            #print("  accepted")
            out_domains.append(I)
            continue

        # bisect towards the power of two separating the two binades
        e_range = sollya.Interval(e_low, e_low + 1)
        I_range = 2**e_range
        for _ in range(100):
            mid = sollya.mid(I_range)
            e = sollya.floor(sollya.log2(mid))
            if e == e_low:
                I_range = sollya.Interval(mid, sollya.sup(I_range))
            else:
                I_range = sollya.Interval(sollya.inf(I_range), mid)
        divider_high = sollya.sup(I_range)
        divider_low = sollya.inf(I_range)
        lower_part = sollya.Interval(sollya.inf(I), divider_low)
        upper_part = sollya.Interval(divider_high, sollya.sup(I))
        #print("  -> [{}, {}]".format(float(sollya.inf(lower_part)), float(sollya.sup(lower_part))))
        #print("  -> [{}, {}]".format(float(sollya.inf(upper_part)), float(sollya.sup(upper_part))))
        in_domains.append(upper_part)
        in_domains.append(lower_part)
    in_domains = out_domains

    # subdivide each section into 2**slivers sections
    for _ in range(slivers):
        out_domains = list()
        for I in in_domains:
            mid = sollya.mid(I)
            out_domains.append(sollya.Interval(sollya.inf(I), mid))
            out_domains.append(sollya.Interval(mid, sollya.sup(I)))
        in_domains = out_domains

    in_domains = set(in_domains)
    in_domains = sorted(in_domains, key=lambda x: float(sollya.inf(x)))
    in_domains = [d for d in in_domains if sollya.inf(d) != sollya.sup(d)]
    return in_domains
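# Example invocation of the splitter above (pythonsollya assumed): cut
# [0.75, 5.0] so that every returned piece lies within a single binade, then
# halve each piece once (slivers=1) and print the resulting bounds.
pieces = split_domain(sollya.Interval(0.75, 5.0), 1)
for piece in pieces:
    print(float(sollya.inf(piece)), float(sollya.sup(piece)))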
def findMaxIssue(res):
    """ Find the issue with the maximum error

    Parameters:
    - res: (sollya object) result from the checkModulusFilterInSpecification function

    Returns the maximum error value (0 if not available)
    """
    maxError = 0
    for b in dict(res)["results"]:  # for every band
        okay = dict(b)["okay"]
        if not okay:
            for i in dict(b)["issue"]:  # for every issue
                H = dict(i)["H"]
                betaInf = dict(dict(i)["specification"])["betaInf"]
                betaSup = dict(dict(i)["specification"])["betaSup"]
                if sollya.inf(H) > betaSup:
                    maxError = sollya.max(maxError, sollya.sup(H) - betaSup)
                else:
                    maxError = sollya.max(maxError, betaSup - sollya.inf(H))
    return maxError
def __init__(self, precision=ML_Binary32, abs_accuracy=S2**-24, libm_compliant=True, debug_flag=False, fuse_fma=True, fast_path_extract=True, target=GenericProcessor(), output_file="log1pf.c", function_name="log1pf"): # declaring CodeFunction and retrieving input variable self.function_name = function_name self.precision = precision self.processor = target func_implementation = CodeFunction(self.function_name, output_format=self.precision) vx = func_implementation.add_input_variable("x", self.precision) sollya_precision = self.precision.sollya_object # debug utilities debugf = ML_Debug(display_format="%f") debuglf = ML_Debug(display_format="%lf") debugx = ML_Debug(display_format="%x") debuglx = ML_Debug(display_format="%\"PRIx64\"", ) debugd = ML_Debug(display_format="%d", pre_process=lambda v: "(int) %s" % v) debugld = ML_Debug(display_format="%ld") #debug_lftolx = ML_Debug(display_format = "%\"PRIx64\"", pre_process = lambda v: "double_to_64b_encoding(%s)" % v) debug_lftolx = ML_Debug( display_format="%\"PRIx64\" ev=%x", pre_process=lambda v: "double_to_64b_encoding(%s), __k1_fpu_get_exceptions()" % v) debug_ddtolx = ML_Debug( display_format="%\"PRIx64\" %\"PRIx64\"", pre_process=lambda v: "double_to_64b_encoding(%s.hi), double_to_64b_encoding(%s.lo)" % (v, v)) debug_dd = ML_Debug(display_format="{.hi=%lf, .lo=%lf}", pre_process=lambda v: "%s.hi, %s.lo" % (v, v)) # local overloading of RaiseReturn operation def ExpRaiseReturn(*args, **kwords): kwords["arg_value"] = vx kwords["function_name"] = self.function_name return RaiseReturn(*args, **kwords) log2_hi_value = round( log(2), self.precision.get_field_size() - (self.precision.get_exponent_size() + 1), sollya.RN) log2_lo_value = round( log(2) - log2_hi_value, self.precision.sollya_object, sollya.RN) log2_hi = Constant(log2_hi_value, precision=self.precision) log2_lo = Constant(log2_lo_value, precision=self.precision) vx_exp = ExponentExtraction(vx, tag="vx_exp", debug=debugd) int_precision = ML_Int64 if self.precision is ML_Binary64 else ML_Int32 # retrieving processor inverse approximation table dummy_var = Variable("dummy", precision=self.precision) dummy_div_seed = DivisionSeed(dummy_var, precision=self.precision) inv_approx_table = self.processor.get_recursive_implementation( dummy_div_seed, language=None, table_getter=lambda self: self.approx_table_map) # table creation table_index_size = 7 log_table = ML_Table(dimensions=[2**table_index_size, 2], storage_precision=self.precision) log_table[0][0] = 0.0 log_table[0][1] = 0.0 for i in xrange(1, 2**table_index_size): #inv_value = (1.0 + (self.processor.inv_approx_table[i] / S2**9) + S2**-52) * S2**-1 inv_value = (1.0 + (inv_approx_table[i][0] / S2**9)) * S2**-1 value_high = round( log(inv_value), self.precision.get_field_size() - (self.precision.get_exponent_size() + 1), sollya.RN) value_low = round( log(inv_value) - value_high, sollya_precision, sollya.RN) log_table[i][0] = value_high log_table[i][1] = value_low vx_exp = ExponentExtraction(vx, tag="vx_exp", debug=debugd) # case close to 0: ctz ctz_exp_limit = -7 ctz_cond = vx_exp < ctz_exp_limit ctz_interval = Interval(-S2**ctz_exp_limit, S2**ctz_exp_limit) ctz_poly_degree = sup( guessdegree( log1p(sollya.x) / sollya.x, ctz_interval, S2** -(self.precision.get_field_size() + 1))) + 1 ctz_poly_object = Polynomial.build_from_approximation( log1p(sollya.x) / sollya.x, ctz_poly_degree, [self.precision] * (ctz_poly_degree + 1), ctz_interval, sollya.absolute) print "generating polynomial evaluation scheme" ctz_poly = 
PolynomialSchemeEvaluator.generate_horner_scheme( ctz_poly_object, vx, unified_precision=self.precision) ctz_poly.set_attributes(tag="ctz_poly", debug=debug_lftolx) ctz_result = vx * ctz_poly neg_input = Comparison(vx, -1, likely=False, specifier=Comparison.Less, debug=debugd, tag="neg_input") vx_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False, debug=debugd, tag="nan_or_inf") vx_snan = Test(vx, specifier=Test.IsSignalingNaN, likely=False, debug=debugd, tag="snan") vx_inf = Test(vx, specifier=Test.IsInfty, likely=False, debug=debugd, tag="inf") vx_subnormal = Test(vx, specifier=Test.IsSubnormal, likely=False, debug=debugd, tag="vx_subnormal") log_function_code = CodeFunction( "new_log", [Variable("x", precision=ML_Binary64)], output_format=ML_Binary64) log_call_generator = FunctionOperator( log_function_code.get_name(), arity=1, output_precision=ML_Binary64, declare_prototype=log_function_code) newlog_function = FunctionObject(log_function_code.get_name(), (ML_Binary64, ), ML_Binary64, log_call_generator) # case away from 0.0 pre_vxp1 = vx + 1.0 pre_vxp1.set_attributes(tag="pre_vxp1", debug=debug_lftolx) pre_vxp1_exp = ExponentExtraction(pre_vxp1, tag="pre_vxp1_exp", debug=debugd) cm500 = Constant(-500, precision=ML_Int32) c0 = Constant(0, precision=ML_Int32) cond_scaling = pre_vxp1_exp > 2**(self.precision.get_exponent_size() - 2) scaling_factor_exp = Select(cond_scaling, cm500, c0) scaling_factor = ExponentInsertion(scaling_factor_exp, precision=self.precision, tag="scaling_factor") vxp1 = pre_vxp1 * scaling_factor vxp1.set_attributes(tag="vxp1", debug=debug_lftolx) vxp1_exp = ExponentExtraction(vxp1, tag="vxp1_exp", debug=debugd) vxp1_inv = DivisionSeed(vxp1, precision=self.precision, tag="vxp1_inv", debug=debug_lftolx, silent=True) vxp1_dirty_inv = ExponentInsertion(-vxp1_exp, precision=self.precision, tag="vxp1_dirty_inv", debug=debug_lftolx) table_index = BitLogicAnd(BitLogicRightShift( TypeCast(vxp1, precision=int_precision, debug=debuglx), self.precision.get_field_size() - 7, debug=debuglx), 0x7f, tag="table_index", debug=debuglx) # argument reduction # TODO: detect if single operand inverse seed is supported by the targeted architecture pre_arg_red_index = TypeCast(BitLogicAnd(TypeCast(vxp1_inv, precision=ML_UInt64), Constant(-2, precision=ML_UInt64), precision=ML_UInt64), precision=self.precision, tag="pre_arg_red_index", debug=debug_lftolx) arg_red_index = Select(Equal(table_index, 0), vxp1_dirty_inv, pre_arg_red_index, tag="arg_red_index", debug=debug_lftolx) red_vxp1 = Select(cond_scaling, arg_red_index * vxp1 - 1.0, (arg_red_index * vx - 1.0) + arg_red_index) #red_vxp1 = arg_red_index * vxp1 - 1.0 red_vxp1.set_attributes(tag="red_vxp1", debug=debug_lftolx) log_inv_lo = TableLoad(log_table, table_index, 1, tag="log_inv_lo", debug=debug_lftolx) log_inv_hi = TableLoad(log_table, table_index, 0, tag="log_inv_hi", debug=debug_lftolx) inv_err = S2**-6 # TODO: link to target DivisionSeed precision print "building mathematical polynomial" approx_interval = Interval(-inv_err, inv_err) poly_degree = sup( guessdegree( log(1 + sollya.x) / sollya.x, approx_interval, S2** -(self.precision.get_field_size() + 1))) + 1 global_poly_object = Polynomial.build_from_approximation( log(1 + sollya.x) / sollya.x, poly_degree, [self.precision] * (poly_degree + 1), approx_interval, sollya.absolute) poly_object = global_poly_object.sub_poly(start_index=1) print "generating polynomial evaluation scheme" _poly = PolynomialSchemeEvaluator.generate_horner_scheme( poly_object, red_vxp1, 
unified_precision=self.precision) _poly.set_attributes(tag="poly", debug=debug_lftolx) print global_poly_object.get_sollya_object() vxp1_inv_exp = ExponentExtraction(vxp1_inv, tag="vxp1_inv_exp", debug=debugd) corr_exp = -vxp1_exp + scaling_factor_exp # vxp1_inv_exp #poly = (red_vxp1) * (1 + _poly) #poly.set_attributes(tag = "poly", debug = debug_lftolx, prevent_optimization = True) pre_result = -log_inv_hi + (red_vxp1 + red_vxp1 * _poly + (-corr_exp * log2_lo - log_inv_lo)) pre_result.set_attributes(tag="pre_result", debug=debug_lftolx) exact_log2_hi_exp = -corr_exp * log2_hi exact_log2_hi_exp.set_attributes(tag="exact_log2_hi_exp", debug=debug_lftolx, prevent_optimization=True) #std_result = exact_log2_hi_exp + pre_result exact_log2_lo_exp = -corr_exp * log2_lo exact_log2_lo_exp.set_attributes( tag="exact_log2_lo_exp", debug=debug_lftolx) #, prevent_optimization = True) init = exact_log2_lo_exp - log_inv_lo init.set_attributes(tag="init", debug=debug_lftolx, prevent_optimization=True) fma0 = (red_vxp1 * _poly + init) # - log_inv_lo) fma0.set_attributes(tag="fma0", debug=debug_lftolx) step0 = fma0 step0.set_attributes( tag="step0", debug=debug_lftolx) #, prevent_optimization = True) step1 = step0 + red_vxp1 step1.set_attributes(tag="step1", debug=debug_lftolx, prevent_optimization=True) step2 = -log_inv_hi + step1 step2.set_attributes(tag="step2", debug=debug_lftolx, prevent_optimization=True) std_result = exact_log2_hi_exp + step2 std_result.set_attributes(tag="std_result", debug=debug_lftolx, prevent_optimization=True) # main scheme print "MDL scheme" pre_scheme = ConditionBlock( neg_input, Statement(ClearException(), Raise(ML_FPE_Invalid), Return(FP_QNaN(self.precision))), ConditionBlock( vx_nan_or_inf, ConditionBlock( vx_inf, Statement( ClearException(), Return(FP_PlusInfty(self.precision)), ), Statement(ClearException(), ConditionBlock(vx_snan, Raise(ML_FPE_Invalid)), Return(FP_QNaN(self.precision)))), ConditionBlock( vx_subnormal, Return(vx), ConditionBlock(ctz_cond, Statement(Return(ctz_result), ), Statement(Return(std_result)))))) scheme = pre_scheme #print scheme.get_str(depth = None, display_precision = True) opt_eng = OptimizationEngine(self.processor) # fusing FMA print "MDL fusing FMA" scheme = opt_eng.fuse_multiply_add(scheme, silence=True) print "MDL abstract scheme" opt_eng.instantiate_abstract_precision(scheme, None) #print scheme.get_str(depth = None, display_precision = True) print "MDL instantiated scheme" opt_eng.instantiate_precision(scheme, default_precision=ML_Binary32) print "subexpression sharing" opt_eng.subexpression_sharing(scheme) print "silencing operation" opt_eng.silence_fp_operations(scheme) # registering scheme as function implementation func_implementation.set_scheme(scheme) # check processor support opt_eng.check_processor_support(scheme) # factorizing fast path opt_eng.factorize_fast_path(scheme) #print scheme.get_str(depth = None, display_precision = True) cg = CCodeGenerator(self.processor, declare_cst=False, disable_debug=not debug_flag, libm_compliant=libm_compliant) self.result = func_implementation.get_definition(cg, C_Code, static_cst=True) self.result.add_header("support_lib/ml_special_values.h") self.result.add_header("math.h") self.result.add_header("stdio.h") self.result.add_header("inttypes.h") #print self.result.get(cg) output_stream = open("%s.c" % func_implementation.get_name(), "w") output_stream.write(self.result.get(cg)) output_stream.close()
def generate_scheme(self): #func_implementation = CodeFunction(self.function_name, output_format = self.precision) vx = self.implementation.add_input_variable("x", self.get_input_precision()) sollya_precision = self.get_sollya_precision() # retrieving processor inverse approximation table #dummy_var = Variable("dummy", precision = self.precision) #dummy_div_seed = DivisionSeed(dummy_var, precision = self.precision) #inv_approx_table = self.processor.get_recursive_implementation(dummy_div_seed, language = None, table_getter = lambda self: self.approx_table_map) lo_bound_global = SollyaObject(0.0) hi_bound_global = SollyaObject(0.75) approx_interval = Interval(lo_bound_global, hi_bound_global) approx_interval_size = hi_bound_global - lo_bound_global # table creation table_index_size = 7 field_index_size = 2 exp_index_size = table_index_size - field_index_size table_size = 2**table_index_size table_index_range = range(table_size) local_degree = 9 coeff_table = ML_Table(dimensions = [table_size, local_degree], storage_precision = self.precision) #local_interval_size = approx_interval_size / SollyaObject(table_size) #for i in table_index_range: # degree = 6 # lo_bound = lo_bound_global + i * local_interval_size # hi_bound = lo_bound_global + (i+1) * local_interval_size # approx_interval = Interval(lo_bound, hi_bound) # local_poly_object, local_error = Polynomial.build_from_approximation_with_error(acos(x), degree, [self.precision] * (degree+1), approx_interval, absolute) # local_error = int(log2(sup(abs(local_error / acos(approx_interval))))) # print approx_interval, local_error exp_lo = 2**exp_index_size for i in table_index_range: lo_bound = (1.0 + (i % 2**field_index_size) * S2**-field_index_size) * S2**(i / 2**field_index_size - exp_lo) hi_bound = (1.0 + ((i % 2**field_index_size) + 1) * S2**-field_index_size) * S2**(i / 2**field_index_size - exp_lo) local_approx_interval = Interval(lo_bound, hi_bound) local_poly_object, local_error = Polynomial.build_from_approximation_with_error(acos(1 - x), local_degree, [self.precision] * (local_degree+1), local_approx_interval, sollya.absolute) local_error = int(log2(sup(abs(local_error / acos(1 - local_approx_interval))))) coeff_table print local_approx_interval, local_error for d in xrange(local_degree): coeff_table[i][d] = sollya.coeff(local_poly_object.get_sollya_object(), d) table_index = BitLogicRightShift(vx, vx.get_precision().get_field_size() - field_index_size) - (exp_lo << field_index_size) print "building mathematical polynomial" poly_degree = sup(sollya.guessdegree(acos(x), approx_interval, S2**-(self.precision.get_field_size()))) print "guessed polynomial degree: ", int(poly_degree) #global_poly_object = Polynomial.build_from_approximation(log10(1+x)/x, poly_degree, [self.precision]*(poly_degree+1), approx_interval, absolute) print "generating polynomial evaluation scheme" #_poly = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, _red_vx, unified_precision = self.precision) # building eval error map #eval_error_map = { # red_vx: Variable("red_vx", precision = self.precision, interval = red_vx.get_interval()), # log_inv_hi: Variable("log_inv_hi", precision = self.precision, interval = table_high_interval), # log_inv_lo: Variable("log_inv_lo", precision = self.precision, interval = table_low_interval), #} # computing gappa error #poly_eval_error = self.get_eval_error(result, eval_error_map) # main scheme print "MDL scheme" scheme = Statement(Return(vx)) return scheme
def generic_poly_split(offset_fct, indexing, target_eps, coeff_precision, vx): """ generate the meta approximation for @p offset_fct over several intervals defined by @p indexing object For each sub-interval, a polynomial approximation with maximal_error @p target_eps is tabulated, and evaluated using format @p coeff_precision. The input variable is @p vx """ # computing degree for a different polynomial approximation on each # sub-interval poly_degree_list = [ int(sup(guessdegree(offset_fct(offset), sub_interval, target_eps))) for offset, sub_interval in indexing.get_offseted_sub_list() ] poly_max_degree = max(poly_degree_list) # tabulating polynomial coefficients on split_num sub-interval of interval poly_table = ML_NewTable( dimensions=[indexing.split_num, poly_max_degree + 1], storage_precision=coeff_precision, const=True) offset_table = ML_NewTable(dimensions=[indexing.split_num], storage_precision=coeff_precision, const=True) max_error = 0.0 for sub_index in range(indexing.split_num): poly_degree = poly_degree_list[sub_index] offset, approx_interval = indexing.get_offseted_sub_interval(sub_index) offset_table[sub_index] = offset if poly_degree == 0: # managing constant approximation separately since it seems # to break sollya local_approx = coeff_precision.round_sollya_object( offset_fct(offset)(inf(approx_interval))) poly_table[sub_index][0] = local_approx for monomial_index in range(1, poly_max_degree + 1): poly_table[sub_index][monomial_index] = 0 approx_error = sollya.infnorm( offset_fct(offset) - local_approx, approx_interval) else: poly_object, approx_error = Polynomial.build_from_approximation_with_error( offset_fct(offset), poly_degree, [coeff_precision] * (poly_degree + 1), approx_interval, sollya.relative) for monomial_index in range(poly_max_degree + 1): if monomial_index <= poly_degree: poly_table[sub_index][ monomial_index] = poly_object.coeff_map[monomial_index] else: poly_table[sub_index][monomial_index] = 0 max_error = max(approx_error, max_error) Log.report(Log.Debug, "max approx error is {}", max_error) # indexing function: derive index from input @p vx value poly_index = indexing.get_index_node(vx) poly_index.set_attributes(tag="poly_index", debug=debug_multi) ext_precision = get_extended_fp_precision(coeff_precision) # building polynomial evaluation scheme offset = TableLoad(offset_table, poly_index, precision=coeff_precision, tag="offset", debug=debug_multi) poly = TableLoad(poly_table, poly_index, poly_max_degree, precision=coeff_precision, tag="poly_init", debug=debug_multi) red_vx = Subtraction(vx, offset, precision=vx.precision, tag="red_vx", debug=debug_multi) for monomial_index in range(poly_max_degree, -1, -1): coeff = TableLoad(poly_table, poly_index, monomial_index, precision=coeff_precision, tag="poly_%d" % monomial_index, debug=debug_multi) #fma_precision = coeff_precision if monomial_index > 1 else ext_precision fma_precision = coeff_precision poly = FMA(red_vx, poly, coeff, precision=fma_precision) #return Conversion(poly, precision=coeff_precision) #return poly.hi return poly
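# Minimal standalone sketch of the tabulate-then-select scheme built above,
# using plain Python floats, a uniform split of [0, 1) and Taylor coefficients
# of exp in place of an Indexing object and fpminimax polynomials (all names
# here are illustrative, not metalibm API).
import math

_SPLIT_NUM = 8
_DEGREE = 3
_offsets = [i / _SPLIT_NUM for i in range(_SPLIT_NUM)]
_coeffs = [[math.exp(o) / math.factorial(k) for k in range(_DEGREE + 1)]
           for o in _offsets]

def _eval_split(x):
    idx = min(int(x * _SPLIT_NUM), _SPLIT_NUM - 1)   # indexing function
    red_x = x - _offsets[idx]                        # offset subtraction
    acc = _coeffs[idx][_DEGREE]
    for k in range(_DEGREE - 1, -1, -1):             # Horner / FMA chain
        acc = acc * red_x + _coeffs[idx][k]
    return acc

assert abs(_eval_split(0.37) - math.exp(0.37)) < 1e-4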
def generate_expr( self, code_object, optree, folded=False, result_var=None, initial=False, language=None, ## force to store result in a variable, wrapping CodeExpression # in CodeVariable force_variable_storing=False): """ code generation function """ language = self.language if language is None else language # search if <optree> has already been processed if self.has_memoization(optree): result = self.get_memoization(optree) if isinstance(result, CodeExpression) and force_variable_storing: # forcing storing and translation CodeExpression to CodeVariable # if force_variable_storing is set result_precision = result.precision prefix_tag = optree.get_tag( default="var_result" ) if force_variable_storing else "tmp_result" final_var = result_var if result_var else code_object.get_free_var_name( result_precision, prefix=prefix_tag, declare=True) code_object << self.generate_code_assignation( code_object, final_var, result.get()) result = CodeVariable(final_var, result_precision) return result result = None # implementation generation if isinstance(optree, CodeVariable): result = optree elif isinstance(optree, Variable): if optree.get_var_type() is Variable.Local: final_var = code_object.get_free_var_name( optree.get_precision(), prefix=optree.get_tag(), declare=True, var_ctor=Variable) result = CodeVariable(final_var, optree.get_precision()) else: result = CodeVariable(optree.get_tag(), optree.get_precision()) elif isinstance(optree, Signal): if optree.get_var_type() is Variable.Local: final_var = code_object.declare_signal(optree, optree.get_precision(), prefix=optree.get_tag()) result = CodeVariable(final_var, optree.get_precision()) else: result = CodeVariable(optree.get_tag(), optree.get_precision()) elif isinstance(optree, Constant): precision = optree.get_precision() # .get_base_format() if force_variable_storing or self.declare_cst or optree.get_precision( ).is_cst_decl_required(): cst_prefix = "cst" if optree.get_tag( ) is None else optree.get_tag() cst_varname = code_object.declare_cst(optree, prefix=cst_prefix) result = CodeVariable(cst_varname, precision) else: if precision is ML_Integer: result = CodeExpression("%d" % optree.get_value(), precision) else: try: result = CodeExpression( precision.get_cst(optree.get_value(), language=language), precision) except: result = CodeExpression( precision.get_cst(optree.get_value(), language=language), precision) Log.report( Log.Error, "Error during get_cst call for Constant: {} ", optree) # Exception print elif isinstance(optree, Assert): cond = optree.get_input(0) error_msg = optree.get_error_msg() severity = optree.get_severity() cond_code = self.generate_expr(code_object, cond, folded=False, language=language) code_object << " assert {cond} report {error_msg} severity {severity};\n".format( cond=cond_code.get(), error_msg=error_msg, severity=severity.descriptor) return None elif isinstance(optree, Wait): time_ns = optree.get_time_ns() code_object << "wait for {time_ns} ns;\n".format(time_ns=time_ns) return None elif isinstance(optree, SwitchBlock): switch_value = optree.inputs[0] # generating pre_statement self.generate_expr(code_object, optree.get_pre_statement(), folded=folded, language=language) switch_value_code = self.generate_expr(code_object, switch_value, folded=folded, language=language) case_map = optree.get_case_map() code_object << "\nswitch(%s) {\n" % switch_value_code.get() for case in case_map: case_value = case case_statement = case_map[case] if isinstance(case_value, tuple): for sub_case in case: code_object << "case %s:\n" 
% sub_case else: code_object << "case %s:\n" % case code_object.open_level() self.generate_expr(code_object, case_statement, folded=folded, language=language) code_object.close_level() code_object << "}\n" return None elif isinstance(optree, ReferenceAssign): output_var = optree.inputs[0] result_value = optree.inputs[1] output_var_code = self.generate_expr(code_object, output_var, folded=False, language=language) def get_assign_symbol(node): if isinstance(node, Signal): assign_sign = "<=" elif isinstance(node, Variable): assign_sign = ":=" else: Log.report(Log.Error, "unsupported node for assign symbol:\n {}", node) return assign_sign if isinstance(output_var, Signal) or isinstance( output_var, Variable): assign_sign = get_assign_symbol(output_var) elif isinstance(output_var, VectorElementSelection) or isinstance( output_var, SubSignalSelection): select_input = output_var.get_input(0) assign_sign = get_assign_symbol(select_input) else: Log.report(Log.Error, "unsupported node for assign symbol:\n {}", node) if isinstance(result_value, Constant): # generate assignation result_value_code = self.generate_expr(code_object, result_value, folded=folded, language=language) code_object << self.generate_assignation( output_var_code.get(), result_value_code.get(), assign_sign=assign_sign) else: #result_value_code = self.generate_expr(code_object, result_value, folded = True, force_variable_storing = True, language = language) result_value_code = self.generate_expr(code_object, result_value, folded=True, language=language) code_object << self.generate_assignation( output_var_code.get(), result_value_code.get(), assign_sign=assign_sign) if optree.get_debug() and not self.disable_debug: self.generate_debug_msg(result_value, result_value_code, code_object, debug_object=optree.get_debug()) #code_object << self.generate_assignation(output_var_code.get(), result_value_code.get()) #code_object << output_var.get_precision().generate_c_assignation(output_var_code, result_value_code) return None elif isinstance(optree, RangeLoop): iterator = optree.get_input(0) loop_body = optree.get_input(1) loop_range = optree.get_loop_range() specifier = optree.get_specifier() range_pattern = "{lower} to {upper}" if specifier is RangeLoop.Increasing else "{upper} dowto {lower}" range_code = range_pattern.format(lower=sollya.inf(loop_range), upper=sollya.sup(loop_range)) iterator_code = self.generate_expr(code_object, iterator, folded=folded, language=language) code_object << "\n for {iterator} in {loop_range} loop\n".format( iterator=iterator_code.get(), loop_range=range_code) code_object.inc_level() body_code = self.generate_expr(code_object, loop_body, folded=folded, language=language) assert body_code is None code_object.dec_level() code_object << "end loop;\n" return None elif isinstance(optree, Loop): init_statement = optree.inputs[0] exit_condition = optree.inputs[1] loop_body = optree.inputs[2] self.generate_expr(code_object, init_statement, folded=folded, language=language) code_object << "\nfor (;%s;)" % self.generate_expr( code_object, exit_condition, folded=False, language=language).get() code_object.open_level() self.generate_expr(code_object, loop_body, folded=folded, language=language) code_object.close_level() return None elif isinstance(optree, Process): # generating pre_statement for process pre_statement = optree.get_pre_statement() self.generate_expr(code_object, optree.get_pre_statement(), folded=folded, language=language) sensibility_list = [ self.generate_expr(code_object, op, folded=True, 
language=language).get() for op in optree.get_sensibility_list() ] sensibility_list = "({})".format(", ".join( sensibility_list)) if len(sensibility_list) != 0 else "" code_object << "process{}\n".format(sensibility_list) self.open_memoization_level() code_object.open_level( extra_shared_tables=[MultiSymbolTable.SignalSymbol], var_ctor=Variable) for process_stat in optree.inputs: self.generate_expr(code_object, process_stat, folded=folded, initial=False, language=language) code_object.close_level() self.close_memoization_level() code_object << "end process;\n\n" return None elif isinstance(optree, PlaceHolder): first_input = optree.get_input(0) first_input_code = self.generate_expr(code_object, first_input, folded=folded, language=language) for op in optree.get_inputs()[1:]: _ = self.generate_expr(code_object, op, folded=folded, language=language) result = first_input_code elif isinstance(optree, ComponentInstance): component_object = optree.get_component_object() component_name = component_object.get_name() code_object.declare_component(component_name, component_object) io_map = optree.get_io_map() component_tag = optree.get_tag() if component_tag is None: component_tag = "{component_name}_i{instance_id}".format( component_name=component_name, instance_id=optree.get_instance_id()) # component tag uniquifying component_tag = code_object.get_free_name(component_object, prefix=component_tag) mapped_io = {} for io_tag in io_map: mapped_io[io_tag] = self.generate_expr(code_object, io_map[io_tag], folded=True, language=language) code_object << "\n{component_tag} : {component_name}\n".format( component_name=component_name, component_tag=component_tag) code_object << " port map (\n" code_object << " " + ", \n ".join( "{} => {}".format(io_tag, mapped_io[io_tag].get()) for io_tag in mapped_io) code_object << "\n);\n" return None elif isinstance(optree, ConditionBlock): condition = optree.inputs[0] if_branch = optree.inputs[1] else_branch = optree.inputs[2] if len(optree.inputs) > 2 else None # generating pre_statement self.generate_expr(code_object, optree.get_pre_statement(), folded=folded, language=language) cond_code = self.generate_expr(code_object, condition, folded=False, language=language) try: cond_likely = condition.get_likely() except AttributeError: Log.report( Log.Error, "The following condition has no (usable) likely attribute: {}", condition) code_object << "if %s then\n " % cond_code.get() code_object.inc_level() if_branch_code = self.generate_expr(code_object, if_branch, folded=False, language=language) code_object.dec_level() if else_branch: code_object << " else\n " code_object.inc_level() else_branch_code = self.generate_expr(code_object, else_branch, folded=True, language=language) code_object.dec_level() else: # code_object << "\n" pass code_object << "end if;\n" return None elif isinstance(optree, Select): # we go through all of select operands to # flatten the select tree def flatten_select(op, cond=None): """ Process recursively a Select operation to build a list of tuple (result, condition) """ if not isinstance(op, Select): return [(op, cond)] lcond = op.inputs[0] if cond is None else LogicalAnd( op.inputs[0], cond, precision=cond.get_precision()) return flatten_select(op.inputs[1], lcond) + flatten_select( op.inputs[2], cond) def legalize_select_input(select_input): if select_input.get_precision().get_bit_size( ) != optree.get_precision().get_bit_size(): return Conversion(select_input, precision=optree.get_precision()) else: return select_input prefix = 
optree.get_tag(default="setmp") result_varname = result_var if result_var != None else code_object.get_free_var_name( optree.get_precision(), prefix=prefix) result = CodeVariable(result_varname, optree.get_precision()) select_opcond_list = flatten_select(optree) if not select_opcond_list[-1][1] is None: Log.report( Log.Error, "last condition in flatten select differs from None") gen_list = [] for op, cond in select_opcond_list: op = legalize_select_input(op) op_code = self.generate_expr(code_object, op, folded=folded, language=language) if not cond is None: cond_code = self.generate_expr(code_object, cond, folded=True, force_variable_storing=True, language=language) gen_list.append((op_code, cond_code)) else: gen_list.append((op_code, None)) code_object << "{result} <= \n".format(result=result.get()) code_object.inc_level() for op_code, cond_code in gen_list: if not cond_code is None: code_object << "{op_code} when {cond_code} else\n".format( op_code=op_code.get(), cond_code=cond_code.get()) else: code_object << "{op_code};\n".format(op_code=op_code.get()) code_object.dec_level() elif isinstance(optree, TableLoad): table = optree.get_input(0) index = optree.get_input(1) index_code = self.generate_expr(code_object, index, folded=folded, language=language) prefix = optree.get_tag(default="table_value") result_varname = result_var if result_var != None else code_object.get_free_var_name( optree.get_precision(), prefix=prefix) result = CodeVariable(result_varname, optree.get_precision()) code_object << "with {index} select {result} <=\n".format( index=index_code.get(), result=result.get()) table_dimensions = table.get_precision().get_dimensions() assert len(table_dimensions) == 1 table_size = table_dimensions[0] default_value = 0 # linearizing table selection for tabid, value in enumerate(table.get_data()): code_object << "\t{} when {},\n".format( table.get_precision().get_storage_precision().get_cst( value), index.get_precision().get_cst(tabid)) code_object << "\t{} when others;\n".format(table.get_precision( ).get_storage_precision().get_cst(default_value)) # result is set elif isinstance(optree, Return): return_result = optree.inputs[0] return_code = self.generate_expr(code_object, return_result, folded=folded, language=language) code_object << "return %s;\n" % return_code.get() return None #return_code elif isinstance(optree, ExceptionOperation): if optree.get_specifier() in [ ExceptionOperation.RaiseException, ExceptionOperation.ClearException, ExceptionOperation.RaiseReturn ]: result_code = self.processor.generate_expr( self, code_object, optree, optree.inputs, folded=False, result_var=result_var, language=language) code_object << "%s;\n" % result_code.get() if optree.get_specifier() == ExceptionOperation.RaiseReturn: if self.libm_compliant: # libm compliant exception management code_object.add_header( "support_lib/ml_libm_compatibility.h") return_value = self.generate_expr( code_object, optree.get_return_value(), folded=folded, language=language) arg_value = self.generate_expr(code_object, optree.get_arg_value(), folded=folded, language=language) function_name = optree.function_name exception_list = [ op.get_value() for op in optree.inputs ] if ML_FPE_Inexact in exception_list: exception_list.remove(ML_FPE_Inexact) if len(exception_list) > 1: raise NotImplementedError if ML_FPE_Overflow in exception_list: code_object << "return ml_raise_libm_overflowf(%s, %s, \"%s\");\n" % ( return_value.get(), arg_value.get(), function_name) elif ML_FPE_Underflow in exception_list: code_object << 
"return ml_raise_libm_underflowf(%s, %s, \"%s\");\n" % ( return_value.get(), arg_value.get(), function_name) elif ML_FPE_Invalid in exception_list: code_object << "return %s;\n" % return_value.get() else: return_precision = optree.get_return_value( ).get_precision() self.generate_expr(code_object, Return(optree.get_return_value(), precision=return_precision), folded=folded, language=language) return None else: result = self.processor.generate_expr(self, code_object, optree, optree.inputs, folded=folded, result_var=result_var, language=language) elif isinstance(optree, NoResultOperation): result_code = self.processor.generate_expr(self, code_object, optree, optree.inputs, folded=False, result_var=result_var, language=language) code_object << "%s;\n" % result_code.get() return None elif isinstance(optree, Statement): for op in optree.inputs: if not self.has_memoization(op): self.generate_expr(code_object, op, folded=folded, initial=True, language=language) return None else: # building ordered list of required node by depth working_list = [op for op in optree.get_inputs()] processing_list = [op for op in working_list] resolved = {} while working_list != []: op = working_list.pop(0) # node has already been processed: SKIP if op in resolved: continue if isinstance(op, ML_Table): # ML_Table instances are skipped (should be generated directly by TableLoad) continue elif isinstance(op, ML_LeafNode): processing_list.append(op) else: memo = self.get_memoization(op) if not memo is None: # node has already been generated: STOP HERE resolved[op] = memo else: # enqueue node to be processed processing_list.append(op) # enqueue node inputs working_list += [op for op in op.get_inputs()] resolved[op] = memo # processing list in reverse order (starting with deeper node to avoid too much recursion) for op in processing_list[::-1]: _ = self.generate_expr(code_object, op, folded=folded, initial=initial, language=language) # processing main node generate_pre_process = self.generate_clear_exception if optree.get_clearprevious( ) else None result = self.processor.generate_expr( self, code_object, optree, optree.inputs, generate_pre_process=generate_pre_process, folded=folded, result_var=result_var, language=language) # registering result into memoization table self.add_memoization(optree, result) # debug management if optree.get_debug() and not self.disable_debug: self.generate_debug_msg(optree, result, code_object) if (initial or force_variable_storing or result_too_long(result)) and not isinstance( result, CodeVariable) and not result is None: # result could have been modified from initial optree result_precision = result.precision prefix_tag = optree.get_tag( default="var_result" ) if force_variable_storing else "tmp_result" final_var = result_var if result_var else code_object.get_free_var_name( result_precision, prefix=prefix_tag, declare=True) code_object << self.generate_code_assignation( code_object, final_var, result.get()) return CodeVariable(final_var, result_precision) return result
def random_log_sample(interval):
    lo = sollya.inf(interval)
    hi = sollya.sup(interval)
def generate_scheme(self): # declaring target and instantiating optimization engine vx = self.implementation.add_input_variable("x", self.precision) Log.set_dump_stdout(True) Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m") if self.debug_flag: Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m") # local overloading of RaiseReturn operation def ExpRaiseReturn(*args, **kwords): kwords["arg_value"] = vx kwords["function_name"] = self.function_name return RaiseReturn(*args, **kwords) C_m1 = Constant(-1, precision = self.precision) test_NaN_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False, debug = debug_multi, tag = "NaN_or_inf", precision = ML_Bool) test_NaN = Test(vx, specifier = Test.IsNaN, likely = False, debug = debug_multi, tag = "is_NaN", precision = ML_Bool) test_inf = Comparison(vx, 0, specifier = Comparison.Greater, debug = debug_multi, tag = "sign", precision = ML_Bool, likely = False); # Infnty input infty_return = Statement(ConditionBlock(test_inf, Return(FP_PlusInfty(self.precision)), Return(C_m1))) # non-std input (inf/nan) specific_return = ConditionBlock(test_NaN, Return(FP_QNaN(self.precision)), infty_return) # Over/Underflow Tests precision_emax = self.precision.get_emax() precision_max_value = S2**(precision_emax + 1) expm1_overflow_bound = ceil(log(precision_max_value + 1)) overflow_test = Comparison(vx, expm1_overflow_bound, likely = False, specifier = Comparison.Greater, precision = ML_Bool) overflow_return = Statement(Return(FP_PlusInfty(self.precision))) precision_emin = self.precision.get_emin_subnormal() precision_min_value = S2** precision_emin expm1_underflow_bound = floor(log(precision_min_value) + 1) underflow_test = Comparison(vx, expm1_underflow_bound, likely = False, specifier = Comparison.Less, precision = ML_Bool) underflow_return = Statement(Return(C_m1)) sollya_precision = {ML_Binary32: sollya.binary32, ML_Binary64: sollya.binary64}[self.precision] int_precision = {ML_Binary32: ML_Int32, ML_Binary64: ML_Int64}[self.precision] # Constants log_2 = round(log(2), sollya_precision, sollya.RN) invlog2 = round(1/log(2), sollya_precision, sollya.RN) log_2_cst = Constant(log_2, precision = self.precision) interval_vx = Interval(expm1_underflow_bound, expm1_overflow_bound) interval_fk = interval_vx * invlog2 interval_k = Interval(floor(inf(interval_fk)), ceil(sup(interval_fk))) log2_hi_precision = self.precision.get_field_size() - 6 log2_hi = round(log(2), log2_hi_precision, sollya.RN) log2_lo = round(log(2) - log2_hi, sollya_precision, sollya.RN) # Reduction unround_k = vx * invlog2 ik = NearestInteger(unround_k, precision = int_precision, debug = debug_multi, tag = "ik") k = Conversion(ik, precision = self.precision, tag = "k") red_coeff1 = Multiplication(k, log2_hi, precision = self.precision) red_coeff2 = Multiplication(Negation(k, precision = self.precision), log2_lo, precision = self.precision) pre_sub_mul = Subtraction(vx, red_coeff1, precision = self.precision) s = Addition(pre_sub_mul, red_coeff2, precision = self.precision) z = Subtraction(s, pre_sub_mul, precision = self.precision) t = Subtraction(red_coeff2, z, precision = self.precision) r = Addition(s, t, precision = self.precision) r.set_attributes(tag = "r", debug = debug_multi) r_interval = Interval(-log_2/S2, log_2/S2) local_ulp = sup(ulp(exp(r_interval), self.precision)) print("ulp: ", local_ulp) error_goal = S2**-1*local_ulp print("error goal: ", error_goal) # Polynomial Approx error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai) 
Log.report(Log.Info, "\033[33;1m Building polynomial \033[0m\n") poly_degree = sup(guessdegree(expm1(sollya.x), r_interval, error_goal) + 1) polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme poly_degree_list = range(0, poly_degree) precision_list = [self.precision] *(len(poly_degree_list) + 1) poly_object, poly_error = Polynomial.build_from_approximation_with_error(expm1(sollya.x), poly_degree, precision_list, r_interval, sollya.absolute, error_function = error_function) sub_poly = poly_object.sub_poly(start_index = 2) Log.report(Log.Info, "Poly : %s" % sub_poly) Log.report(Log.Info, "poly error : {} / {:d}".format(poly_error, int(sollya.log2(poly_error)))) pre_sub_poly = polynomial_scheme_builder(sub_poly, r, unified_precision = self.precision) poly = r + pre_sub_poly poly.set_attributes(tag = "poly", debug = debug_multi) exp_k = ExponentInsertion(ik, tag = "exp_k", debug = debug_multi, precision = self.precision) exp_mk = ExponentInsertion(-ik, tag = "exp_mk", debug = debug_multi, precision = self.precision) diff = 1 - exp_mk diff.set_attributes(tag = "diff", debug = debug_multi) # Late Tests late_overflow_test = Comparison(ik, self.precision.get_emax(), specifier = Comparison.Greater, likely = False, debug = debug_multi, tag = "late_overflow_test") overflow_exp_offset = (self.precision.get_emax() - self.precision.get_field_size() / 2) diff_k = ik - overflow_exp_offset exp_diff_k = ExponentInsertion(diff_k, precision = self.precision, tag = "exp_diff_k", debug = debug_multi) exp_oflow_offset = ExponentInsertion(overflow_exp_offset, precision = self.precision, tag = "exp_offset", debug = debug_multi) late_overflow_result = (exp_diff_k * (1 + poly)) * exp_oflow_offset - 1.0 late_overflow_return = ConditionBlock( Test(late_overflow_result, specifier = Test.IsInfty, likely = False), ExpRaiseReturn(ML_FPE_Overflow, return_value = FP_PlusInfty(self.precision)), Return(late_overflow_result) ) late_underflow_test = Comparison(k, self.precision.get_emin_normal(), specifier = Comparison.LessOrEqual, likely = False) underflow_exp_offset = 2 * self.precision.get_field_size() corrected_coeff = ik + underflow_exp_offset exp_corrected = ExponentInsertion(corrected_coeff, precision = self.precision) exp_uflow_offset = ExponentInsertion(-underflow_exp_offset, precision = self.precision) late_underflow_result = ( exp_corrected * (1 + poly)) * exp_uflow_offset - 1.0 test_subnormal = Test(late_underflow_result, specifier = Test.IsSubnormal, likely = False) late_underflow_return = Statement( ConditionBlock( test_subnormal, ExpRaiseReturn(ML_FPE_Underflow, return_value = late_underflow_result)), Return(late_underflow_result) ) # Reconstruction std_result = exp_k * ( poly + diff ) std_result.set_attributes(tag = "result", debug = debug_multi) result_scheme = ConditionBlock( late_overflow_test, late_overflow_return, ConditionBlock( late_underflow_test, late_underflow_return, Return(std_result) ) ) std_return = ConditionBlock( overflow_test, overflow_return, ConditionBlock( underflow_test, underflow_return, result_scheme) ) scheme = ConditionBlock( test_NaN_or_inf, Statement(specific_return), std_return ) return scheme
def generate_reduction_fptaylor(x): # get sign and abs_x, must be the same at endpoints if sollya.sup(x) <= 0: sign_x_expr = "-1.0" abs_x_expr = "-x" abs_x = -x elif sollya.inf(x) >= 0: sign_x_expr = "1.0" abs_x_expr = "x" abs_x = x else: assert False, "Interval must not straddle 0" # get k, must be the same at endpoints unround_k = abs_x * n_invpi k_low = sollya.floor(sollya.inf(unround_k)) k_high = sollya.floor(sollya.sup(unround_k)) if k_low != k_high: assert False, "Interval must not straddle multples of pi" k = int(k_low) part = k % 2 r_expr = "abs_x - whole" r = abs_x - k * n_pi z_expr = "r" z = r if part == 1: flipped_poly_expr = "-poly" else: flipped_poly_expr = "poly" x_low = sollya.inf(x) x_high = sollya.sup(x) query = "\n".join([ "Variables", " real x in [{},{}];".format(x_low, x_high), "Definitions", " abs_x rnd64= {};".format(abs_x_expr), " whole rnd64= {} * {};".format(k, n_pi), " r rnd64= abs_x - whole;", " z rnd64= {};".format(z_expr), " poly rnd64= {};".format(poly_expr), " flipped_poly rnd64= {};".format(flipped_poly_expr), " retval rnd64= flipped_poly*{};".format(sign_x_expr), "Expressions", " retval;" ]) rnd_rel_err = None rnd_abs_err = None try: res = fptaylor.Result(query, { **config, "--rel-error": "true", "--abs-error": "true" }) rnd_rel_err = float( res.result["relative_errors"]["final_total"]["value"]) rnd_abs_err = float( res.result["absolute_errors"]["final_total"]["value"]) except AssertionError: pass except KeyError: try: rnd_abs_err = float( res.result["absolute_errors"]["final_total"]["value"]) except KeyError: pass if rnd_abs_err is None: try: res = fptaylor.Result(query, { **config, "--rel-error": "false", "--abs-error": "true" }) rnd_abs_err = float( res.result["absolute_errors"]["final_total"]["value"]) except AssertionError: pass err_int = sollya.supnorm(self.poly_object.get_sollya_object(), sollya.sin(sollya.x), z, sollya.relative, 2**-100) algo_rel_err = sollya.sup(err_int) err_int = sollya.supnorm(self.poly_object.get_sollya_object(), sollya.sin(sollya.x), z, sollya.absolute, 2**-100) algo_abs_err = sollya.sup(err_int) if rnd_rel_err is None or str(algo_rel_err) == "error": rel_err = float("inf") else: rel_err = rnd_rel_err + algo_rel_err abs_err = rnd_abs_err + algo_abs_err return rel_err, abs_err
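# Plain-float sketch of the reduction the query above models: fold x into
# [0, pi) with k = floor(|x| / pi), then flip the sign of the polynomial
# result when k is odd (math.pi stands in for the rounded n_pi constant).
import math

def _reduce_arg(x):
    sign = -1.0 if x < 0 else 1.0
    abs_x = abs(x)
    k = math.floor(abs_x / math.pi)
    r = abs_x - k * math.pi            # r in [0, pi)
    flip = -1.0 if k % 2 == 1 else 1.0
    return sign * flip, r              # sin(x) == sign * flip * sin(r)

s, r = _reduce_arg(10.0)
assert abs(s * math.sin(r) - math.sin(10.0)) < 1e-12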
def determine_error(self): sollya.settings.display = sollya.hexadecimal n_pi = self.precision.round_sollya_object(sollya.pi, sollya.RN) n_invpi = self.precision.round_sollya_object(1 / sollya.pi, sollya.RN) poly_expr = str(sollya.horner(self.poly_object.get_sollya_object())) poly_expr = poly_expr.replace("_x_", "z") poly_expr = poly_expr.replace("z^0x1p1", "z*z") config = fptaylor.CHECK_CONFIG.copy() del config["--abs-error"] config["--opt"] = "bb-eval" config["--rel-error-threshold"] = "0.0" config["--intermediate-opt"] = "false" config["--uncertainty"] = "false" def generate_fptaylor(x): x_low = sollya.inf(x) x_high = sollya.sup(x) query = "\n".join([ "Variables", " real z in [{},{}];".format(x_low, x_high), "Definitions", " retval rnd64= {};".format(poly_expr), "Expressions", " retval;" ]) rnd_rel_err = None rnd_abs_err = None try: res = fptaylor.Result(query, { **config, "--rel-error": "true", "--abs-error": "true" }) rnd_rel_err = float( res.result["relative_errors"]["final_total"]["value"]) rnd_abs_err = float( res.result["absolute_errors"]["final_total"]["value"]) except AssertionError: pass except KeyError: try: rnd_abs_err = float( res.result["absolute_errors"]["final_total"]["value"]) except KeyError: pass if rnd_abs_err is None: try: res = fptaylor.Result(query, { **config, "--rel-error": "false", "--abs-error": "true" }) rnd_abs_err = float( res.result["absolute_errors"]["final_total"]["value"]) except AssertionError: pass err_int = sollya.supnorm(self.poly_object.get_sollya_object(), sollya.sin(sollya.x), x, sollya.relative, 2**-100) algo_rel_err = sollya.sup(err_int) err_int = sollya.supnorm(self.poly_object.get_sollya_object(), sollya.sin(sollya.x), x, sollya.absolute, 2**-100) algo_abs_err = sollya.sup(err_int) if rnd_rel_err is None or str(algo_rel_err) == "error": rel_err = float("inf") else: rel_err = rnd_rel_err + algo_rel_err abs_err = rnd_abs_err + algo_abs_err return rel_err, abs_err def generate_reduction_fptaylor(x): # get sign and abs_x, must be the same at endpoints if sollya.sup(x) <= 0: sign_x_expr = "-1.0" abs_x_expr = "-x" abs_x = -x elif sollya.inf(x) >= 0: sign_x_expr = "1.0" abs_x_expr = "x" abs_x = x else: assert False, "Interval must not straddle 0" # get k, must be the same at endpoints unround_k = abs_x * n_invpi k_low = sollya.floor(sollya.inf(unround_k)) k_high = sollya.floor(sollya.sup(unround_k)) if k_low != k_high: assert False, "Interval must not straddle multples of pi" k = int(k_low) part = k % 2 r_expr = "abs_x - whole" r = abs_x - k * n_pi z_expr = "r" z = r if part == 1: flipped_poly_expr = "-poly" else: flipped_poly_expr = "poly" x_low = sollya.inf(x) x_high = sollya.sup(x) query = "\n".join([ "Variables", " real x in [{},{}];".format(x_low, x_high), "Definitions", " abs_x rnd64= {};".format(abs_x_expr), " whole rnd64= {} * {};".format(k, n_pi), " r rnd64= abs_x - whole;", " z rnd64= {};".format(z_expr), " poly rnd64= {};".format(poly_expr), " flipped_poly rnd64= {};".format(flipped_poly_expr), " retval rnd64= flipped_poly*{};".format(sign_x_expr), "Expressions", " retval;" ]) rnd_rel_err = None rnd_abs_err = None try: res = fptaylor.Result(query, { **config, "--rel-error": "true", "--abs-error": "true" }) rnd_rel_err = float( res.result["relative_errors"]["final_total"]["value"]) rnd_abs_err = float( res.result["absolute_errors"]["final_total"]["value"]) except AssertionError: pass except KeyError: try: rnd_abs_err = float( res.result["absolute_errors"]["final_total"]["value"]) except KeyError: pass if rnd_abs_err is None: try: res = 
fptaylor.Result(query, { **config, "--rel-error": "false", "--abs-error": "true" }) rnd_abs_err = float( res.result["absolute_errors"]["final_total"]["value"]) except AssertionError: pass err_int = sollya.supnorm(self.poly_object.get_sollya_object(), sollya.sin(sollya.x), z, sollya.relative, 2**-100) algo_rel_err = sollya.sup(err_int) err_int = sollya.supnorm(self.poly_object.get_sollya_object(), sollya.sin(sollya.x), z, sollya.absolute, 2**-100) algo_abs_err = sollya.sup(err_int) if rnd_rel_err is None or str(algo_rel_err) == "error": rel_err = float("inf") else: rel_err = rnd_rel_err + algo_rel_err abs_err = rnd_abs_err + algo_abs_err return rel_err, abs_err def split_domain(starting_domain, slivers): in_domains = [starting_domain] # abs out_domains = list() for I in in_domains: if sollya.inf(I) < 0 and sollya.sup(I) > 0: out_domains.append(sollya.Interval(sollya.inf(I), 0)) out_domains.append(sollya.Interval(0, sollya.sup(I))) else: out_domains.append(I) in_domains = out_domains # k out_domains = list() while len(in_domains) > 0: I = in_domains.pop() #print("in: [{}, {}]".format(float(sollya.inf(I)), float(sollya.sup(I)))) unround_mult = I * n_invpi mult_low = sollya.floor(sollya.inf(unround_mult)) mult_high = sollya.floor(sollya.sup(unround_mult)) if mult_low == mult_high or (mult_low == -1 and mult_high == 0): #print(" accepted") out_domains.append(I) continue if sollya.sup(I) <= 0: divider_low = (mult_low + 1) * n_pi divider_high = divider_low - divider_low * 2**-53 else: divider_high = (mult_low + 1) * n_pi divider_low = divider_high - divider_high * 2**-53 lower_part = sollya.Interval(sollya.inf(I), divider_low) upper_part = sollya.Interval(divider_high, sollya.sup(I)) #print(" -> [{}, {}]".format(float(sollya.inf(lower_part)), float(sollya.sup(lower_part)))) #print(" -> [{}, {}]".format(float(sollya.inf(upper_part)), float(sollya.sup(upper_part)))) in_domains.append(lower_part) in_domains.append(upper_part) in_domains = out_domains # subdivide each section into 2**subd sections for _ in range(slivers): out_domains = list() for I in in_domains: mid = sollya.mid(I) out_domains.append(sollya.Interval(sollya.inf(I), mid)) out_domains.append(sollya.Interval(mid, sollya.sup(I))) in_domains = out_domains in_domains = set(in_domains) in_domains = sorted(in_domains, key=lambda x: float(sollya.inf(x))) in_domains = [ d for d in in_domains if sollya.inf(d) != sollya.sup(d) ] return in_domains if self.skip_reduction: starting_domain = sollya.Interval(-n_pi - 2**-7, n_pi + 2**-7) else: reduction_k = 20 starting_domain = sollya.Interval(-reduction_k * n_pi, reduction_k * n_pi) # analyse each piece in_domains = split_domain(starting_domain, self.slivers) errors = list() for I in in_domains: if self.skip_reduction: rel_err, abs_err = generate_fptaylor(I) else: rel_err, abs_err = generate_reduction_fptaylor(I) print("{}\t{}\t{}\t{}".format(float(sollya.inf(I)), float(sollya.sup(I)), float(abs_err), float(rel_err))) errors.append((I, abs_err, rel_err)) def generate_json(errors, domain): errors = [err for err in errors if err[0] in domain] errors.sort(key=lambda err: err[2]) epsilon = errors[0][2] delta = max(err[1] for err in errors) d = { "cname": self.function_name, "delta": float(delta), "domain": [ float(sollya.inf(domain)), float(sollya.sup(domain)), ], "epsilon": float(epsilon), "operation": "sin" } return d if self.skip_reduction: d = generate_json(errors, sollya.Interval(-n_pi - 2**-7, n_pi + 2**-7)) json_str = json.dumps(d, sort_keys=True, indent=4) json_str = "spec: " + 
json_str.replace("\n", "\nspec: ") print(json_str) else: specs = list() for k in range(1, reduction_k): d = generate_json(errors, sollya.Interval(-k * n_pi, k * n_pi)) specs.append(d) for i in range(len(specs)): d = specs[i] if i == len(specs) - 1: json_str = json.dumps(d, sort_keys=True, indent=4) json_str = "spec: " + json_str.replace("\n", "\nspec: ") print(json_str) break nd = specs[i + 1] if d["epsilon"] == nd["epsilon"] and d["delta"] == nd["delta"]: continue json_str = json.dumps(d, sort_keys=True, indent=4) json_str = "spec: " + json_str.replace("\n", "\nspec: ") print(json_str)
def generate_scheme(self): vx = self.implementation.add_input_variable("x", self.get_input_precision()) sollya_precision = self.get_input_precision().get_sollya_object() # local overloading of RaiseReturn operation def ExpRaiseReturn(*args, **kwords): kwords["arg_value"] = vx kwords["function_name"] = self.function_name return RaiseReturn(*args, **kwords) # testing special value inputs test_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False, debug=True, tag="nan_or_inf") test_nan = Test(vx, specifier=Test.IsNaN, debug=True, tag="is_nan_test") test_positive = Comparison(vx, 0, specifier=Comparison.GreaterOrEqual, debug=True, tag="inf_sign") test_signaling_nan = Test(vx, specifier=Test.IsSignalingNaN, debug=True, tag="is_signaling_nan") # if input is a signaling NaN, raise an invalid exception and returns # a quiet NaN return_snan = Statement( ExpRaiseReturn(ML_FPE_Invalid, return_value=FP_QNaN(self.precision))) vx_exp = ExponentExtraction(vx, tag="vx_exp", debug=debugd) int_precision = self.precision.get_integer_format() # log2(vx) # r = vx_mant # e = vx_exp # vx reduced to r in [1, 2[ # log2(vx) = log2(r * 2^e) # = log2(r) + e # ## log2(r) is approximated by # log2(r) = log2(inv_seed(r) * r / inv_seed(r) # = log2(inv_seed(r) * r) - log2(inv_seed(r)) # inv_seed(r) in ]1/2, 1] => log2(inv_seed(r)) in ]-1, 0] # # inv_seed(r) * r ~ 1 # we can easily tabulate -log2(inv_seed(r)) # # retrieving processor inverse approximation table dummy_var = Variable("dummy", precision=self.precision) dummy_div_seed = DivisionSeed(dummy_var, precision=self.precision) inv_approx_table = self.processor.get_recursive_implementation( dummy_div_seed, language=None, table_getter=lambda self: self.approx_table_map) # table creation table_index_size = 7 log_table = ML_NewTable(dimensions=[2**table_index_size, 2], storage_precision=self.precision, tag=self.uniquify_name("inv_table")) # value for index 0 is set to 0.0 log_table[0][0] = 0.0 log_table[0][1] = 0.0 for i in range(1, 2**table_index_size): #inv_value = (1.0 + (self.processor.inv_approx_table[i] / S2**9) + S2**-52) * S2**-1 #inv_value = (1.0 + (inv_approx_table[i][0] / S2**9) ) * S2**-1 #print inv_approx_table[i][0], inv_value inv_value = inv_approx_table[i][0] value_high_bitsize = self.precision.get_field_size() - ( self.precision.get_exponent_size() + 1) value_high = round(log2(inv_value), value_high_bitsize, sollya.RN) value_low = round( log2(inv_value) - value_high, sollya_precision, sollya.RN) log_table[i][0] = value_high log_table[i][1] = value_low def compute_log(_vx, exp_corr_factor=None): _vx_mant = MantissaExtraction(_vx, tag="_vx_mant", precision=self.precision, debug=debug_lftolx) _vx_exp = ExponentExtraction(_vx, tag="_vx_exp", debug=debugd) # The main table is indexed by the 7 most significant bits # of the mantissa table_index = inv_approx_table.index_function(_vx_mant) table_index.set_attributes(tag="table_index", debug=debuglld) # argument reduction # Using AND -2 to exclude LSB set to 1 for Newton-Raphson convergence # TODO: detect if single operand inverse seed is supported by the targeted architecture pre_arg_red_index = TypeCast(BitLogicAnd( TypeCast(DivisionSeed(_vx_mant, precision=self.precision, tag="seed", debug=debug_lftolx, silent=True), precision=ML_UInt64), Constant(-2, precision=ML_UInt64), precision=ML_UInt64), precision=self.precision, tag="pre_arg_red_index", debug=debug_lftolx) arg_red_index = Select(Equal(table_index, 0), 1.0, pre_arg_red_index, tag="arg_red_index", debug=debug_lftolx) _red_vx = FMA(arg_red_index, 
_vx_mant, -1.0) _red_vx.set_attributes(tag="_red_vx", debug=debug_lftolx) inv_err = S2**-inv_approx_table.index_size red_interval = Interval(1 - inv_err, 1 + inv_err) # return in case of standard (non-special) input _log_inv_lo = TableLoad(log_table, table_index, 1, tag="log_inv_lo", debug=debug_lftolx) _log_inv_hi = TableLoad(log_table, table_index, 0, tag="log_inv_hi", debug=debug_lftolx) Log.report(Log.Verbose, "building mathematical polynomial") approx_interval = Interval(-inv_err, inv_err) poly_degree = sup( guessdegree( log2(1 + sollya.x) / sollya.x, approx_interval, S2** -(self.precision.get_field_size() * 1.1))) + 1 sollya.settings.display = sollya.hexadecimal global_poly_object, approx_error = Polynomial.build_from_approximation_with_error( log2(1 + sollya.x) / sollya.x, poly_degree, [self.precision] * (poly_degree + 1), approx_interval, sollya.absolute, error_function=lambda p, f, ai, mod, t: sollya.dirtyinfnorm( p - f, ai)) Log.report( Log.Info, "poly_degree={}, approx_error={}".format( poly_degree, approx_error)) poly_object = global_poly_object.sub_poly(start_index=1, offset=1) #poly_object = global_poly_object.sub_poly(start_index=0,offset=0) Attributes.set_default_silent(True) Attributes.set_default_rounding_mode(ML_RoundToNearest) Log.report(Log.Verbose, "generating polynomial evaluation scheme") pre_poly = PolynomialSchemeEvaluator.generate_horner_scheme( poly_object, _red_vx, unified_precision=self.precision) _poly = FMA(pre_poly, _red_vx, global_poly_object.get_cst_coeff(0, self.precision)) _poly.set_attributes(tag="poly", debug=debug_lftolx) Log.report( Log.Verbose, "sollya global_poly_object: {}".format( global_poly_object.get_sollya_object())) Log.report( Log.Verbose, "sollya poly_object: {}".format( poly_object.get_sollya_object())) corr_exp = _vx_exp if exp_corr_factor == None else _vx_exp + exp_corr_factor Attributes.unset_default_rounding_mode() Attributes.unset_default_silent() pre_result = -_log_inv_hi + (_red_vx * _poly + (-_log_inv_lo)) pre_result.set_attributes(tag="pre_result", debug=debug_lftolx) exact_log2_hi_exp = Conversion(corr_exp, precision=self.precision) exact_log2_hi_exp.set_attributes(tag="exact_log2_hi_hex", debug=debug_lftolx) _result = exact_log2_hi_exp + pre_result return _result, _poly, _log_inv_lo, _log_inv_hi, _red_vx result, poly, log_inv_lo, log_inv_hi, red_vx = compute_log(vx) result.set_attributes(tag="result", debug=debug_lftolx) # specific input value predicate neg_input = Comparison(vx, 0, likely=False, specifier=Comparison.Less, debug=debugd, tag="neg_input") vx_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False, debug=debugd, tag="nan_or_inf") vx_snan = Test(vx, specifier=Test.IsSignalingNaN, likely=False, debug=debugd, tag="vx_snan") vx_inf = Test(vx, specifier=Test.IsInfty, likely=False, debug=debugd, tag="vx_inf") vx_subnormal = Test(vx, specifier=Test.IsSubnormal, likely=False, debug=debugd, tag="vx_subnormal") vx_zero = Test(vx, specifier=Test.IsZero, likely=False, debug=debugd, tag="vx_zero") exp_mone = Equal(vx_exp, -1, tag="exp_minus_one", debug=debugd, likely=False) vx_one = Equal(vx, 1.0, tag="vx_one", likely=False, debug=debugd) # Specific specific for the case exp == -1 # log2(x) = log2(m) - 1 # # as m in [1, 2[, log2(m) in [0, 1[ # if r is close to 2, a catastrophic cancellation can occur # # r = seed(m) # log2(x) = log2(seed(m) * m / seed(m)) - 1 # = log2(seed(m) * m) - log2(seed(m)) - 1 # # for m really close to 2 => seed(m) = 0.5 # => log2(x) = log2(0.5 * m) # = result_exp_m1 = (-log_inv_hi - 1.0) + 
FMA(poly, red_vx, -log_inv_lo) result_exp_m1.set_attributes(tag="result_exp_m1", debug=debug_lftolx) m100 = -100 S2100 = Constant(S2**100, precision=self.precision) result_subnormal, _, _, _, _ = compute_log(vx * S2100, exp_corr_factor=m100) result_subnormal.set_attributes(tag="result_subnormal", debug=debug_lftolx) one_err = S2**-7 approx_interval_one = Interval(-one_err, one_err) red_vx_one = vx - 1.0 poly_degree_one = sup( guessdegree( log(1 + x) / x, approx_interval_one, S2** -(self.precision.get_field_size() + 1))) + 1 poly_object_one = Polynomial.build_from_approximation( log(1 + sollya.x) / sollya.x, poly_degree_one, [self.precision] * (poly_degree_one + 1), approx_interval_one, absolute).sub_poly(start_index=1) poly_one = PolynomialSchemeEvaluator.generate_horner_scheme( poly_object_one, red_vx_one, unified_precision=self.precision) poly_one.set_attributes(tag="poly_one", debug=debug_lftolx) result_one = red_vx_one + red_vx_one * poly_one cond_one = (vx < (1 + one_err)) & (vx > (1 - one_err)) cond_one.set_attributes(tag="cond_one", debug=debugd, likely=False) # main scheme pre_scheme = ConditionBlock( neg_input, Statement(ClearException(), Raise(ML_FPE_Invalid), Return(FP_QNaN(self.precision))), ConditionBlock( vx_nan_or_inf, ConditionBlock( vx_inf, Statement( ClearException(), Return(FP_PlusInfty(self.precision)), ), Statement(ClearException(), ConditionBlock(vx_snan, Raise(ML_FPE_Invalid)), Return(FP_QNaN(self.precision)))), ConditionBlock( vx_subnormal, ConditionBlock( vx_zero, Statement( ClearException(), Raise(ML_FPE_DivideByZero), Return(FP_MinusInfty(self.precision)), ), Statement(ClearException(), result_subnormal, Return(result_subnormal))), ConditionBlock( vx_one, Statement( ClearException(), Return(FP_PlusZero(self.precision)), ), ConditionBlock(exp_mone, Return(result_exp_m1), Return(result)))))) scheme = Statement(result, pre_scheme) return scheme
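# The reduction documented in the comments of generate_scheme above --
# log2(x) = e + log2(inv_seed(r) * r) - log2(inv_seed(r)), with a table of
# -log2(inv_seed(r)) indexed by the top mantissa bits -- can be prototyped in
# plain Python.  This sketch is not the metafunction's code: a 7-bit table of
# reciprocal bucket midpoints stands in for the processor's DivisionSeed
# table, and a truncated log2(1+u) series stands in for the fpminimax
# polynomial.
import math

TABLE_BITS = 7

def seed_and_log(i):
    """Approximate reciprocal of the bucket's mantissa midpoint and
    -log2 of that reciprocal (the tabulated correction term)."""
    m_mid = 1.0 + (i + 0.5) / 2**TABLE_BITS
    seed = 1.0 / m_mid
    return seed, -math.log2(seed)

LOG_TABLE = [seed_and_log(i) for i in range(2**TABLE_BITS)]

def log2_prototype(x):
    """log2(x) = e + log2(seed * m) - log2(seed) for x = m * 2**e, m in [1, 2)."""
    m, e = math.frexp(x)            # m in [0.5, 1)
    m, e = 2.0 * m, e - 1           # renormalize so m lies in [1, 2)
    idx = int((m - 1.0) * 2**TABLE_BITS)
    seed, minus_log2_seed = LOG_TABLE[idx]
    r = seed * m - 1.0              # |r| <= ~2**-(TABLE_BITS + 1)
    inv_ln2 = 1.0 / math.log(2.0)
    poly = r * inv_ln2 * (1.0 - r / 2.0 + r * r / 3.0)   # truncated log2(1 + r)
    return e + poly + minus_log2_seed

for x in (0.7, 1.0, 3.5, 1234.5):
    print(x, log2_prototype(x), math.log2(x))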
def generate_scheme(self): # declaring target and instantiating optimization engine vx = self.implementation.add_input_variable("x", self.precision) Log.set_dump_stdout(True) Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m") if self.debug_flag: Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m") # local overloading of RaiseReturn operation def ExpRaiseReturn(*args, **kwords): kwords["arg_value"] = vx kwords["function_name"] = self.function_name if self.libm_compliant: return RaiseReturn(*args, precision=self.precision, **kwords) else: return Return(kwords["return_value"], precision=self.precision) test_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False, debug=debug_multi, tag="nan_or_inf") test_nan = Test(vx, specifier=Test.IsNaN, debug=debug_multi, tag="is_nan_test") test_positive = Comparison(vx, 0, specifier=Comparison.GreaterOrEqual, debug=debug_multi, tag="inf_sign") test_signaling_nan = Test(vx, specifier=Test.IsSignalingNaN, debug=debug_multi, tag="is_signaling_nan") return_snan = Statement( ExpRaiseReturn(ML_FPE_Invalid, return_value=FP_QNaN(self.precision))) # return in case of infinity input infty_return = Statement( ConditionBlock( test_positive, Return(FP_PlusInfty(self.precision), precision=self.precision), Return(FP_PlusZero(self.precision), precision=self.precision))) # return in case of specific value input (NaN or inf) specific_return = ConditionBlock( test_nan, ConditionBlock( test_signaling_nan, return_snan, Return(FP_QNaN(self.precision), precision=self.precision)), infty_return) # return in case of standard (non-special) input # exclusion of early overflow and underflow cases precision_emax = self.precision.get_emax() precision_max_value = S2 * S2**precision_emax exp_overflow_bound = sollya.ceil(log(precision_max_value)) early_overflow_test = Comparison(vx, exp_overflow_bound, likely=False, specifier=Comparison.Greater) early_overflow_return = Statement( ClearException() if self.libm_compliant else Statement(), ExpRaiseReturn(ML_FPE_Inexact, ML_FPE_Overflow, return_value=FP_PlusInfty(self.precision))) precision_emin = self.precision.get_emin_subnormal() precision_min_value = S2**precision_emin exp_underflow_bound = floor(log(precision_min_value)) early_underflow_test = Comparison(vx, exp_underflow_bound, likely=False, specifier=Comparison.Less) early_underflow_return = Statement( ClearException() if self.libm_compliant else Statement(), ExpRaiseReturn(ML_FPE_Inexact, ML_FPE_Underflow, return_value=FP_PlusZero(self.precision))) # constant computation invlog2 = self.precision.round_sollya_object(1 / log(2), sollya.RN) interval_vx = Interval(exp_underflow_bound, exp_overflow_bound) interval_fk = interval_vx * invlog2 interval_k = Interval(floor(inf(interval_fk)), sollya.ceil(sup(interval_fk))) log2_hi_precision = self.precision.get_field_size() - ( sollya.ceil(log2(sup(abs(interval_k)))) + 2) Log.report(Log.Info, "log2_hi_precision: %d" % log2_hi_precision) invlog2_cst = Constant(invlog2, precision=self.precision) log2_hi = round(log(2), log2_hi_precision, sollya.RN) log2_lo = self.precision.round_sollya_object( log(2) - log2_hi, sollya.RN) # argument reduction unround_k = vx * invlog2 unround_k.set_attributes(tag="unround_k", debug=debug_multi) k = NearestInteger(unround_k, precision=self.precision, debug=debug_multi) ik = NearestInteger(unround_k, precision=self.precision.get_integer_format(), debug=debug_multi, tag="ik") ik.set_tag("ik") k.set_tag("k") exact_pre_mul = (k * log2_hi) 
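# The reduction set up just above is a Cody-and-Waite style split: log(2) is
# cut into a short high part log2_hi, chosen so that k * log2_hi is exact for
# every k the reduction can produce, plus a low correction log2_lo.  The
# standalone check below is not the metafunction's code: the 29-bit cut of
# log(2) is arbitrary, whereas the scheme computes log2_hi_precision from the
# actual range of k.  Fraction is used only to confirm which steps are exact.
from fractions import Fraction
import math

LN2 = math.log(2.0)
LN2_HI = float.fromhex("0x1.62e42fep-1")   # leading ~29 bits of log(2)
LN2_LO = LN2 - LN2_HI                      # exact: the operands nearly cancel

x = 123.456
k = round(x / LN2)                         # reduction index (k = 178 here)

prod_hi = k * LN2_HI                       # exact: ~29-bit times 8-bit product
assert Fraction(prod_hi) == Fraction(k) * Fraction(LN2_HI)

hi_part = x - prod_hi                      # exact by Sterbenz (x close to k*log(2))
assert Fraction(hi_part) == Fraction(x) - Fraction(prod_hi)

r = hi_part - k * LN2_LO                   # only this last step rounds
print(k, r)                                # r stays in about [-log(2)/2, log(2)/2]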
exact_pre_mul.set_attributes(exact=True) exact_hi_part = vx - exact_pre_mul exact_hi_part.set_attributes(exact=True, tag="exact_hi", debug=debug_multi, prevent_optimization=True) exact_lo_part = -k * log2_lo exact_lo_part.set_attributes(tag="exact_lo", debug=debug_multi, prevent_optimization=True) r = exact_hi_part + exact_lo_part r.set_tag("r") r.set_attributes(debug=debug_multi) approx_interval = Interval(-log(2) / 2, log(2) / 2) approx_interval_half = approx_interval / 2 approx_interval_split = [ Interval(-log(2) / 2, inf(approx_interval_half)), approx_interval_half, Interval(sup(approx_interval_half), log(2) / 2) ] # TODO: should be computed automatically exact_hi_interval = approx_interval exact_lo_interval = -interval_k * log2_lo opt_r = self.optimise_scheme(r, copy={}) tag_map = {} self.opt_engine.register_nodes_by_tag(opt_r, tag_map) cg_eval_error_copy_map = { vx: Variable("x", precision=self.precision, interval=interval_vx), tag_map["k"]: Variable("k", interval=interval_k, precision=self.precision) } #try: if is_gappa_installed(): eval_error = self.gappa_engine.get_eval_error_v2( self.opt_engine, opt_r, cg_eval_error_copy_map, gappa_filename="red_arg.g") else: eval_error = 0.0 Log.report(Log.Warning, "gappa is not installed in this environnement") Log.report(Log.Info, "eval error: %s" % eval_error) local_ulp = sup(ulp(sollya.exp(approx_interval), self.precision)) # FIXME refactor error_goal from accuracy Log.report(Log.Info, "accuracy: %s" % self.accuracy) if isinstance(self.accuracy, ML_Faithful): error_goal = local_ulp elif isinstance(self.accuracy, ML_CorrectlyRounded): error_goal = S2**-1 * local_ulp elif isinstance(self.accuracy, ML_DegradedAccuracyAbsolute): error_goal = self.accuracy.goal elif isinstance(self.accuracy, ML_DegradedAccuracyRelative): error_goal = self.accuracy.goal else: Log.report(Log.Error, "unknown accuracy: %s" % self.accuracy) # error_goal = local_ulp #S2**-(self.precision.get_field_size()+1) error_goal_approx = S2**-1 * error_goal Log.report(Log.Info, "\033[33;1m building mathematical polynomial \033[0m\n") poly_degree = max( sup( guessdegree( expm1(sollya.x) / sollya.x, approx_interval, error_goal_approx)) - 1, 2) init_poly_degree = poly_degree error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai) polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme #polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme while 1: Log.report(Log.Info, "attempting poly degree: %d" % poly_degree) precision_list = [1] + [self.precision] * (poly_degree) poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error( expm1(sollya.x), poly_degree, precision_list, approx_interval, sollya.absolute, error_function=error_function) Log.report(Log.Info, "polynomial: %s " % poly_object) sub_poly = poly_object.sub_poly(start_index=2) Log.report(Log.Info, "polynomial: %s " % sub_poly) Log.report(Log.Info, "poly approx error: %s" % poly_approx_error) Log.report( Log.Info, "\033[33;1m generating polynomial evaluation scheme \033[0m") pre_poly = polynomial_scheme_builder( poly_object, r, unified_precision=self.precision) pre_poly.set_attributes(tag="pre_poly", debug=debug_multi) pre_sub_poly = polynomial_scheme_builder( sub_poly, r, unified_precision=self.precision) pre_sub_poly.set_attributes(tag="pre_sub_poly", debug=debug_multi) poly = 1 + (exact_hi_part + (exact_lo_part + pre_sub_poly)) poly.set_tag("poly") # optimizing poly before evaluation error computation #opt_poly = 
self.opt_engine.optimization_process(poly, self.precision, fuse_fma = fuse_fma) #opt_sub_poly = self.opt_engine.optimization_process(pre_sub_poly, self.precision, fuse_fma = fuse_fma) opt_poly = self.optimise_scheme(poly) opt_sub_poly = self.optimise_scheme(pre_sub_poly) # evaluating error of the polynomial approximation r_gappa_var = Variable("r", precision=self.precision, interval=approx_interval) exact_hi_gappa_var = Variable("exact_hi", precision=self.precision, interval=exact_hi_interval) exact_lo_gappa_var = Variable("exact_lo", precision=self.precision, interval=exact_lo_interval) vx_gappa_var = Variable("x", precision=self.precision, interval=interval_vx) k_gappa_var = Variable("k", interval=interval_k, precision=self.precision) #print "exact_hi interval: ", exact_hi_interval sub_poly_error_copy_map = { #r.get_handle().get_node(): r_gappa_var, #vx.get_handle().get_node(): vx_gappa_var, exact_hi_part.get_handle().get_node(): exact_hi_gappa_var, exact_lo_part.get_handle().get_node(): exact_lo_gappa_var, #k.get_handle().get_node(): k_gappa_var, } poly_error_copy_map = { exact_hi_part.get_handle().get_node(): exact_hi_gappa_var, exact_lo_part.get_handle().get_node(): exact_lo_gappa_var, } if is_gappa_installed(): sub_poly_eval_error = -1.0 sub_poly_eval_error = self.gappa_engine.get_eval_error_v2( self.opt_engine, opt_sub_poly, sub_poly_error_copy_map, gappa_filename="%s_gappa_sub_poly.g" % self.function_name) dichotomy_map = [ { exact_hi_part.get_handle().get_node(): approx_interval_split[0], }, { exact_hi_part.get_handle().get_node(): approx_interval_split[1], }, { exact_hi_part.get_handle().get_node(): approx_interval_split[2], }, ] poly_eval_error_dico = self.gappa_engine.get_eval_error_v3( self.opt_engine, opt_poly, poly_error_copy_map, gappa_filename="gappa_poly.g", dichotomy=dichotomy_map) poly_eval_error = max( [sup(abs(err)) for err in poly_eval_error_dico]) else: poly_eval_error = 0.0 sub_poly_eval_error = 0.0 Log.report(Log.Warning, "gappa is not installed in this environnement") Log.report(Log.Info, "stopping autonomous degree research") # incrementing polynomial degree to counteract initial decrementation effect poly_degree += 1 break Log.report(Log.Info, "poly evaluation error: %s" % poly_eval_error) Log.report(Log.Info, "sub poly evaluation error: %s" % sub_poly_eval_error) global_poly_error = None global_rel_poly_error = None for case_index in range(3): poly_error = poly_approx_error + poly_eval_error_dico[ case_index] rel_poly_error = sup( abs(poly_error / sollya.exp(approx_interval_split[case_index]))) if global_rel_poly_error == None or rel_poly_error > global_rel_poly_error: global_rel_poly_error = rel_poly_error global_poly_error = poly_error flag = error_goal > global_rel_poly_error if flag: break else: poly_degree += 1 late_overflow_test = Comparison(ik, self.precision.get_emax(), specifier=Comparison.Greater, likely=False, debug=debug_multi, tag="late_overflow_test") overflow_exp_offset = (self.precision.get_emax() - self.precision.get_field_size() / 2) diff_k = Subtraction( ik, Constant(overflow_exp_offset, precision=self.precision.get_integer_format()), precision=self.precision.get_integer_format(), debug=debug_multi, tag="diff_k", ) late_overflow_result = (ExponentInsertion( diff_k, precision=self.precision) * poly) * ExponentInsertion( overflow_exp_offset, precision=self.precision) late_overflow_result.set_attributes(silent=False, tag="late_overflow_result", debug=debug_multi, precision=self.precision) late_overflow_return = ConditionBlock( 
Test(late_overflow_result, specifier=Test.IsInfty, likely=False), ExpRaiseReturn(ML_FPE_Overflow, return_value=FP_PlusInfty(self.precision)), Return(late_overflow_result, precision=self.precision)) late_underflow_test = Comparison(k, self.precision.get_emin_normal(), specifier=Comparison.LessOrEqual, likely=False) underflow_exp_offset = 2 * self.precision.get_field_size() corrected_exp = Addition( ik, Constant(underflow_exp_offset, precision=self.precision.get_integer_format()), precision=self.precision.get_integer_format(), tag="corrected_exp") late_underflow_result = ( ExponentInsertion(corrected_exp, precision=self.precision) * poly) * ExponentInsertion(-underflow_exp_offset, precision=self.precision) late_underflow_result.set_attributes(debug=debug_multi, tag="late_underflow_result", silent=False) test_subnormal = Test(late_underflow_result, specifier=Test.IsSubnormal) late_underflow_return = Statement( ConditionBlock( test_subnormal, ExpRaiseReturn(ML_FPE_Underflow, return_value=late_underflow_result)), Return(late_underflow_result, precision=self.precision)) twok = ExponentInsertion(ik, tag="exp_ik", debug=debug_multi, precision=self.precision) #std_result = twok * ((1 + exact_hi_part * pre_poly) + exact_lo_part * pre_poly) std_result = twok * poly std_result.set_attributes(tag="std_result", debug=debug_multi) result_scheme = ConditionBlock( late_overflow_test, late_overflow_return, ConditionBlock(late_underflow_test, late_underflow_return, Return(std_result, precision=self.precision))) std_return = ConditionBlock( early_overflow_test, early_overflow_return, ConditionBlock(early_underflow_test, early_underflow_return, result_scheme)) # main scheme Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m") scheme = ConditionBlock( test_nan_or_inf, Statement(ClearException() if self.libm_compliant else Statement(), specific_return), std_return) return scheme
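# The while-loop above searches for the smallest polynomial degree whose
# combined approximation + evaluation error meets the goal derived from the
# requested accuracy (about one ulp for faithful rounding, half an ulp for
# correct rounding).  The sketch below reproduces only that search logic in
# plain Python: a Taylor polynomial and a sampled error estimate stand in for
# the fpminimax fit and the rigorous supnorm/Gappa bounds, and the 2**-48
# goal is just a demo value.
import math

def taylor_expm1(r, degree):
    """Degree-`degree` Taylor stand-in for the fpminimax polynomial."""
    acc = 0.0
    for n in range(degree, 0, -1):
        acc = acc * r + 1.0 / math.factorial(n)
    return acc * r

def pick_degree(error_goal, half_width=math.log(2.0) / 2, samples=2001,
                max_degree=20):
    """Increase the degree until the sampled relative error against expm1
    meets the goal, mimicking the degree search above."""
    for degree in range(2, max_degree + 1):
        worst = 0.0
        for i in range(samples):
            r = -half_width + 2.0 * half_width * i / (samples - 1)
            exact = math.expm1(r)
            if exact == 0.0:
                continue
            worst = max(worst, abs(taylor_expm1(r, degree) - exact) / abs(exact))
        if worst <= error_goal:
            return degree, worst
    return max_degree, worst

print(pick_degree(2.0**-48))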
def piecewise_approximation(function, variable, precision, bound_low=-1.0, bound_high=1.0, num_intervals=16, max_degree=2, error_threshold=S2**-24, odd=False, even=False): """ Generate a piecewise approximation :param function: function to be approximated :type function: SollyaObject :param variable: input variable :type variable: Variable :param precision: variable's format :type precision: ML_Format :param bound_low: lower bound for the approximation interval :param bound_high: upper bound for the approximation interval :param num_intervals: number of sub-interval / sub-division of the main interval :param max_degree: maximum degree for an approximation on any sub-interval :param error_threshold: error bound for an approximation on any sub-interval :return: pair (scheme, error) where scheme is a graph node for an approximation scheme of function evaluated at variable, and error is the maximum approximation error encountered :rtype tuple(ML_Operation, SollyaObject): """ degree_generator = piecewise_approximation_degree_generator( function, bound_low, bound_high, num_intervals=num_intervals, error_threshold=error_threshold, ) degree_list = list(degree_generator) # if max_degree is None then we determine it locally if max_degree is None: max_degree = max(degree_list) # table to store coefficients of the approximation on each segment coeff_table = ML_NewTable( dimensions=[num_intervals, max_degree + 1], storage_precision=precision, tag="coeff_table", const=True # by default all approximation coeff table are const ) error_function = lambda p, f, ai, mod, t: sollya.dirtyinfnorm(p - f, ai) max_approx_error = 0.0 interval_size = (bound_high - bound_low) / num_intervals for i in range(num_intervals): subint_low = bound_low + i * interval_size subint_high = bound_low + (i + 1) * interval_size local_function = function(sollya.x + subint_low) local_interval = Interval(-interval_size, interval_size) local_degree = degree_list[i] if local_degree > max_degree: Log.report( Log.Warning, "local_degree {} exceeds max_degree bound ({}) in piecewise_approximation", local_degree, max_degree) # as max_degree defines the size of the table we can use # it as the degree for each sub-interval polynomial # as there is nothing to gain (yet) by using a smaller polynomial degree = max_degree # min(max_degree, local_degree) if function(subint_low) == 0.0: # if the lower bound is a zero to the function, we # need to force value=0 for the constant coefficient # and extend the approximation interval local_poly_degree_list = list( range(1 if even else 0, degree + 1, 2 if odd or even else 1)) poly_object, approx_error = Polynomial.build_from_approximation_with_error( function(sollya.x) / sollya.x, local_poly_degree_list, [precision] * len(local_poly_degree_list), Interval(-subint_high * 0.95, subint_high), sollya.absolute, error_function=error_function) # multiply by sollya.x poly_object = poly_object.sub_poly(offset=-1) else: try: poly_object, approx_error = Polynomial.build_from_approximation_with_error( local_function, degree, [precision] * (degree + 1), local_interval, sollya.absolute, error_function=error_function) except SollyaError as err: # try to see if function is constant on the interval (possible # failure cause for fpminmax) cst_value = precision.round_sollya_object( function(subint_low), sollya.RN) accuracy = error_threshold diff_with_cst_range = sollya.supnorm(cst_value, local_function, local_interval, sollya.absolute, accuracy) diff_with_cst = sup(abs(diff_with_cst_range)) if diff_with_cst < error_threshold: 
Log.report(Log.Info, "constant polynomial detected") poly_object = Polynomial([function(subint_low)] + [0] * degree) approx_error = diff_with_cst else: Log.report( Log.error, "degree: {} for index {}, diff_with_cst={} (vs error_threshold={}) ", degree, i, diff_with_cst, error_threshold, error=err) for ci in range(max_degree + 1): if ci in poly_object.coeff_map: coeff_table[i][ci] = poly_object.coeff_map[ci] else: coeff_table[i][ci] = 0.0 if approx_error > error_threshold: Log.report( Log.Warning, "piecewise_approximation on index {} exceeds error threshold: {} > {}", i, approx_error, error_threshold) max_approx_error = max(max_approx_error, abs(approx_error)) # computing offset diff = Subtraction(variable, Constant(bound_low, precision=precision), tag="diff", debug=debug_multi, precision=precision) int_prec = precision.get_integer_format() # delta = bound_high - bound_low delta_ratio = Constant(num_intervals / (bound_high - bound_low), precision=precision) # computing table index # index = nearestint(diff / delta * <num_intervals>) index = Max(0, Min( NearestInteger( Multiplication(diff, delta_ratio, precision=precision), precision=int_prec, ), num_intervals - 1), tag="index", debug=debug_multi, precision=int_prec) poly_var = Subtraction(diff, Multiplication( Conversion(index, precision=precision), Constant(interval_size, precision=precision)), precision=precision, tag="poly_var", debug=debug_multi) # generating indexed polynomial coeffs = [(ci, TableLoad(coeff_table, index, ci)) for ci in range(max_degree + 1)][::-1] poly_scheme = PolynomialSchemeEvaluator.generate_horner_scheme2( coeffs, poly_var, precision, {}, precision) return poly_scheme, max_approx_error