def generate_fptaylor(x):
    """Bound the total error of the polynomial over the interval ``x``.

    The rounding error is requested from FPTaylor (via ``fptaylor.Result``)
    for the expression ``poly_expr``; the approximation error is computed
    with ``sollya.supnorm`` between ``self.poly_object`` and ``cos`` on
    ``x``.  ``poly_expr``, ``config`` and ``self`` are taken from the
    enclosing scope.

    Args:
        x: sollya interval on which the errors are bounded.

    Returns:
        ``(rel_err, abs_err)``: sum of the rounding and approximation
        bounds.  A component is ``float("inf")`` when the corresponding
        rounding bound could not be obtained or when sollya reports
        ``error`` for the approximation bound.
    """
    x_low = sollya.inf(x)
    x_high = sollya.sup(x)
    query = "\n".join([
        "Variables",
        " real z in [{},{}];".format(x_low, x_high),
        "Definitions",
        " retval rnd64= {};".format(poly_expr),
        "Expressions",
        " retval;"
    ])

    rnd_rel_err = None
    rnd_abs_err = None
    # First attempt: ask for both relative and absolute rounding errors.
    try:
        res = fptaylor.Result(query, {
            **config, "--rel-error": "true",
            "--abs-error": "true"
        })
        rnd_rel_err = float(
            res.result["relative_errors"]["final_total"]["value"])
        rnd_abs_err = float(
            res.result["absolute_errors"]["final_total"]["value"])
    except AssertionError:
        # NOTE(review): presumably raised by the fptaylor wrapper when the
        # tool run fails -- confirm against the wrapper.
        pass
    except KeyError:
        # The relative bound is missing from the result; the absolute one
        # may still be present in the same result.
        try:
            rnd_abs_err = float(
                res.result["absolute_errors"]["final_total"]["value"])
        except KeyError:
            pass

    # Second attempt: absolute error only.
    if rnd_abs_err is None:
        try:
            res = fptaylor.Result(query, {
                **config, "--rel-error": "false",
                "--abs-error": "true"
            })
            rnd_abs_err = float(
                res.result["absolute_errors"]["final_total"]["value"])
        except (AssertionError, KeyError):
            # A missing bound is tolerated here exactly as in the first
            # attempt; it is reported as an infinite error below.
            pass

    # Approximation (method) error of the polynomial with respect to cos.
    err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                             sollya.cos(sollya.x), x, sollya.relative,
                             2**-100)
    algo_rel_err = sollya.sup(err_int)

    err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                             sollya.cos(sollya.x), x, sollya.absolute,
                             2**-100)
    algo_abs_err = sollya.sup(err_int)

    if rnd_rel_err is None or str(algo_rel_err) == "error":
        rel_err = float("inf")
    else:
        rel_err = rnd_rel_err + algo_rel_err

    # Same policy as for the relative error: without a rounding bound the
    # sum is undefined (None + number would raise TypeError), so report inf.
    if rnd_abs_err is None or str(algo_abs_err) == "error":
        abs_err = float("inf")
    else:
        abs_err = rnd_abs_err + algo_abs_err
    return rel_err, abs_err
def numeric_emulate(self, input_value):
    """Return the reference result for ``input_value``.

    The sine is returned when ``self.sin_output`` is truthy, the cosine
    otherwise.
    """
    reference = sin if self.sin_output else cos
    return reference(input_value)
def generate_scheme(self):
    """Build the operation graph computing sin(x) or cos(x).

    The graph reduces x to ``k * pi / 2^m + r`` (m = ``frac_pi_index``),
    loads cos/sin of the ``k``-th table point from a cosine table and
    combines them with polynomial evaluations at ``r``.  Two reduction
    paths fill the local variables ``modk`` and ``red_vx``: a Cody-Waite
    path, and a Payne-Hanek path selected when ``vx >= ph_bound``.
    Inf/NaN inputs return a quiet NaN.  Sine is obtained by offsetting
    ``k`` when ``self.sin_output`` is set.

    Returns:
        The top-level ``ConditionBlock`` of the implementation.
    """
    # declaring CodeFunction and retrieving input variable
    vx = self.implementation.add_input_variable("x", self.precision)

    Log.report(Log.Info, "generating implementation scheme")
    if self.debug_flag:
        Log.report(Log.Info, "debug has been enabled")

    # local overloading of RaiseReturn operation
    # (not referenced again in this function)
    def SincosRaiseReturn(*args, **kwords):
        kwords["arg_value"] = vx
        kwords["function_name"] = self.function_name
        return RaiseReturn(*args, **kwords)

    sollya_precision = self.precision.get_sollya_object()
    # hi_precision and ext_precision are not read again in this function
    hi_precision = self.precision.get_field_size() - 8
    # rounding precision used for the high parts of the reduction constants
    cw_hi_precision = self.precision.get_field_size() - 4

    ext_precision = {
        ML_Binary32: ML_Binary64,
        ML_Binary64: ML_Binary64
    }[self.precision]

    # integer format matching the floating-point format
    int_precision = {
        ML_Binary32: ML_Int32,
        ML_Binary64: ML_Int64
    }[self.precision]

    # threshold from which the Payne-Hanek reduction is used
    if self.precision is ML_Binary32:
        ph_bound = S2**10
    else:
        ph_bound = S2**33

    # NOTE(review): the comparison is made on vx itself, not on |vx|;
    # confirm how negative inputs of large magnitude are meant to be handled.
    test_ph_bound = Comparison(vx,
                               ph_bound,
                               specifier=Comparison.GreaterOrEqual,
                               precision=ML_Bool,
                               likely=False)

    # argument reduction
    # m: the reduction step is pi / 2^m
    frac_pi_index = {ML_Binary32: 10, ML_Binary64: 14}[self.precision]

    C0 = Constant(0, precision=int_precision)
    # C1 is not read again in this function
    C1 = Constant(1, precision=int_precision)
    # 3 * 2^(m-1) steps of pi / 2^m, i.e. 3*pi/2
    C_offset = Constant(3 * S2**(frac_pi_index - 1),
                        precision=int_precision)

    # 2^m / pi, high and low parts
    frac_pi = round(S2**frac_pi_index / pi, cw_hi_precision, sollya.RN)
    frac_pi_lo = round(S2**frac_pi_index / pi - frac_pi, sollya_precision,
                       sollya.RN)
    # pi / 2^m, high part
    inv_frac_pi = round(pi / S2**frac_pi_index, cw_hi_precision, sollya.RN)
    # pi / 2^m, low part
    inv_frac_pi_lo = round(pi / S2**frac_pi_index - inv_frac_pi,
                           sollya_precision, sollya.RN)

    # computing k = nearest integer to x * 2^m / pi
    vx.set_attributes(tag="vx", debug=debug_multi)

    vx_pi = Addition(Multiplication(vx,
                                    Constant(frac_pi,
                                             precision=self.precision),
                                    precision=self.precision),
                     Multiplication(vx,
                                    Constant(frac_pi_lo,
                                             precision=self.precision),
                                    precision=self.precision),
                     precision=self.precision,
                     tag="vx_pi",
                     debug=debug_multi)

    k = NearestInteger(vx_pi,
                       precision=int_precision,
                       tag="k",
                       debug=debug_multi)
    # k in floating-point precision
    fk = Conversion(k,
                    precision=self.precision,
                    tag="fk",
                    debug=debug_multi)

    inv_frac_pi_cst = Constant(inv_frac_pi,
                               tag="inv_frac_pi",
                               precision=self.precision,
                               debug=debug_multi)
    inv_frac_pi_lo_cst = Constant(inv_frac_pi_lo,
                                  tag="inv_frac_pi_lo",
                                  precision=self.precision,
                                  debug=debug_multi)

    # Cody-Waite reduction: x - k * (hi + lo), both products flagged exact
    red_coeff1 = Multiplication(fk,
                                inv_frac_pi_cst,
                                precision=self.precision,
                                exact=True)
    red_coeff2 = Multiplication(Negation(fk, precision=self.precision),
                                inv_frac_pi_lo_cst,
                                precision=self.precision,
                                exact=True)

    # Should be exact / Sterbenz' Lemma
    pre_sub_mul = Subtraction(vx,
                              red_coeff1,
                              precision=self.precision,
                              exact=True)

    # Fast2Sum of pre_sub_mul and red_coeff2: s is the rounded sum,
    # t the recovered rounding error
    s = Addition(pre_sub_mul,
                 red_coeff2,
                 precision=self.precision,
                 unbreakable=True,
                 tag="s",
                 debug=debug_multi)
    z = Subtraction(s,
                    pre_sub_mul,
                    precision=self.precision,
                    unbreakable=True,
                    tag="z",
                    debug=debug_multi)
    t = Subtraction(red_coeff2,
                    z,
                    precision=self.precision,
                    unbreakable=True,
                    tag="t",
                    debug=debug_multi)
    red_vx_std = Addition(s, t, precision=self.precision)
    red_vx_std.set_attributes(tag="red_vx_std", debug=debug_multi)

    # To compute sine we offset x by 3pi/2
    # which means add 3 * S2^(frac_pi_index-1) to k
    if self.sin_output:
        Log.report(Log.Info, "Computing Sin")
        offset_k = Addition(k,
                            C_offset,
                            precision=int_precision,
                            tag="offset_k")
    else:
        Log.report(Log.Info, "Computing Cos")
        offset_k = k

    # local variables assigned by whichever reduction path is taken
    modk = Variable("modk",
                    precision=int_precision,
                    var_type=Variable.Local)
    red_vx = Variable("red_vx",
                      precision=self.precision,
                      var_type=Variable.Local)

    # Faster modulo using bitwise logic: keep the low m+1 bits
    modk_std = BitLogicAnd(offset_k,
                           2**(frac_pi_index + 1) - 1,
                           precision=int_precision,
                           tag="modk",
                           debug=debug_multi)

    # range declared for the reduced argument: +/- pi / 2^(m+1)
    approx_interval = Interval(-pi / (S2**(frac_pi_index + 1)),
                               pi / S2**(frac_pi_index + 1))

    red_vx.set_interval(approx_interval)

    Log.report(Log.Info, "approx interval: %s\n" % approx_interval)

    Log.report(Log.Info,
               "building tabulated approximation for sin and cos")

    # this lambda is assigned a second time, identically, further down
    error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

    # polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme
    polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

    # one-column table of cos(i * pi / 2^m) for i in [0, 2^(m+1))
    table_index_size = frac_pi_index + 1
    cos_table = ML_NewTable(dimensions=[2**table_index_size, 1],
                            storage_precision=self.precision,
                            tag=self.uniquify_name("cos_table"))

    for i in range(2**(frac_pi_index + 1)):
        local_x = i * pi / S2**frac_pi_index
        cos_local = round(cos(local_x), self.precision.get_sollya_object(),
                          sollya.RN)
        cos_table[i][0] = cos_local

    # the sine is read from the cosine table: the index is shifted by
    # 2^(m-1) entries (pi/2) and the loaded value is negated
    sin_index = Modulo(modk + 2**(frac_pi_index - 1),
                       2**(frac_pi_index + 1),
                       precision=int_precision,
                       tag="sin_index")  #, debug = debug_multi)
    tabulated_cos = TableLoad(cos_table,
                              modk,
                              C0,
                              precision=self.precision,
                              tag="tab_cos",
                              debug=debug_multi)
    tabulated_sin = -TableLoad(cos_table,
                               sin_index,
                               C0,
                               precision=self.precision,
                               tag="tab_sin",
                               debug=debug_multi)

    # degree guessed by sollya for the target accuracy, plus a margin of 2
    poly_degree_cos = sup(
        guessdegree(cos(sollya.x), approx_interval, S2**
                    -self.precision.get_precision()) + 2)
    poly_degree_sin = sup(
        guessdegree(
            sin(sollya.x) / sollya.x, approx_interval, S2**
            -self.precision.get_precision()) + 2)

    poly_degree_cos_list = range(0, int(poly_degree_cos) + 3)
    poly_degree_sin_list = range(0, int(poly_degree_sin) + 3)

    # NOTE(review): the original comments here mentioned limiting the
    # leading coefficients to 1-bit precision, but both lists below use
    # self.precision for every coefficient.
    # cosine polynomial coefficient precisions
    poly_cos_prec_list = [self.precision] * len(poly_degree_cos_list)
    # sine polynomial coefficient precisions
    poly_sin_prec_list = [self.precision] * len(poly_degree_sin_list)

    error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)
    Log.report(Log.Info,
               "building mathematical polynomials for sin and cos")
    # Polynomial approximations
    Log.report(Log.Info, "cos")
    poly_object_cos, poly_error_cos = Polynomial.build_from_approximation_with_error(
        cos(sollya.x),
        poly_degree_cos_list,
        poly_cos_prec_list,
        approx_interval,
        sollya.absolute,
        error_function=error_function)
    Log.report(Log.Info, "sin")
    poly_object_sin, poly_error_sin = Polynomial.build_from_approximation_with_error(
        sin(sollya.x),
        poly_degree_sin_list,
        poly_sin_prec_list,
        approx_interval,
        sollya.absolute,
        error_function=error_function)

    Log.report(
        Log.Info, "poly error cos: {} / {:d}".format(
            poly_error_cos, int(sollya.log2(poly_error_cos))))
    Log.report(
        Log.Info, "poly error sin: {0} / {1:d}".format(
            poly_error_sin, int(sollya.log2(poly_error_sin))))
    Log.report(Log.Info, "poly cos : %s" % poly_object_cos)
    Log.report(Log.Info, "poly sin : %s" % poly_object_sin)

    # Polynomial evaluation scheme
    # presumably sub_poly(start_index=n) keeps the terms of degree >= n,
    # i.e. poly_cos ~ cos(r) - 1 and poly_sin ~ sin(r) - r; verify against
    # Polynomial.sub_poly
    poly_cos = polynomial_scheme_builder(
        poly_object_cos.sub_poly(start_index=1),
        red_vx,
        unified_precision=self.precision)
    poly_sin = polynomial_scheme_builder(
        poly_object_sin.sub_poly(start_index=2),
        red_vx,
        unified_precision=self.precision)
    poly_cos.set_attributes(tag="poly_cos", debug=debug_multi)
    poly_sin.set_attributes(tag="poly_sin",
                            debug=debug_multi,
                            unbreakable=True)

    # TwoProductFMA: each product is followed by an FMA recovering its
    # rounding error (a * b - product)
    mul_cos_x = tabulated_cos * poly_cos
    mul_cos_y = FusedMultiplyAdd(tabulated_cos,
                                 poly_cos,
                                 -mul_cos_x,
                                 precision=self.precision)

    mul_sin_x = tabulated_sin * poly_sin
    mul_sin_y = FusedMultiplyAdd(tabulated_sin,
                                 poly_sin,
                                 -mul_sin_x,
                                 precision=self.precision)

    mul_coeff_sin_hi = tabulated_sin * red_vx
    # unlike the two FMAs above, no precision is given for this one
    mul_coeff_sin_lo = FusedMultiplyAdd(tabulated_sin, red_vx,
                                        -mul_coeff_sin_hi)

    mul_cos = Addition(mul_cos_x,
                       mul_cos_y,
                       precision=self.precision,
                       tag="mul_cos")  #, debug = debug_multi)
    mul_sin = Negation(Addition(mul_sin_x,
                                mul_sin_y,
                                precision=self.precision),
                       precision=self.precision,
                       tag="mul_sin")  #, debug = debug_multi)
    mul_coeff_sin = Negation(Addition(mul_coeff_sin_hi,
                                      mul_coeff_sin_lo,
                                      precision=self.precision),
                             precision=self.precision,
                             tag="mul_coeff_sin")  #, debug = debug_multi)

    mul_cos_x.set_attributes(
        tag="mul_cos_x", precision=self.precision)  #, debug = debug_multi)
    mul_cos_y.set_attributes(
        tag="mul_cos_y", precision=self.precision)  #, debug = debug_multi)
    mul_sin_x.set_attributes(
        tag="mul_sin_x", precision=self.precision)  #, debug = debug_multi)
    mul_sin_y.set_attributes(
        tag="mul_sin_y", precision=self.precision)  #, debug = debug_multi)

    # final sum:
    # tab_cos * poly_cos - tab_sin * poly_sin - tab_sin * red_vx + tab_cos
    cos_eval_d_1 = (((mul_cos + mul_sin) + mul_coeff_sin) + tabulated_cos)

    cos_eval_d_1.set_attributes(tag="cos_eval_d_1",
                                precision=self.precision,
                                debug=debug_multi)

    result_1 = Statement(Return(cos_eval_d_1))

    #######################################################################
    #                    LARGE ARGUMENT MANAGEMENT                        #
    #                 (lar: Large Argument Reduction)                     #
    #######################################################################
    # payne and hanek argument reduction for large arguments
    ph_k = frac_pi_index
    # 2^m / pi rounded to 1500 bits
    ph_frac_pi = round(S2**ph_k / pi, 1500, sollya.RN)
    # ph_inv_frac_pi is not read again in this function
    ph_inv_frac_pi = pi / S2**ph_k

    ph_statement, ph_acc, ph_acc_int = generate_payne_hanek(vx,
                                                            ph_frac_pi,
                                                            self.precision,
                                                            n=100,
                                                            k=ph_k)

    # assigning Large Argument Reduction reduced variable
    lar_vx = Variable("lar_vx",
                      precision=self.precision,
                      var_type=Variable.Local)

    # scale the Payne-Hanek result by pi / 2^m (high + low parts)
    lar_red_vx = Addition(Multiplication(lar_vx,
                                         inv_frac_pi,
                                         precision=self.precision),
                          Multiplication(lar_vx,
                                         inv_frac_pi_lo,
                                         precision=self.precision),
                          precision=self.precision,
                          tag="lar_red_vx",
                          debug=debug_multi)

    # a negative integer part is shifted up by 2^(m+1)
    C32 = Constant(2**(ph_k + 1), precision=int_precision, tag="C32")
    ph_acc_int_red = Select(ph_acc_int < C0,
                            C32 + ph_acc_int,
                            ph_acc_int,
                            precision=int_precision,
                            tag="ph_acc_int_red")
    # same sine offset as on the Cody-Waite path
    if self.sin_output:
        lar_offset_k = Addition(ph_acc_int_red,
                                C_offset,
                                precision=int_precision,
                                tag="lar_offset_k")
    else:
        lar_offset_k = ph_acc_int_red

    ph_acc_int_red.set_attributes(tag="ph_acc_int_red", debug=debug_multi)
    lar_modk = BitLogicAnd(lar_offset_k,
                           2**(frac_pi_index + 1) - 1,
                           precision=int_precision,
                           tag="lar_modk",
                           debug=debug_multi)

    # large-argument path: run Payne-Hanek, then assign red_vx and modk
    lar_statement = Statement(ph_statement,
                              ReferenceAssign(lar_vx,
                                              ph_acc,
                                              debug=debug_multi),
                              ReferenceAssign(red_vx,
                                              lar_red_vx,
                                              debug=debug_multi),
                              ReferenceAssign(modk, lar_modk),
                              prevent_optimization=True)

    test_NaN_or_Inf = Test(vx,
                           specifier=Test.IsInfOrNaN,
                           likely=False,
                           tag="NaN_or_Inf",
                           debug=debug_multi)
    return_NaN_or_Inf = Statement(Return(FP_QNaN(self.precision)))

    # Inf/NaN -> clear exceptions and return a quiet NaN; otherwise pick
    # the reduction path, then evaluate the shared result expression
    scheme = ConditionBlock(
        test_NaN_or_Inf, Statement(ClearException(), return_NaN_or_Inf),
        Statement(
            modk, red_vx,
            ConditionBlock(
                test_ph_bound, lar_statement,
                Statement(
                    ReferenceAssign(modk, modk_std),
                    ReferenceAssign(red_vx, red_vx_std),
                )), result_1))

    return scheme
def generate_scheme(self):
    """Build the fixed-point operation graph computing cos(x) or sin(x).

    The input is converted to a 32-bit fixed-point value, its upper bits
    index a fused cosine/sine table and the remaining low part
    (``red_vx_lo``) is handed, together with the tabulated values and the
    degree-2 polynomial coefficients, to ``self.generate_cos_scheme`` or
    ``self.generate_sin_scheme`` depending on ``self.cos_output``.

    Returns:
        A ``Statement`` returning the result converted to
        ``self.io_precisions[0]``.
    """
    # declaring CodeFunction and retrieving input variable
    vx = self.implementation.add_input_variable("x", self.precision)

    Log.report(Log.Info, "target: %s " % self.processor.target_name)

    # display parameter information
    Log.report(Log.Info, "accuracy      : %s " % self.accuracy)
    Log.report(Log.Info, "input interval: %s " % self.input_interval)

    accuracy_goal = self.accuracy.get_goal()
    Log.report(Log.Info, "accuracy_goal=%f" % accuracy_goal)

    table_size_log = self.table_size_log
    integer_size = 31
    # integer_precision is only mentioned in a commented-out line below
    integer_precision = ML_Int32

    # smallest power-of-two exponent bounding |x| over the input interval
    max_bound = sup(abs(self.input_interval))
    max_bound_log = int(ceil(log2(max_bound)))
    Log.report(Log.Info, "max_bound_log=%s " % max_bound_log)
    # number of fractional bits of the fixed-point reduced argument
    scaling_power = integer_size - max_bound_log
    Log.report(Log.Info, "scaling power: %s " % scaling_power)

    storage_precision = ML_Custom_FixedPoint_Format(1, 30, signed=True)

    Log.report(Log.Info, "tabulating cosine and sine")
    # cosine and sine fused table: column 0 is cos, column 1 is sin
    fused_table = ML_NewTable(
        dimensions=[2**table_size_log, 2],
        storage_precision=storage_precision,
        tag="fast_lib_shared_table")  # self.uniquify_name("cossin_table"))
    # filling table with values at i * 2^(max_bound_log - table_size_log)
    for i in range(2**table_size_log):
        local_x = i / S2**table_size_log * S2**max_bound_log

        cos_local = cos(
            local_x
        )  # nearestint(cos(local_x) * S2**storage_precision.get_frac_size())

        sin_local = sin(
            local_x
        )  # nearestint(sin(local_x) * S2**storage_precision.get_frac_size())

        fused_table[i][0] = cos_local
        fused_table[i][1] = sin_local

    # argument reduction evaluation scheme
    # scaling_factor = Constant(S2**scaling_power, precision = self.precision)

    red_vx_precision = ML_Custom_FixedPoint_Format(31 - scaling_power,
                                                   scaling_power,
                                                   signed=True)
    Log.report(
        Log.Verbose, "red_vx_precision.get_c_bit_size()=%d" %
        red_vx_precision.get_c_bit_size())
    # red_vx = NearestInteger(vx * scaling_factor, precision = integer_precision)
    red_vx = Conversion(vx,
                        precision=red_vx_precision,
                        tag="red_vx",
                        debug=debug_fixed32)

    computation_precision = red_vx_precision  # self.precision
    output_precision = self.io_precisions[0]
    Log.report(Log.Info,
               "computation_precision is %s" % computation_precision)
    Log.report(Log.Info, "storage_precision     is %s" % storage_precision)
    Log.report(Log.Info, "output_precision      is %s" % output_precision)

    # mask keeping the upper (table_size_log + 1) bits of a 32-bit word
    hi_mask_value = 2**32 - 2**(32 - table_size_log - 1)
    hi_mask = Constant(hi_mask_value, precision=ML_Int32)
    Log.report(Log.Info, "hi_mask=0x%x" % hi_mask_value)

    # split red_vx into the masked high part and the remaining low part
    red_vx_hi_int = BitLogicAnd(TypeCast(red_vx, precision=ML_Int32),
                                hi_mask,
                                precision=ML_Int32,
                                tag="red_vx_hi_int",
                                debug=debugd)
    red_vx_hi = TypeCast(red_vx_hi_int,
                         precision=red_vx_precision,
                         tag="red_vx_hi",
                         debug=debug_fixed32)
    red_vx_lo = red_vx - red_vx_hi
    red_vx_lo.set_attributes(precision=red_vx_precision,
                             tag="red_vx_lo",
                             debug=debug_fixed32)
    # table index: raw integer value of red_vx shifted right
    table_index = BitLogicRightShift(TypeCast(red_vx, precision=ML_Int32),
                                     scaling_power -
                                     (table_size_log - max_bound_log),
                                     precision=ML_Int32,
                                     tag="table_index",
                                     debug=debugd)

    tabulated_cos = TableLoad(fused_table,
                              table_index,
                              0,
                              tag="tab_cos",
                              precision=storage_precision,
                              debug=debug_fixed32)
    tabulated_sin = TableLoad(fused_table,
                              table_index,
                              1,
                              tag="tab_sin",
                              precision=storage_precision,
                              debug=debug_fixed32)

    error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

    Log.report(Log.Info, "building polynomial approximation for cosine")
    # cosine polynomial approximation on one table step
    poly_interval = Interval(0, S2**(max_bound_log - table_size_log))
    Log.report(Log.Info, "poly_interval=%s " % poly_interval)
    # cos_poly_degree is not read again; the degree list below is [0, 2]
    cos_poly_degree = 2  # int(sup(guessdegree(cos(x), poly_interval, accuracy_goal)))

    Log.report(Log.Verbose, "cosine polynomial approximation")
    # NOTE(review): this call uses a bare `x` while the sine call below
    # uses `sollya.x`; confirm `x` is the sollya free variable in scope.
    cos_poly_object, cos_approx_error = Polynomial.build_from_approximation_with_error(
        cos(x), [0, 2], [0] + [computation_precision.get_bit_size()],
        poly_interval,
        sollya.absolute,
        error_function=error_function)
    #cos_eval_scheme = PolynomialSchemeEvaluator.generate_horner_scheme(cos_poly_object, red_vx_lo, unified_precision = computation_precision)
    Log.report(Log.Info, "cos_approx_error=%e" % cos_approx_error)
    cos_coeff_list = cos_poly_object.get_ordered_coeff_list()
    # coeff_C0 is not read again in this function
    coeff_C0 = cos_coeff_list[0][1]
    coeff_C2 = Constant(cos_coeff_list[1][1],
                        precision=ML_Custom_FixedPoint_Format(-1,
                                                              32,
                                                              signed=True))

    Log.report(Log.Info, "building polynomial approximation for sine")

    # sine polynomial approximation (of sin(x) / x)
    sin_poly_degree = 2  # int(sup(guessdegree(sin(x)/x, poly_interval, accuracy_goal)))
    Log.report(Log.Info, "sine poly degree: %e" % sin_poly_degree)
    Log.report(Log.Verbose, "sine polynomial approximation")
    # NOTE(review): the precision list has sin_poly_degree + 2 entries
    # for a two-entry degree list -- confirm the extra entries are ignored.
    sin_poly_object, sin_approx_error = Polynomial.build_from_approximation_with_error(
        sin(sollya.x) / sollya.x, [0, 2], [0] +
        [computation_precision.get_bit_size()] * (sin_poly_degree + 1),
        poly_interval,
        sollya.absolute,
        error_function=error_function)
    sin_coeff_list = sin_poly_object.get_ordered_coeff_list()
    # coeff_S0 is not read again in this function
    coeff_S0 = sin_coeff_list[0][1]
    coeff_S2 = Constant(sin_coeff_list[1][1],
                        precision=ML_Custom_FixedPoint_Format(-1,
                                                              32,
                                                              signed=True))

    # scheme selection between sine and cosine
    if self.cos_output:
        scheme = self.generate_cos_scheme(computation_precision,
                                          tabulated_cos, tabulated_sin,
                                          coeff_S2, coeff_C2, red_vx_lo)
    else:
        scheme = self.generate_sin_scheme(computation_precision,
                                          tabulated_cos, tabulated_sin,
                                          coeff_S2, coeff_C2, red_vx_lo)

    # convert the fixed-point result to the output format
    result = Conversion(scheme, precision=self.io_precisions[0])

    Log.report(
        Log.Verbose, "result operation tree :\n %s " % result.get_str(
            display_precision=True, depth=None, memoization_map={}))
    scheme = Statement(Return(result))

    return scheme
def generate_scheme(self):
    """Build per-sub-interval polynomial schemes for cos(|x|).

    |x| is reduced modulo pi / 2^frac_pi_index and one polynomial
    approximating cos(x + i * pi / 2^frac_pi_index) is generated and
    turned into an evaluation scheme for each of the
    2^(frac_pi_index + 1) indices i.

    NOTE(review): the visible body ends right after ``half`` is assigned,
    without assembling or returning a scheme; the remainder of the
    function may be missing from this copy.  The code uses Python 2
    ``print`` statements.
    """
    # declaring CodeFunction and retrieving input variable
    # (the scheme works on the absolute value of the input)
    vx = Abs(self.implementation.add_input_variable("x", self.precision), tag = "vx")

    Log.report(Log.Info, "generating implementation scheme")
    if self.debug_flag:
        Log.report(Log.Info, "debug has been enabled")

    # local overloading of RaiseReturn operation
    def ExpRaiseReturn(*args, **kwords):
        kwords["arg_value"] = vx
        kwords["function_name"] = self.function_name
        return RaiseReturn(*args, **kwords)

    # debug attribute matching the working precision
    debug_precision = {ML_Binary32: debug_ftox, ML_Binary64: debug_lftolx}[self.precision]

    test_nan_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False, debug = True, tag = "nan_or_inf")
    test_nan = Test(vx, specifier = Test.IsNaN, debug = True, tag = "is_nan_test")
    test_positive = Comparison(vx, 0, specifier = Comparison.GreaterOrEqual, debug = True, tag = "inf_sign")

    test_signaling_nan = Test(vx, specifier = Test.IsSignalingNaN, debug = True, tag = "is_signaling_nan")
    # signaling NaN: raise the invalid exception and return a quiet NaN
    return_snan = Statement(ExpRaiseReturn(ML_FPE_Invalid, return_value = FP_QNaN(self.precision)))

    # return in case of infinity input
    infty_return = Statement(ConditionBlock(test_positive, Return(FP_PlusInfty(self.precision)), Return(FP_PlusZero(self.precision))))
    # return in case of specific value input (NaN or inf)
    specific_return = ConditionBlock(test_nan, ConditionBlock(test_signaling_nan, return_snan, Return(FP_QNaN(self.precision))), infty_return)
    # return in case of standard (non-special) input

    sollya_precision = self.precision.get_sollya_object()
    # rounding precision of the high part of pi / 2^frac_pi_index
    hi_precision = self.precision.get_field_size() - 3

    # argument reduction with a step of pi / 2^frac_pi_index
    frac_pi_index = 3
    frac_pi = round(S2**frac_pi_index / pi, sollya_precision, sollya.RN)
    inv_frac_pi = round(pi / S2**frac_pi_index, hi_precision, sollya.RN)
    inv_frac_pi_lo = round(pi / S2**frac_pi_index - inv_frac_pi, sollya_precision, sollya.RN)
    # computing k = E(x * frac_pi)
    vx_pi = Multiplication(vx, frac_pi, precision = self.precision)
    k = NearestInteger(vx_pi, precision = ML_Int32, tag = "k", debug = True)
    fk = Conversion(k, precision = self.precision, tag = "fk")

    inv_frac_pi_cst = Constant(inv_frac_pi, tag = "inv_frac_pi", precision = self.precision)
    inv_frac_pi_lo_cst = Constant(inv_frac_pi_lo, tag = "inv_frac_pi_lo", precision = self.precision)

    # reduced argument x - k * (hi + lo), in the working precision
    red_vx_hi = (vx - inv_frac_pi_cst * fk)
    red_vx_hi.set_attributes(tag = "red_vx_hi", debug = debug_precision, precision = self.precision)
    red_vx_lo_sub = inv_frac_pi_lo_cst * fk
    red_vx_lo_sub.set_attributes(tag = "red_vx_lo_sub", debug = debug_precision, unbreakable = True, precision = self.precision)
    vx_d = Conversion(vx, precision = ML_Binary64, tag = "vx_d")
    pre_red_vx = red_vx_hi - inv_frac_pi_lo_cst * fk
    # same reduction carried out in binary64
    pre_red_vx_d_hi = (vx_d - inv_frac_pi_cst * fk)
    pre_red_vx_d_hi.set_attributes(tag = "pre_red_vx_d_hi", precision = ML_Binary64, debug = debug_lftolx)
    pre_red_vx_d = pre_red_vx_d_hi - inv_frac_pi_lo_cst * fk
    pre_red_vx_d.set_attributes(tag = "pre_red_vx_d", debug = debug_lftolx, precision = ML_Binary64)

    # index of the sub-interval within one period
    modk = Modulo(k, 2**(frac_pi_index+1), precision = ML_Int32, tag = "switch_value", debug = True)

    # the reduced argument is negated when bit (frac_pi_index - 1) of
    # modk is set
    sel_c = Equal(BitLogicAnd(modk, 2**(frac_pi_index-1)), 2**(frac_pi_index-1))
    red_vx = Select(sel_c, -pre_red_vx, pre_red_vx)
    red_vx.set_attributes(tag = "red_vx", debug = debug_precision, precision = self.precision)

    red_vx_d = Select(sel_c, -pre_red_vx_d, pre_red_vx_d)
    red_vx_d.set_attributes(tag = "red_vx_d", debug = debug_lftolx, precision = ML_Binary64)

    approx_interval = Interval(-pi/(S2**(frac_pi_index+1)), pi / S2**(frac_pi_index+1))

    Log.report(Log.Info, "approx interval: %s\n" % approx_interval)

    error_goal_approx = S2**-self.precision.get_precision()

    Log.report(Log.Info, "building mathematical polynomial")
    poly_degree_vector = [None] * 2**(frac_pi_index+1)

    error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

    #polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme
    polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

    index_relative = []

    # first pass: one polynomial per sub-interval index i
    poly_object_vector = [None] * 2**(frac_pi_index+1)
    for i in range(2**(frac_pi_index+1)):
        sub_func = cos(sollya.x+i*pi/S2**frac_pi_index)
        degree = int(sup(guessdegree(sub_func, approx_interval, error_goal_approx))) + 1

        degree_list = range(degree+1)
        a_interval = approx_interval
        if i == 0:
            # ad-hoc, TODO: to be cleaned
            # around 0 only even monomials up to degree 6 are used
            degree = 6
            degree_list = range(0, degree+1, 2)
        elif i % 2**(frac_pi_index) == 2**(frac_pi_index-1):
            # for pi/2 and 3pi/2, an approx to  sin=cos(pi/2+x)
            # must be generated (odd monomials only)
            degree_list = range(1, degree+1, 2)

        # indices 3, 5, 7 and 9 get a binary64 leading coefficient
        if i == 3 or i == 5 or i == 7 or i == 9:
            precision_list = [sollya.binary64] + [sollya.binary32] *(degree)
        else:
            precision_list = [sollya.binary32] * (degree+1)

        poly_degree_vector[i] = degree

        constraint = sollya.absolute
        delta = (2**(frac_pi_index - 3))
        centered_i = (i % 2**(frac_pi_index)) - 2**(frac_pi_index-1)
        # with frac_pi_index = 3, delta is 1, so no integer centered_i
        # satisfies this test and the absolute constraint is always kept
        if centered_i < delta and centered_i > -delta and centered_i != 0:
            constraint = sollya.relative
            index_relative.append(i)
        Log.report(Log.Info, "generating approximation for %d/%d" % (i, 2**(frac_pi_index+1)))
        poly_object_vector[i], _ = Polynomial.build_from_approximation_with_error(sub_func, degree_list, precision_list, a_interval, constraint, error_function = error_function)

    # unified power map for red_sx^n (shared by all scheme builder calls)
    upm = {}
    rel_error_list = []

    poly_scheme_vector = [None] * (2**(frac_pi_index+1))

    # second pass: one evaluation scheme per polynomial
    for i in range(2**(frac_pi_index+1)):
        poly_object = poly_object_vector[i]
        poly_precision = self.precision
        if i == 3 or i == 5 or i == 7 or i == 9:
            # c0 + c1 * red_vx is set to binary64, the remaining terms
            # are evaluated in the working precision
            poly_precision = ML_Binary64
            c0 = Constant(coeff(poly_object.get_sollya_object(), 0), precision = ML_Binary64)
            c1 = Constant(coeff(poly_object.get_sollya_object(), 1), precision = self.precision)
            poly_hi = (c0 + c1 * red_vx)
            poly_hi.set_precision(ML_Binary64)
            red_vx_d_2 = red_vx_d * red_vx_d
            poly_scheme = poly_hi + red_vx_d_2 * polynomial_scheme_builder(poly_object.sub_poly(start_index = 2, offset = 2), red_vx, unified_precision = self.precision, power_map_ = upm)
            poly_scheme.set_attributes(unbreakable = True)
        elif i == 4:
            # the degree-1 term uses a binary64 coefficient and red_vx_d
            c1 = Constant(coeff(poly_object.get_sollya_object(), 1), precision = ML_Binary64)
            poly_scheme = c1 * red_vx_d + polynomial_scheme_builder(poly_object.sub_poly(start_index = 2), red_vx, unified_precision = self.precision, power_map_ = upm)
            poly_scheme.set_precision(ML_Binary64)
        else:
            poly_scheme = polynomial_scheme_builder(poly_object, red_vx, unified_precision = poly_precision, power_map_ = upm)
        #if i == 3:
        #  c0 = Constant(coeff(poly_object.get_sollya_object(), 0), precision = self.precision)
        #  c1 = Constant(coeff(poly_object.get_sollya_object(), 1), precision = self.precision)
        #  poly_scheme = (c0 + c1 * red_vx) + polynomial_scheme_builder(poly_object.sub_poly(start_index = 2), red_vx, unified_precision = self.precision, power_map_ = upm)

        poly_scheme.set_attributes(tag = "poly_cos%dpi%d" % (i, 2**(frac_pi_index)), debug = debug_precision)
        poly_scheme_vector[i] = poly_scheme

        #try:
        # evaluation error is estimated with gappa for index 3 only
        if is_gappa_installed() and i == 3:
            opt_scheme = self.opt_engine.optimization_process(poly_scheme, self.precision, copy = True, fuse_fma = self.fuse_fma)

            tag_map = {}
            self.opt_engine.register_nodes_by_tag(opt_scheme, tag_map)

            gappa_vx = Variable("red_vx", precision = self.precision, interval = approx_interval)

            cg_eval_error_copy_map = {
                tag_map["red_vx"]: gappa_vx,
                tag_map["red_vx_d"]: gappa_vx,
            }

            # NOTE(review): the four statements below repeat the four
            # above verbatim and rebuild the same values -- confirm
            # whether the repetition is intentional.
            tag_map = {}
            self.opt_engine.register_nodes_by_tag(opt_scheme, tag_map)

            gappa_vx = Variable("red_vx", precision = self.precision, interval = approx_interval)

            cg_eval_error_copy_map = {
                tag_map["red_vx"]: gappa_vx,
                tag_map["red_vx_d"]: gappa_vx,
            }

            print "opt_scheme"
            print opt_scheme.get_str(depth = None, display_precision = True, memoization_map = {})

            eval_error = self.gappa_engine.get_eval_error_v2(self.opt_engine, opt_scheme, cg_eval_error_copy_map, gappa_filename = "red_arg_%d.g" % i)
            # evaluation error divided by the range of the function on
            # this sub-interval
            poly_range = cos(approx_interval+i*pi/S2**frac_pi_index)
            rel_error_list.append(eval_error / poly_range)

    #for rel_error in rel_error_list:
    #  print sup(abs(rel_error))

    #return

    # case 17
    #poly17 = poly_object_vector[17]
    #c0 = Constant(coeff(poly17.get_sollya_object(), 0), precision = self.precision)
    #c1 = Constant(coeff(poly17.get_sollya_object(), 1), precision = self.precision)
    #poly_scheme_vector[17] = FusedMultiplyAdd(c1, red_vx, c0, specifier = FusedMultiplyAdd.Standard) + polynomial_scheme_builder(poly17.sub_poly(start_index = 2), red_vx, unified_precision = self.precision, power_map_ = upm)

    half = 2**frac_pi_index
def generate_scheme(self):
    """Build the operation graph of a table-driven cosine implementation.

    The input is replaced by its absolute value and reduced with
    k = nearest(x * 2**frac_pi_index / pi) (frac_pi_index = 3).  The value
    k mod 2**(frac_pi_index + 1) then selects, through a SwitchBlock, one of
    the polynomials approximating cos(x + i * pi / 2**frac_pi_index) on the
    reduced interval.  Inputs with |x| >= 2**20 use the external
    "payne_hanek_fp32_asm" reduction and a second set of polynomials.

    Returns:
        The Statement made of the reduction nodes and the ConditionBlock
        choosing between the large-argument and the standard paths.
    """
    # declaring CodeFunction and retrieving input variable
    vx = Abs(self.implementation.add_input_variable("x", self.precision),
             tag="vx")

    Log.report(Log.Info, "generating implementation scheme")
    if self.debug_flag:
        Log.report(Log.Info, "debug has been enabled")

    # local overloading of RaiseReturn operation
    def ExpRaiseReturn(*args, **kwords):
        kwords["arg_value"] = vx
        kwords["function_name"] = self.function_name
        return RaiseReturn(*args, **kwords)

    debug_precision = {
        ML_Binary32: debug_ftox,
        ML_Binary64: debug_lftolx
    }[self.precision]

    # NOTE(review): the special-value nodes below (NaN / infinity handling)
    # are built but never referenced by the scheme returned at the end of
    # this function -- confirm whether they are meant to be wired in.
    test_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False,
                           debug=True, tag="nan_or_inf")
    test_nan = Test(vx, specifier=Test.IsNaN, debug=True, tag="is_nan_test")
    test_positive = Comparison(vx, 0, specifier=Comparison.GreaterOrEqual,
                               debug=True, tag="inf_sign")
    test_signaling_nan = Test(vx, specifier=Test.IsSignalingNaN, debug=True,
                              tag="is_signaling_nan")
    return_snan = Statement(
        ExpRaiseReturn(ML_FPE_Invalid, return_value=FP_QNaN(self.precision)))

    # return in case of infinity input
    infty_return = Statement(
        ConditionBlock(test_positive,
                       Return(FP_PlusInfty(self.precision)),
                       Return(FP_PlusZero(self.precision))))
    # return in case of specific value input (NaN or inf)
    specific_return = ConditionBlock(
        test_nan,
        ConditionBlock(test_signaling_nan, return_snan,
                       Return(FP_QNaN(self.precision))),
        infty_return)

    # return in case of standard (non-special) input
    sollya_precision = self.precision.get_sollya_object()
    hi_precision = self.precision.get_field_size() - 3

    # argument reduction
    frac_pi_index = 3
    frac_pi = round(S2**frac_pi_index / pi, sollya_precision, sollya.RN)
    inv_frac_pi = round(pi / S2**frac_pi_index, hi_precision, sollya.RN)
    inv_frac_pi_lo = round(pi / S2**frac_pi_index - inv_frac_pi,
                           sollya_precision, sollya.RN)

    # computing k = E(x * frac_pi)
    vx_pi = Multiplication(vx, frac_pi, precision=self.precision)
    k = NearestInteger(vx_pi, precision=ML_Int32, tag="k", debug=True)
    fk = Conversion(k, precision=self.precision, tag="fk")

    inv_frac_pi_cst = Constant(inv_frac_pi, tag="inv_frac_pi",
                               precision=self.precision)
    inv_frac_pi_lo_cst = Constant(inv_frac_pi_lo, tag="inv_frac_pi_lo",
                                  precision=self.precision)

    # reduced argument, evaluated both in the working precision and in
    # binary64: x - k * (inv_frac_pi + inv_frac_pi_lo)
    red_vx_hi = (vx - inv_frac_pi_cst * fk)
    red_vx_hi.set_attributes(tag="red_vx_hi", debug=debug_precision,
                             precision=self.precision)
    red_vx_lo_sub = inv_frac_pi_lo_cst * fk
    red_vx_lo_sub.set_attributes(tag="red_vx_lo_sub", debug=debug_precision,
                                 unbreakable=True, precision=self.precision)
    vx_d = Conversion(vx, precision=ML_Binary64, tag="vx_d")
    pre_red_vx = red_vx_hi - inv_frac_pi_lo_cst * fk
    pre_red_vx_d_hi = (vx_d - inv_frac_pi_cst * fk)
    pre_red_vx_d_hi.set_attributes(tag="pre_red_vx_d_hi",
                                   precision=ML_Binary64, debug=debug_lftolx)
    pre_red_vx_d = pre_red_vx_d_hi - inv_frac_pi_lo_cst * fk
    pre_red_vx_d.set_attributes(tag="pre_red_vx_d", debug=debug_lftolx,
                                precision=ML_Binary64)

    modk = Modulo(k, 2**(frac_pi_index + 1), precision=ML_Int32,
                  tag="switch_value", debug=True)

    # the reduced argument is negated when bit (frac_pi_index - 1) of modk
    # is set
    sel_c = Equal(BitLogicAnd(modk, 2**(frac_pi_index - 1)),
                  2**(frac_pi_index - 1))
    red_vx = Select(sel_c, -pre_red_vx, pre_red_vx)
    red_vx.set_attributes(tag="red_vx", debug=debug_precision,
                          precision=self.precision)
    red_vx_d = Select(sel_c, -pre_red_vx_d, pre_red_vx_d)
    red_vx_d.set_attributes(tag="red_vx_d", debug=debug_lftolx,
                            precision=ML_Binary64)

    approx_interval = Interval(-pi / (S2**(frac_pi_index + 1)),
                               pi / S2**(frac_pi_index + 1))
    Log.report(Log.Info, "approx interval: %s\n" % approx_interval)

    error_goal_approx = S2**-self.precision.get_precision()

    Log.report(Log.Info, "building mathematical polynomial")
    poly_degree_vector = [None] * 2**(frac_pi_index + 1)

    error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

    polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

    index_relative = []

    poly_object_vector = [None] * 2**(frac_pi_index + 1)
    for i in range(2**(frac_pi_index + 1)):
        sub_func = cos(sollya.x + i * pi / S2**frac_pi_index)
        degree = int(
            sup(guessdegree(sub_func, approx_interval, error_goal_approx))) + 1

        degree_list = range(degree + 1)
        a_interval = approx_interval
        if i == 0:
            # ad-hoc, TODO: to be cleaned
            degree = 6
            degree_list = range(0, degree + 1, 2)
        elif i % 2**(frac_pi_index) == 2**(frac_pi_index - 1):
            # for pi/2 and 3pi/2, an approx to sin=cos(pi/2+x)
            # must be generated
            degree_list = range(1, degree + 1, 2)

        if i in (3, 5, 7, 9):
            # constant coefficient kept in binary64 for these intervals
            precision_list = [sollya.binary64] + [sollya.binary32] * (degree)
        else:
            precision_list = [sollya.binary32] * (degree + 1)

        poly_degree_vector[i] = degree

        constraint = sollya.absolute
        delta = (2**(frac_pi_index - 3))
        centered_i = (i % 2**(frac_pi_index)) - 2**(frac_pi_index - 1)
        # NOTE(review): with frac_pi_index = 3, delta is 1, so no integer
        # centered_i can satisfy this test and the relative constraint is
        # never selected.
        if centered_i < delta and centered_i > -delta and centered_i != 0:
            constraint = sollya.relative
            index_relative.append(i)
        Log.report(
            Log.Info,
            "generating approximation for %d/%d" % (i, 2**(frac_pi_index + 1)))
        poly_object_vector[i], _ = \
            Polynomial.build_from_approximation_with_error(
                sub_func, degree_list, precision_list, a_interval, constraint,
                error_function=error_function)

    # unified power map for red_sx^n
    upm = {}
    rel_error_list = []

    poly_scheme_vector = [None] * (2**(frac_pi_index + 1))

    for i in range(2**(frac_pi_index + 1)):
        poly_object = poly_object_vector[i]
        poly_precision = self.precision
        if i in (3, 5, 7, 9):
            # degree 0 and 1 terms are accumulated in binary64, the
            # remaining terms are factored by red_vx_d**2
            poly_precision = ML_Binary64
            c0 = Constant(coeff(poly_object.get_sollya_object(), 0),
                          precision=ML_Binary64)
            c1 = Constant(coeff(poly_object.get_sollya_object(), 1),
                          precision=self.precision)
            poly_hi = (c0 + c1 * red_vx)
            poly_hi.set_precision(ML_Binary64)
            red_vx_d_2 = red_vx_d * red_vx_d
            poly_scheme = poly_hi + red_vx_d_2 * polynomial_scheme_builder(
                poly_object.sub_poly(start_index=2, offset=2), red_vx,
                unified_precision=self.precision, power_map_=upm)
            poly_scheme.set_attributes(unbreakable=True)
        elif i == 4:
            c1 = Constant(coeff(poly_object.get_sollya_object(), 1),
                          precision=ML_Binary64)
            poly_scheme = c1 * red_vx_d + polynomial_scheme_builder(
                poly_object.sub_poly(start_index=2), red_vx,
                unified_precision=self.precision, power_map_=upm)
            poly_scheme.set_precision(ML_Binary64)
        else:
            poly_scheme = polynomial_scheme_builder(
                poly_object, red_vx, unified_precision=poly_precision,
                power_map_=upm)
        poly_scheme.set_attributes(
            tag="poly_cos%dpi%d" % (i, 2**(frac_pi_index)),
            debug=debug_precision)
        poly_scheme_vector[i] = poly_scheme

        if is_gappa_installed() and i == 3:
            opt_scheme = self.opt_engine.optimization_process(
                poly_scheme, self.precision, copy=True,
                fuse_fma=self.fuse_fma)

            tag_map = {}
            self.opt_engine.register_nodes_by_tag(opt_scheme, tag_map)

            gappa_vx = Variable("red_vx", precision=self.precision,
                                interval=approx_interval)

            cg_eval_error_copy_map = {
                tag_map["red_vx"]: gappa_vx,
                tag_map["red_vx_d"]: gappa_vx,
            }

            # print() with a single argument behaves the same under
            # Python 2 and Python 3; the former print statements were a
            # SyntaxError under Python 3.
            print("opt_scheme")
            print(opt_scheme.get_str(depth=None, display_precision=True,
                                     memoization_map={}))

            eval_error = self.gappa_engine.get_eval_error_v2(
                self.opt_engine, opt_scheme, cg_eval_error_copy_map,
                gappa_filename="red_arg_%d.g" % i)
            poly_range = cos(approx_interval + i * pi / S2**frac_pi_index)
            rel_error_list.append(eval_error / poly_range)

    half = 2**frac_pi_index
    sub_half = 2**(frac_pi_index - 1)

    # determine if the reduced input is within the second and third quarter
    # (not first nor fourth) to negate the cosine output
    factor_cond = BitLogicAnd(
        BitLogicXor(BitLogicRightShift(modk, frac_pi_index),
                    BitLogicRightShift(modk, frac_pi_index - 1)),
        1, tag="factor_cond", debug=True)

    CM1 = Constant(-1, precision=self.precision)
    C1 = Constant(1, precision=self.precision)
    factor = Select(factor_cond, CM1, C1, tag="factor", debug=debug_precision)
    factor2 = Select(Equal(modk, Constant(sub_half)), CM1, C1,
                     tag="factor2", debug=debug_precision)

    # each polynomial i < sub_half serves the symmetric cases
    # i, half - i and (for i != 0) half + i, 2 * half - i; the sign is
    # restored through `factor`
    switch_map = {}
    for i in range(2**(frac_pi_index - 1)):
        switch_case = (i, half - i)
        if i != 0:
            switch_case = switch_case + (half + i, 2 * half - i)
        if poly_scheme_vector[i].get_precision() != self.precision:
            poly_result = Conversion(poly_scheme_vector[i],
                                     precision=self.precision)
        else:
            poly_result = poly_scheme_vector[i]
        switch_map[switch_case] = Return(factor * poly_result)
    switch_map[(sub_half, half + sub_half)] = Return(
        factor2 * poly_scheme_vector[sub_half])

    result = SwitchBlock(modk, switch_map)

    #######################################################################
    #                    LARGE ARGUMENT MANAGEMENT                        #
    #                 (lar: Large Argument Reduction)                     #
    #######################################################################

    # payne and hanek argument reduction for large arguments
    red_func_name = "payne_hanek_fp32_asm"
    payne_hanek_func_op = FunctionOperator(
        red_func_name, arg_map={0: FO_Arg(0)},
        require_header=["support_lib/ml_red_arg.h"])
    payne_hanek_func = FunctionObject(red_func_name, [ML_Binary32],
                                      ML_Binary64, payne_hanek_func_op)
    payne_hanek_func_op.declare_prototype = payne_hanek_func
    large_arg_red = payne_hanek_func(vx)
    red_bound = S2**20

    cond = Abs(vx) >= red_bound
    cond.set_attributes(tag="cond", likely=False)

    lar_neark = NearestInteger(large_arg_red, precision=ML_Int64)
    lar_modk = Modulo(lar_neark, Constant(16, precision=ML_Int64),
                      tag="lar_modk", debug=True)

    pre_lar_red_vx = large_arg_red - Conversion(lar_neark,
                                                precision=ML_Binary64)
    pre_lar_red_vx.set_attributes(precision=ML_Binary64, debug=debug_lftolx,
                                  tag="pre_lar_red_vx")
    lar_red_vx = Conversion(pre_lar_red_vx, precision=self.precision,
                            debug=debug_precision, tag="lar_red_vx")
    lar_red_vx_lo = Conversion(
        pre_lar_red_vx - Conversion(lar_red_vx, precision=ML_Binary64),
        precision=self.precision)
    lar_red_vx_lo.set_attributes(tag="lar_red_vx_lo",
                                 precision=self.precision)

    lar_k = 3
    # large arg reduction Universal Power Map
    lar_upm = {}
    lar_switch_map = {}
    approx_interval = Interval(-0.5, 0.5)
    # loop-invariant scale of the large-argument reduced variable
    lar_frac_pi = pi / S2**lar_k
    for i in range(2**(lar_k + 1)):
        func = cos(lar_frac_pi * i + lar_frac_pi * sollya.x)

        degree = 6
        error_mode = sollya.absolute
        if i % 2**(lar_k) == 2**(lar_k - 1):
            # close to sin(x) cases
            # NOTE(review): bare `x` is not defined in this function; it is
            # presumably the sollya free variable imported at module level.
            func = -sin(lar_frac_pi * x) if i == 2**(lar_k - 1) \
                else sin(lar_frac_pi * x)
            degree_list = range(0, degree + 1, 2)
            precision_list = [sollya.binary32] * len(degree_list)
            poly_object, _ = Polynomial.build_from_approximation_with_error(
                func / x, degree_list, precision_list, approx_interval,
                error_mode)
            poly_object = poly_object.sub_poly(offset=-1)
        else:
            degree_list = range(degree + 1)
            precision_list = [sollya.binary32] * len(degree_list)
            poly_object, _ = Polynomial.build_from_approximation_with_error(
                func, degree_list, precision_list, approx_interval,
                error_mode)

        if i in (3, 5, 7, 9, 11, 13):
            poly_precision = ML_Binary64
            c0 = Constant(coeff(poly_object.get_sollya_object(), 0),
                          precision=ML_Binary64)
            c1 = Constant(coeff(poly_object.get_sollya_object(), 1),
                          precision=self.precision)
            poly_hi = (c0 + c1 * lar_red_vx)
            poly_hi.set_precision(ML_Binary64)
            pre_poly_scheme = poly_hi + polynomial_scheme_builder(
                poly_object.sub_poly(start_index=2), lar_red_vx,
                unified_precision=self.precision, power_map_=lar_upm)
            pre_poly_scheme.set_attributes(precision=ML_Binary64)
            poly_scheme = Conversion(pre_poly_scheme,
                                     precision=self.precision)
        elif i == 4 or i == 12:
            c1 = Constant(coeff(poly_object.get_sollya_object(), 1),
                          precision=self.precision)
            c3 = Constant(coeff(poly_object.get_sollya_object(), 3),
                          precision=self.precision)
            c5 = Constant(coeff(poly_object.get_sollya_object(), 5),
                          precision=self.precision)
            poly_hi = polynomial_scheme_builder(
                poly_object.sub_poly(start_index=3), lar_red_vx,
                unified_precision=self.precision, power_map_=lar_upm)
            poly_hi.set_attributes(tag="poly_lar_%d_hi" % i,
                                   precision=ML_Binary64)
            poly_scheme = Conversion(
                FusedMultiplyAdd(c1, lar_red_vx, poly_hi,
                                 precision=ML_Binary64)
                + c1 * lar_red_vx_lo,
                precision=self.precision)
        else:
            poly_scheme = polynomial_scheme_builder(
                poly_object, lar_red_vx, unified_precision=self.precision,
                power_map_=lar_upm)
        poly_scheme.set_attributes(tag="lar_poly_%d" % i,
                                   debug=debug_precision)
        lar_switch_map[(i, )] = Return(poly_scheme)

    lar_result = SwitchBlock(lar_modk, lar_switch_map)

    # main scheme
    Log.report(Log.Info, "Construction of the initial MDL scheme")
    scheme = Statement(pre_red_vx_d, red_vx_lo_sub,
                       ConditionBlock(cond, lar_result, result))

    return scheme
def generate_scheme(self):
    """Build the cosine scheme based on a reduction modulo pi/2.

    Unless ``self.skip_reduction`` is set, |x| is split as k * (pi/2) + r
    with k = floor(|x| * 2/pi); the polynomial is evaluated on r when
    k mod 2 == 0 and on (pi/2 - r) otherwise, and its result is negated
    when k mod 4 is 1 or 2.  The approximation polynomial and its Horner
    scheme are stored in ``self.poly_object`` and ``self.poly``.

    Returns:
        The Return node carrying the final expression.
    """
    input_var = self.implementation.add_input_variable("x", self.precision)
    n_hpi = self.precision.round_sollya_object(sollya.pi / 2, sollya.RN)
    with_reduction = not self.skip_reduction

    if with_reduction:
        abs_x = Abs(input_var, tag="abs_x")
        n_invhpi = self.precision.round_sollya_object(2 / sollya.pi,
                                                      sollya.RN)
        invhpi = Constant(n_invhpi, tag="invhpi")
        # k = floor(|x| * 2/pi), kept in the floating-point format
        quotient = Floor(Multiplication(abs_x, invhpi, tag="unround_k"),
                         precision=self.precision, tag="k")
        hpi = Constant(n_hpi, tag="hpi")
        # r = |x| - k * pi/2
        remainder = Subtraction(abs_x,
                                Multiplication(quotient, hpi, tag="whole"),
                                tag="r")
        int_quotient = Conversion(
            quotient, precision=ML_Binary32.get_integer_format(), tag="ik")
        part = Modulo(int_quotient, 4,
                      precision=ML_Binary32.get_integer_format(), tag="part")
        pre_part = Modulo(part, 2,
                          precision=ML_Binary32.get_integer_format(),
                          tag="pre_part")
        mirrored = Subtraction(hpi, remainder, tag="flip")
        keep_remainder = Equal(pre_part, 0, tag="do_flip")
        z = Select(keep_remainder, remainder, mirrored)
    else:
        z = input_var

    approx_interval = sollya.Interval(-2**-10, n_hpi + 2**-10)
    approx_func = sollya.cos(sollya.x)
    sollya.settings.prec = 2**10
    # even-degree monomials only
    self.poly_object = Polynomial.build_from_approximation(
        approx_func,
        range(0, self.poly_degree + 1, 2),
        [self.precision] * (self.poly_degree + 1),
        approx_interval,
        sollya.relative)
    self.poly = PolynomialSchemeEvaluator.generate_horner_scheme(
        self.poly_object, z)

    if with_reduction:
        negate = LogicalOr(Equal(part, 1, tag="part_eq_1"),
                           Equal(part, 2, tag="part_eq_2"))
        result_node = Select(negate, -self.poly, self.poly)
    else:
        result_node = self.poly

    return Return(result_node, precision=self.precision)
def generate_reduction_fptaylor(x):
    """Bound the error of reduction + polynomial evaluation over interval x.

    The interval must lie on one side of 0 and inside a single
    [k * pi/2, (k + 1) * pi/2) slice, so that the sign and the quotient k
    are constant.  An FPTaylor query modelling the reduction and the
    polynomial evaluation is built and run to obtain the rounding error;
    the approximation error of ``self.poly_object`` against cos on the
    reduced interval is obtained with ``sollya.supnorm`` and added to it.

    Returns:
        (rel_err, abs_err); an error that could not be obtained is
        reported as ``float("inf")``.

    Raises:
        AssertionError: if the interval straddles 0 or a multiple of pi/2.
    """
    # get sign and abs_x, must be the same at endpoints
    if sollya.sup(x) <= 0:
        abs_x_expr = "-x"
        abs_x = -x
    elif sollya.inf(x) >= 0:
        abs_x_expr = "x"
        abs_x = x
    else:
        # raised explicitly (instead of `assert False`) so the check is
        # not stripped when Python runs with -O
        raise AssertionError("Interval must not straddle 0")

    # get k, must be the same at endpoints
    unround_k = abs_x * n_invhpi
    k_low = sollya.floor(sollya.inf(unround_k))
    k_high = sollya.floor(sollya.sup(unround_k))
    if k_low != k_high:
        raise AssertionError("Interval must not straddle multples of pi/2")
    k = int(k_low)
    part = k % 4
    pre_part = part % 2

    r = abs_x - k * n_hpi

    if pre_part == 0:
        z_expr = "r"
        z = r
    else:
        z_expr = "{} - r".format(n_hpi)
        z = n_hpi - r

    # cosine is negative only in the second and third quarter-periods
    # (part 1 and 2); part 3 keeps the polynomial sign, as the generated
    # scheme does with its part == 1 or part == 2 selection.
    if part in (1, 2):
        flipped_poly_expr = "-poly"
    else:
        flipped_poly_expr = "poly"

    x_low = sollya.inf(x)
    x_high = sollya.sup(x)
    query = "\n".join([
        "Variables",
        " real x in [{},{}];".format(x_low, x_high),
        "Definitions",
        " abs_x rnd64= {};".format(abs_x_expr),
        " whole rnd64= {} * {};".format(k, n_hpi),
        " r rnd64= abs_x - whole;",
        " z rnd64= {};".format(z_expr),
        " poly rnd64= {};".format(poly_expr),
        " flipped_poly rnd64= {};".format(flipped_poly_expr),
        " retval rnd64= flipped_poly;",
        "Expressions",
        " retval;"
    ])

    rnd_rel_err = None
    rnd_abs_err = None
    # first attempt: ask for both relative and absolute errors
    try:
        res = fptaylor.Result(query, {
            **config,
            "--rel-error": "true",
            "--abs-error": "true"
        })
        rnd_rel_err = float(
            res.result["relative_errors"]["final_total"]["value"])
        rnd_abs_err = float(
            res.result["absolute_errors"]["final_total"]["value"])
    except AssertionError:
        pass
    except KeyError:
        # relative error missing from the result: keep the absolute one
        # if it is there
        try:
            rnd_abs_err = float(
                res.result["absolute_errors"]["final_total"]["value"])
        except KeyError:
            pass

    # second attempt: absolute error only
    if rnd_abs_err is None:
        try:
            res = fptaylor.Result(query, {
                **config,
                "--rel-error": "false",
                "--abs-error": "true"
            })
            rnd_abs_err = float(
                res.result["absolute_errors"]["final_total"]["value"])
        except (AssertionError, KeyError):
            pass

    err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                             sollya.cos(sollya.x), z, sollya.relative,
                             2**-100)
    algo_rel_err = sollya.sup(err_int)

    err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                             sollya.cos(sollya.x), z, sollya.absolute,
                             2**-100)
    algo_abs_err = sollya.sup(err_int)

    if rnd_rel_err is None or str(algo_rel_err) == "error":
        rel_err = float("inf")
    else:
        rel_err = rnd_rel_err + algo_rel_err

    # both FPTaylor attempts may have failed: report an unknown absolute
    # error as infinite instead of failing on `None + ...`
    if rnd_abs_err is None:
        abs_err = float("inf")
    else:
        abs_err = rnd_abs_err + algo_abs_err
    return rel_err, abs_err
import sollya
import random
import itertools

# random pulsation in [2, 12); the printed value is 2*pi / period
period = 2 + 10.0 * random.random()
print("period={}".format(float(2 * sollya.pi(100) / period)))
f = lambda x: sollya.cos(period * x)


def diff(func, x0, u=0.00001):
    """Return the forward-difference slope of func at x0 with increment u."""
    rise = func(x0 + u) - func(x0)
    return rise / u


def search_equality(func, x0, xstart, epsilon=0.001, step=0.0001):
    """Move xstart by fixed steps until func(xstart) is close to func(x0).

    At each iteration the sign of the local slope and of the gap
    func(xstart) - func(x0) decide whether xstart is increased or
    decreased by `step`.  The loop stops once the gap is at most
    `epsilon` in absolute value.

    Returns:
        The pair (x0, final xstart).
    """
    target = func(x0)
    gap = func(xstart) - target
    while abs(gap) > epsilon:
        slope = diff(func, xstart)
        # go up when climbing would close the gap, go down in every
        # other situation (including a null slope)
        if (slope > 0 and gap < 0) or (slope < 0 and gap > 0):
            xstart += step
        else:
            xstart -= step
        gap = func(xstart) - target
    return x0, xstart
def generate_scheme(self):
    """Build the cosine scheme based on a reduction modulo pi.

    Unless ``self.skip_reduction`` is set, |x| is split as k * pi + r with
    k = floor(|x| / pi); the polynomial is evaluated on r and negated when
    k is odd.  The approximation polynomial is searched with increasing
    sollya working precision (2**10 up to 2**19 bits) until the builder
    succeeds with a non-zero polynomial.  The polynomial and its Horner
    scheme are stored in ``self.poly_object`` and ``self.poly``.

    Returns:
        The Return node carrying the final expression.

    Raises:
        RuntimeError: if no working precision yields a usable polynomial.
    """
    x = self.implementation.add_input_variable("x", self.precision)

    n_pi = self.precision.round_sollya_object(sollya.pi, sollya.RN)

    if not self.skip_reduction:
        abs_x = Abs(x, tag="abs_x")
        n_invpi = self.precision.round_sollya_object(1 / sollya.pi,
                                                     sollya.RN)
        invpi = Constant(n_invpi, tag="invpi")
        unround_k = Multiplication(abs_x, invpi, tag="unround_k")
        k = Floor(unround_k, precision=self.precision, tag="k")
        pi_cst = Constant(n_pi, tag="pi")
        whole = Multiplication(k, pi_cst, tag="whole")
        r = Subtraction(abs_x, whole, tag="r")
        ik = Conversion(k, precision=ML_Binary32.get_integer_format(),
                        tag="ik")
        part = Modulo(ik, 2, precision=ML_Binary32.get_integer_format(),
                      tag="part")
        z = r
    else:
        z = x

    approx_interval = sollya.Interval(-2**-7, n_pi + 2**-7)
    approx_func = sollya.cos(sollya.x)
    builder = Polynomial.build_from_approximation
    for p in range(10, 20):
        try:
            sollya.settings.prec = 2**p
            poly_object = builder(approx_func,
                                  range(0, self.poly_degree + 1, 2),
                                  [self.precision] * (self.poly_degree + 1),
                                  approx_interval,
                                  sollya.relative)
        except SollyaError:
            continue
        # a zero polynomial is treated as a failed attempt
        if str(poly_object.get_sollya_object()) == "0":
            continue
        break
    else:
        # every attempt failed: without this, poly_object would be either
        # unbound or the zero polynomial of the last attempt
        raise RuntimeError(
            "no sollya precision in 2**10..2**19 produced a usable "
            "cosine approximation polynomial")

    self.poly_object = poly_object
    schemer = PolynomialSchemeEvaluator.generate_horner_scheme
    poly = schemer(poly_object, z)
    # NOTE(review): leftover debug output, kept so stdout is unchanged
    print("LSKDFKLJK", poly_object.get_sollya_object())
    self.poly = poly

    if not self.skip_reduction:
        post_bool = Equal(part, 1, tag="part_eq_1")
        retval = Select(post_bool, -poly, poly)
    else:
        retval = poly

    scheme = Return(retval, precision=self.precision)

    return scheme