def sollya_gamma_fct(x, diff_order, prec):
    """ wrapper to use bigfloat implementation of exponential
        rather than sollya's implementation directly.
        This wrapper implements sollya's function API.

        :param x: numerical input value (may be an Interval)
        :param diff_order: differential order
        :param prec: numerical precision expected (min)
    """
    # dispatch table: differential order -> gamma implementation
    dispatch = {
        0: sollya_gamma,
        1: sollya_gamma_d0,
        2: sollya_gamma_d1,
    }
    fct = dispatch.get(diff_order)
    if fct is None:
        raise NotImplementedError
    with bigfloat.precision(prec):
        if x.is_range():
            # evaluate on both bounds of the interval
            lower_bound = sollya.inf(x)
            upper_bound = sollya.sup(x)
            return sollya.Interval(fct(lower_bound), fct(upper_bound))
        return fct(x)
def get_value_str(self, value):
    """ Return the textual representation of <value>:
        "?" for an unknown value, "[lo, hi]" for a sollya range,
        str(value) otherwise. """
    if value is Gappa_Unknown:
        return "?"
    if isinstance(value, sollya.SollyaObject) and value.is_range():
        return "[%s, %s]" % (sollya.inf(value), sollya.sup(value))
    return str(value)
def generate_test_case(self, input_signals, io_map, index, test_range=Interval(-1.0, 1.0)):
    """ generic test case generation: generate a random input
        with index @p index

        Args:
            index (int): integer index of the test case

        Returns:
            dict: mapping (input tag -> numeric value)
    """
    # extracting test interval boundaries
    range_lo = sollya.inf(test_range)
    range_hi = sollya.sup(test_range)
    input_values = {}
    for tag in input_signals:
        signal = io_map[tag]
        # FIXME: correct value generation depending on signal precision
        base_format = signal.get_precision().get_base_format()
        if isinstance(base_format, ML_FP_Format):
            value = generate_random_fp_value(base_format, range_lo, range_hi)
        elif isinstance(base_format, ML_Fixed_Format):
            # TODO: does not depend on low and high range bounds
            value = generate_random_fixed_value(base_format)
        else:
            # fall back to a raw random bit pattern
            value = random.randrange(2**base_format.get_bit_size())
        # registering input value
        input_values[tag] = value
    return input_values
def generate_fptaylor(x):
    """ Evaluate combined error bounds of the polynomial on interval <x>.

        Rounding error is obtained from FPTaylor queries; approximation
        error against exp(x) is obtained from sollya.supnorm.

        :param x: sollya Interval on which errors are evaluated
        :return: tuple (relative error bound, absolute error bound);
                 a bound is float("inf") when it could not be computed
    """
    x_low = sollya.inf(x)
    x_high = sollya.sup(x)
    query = "\n".join([
        "Variables",
        "  real x in [{},{}];".format(x_low, x_high),
        "Definitions",
        "  r rnd64= x;",
        "  retval rnd64= {};".format(poly_expr),
        "Expressions",
        "  retval;"
    ])
    rnd_rel_err = None
    rnd_abs_err = None
    try:
        res = fptaylor.Result(query, {
            **config,
            "--rel-error": "true",
            "--abs-error": "true"
        })
        rnd_rel_err = float(
            res.result["relative_errors"]["final_total"]["value"])
        rnd_abs_err = float(
            res.result["absolute_errors"]["final_total"]["value"])
    except AssertionError:
        pass
    except KeyError:
        # relative error may be unavailable (e.g. domain containing 0):
        # try to salvage the absolute error from the same result
        try:
            rnd_abs_err = float(
                res.result["absolute_errors"]["final_total"]["value"])
        except KeyError:
            pass

    if rnd_abs_err is None:
        # retry with an absolute-error-only query
        try:
            res = fptaylor.Result(query, {
                **config,
                "--rel-error": "false",
                "--abs-error": "true"
            })
            rnd_abs_err = float(
                res.result["absolute_errors"]["final_total"]["value"])
        except AssertionError:
            pass

    # approximation error of the polynomial against exp(x)
    err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                             sollya.exp(sollya.x), x, sollya.relative,
                             2**-100)
    algo_rel_err = sollya.sup(err_int)
    err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                             sollya.exp(sollya.x), x, sollya.absolute,
                             2**-100)
    algo_abs_err = sollya.sup(err_int)

    if rnd_rel_err is None or str(algo_rel_err) == "error":
        rel_err = float("inf")
    else:
        rel_err = rnd_rel_err + algo_rel_err
    # BUGFIX: previously `rnd_abs_err + algo_abs_err` was computed
    # unconditionally and raised TypeError when rnd_abs_err was still
    # None (both FPTaylor attempts failed); guard symmetrically with
    # the relative-error path
    if rnd_abs_err is None or str(algo_abs_err) == "error":
        abs_err = float("inf")
    else:
        abs_err = rnd_abs_err + algo_abs_err
    return rel_err, abs_err
def get_integer_format(backend, optree):
    """ return integer format to use for optree

        Selects the smallest standard integer format able to hold
        optree's evaluated interval; falls back to the backend default
        when no interval is known.
    """
    int_range = optree.get_interval()
    # use `is None` (identity) rather than `== None`
    if int_range is None:
        return backend.default_integer_format
    elif inf(int_range) < 0:
        # signed: fits int32 iff within [-2**31, 2**31 - 1]
        if sup(int_range) > 2**31 - 1 or inf(int_range) < -2**31:
            return ML_Int64
        else:
            return ML_Int32
    else:
        # unsigned: uint32 holds values up to 2**32 - 1 inclusive
        # (previous `>=` wrongly promoted the exact max to 64-bit)
        if sup(int_range) > 2**32 - 1:
            return ML_UInt64
        else:
            return ML_UInt32
def inf(obj):
    """ generic getter for interval inferior bound """
    if isinstance(obj, (MetaInterval, MetaIntervalList)):
        # meta-interval wrappers expose the bound as an attribute
        return obj.inf
    if isinstance(obj, SollyaObject) and obj.is_range():
        return sollya.inf(obj)
    raise NotImplementedError
def test_interval_out_of_bound_risk(x_range, y_range):
    """ Try to determine from x and y's interval if there is a risk
        of underflow or overflow """
    quotient_range = abs(x_range / y_range)
    # thresholds derived from the target format's exponent range
    underflow_threshold = S2**(self.precision.get_emin_normal() + 2)
    overflow_threshold = S2**(self.precision.get_emax() - 2)
    underflow_risk = sollya.inf(quotient_range) < underflow_threshold
    overflow_risk = sollya.sup(quotient_range) > overflow_threshold
    return underflow_risk or overflow_risk
def is_simplifiable_to_cst(node):
    """ node can be simplified to a constant """
    interval = node.get_interval()
    if interval is None or isinstance(node, Constant):
        # no known range, or already a constant: nothing to fold
        return False
    if isinstance(interval, SollyaObject) and interval.is_range():
        # a single-point sollya range is foldable
        return sollya.inf(interval) == sollya.sup(interval)
    if isinstance(interval, (MetaInterval, MetaIntervalList)):
        return not interval.is_empty and interval.inf == interval.sup
    return False
def split_domain(starting_domain, slivers): in_domains = [starting_domain] # abs out_domains = list() for I in in_domains: if sollya.inf(I) < 0 and sollya.sup(I) > 0: out_domains.append(sollya.Interval(sollya.inf(I), 0)) out_domains.append(sollya.Interval(0, sollya.sup(I))) else: out_domains.append(I) in_domains = out_domains # k out_domains = list() while len(in_domains) > 0: I = in_domains.pop() #print("in: [{}, {}]".format(float(sollya.inf(I)), float(sollya.sup(I)))) unround_mult = I * n_invpi mult_low = sollya.floor(sollya.inf(unround_mult)) mult_high = sollya.floor(sollya.sup(unround_mult)) if mult_low == mult_high or (mult_low == -1 and mult_high == 0): #print(" accepted") out_domains.append(I) continue if sollya.sup(I) <= 0: divider_low = (mult_low + 1) * n_pi divider_high = divider_low - divider_low * 2**-53 else: divider_high = (mult_low + 1) * n_pi divider_low = divider_high - divider_high * 2**-53 lower_part = sollya.Interval(sollya.inf(I), divider_low) upper_part = sollya.Interval(divider_high, sollya.sup(I)) #print(" -> [{}, {}]".format(float(sollya.inf(lower_part)), float(sollya.sup(lower_part)))) #print(" -> [{}, {}]".format(float(sollya.inf(upper_part)), float(sollya.sup(upper_part)))) in_domains.append(lower_part) in_domains.append(upper_part) in_domains = out_domains # subdivide each section into 2**subd sections for _ in range(slivers): out_domains = list() for I in in_domains: mid = sollya.mid(I) out_domains.append(sollya.Interval(sollya.inf(I), mid)) out_domains.append(sollya.Interval(mid, sollya.sup(I))) in_domains = out_domains in_domains = set(in_domains) in_domains = sorted(in_domains, key=lambda x: float(sollya.inf(x))) in_domains = [ d for d in in_domains if sollya.inf(d) != sollya.sup(d) ] return in_domains
def addsub_signed_predicate(lhs, lhs_prec, rhs, rhs_prec, op=operator.__sub__, default=True):
    """ determine whether subtraction output on a signed or unsigned format """
    lhs_range = evaluate_range(lhs)
    rhs_range = evaluate_range(rhs)
    combined_range = safe(op)(lhs_range, rhs_range)
    if combined_range is None:
        # range evaluation failed: fall back to the caller's default
        return default
    # signed output required iff the result may be negative
    if sollya.inf(combined_range) < 0:
        return True
    return False
def simplify(self, node):
    """ Recursively simplify <node>, rewriting its inputs in place and
        memoizing per-node results in self.memoization_map.

        Returns the simplified replacement node, or None when no
        simplification applies (None is also memoized).
    """
    def get_node_input(index):
        # look for input into simpifield list
        # and return directly node input if simplified input is None
        return node.get_input(index)

    result = None
    if node in self.memoization_map:
        # already processed: return the memoized outcome
        return self.memoization_map[node]
    else:
        if not is_leaf_node(node):
            # bottom-up: simplify every input first
            for index, op in enumerate(node.inputs):
                new_op = self.simplify(op)
                # replacing modified inputs
                if not new_op is None:
                    node.set_input(index, new_op)
        if is_simplifiable_to_cst(node):
            # node's interval is a single point: fold it to a Constant
            new_node = Constant(
                sollya.inf(node.get_interval()),
                precision=node.get_precision()
            )
            forward_attributes(node, new_node)
            result = new_node
        elif isinstance(node, Multiplication) and is_simplifiable_multiplication(node, get_node_input(0), get_node_input(1)):
            result = simplify_multiplication(node)
        elif isinstance(node, Min):
            simplified_min = is_simplifiable_min(node, get_node_input(0), get_node_input(1))
            if simplified_min:
                result = simplified_min
        elif isinstance(node, Max):
            simplified_max = is_simplifiable_max(node, get_node_input(0), get_node_input(1))
            if simplified_max:
                result = simplified_max
        elif isinstance(node, Comparison):
            # statically-decidable comparisons become uniform constants
            cmp_value = is_simplifiable_cmp(node, get_node_input(0), get_node_input(1))
            if cmp_value is BooleanValue.AlwaysTrue:
                result = generate_uniform_cst(True, node.get_precision())
            elif cmp_value is BooleanValue.AlwaysFalse:
                result = generate_uniform_cst(False, node.get_precision())
        elif isinstance(node, Test):
            test_value = is_simplifiable_test(node, node.inputs)
            if test_value is BooleanValue.AlwaysTrue:
                result = generate_uniform_cst(True, node.get_precision())
            elif test_value is BooleanValue.AlwaysFalse:
                result = generate_uniform_cst(False, node.get_precision())
        elif isinstance(node, ConditionBlock):
            result = simplify_condition_block(node)
        elif isinstance(node, LogicOperation):
            result = simplify_logical_op(node)
        if not result is None:
            Log.report(LOG_VERBOSE_NUMERICAL_SIMPLIFICATION,
                       "{} has been simplified to {}",
                       node, result)
        self.memoization_map[node] = result
        return result
def get_value_str(self, value):
    """ Return the textual representation of <value>, unwrapping
        MetaInterval and rejecting MetaIntervalList. """
    if value is Gappa_Unknown:
        return "?"
    if isinstance(value, MetaInterval):
        # delegate to the wrapped interval representation
        return self.get_value_str(value.interval)
    if isinstance(value, MetaIntervalList):
        # MetaIntervalList should have been catched early and
        # should have generated a disjonction of cases
        raise NotImplementedError
    if isinstance(value, sollya.SollyaObject) and value.is_range():
        return "[%s, %s]" % (sollya.inf(value), sollya.sup(value))
    return str(value)
def findMaxIssue(res):
    """ Find the issue with the maximum error

    Parameters:
    - res: (sollya object) result from the checkModulusFilterInSpecification function

    Returns the maximum value (0 if not available)
    """
    maxError = 0
    for b in dict(res)["results"]:  # for every band
        okay = dict(b)["okay"]
        if not okay:
            for i in dict(b)["issue"]:  # for every issues
                H = dict(i)["H"]
                betaInf = dict(dict(i)["specification"])["betaInf"]
                betaSup = dict(dict(i)["specification"])["betaSup"]
                # NOTE(review): betaInf is read but never used below, and
                # both branches measure against betaSup — confirm whether
                # the else branch should involve betaInf instead
                if sollya.inf(H) > betaSup:
                    maxError = sollya.max(maxError, sollya.sup(H) - betaSup)
                else:
                    maxError = sollya.max(maxError, betaSup - sollya.inf(H))
    return maxError
def generate_json(errors, domain):
    """ Build the JSON summary dict for the error entries whose point
        lies inside <domain>. """
    # keep only entries located inside the domain, ordered by epsilon
    selected = [err for err in errors if err[0] in domain]
    selected.sort(key=lambda err: err[2])
    # smallest epsilon and largest delta among selected entries
    epsilon = selected[0][2]
    delta = max(err[1] for err in selected)
    return {
        "cname": self.function_name,
        "delta": float(delta),
        "domain": [float(sollya.inf(domain)), float(sollya.sup(domain)),],
        "epsilon": float(epsilon),
        "operation": "log"
    }
def split_domain(starting_domain, slivers):
    """ Split <starting_domain> so that floor(log2(x)) is constant on
        each piece (each piece lies within a single binade), then bisect
        every piece <slivers> times.

        Returns the pieces deduplicated, sorted by lower bound, with
        degenerate (single-point) intervals removed.
    """
    in_domains = [starting_domain]
    out_domains = list()
    while len(in_domains) > 0:
        I = in_domains.pop()
        unround_e = sollya.log2(I)
        e_low = sollya.floor(sollya.inf(unround_e))
        e_high = sollya.floor(sollya.sup(unround_e))
        #print("in: [{}, {}] ({}, {})".format(float(sollya.inf(I)), float(sollya.sup(I)), int(e_low), int(e_high)))
        if e_low == e_high:
            # already within a single binade
            #print("  accepted")
            out_domains.append(I)
            continue
        # the piece straddles a power of two: locate the binade boundary
        # near 2**(e_low+1) by bisection (100 refinement steps)
        e_range = sollya.Interval(e_low, e_low + 1)
        I_range = 2**e_range
        for _ in range(100):
            mid = sollya.mid(I_range)
            e = sollya.floor(sollya.log2(mid))
            if e == e_low:
                I_range = sollya.Interval(mid, sollya.sup(I_range))
            else:
                I_range = sollya.Interval(sollya.inf(I_range), mid)
        divider_high = sollya.sup(I_range)
        divider_low = sollya.inf(I_range)
        lower_part = sollya.Interval(sollya.inf(I), divider_low)
        upper_part = sollya.Interval(divider_high, sollya.sup(I))
        #print("  -> [{}, {}]".format(float(sollya.inf(lower_part)), float(sollya.sup(lower_part))))
        #print("  -> [{}, {}]".format(float(sollya.inf(upper_part)), float(sollya.sup(upper_part))))
        in_domains.append(upper_part)
        in_domains.append(lower_part)
    in_domains = out_domains

    # subdivide each section into 2**subd sections
    for _ in range(slivers):
        out_domains = list()
        for I in in_domains:
            mid = sollya.mid(I)
            out_domains.append(sollya.Interval(sollya.inf(I), mid))
            out_domains.append(sollya.Interval(mid, sollya.sup(I)))
        in_domains = out_domains

    # deduplicate, order by lower bound, drop degenerate intervals
    in_domains = set(in_domains)
    in_domains = sorted(in_domains, key=lambda x: float(sollya.inf(x)))
    in_domains = [d for d in in_domains if sollya.inf(d) != sollya.sup(d)]
    return in_domains
def solve_format_shift(optree):
    """ Legalize shift node """
    assert isinstance(optree, (BitLogicRightShift, BitLogicLeftShift))
    shift_input = optree.get_input(0)
    input_precision = shift_input.get_precision()
    shift_amount = optree.get_input(1)
    amount_precision = shift_amount.get_precision()
    if is_fixed_point(amount_precision):
        # a fixed-point shift amount must be provably non-negative
        amount_range = evaluate_range(shift_amount)
        if sollya.inf(amount_range) < 0:
            Log.report(
                Log.Error,
                "shift amount of {} may be negative {}\n".format(
                    optree, amount_range))
    if is_fixed_point(input_precision):
        return input_precision
    return optree.get_precision()
def get_precision_rng(precision, value_range=None):
    """ Build a random value generator for <precision>, optionally
        restricted to <value_range>. """
    if value_range is not None:
        # range-constrained generation
        return get_precision_rng_with_defined_range(
            precision, sollya.inf(value_range), sollya.sup(value_range))
    # default full-range value generation
    base_format = precision.get_base_format()
    if isinstance(base_format, ML_FP_MultiElementFormat):
        return MPFPRandomGen(precision)
    if isinstance(base_format, ML_FP_Format):
        return FPRandomGen(precision, include_snan=False)
    if isinstance(base_format, ML_Fixed_Format):
        return FixedPointRandomGen(precision)
    Log.report(Log.Error,
               "unsupported format {}/{} in get_precision_rng",
               precision, base_format)
def generate_scheme(self):
    """ Build the piecewise-linear approximation datapath:
        two tables (initial value + slope offset) indexed from slices of
        the reduced input, added and converted to the output format.
        Also evaluates and prints the resulting approximation error.

        Returns [self.implementation] (the generated entity).
    """
    ## convert @p value from an input floating-point precision
    #  @p in_precision to an output support format @p out_precision
    io_precision = self.precision
    # declaring main input variable
    vx = self.implementation.add_input_signal("x", io_precision)
    # rounding mode input
    rnd_mode = self.implementation.add_input_signal(
        "rnd_mode", rnd_mode_format)

    # size of most significant table index (for linear slope tabulation)
    alpha = self.alpha  # 6
    # size of medium significant table index (for initial value table index LSB)
    beta = self.beta  # 5
    # size of least significant table index (for linear offset tabulation)
    gamma = self.gamma  # 5

    guard_bits = self.guard_bits  # 3

    vx.set_interval(self.interval)

    range_hi = sollya.sup(self.interval)
    range_lo = sollya.inf(self.interval)
    f_hi = self.function(range_hi)
    f_lo = self.function(range_lo)
    # fixed by format used for reduced_x
    range_size = range_hi - range_lo
    range_size_log2 = int(sollya.log2(range_size))
    # the approximation domain must span an exact power of two
    assert 2**range_size_log2 == range_size

    print("range_size_log2={}".format(range_size_log2))

    # normalize input into [0, 1) as an (alpha+beta+gamma)-bit fraction
    reduced_x = Conversion(BitLogicRightShift(vx - range_lo,
                                              range_size_log2),
                           precision=fixed_point(0,
                                                 alpha + beta + gamma,
                                                 signed=False),
                           tag="reduced_x",
                           debug=debug_fixed)

    # slice the reduced input into the three table indices
    alpha_index = get_fixed_slice(reduced_x, 0, alpha - 1,
                                  align_hi=FixedPointPosition.FromMSBToLSB,
                                  align_lo=FixedPointPosition.FromMSBToLSB,
                                  tag="alpha_index",
                                  debug=debug_std)
    gamma_index = get_fixed_slice(reduced_x, gamma - 1, 0,
                                  align_hi=FixedPointPosition.FromLSBToLSB,
                                  align_lo=FixedPointPosition.FromLSBToLSB,
                                  tag="gamma_index",
                                  debug=debug_std)
    beta_index = get_fixed_slice(reduced_x, alpha, gamma,
                                 align_hi=FixedPointPosition.FromMSBToLSB,
                                 align_lo=FixedPointPosition.FromLSBToLSB,
                                 tag="beta_index",
                                 debug=debug_std)

    # Assuming monotonic function
    f_absmax = max(abs(f_hi), abs(f_lo))
    f_absmin = min(abs(f_hi), abs(f_lo))
    f_msb = int(sollya.ceil(sollya.log2(f_absmax))) + 1
    f_lsb = int(sollya.floor(sollya.log2(f_absmin)))
    # extend storage LSB by output width plus guard bits
    storage_lsb = f_lsb - io_precision.get_bit_size() - guard_bits

    f_int_size = f_msb
    f_frac_size = -storage_lsb

    storage_format = fixed_point(f_int_size, f_frac_size, signed=False)
    Log.report(Log.Info, "storage_format is {}".format(storage_format))

    # table of initial value index
    tiv_index = Concatenation(alpha_index, beta_index,
                              tag="tiv_index",
                              debug=debug_std)
    # table of offset value index
    to_index = Concatenation(alpha_index, gamma_index,
                             tag="to_index",
                             debug=debug_std)

    tiv_index_size = alpha + beta
    to_index_size = alpha + gamma

    Log.report(Log.Info, "initial table structures")
    table_iv = ML_NewTable(dimensions=[2**tiv_index_size],
                           storage_precision=storage_format,
                           tag="tiv")
    table_offset = ML_NewTable(dimensions=[2**to_index_size],
                               storage_precision=storage_format,
                               tag="to")

    slope_table = [None] * (2**alpha)
    slope_delta = 1.0 / sollya.SollyaObject(2**alpha)
    delta_u = range_size * slope_delta * 2**-15
    Log.report(Log.Info, "computing slope value")
    for i in range(2**alpha):
        # slope is computed at the middle of range_size interval
        slope_x = range_lo + (i + 0.5) * range_size * slope_delta
        # TODO: gross approximation of derivatives
        f_xpu = self.function(slope_x + delta_u / 2)
        f_xmu = self.function(slope_x - delta_u / 2)
        slope = (f_xpu - f_xmu) / delta_u
        slope_table[i] = slope

    range_rcp_steps = 1.0 / sollya.SollyaObject(2**tiv_index_size)
    Log.report(Log.Info, "computing value for initial-value table")
    for i in range(2**tiv_index_size):
        slope_index = i / 2**beta
        iv_x = range_lo + i * range_rcp_steps * range_size
        offset_x = 0.5 * range_rcp_steps * range_size
        # initial value is computed so that the piecewise linear
        # approximation intersects the function at iv_x + offset_x
        iv_y = self.function(
            iv_x + offset_x) - offset_x * slope_table[int(slope_index)]
        initial_value = storage_format.round_sollya_object(iv_y)
        table_iv[i] = initial_value

    # determining table of initial value interval
    tiv_min = table_iv[0]
    tiv_max = table_iv[0]
    for i in range(1, 2**tiv_index_size):
        tiv_min = min(tiv_min, table_iv[i])
        tiv_max = max(tiv_max, table_iv[i])
    table_iv.set_interval(Interval(tiv_min, tiv_max))

    # fill the offset table: per-alpha slope scaled by the gamma index
    offset_step = range_size / S2**(alpha + beta + gamma)
    for i in range(2**alpha):
        Log.report(Log.Info,
                   "computing offset value for sub-table {}".format(i))
        for j in range(2**gamma):
            to_i = i * 2**gamma + j
            offset = slope_table[i] * j * offset_step
            table_offset[to_i] = offset

    # determining table of offset interval
    to_min = table_offset[0]
    to_max = table_offset[0]
    for i in range(1, 2**(alpha + gamma)):
        to_min = min(to_min, table_offset[i])
        to_max = max(to_max, table_offset[i])
    offset_interval = Interval(to_min, to_max)
    table_offset.set_interval(offset_interval)

    initial_value = TableLoad(table_iv, tiv_index,
                              precision=storage_format,
                              tag="initial_value",
                              debug=debug_fixed)

    # shrink the offset storage to a format fitting its actual interval
    offset_precision = get_fixed_type_from_interval(offset_interval, 16)
    print("offset_precision is {} ({} bits)".format(
        offset_precision, offset_precision.get_bit_size()))
    table_offset.get_precision().storage_precision = offset_precision

    # rounding table value
    for i in range(1, 2**(alpha + gamma)):
        table_offset[i] = offset_precision.round_sollya_object(
            table_offset[i])

    offset_value = TableLoad(table_offset, to_index,
                             precision=offset_precision,
                             tag="offset_value",
                             debug=debug_fixed)

    Log.report(
        Log.Verbose,
        "initial_value's interval: {}, offset_value's interval: {}".format(
            evaluate_range(initial_value), evaluate_range(offset_value)))

    final_add = initial_value + offset_value
    round_bit = final_add  # + FixedPointPosition(final_add, io_precision.get_bit_size(), align=FixedPointPosition.FromMSBToLSB)

    vr_out = Conversion(initial_value + offset_value,
                        precision=io_precision,
                        tag="vr_out",
                        debug=debug_fixed)

    self.implementation.add_output_signal("vr_out", vr_out)

    # Approximation error evaluation
    # NOTE(review): the reference below is 1/table_x — this scheme
    # appears specialized for the reciprocal function; confirm against
    # self.function
    approx_error = 0.0
    for i in range(2**alpha):
        for j in range(2**beta):
            tiv_i = (i * 2**beta + j)
            # = range_lo + tiv_i * range_rcp_steps * range_size
            iv = table_iv[tiv_i]
            for k in range(2**gamma):
                to_i = i * 2**gamma + k
                offset = table_offset[to_i]
                approx_value = offset + iv
                table_x = range_lo + range_size * (
                    (i * 2**beta + j) * 2**gamma + k) / S2**(alpha + beta + gamma)
                local_error = abs(1 / (table_x) - approx_value)
                approx_error = max(approx_error, local_error)
    error_log2 = float(sollya.log2(approx_error))
    print("approx_error is {}, error_log2 is {}".format(
        float(approx_error), error_log2))

    # table size
    table_iv_size = 2**(alpha + beta)
    table_offset_size = 2**(alpha + gamma)
    print("tables' size are {} entries".format(table_iv_size +
                                               table_offset_size))

    return [self.implementation]
def random_log_sample(interval):
    # Extract the bounds of the sampling interval.
    # NOTE(review): the body appears truncated here — it only extracts
    # the bounds and neither returns nor uses them; confirm against the
    # original source.
    lo = sollya.inf(interval)
    hi = sollya.sup(interval)
def generate_expr( self, code_object, optree, folded=False, result_var=None, initial=False, language=None, ## force to store result in a variable, wrapping CodeExpression # in CodeVariable force_variable_storing=False): """ code generation function """ language = self.language if language is None else language # search if <optree> has already been processed if self.has_memoization(optree): result = self.get_memoization(optree) if isinstance(result, CodeExpression) and force_variable_storing: # forcing storing and translation CodeExpression to CodeVariable # if force_variable_storing is set result_precision = result.precision prefix_tag = optree.get_tag( default="var_result" ) if force_variable_storing else "tmp_result" final_var = result_var if result_var else code_object.get_free_var_name( result_precision, prefix=prefix_tag, declare=True) code_object << self.generate_code_assignation( code_object, final_var, result.get()) result = CodeVariable(final_var, result_precision) return result result = None # implementation generation if isinstance(optree, CodeVariable): result = optree elif isinstance(optree, Variable): if optree.get_var_type() is Variable.Local: final_var = code_object.get_free_var_name( optree.get_precision(), prefix=optree.get_tag(), declare=True, var_ctor=Variable) result = CodeVariable(final_var, optree.get_precision()) else: result = CodeVariable(optree.get_tag(), optree.get_precision()) elif isinstance(optree, Signal): if optree.get_var_type() is Variable.Local: final_var = code_object.declare_signal(optree, optree.get_precision(), prefix=optree.get_tag()) result = CodeVariable(final_var, optree.get_precision()) else: result = CodeVariable(optree.get_tag(), optree.get_precision()) elif isinstance(optree, Constant): precision = optree.get_precision() # .get_base_format() if force_variable_storing or self.declare_cst or optree.get_precision( ).is_cst_decl_required(): cst_prefix = "cst" if optree.get_tag( ) is None else optree.get_tag() cst_varname 
= code_object.declare_cst(optree, prefix=cst_prefix) result = CodeVariable(cst_varname, precision) else: if precision is ML_Integer: result = CodeExpression("%d" % optree.get_value(), precision) else: try: result = CodeExpression( precision.get_cst(optree.get_value(), language=language), precision) except: result = CodeExpression( precision.get_cst(optree.get_value(), language=language), precision) Log.report( Log.Error, "Error during get_cst call for Constant: {} ", optree) # Exception print elif isinstance(optree, Assert): cond = optree.get_input(0) error_msg = optree.get_error_msg() severity = optree.get_severity() cond_code = self.generate_expr(code_object, cond, folded=False, language=language) code_object << " assert {cond} report {error_msg} severity {severity};\n".format( cond=cond_code.get(), error_msg=error_msg, severity=severity.descriptor) return None elif isinstance(optree, Wait): time_ns = optree.get_time_ns() code_object << "wait for {time_ns} ns;\n".format(time_ns=time_ns) return None elif isinstance(optree, SwitchBlock): switch_value = optree.inputs[0] # generating pre_statement self.generate_expr(code_object, optree.get_pre_statement(), folded=folded, language=language) switch_value_code = self.generate_expr(code_object, switch_value, folded=folded, language=language) case_map = optree.get_case_map() code_object << "\nswitch(%s) {\n" % switch_value_code.get() for case in case_map: case_value = case case_statement = case_map[case] if isinstance(case_value, tuple): for sub_case in case: code_object << "case %s:\n" % sub_case else: code_object << "case %s:\n" % case code_object.open_level() self.generate_expr(code_object, case_statement, folded=folded, language=language) code_object.close_level() code_object << "}\n" return None elif isinstance(optree, ReferenceAssign): output_var = optree.inputs[0] result_value = optree.inputs[1] output_var_code = self.generate_expr(code_object, output_var, folded=False, language=language) def 
get_assign_symbol(node): if isinstance(node, Signal): assign_sign = "<=" elif isinstance(node, Variable): assign_sign = ":=" else: Log.report(Log.Error, "unsupported node for assign symbol:\n {}", node) return assign_sign if isinstance(output_var, Signal) or isinstance( output_var, Variable): assign_sign = get_assign_symbol(output_var) elif isinstance(output_var, VectorElementSelection) or isinstance( output_var, SubSignalSelection): select_input = output_var.get_input(0) assign_sign = get_assign_symbol(select_input) else: Log.report(Log.Error, "unsupported node for assign symbol:\n {}", node) if isinstance(result_value, Constant): # generate assignation result_value_code = self.generate_expr(code_object, result_value, folded=folded, language=language) code_object << self.generate_assignation( output_var_code.get(), result_value_code.get(), assign_sign=assign_sign) else: #result_value_code = self.generate_expr(code_object, result_value, folded = True, force_variable_storing = True, language = language) result_value_code = self.generate_expr(code_object, result_value, folded=True, language=language) code_object << self.generate_assignation( output_var_code.get(), result_value_code.get(), assign_sign=assign_sign) if optree.get_debug() and not self.disable_debug: self.generate_debug_msg(result_value, result_value_code, code_object, debug_object=optree.get_debug()) #code_object << self.generate_assignation(output_var_code.get(), result_value_code.get()) #code_object << output_var.get_precision().generate_c_assignation(output_var_code, result_value_code) return None elif isinstance(optree, RangeLoop): iterator = optree.get_input(0) loop_body = optree.get_input(1) loop_range = optree.get_loop_range() specifier = optree.get_specifier() range_pattern = "{lower} to {upper}" if specifier is RangeLoop.Increasing else "{upper} dowto {lower}" range_code = range_pattern.format(lower=sollya.inf(loop_range), upper=sollya.sup(loop_range)) iterator_code = 
self.generate_expr(code_object, iterator, folded=folded, language=language) code_object << "\n for {iterator} in {loop_range} loop\n".format( iterator=iterator_code.get(), loop_range=range_code) code_object.inc_level() body_code = self.generate_expr(code_object, loop_body, folded=folded, language=language) assert body_code is None code_object.dec_level() code_object << "end loop;\n" return None elif isinstance(optree, Loop): init_statement = optree.inputs[0] exit_condition = optree.inputs[1] loop_body = optree.inputs[2] self.generate_expr(code_object, init_statement, folded=folded, language=language) code_object << "\nfor (;%s;)" % self.generate_expr( code_object, exit_condition, folded=False, language=language).get() code_object.open_level() self.generate_expr(code_object, loop_body, folded=folded, language=language) code_object.close_level() return None elif isinstance(optree, Process): # generating pre_statement for process pre_statement = optree.get_pre_statement() self.generate_expr(code_object, optree.get_pre_statement(), folded=folded, language=language) sensibility_list = [ self.generate_expr(code_object, op, folded=True, language=language).get() for op in optree.get_sensibility_list() ] sensibility_list = "({})".format(", ".join( sensibility_list)) if len(sensibility_list) != 0 else "" code_object << "process{}\n".format(sensibility_list) self.open_memoization_level() code_object.open_level( extra_shared_tables=[MultiSymbolTable.SignalSymbol], var_ctor=Variable) for process_stat in optree.inputs: self.generate_expr(code_object, process_stat, folded=folded, initial=False, language=language) code_object.close_level() self.close_memoization_level() code_object << "end process;\n\n" return None elif isinstance(optree, PlaceHolder): first_input = optree.get_input(0) first_input_code = self.generate_expr(code_object, first_input, folded=folded, language=language) for op in optree.get_inputs()[1:]: _ = self.generate_expr(code_object, op, folded=folded, 
language=language) result = first_input_code elif isinstance(optree, ComponentInstance): component_object = optree.get_component_object() component_name = component_object.get_name() code_object.declare_component(component_name, component_object) io_map = optree.get_io_map() component_tag = optree.get_tag() if component_tag is None: component_tag = "{component_name}_i{instance_id}".format( component_name=component_name, instance_id=optree.get_instance_id()) # component tag uniquifying component_tag = code_object.get_free_name(component_object, prefix=component_tag) mapped_io = {} for io_tag in io_map: mapped_io[io_tag] = self.generate_expr(code_object, io_map[io_tag], folded=True, language=language) code_object << "\n{component_tag} : {component_name}\n".format( component_name=component_name, component_tag=component_tag) code_object << " port map (\n" code_object << " " + ", \n ".join( "{} => {}".format(io_tag, mapped_io[io_tag].get()) for io_tag in mapped_io) code_object << "\n);\n" return None elif isinstance(optree, ConditionBlock): condition = optree.inputs[0] if_branch = optree.inputs[1] else_branch = optree.inputs[2] if len(optree.inputs) > 2 else None # generating pre_statement self.generate_expr(code_object, optree.get_pre_statement(), folded=folded, language=language) cond_code = self.generate_expr(code_object, condition, folded=False, language=language) try: cond_likely = condition.get_likely() except AttributeError: Log.report( Log.Error, "The following condition has no (usable) likely attribute: {}", condition) code_object << "if %s then\n " % cond_code.get() code_object.inc_level() if_branch_code = self.generate_expr(code_object, if_branch, folded=False, language=language) code_object.dec_level() if else_branch: code_object << " else\n " code_object.inc_level() else_branch_code = self.generate_expr(code_object, else_branch, folded=True, language=language) code_object.dec_level() else: # code_object << "\n" pass code_object << "end if;\n" return None 
elif isinstance(optree, Select): # we go through all of select operands to # flatten the select tree def flatten_select(op, cond=None): """ Process recursively a Select operation to build a list of tuple (result, condition) """ if not isinstance(op, Select): return [(op, cond)] lcond = op.inputs[0] if cond is None else LogicalAnd( op.inputs[0], cond, precision=cond.get_precision()) return flatten_select(op.inputs[1], lcond) + flatten_select( op.inputs[2], cond) def legalize_select_input(select_input): if select_input.get_precision().get_bit_size( ) != optree.get_precision().get_bit_size(): return Conversion(select_input, precision=optree.get_precision()) else: return select_input prefix = optree.get_tag(default="setmp") result_varname = result_var if result_var != None else code_object.get_free_var_name( optree.get_precision(), prefix=prefix) result = CodeVariable(result_varname, optree.get_precision()) select_opcond_list = flatten_select(optree) if not select_opcond_list[-1][1] is None: Log.report( Log.Error, "last condition in flatten select differs from None") gen_list = [] for op, cond in select_opcond_list: op = legalize_select_input(op) op_code = self.generate_expr(code_object, op, folded=folded, language=language) if not cond is None: cond_code = self.generate_expr(code_object, cond, folded=True, force_variable_storing=True, language=language) gen_list.append((op_code, cond_code)) else: gen_list.append((op_code, None)) code_object << "{result} <= \n".format(result=result.get()) code_object.inc_level() for op_code, cond_code in gen_list: if not cond_code is None: code_object << "{op_code} when {cond_code} else\n".format( op_code=op_code.get(), cond_code=cond_code.get()) else: code_object << "{op_code};\n".format(op_code=op_code.get()) code_object.dec_level() elif isinstance(optree, TableLoad): table = optree.get_input(0) index = optree.get_input(1) index_code = self.generate_expr(code_object, index, folded=folded, language=language) prefix = 
optree.get_tag(default="table_value") result_varname = result_var if result_var != None else code_object.get_free_var_name( optree.get_precision(), prefix=prefix) result = CodeVariable(result_varname, optree.get_precision()) code_object << "with {index} select {result} <=\n".format( index=index_code.get(), result=result.get()) table_dimensions = table.get_precision().get_dimensions() assert len(table_dimensions) == 1 table_size = table_dimensions[0] default_value = 0 # linearizing table selection for tabid, value in enumerate(table.get_data()): code_object << "\t{} when {},\n".format( table.get_precision().get_storage_precision().get_cst( value), index.get_precision().get_cst(tabid)) code_object << "\t{} when others;\n".format(table.get_precision( ).get_storage_precision().get_cst(default_value)) # result is set elif isinstance(optree, Return): return_result = optree.inputs[0] return_code = self.generate_expr(code_object, return_result, folded=folded, language=language) code_object << "return %s;\n" % return_code.get() return None #return_code elif isinstance(optree, ExceptionOperation): if optree.get_specifier() in [ ExceptionOperation.RaiseException, ExceptionOperation.ClearException, ExceptionOperation.RaiseReturn ]: result_code = self.processor.generate_expr( self, code_object, optree, optree.inputs, folded=False, result_var=result_var, language=language) code_object << "%s;\n" % result_code.get() if optree.get_specifier() == ExceptionOperation.RaiseReturn: if self.libm_compliant: # libm compliant exception management code_object.add_header( "support_lib/ml_libm_compatibility.h") return_value = self.generate_expr( code_object, optree.get_return_value(), folded=folded, language=language) arg_value = self.generate_expr(code_object, optree.get_arg_value(), folded=folded, language=language) function_name = optree.function_name exception_list = [ op.get_value() for op in optree.inputs ] if ML_FPE_Inexact in exception_list: exception_list.remove(ML_FPE_Inexact) if 
len(exception_list) > 1: raise NotImplementedError if ML_FPE_Overflow in exception_list: code_object << "return ml_raise_libm_overflowf(%s, %s, \"%s\");\n" % ( return_value.get(), arg_value.get(), function_name) elif ML_FPE_Underflow in exception_list: code_object << "return ml_raise_libm_underflowf(%s, %s, \"%s\");\n" % ( return_value.get(), arg_value.get(), function_name) elif ML_FPE_Invalid in exception_list: code_object << "return %s;\n" % return_value.get() else: return_precision = optree.get_return_value( ).get_precision() self.generate_expr(code_object, Return(optree.get_return_value(), precision=return_precision), folded=folded, language=language) return None else: result = self.processor.generate_expr(self, code_object, optree, optree.inputs, folded=folded, result_var=result_var, language=language) elif isinstance(optree, NoResultOperation): result_code = self.processor.generate_expr(self, code_object, optree, optree.inputs, folded=False, result_var=result_var, language=language) code_object << "%s;\n" % result_code.get() return None elif isinstance(optree, Statement): for op in optree.inputs: if not self.has_memoization(op): self.generate_expr(code_object, op, folded=folded, initial=True, language=language) return None else: # building ordered list of required node by depth working_list = [op for op in optree.get_inputs()] processing_list = [op for op in working_list] resolved = {} while working_list != []: op = working_list.pop(0) # node has already been processed: SKIP if op in resolved: continue if isinstance(op, ML_Table): # ML_Table instances are skipped (should be generated directly by TableLoad) continue elif isinstance(op, ML_LeafNode): processing_list.append(op) else: memo = self.get_memoization(op) if not memo is None: # node has already been generated: STOP HERE resolved[op] = memo else: # enqueue node to be processed processing_list.append(op) # enqueue node inputs working_list += [op for op in op.get_inputs()] resolved[op] = memo # processing 
list in reverse order (starting with deeper node to avoid too much recursion) for op in processing_list[::-1]: _ = self.generate_expr(code_object, op, folded=folded, initial=initial, language=language) # processing main node generate_pre_process = self.generate_clear_exception if optree.get_clearprevious( ) else None result = self.processor.generate_expr( self, code_object, optree, optree.inputs, generate_pre_process=generate_pre_process, folded=folded, result_var=result_var, language=language) # registering result into memoization table self.add_memoization(optree, result) # debug management if optree.get_debug() and not self.disable_debug: self.generate_debug_msg(optree, result, code_object) if (initial or force_variable_storing or result_too_long(result)) and not isinstance( result, CodeVariable) and not result is None: # result could have been modified from initial optree result_precision = result.precision prefix_tag = optree.get_tag( default="var_result" ) if force_variable_storing else "tmp_result" final_var = result_var if result_var else code_object.get_free_var_name( result_precision, prefix=prefix_tag, declare=True) code_object << self.generate_code_assignation( code_object, final_var, result.get()) return CodeVariable(final_var, result_precision) return result
def generate_reduction_fptaylor(x):
    """ Bound the rounding + approximation error of the sin() evaluation
    (after sign/pi argument reduction) over the interval @p x, using
    FPTaylor for the rounding error and sollya's supnorm for the
    polynomial approximation error.

    :param x: sollya Interval; must lie entirely on one side of 0 and
              must not straddle a multiple of pi (asserted below)
    :return: (rel_err, abs_err) pair; float("inf") stands for "no bound
             could be computed"
    """
    # get sign and abs_x, must be the same at both endpoints so that a
    # single symbolic expression is valid over the whole interval
    if sollya.sup(x) <= 0:
        sign_x_expr = "-1.0"
        abs_x_expr = "-x"
        abs_x = -x
    elif sollya.inf(x) >= 0:
        sign_x_expr = "1.0"
        abs_x_expr = "x"
        abs_x = x
    else:
        assert False, "Interval must not straddle 0"

    # get k (the multiple of pi subtracted during reduction),
    # must be the same at both endpoints
    unround_k = abs_x * n_invpi
    k_low = sollya.floor(sollya.inf(unround_k))
    k_high = sollya.floor(sollya.sup(unround_k))
    if k_low != k_high:
        assert False, "Interval must not straddle multiples of pi"
    k = int(k_low)
    part = k % 2

    # reduced argument interval
    r = abs_x - k * n_pi
    z_expr = "r"
    z = r

    # sin is negated on odd multiples of pi
    if part == 1:
        flipped_poly_expr = "-poly"
    else:
        flipped_poly_expr = "poly"

    x_low = sollya.inf(x)
    x_high = sollya.sup(x)
    query = "\n".join([
        "Variables",
        " real x in [{},{}];".format(x_low, x_high),
        "Definitions",
        " abs_x rnd64= {};".format(abs_x_expr),
        " whole rnd64= {} * {};".format(k, n_pi),
        " r rnd64= abs_x - whole;",
        " z rnd64= {};".format(z_expr),
        " poly rnd64= {};".format(poly_expr),
        " flipped_poly rnd64= {};".format(flipped_poly_expr),
        " retval rnd64= flipped_poly*{};".format(sign_x_expr),
        "Expressions",
        " retval;"
    ])

    # run FPTaylor for the rounding error; the relative bound may be
    # unavailable (e.g. the domain contains a zero of sin), hence the
    # staged fallbacks below
    rnd_rel_err = None
    rnd_abs_err = None
    try:
        res = fptaylor.Result(query, {
            **config,
            "--rel-error": "true",
            "--abs-error": "true"
        })
        rnd_rel_err = float(
            res.result["relative_errors"]["final_total"]["value"])
        rnd_abs_err = float(
            res.result["absolute_errors"]["final_total"]["value"])
    except AssertionError:
        pass
    except KeyError:
        # relative bound missing in the result: still try to salvage
        # the absolute bound from the same run
        try:
            rnd_abs_err = float(
                res.result["absolute_errors"]["final_total"]["value"])
        except KeyError:
            pass

    if rnd_abs_err is None:
        # retry with the relative-error analysis disabled
        try:
            res = fptaylor.Result(query, {
                **config,
                "--rel-error": "false",
                "--abs-error": "true"
            })
            rnd_abs_err = float(
                res.result["absolute_errors"]["final_total"]["value"])
        except AssertionError:
            pass

    # certified polynomial approximation error bounds (sollya supnorm)
    err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                             sollya.sin(sollya.x), z, sollya.relative,
                             2**-100)
    algo_rel_err = sollya.sup(err_int)
    err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                             sollya.sin(sollya.x), z, sollya.absolute,
                             2**-100)
    algo_abs_err = sollya.sup(err_int)

    if rnd_rel_err is None or str(algo_rel_err) == "error":
        rel_err = float("inf")
    else:
        rel_err = rnd_rel_err + algo_rel_err
    # BUG FIX: rnd_abs_err can still be None when every FPTaylor run
    # failed; the unconditional addition used to raise a TypeError here.
    if rnd_abs_err is None or str(algo_abs_err) == "error":
        abs_err = float("inf")
    else:
        abs_err = rnd_abs_err + algo_abs_err
    return rel_err, abs_err
def determine_error(self):
    """Measure and report the worst-case absolute/relative error of the
    generated sin approximation, piecewise over its input domain.

    The domain is split (by split_domain) into slivers on which the sign
    and the reduction constant k are invariant; each sliver's error is
    bounded with FPTaylor (rounding) plus sollya supnorm (approximation),
    then aggregated into "spec:"-prefixed JSON summaries on stdout.
    """
    sollya.settings.display = sollya.hexadecimal
    # pi and 1/pi rounded to the working precision
    n_pi = self.precision.round_sollya_object(sollya.pi, sollya.RN)
    n_invpi = self.precision.round_sollya_object(1 / sollya.pi, sollya.RN)
    # polynomial body as an FPTaylor expression in variable z;
    # z^0x1p1 is sollya's hexadecimal rendering of z^2
    poly_expr = str(sollya.horner(self.poly_object.get_sollya_object()))
    poly_expr = poly_expr.replace("_x_", "z")
    poly_expr = poly_expr.replace("z^0x1p1", "z*z")

    # FPTaylor configuration shared by all queries
    config = fptaylor.CHECK_CONFIG.copy()
    del config["--abs-error"]
    config["--opt"] = "bb-eval"
    config["--rel-error-threshold"] = "0.0"
    config["--intermediate-opt"] = "false"
    config["--uncertainty"] = "false"

    def generate_fptaylor(x):
        # Error bound for the bare polynomial (no argument reduction)
        # over interval @p x; returns (rel_err, abs_err).
        x_low = sollya.inf(x)
        x_high = sollya.sup(x)
        query = "\n".join([
            "Variables",
            " real z in [{},{}];".format(x_low, x_high),
            "Definitions",
            " retval rnd64= {};".format(poly_expr),
            "Expressions",
            " retval;"
        ])
        rnd_rel_err = None
        rnd_abs_err = None
        # first attempt: both relative and absolute bounds in one run
        try:
            res = fptaylor.Result(query, {
                **config,
                "--rel-error": "true",
                "--abs-error": "true"
            })
            rnd_rel_err = float(
                res.result["relative_errors"]["final_total"]["value"])
            rnd_abs_err = float(
                res.result["absolute_errors"]["final_total"]["value"])
        except AssertionError:
            pass
        except KeyError:
            # relative bound missing: salvage the absolute bound
            try:
                rnd_abs_err = float(
                    res.result["absolute_errors"]["final_total"]["value"])
            except KeyError:
                pass
        if rnd_abs_err is None:
            # retry for the absolute bound alone
            try:
                res = fptaylor.Result(query, {
                    **config,
                    "--rel-error": "false",
                    "--abs-error": "true"
                })
                rnd_abs_err = float(
                    res.result["absolute_errors"]["final_total"]["value"])
            except AssertionError:
                pass
        # certified approximation error bounds from sollya
        err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                 sollya.sin(sollya.x), x, sollya.relative,
                                 2**-100)
        algo_rel_err = sollya.sup(err_int)
        err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                 sollya.sin(sollya.x), x, sollya.absolute,
                                 2**-100)
        algo_abs_err = sollya.sup(err_int)
        if rnd_rel_err is None or str(algo_rel_err) == "error":
            rel_err = float("inf")
        else:
            rel_err = rnd_rel_err + algo_rel_err
        # NOTE(review): if rnd_abs_err is still None here, this addition
        # raises a TypeError — confirm FPTaylor always yields an
        # absolute bound on these domains
        abs_err = rnd_abs_err + algo_abs_err
        return rel_err, abs_err

    def generate_reduction_fptaylor(x):
        # Error bound including the sign/pi argument reduction; @p x
        # must not straddle 0 nor a multiple of pi.
        # get sign and abs_x, must be the same at endpoints
        if sollya.sup(x) <= 0:
            sign_x_expr = "-1.0"
            abs_x_expr = "-x"
            abs_x = -x
        elif sollya.inf(x) >= 0:
            sign_x_expr = "1.0"
            abs_x_expr = "x"
            abs_x = x
        else:
            assert False, "Interval must not straddle 0"
        # get k, must be the same at endpoints
        unround_k = abs_x * n_invpi
        k_low = sollya.floor(sollya.inf(unround_k))
        k_high = sollya.floor(sollya.sup(unround_k))
        if k_low != k_high:
            assert False, "Interval must not straddle multples of pi"
        k = int(k_low)
        part = k % 2
        r_expr = "abs_x - whole"  # NOTE(review): unused local
        r = abs_x - k * n_pi
        z_expr = "r"
        z = r
        # sin is negated on odd multiples of pi
        if part == 1:
            flipped_poly_expr = "-poly"
        else:
            flipped_poly_expr = "poly"
        x_low = sollya.inf(x)
        x_high = sollya.sup(x)
        query = "\n".join([
            "Variables",
            " real x in [{},{}];".format(x_low, x_high),
            "Definitions",
            " abs_x rnd64= {};".format(abs_x_expr),
            " whole rnd64= {} * {};".format(k, n_pi),
            " r rnd64= abs_x - whole;",
            " z rnd64= {};".format(z_expr),
            " poly rnd64= {};".format(poly_expr),
            " flipped_poly rnd64= {};".format(flipped_poly_expr),
            " retval rnd64= flipped_poly*{};".format(sign_x_expr),
            "Expressions",
            " retval;"
        ])
        rnd_rel_err = None
        rnd_abs_err = None
        try:
            res = fptaylor.Result(query, {
                **config,
                "--rel-error": "true",
                "--abs-error": "true"
            })
            rnd_rel_err = float(
                res.result["relative_errors"]["final_total"]["value"])
            rnd_abs_err = float(
                res.result["absolute_errors"]["final_total"]["value"])
        except AssertionError:
            pass
        except KeyError:
            # relative bound missing: salvage the absolute bound
            try:
                rnd_abs_err = float(
                    res.result["absolute_errors"]["final_total"]["value"])
            except KeyError:
                pass
        if rnd_abs_err is None:
            try:
                res = fptaylor.Result(query, {
                    **config,
                    "--rel-error": "false",
                    "--abs-error": "true"
                })
                rnd_abs_err = float(
                    res.result["absolute_errors"]["final_total"]["value"])
            except AssertionError:
                pass
        # certified approximation error over the reduced interval z
        err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                 sollya.sin(sollya.x), z, sollya.relative,
                                 2**-100)
        algo_rel_err = sollya.sup(err_int)
        err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                 sollya.sin(sollya.x), z, sollya.absolute,
                                 2**-100)
        algo_abs_err = sollya.sup(err_int)
        if rnd_rel_err is None or str(algo_rel_err) == "error":
            rel_err = float("inf")
        else:
            rel_err = rnd_rel_err + algo_rel_err
        # NOTE(review): same None-addition hazard as generate_fptaylor
        abs_err = rnd_abs_err + algo_abs_err
        return rel_err, abs_err

    def split_domain(starting_domain, slivers):
        # Split @p starting_domain into sub-intervals on which
        # generate_reduction_fptaylor's preconditions hold, then
        # subdivide each 2**slivers times; returns a sorted list of
        # non-degenerate intervals.
        in_domains = [starting_domain]

        # abs: split at 0 so each piece has a fixed sign
        out_domains = list()
        for I in in_domains:
            if sollya.inf(I) < 0 and sollya.sup(I) > 0:
                out_domains.append(sollya.Interval(sollya.inf(I), 0))
                out_domains.append(sollya.Interval(0, sollya.sup(I)))
            else:
                out_domains.append(I)
        in_domains = out_domains

        # k: split at multiples of pi so the reduction constant k is
        # invariant per piece; a 1-ulp gap is left around each cut
        out_domains = list()
        while len(in_domains) > 0:
            I = in_domains.pop()
            #print("in: [{}, {}]".format(float(sollya.inf(I)), float(sollya.sup(I))))
            unround_mult = I * n_invpi
            mult_low = sollya.floor(sollya.inf(unround_mult))
            mult_high = sollya.floor(sollya.sup(unround_mult))
            if mult_low == mult_high or (mult_low == -1 and mult_high == 0):
                #print("  accepted")
                out_domains.append(I)
                continue
            if sollya.sup(I) <= 0:
                divider_low = (mult_low + 1) * n_pi
                divider_high = divider_low - divider_low * 2**-53
            else:
                divider_high = (mult_low + 1) * n_pi
                divider_low = divider_high - divider_high * 2**-53
            lower_part = sollya.Interval(sollya.inf(I), divider_low)
            upper_part = sollya.Interval(divider_high, sollya.sup(I))
            #print("  -> [{}, {}]".format(float(sollya.inf(lower_part)), float(sollya.sup(lower_part))))
            #print("  -> [{}, {}]".format(float(sollya.inf(upper_part)), float(sollya.sup(upper_part))))
            in_domains.append(lower_part)
            in_domains.append(upper_part)
        in_domains = out_domains

        # subdivide each section into 2**subd sections
        for _ in range(slivers):
            out_domains = list()
            for I in in_domains:
                mid = sollya.mid(I)
                out_domains.append(sollya.Interval(sollya.inf(I), mid))
                out_domains.append(sollya.Interval(mid, sollya.sup(I)))
            in_domains = out_domains

        # deduplicate, sort by lower bound, drop degenerate points
        in_domains = set(in_domains)
        in_domains = sorted(in_domains, key=lambda x: float(sollya.inf(x)))
        in_domains = [
            d for d in in_domains if sollya.inf(d) != sollya.sup(d)
        ]
        return in_domains

    # choose the analysed domain: one period-ish interval when the
    # reduction step is skipped, +/- 20 pi otherwise
    if self.skip_reduction:
        starting_domain = sollya.Interval(-n_pi - 2**-7, n_pi + 2**-7)
    else:
        reduction_k = 20
        starting_domain = sollya.Interval(-reduction_k * n_pi,
                                          reduction_k * n_pi)

    # analyse each piece
    in_domains = split_domain(starting_domain, self.slivers)
    errors = list()
    for I in in_domains:
        if self.skip_reduction:
            rel_err, abs_err = generate_fptaylor(I)
        else:
            rel_err, abs_err = generate_reduction_fptaylor(I)
        print("{}\t{}\t{}\t{}".format(float(sollya.inf(I)),
                                      float(sollya.sup(I)),
                                      float(abs_err),
                                      float(rel_err)))
        errors.append((I, abs_err, rel_err))

    def generate_json(errors, domain):
        # Aggregate per-sliver (interval, abs_err, rel_err) triples
        # falling inside @p domain into one spec dictionary.
        errors = [err for err in errors if err[0] in domain]
        errors.sort(key=lambda err: err[2])
        # NOTE(review): after ascending sort this is the MINIMUM rel
        # error; a spec bound is usually the maximum — confirm intent
        epsilon = errors[0][2]
        delta = max(err[1] for err in errors)
        d = {
            "cname": self.function_name,
            "delta": float(delta),
            "domain": [
                float(sollya.inf(domain)),
                float(sollya.sup(domain)),
            ],
            "epsilon": float(epsilon),
            "operation": "sin"
        }
        return d

    if self.skip_reduction:
        d = generate_json(errors,
                          sollya.Interval(-n_pi - 2**-7, n_pi + 2**-7))
        json_str = json.dumps(d, sort_keys=True, indent=4)
        json_str = "spec: " + json_str.replace("\n", "\nspec: ")
        print(json_str)
    else:
        # one spec per symmetric domain [-k*pi, k*pi]; consecutive
        # specs with identical bounds are collapsed (only the widest
        # representative of each run is printed)
        specs = list()
        for k in range(1, reduction_k):
            d = generate_json(errors, sollya.Interval(-k * n_pi, k * n_pi))
            specs.append(d)
        for i in range(len(specs)):
            d = specs[i]
            if i == len(specs) - 1:
                json_str = json.dumps(d, sort_keys=True, indent=4)
                json_str = "spec: " + json_str.replace("\n", "\nspec: ")
                print(json_str)
                break
            nd = specs[i + 1]
            if d["epsilon"] == nd["epsilon"] and d["delta"] == nd["delta"]:
                continue
            json_str = json.dumps(d, sort_keys=True, indent=4)
            json_str = "spec: " + json_str.replace("\n", "\nspec: ")
            print(json_str)
def __init__(self, precision = ML_Binary32, abs_accuracy = S2**-24, libm_compliant = True, debug_flag = False, fuse_fma = True, fast_path_extract = True, target = GenericProcessor(), output_file = "expf.c", function_name = "expf"):
    """Build and emit a C implementation of exp() for @p precision.

    Python 2 code (print statements).  Constructs the metalibm operation
    scheme (special-case handling, argument reduction, polynomial
    evaluation, reconstruction), optimizes it and writes the generated
    C source to @p output_file.

    NOTE(review): a bare `return` after the argument-reduction error
    evaluation (see below) makes the polynomial search and everything
    after it dead code — presumably a debugging stub left in.
    """
    # declaring target and instantiating optimization engine
    processor = target
    self.precision = precision
    opt_eng = OptimizationEngine(processor)
    gappacg = GappaCodeGenerator(processor, declare_cst = True, disable_debug = True)

    # declaring CodeFunction and retrieving input variable
    self.function_name = function_name
    exp_implementation = CodeFunction(self.function_name, output_format = self.precision)
    vx = exp_implementation.add_input_variable("x", self.precision)

    Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m")

    # local overloading of RaiseReturn operation: binds the input
    # variable and function name into every raised exception
    def ExpRaiseReturn(*args, **kwords):
        kwords["arg_value"] = vx
        kwords["function_name"] = self.function_name
        return RaiseReturn(*args, **kwords)

    # predicates used by the special-case handling below
    test_nan_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False, debug = True, tag = "nan_or_inf")
    test_nan = Test(vx, specifier = Test.IsNaN, debug = True, tag = "is_nan_test")
    test_positive = Comparison(vx, 0, specifier = Comparison.GreaterOrEqual, debug = True, tag = "inf_sign")
    test_signaling_nan = Test(vx, specifier = Test.IsSignalingNaN, debug = True, tag = "is_signaling_nan")
    return_snan = Statement(ExpRaiseReturn(ML_FPE_Invalid, return_value = FP_QNaN(self.precision)))

    # return in case of infinity input: exp(+inf)=+inf, exp(-inf)=+0
    infty_return = Statement(ConditionBlock(test_positive, Return(FP_PlusInfty(self.precision)), Return(FP_PlusZero(self.precision))))
    # return in case of specific value input (NaN or inf)
    specific_return = ConditionBlock(test_nan, ConditionBlock(test_signaling_nan, return_snan, Return(FP_QNaN(self.precision))), infty_return)
    # return in case of standard (non-special) input

    # exclusion of early overflow and underflow cases:
    # inputs beyond these bounds overflow/underflow regardless of the
    # polynomial result
    precision_emax = self.precision.get_emax()
    precision_max_value = S2 * S2**precision_emax
    exp_overflow_bound = ceil(log(precision_max_value))
    early_overflow_test = Comparison(vx, exp_overflow_bound, likely = False, specifier = Comparison.Greater)
    early_overflow_return = Statement(ClearException(), ExpRaiseReturn(ML_FPE_Inexact, ML_FPE_Overflow, return_value = FP_PlusInfty(self.precision)))

    precision_emin = self.precision.get_emin_subnormal()
    precision_min_value = S2 ** precision_emin
    exp_underflow_bound = floor(log(precision_min_value))
    early_underflow_test = Comparison(vx, exp_underflow_bound, likely = False, specifier = Comparison.Less)
    early_underflow_return = Statement(ClearException(), ExpRaiseReturn(ML_FPE_Inexact, ML_FPE_Underflow, return_value = FP_PlusZero(self.precision)))

    sollya_prec_map = {ML_Binary32: sollya.binary32, ML_Binary64: sollya.binary64}

    # constant computation: 1/log(2) and a hi/lo split of log(2) for the
    # Cody-Waite style argument reduction x = k*log(2) + r
    invlog2 = round(1/log(2), sollya_prec_map[self.precision], RN)

    interval_vx = Interval(exp_underflow_bound, exp_overflow_bound)
    interval_fk = interval_vx * invlog2
    interval_k = Interval(floor(inf(interval_fk)), ceil(sup(interval_fk)))

    # log2_hi keeps enough trailing zeros so that k*log2_hi is exact
    log2_hi_precision = self.precision.get_field_size() - (ceil(log2(sup(abs(interval_k)))) + 2)
    # NOTE(review): tuple expression — log2_hi_precision is never passed
    # to Log.report, so the value is not actually printed
    Log.report(Log.Info, "log2_hi_precision: "), log2_hi_precision
    invlog2_cst = Constant(invlog2, precision = self.precision)
    log2_hi = round(log(2), log2_hi_precision, sollya.RN)
    log2_lo = round(log(2) - log2_hi, sollya_prec_map[self.precision], sollya.RN)

    # argument reduction
    unround_k = vx * invlog2
    unround_k.set_attributes(tag = "unround_k", debug = ML_Debug(display_format = "%f"))
    k = NearestInteger(unround_k, precision = self.precision, debug = ML_Debug(display_format = "%f"))
    ik = NearestInteger(unround_k, precision = ML_Int32, debug = ML_Debug(display_format = "%d"), tag = "ik")
    ik.set_tag("ik")
    k.set_tag("k")
    # r = (x - k*log2_hi) - k*log2_lo, with the first two steps exact
    exact_pre_mul = (k * log2_hi)
    exact_pre_mul.set_attributes(exact= True)
    exact_hi_part = vx - exact_pre_mul
    exact_hi_part.set_attributes(exact = True)
    r = exact_hi_part - k * log2_lo
    r.set_tag("r")
    r.set_attributes(debug = ML_Debug(display_format = "%f"))

    opt_r = opt_eng.optimization_process(r, self.precision, copy = True, fuse_fma = fuse_fma)

    tag_map = {}
    opt_eng.register_nodes_by_tag(opt_r, tag_map)

    # map scheme nodes to gappa variables for the error evaluation
    cg_eval_error_copy_map = {
        vx: Variable("x", precision = self.precision, interval = interval_vx),
        tag_map["k"]: Variable("k", interval = interval_k, precision = self.precision)
    }

    #try:
    if 1:
        #eval_error = gappacg.get_eval_error(opt_r, cg_eval_error_copy_map, gappa_filename = "red_arg.g")
        eval_error = gappacg.get_eval_error_v2(opt_eng, opt_r, cg_eval_error_copy_map, gappa_filename = "red_arg.g")
        Log.report(Log.Info, "eval error: %s" % eval_error)
    #except:
    #    Log.report(Log.Info, "gappa error evaluation failed")
    print r.get_str(depth = None, display_precision = True, display_attribute = True)
    print opt_r.get_str(depth = None, display_precision = True, display_attribute = True)

    # reduced argument lies within +/- log(2)/2
    approx_interval = Interval(-log(2)/2, log(2)/2)
    local_ulp = sup(ulp(exp(approx_interval), self.precision))
    print "ulp: ", local_ulp
    error_goal = local_ulp #S2**-(self.precision.get_field_size()+1)
    error_goal_approx = S2**-1 * error_goal

    Log.report(Log.Info, "\033[33;1m building mathematical polynomial \033[0m\n")
    poly_degree = sup(guessdegree(exp(x), approx_interval, error_goal_approx)) #- 1
    init_poly_degree = poly_degree

    # NOTE(review): early return — every statement below (polynomial
    # search, scheme construction, code emission) is dead code
    return

    # polynomial degree search: raise the degree until the combined
    # approximation + evaluation error beats one local ulp
    while 1:
        Log.report(Log.Info, "attempting poly degree: %d" % poly_degree)
        poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(exp(x), poly_degree, [self.precision]*(poly_degree+1), approx_interval, absolute)
        Log.report(Log.Info, "poly approx error: %s" % poly_approx_error)
        Log.report(Log.Info, "\033[33;1m generating polynomial evaluation scheme \033[0m")
        poly = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, r, unified_precision = self.precision)
        poly.set_tag("poly")

        # optimizing poly before evaluation error computation
        opt_poly = opt_eng.optimization_process(poly, self.precision)

        #print "poly: ", poly.get_str(depth = None, display_precision = True)
        #print "opt_poly: ", opt_poly.get_str(depth = None, display_precision = True)

        # evaluating error of the polynomial approximation
        r_gappa_var = Variable("r", precision = self.precision, interval = approx_interval)
        poly_error_copy_map = {
            r.get_handle().get_node(): r_gappa_var
        }
        gappacg = GappaCodeGenerator(target, declare_cst = False, disable_debug = True)
        poly_eval_error = gappacg.get_eval_error_v2(opt_eng, poly.get_handle().get_node(), poly_error_copy_map, gappa_filename = "gappa_poly.g")
        Log.report(Log.Info, "poly evaluation error: %s" % poly_eval_error)

        global_poly_error = poly_eval_error + poly_approx_error
        global_rel_poly_error = global_poly_error / exp(approx_interval)
        print "global_poly_error: ", global_poly_error, global_rel_poly_error
        flag = local_ulp > sup(abs(global_rel_poly_error))
        print "test: ", flag
        if flag:
            break
        else:
            if poly_degree > init_poly_degree + 5:
                Log.report(Log.Error, "poly degree search did not converge")
            poly_degree += 1

    # late overflow: 2**ik overflows but the final product may not;
    # split the exponent insertion in two
    late_overflow_test = Comparison(ik, self.precision.get_emax(), specifier = Comparison.Greater, likely = False, debug = True, tag = "late_overflow_test")
    overflow_exp_offset = (self.precision.get_emax() - self.precision.get_field_size() / 2)
    diff_k = ik - overflow_exp_offset
    diff_k.set_attributes(debug = ML_Debug(display_format = "%d"), tag = "diff_k")
    late_overflow_result = (ExponentInsertion(diff_k) * poly) * ExponentInsertion(overflow_exp_offset)
    late_overflow_result.set_attributes(silent = False, tag = "late_overflow_result", debug = debugf)
    late_overflow_return = ConditionBlock(Test(late_overflow_result, specifier = Test.IsInfty, likely = False), ExpRaiseReturn(ML_FPE_Overflow, return_value = FP_PlusInfty(self.precision)), Return(late_overflow_result))

    # late underflow: same trick with a positive exponent offset
    late_underflow_test = Comparison(k, self.precision.get_emin_normal(), specifier = Comparison.LessOrEqual, likely = False)
    underflow_exp_offset = 2 * self.precision.get_field_size()
    late_underflow_result = (ExponentInsertion(ik + underflow_exp_offset) * poly) * ExponentInsertion(-underflow_exp_offset)
    late_underflow_result.set_attributes(debug = ML_Debug(display_format = "%e"), tag = "late_underflow_result", silent = False)
    test_subnormal = Test(late_underflow_result, specifier = Test.IsSubnormal)
    late_underflow_return = Statement(ConditionBlock(test_subnormal, ExpRaiseReturn(ML_FPE_Underflow, return_value = late_underflow_result)), Return(late_underflow_result))

    # standard reconstruction: exp(x) = 2**ik * poly(r)
    std_result = poly * ExponentInsertion(ik, tag = "exp_ik", debug = debug_lftolx)
    std_result.set_attributes(tag = "std_result", debug = debug_lftolx)
    result_scheme = ConditionBlock(late_overflow_test, late_overflow_return, ConditionBlock(late_underflow_test, late_underflow_return, Return(std_result)))
    std_return = ConditionBlock(early_overflow_test, early_overflow_return, ConditionBlock(early_underflow_test, early_underflow_return, result_scheme))

    # main scheme
    Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
    scheme = ConditionBlock(test_nan_or_inf, Statement(ClearException(), specific_return), std_return)
    #print scheme.get_str(depth = None, display_precision = True)

    # fusing FMA
    if fuse_fma:
        Log.report(Log.Info, "\033[33;1m MDL fusing FMA \033[0m")
        scheme = opt_eng.fuse_multiply_add(scheme, silence = True)

    Log.report(Log.Info, "\033[33;1m MDL abstract scheme \033[0m")
    opt_eng.instantiate_abstract_precision(scheme, None)

    Log.report(Log.Info, "\033[33;1m MDL instantiated scheme \033[0m")
    opt_eng.instantiate_precision(scheme, default_precision = self.precision)

    Log.report(Log.Info, "\033[33;1m subexpression sharing \033[0m")
    opt_eng.subexpression_sharing(scheme)

    Log.report(Log.Info, "\033[33;1m silencing operation \033[0m")
    opt_eng.silence_fp_operations(scheme)

    # registering scheme as function implementation
    exp_implementation.set_scheme(scheme)

    # check processor support
    Log.report(Log.Info, "\033[33;1m checking processor support \033[0m")
    opt_eng.check_processor_support(scheme)

    # factorizing fast path
    if fast_path_extract:
        Log.report(Log.Info, "\033[33;1m factorizing fast path\033[0m")
        opt_eng.factorize_fast_path(scheme)

    # C source emission
    Log.report(Log.Info, "\033[33;1m generating source code \033[0m")
    cg = CCodeGenerator(processor, declare_cst = False, disable_debug = not debug_flag, libm_compliant = libm_compliant)
    self.result = exp_implementation.get_definition(cg, C_Code, static_cst = True)
    #self.result.add_header("support_lib/ml_types.h")
    self.result.add_header("support_lib/ml_special_values.h")
    self.result.add_header_comment("polynomial degree  for  exp(x): %d" % poly_degree)
    self.result.add_header_comment("sollya polynomial  for  exp(x): %s" % poly_object.get_sollya_object())
    if debug_flag:
        self.result.add_header("stdio.h")
        self.result.add_header("inttypes.h")
    output_stream = open(output_file, "w")#"%s.c" % exp_implementation.get_name(), "w")
    output_stream.write(self.result.get(cg))
    output_stream.close()
def generate_scheme(self):
    """Build the metalibm operation scheme implementing expm1(x).

    Handles special inputs (NaN/inf), early over/underflow, performs a
    compensated Cody-Waite argument reduction x = k*log(2) + r, evaluates
    a polynomial for expm1(r) and reconstructs expm1(x) = 2**k*(poly + 1
    - 2**-k), with dedicated paths for near-overflow/underflow exponents.

    :return: root ConditionBlock of the operation scheme
    """
    # declaring target and instantiating optimization engine
    vx = self.implementation.add_input_variable("x", self.precision)

    Log.set_dump_stdout(True)

    Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m")
    if self.debug_flag:
        Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

    # local overloading of RaiseReturn operation: binds the input
    # variable and function name into every raised exception
    def ExpRaiseReturn(*args, **kwords):
        kwords["arg_value"] = vx
        kwords["function_name"] = self.function_name
        return RaiseReturn(*args, **kwords)

    # expm1(-inf) = -1
    C_m1 = Constant(-1, precision = self.precision)

    test_NaN_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False, debug = debug_multi, tag = "NaN_or_inf", precision = ML_Bool)
    test_NaN = Test(vx, specifier = Test.IsNaN, likely = False, debug = debug_multi, tag = "is_NaN", precision = ML_Bool)
    test_inf = Comparison(vx, 0, specifier = Comparison.Greater, debug = debug_multi, tag = "sign", precision = ML_Bool, likely = False);

    #  Infnty input
    infty_return = Statement(ConditionBlock(test_inf, Return(FP_PlusInfty(self.precision)), Return(C_m1)))
    #  non-std input (inf/nan)
    specific_return = ConditionBlock(test_NaN, Return(FP_QNaN(self.precision)), infty_return)

    # Over/Underflow Tests
    precision_emax = self.precision.get_emax()
    precision_max_value = S2**(precision_emax + 1)
    expm1_overflow_bound = ceil(log(precision_max_value + 1))
    overflow_test = Comparison(vx, expm1_overflow_bound, likely = False, specifier = Comparison.Greater, precision = ML_Bool)
    overflow_return = Statement(Return(FP_PlusInfty(self.precision)))

    precision_emin = self.precision.get_emin_subnormal()
    precision_min_value = S2** precision_emin
    expm1_underflow_bound = floor(log(precision_min_value) + 1)
    underflow_test = Comparison(vx, expm1_underflow_bound, likely = False, specifier = Comparison.Less, precision = ML_Bool)
    # below the underflow bound expm1 saturates at -1
    underflow_return = Statement(Return(C_m1))

    sollya_precision = {ML_Binary32: sollya.binary32, ML_Binary64: sollya.binary64}[self.precision]
    int_precision = {ML_Binary32: ML_Int32, ML_Binary64: ML_Int64}[self.precision]

    # Constants: 1/log(2) and a hi/lo split of log(2) for the reduction
    log_2 = round(log(2), sollya_precision, sollya.RN)
    invlog2 = round(1/log(2), sollya_precision, sollya.RN)
    log_2_cst = Constant(log_2, precision = self.precision)

    interval_vx = Interval(expm1_underflow_bound, expm1_overflow_bound)
    interval_fk = interval_vx * invlog2
    interval_k = Interval(floor(inf(interval_fk)), ceil(sup(interval_fk)))

    log2_hi_precision = self.precision.get_field_size() - 6
    log2_hi = round(log(2), log2_hi_precision, sollya.RN)
    log2_lo = round(log(2) - log2_hi, sollya_precision, sollya.RN)

    # Reduction: r = x - k*log(2) computed with a compensated
    # (two-sum style) sequence s/z/t to recover the low-order bits
    unround_k = vx * invlog2
    ik = NearestInteger(unround_k, precision = int_precision, debug = debug_multi, tag = "ik")
    k = Conversion(ik, precision = self.precision, tag = "k")
    red_coeff1 = Multiplication(k, log2_hi, precision = self.precision)
    red_coeff2 = Multiplication(Negation(k, precision = self.precision), log2_lo, precision = self.precision)

    pre_sub_mul = Subtraction(vx, red_coeff1, precision = self.precision)
    s = Addition(pre_sub_mul, red_coeff2, precision = self.precision)
    z = Subtraction(s, pre_sub_mul, precision = self.precision)
    t = Subtraction(red_coeff2, z, precision = self.precision)
    r = Addition(s, t, precision = self.precision)

    r.set_attributes(tag = "r", debug = debug_multi)

    # reduced argument lies within +/- log(2)/2
    r_interval = Interval(-log_2/S2, log_2/S2)

    local_ulp = sup(ulp(exp(r_interval), self.precision))

    print("ulp: ", local_ulp)
    error_goal = S2**-1*local_ulp
    print("error goal: ", error_goal)

    # Polynomial Approx of expm1 over the reduced interval
    error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)
    Log.report(Log.Info, "\033[33;1m Building polynomial \033[0m\n")
    poly_degree = sup(guessdegree(expm1(sollya.x), r_interval, error_goal) + 1)

    polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme
    poly_degree_list = range(0, poly_degree)
    precision_list = [self.precision] *(len(poly_degree_list) + 1)

    poly_object, poly_error = Polynomial.build_from_approximation_with_error(expm1(sollya.x), poly_degree, precision_list, r_interval, sollya.absolute, error_function = error_function)
    # degrees 0 and 1 are handled exactly by `r +` below, so evaluate
    # only the sub-polynomial starting at degree 2
    sub_poly = poly_object.sub_poly(start_index = 2)
    Log.report(Log.Info, "Poly : %s" % sub_poly)
    Log.report(Log.Info, "poly error : {} / {:d}".format(poly_error, int(sollya.log2(poly_error))))
    pre_sub_poly = polynomial_scheme_builder(sub_poly, r, unified_precision = self.precision)
    poly = r + pre_sub_poly
    poly.set_attributes(tag = "poly", debug = debug_multi)

    # 2**k and 2**-k used by the reconstruction
    exp_k = ExponentInsertion(ik, tag = "exp_k", debug = debug_multi, precision = self.precision)
    exp_mk = ExponentInsertion(-ik, tag = "exp_mk", debug = debug_multi, precision = self.precision)

    # diff = 1 - 2**-k, so that exp_k*(poly + diff) = 2**k*(poly+1) - 1
    diff = 1 - exp_mk
    diff.set_attributes(tag = "diff", debug = debug_multi)

    # Late Tests: exponents close to the format limits need a split
    # exponent insertion to avoid spurious over/underflow
    late_overflow_test = Comparison(ik, self.precision.get_emax(), specifier = Comparison.Greater, likely = False, debug = debug_multi, tag = "late_overflow_test")

    # NOTE(review): integer/float division of field_size by 2 — under
    # Python 3 this is a float; confirm ExponentInsertion accepts it
    overflow_exp_offset = (self.precision.get_emax() - self.precision.get_field_size() / 2)
    diff_k = ik - overflow_exp_offset

    exp_diff_k = ExponentInsertion(diff_k, precision = self.precision, tag = "exp_diff_k", debug = debug_multi)
    exp_oflow_offset = ExponentInsertion(overflow_exp_offset, precision = self.precision, tag = "exp_offset", debug = debug_multi)

    late_overflow_result = (exp_diff_k * (1 + poly)) * exp_oflow_offset - 1.0

    late_overflow_return = ConditionBlock(
        Test(late_overflow_result, specifier = Test.IsInfty, likely = False),
        ExpRaiseReturn(ML_FPE_Overflow, return_value = FP_PlusInfty(self.precision)),
        Return(late_overflow_result)
        )

    late_underflow_test = Comparison(k, self.precision.get_emin_normal(), specifier = Comparison.LessOrEqual, likely = False)

    underflow_exp_offset = 2 * self.precision.get_field_size()
    corrected_coeff = ik + underflow_exp_offset

    exp_corrected = ExponentInsertion(corrected_coeff, precision = self.precision)
    exp_uflow_offset = ExponentInsertion(-underflow_exp_offset, precision = self.precision)

    late_underflow_result = (exp_corrected * (1 + poly)) * exp_uflow_offset - 1.0

    test_subnormal = Test(late_underflow_result, specifier = Test.IsSubnormal, likely = False)

    late_underflow_return = Statement(
        ConditionBlock(
            test_subnormal,
            ExpRaiseReturn(ML_FPE_Underflow, return_value = late_underflow_result)),
        Return(late_underflow_result)
        )

    # Reconstruction: expm1(x) = 2**k * (poly + (1 - 2**-k))
    std_result = exp_k * ( poly + diff )
    std_result.set_attributes(tag = "result", debug = debug_multi)

    result_scheme = ConditionBlock(
        late_overflow_test,
        late_overflow_return,
        ConditionBlock(
            late_underflow_test,
            late_underflow_return,
            Return(std_result)
            )
        )

    std_return = ConditionBlock(
        overflow_test,
        overflow_return,
        ConditionBlock(
            underflow_test,
            underflow_return,
            result_scheme)
        )

    # top-level dispatch: special values first, then range checks
    scheme = ConditionBlock(
        test_NaN_or_inf,
        Statement(specific_return),
        std_return
        )

    return scheme
def generic_poly_split(offset_fct, indexing, target_eps, coeff_precision, vx):
    """ Generate a piecewise-polynomial meta-approximation of @p offset_fct.

    The domain is split into the sub-intervals described by @p indexing.
    For each sub-interval a polynomial approximation with maximal error
    @p target_eps is tabulated, and evaluated using format
    @p coeff_precision. The input variable is @p vx.

    :param offset_fct: callable mapping a sub-interval offset to the
                       function to approximate on that sub-interval
    :param indexing: domain-split descriptor (provides split_num,
                     per-split offsets/intervals and the index derivation)
    :param target_eps: maximal tabulation error per sub-polynomial
    :param coeff_precision: storage/evaluation format of the coefficients
    :param vx: input variable node
    :return: operation-graph node evaluating the piecewise approximation
    """
    # computing degree for a different polynomial approximation on each
    # sub-interval
    poly_degree_list = [
        int(sup(guessdegree(offset_fct(offset), sub_interval, target_eps)))
        for offset, sub_interval in indexing.get_offseted_sub_list()
    ]
    # every row of the coefficient table is sized for the maximal degree;
    # lower-degree rows are zero-padded below
    poly_max_degree = max(poly_degree_list)

    # tabulating polynomial coefficients on split_num sub-interval of interval
    poly_table = ML_NewTable(
        dimensions=[indexing.split_num, poly_max_degree + 1],
        storage_precision=coeff_precision,
        const=True)
    # per-sub-interval input offset, subtracted before polynomial evaluation
    offset_table = ML_NewTable(dimensions=[indexing.split_num],
                               storage_precision=coeff_precision,
                               const=True)
    max_error = 0.0

    for sub_index in range(indexing.split_num):
        poly_degree = poly_degree_list[sub_index]
        offset, approx_interval = indexing.get_offseted_sub_interval(sub_index)
        offset_table[sub_index] = offset
        if poly_degree == 0:
            # managing constant approximation separately since it seems
            # to break sollya
            local_approx = coeff_precision.round_sollya_object(
                offset_fct(offset)(inf(approx_interval)))
            poly_table[sub_index][0] = local_approx
            for monomial_index in range(1, poly_max_degree + 1):
                poly_table[sub_index][monomial_index] = 0
            approx_error = sollya.infnorm(
                offset_fct(offset) - local_approx, approx_interval)
        else:
            poly_object, approx_error = Polynomial.build_from_approximation_with_error(
                offset_fct(offset), poly_degree,
                [coeff_precision] * (poly_degree + 1), approx_interval,
                sollya.relative)

            # copy the computed coefficients, zero-padding up to poly_max_degree
            for monomial_index in range(poly_max_degree + 1):
                if monomial_index <= poly_degree:
                    poly_table[sub_index][monomial_index] = poly_object.coeff_map[monomial_index]
                else:
                    poly_table[sub_index][monomial_index] = 0
        max_error = max(approx_error, max_error)

    Log.report(Log.Debug, "max approx error is {}", max_error)

    # indexing function: derive index from input @p vx value
    poly_index = indexing.get_index_node(vx)
    poly_index.set_attributes(tag="poly_index", debug=debug_multi)

    ext_precision = get_extended_fp_precision(coeff_precision)

    # building polynomial evaluation scheme (FMA chain, Horner-like)
    offset = TableLoad(offset_table, poly_index, precision=coeff_precision,
                       tag="offset", debug=debug_multi)
    # NOTE(review): poly is seeded with the degree-poly_max_degree coefficient
    # and the FMA loop below starts again at monomial_index == poly_max_degree,
    # so the leading coefficient appears to be accumulated twice -- confirm
    # whether the loop was meant to start at poly_max_degree - 1
    poly = TableLoad(poly_table, poly_index, poly_max_degree,
                     precision=coeff_precision, tag="poly_init",
                     debug=debug_multi)
    red_vx = Subtraction(vx, offset, precision=vx.precision, tag="red_vx",
                         debug=debug_multi)
    for monomial_index in range(poly_max_degree, -1, -1):
        coeff = TableLoad(poly_table, poly_index, monomial_index,
                          precision=coeff_precision,
                          tag="poly_%d" % monomial_index, debug=debug_multi)
        #fma_precision = coeff_precision if monomial_index > 1 else ext_precision
        fma_precision = coeff_precision
        poly = FMA(red_vx, poly, coeff, precision=fma_precision)
    #return Conversion(poly, precision=coeff_precision)
    #return poly.hi
    return poly
def generate_test_tables(self, test_num, test_ranges=[Interval(-1.0, 1.0)]):
    """ Generate inputs and output table to be shared between auto test
        and max_error tests.

    :param test_num: requested number of tests (unused here; sizes come
                     from generate_random_sizes) -- kept for API parity
    :param test_ranges: intervals from which input values are drawn,
                        one per input array
    :return: ((input descriptors, output descriptors),
              input tables, output tables)
    """
    dims = self.generate_random_sizes()
    out_tds = self.generate_output_tensor_descriptors(dims)
    in_tds = self.generate_innput_tensor_descriptors(dims)
    index_range = self.test_index_range

    # number of arrays expected as inputs for tested_function
    NUM_INPUT_ARRAY = len(in_tds)
    # position of the input array in tested_function operands (generally
    # equals to 1 as to 0-th input is often the destination array)
    INPUT_INDEX_OFFSET = 1

    # bounding size of each randomly generated input array
    INPUT_ARRAY_SIZE = [td.get_bounding_size() for td in in_tds]

    # TODO/FIXME: implement proper input range depending on input index
    # assuming a single input array
    input_precisions = [td.scalar_format for td in in_tds]
    rng_map = []
    for precision, test_range in zip(input_precisions, test_ranges):
        rng_map.append(
            get_precision_rng(precision, inf(test_range), sup(test_range)))

    # one input table per input array, filled with rounded random values
    input_tables = []
    for in_id in range(NUM_INPUT_ARRAY):
        input_tables.append(
            generate_1d_table(
                INPUT_ARRAY_SIZE[in_id],
                input_precisions[in_id],
                self.uniquify_name("input_table_arg%d" % in_id),
                value_gen=(
                    lambda _: input_precisions[in_id].round_sollya_object(
                        rng_map[in_id].get_new_value(), sollya.RN))))

    OUTPUT_ARRAY_SIZE = [td.get_bounding_size() for td in out_tds]
    OUTPUT_PRECISION = [td.scalar_format for td in out_tds]
    NUM_OUTPUT_ARRAY = len(out_tds)

    # output tables, mutable, zero-initialized
    output_tables = []
    for out_id in range(NUM_OUTPUT_ARRAY):
        output_tables.append(
            generate_1d_table(
                OUTPUT_ARRAY_SIZE[out_id],
                OUTPUT_PRECISION[out_id],
                self.uniquify_name("output_array_%d" % out_id),
                const=False,
                #value_gen=(lambda _: FP_QNaN(self.precision))
                value_gen=(lambda _: 0)))

    tensor_descriptors = (in_tds, out_tds)
    return tensor_descriptors, input_tables, output_tables
def generate_bench_wrapper(self, test_num=1, loop_num=100000, test_ranges=[Interval(-1.0, 1.0)], debug=False):
    """ Generate the benchmark wrapper function for the tested array
        function: builds input/output arrays, wraps the call in a timed
        loop and reports a clocks-per-element (CPE) measure.

    :param test_num: number of (size, offset) test descriptors
    :param loop_num: number of repetitions of the whole test loop
    :param test_ranges: intervals from which input values are drawn
    :param debug: unused here -- presumably kept for API parity; confirm
    :return: FunctionGroup containing the generated bench_wrapper
    """
    # interval where the array lenght is chosen from (randomly)
    index_range = self.test_index_range

    auto_test = CodeFunction("bench_wrapper", output_format=ML_Binary64)

    tested_function = self.implementation.get_function_object()
    function_name = self.implementation.get_name()

    failure_report_op = FunctionOperator("report_failure")
    failure_report_function = FunctionObject("report_failure", [], ML_Void,
                                             failure_report_op)

    printf_success_op = FunctionOperator(
        "printf",
        arg_map={0: "\"test successful %s\\n\"" % function_name},
        void_function=True)
    printf_success_function = FunctionObject("printf", [], ML_Void,
                                             printf_success_op)

    # volatile qualifier prevents the compiler from optimizing stores away
    output_precision = FormatAttributeWrapper(self.precision, ["volatile"])

    test_total = test_num

    # number of arrays expected as inputs for tested_function
    NUM_INPUT_ARRAY = 1
    # position of the input array in tested_function operands (generally
    # equals to 1 as to 0-th input is often the destination array)
    INPUT_INDEX_OFFSET = 1

    # concatenating standard test array at the beginning of randomly
    # generated array
    TABLE_SIZE_VALUES = [
        len(std_table) for std_table in self.standard_test_cases
    ] + [
        random.randrange(index_range[0], index_range[1] + 1)
        for i in range(test_num)
    ]
    # offset of each sub-array inside the single concatenated input array
    OFFSET_VALUES = [sum(TABLE_SIZE_VALUES[:i]) for i in range(test_total)]

    table_size_offset_array = generate_2d_table(
        test_total, 2,
        ML_UInt32,
        self.uniquify_name("table_size_array"),
        value_gen=(lambda row_id: (TABLE_SIZE_VALUES[row_id],
                                   OFFSET_VALUES[row_id])))

    INPUT_ARRAY_SIZE = sum(TABLE_SIZE_VALUES)

    # TODO/FIXME: implement proper input range depending on input index
    # assuming a single input array
    input_precisions = [self.get_input_precision(1).get_data_precision()]
    rng_map = [
        get_precision_rng(precision, inf(test_range), sup(test_range))
        for precision, test_range in zip(input_precisions, test_ranges)
    ]

    # generated table of inputs
    input_tables = [
        generate_1d_table(
            INPUT_ARRAY_SIZE,
            self.get_input_precision(INPUT_INDEX_OFFSET + table_id).get_data_precision(),
            self.uniquify_name("input_table_arg%d" % table_id),
            value_gen=(
                lambda _: input_precisions[table_id].round_sollya_object(
                    rng_map[table_id].get_new_value(), sollya.RN)))
        for table_id in range(NUM_INPUT_ARRAY)
    ]

    # generate output_array (left empty: filled by the tested function)
    output_array = generate_1d_table(
        INPUT_ARRAY_SIZE,
        output_precision,
        self.uniquify_name("output_array"),
        #value_gen=(lambda _: FP_QNaN(self.precision))
        value_gen=(lambda _: None),
        const=False,
        empty=True)

    # accumulate element number
    acc_num = Variable("acc_num", precision=ML_Int64, var_type=Variable.Local)

    # post-statement hook kept empty for the bench (no per-test check)
    def empty_post_statement_gen(input_tables, output_array,
                                 table_size_offset_array, array_offset,
                                 array_len, test_id):
        return Statement()

    test_loop = self.get_array_test_wrapper(test_total, tested_function,
                                            table_size_offset_array,
                                            input_tables, output_array,
                                            acc_num,
                                            empty_post_statement_gen)

    timer = Variable("timer", precision=ML_Int64, var_type=Variable.Local)
    printf_timing_op = FunctionOperator(
        "printf",
        arg_map={
            0: "\"%s %%\"PRIi64\" elts computed in %%\"PRIi64\" nanoseconds => %%.3f CPE \\n\"" % function_name,
            1: FO_Arg(0),
            2: FO_Arg(1),
            3: FO_Arg(2)
        },
        void_function=True)
    printf_timing_function = FunctionObject(
        "printf", [ML_Int64, ML_Int64, ML_Binary64], ML_Void,
        printf_timing_op)

    vj = Variable("j", precision=ML_Int32, var_type=Variable.Local)
    loop_num_cst = Constant(loop_num, precision=ML_Int32, tag="loop_num")
    loop_increment = 1

    # bench measure of clock per element
    cpe_measure = Division(
        Conversion(timer, precision=ML_Binary64),
        Conversion(acc_num, precision=ML_Binary64),
        precision=ML_Binary64,
        tag="cpe_measure",
    )

    # common test scheme between scalar and vector functions:
    # timestamp, repeat the array test loop loop_num times, timestamp again,
    # then print and return the CPE measure
    test_scheme = Statement(
        self.processor.get_init_timestamp(),
        ReferenceAssign(timer, self.processor.get_current_timestamp()),
        ReferenceAssign(acc_num, 0),
        Loop(
            ReferenceAssign(vj, Constant(0, precision=ML_Int32)),
            vj < loop_num_cst,
            Statement(test_loop,
                      ReferenceAssign(vj, vj + loop_increment))),
        ReferenceAssign(
            timer,
            Subtraction(self.processor.get_current_timestamp(),
                        timer,
                        precision=ML_Int64)),
        printf_timing_function(
            Conversion(acc_num, precision=ML_Int64),
            timer,
            cpe_measure,
        ),
        Return(cpe_measure),
        # Return(Constant(0, precision = ML_Int32))
    )

    auto_test.set_scheme(test_scheme)
    return FunctionGroup([auto_test])
def generate_bench(self, processor, test_num=1000, unroll_factor=10):
    """ generate performance bench for self.op_class

    Builds a dependency chain of unroll_factor operations inside a timed
    loop so that elementary-operation latency can be measured.

    :param processor: target used for timestamp primitives
    :param test_num: total number of operation evaluations to time
    :param unroll_factor: chained operations per loop iteration; the loop
                          runs test_num // unroll_factor times
    :return: Statement implementing the benchmark scheme
    """
    # one random constant per input, drawn from the initialization interval
    initial_inputs = [
        Constant(random.uniform(inf(self.init_interval),
                                sup(self.init_interval)),
                 precision=precision)
        for i, precision in enumerate(self.input_precisions)
    ]

    # volatile locals prevent the compiler from folding the bench away
    var_inputs = [
        Variable("var_%d" % i,
                 precision=FormatAttributeWrapper(precision, ["volatile"]),
                 var_type=Variable.Local)
        for i, precision in enumerate(self.input_precisions)
    ]

    printf_timing_op = FunctionOperator(
        "printf",
        arg_map={
            0: "\"%s[%s] %%lld elts computed "\
               "in %%lld cycles =>\\n %%.3f CPE \\n\"" % (
                   self.bench_name,
                   self.output_precision.get_display_format()
               ),
            1: FO_Arg(0),
            2: FO_Arg(1),
            3: FO_Arg(2),
            4: FO_Arg(3)
        },
        void_function=True
    )
    printf_timing_function = FunctionObject(
        "printf",
        [self.output_precision, ML_Int64, ML_Int64, ML_Binary64],
        ML_Void, printf_timing_op)

    timer = Variable("timer", precision=ML_Int64, var_type=Variable.Local)

    # cast-to-void call used to keep the final value alive in generated code
    void_function_op = FunctionOperator("(void)", arity=1,
                                        void_function=True)
    void_function = FunctionObject("(void)", [self.output_precision],
                                   ML_Void, void_function_op)

    # initialization of operation inputs
    init_assign = metaop.Statement()
    for var_input, init_value in zip(var_inputs, initial_inputs):
        init_assign.push(ReferenceAssign(var_input, init_value))

    # test loop
    loop_i = Variable("i", precision=ML_Int64, var_type=Variable.Local)
    # FIX: floor division keeps the iteration count an integer; under
    # Python 3, true division would produce a float constant for an
    # ML_Int64 node
    test_num_cst = Constant(test_num // unroll_factor,
                            precision=ML_Int64,
                            tag="test_num")

    # Goal build a chain of dependant operation to measure
    # elementary operation latency
    local_inputs = tuple(var_inputs)
    local_result = self.op_class(*local_inputs,
                                 precision=self.output_precision,
                                 unbreakable=True)
    for i in range(unroll_factor - 1):
        local_inputs = tuple([local_result] + var_inputs[1:])
        local_result = self.op_class(*local_inputs,
                                     precision=self.output_precision,
                                     unbreakable=True)
    # renormalisation
    local_result = self.renorm_function(local_result)

    # variable assignation to build dependency chain
    var_assign = Statement()
    var_assign.push(ReferenceAssign(var_inputs[0], local_result))
    final_value = var_inputs[0]

    # loop increment value
    loop_increment = 1

    # NOTE(review): loop_i is ML_Int64 but is initialised with an ML_Int32
    # constant -- presumably widened by implicit conversion; confirm
    test_loop = Loop(
        ReferenceAssign(loop_i, Constant(0, precision=ML_Int32)),
        loop_i < test_num_cst,
        Statement(var_assign,
                  ReferenceAssign(loop_i, loop_i + loop_increment)),
    )

    # bench scheme: timestamp, init inputs, run the timed loop, timestamp,
    # then consume the result and print elements/cycles/CPE
    test_scheme = Statement(
        ReferenceAssign(timer, processor.get_current_timestamp()),
        init_assign,
        test_loop,
        ReferenceAssign(
            timer,
            Subtraction(processor.get_current_timestamp(),
                        timer,
                        precision=ML_Int64)),
        # prevent intermediary variable simplification
        void_function(final_value),
        printf_timing_function(
            final_value,
            Constant(test_num, precision=ML_Int64),
            timer,
            Division(Conversion(timer, precision=ML_Binary64),
                     Constant(test_num, precision=ML_Binary64),
                     precision=ML_Binary64))
        # ,Return(Constant(0, precision = ML_Int32))
    )

    return test_scheme
def generate_scheme(self):
    """ Generate the operation-graph scheme for an exponential
        meta-function: special-case handling (NaN/inf), early
        overflow/underflow exclusion, argument reduction
        x = k*log(2) + r, polynomial approximation of expm1 on r with an
        automatic degree search, and 2**k reconstruction with late
        overflow/underflow handling. """
    # declaring target and instantiating optimization engine
    vx = self.implementation.add_input_variable("x", self.precision)

    Log.set_dump_stdout(True)

    Log.report(Log.Info,
               "\033[33;1m generating implementation scheme \033[0m")
    if self.debug_flag:
        Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

    # local overloading of RaiseReturn operation: raises the FP exception
    # only in libm-compliant mode, otherwise plainly returns the value
    def ExpRaiseReturn(*args, **kwords):
        kwords["arg_value"] = vx
        kwords["function_name"] = self.function_name
        if self.libm_compliant:
            return RaiseReturn(*args, precision=self.precision, **kwords)
        else:
            return Return(kwords["return_value"], precision=self.precision)

    # special-input predicates
    test_nan_or_inf = Test(vx, specifier=Test.IsInfOrNaN, likely=False,
                           debug=debug_multi, tag="nan_or_inf")
    test_nan = Test(vx, specifier=Test.IsNaN, debug=debug_multi,
                    tag="is_nan_test")
    test_positive = Comparison(vx, 0, specifier=Comparison.GreaterOrEqual,
                               debug=debug_multi, tag="inf_sign")

    test_signaling_nan = Test(vx, specifier=Test.IsSignalingNaN,
                              debug=debug_multi, tag="is_signaling_nan")
    # signaling NaN raises invalid and returns a quiet NaN
    return_snan = Statement(
        ExpRaiseReturn(ML_FPE_Invalid,
                       return_value=FP_QNaN(self.precision)))

    # return in case of infinity input: +inf -> +inf, -inf -> +0
    infty_return = Statement(
        ConditionBlock(
            test_positive,
            Return(FP_PlusInfty(self.precision), precision=self.precision),
            Return(FP_PlusZero(self.precision), precision=self.precision)))
    # return in case of specific value input (NaN or inf)
    specific_return = ConditionBlock(
        test_nan,
        ConditionBlock(
            test_signaling_nan, return_snan,
            Return(FP_QNaN(self.precision), precision=self.precision)),
        infty_return)
    # return in case of standard (non-special) input

    # exclusion of early overflow and underflow cases:
    # exp(x) overflows for x > log(max value of the format)
    precision_emax = self.precision.get_emax()
    precision_max_value = S2 * S2**precision_emax
    exp_overflow_bound = sollya.ceil(log(precision_max_value))
    early_overflow_test = Comparison(vx, exp_overflow_bound, likely=False,
                                     specifier=Comparison.Greater)
    early_overflow_return = Statement(
        ClearException() if self.libm_compliant else Statement(),
        ExpRaiseReturn(ML_FPE_Inexact, ML_FPE_Overflow,
                       return_value=FP_PlusInfty(self.precision)))

    # exp(x) underflows to zero for x < log(min subnormal of the format)
    precision_emin = self.precision.get_emin_subnormal()
    precision_min_value = S2**precision_emin
    exp_underflow_bound = floor(log(precision_min_value))

    early_underflow_test = Comparison(vx, exp_underflow_bound,
                                      likely=False,
                                      specifier=Comparison.Less)
    early_underflow_return = Statement(
        ClearException() if self.libm_compliant else Statement(),
        ExpRaiseReturn(ML_FPE_Inexact, ML_FPE_Underflow,
                       return_value=FP_PlusZero(self.precision)))

    # constant computation
    invlog2 = self.precision.round_sollya_object(1 / log(2), sollya.RN)

    # interval of the non-special input and of k = round(x / log 2)
    interval_vx = Interval(exp_underflow_bound, exp_overflow_bound)
    interval_fk = interval_vx * invlog2
    interval_k = Interval(floor(inf(interval_fk)),
                          sollya.ceil(sup(interval_fk)))

    # log2_hi keeps enough trailing zeros for k * log2_hi to be exact
    log2_hi_precision = self.precision.get_field_size() - (
        sollya.ceil(log2(sup(abs(interval_k)))) + 2)
    Log.report(Log.Info, "log2_hi_precision: %d" % log2_hi_precision)
    invlog2_cst = Constant(invlog2, precision=self.precision)
    log2_hi = round(log(2), log2_hi_precision, sollya.RN)
    log2_lo = self.precision.round_sollya_object(
        log(2) - log2_hi, sollya.RN)

    # argument reduction: r = x - k * log(2), split over hi/lo parts
    unround_k = vx * invlog2
    unround_k.set_attributes(tag="unround_k", debug=debug_multi)
    k = NearestInteger(unround_k, precision=self.precision,
                       debug=debug_multi)
    ik = NearestInteger(unround_k,
                        precision=self.precision.get_integer_format(),
                        debug=debug_multi, tag="ik")
    ik.set_tag("ik")
    k.set_tag("k")
    exact_pre_mul = (k * log2_hi)
    exact_pre_mul.set_attributes(exact=True)
    exact_hi_part = vx - exact_pre_mul
    exact_hi_part.set_attributes(exact=True, tag="exact_hi",
                                 debug=debug_multi,
                                 prevent_optimization=True)
    exact_lo_part = -k * log2_lo
    exact_lo_part.set_attributes(tag="exact_lo", debug=debug_multi,
                                 prevent_optimization=True)
    r = exact_hi_part + exact_lo_part
    r.set_tag("r")
    r.set_attributes(debug=debug_multi)

    # reduced argument lies in [-log(2)/2, log(2)/2]
    approx_interval = Interval(-log(2) / 2, log(2) / 2)

    approx_interval_half = approx_interval / 2
    # 3-way split of the approximation interval for dichotomic
    # gappa error evaluation
    approx_interval_split = [
        Interval(-log(2) / 2, inf(approx_interval_half)),
        approx_interval_half,
        Interval(sup(approx_interval_half), log(2) / 2)
    ]

    # TODO: should be computed automatically
    exact_hi_interval = approx_interval
    exact_lo_interval = -interval_k * log2_lo

    opt_r = self.optimise_scheme(r, copy={})

    tag_map = {}
    self.opt_engine.register_nodes_by_tag(opt_r, tag_map)

    # substitution map used by gappa for the reduction-error evaluation
    cg_eval_error_copy_map = {
        vx: Variable("x", precision=self.precision,
                     interval=interval_vx),
        tag_map["k"]: Variable("k", interval=interval_k,
                               precision=self.precision)
    }

    #try:
    if is_gappa_installed():
        eval_error = self.gappa_engine.get_eval_error_v2(
            self.opt_engine, opt_r, cg_eval_error_copy_map,
            gappa_filename="red_arg.g")
    else:
        eval_error = 0.0
        Log.report(Log.Warning,
                   "gappa is not installed in this environnement")
    Log.report(Log.Info, "eval error: %s" % eval_error)

    local_ulp = sup(ulp(sollya.exp(approx_interval), self.precision))
    # FIXME refactor error_goal from accuracy
    Log.report(Log.Info, "accuracy: %s" % self.accuracy)
    if isinstance(self.accuracy, ML_Faithful):
        error_goal = local_ulp
    elif isinstance(self.accuracy, ML_CorrectlyRounded):
        error_goal = S2**-1 * local_ulp
    elif isinstance(self.accuracy, ML_DegradedAccuracyAbsolute):
        error_goal = self.accuracy.goal
    elif isinstance(self.accuracy, ML_DegradedAccuracyRelative):
        error_goal = self.accuracy.goal
    else:
        Log.report(Log.Error, "unknown accuracy: %s" % self.accuracy)

    # error_goal = local_ulp #S2**-(self.precision.get_field_size()+1)
    error_goal_approx = S2**-1 * error_goal

    Log.report(Log.Info,
               "\033[33;1m building mathematical polynomial \033[0m\n")
    # initial degree guess for expm1(x)/x, clamped below at 2
    poly_degree = max(
        sup(
            guessdegree(
                expm1(sollya.x) / sollya.x, approx_interval,
                error_goal_approx)) - 1, 2)
    init_poly_degree = poly_degree

    error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

    polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme
    #polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

    # automatic degree search: increase the degree until the combined
    # approximation + evaluation error meets error_goal (single pass when
    # gappa is unavailable)
    while 1:
        Log.report(Log.Info, "attempting poly degree: %d" % poly_degree)
        precision_list = [1] + [self.precision] * (poly_degree)

        poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
            expm1(sollya.x), poly_degree, precision_list,
            approx_interval, sollya.absolute,
            error_function=error_function)
        Log.report(Log.Info, "polynomial: %s " % poly_object)
        # sub_poly drops degrees 0 and 1, which are handled exactly
        sub_poly = poly_object.sub_poly(start_index=2)
        Log.report(Log.Info, "polynomial: %s " % sub_poly)

        Log.report(Log.Info,
                   "poly approx error: %s" % poly_approx_error)

        Log.report(
            Log.Info,
            "\033[33;1m generating polynomial evaluation scheme \033[0m")
        pre_poly = polynomial_scheme_builder(
            poly_object, r, unified_precision=self.precision)
        pre_poly.set_attributes(tag="pre_poly", debug=debug_multi)

        pre_sub_poly = polynomial_scheme_builder(
            sub_poly, r, unified_precision=self.precision)
        pre_sub_poly.set_attributes(tag="pre_sub_poly", debug=debug_multi)

        # exp(r) ~ 1 + r_hi + (r_lo + p(r)), keeping hi/lo parts separate
        poly = 1 + (exact_hi_part + (exact_lo_part + pre_sub_poly))
        poly.set_tag("poly")

        # optimizing poly before evaluation error computation
        #opt_poly = self.opt_engine.optimization_process(poly, self.precision, fuse_fma = fuse_fma)
        #opt_sub_poly = self.opt_engine.optimization_process(pre_sub_poly, self.precision, fuse_fma = fuse_fma)
        opt_poly = self.optimise_scheme(poly)
        opt_sub_poly = self.optimise_scheme(pre_sub_poly)

        # evaluating error of the polynomial approximation
        r_gappa_var = Variable("r", precision=self.precision,
                               interval=approx_interval)
        exact_hi_gappa_var = Variable("exact_hi",
                                      precision=self.precision,
                                      interval=exact_hi_interval)
        exact_lo_gappa_var = Variable("exact_lo",
                                      precision=self.precision,
                                      interval=exact_lo_interval)
        vx_gappa_var = Variable("x", precision=self.precision,
                                interval=interval_vx)
        k_gappa_var = Variable("k", interval=interval_k,
                               precision=self.precision)

        #print "exact_hi interval: ", exact_hi_interval

        sub_poly_error_copy_map = {
            #r.get_handle().get_node(): r_gappa_var,
            #vx.get_handle().get_node(): vx_gappa_var,
            exact_hi_part.get_handle().get_node(): exact_hi_gappa_var,
            exact_lo_part.get_handle().get_node(): exact_lo_gappa_var,
            #k.get_handle().get_node(): k_gappa_var,
        }

        poly_error_copy_map = {
            exact_hi_part.get_handle().get_node(): exact_hi_gappa_var,
            exact_lo_part.get_handle().get_node(): exact_lo_gappa_var,
        }

        if is_gappa_installed():
            sub_poly_eval_error = -1.0
            sub_poly_eval_error = self.gappa_engine.get_eval_error_v2(
                self.opt_engine, opt_sub_poly, sub_poly_error_copy_map,
                gappa_filename="%s_gappa_sub_poly.g" % self.function_name)

            # dichotomy over the 3-way interval split for a tighter bound
            dichotomy_map = [
                {
                    exact_hi_part.get_handle().get_node():
                        approx_interval_split[0],
                },
                {
                    exact_hi_part.get_handle().get_node():
                        approx_interval_split[1],
                },
                {
                    exact_hi_part.get_handle().get_node():
                        approx_interval_split[2],
                },
            ]
            poly_eval_error_dico = self.gappa_engine.get_eval_error_v3(
                self.opt_engine, opt_poly, poly_error_copy_map,
                gappa_filename="gappa_poly.g", dichotomy=dichotomy_map)

            poly_eval_error = max(
                [sup(abs(err)) for err in poly_eval_error_dico])
        else:
            poly_eval_error = 0.0
            sub_poly_eval_error = 0.0
            Log.report(Log.Warning,
                       "gappa is not installed in this environnement")
            Log.report(Log.Info, "stopping autonomous degree research")
            # incrementing polynomial degree to counteract initial
            # decrementation effect
            poly_degree += 1
            break

        Log.report(Log.Info,
                   "poly evaluation error: %s" % poly_eval_error)
        Log.report(Log.Info,
                   "sub poly evaluation error: %s" % sub_poly_eval_error)

        global_poly_error = None
        global_rel_poly_error = None

        # worst-case relative error over the 3 sub-intervals
        for case_index in range(3):
            poly_error = poly_approx_error + poly_eval_error_dico[case_index]
            rel_poly_error = sup(
                abs(poly_error /
                    sollya.exp(approx_interval_split[case_index])))
            if global_rel_poly_error == None or rel_poly_error > global_rel_poly_error:
                global_rel_poly_error = rel_poly_error
                global_poly_error = poly_error
        flag = error_goal > global_rel_poly_error

        if flag:
            break
        else:
            # goal not met: retry with a higher degree
            poly_degree += 1

    # Late Tests: overflow/underflow that only shows after reconstruction
    late_overflow_test = Comparison(ik, self.precision.get_emax(),
                                    specifier=Comparison.Greater,
                                    likely=False, debug=debug_multi,
                                    tag="late_overflow_test")
    # split 2**k into two exponent insertions to avoid overflowing
    # the intermediate computation
    overflow_exp_offset = (self.precision.get_emax() -
                           self.precision.get_field_size() / 2)
    diff_k = Subtraction(
        ik,
        Constant(overflow_exp_offset,
                 precision=self.precision.get_integer_format()),
        precision=self.precision.get_integer_format(),
        debug=debug_multi,
        tag="diff_k",
    )
    late_overflow_result = (ExponentInsertion(
        diff_k, precision=self.precision) * poly) * ExponentInsertion(
            overflow_exp_offset, precision=self.precision)
    late_overflow_result.set_attributes(silent=False,
                                        tag="late_overflow_result",
                                        debug=debug_multi,
                                        precision=self.precision)
    late_overflow_return = ConditionBlock(
        Test(late_overflow_result, specifier=Test.IsInfty, likely=False),
        ExpRaiseReturn(ML_FPE_Overflow,
                       return_value=FP_PlusInfty(self.precision)),
        Return(late_overflow_result, precision=self.precision))

    late_underflow_test = Comparison(k,
                                     self.precision.get_emin_normal(),
                                     specifier=Comparison.LessOrEqual,
                                     likely=False)
    # scale up by 2**(2*field_size) so the multiply stays normal, then
    # scale back down
    underflow_exp_offset = 2 * self.precision.get_field_size()
    corrected_exp = Addition(
        ik,
        Constant(underflow_exp_offset,
                 precision=self.precision.get_integer_format()),
        precision=self.precision.get_integer_format(),
        tag="corrected_exp")
    late_underflow_result = (
        ExponentInsertion(corrected_exp, precision=self.precision) *
        poly) * ExponentInsertion(-underflow_exp_offset,
                                  precision=self.precision)
    late_underflow_result.set_attributes(debug=debug_multi,
                                         tag="late_underflow_result",
                                         silent=False)
    test_subnormal = Test(late_underflow_result,
                          specifier=Test.IsSubnormal)
    late_underflow_return = Statement(
        ConditionBlock(
            test_subnormal,
            ExpRaiseReturn(ML_FPE_Underflow,
                           return_value=late_underflow_result)),
        Return(late_underflow_result, precision=self.precision))

    # standard reconstruction: exp(x) = 2**k * exp(r)
    twok = ExponentInsertion(ik, tag="exp_ik", debug=debug_multi,
                             precision=self.precision)
    #std_result = twok * ((1 + exact_hi_part * pre_poly) + exact_lo_part * pre_poly)
    std_result = twok * poly
    std_result.set_attributes(tag="std_result", debug=debug_multi)
    result_scheme = ConditionBlock(
        late_overflow_test, late_overflow_return,
        ConditionBlock(late_underflow_test, late_underflow_return,
                       Return(std_result, precision=self.precision)))
    std_return = ConditionBlock(
        early_overflow_test, early_overflow_return,
        ConditionBlock(early_underflow_test, early_underflow_return,
                       result_scheme))

    # main scheme
    Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
    scheme = ConditionBlock(
        test_nan_or_inf,
        Statement(
            ClearException() if self.libm_compliant else Statement(),
            specific_return), std_return)

    return scheme
def generate_argument_reduction(self, memory_limit):
    """ Select the argument-reduction parameters (table sizes/precisions
        and polynomial degrees).

    NOTE(review): this method returns immediately with a hard-coded
    parameter set eval_argument_reduction(6, 10, 12, 13); the exhaustive
    search below the first return is UNREACHABLE legacy code (Python 2
    syntax: xrange, print statements, <>), kept for reference.

    :param memory_limit: table memory budget (bytes) used by the
                         (disabled) search
    :return: dict describing the chosen argument reduction
    """
    best_arg_reduc = None

    # hard-coded selection short-circuits the search below
    best_arg_reduc = self.eval_argument_reduction(6,10,12,13)
    best_arg_reduc['sizeof_tables'] = best_arg_reduc['sizeof_table1'] + best_arg_reduc['sizeof_table2']
    best_arg_reduc['degree_poly1'] = 4
    best_arg_reduc['degree_poly2'] = 8
    return best_arg_reduc
    # ---- everything below is dead code (see NOTE above) ----
    # iterate through all possible parameters, and return the best argument reduction
    # the order of importance of the characteristics of a good argument reduction is:
    #  1- the argument reduction is valid
    #  2- the degrees of the polynomials obtained are minimal
    #  3- the memory used is minimal
    # An argument reduction is valid iff:
    #  - the memory used is less than memory_limit
    #  - y-1 and z-1 fit into a uint64_t
    #  - the second argument reduction is useful (ie: it adds at least 1 bit
    #    to the argument reduction)
    # From those validity constraints we deduce some bounds on the parameters
    # to reduce the searched space (note that these bounds are implied by,
    # but not equivalent to, the constraints):
    #  size1 <= log2(memory_limit/17)          (memory_limit on the first table)
    #  prec1 < 13 + size1                      (y-1 fits into a uint64_t)
    #  size2 <= log2((memory_limit - sizeof_table1)/17/midinterval)
    #                                          (memory_limit on both tables)
    #  size2 >= 1 - log2(midinterval)          (second arg red should be useful)
    #  prec2 < 12 - prec1 - log2((y-y1)/y1), for all possible y
    #                                          (z-1 fits into a uint64_t)
    # note: it is hard to deduce a tight bound on prec2 from the last
    # inequality; a good approximation is
    # size2 ~= max[for y](-log2((y-y1)/y1)), but using it may eliminate
    # valid argument reductions
    #self.eval_argument_reduction(12, 20, 22, 14)
    min_size1 = 1
    max_size1 = floor(log(memory_limit/17)/log(2)).getConstantAsInt()
    for size1 in xrange(max_size1, min_size1-1, -1):
        min_prec1 = size1
        max_prec1 = 12 + size1
        for prec1 in xrange(min_prec1,max_prec1+1):
            # we need sizeof_table1 and mid_interval for the bounds on
            # size2 and prec2
            first_arg_reduc = self.eval_argument_reduction(size1, prec1, prec1, prec1)
            mid_interval = first_arg_reduc['mid_interval']
            sizeof_table1 = first_arg_reduc['sizeof_table1']

            # y-1 must fit into a uint64_t
            if not(0 <= inf(mid_interval) and sup(mid_interval) < S2**(64 - 52 - prec1)):
                continue
            # first table alone must respect the memory budget
            if not(first_arg_reduc['sizeof_table1'] < memory_limit):
                continue

            min_size2 = 1 - ceil(log(sup(mid_interval))/log(2)).getConstantAsInt()
            max_size2 = floor(log((memory_limit - sizeof_table1)/(17 * sup(mid_interval)))/log(2)).getConstantAsInt()

            # during execution of the prec2 loop, the interval of valid
            # values for prec2 can shrink, so min_prec2 and max_prec2 are
            # set here and not just before the prec2 loop (they are
            # modified inside the loop body, for the next size2 iteration)
            min_prec2 = 0
            max_prec2 = 12 + max_size2 - prec1
            for size2 in xrange(max_size2,min_size2-1,-1):
                max_prec2 = min(max_prec2, 12 + size2 - prec1)
                for prec2 in xrange(max_prec2,min_prec2-1,-1):
                    #print '=====\t\033[1m{}\033[0m({}/{}),\t\033[1m{}\033[0m({}/{}),\t\033[1m{}\033[0m({}/{}),\t\033[1m{}\033[0m({}/{})\t====='.format(size1,min_size1,max_size1,prec1,min_prec1,max_prec1,size2,min_size2,max_size2,prec2,min_prec2,max_prec2)
                    #print resource.getrusage(resource.RUSAGE_SELF).ru_maxrss #memory used by the programm

                    arg_reduc = self.eval_argument_reduction(size1, prec1, size2, prec2)
                    mid_interval = arg_reduc['mid_interval']
                    out_interval = arg_reduc['out_interval']
                    sizeof_tables = arg_reduc['sizeof_table1'] + arg_reduc['sizeof_table2']
                    # z-1 must fit into a uint64_t
                    if not(0 <= inf(out_interval) and sup(out_interval) < S2**(64-52-prec1-prec2)):
                        max_prec2 = prec2 - 1
                        continue
                    # both tables together must respect the memory budget
                    if memory_limit < sizeof_tables:
                        continue
                    #assert(prec2 < 12 + size2 - prec1)
                    # test the approximation size2 ~= max[for y](-log2((y-y1)/y1))

                    # guess the degree of the two polynomials
                    # (relative error <= 2^-52 and absolute error <= 2^-120)
                    # note: we exclude zero from out_interval to not perturb
                    # sollya (log(1+x)/x is not well defined on 0)
                    sollya_out_interval = Interval(S2**(-52-prec1-prec2), sup(out_interval))
                    guess_degree_poly1 = guessdegree(log(1+sollya.x)/sollya.x, sollya_out_interval, S2**-52)
                    guess_degree_poly2 = guessdegree(log(1+sollya.x), sollya_out_interval, S2**-120)
                    # TODO: detect when guessdegree returns multiple possible
                    # degrees, and find the right one
                    # (disabled diagnostics below: 'if False and ...')
                    if False and inf(guess_degree_poly1) <> sup(guess_degree_poly1):
                        print "improvable guess_degree_poly1:", guess_degree_poly1
                    if False and inf(guess_degree_poly2) <> sup(guess_degree_poly2):
                        print "improvable guess_degree_poly2:", guess_degree_poly2
                    degree_poly1 = sup(guess_degree_poly1).getConstantAsInt() + 1
                    degree_poly2 = sup(guess_degree_poly2).getConstantAsInt()

                    # strictly worse than the current best on some degree:
                    # tighten min_prec2 and stop this prec2 loop
                    if ((best_arg_reduc is not None) and (best_arg_reduc['degree_poly1'] < degree_poly1 or best_arg_reduc['degree_poly2'] < degree_poly2)):
                        min_prec2 = prec2 + 1
                        break

                    # lexicographic improvement: degree_poly1, then
                    # degree_poly2, then total table size
                    if ((best_arg_reduc is None) or (best_arg_reduc['degree_poly1'] > degree_poly1) or (best_arg_reduc['degree_poly1'] == degree_poly1 and best_arg_reduc['degree_poly2'] > degree_poly2) or (best_arg_reduc['degree_poly1'] == degree_poly1 and best_arg_reduc['degree_poly2'] == degree_poly2 and best_arg_reduc['sizeof_tables'] > sizeof_tables)):
                        arg_reduc['degree_poly1'] = degree_poly1
                        arg_reduc['degree_poly2'] = degree_poly2
                        arg_reduc['sizeof_tables'] = sizeof_tables
                        best_arg_reduc = arg_reduc
                        #print "\n --new best-- \n", arg_reduc, "\n"
    #print "\nBest arg reduc: \n", best_arg_reduc, "\n"
    return best_arg_reduc