# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. ############################################################################### # last-modified: Mar 7th, 2018 # Author(s): Nicolas Brunie <*****@*****.**> ############################################################################### import sys import sollya from sollya import (Interval, ceil, floor, round, inf, sup, log, exp, expm1, log2, cosh, guessdegree, dirtyinfnorm, RN, acosh, RD) S2 = sollya.SollyaObject(2) from sollya import parse as sollya_parse from metalibm_core.core.ml_operations import * from metalibm_core.core.ml_formats import * from metalibm_core.core.ml_table import ML_NewTable from metalibm_core.code_generation.generic_processor import GenericProcessor from metalibm_core.core.polynomials import * from metalibm_core.core.ml_function import (ML_Function, ML_FunctionBasis, DefaultArgTemplate) from metalibm_core.code_generation.generator_utility import (FunctionOperator, FO_Result, FO_Arg) from metalibm_core.core.ml_complex_formats import ML_Mpfr_t from metalibm_core.core.special_values import FP_PlusInfty from metalibm_core.core.simple_scalar_function import ScalarUnaryFunction
def computeBoundMultiplication(self, out_format, input_format_lhs, input_format_rhs):
    """Return the relative error bound of a multiplication as a Sollya object.

    The bound is obtained from ``out_format.meta_block.local_relative_error_eval``
    applied to the ``mp_node`` attribute of both operand formats; the value it
    returns is wrapped into a ``sollya.SollyaObject``.
    """
    evaluate_relative_error = out_format.meta_block.local_relative_error_eval
    relative_error = evaluate_relative_error(
        input_format_lhs.mp_node,
        input_format_rhs.mp_node,
    )
    return sollya.SollyaObject(relative_error)
def generate_scheme(self):
    """Build the operation graph of a table-driven cosh(x) implementation.

    The input is replaced by its absolute value, reduced with
    k = trunc(|x| * 2^index_size / log(2)) and r = |x| - k * log(2) / 2^index_size
    (the constant being split into a high and a low part), and both
    exp(r) and exp(-r) are evaluated with a polynomial whose constant term is
    dropped.  The powers 2^(+/- l / 2^index_size) are read from a table storing
    each value as a (hi, lo) pair.

    Returns:
        a ``Statement`` whose single ``Return`` selects +infinity when the
        absolute input is greater than the overflow threshold and the
        reconstructed value otherwise.
    """
    # declaring target and instantiating optimization engine
    vx = self.implementation.add_input_variable("x", self.precision)

    Log.set_dump_stdout(True)

    Log.report(Log.Info,
               "\033[33;1m generating implementation scheme \033[0m")
    if self.debug_flag:
        Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

    # log2 of the number of table entries per binade
    index_size = 3

    # every later use of vx refers to |x|
    vx = Abs(vx)
    int_precision = self.precision.get_integer_format()

    # argument reduction
    arg_reg_value = log(2) / 2**index_size
    inv_log2_value = round(1 / arg_reg_value,
                           self.precision.get_sollya_object(), RN)
    inv_log2_cst = Constant(inv_log2_value, precision=self.precision,
                            tag="inv_log2")

    # for r_hi to be accurate we ensure k * log2_hi_value_cst is exact
    # by limiting the number of non-zero bits in log2_hi_value_cst
    # cosh(x) ~ exp(abs(x))/2 for a big enough x
    # cosh(x) > 2^1023 <=> exp(x) > 2^1024 <=> x > log(2^1024)
    # k = inv_log2_value * x
    # -1 for guard
    # NOTE(review): the 2**1024 bound (and the [0, 715] / [0, 1024] intervals
    # below) look specific to a binary64-like exponent range -- confirm
    # before using this scheme with another precision.
    max_k_approx = inv_log2_value * log(sollya.SollyaObject(2)**1024)
    max_k_bitsize = int(ceil(log2(max_k_approx)))
    Log.report(Log.Info, "max_k_bitsize: %d" % max_k_bitsize)
    log2_hi_value_precision = (
        self.precision.get_precision() - max_k_bitsize - 1)

    log2_hi_value = round(arg_reg_value, log2_hi_value_precision, RN)
    log2_lo_value = round(arg_reg_value - log2_hi_value,
                          self.precision.get_sollya_object(), RN)
    log2_hi_value_cst = Constant(log2_hi_value, tag="log2_hi_value",
                                 precision=self.precision)
    log2_lo_value_cst = Constant(log2_lo_value, tag="log2_lo_value",
                                 precision=self.precision)

    k = Trunc(Multiplication(inv_log2_cst, vx), precision=self.precision)
    k_log2 = Multiplication(k, log2_hi_value_cst, precision=self.precision,
                            exact=True, tag="k_log2", unbreakable=True)
    r_hi = vx - k_log2
    r_hi.set_attributes(tag="r_hi", debug=debug_multi, unbreakable=True)
    r_lo = -k * log2_lo_value_cst
    # reduced argument
    r = r_hi + r_lo
    r.set_attributes(tag="r", debug=debug_multi)

    # The value is not used afterwards; the call is kept as in the original
    # scheme because its side effects (if any) are not visible from here.
    r_eval_error = self.get_eval_error(
        r_hi,
        variable_copy_map={
            vx: Variable("vx", interval=Interval(0, 715),
                         precision=self.precision),
            k: Variable("k", interval=Interval(0, 1024),
                        precision=self.precision)
        })

    approx_interval = Interval(-arg_reg_value, arg_reg_value)
    error_goal_approx = 2**-(self.precision.get_precision())

    poly_degree = sup(
        guessdegree(exp(sollya.x), approx_interval, error_goal_approx))
    precision_list = [1] + [self.precision] * (poly_degree)

    # k = k_hi * 2^index_size + k_lo
    k_integer = Conversion(k, precision=int_precision, tag="k_integer",
                           debug=debug_multi)
    k_hi = BitLogicRightShift(k_integer, Constant(index_size),
                              tag="k_int_hi", precision=int_precision,
                              debug=debug_multi)
    k_lo = Modulo(k_integer, 2**index_size, tag="k_int_lo",
                  precision=int_precision, debug=debug_multi)

    # each row holds (2^-v hi, 2^-v lo, 2^v hi, 2^v lo) with
    # v = input_value / 2^index_size
    exp_table = ML_NewTable(dimensions=[2 * 2**index_size, 4],
                            storage_precision=self.precision,
                            tag=self.uniquify_name("exp2_table"))
    # width of the high part of a table entry; it does not depend on the
    # row, so it is computed once before filling the table
    reduced_hi_prec = int(self.precision.get_mantissa_size() * 2 / 3.0)
    for i in range(2 * 2**index_size):
        input_value = i - 2**index_size if i >= 2**index_size else i
        # using SollyaObject wrapper to force evaluation by sollya
        # with higher precision
        exp_value = sollya.SollyaObject(2)**((input_value) * 2**-index_size)
        mexp_value = sollya.SollyaObject(2)**((-input_value) * 2**-index_size)
        pos_value_hi = round(exp_value, reduced_hi_prec, RN)
        pos_value_lo = round(exp_value - pos_value_hi,
                             self.precision.get_sollya_object(), RN)
        neg_value_hi = round(mexp_value, reduced_hi_prec, RN)
        neg_value_lo = round(mexp_value - neg_value_hi,
                             self.precision.get_sollya_object(), RN)
        exp_table[i][0] = neg_value_hi
        exp_table[i][1] = neg_value_lo
        exp_table[i][2] = pos_value_hi
        exp_table[i][3] = pos_value_lo

    # log2_value = log(2) / 2^index_size
    # cosh(x) = 1/2 * (exp(x) + exp(-x))
    # exp(x) = exp(x - k * log2_value + k * log2_value)
    #
    # r = x - k * log2_value
    # exp(x) = exp(r) * 2 ^ (k / 2^index_size)
    #
    # k / 2^index_size = h + l * 2^-index_size, with k, h, l integers
    # exp(x) = exp(r) * 2^h * 2^(l *2^-index_size)
    #
    # cosh(x) = exp(r) * 2^(h-1) 2^(l *2^-index_size)
    #         + exp(-r) * 2^(-h-1) * 2^(-l *2^-index_size)
    # S=2^(h-1), T = 2^(-h-1)
    # exp(r)  = 1 + poly_pos(r)
    # exp(-r) = 1 + poly_neg(r)
    # 2^(l / 2^index_size)  = pos_value_hi + pos_value_lo
    # 2^(-l / 2^index_size) = neg_value_hi + neg_value_lo
    #
    # cosh(x) = S * (pos_value_hi + pos_value_lo) * (1 + poly_pos(r))
    #         + T * (neg_value_hi + neg_value_lo) * (1 + poly_neg(r))
    # which is evaluated below as hi_terms + pos_exp + neg_exp
    error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

    poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
        exp(sollya.x), poly_degree, precision_list, approx_interval,
        sollya.absolute, error_function=error_function)

    polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme
    # the degree-0 coefficient is skipped (start_index=1): the "1 +" part is
    # accounted for separately through hi_terms and the *_lo terms
    poly_pos = polynomial_scheme_builder(
        poly_object.sub_poly(start_index=1), r,
        unified_precision=self.precision)
    poly_pos.set_attributes(tag="poly_pos", debug=debug_multi)

    poly_neg = polynomial_scheme_builder(
        poly_object.sub_poly(start_index=1), -r,
        unified_precision=self.precision)
    poly_neg.set_attributes(tag="poly_neg", debug=debug_multi)

    # k_lo is offset into the upper half of the table, where
    # input_value == k_lo
    table_index = Addition(k_lo,
                           Constant(2**index_size, precision=int_precision),
                           precision=int_precision, tag="table_index",
                           debug=debug_multi)

    neg_value_load_hi = TableLoad(exp_table, table_index, 0,
                                  tag="neg_value_load_hi", debug=debug_multi)
    neg_value_load_lo = TableLoad(exp_table, table_index, 1,
                                  tag="neg_value_load_lo", debug=debug_multi)
    pos_value_load_hi = TableLoad(exp_table, table_index, 2,
                                  tag="pos_value_load_hi", debug=debug_multi)
    pos_value_load_lo = TableLoad(exp_table, table_index, 3,
                                  tag="pos_value_load_lo", debug=debug_multi)

    # exponents of S and T, clamped from below to the format's emin_normal
    k_plus = Max(
        Subtraction(k_hi, Constant(1, precision=int_precision),
                    precision=int_precision, tag="k_plus",
                    debug=debug_multi),
        Constant(self.precision.get_emin_normal(), precision=int_precision))
    k_neg = Max(
        Subtraction(-k_hi, Constant(1, precision=int_precision),
                    precision=int_precision, tag="k_neg",
                    debug=debug_multi),
        Constant(self.precision.get_emin_normal(), precision=int_precision))

    pow_exp_pos = ExponentInsertion(k_plus, precision=self.precision,
                                    tag="pow_exp_pos", debug=debug_multi)
    pow_exp_neg = ExponentInsertion(k_neg, precision=self.precision,
                                    tag="pow_exp_neg", debug=debug_multi)

    hi_terms = (pos_value_load_hi * pow_exp_pos +
                neg_value_load_hi * pow_exp_neg)
    hi_terms.set_attributes(tag="hi_terms")

    pos_exp = (
        pos_value_load_hi * poly_pos +
        (pos_value_load_lo + pos_value_load_lo * poly_pos)) * pow_exp_pos
    pos_exp.set_attributes(tag="pos_exp", debug=debug_multi)

    neg_exp = (
        neg_value_load_hi * poly_neg +
        (neg_value_load_lo + neg_value_load_lo * poly_neg)) * pow_exp_neg
    neg_exp.set_attributes(tag="neg_exp", debug=debug_multi)

    result = Addition(
        Addition(pos_exp, neg_exp, precision=self.precision),
        hi_terms,
        precision=self.precision, tag="result", debug=debug_multi)

    # ov_value
    ov_value = round(acosh(self.precision.get_max_value()),
                     self.precision.get_sollya_object(), RD)
    # vx already holds the absolute value of the input, no extra Abs needed
    ov_flag = Comparison(vx,
                         Constant(ov_value, precision=self.precision),
                         specifier=Comparison.Greater, tag="ov_flag")

    # main scheme
    Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
    scheme = Statement(
        Return(Select(ov_flag, FP_PlusInfty(self.precision), result)))

    return scheme
@property
def tag(self):
    """Return the function name carried by ``self.ctor``."""
    return self.ctor.function_name

# Argument sets for the generic logarithm generator: they only differ by the
# logarithm basis and by the name given to the generated function.
GEN_LOG_ARGS = {"basis": sollya.exp(1), "function_name": "ml_genlog", "extra_passes" : ["beforecodegen:fuse_fma"]}
GEN_LOG2_ARGS = {"basis": 2, "function_name": "ml_genlog2", "extra_passes" : ["beforecodegen:fuse_fma"]}
GEN_LOG10_ARGS = {"basis": 10, "function_name": "ml_genlog10", "extra_passes" : ["beforecodegen:fuse_fma"]}

class LibmFunctionTest(FunctionTest):
    """FunctionTest whose tag is derived from the benchmarked function name."""

    @property
    def tag(self):
        """Return ``<title>_<bench_function_name>`` for the first argument map."""
        # NOTES/FIXME: 0-th element of self.arg_map_list is chosen
        # for tag determination without considering the others
        return self.title + "_" + self.arg_map_list[0]["bench_function_name"]

# Sollya constants used as bases by the emulation helpers below
S2 = sollya.SollyaObject(2)
S10 = sollya.SollyaObject(10)

def emulate_exp2(v):
    """Return 2**v computed on Sollya objects."""
    return S2**v

def emulate_exp10(v):
    """Return 10**v computed on Sollya objects."""
    return S10**v

# libm functions
# One LibmFunctionTest is built per (function name, emulation function) pair.
LIBM_FUNCTION_LIST = [
    # single precision
    LibmFunctionTest(metalibm_functions.external_bench.ML_ExternalBench, [{"bench_function_name": fname, "emulate": emulate, "auto_test": 0, "headers": ["math.h"]}], title="libm")
    for fname, emulate in [
        ("expf", sollya.exp),
        ("exp2f", emulate_exp2),
        ("exp10f", emulate_exp10),
        ("expm1f", sollya.expm1),
        ("logf", sollya.log),
        ("log2f", sollya.log2),
        ("log10f", sollya.log10),
        # NOTE(review): unlike its neighbours this name has no "f" suffix;
        # confirm whether "log1pf" was intended for this single precision list
        ("log1p", sollya.log1p),
        ("cosf", sollya.cos),
        ("sinf", sollya.sin),
        ("tanf", sollya.tan),
        ("atanf", sollya.atan),
        ("coshf", sollya.cosh),
        ("sinhf", sollya.sinh),
        ("tanhf", sollya.tanh),
def generate_bipartite_approx_module(self, vx):
    """Build a bipartite-table approximation of ``self.function`` over
    ``self.interval``.

    The reduced input is cut into three bit fields of ``self.alpha``,
    ``self.beta`` and ``self.gamma`` bits (from most to least significant).
    The result is the sum of two table reads:

    * a table of initial values indexed by (alpha, beta) fields,
    * a table of offsets indexed by (alpha, gamma) fields, each offset being
      ``slope * j * step`` with one slope per alpha sub-interval.

    The maximal absolute approximation error over every table point is
    computed and reported through ``Log`` (verbose level).

    Args:
        vx: input value node; its interval is set to ``self.interval``.

    Returns:
        the ``Conversion`` node (tag ``vr_out``) turning the fixed-point sum
        of both table reads into ``self.precision``.

    Raises:
        AssertionError: if the width of ``self.interval`` is not a power of 2.
    """
    debug_fixed, debug_std = self.get_debug_utils()
    # size of most significant table index (for linear slope tabulation)
    alpha = self.alpha  # 6
    # size of medium significant table index (for initial value table index LSB)
    beta = self.beta  # 5
    # size of least significant table index (for linear offset tabulation)
    gamma = self.gamma  # 5

    guard_bits = self.guard_bits  # 3

    vx.set_interval(self.interval)

    range_hi = sollya.sup(self.interval)
    range_lo = sollya.inf(self.interval)
    f_hi = self.function(range_hi)
    f_lo = self.function(range_lo)
    # fixed by format used for reduced_x
    range_size = range_hi - range_lo
    range_size_log2 = int(sollya.log2(range_size))
    assert 2**range_size_log2 == range_size

    reduced_x = Conversion(
        BitLogicRightShift(vx - range_lo, range_size_log2),
        precision=fixed_point(0, alpha + beta + gamma, signed=False),
        tag="reduced_x", debug=debug_fixed)

    alpha_index = self.get_fixed_slice(
        reduced_x, 0, alpha - 1,
        align_hi=FixedPointPosition.FromMSBToLSB,
        align_lo=FixedPointPosition.FromMSBToLSB,
        tag="alpha_index", debug=debug_std)
    gamma_index = self.get_fixed_slice(
        reduced_x, gamma - 1, 0,
        align_hi=FixedPointPosition.FromLSBToLSB,
        align_lo=FixedPointPosition.FromLSBToLSB,
        tag="gamma_index", debug=debug_std)
    beta_index = self.get_fixed_slice(
        reduced_x, alpha, gamma,
        align_hi=FixedPointPosition.FromMSBToLSB,
        align_lo=FixedPointPosition.FromLSBToLSB,
        tag="beta_index", debug=debug_std)

    # Assuming monotonic function
    f_absmax = max(abs(f_hi), abs(f_lo))
    f_absmin = min(abs(f_hi), abs(f_lo))

    f_msb = int(sollya.ceil(sollya.log2(f_absmax))) + 1
    f_lsb = int(sollya.floor(sollya.log2(f_absmin)))
    storage_lsb = f_lsb - self.precision.get_bit_size() - guard_bits

    f_int_size = f_msb
    f_frac_size = -storage_lsb

    storage_format = fixed_point(f_int_size, f_frac_size, signed=False)
    Log.report(Log.Info, "storage_format is {}".format(storage_format))

    # table of initial value index
    tiv_index = Concatenation(alpha_index, beta_index,
                              tag="tiv_index", debug=debug_std)
    # table of offset value index
    to_index = Concatenation(alpha_index, gamma_index,
                             tag="to_index", debug=debug_std)

    tiv_index_size = int(alpha + beta)
    to_index_size = int(alpha + gamma)

    Log.report(Log.Info, "initial table structures")
    table_iv = ML_NewTable(dimensions=[2**tiv_index_size],
                           storage_precision=storage_format, tag="tiv")
    table_offset = ML_NewTable(dimensions=[2**to_index_size],
                               storage_precision=storage_format, tag="to")

    slope_table = [None] * (2**alpha)
    slope_delta = 1.0 / sollya.SollyaObject(2**alpha)
    delta_u = range_size * slope_delta * 2**-15
    Log.report(Log.Info, "computing slope value")
    for i in range(2**alpha):
        # slope is computed at the middle of range_size interval
        slope_x = range_lo + (i + 0.5) * range_size * slope_delta
        # TODO: gross approximation of derivatives
        # (central finite difference of width delta_u)
        f_xpu = self.function(slope_x + delta_u / 2)
        f_xmu = self.function(slope_x - delta_u / 2)
        slope = (f_xpu - f_xmu) / delta_u
        slope_table[i] = slope

    range_rcp_steps = 1.0 / sollya.SollyaObject(2**tiv_index_size)
    # half of an initial-value step; identical for every table entry
    offset_x = 0.5 * range_rcp_steps * range_size
    Log.report(Log.Info, "computing value for initial-value table")
    for i in range(2**tiv_index_size):
        # the slope is shared by the 2**beta entries of an alpha sub-interval
        slope_index = i // 2**beta
        iv_x = range_lo + i * range_rcp_steps * range_size
        # initial value is computed so that the piecewise linear
        # approximation intersects the function at iv_x + offset_x
        iv_y = self.function(
            iv_x + offset_x) - offset_x * slope_table[slope_index]
        initial_value = storage_format.round_sollya_object(iv_y)
        table_iv[i] = initial_value

    # determining table of initial value interval
    tiv_min = min(table_iv)
    tiv_max = max(table_iv)
    table_iv.set_interval(Interval(tiv_min, tiv_max))

    offset_step = range_size / S2**(alpha + beta + gamma)
    for i in range(2**alpha):
        Log.report(Log.Info,
                   "computing offset value for sub-table {}".format(i))
        for j in range(2**gamma):
            to_i = i * 2**gamma + j
            offset = slope_table[i] * j * offset_step
            table_offset[to_i] = offset

    # determining table of offset interval
    to_min = table_offset[0]
    to_max = table_offset[0]
    for i in range(1, 2**(alpha + gamma)):
        to_min = min(to_min, table_offset[i])
        to_max = max(to_max, table_offset[i])
    offset_interval = Interval(to_min, to_max)
    table_offset.set_interval(offset_interval)

    initial_value = TableLoad(table_iv, tiv_index,
                              precision=storage_format,
                              tag="initial_value", debug=debug_fixed)

    offset_precision = get_fixed_type_from_interval(offset_interval, 16)
    Log.report(
        Log.Verbose, "offset_precision is {} ({} bits)".format(
            offset_precision, offset_precision.get_bit_size()))
    table_offset.get_precision().storage_precision = offset_precision

    # rounding table value
    # every entry is rounded, entry 0 included, so that the whole table is
    # expressed in offset_precision
    for i in range(2**(alpha + gamma)):
        table_offset[i] = offset_precision.round_sollya_object(
            table_offset[i])

    offset_value = TableLoad(table_offset, to_index,
                             precision=offset_precision,
                             tag="offset_value", debug=debug_fixed)

    Log.report(
        Log.Verbose,
        "initial_value's interval: {}, offset_value's interval: {}".format(
            evaluate_range(initial_value), evaluate_range(offset_value)))

    # the sum node is built once and fed directly to the final conversion
    final_add = initial_value + offset_value
    result = Conversion(final_add, precision=self.precision,
                        tag="vr_out", debug=debug_fixed)

    # Approximation error evaluation
    # exhaustive scan of the 2**(alpha + beta + gamma) table points
    approx_error = 0.0
    for i in range(2**alpha):
        for j in range(2**beta):
            tiv_i = (i * 2**beta + j)
            # = range_lo + tiv_i * range_rcp_steps * range_size
            iv = table_iv[tiv_i]
            for k in range(2**gamma):
                to_i = i * 2**gamma + k
                offset = table_offset[to_i]
                approx_value = offset + iv
                table_x = range_lo + range_size * (
                    (i * 2**beta + j) * 2**gamma + k) / S2**(alpha + beta + gamma)
                local_error = abs(self.function(table_x) - approx_value)
                approx_error = max(approx_error, local_error)
    error_log2 = float(sollya.log2(approx_error))
    Log.report(
        Log.Verbose, "approx_error is {}, error_log2 is {}".format(
            float(approx_error), error_log2))

    # table size
    table_iv_size = 2**(alpha + beta)
    table_offset_size = 2**(alpha + gamma)
    Log.report(
        Log.Verbose,
        "tables' size are {} entries".format(table_iv_size + table_offset_size))

    return result
def __eq__(lhs, rhs):
    """Compare two values through Sollya's equality.

    ``rhs`` is first checked with ``is_numeric_value``; a non-numeric
    right-hand side never compares equal.  Otherwise both operands are wrapped
    into ``sollya.SollyaObject`` and compared with ``SollyaObject.__eq__``.
    """
    if is_numeric_value(rhs):
        return sollya.SollyaObject.__eq__(sollya.SollyaObject(lhs),
                                          sollya.SollyaObject(rhs))
    return False
def numeric_emulate(self, input_value):
    """Return 2 raised to ``input_value``, evaluated on a Sollya object."""
    base_two = sollya.SollyaObject(2)
    return base_two ** input_value
def get_sollya_object(self):
    """Return a Sollya object holding the value 0."""
    sollya_zero = sollya.SollyaObject(0)
    return sollya_zero