예제 #1
0
        def generate_fptaylor(x):
            x_low = sollya.inf(x)
            x_high = sollya.sup(x)
            query = "\n".join([
                "Variables", "  real x in [{},{}];".format(x_low, x_high),
                "Definitions", "  r rnd64= x;",
                "  retval rnd64= {};".format(poly_expr), "Expressions",
                "  retval;"
            ])

            rnd_rel_err = None
            rnd_abs_err = None
            try:
                res = fptaylor.Result(query, {
                    **config, "--rel-error": "true",
                    "--abs-error": "true"
                })
                rnd_rel_err = float(
                    res.result["relative_errors"]["final_total"]["value"])
                rnd_abs_err = float(
                    res.result["absolute_errors"]["final_total"]["value"])
            except AssertionError:
                pass
            except KeyError:
                try:
                    rnd_abs_err = float(
                        res.result["absolute_errors"]["final_total"]["value"])
                except KeyError:
                    pass

            if rnd_abs_err is None:
                try:
                    res = fptaylor.Result(query, {
                        **config, "--rel-error": "false",
                        "--abs-error": "true"
                    })
                    rnd_abs_err = float(
                        res.result["absolute_errors"]["final_total"]["value"])
                except AssertionError:
                    pass

            err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                     sollya.exp(sollya.x), x, sollya.relative,
                                     2**-100)
            algo_rel_err = sollya.sup(err_int)

            err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                     sollya.exp(sollya.x), x, sollya.absolute,
                                     2**-100)
            algo_abs_err = sollya.sup(err_int)

            if rnd_rel_err is None or str(algo_rel_err) == "error":
                rel_err = float("inf")
            else:
                rel_err = rnd_rel_err + algo_rel_err

            abs_err = rnd_abs_err + algo_abs_err

            return rel_err, abs_err
예제 #2
0
 def get_default_args(**kw):
     """ Return a structure containing the arguments for ML_GenericLog,
         builtin from a default argument mapping overloaded with @p kw """
     default_args_log = {
         "output_file": "ml_genlog.c",
         "function_name": "ml_genlog",
         "precision": ML_Binary32,
         "accuracy": ML_Faithful,
         "target": GenericProcessor(),
         "basis": exp(1),
     }
     default_args_log.update(kw)
     return DefaultArgTemplate(**default_args_log)
예제 #3
0
    def generate_scheme(self):
        x = self.implementation.add_input_variable("x", self.precision)

        n_log2 = self.precision.round_sollya_object(sollya.log(2), sollya.RN)

        if not self.skip_reduction:
            n_invlog2 = self.precision.round_sollya_object(
                1 / sollya.log(2), sollya.RN)
            invlog2 = Constant(n_invlog2, tag="invlog2")

            unround_k = Multiplication(x, invlog2, tag="unround_k")

            k = Floor(unround_k, precision=self.precision, tag="k")

            log2 = Constant(n_log2, tag="log2")

            whole = Multiplication(k, log2, tag="whole")

            r = Subtraction(x, whole, tag="r")

        else:
            r = x

        approx_interval = sollya.Interval(-2**-10, n_log2 + 2**-10)
        approx_func = sollya.exp(sollya.x)
        builder = Polynomial.build_from_approximation
        sollya.settings.prec = 2**10
        poly_object = builder(approx_func, self.poly_degree,
                              [self.precision] * (self.poly_degree + 1),
                              approx_interval, sollya.relative)
        self.poly_object = poly_object
        schemer = PolynomialSchemeEvaluator.generate_horner_scheme
        poly = schemer(poly_object, r)

        if not self.skip_reduction:
            ik = Conversion(k,
                            precision=ML_Binary32.get_integer_format(),
                            tag="ik")

            twok = ExponentInsertion(ik, precision=self.precision, tag="twok")

            retval = Multiplication(poly, twok, tag="retval")

        else:
            retval = poly

        scheme = Return(retval)

        return scheme
예제 #4
0
    def generate_expected_table(self, input_tables, table_size_offset_array):
        """ Generate the complete table of expected results """
        ## output values required to check results are stored in output table
        num_output_value = self.accuracy.get_num_output_value()
        NUM_INPUT_ARRAY = len(input_tables)

        TABLE_SIZE = input_tables[0].dimensions[0]
        NUM_SUBTABLE = table_size_offset_array.dimensions[0]

        EXP_TABLE = [None] * TABLE_SIZE
        sum_exp = sollya.SollyaObject(0)

        # compute elemnt-wise exponential
        for row_id in range(TABLE_SIZE):
            local_exp = sollya.exp(input_tables[0][row_id])
            EXP_TABLE[row_id] = local_exp

        # for each sub-array, compute the sum of exponential, its reciprocal
        # and the each element softmax
        for table_id in range(NUM_SUBTABLE):
            sub_size, sub_offset = table_size_offset_array[table_id]
            if sub_size == 0:
                # avoid division by zero by skipping empty arrays
                continue
            sum_exp = sum(EXP_TABLE[sub_offset:(sub_offset + sub_size)])
            sum_rcp = 1.0 / sum_exp
            for sub_row_id in range(sub_size):
                row_id = sub_row_id + sub_offset
                EXP_TABLE[row_id] = EXP_TABLE[row_id] * sum_rcp

        def expected_value_gen(table_id, table_row_id):
            """ generate a full row of expected values using inputs from
                input_tables"""
            table_offset = table_size_offset_array[table_id][1]
            row_id = table_offset + table_row_id
            output_values = self.accuracy.get_output_check_value(
                EXP_TABLE[row_id])
            return output_values

        # generating expected value table
        expected_table = generate_2d_multi_table(table_size_offset_array,
                                                 num_output_value,
                                                 self.precision,
                                                 "expected_table",
                                                 value_gen=expected_value_gen)
        return expected_table
예제 #5
0
  def generate_scheme(self):
    # declaring target and instantiating optimization engine

    vx = self.implementation.add_input_variable("x", self.precision)
    
    Log.set_dump_stdout(True)
    
    Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m")
    if self.debug_flag: 
        Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")
    
    # local overloading of RaiseReturn operation
    def ExpRaiseReturn(*args, **kwords):
        kwords["arg_value"] = vx
        kwords["function_name"] = self.function_name
        return RaiseReturn(*args, **kwords)
    
    C_m1 = Constant(-1, precision = self.precision)
    
    test_NaN_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False, debug = debug_multi, tag = "NaN_or_inf", precision = ML_Bool)
    test_NaN = Test(vx, specifier = Test.IsNaN, likely = False, debug = debug_multi, tag = "is_NaN", precision = ML_Bool)
    test_inf = Comparison(vx, 0, specifier = Comparison.Greater, debug = debug_multi, tag = "sign", precision = ML_Bool, likely = False);
    
    #  Infnty input
    infty_return = Statement(ConditionBlock(test_inf, Return(FP_PlusInfty(self.precision)), Return(C_m1)))
    #  non-std input (inf/nan)
    specific_return = ConditionBlock(test_NaN, Return(FP_QNaN(self.precision)), infty_return)
    
    # Over/Underflow Tests
    
    precision_emax = self.precision.get_emax()
    precision_max_value = S2**(precision_emax + 1)
    expm1_overflow_bound = ceil(log(precision_max_value + 1))
    overflow_test = Comparison(vx, expm1_overflow_bound, likely = False, specifier = Comparison.Greater, precision = ML_Bool)
    overflow_return = Statement(Return(FP_PlusInfty(self.precision)))
    
    precision_emin = self.precision.get_emin_subnormal()
    precision_min_value = S2** precision_emin
    expm1_underflow_bound = floor(log(precision_min_value) + 1)
    underflow_test = Comparison(vx, expm1_underflow_bound, likely = False, specifier = Comparison.Less, precision = ML_Bool)
    underflow_return = Statement(Return(C_m1))
    
    sollya_precision = {ML_Binary32: sollya.binary32, ML_Binary64: sollya.binary64}[self.precision]
    int_precision = {ML_Binary32: ML_Int32, ML_Binary64: ML_Int64}[self.precision]
    
    # Constants
    
    log_2 = round(log(2), sollya_precision, sollya.RN)
    invlog2 = round(1/log(2), sollya_precision, sollya.RN)
    log_2_cst = Constant(log_2, precision = self.precision)
    
    interval_vx = Interval(expm1_underflow_bound, expm1_overflow_bound)
    interval_fk = interval_vx * invlog2
    interval_k = Interval(floor(inf(interval_fk)), ceil(sup(interval_fk)))
    
    log2_hi_precision = self.precision.get_field_size() - 6
    log2_hi = round(log(2), log2_hi_precision, sollya.RN)
    log2_lo = round(log(2) - log2_hi, sollya_precision, sollya.RN)


    # Reduction
    unround_k = vx * invlog2
    ik = NearestInteger(unround_k, precision = int_precision, debug = debug_multi, tag = "ik")
    k = Conversion(ik, precision = self.precision, tag = "k")
    
    red_coeff1 = Multiplication(k, log2_hi, precision = self.precision)
    red_coeff2 = Multiplication(Negation(k, precision = self.precision), log2_lo, precision = self.precision)
    
    pre_sub_mul = Subtraction(vx, red_coeff1, precision  = self.precision)
    
    s = Addition(pre_sub_mul, red_coeff2, precision = self.precision)
    z = Subtraction(s, pre_sub_mul, precision = self.precision)
    t = Subtraction(red_coeff2, z, precision = self.precision)
    
    r = Addition(s, t, precision = self.precision)
    
    r.set_attributes(tag = "r", debug = debug_multi)
    
    r_interval = Interval(-log_2/S2, log_2/S2)
    
    local_ulp = sup(ulp(exp(r_interval), self.precision))
    
    print("ulp: ", local_ulp)
    error_goal = S2**-1*local_ulp
    print("error goal: ", error_goal)
    
    
    # Polynomial Approx
    error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)
    Log.report(Log.Info, "\033[33;1m Building polynomial \033[0m\n")
    
    poly_degree = sup(guessdegree(expm1(sollya.x), r_interval, error_goal) + 1)
    
    polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme
    poly_degree_list = range(0, poly_degree)
    
    precision_list = [self.precision] *(len(poly_degree_list) + 1)
    poly_object, poly_error = Polynomial.build_from_approximation_with_error(expm1(sollya.x), poly_degree, precision_list, r_interval, sollya.absolute, error_function = error_function)
    sub_poly = poly_object.sub_poly(start_index = 2)
    Log.report(Log.Info, "Poly : %s" % sub_poly)
    Log.report(Log.Info, "poly error : {} / {:d}".format(poly_error, int(sollya.log2(poly_error))))
    pre_sub_poly = polynomial_scheme_builder(sub_poly, r, unified_precision = self.precision)
    poly = r + pre_sub_poly
    poly.set_attributes(tag = "poly", debug = debug_multi)
    
    exp_k = ExponentInsertion(ik, tag = "exp_k", debug = debug_multi, precision = self.precision)
    exp_mk = ExponentInsertion(-ik, tag = "exp_mk", debug = debug_multi, precision = self.precision)
    
    diff = 1 - exp_mk
    diff.set_attributes(tag = "diff", debug = debug_multi) 
    
    # Late Tests
    late_overflow_test = Comparison(ik, self.precision.get_emax(), specifier = Comparison.Greater, likely = False, debug = debug_multi, tag = "late_overflow_test")
    
    overflow_exp_offset = (self.precision.get_emax() - self.precision.get_field_size() / 2)
    diff_k = ik - overflow_exp_offset 
    
    exp_diff_k = ExponentInsertion(diff_k, precision = self.precision, tag = "exp_diff_k", debug = debug_multi)
    exp_oflow_offset = ExponentInsertion(overflow_exp_offset, precision = self.precision, tag = "exp_offset", debug = debug_multi)
    
    late_overflow_result = (exp_diff_k * (1 + poly)) * exp_oflow_offset - 1.0
    
    late_overflow_return = ConditionBlock(
        Test(late_overflow_result, specifier = Test.IsInfty, likely = False), 
        ExpRaiseReturn(ML_FPE_Overflow, return_value = FP_PlusInfty(self.precision)), 
        Return(late_overflow_result)
        )


    late_underflow_test = Comparison(k, self.precision.get_emin_normal(), specifier = Comparison.LessOrEqual, likely = False)
    
    underflow_exp_offset = 2 * self.precision.get_field_size()
    corrected_coeff = ik + underflow_exp_offset
    
    exp_corrected = ExponentInsertion(corrected_coeff, precision = self.precision)
    exp_uflow_offset = ExponentInsertion(-underflow_exp_offset, precision = self.precision)
    
    late_underflow_result = ( exp_corrected * (1 + poly)) * exp_uflow_offset - 1.0
    
    test_subnormal = Test(late_underflow_result, specifier = Test.IsSubnormal, likely = False)
    
    late_underflow_return = Statement(
        ConditionBlock(
            test_subnormal, 
            ExpRaiseReturn(ML_FPE_Underflow, return_value = late_underflow_result)), 
            Return(late_underflow_result)
            )
    
    # Reconstruction
    
    std_result = exp_k * ( poly + diff )
    std_result.set_attributes(tag = "result", debug = debug_multi)
    
    result_scheme = ConditionBlock(
        late_overflow_test, 
        late_overflow_return, 
        ConditionBlock(
            late_underflow_test, 
            late_underflow_return, 
            Return(std_result)
            )
        )
        
    std_return = ConditionBlock(
        overflow_test, 
        overflow_return, 
        ConditionBlock(
            underflow_test, 
            underflow_return, 
            result_scheme)
        )
        
    scheme = ConditionBlock(
        test_NaN_or_inf, 
        Statement(specific_return), 
        std_return
        )

    return scheme
예제 #6
0
    def generate_scalar_scheme(self, vx):
        Log.set_dump_stdout(True)

        Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
                Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        index_size = 5

        comp_lo = (vx < 0)
        comp_lo.set_attributes(tag = "comp_lo", precision = ML_Bool)
        sign = Select(comp_lo, -1, 1, precision = self.precision)

        # as sinh is an odd function, we can simplify the input to its absolute
        # value once the sign has been extracted
        vx = Abs(vx)
        int_precision = self.precision.get_integer_format()

        # argument reduction
        arg_reg_value = log(2)/2**index_size
        inv_log2_value = round(1/arg_reg_value, self.precision.get_sollya_object(), sollya.RN)
        inv_log2_cst = Constant(inv_log2_value, precision = self.precision, tag = "inv_log2")

        # for r_hi to be accurate we ensure k * log2_hi_value_cst is exact
        # by limiting the number of non-zero bits in log2_hi_value_cst
        # cosh(x) ~ exp(abs(x))/2    for a big enough x
        # cosh(x) > 2^1023 <=> exp(x) > 2^1024 <=> x > log(2^1024)
        # k = inv_log2_value * x
        # -1 for guard
        max_k_approx    = inv_log2_value * log(sollya.SollyaObject(2)**1024)
        max_k_bitsize = int(ceil(log2(max_k_approx)))
        Log.report(Log.Info, "max_k_bitsize: %d" % max_k_bitsize)
        log2_hi_value_precision = self.precision.get_precision() - max_k_bitsize - 1

        log2_hi_value = round(arg_reg_value, log2_hi_value_precision, sollya.RN)
        log2_lo_value = round(arg_reg_value - log2_hi_value, self.precision.get_sollya_object(), sollya.RN)
        log2_hi_value_cst = Constant(log2_hi_value, tag = "log2_hi_value", precision = self.precision)
        log2_lo_value_cst = Constant(log2_lo_value, tag = "log2_lo_value", precision = self.precision)

        k = Trunc(Multiplication(inv_log2_cst, vx), precision = self.precision)
        k_log2 = Multiplication(k, log2_hi_value_cst, precision = self.precision, exact = True, tag = "k_log2", unbreakable = True)
        r_hi = vx - k_log2
        r_hi.set_attributes(tag = "r_hi", debug = debug_multi, unbreakable = True)
        r_lo = -k * log2_lo_value_cst
        # reduced argument
        r = r_hi + r_lo
        r.set_attributes(tag = "r", debug = debug_multi)

        if is_gappa_installed():
                r_eval_error = self.get_eval_error(r_hi, variable_copy_map =
                    {
                        vx: Variable("vx", interval = Interval(0, 715), precision = self.precision),
                        k: Variable("k", interval = Interval(0, 1024), precision = self.precision)
                    })
                Log.report(Log.Verbose, "r_eval_error: ", r_eval_error)

        approx_interval = Interval(-arg_reg_value, arg_reg_value)
        error_goal_approx = 2**-(self.precision.get_precision())

        poly_degree = sup(guessdegree(exp(sollya.x), approx_interval, error_goal_approx)) + 3
        precision_list = [1] + [self.precision] * (poly_degree)

        k_integer = Conversion(k, precision = int_precision, tag = "k_integer", debug = debug_multi)
        k_hi = BitLogicRightShift(k_integer, Constant(index_size, precision=int_precision), tag = "k_int_hi", precision = int_precision, debug = debug_multi)
        k_lo = Modulo(k_integer, 2**index_size, tag = "k_int_lo", precision = int_precision, debug = debug_multi)
        pow_exp = ExponentInsertion(Conversion(k_hi, precision = int_precision), precision = self.precision, tag = "pow_exp", debug = debug_multi)

        exp_table = ML_NewTable(dimensions = [2 * 2**index_size, 4], storage_precision = self.precision, tag = self.uniquify_name("exp2_table"))
        for i in range(2 * 2**index_size):
            input_value = i - 2**index_size if i >= 2**index_size else i

            reduced_hi_prec = int(self.precision.get_mantissa_size() - 8)
            # using SollyaObject wrapper to force evaluation by sollya
            # with higher precision
            exp_value    = sollya.SollyaObject(2)**((input_value)* 2**-index_size)
            mexp_value = sollya.SollyaObject(2)**((-input_value)* 2**-index_size)
            pos_value_hi = round(exp_value, reduced_hi_prec, sollya.RN)
            pos_value_lo = round(exp_value - pos_value_hi, self.precision.get_sollya_object(), sollya.RN)
            neg_value_hi = round(mexp_value, reduced_hi_prec, sollya.RN)
            neg_value_lo = round(mexp_value - neg_value_hi, self.precision.get_sollya_object(), sollya.RN)
            exp_table[i][0] = neg_value_hi
            exp_table[i][1] = neg_value_lo
            exp_table[i][2] = pos_value_hi
            exp_table[i][3] = pos_value_lo

        # log2_value = log(2) / 2^index_size
        # sinh(x) = 1/2 * (exp(x) - exp(-x))
        # exp(x) = exp(x - k * log2_value + k * log2_value)
        #
        # r = x - k * log2_value
        # exp(x) = exp(r) * 2 ^ (k / 2^index_size)
        #
        # k / 2^index_size = h + l * 2^-index_size, with k, h, l integers
        # exp(x) = exp(r) * 2^h * 2^(l *2^-index_size)
        #
        # sinh(x) = exp(r) * 2^(h-1) * 2^(l *2^-index_size) - exp(-r) * 2^(-h-1) * 2^(-l *2^-index_size)
        # S=2^(h-1), T = 2^(-h-1)
        # exp(r)    = 1 + poly_pos(r)
        # exp(-r) = 1 + poly_neg(r)
        # 2^(l / 2^index_size)    = pos_value_hi + pos_value_lo
        # 2^(-l / 2^index_size) = neg_value_hi + neg_value_lo
        #

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(exp(sollya.x), poly_degree, precision_list, approx_interval, sollya.absolute, error_function = error_function)

        Log.report(Log.Verbose, "poly_approx_error: {}, {}".format(poly_approx_error, float(log2(poly_approx_error))))

        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme
        poly_pos = polynomial_scheme_builder(poly_object.sub_poly(start_index = 1), r, unified_precision = self.precision)
        poly_pos.set_attributes(tag = "poly_pos", debug = debug_multi)

        poly_neg = polynomial_scheme_builder(poly_object.sub_poly(start_index = 1), -r, unified_precision = self.precision)
        poly_neg.set_attributes(tag = "poly_neg", debug = debug_multi)

        table_index = Addition(k_lo, Constant(2**index_size, precision = int_precision), precision = int_precision, tag = "table_index", debug = debug_multi)

        neg_value_load_hi = TableLoad(exp_table, table_index, 0, tag = "neg_value_load_hi", debug = debug_multi)
        neg_value_load_lo = TableLoad(exp_table, table_index, 1, tag = "neg_value_load_lo", debug = debug_multi)
        pos_value_load_hi = TableLoad(exp_table, table_index, 2, tag = "pos_value_load_hi", debug = debug_multi)
        pos_value_load_lo = TableLoad(exp_table, table_index, 3, tag = "pos_value_load_lo", debug = debug_multi)

        k_plus = Max(
            Subtraction(k_hi, Constant(1, precision = int_precision), precision=int_precision, tag="k_plus", debug=debug_multi),
            Constant(self.precision.get_emin_normal(), precision = int_precision))
        k_neg = Max(
            Subtraction(-k_hi, Constant(1, precision=int_precision), precision=int_precision, tag="k_neg", debug=debug_multi),
            Constant(self.precision.get_emin_normal(), precision = int_precision))

        # 2^(h-1)
        pow_exp_pos = ExponentInsertion(k_plus, precision = self.precision, tag="pow_exp_pos", debug=debug_multi)
        # 2^(-h-1)
        pow_exp_neg = ExponentInsertion(k_neg, precision = self.precision, tag="pow_exp_neg", debug=debug_multi)

        hi_terms = (pos_value_load_hi * pow_exp_pos - neg_value_load_hi * pow_exp_neg)
        hi_terms.set_attributes(tag = "hi_terms", debug=debug_multi)


        pos_exp = (pos_value_load_hi * poly_pos + (pos_value_load_lo + pos_value_load_lo * poly_pos)) * pow_exp_pos
        pos_exp.set_attributes(tag = "pos_exp", debug = debug_multi)

        neg_exp = (neg_value_load_hi * poly_neg + (neg_value_load_lo + neg_value_load_lo * poly_neg)) * pow_exp_neg
        neg_exp.set_attributes(tag = "neg_exp", debug = debug_multi)

        result = Addition(
            Subtraction(
                pos_exp,
                neg_exp,
                precision=self.precision,
            ),
            hi_terms,
            precision=self.precision,
            tag="result",
            debug=debug_multi
        )

        # ov_value
        ov_value = round(asinh(self.precision.get_max_value()), self.precision.get_sollya_object(), sollya.RD)
        ov_flag = Comparison(Abs(vx), Constant(ov_value, precision = self.precision), specifier = Comparison.Greater)

        # main scheme
        scheme = Statement(
            Return(
                Select(
                    ov_flag,
                    sign*FP_PlusInfty(self.precision),
                    sign*result
                )))

        return scheme
예제 #7
0
from sollya_extra_functions import cbrt


S2 = sollya.SollyaObject(2)

# dict of (str) -> tuple(ctor, dict(ML_Format -> str))
# the first level key is the function name
# the first value of value tuple is the meta-function constructor
# the second value of the value tuple is a dict which associates to a ML_Format
# the corresponding libm function
FUNCTION_MAP = {
    "exp": (metalibm_functions.ml_exp.ML_Exponential, {}, sollya.exp),
    "tanh": (metalibm_functions.ml_tanh.ML_HyperbolicTangent, {}, sollya.tanh),
    "sqrt": (metalibm_functions.ml_sqrt.MetalibmSqrt, {}, sollya.sqrt),
    "log": (metalibm_functions.generic_log.ML_GenericLog, {"basis": sollya.exp(1)}, sollya.log),
    "log2": (metalibm_functions.generic_log.ML_GenericLog, {"basis": 2}, sollya.log2),
    "log10": (metalibm_functions.generic_log.ML_GenericLog, {"basis": 10}, sollya.log10),
    "exp2": (metalibm_functions.ml_exp2.ML_Exp2, {}, (lambda x: S2**x)),

    "div": (metalibm_functions.ml_div.ML_Division, {}, (lambda x,y: x / y)),
    "cbrt": (metalibm_functions.ml_cbrt.ML_Cbrt, {}, cbrt),

    "cosh": (metalibm_functions.ml_cosh.ML_HyperbolicCosine, {}, sollya.cosh),
    "sinh": (metalibm_functions.ml_sinh.ML_HyperbolicSine, {}, sollya.sinh),

    "cos": (metalibm_functions.ml_sincos.ML_SinCos, {"sin_output": False}, sollya.cos),
    "sin": (metalibm_functions.ml_sincos.ML_SinCos, {"sin_output": True}, sollya.sin),
    "atan": (metalibm_functions.ml_atan.MetaAtan, {}, sollya.atan),
    "atan2": (metalibm_functions.ml_atan.MetaAtan2, {}, (lambda y, x: sollya.atan(y / x))),
예제 #8
0
class FunctionTest:
    def __init__(self, ctor, arg_map_list):
        """ FunctionTest constructor:

            Args:
                ctor(class): constructor of the meta-function
                arg_map_list: list of dictionnaries

        """
        self.ctor = ctor
        self.arg_map_list = arg_map_list


GEN_LOG_ARGS = {
    "basis": sollya.exp(1),
    "function_name": "ml_genlog",
    "passes": ["beforecodegen:fuse_fma"]
}
GEN_LOG2_ARGS = {
    "basis": 2,
    "function_name": "ml_genlog2",
    "passes": ["beforecodegen:fuse_fma"]
}
GEN_LOG10_ARGS = {
    "basis": 10,
    "function_name": "ml_genlog10",
    "passes": ["beforecodegen:fuse_fma"]
}

FUNCTION_LIST = [
예제 #9
0
from metalibm_core.opt.ml_blocks import (
    generate_count_leading_zeros, generate_fasttwosum,
    Add222, Add122, Add212, Add211, Mul212, Mul211, Mul222
)
from metalibm_core.opt.p_expand_multi_precision import (
    dirty_multi_node_expand
)

from metalibm_core.code_generation.generic_processor import GenericProcessor
from metalibm_core.targets.common.vector_backend import VectorBackend

from metalibm_core.utility.gappa_utils import execute_gappa_script_extract
from metalibm_core.utility.ml_template import *
from metalibm_core.utility.debug_utils import *

EXP_1 = sollya.exp(1)
S2    = sollya.parse("2")
S10   = sollya.parse("10")

def DirtyExponentExtraction(f, precision):
  """Get the raw (biased) exponent of @p f.
  Only valid if f is a positive floating-point number.
  @param f input node to extract exponent from
  @param precision underlying floating-point format
  @return node representing f's biased exponent
  """
  int_prec = precision.get_integer_format()
  field_size = Constant(precision.get_field_size(),
                        precision=int_prec,
                        tag='field_size')
  recast_f = TypeCast(f, precision=int_prec) \
예제 #10
0
                        Statement(
                            ClearException(),
                            Raise(ML_FPE_DivideByZero),
                            Return(FP_MinusInfty(self.precision)),
                        ), Return(result_subnormal)), Return(result))))
        scheme = pre_scheme
        return scheme

    def numeric_emulate(self, input_value):
        return sollya.log(input_value) / sollya.log(self.basis)

    standard_test_cases = [(sollya.parse("0x1.42af3ap-1"), None)]


if __name__ == "__main__":
    # auto-test
    ARG_TEMPLATE = ML_NewArgTemplate(
        default_arg=ML_GenericLog.get_default_args())

    ARG_TEMPLATE.parser.add_argument("--basis",
                                     dest="basis",
                                     action="store",
                                     default=exp(1),
                                     type=sollya.parse,
                                     help="logarithm basis")

    args = ARG_TEMPLATE.arg_extraction()

    ml_log10 = ML_GenericLog(args)
    ml_log10.gen_implementation()
예제 #11
0
    def __init__(self, 
                 precision = ML_Binary32, 
                 abs_accuracy = S2**-24, 
                 libm_compliant = True, 
                 debug_flag = False, 
                 fuse_fma = True, 
                 fast_path_extract = True,
                 target = GenericProcessor(), 
                 output_file = "expf.c", 
                 function_name = "expf"):

        # declaring target and instantiating optimization engine
        processor = target
        self.precision = precision
        opt_eng = OptimizationEngine(processor)
        gappacg = GappaCodeGenerator(processor, declare_cst = True, disable_debug = True)

        # declaring CodeFunction and retrieving input variable
        self.function_name = function_name
        exp_implementation = CodeFunction(self.function_name, output_format = self.precision)
        vx = exp_implementation.add_input_variable("x", self.precision) 


        Log.report(Log.Info, "\033[33;1m generating implementation scheme \033[0m")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)


        test_nan_or_inf = Test(vx, specifier = Test.IsInfOrNaN, likely = False, debug = True, tag = "nan_or_inf")
        test_nan = Test(vx, specifier = Test.IsNaN, debug = True, tag = "is_nan_test")
        test_positive = Comparison(vx, 0, specifier = Comparison.GreaterOrEqual, debug = True, tag = "inf_sign")

        test_signaling_nan = Test(vx, specifier = Test.IsSignalingNaN, debug = True, tag = "is_signaling_nan")
        return_snan = Statement(ExpRaiseReturn(ML_FPE_Invalid, return_value = FP_QNaN(self.precision)))

        # return in case of infinity input
        infty_return = Statement(ConditionBlock(test_positive, Return(FP_PlusInfty(self.precision)), Return(FP_PlusZero(self.precision))))
        # return in case of specific value input (NaN or inf)
        specific_return = ConditionBlock(test_nan, ConditionBlock(test_signaling_nan, return_snan, Return(FP_QNaN(self.precision))), infty_return)
        # return in case of standard (non-special) input

        # exclusion of early overflow and underflow cases
        precision_emax      = self.precision.get_emax()
        precision_max_value = S2 * S2**precision_emax 
        exp_overflow_bound  = ceil(log(precision_max_value))
        early_overflow_test = Comparison(vx, exp_overflow_bound, likely = False, specifier = Comparison.Greater)
        early_overflow_return = Statement(ClearException(), ExpRaiseReturn(ML_FPE_Inexact, ML_FPE_Overflow, return_value = FP_PlusInfty(self.precision)))

        precision_emin = self.precision.get_emin_subnormal()
        precision_min_value = S2 ** precision_emin
        exp_underflow_bound = floor(log(precision_min_value))


        early_underflow_test = Comparison(vx, exp_underflow_bound, likely = False, specifier = Comparison.Less)
        early_underflow_return = Statement(ClearException(), ExpRaiseReturn(ML_FPE_Inexact, ML_FPE_Underflow, return_value = FP_PlusZero(self.precision)))


        sollya_prec_map = {ML_Binary32: sollya.binary32, ML_Binary64: sollya.binary64}


        # constant computation
        invlog2 = round(1/log(2), sollya_prec_map[self.precision], RN)

        interval_vx = Interval(exp_underflow_bound, exp_overflow_bound)
        interval_fk = interval_vx * invlog2
        interval_k = Interval(floor(inf(interval_fk)), ceil(sup(interval_fk)))


        log2_hi_precision = self.precision.get_field_size() - (ceil(log2(sup(abs(interval_k)))) + 2)
        Log.report(Log.Info, "log2_hi_precision: "), log2_hi_precision
        invlog2_cst = Constant(invlog2, precision = self.precision)
        log2_hi = round(log(2), log2_hi_precision, sollya.RN) 
        log2_lo = round(log(2) - log2_hi, sollya_prec_map[self.precision], sollya.RN)

        # argument reduction
        unround_k = vx * invlog2
        unround_k.set_attributes(tag = "unround_k", debug = ML_Debug(display_format = "%f"))
        k = NearestInteger(unround_k, precision = self.precision, debug = ML_Debug(display_format = "%f"))
        ik = NearestInteger(unround_k, precision = ML_Int32, debug = ML_Debug(display_format = "%d"), tag = "ik")
        ik.set_tag("ik")
        k.set_tag("k")
        exact_pre_mul = (k * log2_hi)
        exact_pre_mul.set_attributes(exact= True)
        exact_hi_part = vx - exact_pre_mul
        exact_hi_part.set_attributes(exact = True)
        r =  exact_hi_part - k * log2_lo
        r.set_tag("r")
        r.set_attributes(debug = ML_Debug(display_format = "%f"))

        opt_r = opt_eng.optimization_process(r, self.precision, copy = True, fuse_fma = fuse_fma)

        tag_map = {}
        opt_eng.register_nodes_by_tag(opt_r, tag_map)

        cg_eval_error_copy_map = {
            vx: Variable("x", precision = self.precision, interval = interval_vx),
            tag_map["k"]: Variable("k", interval = interval_k, precision = self.precision)
        }
        #try:
        if 1:
            #eval_error = gappacg.get_eval_error(opt_r, cg_eval_error_copy_map, gappa_filename = "red_arg.g")
            eval_error = gappacg.get_eval_error_v2(opt_eng, opt_r, cg_eval_error_copy_map, gappa_filename = "red_arg.g")
            Log.report(Log.Info, "eval error: %s" % eval_error)
        #except:
        #    Log.report(Log.Info, "gappa error evaluation failed")
        print r.get_str(depth = None, display_precision = True, display_attribute = True)
        print opt_r.get_str(depth = None, display_precision = True, display_attribute = True)

        approx_interval = Interval(-log(2)/2, log(2)/2)

        local_ulp = sup(ulp(exp(approx_interval), self.precision))
        print "ulp: ", local_ulp 
        error_goal = local_ulp #S2**-(self.precision.get_field_size()+1)
        error_goal_approx = S2**-1 * error_goal

        Log.report(Log.Info, "\033[33;1m building mathematical polynomial \033[0m\n")
        poly_degree = sup(guessdegree(exp(x), approx_interval, error_goal_approx)) #- 1
        init_poly_degree = poly_degree

        return


        while 1: 
            Log.report(Log.Info, "attempting poly degree: %d" % poly_degree)
            poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(exp(x), poly_degree, [self.precision]*(poly_degree+1), approx_interval, absolute)

            Log.report(Log.Info, "poly approx error: %s" % poly_approx_error)

            Log.report(Log.Info, "\033[33;1m generating polynomial evaluation scheme \033[0m")
            poly = PolynomialSchemeEvaluator.generate_horner_scheme(poly_object, r, unified_precision = self.precision)
            poly.set_tag("poly")

            # optimizing poly before evaluation error computation
            opt_poly = opt_eng.optimization_process(poly, self.precision)

            #print "poly: ", poly.get_str(depth = None, display_precision = True)
            #print "opt_poly: ", opt_poly.get_str(depth = None, display_precision = True)

            # evaluating error of the polynomial approximation
            r_gappa_var = Variable("r", precision = self.precision, interval = approx_interval)
            poly_error_copy_map = {
                r.get_handle().get_node(): r_gappa_var
            }
            gappacg = GappaCodeGenerator(target, declare_cst = False, disable_debug = True)
            poly_eval_error = gappacg.get_eval_error_v2(opt_eng, poly.get_handle().get_node(), poly_error_copy_map, gappa_filename = "gappa_poly.g")
            Log.report(Log.Info, "poly evaluation error: %s" % poly_eval_error)

            global_poly_error = poly_eval_error + poly_approx_error
            global_rel_poly_error = global_poly_error / exp(approx_interval)
            print "global_poly_error: ", global_poly_error, global_rel_poly_error 
            flag = local_ulp > sup(abs(global_rel_poly_error))
            print "test: ", flag
            if flag: break
            else:
                if poly_degree > init_poly_degree + 5:
                    Log.report(Log.Error, "poly degree search did not converge")
                poly_degree += 1



        late_overflow_test = Comparison(ik, self.precision.get_emax(), specifier = Comparison.Greater, likely = False, debug = True, tag = "late_overflow_test")
        overflow_exp_offset = (self.precision.get_emax() - self.precision.get_field_size() / 2)
        diff_k = ik - overflow_exp_offset 
        diff_k.set_attributes(debug = ML_Debug(display_format = "%d"), tag = "diff_k")
        late_overflow_result = (ExponentInsertion(diff_k) * poly) * ExponentInsertion(overflow_exp_offset)
        late_overflow_result.set_attributes(silent = False, tag = "late_overflow_result", debug = debugf)
        late_overflow_return = ConditionBlock(Test(late_overflow_result, specifier = Test.IsInfty, likely = False), ExpRaiseReturn(ML_FPE_Overflow, return_value = FP_PlusInfty(self.precision)), Return(late_overflow_result))

        late_underflow_test = Comparison(k, self.precision.get_emin_normal(), specifier = Comparison.LessOrEqual, likely = False)
        underflow_exp_offset = 2 * self.precision.get_field_size()
        late_underflow_result = (ExponentInsertion(ik + underflow_exp_offset) * poly) * ExponentInsertion(-underflow_exp_offset)
        late_underflow_result.set_attributes(debug = ML_Debug(display_format = "%e"), tag = "late_underflow_result", silent = False)
        test_subnormal = Test(late_underflow_result, specifier = Test.IsSubnormal)
        late_underflow_return = Statement(ConditionBlock(test_subnormal, ExpRaiseReturn(ML_FPE_Underflow, return_value = late_underflow_result)), Return(late_underflow_result))

        std_result = poly * ExponentInsertion(ik, tag = "exp_ik", debug = debug_lftolx)
        std_result.set_attributes(tag = "std_result", debug = debug_lftolx)
        result_scheme = ConditionBlock(late_overflow_test, late_overflow_return, ConditionBlock(late_underflow_test, late_underflow_return, Return(std_result)))
        std_return = ConditionBlock(early_overflow_test, early_overflow_return, ConditionBlock(early_underflow_test, early_underflow_return, result_scheme))

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = ConditionBlock(test_nan_or_inf, Statement(ClearException(), specific_return), std_return)

        #print scheme.get_str(depth = None, display_precision = True)

        # fusing FMA
        if fuse_fma: 
            Log.report(Log.Info, "\033[33;1m MDL fusing FMA \033[0m")
            scheme = opt_eng.fuse_multiply_add(scheme, silence = True)

        Log.report(Log.Info, "\033[33;1m MDL abstract scheme \033[0m")
        opt_eng.instantiate_abstract_precision(scheme, None)

        Log.report(Log.Info, "\033[33;1m MDL instantiated scheme \033[0m")
        opt_eng.instantiate_precision(scheme, default_precision = self.precision)


        Log.report(Log.Info, "\033[33;1m subexpression sharing \033[0m")
        opt_eng.subexpression_sharing(scheme)

        Log.report(Log.Info, "\033[33;1m silencing operation \033[0m")
        opt_eng.silence_fp_operations(scheme)

        # registering scheme as function implementation
        exp_implementation.set_scheme(scheme)

        # check processor support
        Log.report(Log.Info, "\033[33;1m checking processor support \033[0m")
        opt_eng.check_processor_support(scheme)

        # factorizing fast path
        if fast_path_extract:
            Log.report(Log.Info, "\033[33;1m factorizing fast path\033[0m")
            opt_eng.factorize_fast_path(scheme)
        
        Log.report(Log.Info, "\033[33;1m generating source code \033[0m")
        cg = CCodeGenerator(processor, declare_cst = False, disable_debug = not debug_flag, libm_compliant = libm_compliant)
        self.result = exp_implementation.get_definition(cg, C_Code, static_cst = True)
        #self.result.add_header("support_lib/ml_types.h")
        self.result.add_header("support_lib/ml_special_values.h")
        self.result.add_header_comment("polynomial degree for exp(x): %d" % poly_degree)
        self.result.add_header_comment("sollya polynomial for exp(x): %s" % poly_object.get_sollya_object())
        if debug_flag:
            self.result.add_header("stdio.h")
            self.result.add_header("inttypes.h")
        output_stream = open(output_file, "w")#"%s.c" % exp_implementation.get_name(), "w")
        output_stream.write(self.result.get(cg))
        output_stream.close()
예제 #12
0
        Log.report(Log.Error, "poly degree must be positive or null. {}, {}",
                   poly_object.degree, poly_object)


if __name__ == "__main__":
    implem_results = []
    for eps_target in [S2**-40, S2**-50, S2**-55, S2**-60, S2**-65]:
        approx_interval = Interval(-S2**-5, S2**-5)
        ctx = MLL_Context(ML_Binary64, approx_interval)
        vx = Variable("x",
                      precision=ctx.variableFormat,
                      interval=approx_interval)
        # guessding the best degree
        poly_degree = int(
            sup(
                sollya.guessdegree(sollya.exp(sollya.x), approx_interval,
                                   eps_target)))
        # asking sollya to provide the approximation
        poly_object = Polynomial.build_from_approximation(
            sollya.exp(sollya.x), poly_degree,
            [sollya.doubledouble] * (poly_degree + 1), vx.interval)
        print("poly object is {}".format(poly_object))
        poly_graph, poly_epsilon = mll_implementpoly_horner(
            ctx, poly_object, eps_target, vx)
        print("poly_graph is {}".format(
            poly_graph.get_str(depth=None, display_precision=True)))
        print("poly epsilon is {}".format(float(poly_epsilon)))
        print("poly accuracy is {}".format(
            get_accuracy_from_epsilon(poly_epsilon)))
        implem_results.append(
            (eps_target, poly_degree, poly_object, poly_graph, poly_epsilon))
예제 #13
0
                ctor(class): constructor of the meta-function
                arg_map_list: list of dictionnaries

        """
        self.ctor = ctor
        self.arg_map_list = arg_map_list
        self.title = title if not title is None else ctor.function_name
        # callback(<option dict>) -> specialized <option dict>
        self.specific_opts_builder = specific_opts_builder


    @property
    def tag(self):
        return self.ctor.function_name

GEN_LOG_ARGS = {"basis": sollya.exp(1), "function_name": "ml_genlog", "extra_passes" : ["beforecodegen:fuse_fma"]}
GEN_LOG2_ARGS =  {"basis": 2, "function_name": "ml_genlog2", "extra_passes" : ["beforecodegen:fuse_fma"]}
GEN_LOG10_ARGS =  {"basis": 10, "function_name": "ml_genlog10", "extra_passes" : ["beforecodegen:fuse_fma"]}

class LibmFunctionTest(FunctionTest):
    @property
    def tag(self):
        # NOTES/FIXME: 0-th element of self.arg_map_list is chosen
        # for tag determination without considering the others
        return self.title + "_" + self.arg_map_list[0]["bench_function_name"]

S2 = sollya.SollyaObject(2)
S10 = sollya.SollyaObject(10)
def emulate_exp2(v):
    return S2**v
def emulate_exp10(v):
예제 #14
0
        def generate_reduction_fptaylor(x):

            # get k, must be the same at endpoints
            unround_k = x * n_invlog2
            k_low = sollya.floor(sollya.inf(unround_k))
            k_high = sollya.floor(sollya.sup(unround_k))
            if not (k_low == k_high or (k_low == -1 and sollya.sup(x) == 0)):
                assert False, "Interval must not straddle multples of log(2)"
            k = int(k_low)
            r = x - k * n_log2

            twok = 2**k

            x_low = sollya.inf(x)
            x_high = sollya.sup(x)
            query = "\n".join([
                "Variables", "  real x in [{},{}];".format(x_low, x_high),
                "Definitions", "  whole rnd64= {} * {};".format(k, n_log2),
                "  r rnd64= x - whole;", "  poly rnd64= {};".format(poly_expr),
                "  retval rnd64= poly*{};".format(twok), "Expressions",
                "  retval;"
            ])

            rnd_rel_err = None
            rnd_abs_err = None
            try:
                res = fptaylor.Result(query, {
                    **config, "--rel-error": "true",
                    "--abs-error": "true"
                })
                rnd_rel_err = float(
                    res.result["relative_errors"]["final_total"]["value"])
                rnd_abs_err = float(
                    res.result["absolute_errors"]["final_total"]["value"])
            except AssertionError:
                pass
            except KeyError:
                try:
                    rnd_abs_err = float(
                        res.result["absolute_errors"]["final_total"]["value"])
                except KeyError:
                    pass

            if rnd_abs_err is None:
                try:
                    res = fptaylor.Result(query, {
                        **config, "--rel-error": "false",
                        "--abs-error": "true"
                    })
                    rnd_abs_err = float(
                        res.result["absolute_errors"]["final_total"]["value"])
                except AssertionError:
                    pass

            err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                     sollya.exp(sollya.x), r, sollya.relative,
                                     2**-100)
            algo_rel_err = sollya.sup(err_int)

            err_int = sollya.supnorm(self.poly_object.get_sollya_object(),
                                     sollya.exp(sollya.x), r, sollya.absolute,
                                     2**-100)
            algo_abs_err = sollya.sup(err_int)

            if rnd_rel_err is None or str(algo_rel_err) == "error":
                rel_err = float("inf")
            else:
                rel_err = rnd_rel_err + algo_rel_err

            abs_err = rnd_abs_err + algo_abs_err

            return rel_err, abs_err
예제 #15
0
 def numeric_emulate(self, input_value):
     """ Numeric emaluation of exponential """
     return sollya.exp(input_value)
예제 #16
0
 def numeric_emulate(self, input_value):
     return exp(input_value)
예제 #17
0
    def generate_scheme(self):
        # declaring target and instantiating optimization engine
        vx = self.implementation.add_input_variable("x", self.precision)

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            if self.libm_compliant:
                return RaiseReturn(*args, precision=self.precision, **kwords)
            else:
                return Return(kwords["return_value"], precision=self.precision)

        test_nan_or_inf = Test(vx,
                               specifier=Test.IsInfOrNaN,
                               likely=False,
                               debug=debug_multi,
                               tag="nan_or_inf")
        test_nan = Test(vx,
                        specifier=Test.IsNaN,
                        debug=debug_multi,
                        tag="is_nan_test")
        test_positive = Comparison(vx,
                                   0,
                                   specifier=Comparison.GreaterOrEqual,
                                   debug=debug_multi,
                                   tag="inf_sign")

        test_signaling_nan = Test(vx,
                                  specifier=Test.IsSignalingNaN,
                                  debug=debug_multi,
                                  tag="is_signaling_nan")
        return_snan = Statement(
            ExpRaiseReturn(ML_FPE_Invalid,
                           return_value=FP_QNaN(self.precision)))

        # return in case of infinity input
        infty_return = Statement(
            ConditionBlock(
                test_positive,
                Return(FP_PlusInfty(self.precision), precision=self.precision),
                Return(FP_PlusZero(self.precision), precision=self.precision)))
        # return in case of specific value input (NaN or inf)
        specific_return = ConditionBlock(
            test_nan,
            ConditionBlock(
                test_signaling_nan, return_snan,
                Return(FP_QNaN(self.precision), precision=self.precision)),
            infty_return)
        # return in case of standard (non-special) input

        # exclusion of early overflow and underflow cases
        precision_emax = self.precision.get_emax()
        precision_max_value = S2 * S2**precision_emax
        exp_overflow_bound = sollya.ceil(log(precision_max_value))
        early_overflow_test = Comparison(vx,
                                         exp_overflow_bound,
                                         likely=False,
                                         specifier=Comparison.Greater)
        early_overflow_return = Statement(
            ClearException() if self.libm_compliant else Statement(),
            ExpRaiseReturn(ML_FPE_Inexact,
                           ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)))

        precision_emin = self.precision.get_emin_subnormal()
        precision_min_value = S2**precision_emin
        exp_underflow_bound = floor(log(precision_min_value))

        early_underflow_test = Comparison(vx,
                                          exp_underflow_bound,
                                          likely=False,
                                          specifier=Comparison.Less)
        early_underflow_return = Statement(
            ClearException() if self.libm_compliant else Statement(),
            ExpRaiseReturn(ML_FPE_Inexact,
                           ML_FPE_Underflow,
                           return_value=FP_PlusZero(self.precision)))

        # constant computation
        invlog2 = self.precision.round_sollya_object(1 / log(2), sollya.RN)

        interval_vx = Interval(exp_underflow_bound, exp_overflow_bound)
        interval_fk = interval_vx * invlog2
        interval_k = Interval(floor(inf(interval_fk)),
                              sollya.ceil(sup(interval_fk)))

        log2_hi_precision = self.precision.get_field_size() - (
            sollya.ceil(log2(sup(abs(interval_k)))) + 2)
        Log.report(Log.Info, "log2_hi_precision: %d" % log2_hi_precision)
        invlog2_cst = Constant(invlog2, precision=self.precision)
        log2_hi = round(log(2), log2_hi_precision, sollya.RN)
        log2_lo = self.precision.round_sollya_object(
            log(2) - log2_hi, sollya.RN)

        # argument reduction
        unround_k = vx * invlog2
        unround_k.set_attributes(tag="unround_k", debug=debug_multi)
        k = NearestInteger(unround_k,
                           precision=self.precision,
                           debug=debug_multi)
        ik = NearestInteger(unround_k,
                            precision=self.precision.get_integer_format(),
                            debug=debug_multi,
                            tag="ik")
        ik.set_tag("ik")
        k.set_tag("k")
        exact_pre_mul = (k * log2_hi)
        exact_pre_mul.set_attributes(exact=True)
        exact_hi_part = vx - exact_pre_mul
        exact_hi_part.set_attributes(exact=True,
                                     tag="exact_hi",
                                     debug=debug_multi,
                                     prevent_optimization=True)
        exact_lo_part = -k * log2_lo
        exact_lo_part.set_attributes(tag="exact_lo",
                                     debug=debug_multi,
                                     prevent_optimization=True)
        r = exact_hi_part + exact_lo_part
        r.set_tag("r")
        r.set_attributes(debug=debug_multi)

        approx_interval = Interval(-log(2) / 2, log(2) / 2)

        approx_interval_half = approx_interval / 2
        approx_interval_split = [
            Interval(-log(2) / 2, inf(approx_interval_half)),
            approx_interval_half,
            Interval(sup(approx_interval_half),
                     log(2) / 2)
        ]

        # TODO: should be computed automatically
        exact_hi_interval = approx_interval
        exact_lo_interval = -interval_k * log2_lo

        opt_r = self.optimise_scheme(r, copy={})

        tag_map = {}
        self.opt_engine.register_nodes_by_tag(opt_r, tag_map)

        cg_eval_error_copy_map = {
            vx:
            Variable("x", precision=self.precision, interval=interval_vx),
            tag_map["k"]:
            Variable("k", interval=interval_k, precision=self.precision)
        }

        #try:
        if is_gappa_installed():
            eval_error = self.gappa_engine.get_eval_error_v2(
                self.opt_engine,
                opt_r,
                cg_eval_error_copy_map,
                gappa_filename="red_arg.g")
        else:
            eval_error = 0.0
            Log.report(Log.Warning,
                       "gappa is not installed in this environnement")
        Log.report(Log.Info, "eval error: %s" % eval_error)

        local_ulp = sup(ulp(sollya.exp(approx_interval), self.precision))
        # FIXME refactor error_goal from accuracy
        Log.report(Log.Info, "accuracy: %s" % self.accuracy)
        if isinstance(self.accuracy, ML_Faithful):
            error_goal = local_ulp
        elif isinstance(self.accuracy, ML_CorrectlyRounded):
            error_goal = S2**-1 * local_ulp
        elif isinstance(self.accuracy, ML_DegradedAccuracyAbsolute):
            error_goal = self.accuracy.goal
        elif isinstance(self.accuracy, ML_DegradedAccuracyRelative):
            error_goal = self.accuracy.goal
        else:
            Log.report(Log.Error, "unknown accuracy: %s" % self.accuracy)

        # error_goal = local_ulp #S2**-(self.precision.get_field_size()+1)
        error_goal_approx = S2**-1 * error_goal

        Log.report(Log.Info,
                   "\033[33;1m building mathematical polynomial \033[0m\n")
        poly_degree = max(
            sup(
                guessdegree(
                    expm1(sollya.x) / sollya.x, approx_interval,
                    error_goal_approx)) - 1, 2)
        init_poly_degree = poly_degree

        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_estrin_scheme
        #polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme

        while 1:
            Log.report(Log.Info, "attempting poly degree: %d" % poly_degree)
            precision_list = [1] + [self.precision] * (poly_degree)
            poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
                expm1(sollya.x),
                poly_degree,
                precision_list,
                approx_interval,
                sollya.absolute,
                error_function=error_function)
            Log.report(Log.Info, "polynomial: %s " % poly_object)
            sub_poly = poly_object.sub_poly(start_index=2)
            Log.report(Log.Info, "polynomial: %s " % sub_poly)

            Log.report(Log.Info, "poly approx error: %s" % poly_approx_error)

            Log.report(
                Log.Info,
                "\033[33;1m generating polynomial evaluation scheme \033[0m")
            pre_poly = polynomial_scheme_builder(
                poly_object, r, unified_precision=self.precision)
            pre_poly.set_attributes(tag="pre_poly", debug=debug_multi)

            pre_sub_poly = polynomial_scheme_builder(
                sub_poly, r, unified_precision=self.precision)
            pre_sub_poly.set_attributes(tag="pre_sub_poly", debug=debug_multi)

            poly = 1 + (exact_hi_part + (exact_lo_part + pre_sub_poly))
            poly.set_tag("poly")

            # optimizing poly before evaluation error computation
            #opt_poly = self.opt_engine.optimization_process(poly, self.precision, fuse_fma = fuse_fma)
            #opt_sub_poly = self.opt_engine.optimization_process(pre_sub_poly, self.precision, fuse_fma = fuse_fma)
            opt_poly = self.optimise_scheme(poly)
            opt_sub_poly = self.optimise_scheme(pre_sub_poly)

            # evaluating error of the polynomial approximation
            r_gappa_var = Variable("r",
                                   precision=self.precision,
                                   interval=approx_interval)
            exact_hi_gappa_var = Variable("exact_hi",
                                          precision=self.precision,
                                          interval=exact_hi_interval)
            exact_lo_gappa_var = Variable("exact_lo",
                                          precision=self.precision,
                                          interval=exact_lo_interval)
            vx_gappa_var = Variable("x",
                                    precision=self.precision,
                                    interval=interval_vx)
            k_gappa_var = Variable("k",
                                   interval=interval_k,
                                   precision=self.precision)

            #print "exact_hi interval: ", exact_hi_interval

            sub_poly_error_copy_map = {
                #r.get_handle().get_node(): r_gappa_var,
                #vx.get_handle().get_node():  vx_gappa_var,
                exact_hi_part.get_handle().get_node():
                exact_hi_gappa_var,
                exact_lo_part.get_handle().get_node():
                exact_lo_gappa_var,
                #k.get_handle().get_node(): k_gappa_var,
            }

            poly_error_copy_map = {
                exact_hi_part.get_handle().get_node(): exact_hi_gappa_var,
                exact_lo_part.get_handle().get_node(): exact_lo_gappa_var,
            }

            if is_gappa_installed():
                sub_poly_eval_error = -1.0
                sub_poly_eval_error = self.gappa_engine.get_eval_error_v2(
                    self.opt_engine,
                    opt_sub_poly,
                    sub_poly_error_copy_map,
                    gappa_filename="%s_gappa_sub_poly.g" % self.function_name)

                dichotomy_map = [
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[0],
                    },
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[1],
                    },
                    {
                        exact_hi_part.get_handle().get_node():
                        approx_interval_split[2],
                    },
                ]
                poly_eval_error_dico = self.gappa_engine.get_eval_error_v3(
                    self.opt_engine,
                    opt_poly,
                    poly_error_copy_map,
                    gappa_filename="gappa_poly.g",
                    dichotomy=dichotomy_map)

                poly_eval_error = max(
                    [sup(abs(err)) for err in poly_eval_error_dico])
            else:
                poly_eval_error = 0.0
                sub_poly_eval_error = 0.0
                Log.report(Log.Warning,
                           "gappa is not installed in this environnement")
                Log.report(Log.Info, "stopping autonomous degree research")
                # incrementing polynomial degree to counteract initial decrementation effect
                poly_degree += 1
                break
            Log.report(Log.Info, "poly evaluation error: %s" % poly_eval_error)
            Log.report(Log.Info,
                       "sub poly evaluation error: %s" % sub_poly_eval_error)

            global_poly_error = None
            global_rel_poly_error = None

            for case_index in range(3):
                poly_error = poly_approx_error + poly_eval_error_dico[
                    case_index]
                rel_poly_error = sup(
                    abs(poly_error /
                        sollya.exp(approx_interval_split[case_index])))
                if global_rel_poly_error == None or rel_poly_error > global_rel_poly_error:
                    global_rel_poly_error = rel_poly_error
                    global_poly_error = poly_error
            flag = error_goal > global_rel_poly_error

            if flag:
                break
            else:
                poly_degree += 1

        late_overflow_test = Comparison(ik,
                                        self.precision.get_emax(),
                                        specifier=Comparison.Greater,
                                        likely=False,
                                        debug=debug_multi,
                                        tag="late_overflow_test")
        overflow_exp_offset = (self.precision.get_emax() -
                               self.precision.get_field_size() / 2)
        diff_k = Subtraction(
            ik,
            Constant(overflow_exp_offset,
                     precision=self.precision.get_integer_format()),
            precision=self.precision.get_integer_format(),
            debug=debug_multi,
            tag="diff_k",
        )
        late_overflow_result = (ExponentInsertion(
            diff_k, precision=self.precision) * poly) * ExponentInsertion(
                overflow_exp_offset, precision=self.precision)
        late_overflow_result.set_attributes(silent=False,
                                            tag="late_overflow_result",
                                            debug=debug_multi,
                                            precision=self.precision)
        late_overflow_return = ConditionBlock(
            Test(late_overflow_result, specifier=Test.IsInfty, likely=False),
            ExpRaiseReturn(ML_FPE_Overflow,
                           return_value=FP_PlusInfty(self.precision)),
            Return(late_overflow_result, precision=self.precision))

        late_underflow_test = Comparison(k,
                                         self.precision.get_emin_normal(),
                                         specifier=Comparison.LessOrEqual,
                                         likely=False)
        underflow_exp_offset = 2 * self.precision.get_field_size()
        corrected_exp = Addition(
            ik,
            Constant(underflow_exp_offset,
                     precision=self.precision.get_integer_format()),
            precision=self.precision.get_integer_format(),
            tag="corrected_exp")
        late_underflow_result = (
            ExponentInsertion(corrected_exp, precision=self.precision) *
            poly) * ExponentInsertion(-underflow_exp_offset,
                                      precision=self.precision)
        late_underflow_result.set_attributes(debug=debug_multi,
                                             tag="late_underflow_result",
                                             silent=False)
        test_subnormal = Test(late_underflow_result,
                              specifier=Test.IsSubnormal)
        late_underflow_return = Statement(
            ConditionBlock(
                test_subnormal,
                ExpRaiseReturn(ML_FPE_Underflow,
                               return_value=late_underflow_result)),
            Return(late_underflow_result, precision=self.precision))

        twok = ExponentInsertion(ik,
                                 tag="exp_ik",
                                 debug=debug_multi,
                                 precision=self.precision)
        #std_result = twok * ((1 + exact_hi_part * pre_poly) + exact_lo_part * pre_poly)
        std_result = twok * poly
        std_result.set_attributes(tag="std_result", debug=debug_multi)
        result_scheme = ConditionBlock(
            late_overflow_test, late_overflow_return,
            ConditionBlock(late_underflow_test, late_underflow_return,
                           Return(std_result, precision=self.precision)))
        std_return = ConditionBlock(
            early_overflow_test, early_overflow_return,
            ConditionBlock(early_underflow_test, early_underflow_return,
                           result_scheme))

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = ConditionBlock(
            test_nan_or_inf,
            Statement(ClearException() if self.libm_compliant else Statement(),
                      specific_return), std_return)

        return scheme
예제 #18
0
    def generate_scheme(self):
        # declaring target and instantiating optimization engine

        vx = self.implementation.add_input_variable("x", self.precision)

        Log.set_dump_stdout(True)

        Log.report(Log.Info,
                   "\033[33;1m generating implementation scheme \033[0m")
        if self.debug_flag:
            Log.report(Log.Info, "\033[31;1m debug has been enabled \033[0;m")

        # local overloading of RaiseReturn operation
        def ExpRaiseReturn(*args, **kwords):
            kwords["arg_value"] = vx
            kwords["function_name"] = self.function_name
            return RaiseReturn(*args, **kwords)

        index_size = 3

        vx = Abs(vx)
        int_precision = {
            ML_Binary32: ML_Int32,
            ML_Binary64: ML_Int64
        }[self.precision]

        # argument reduction
        arg_reg_value = log(2) / 2**index_size
        inv_log2_value = round(1 / arg_reg_value,
                               self.precision.get_sollya_object(), RN)
        inv_log2_cst = Constant(inv_log2_value,
                                precision=self.precision,
                                tag="inv_log2")

        # for r_hi to be accurate we ensure k * log2_hi_value_cst is exact
        # by limiting the number of non-zero bits in log2_hi_value_cst
        # cosh(x) ~ exp(abs(x))/2  for a big enough x
        # cosh(x) > 2^1023 <=> exp(x) > 2^1024 <=> x > log(2^21024)
        # k = inv_log2_value * x
        # -1 for guard
        max_k_approx = inv_log2_value * log(sollya.SollyaObject(2)**1024)
        max_k_bitsize = int(ceil(log2(max_k_approx)))
        Log.report(Log.Info, "max_k_bitsize: %d" % max_k_bitsize)
        log2_hi_value_precision = self.precision.get_precision(
        ) - max_k_bitsize - 1

        log2_hi_value = round(arg_reg_value, log2_hi_value_precision, RN)
        log2_lo_value = round(arg_reg_value - log2_hi_value,
                              self.precision.get_sollya_object(), RN)
        log2_hi_value_cst = Constant(log2_hi_value,
                                     tag="log2_hi_value",
                                     precision=self.precision)
        log2_lo_value_cst = Constant(log2_lo_value,
                                     tag="log2_lo_value",
                                     precision=self.precision)

        k = Trunc(Multiplication(inv_log2_cst, vx), precision=self.precision)
        k_log2 = Multiplication(k,
                                log2_hi_value_cst,
                                precision=self.precision,
                                exact=True,
                                tag="k_log2",
                                unbreakable=True)
        r_hi = vx - k_log2
        r_hi.set_attributes(tag="r_hi", debug=debug_multi, unbreakable=True)
        r_lo = -k * log2_lo_value_cst
        # reduced argument
        r = r_hi + r_lo
        r.set_attributes(tag="r", debug=debug_multi)

        r_eval_error = self.get_eval_error(
            r_hi,
            variable_copy_map={
                vx:
                Variable("vx",
                         interval=Interval(0, 715),
                         precision=self.precision),
                k:
                Variable("k",
                         interval=Interval(0, 1024),
                         precision=int_precision)
            })
        print "r_eval_error: ", r_eval_error

        approx_interval = Interval(-arg_reg_value, arg_reg_value)
        error_goal_approx = 2**-(self.precision.get_precision())

        poly_degree = sup(
            guessdegree(exp(sollya.x), approx_interval, error_goal_approx))
        precision_list = [1] + [self.precision] * (poly_degree)

        k_integer = Conversion(k,
                               precision=int_precision,
                               tag="k_integer",
                               debug=debug_multi)
        k_hi = BitLogicRightShift(k_integer,
                                  Constant(index_size),
                                  tag="k_int_hi",
                                  precision=int_precision,
                                  debug=debug_multi)
        k_lo = Modulo(k_integer,
                      2**index_size,
                      tag="k_int_lo",
                      precision=int_precision,
                      debug=debug_multi)
        pow_exp = ExponentInsertion(Conversion(k_hi, precision=int_precision),
                                    precision=self.precision,
                                    tag="pow_exp",
                                    debug=debug_multi)

        exp_table = ML_Table(dimensions=[2 * 2**index_size, 4],
                             storage_precision=self.precision,
                             tag=self.uniquify_name("exp2_table"))
        for i in range(2 * 2**index_size):
            input_value = i - 2**index_size if i >= 2**index_size else i
            # using SollyaObject wrapper to force evaluation by sollya
            # with higher precision
            exp_value = sollya.SollyaObject(2)**((input_value) *
                                                 2**-index_size)
            mexp_value = sollya.SollyaObject(2)**((-input_value) *
                                                  2**-index_size)
            pos_value_hi = round(exp_value, self.precision.get_sollya_object(),
                                 RN)
            pos_value_lo = round(exp_value - pos_value_hi,
                                 self.precision.get_sollya_object(), RN)
            neg_value_hi = round(mexp_value,
                                 self.precision.get_sollya_object(), RN)
            neg_value_lo = round(mexp_value - neg_value_hi,
                                 self.precision.get_sollya_object(), RN)
            exp_table[i][0] = neg_value_hi
            exp_table[i][1] = neg_value_lo
            exp_table[i][2] = pos_value_hi
            exp_table[i][3] = pos_value_lo

        # log2_value = log(2) / 2^index_size
        # cosh(x) = 1/2 * (exp(x) + exp(-x))
        # exp(x) = exp(x - k * log2_value + k * log2_value
        #
        # r = x - k * log2_value
        # exp(x) = exp(r) * 2 ^ (k / 2^index_size)
        #
        # k / 2^index_size = h + l * 2^-index_size
        # exp(x) = exp(r) * 2^h * 2^(l *2^-index_size)
        #
        # cosh(x) = exp(r) * 2^(h-1) 2^(l *2^-index_size) + exp(-r) * 2^(-h-1) * 2^(-l *2^-index_size)
        #
        error_function = lambda p, f, ai, mod, t: dirtyinfnorm(f - p, ai)

        poly_object, poly_approx_error = Polynomial.build_from_approximation_with_error(
            exp(sollya.x),
            poly_degree,
            precision_list,
            approx_interval,
            sollya.absolute,
            error_function=error_function)

        print "poly_approx_error: ", poly_approx_error, float(
            log2(poly_approx_error))

        polynomial_scheme_builder = PolynomialSchemeEvaluator.generate_horner_scheme
        poly_pos = polynomial_scheme_builder(
            poly_object.sub_poly(start_index=1),
            r,
            unified_precision=self.precision)
        poly_pos.set_attributes(tag="poly_pos", debug=debug_multi)

        poly_neg = polynomial_scheme_builder(
            poly_object.sub_poly(start_index=1),
            -r,
            unified_precision=self.precision)
        poly_neg.set_attributes(tag="poly_neg", debug=debug_multi)

        table_index = Addition(k_lo,
                               Constant(2**index_size,
                                        precision=int_precision),
                               precision=int_precision,
                               tag="table_index",
                               debug=debug_multi)

        neg_value_load_hi = TableLoad(exp_table,
                                      table_index,
                                      0,
                                      tag="neg_value_load_hi",
                                      debug=debug_multi)
        neg_value_load_lo = TableLoad(exp_table,
                                      table_index,
                                      1,
                                      tag="neg_value_load_lo",
                                      debug=debug_multi)
        pos_value_load_hi = TableLoad(exp_table,
                                      table_index,
                                      2,
                                      tag="pos_value_load_hi",
                                      debug=debug_multi)
        pos_value_load_lo = TableLoad(exp_table,
                                      table_index,
                                      3,
                                      tag="pos_value_load_lo",
                                      debug=debug_multi)

        k_plus = Max(
            Subtraction(k_hi,
                        Constant(1, precision=int_precision),
                        precision=int_precision,
                        tag="k_plus",
                        debug=debug_multi),
            Constant(self.precision.get_emin_normal(),
                     precision=int_precision))
        k_neg = Max(
            Subtraction(-k_hi,
                        Constant(1, precision=int_precision),
                        precision=int_precision,
                        tag="k_neg",
                        debug=debug_multi),
            Constant(self.precision.get_emin_normal(),
                     precision=int_precision))

        pow_exp_pos = ExponentInsertion(k_plus, precision=self.precision)
        pow_exp_neg = ExponentInsertion(k_neg, precision=self.precision)

        pos_exp = (
            pos_value_load_hi +
            (pos_value_load_hi * poly_pos +
             (pos_value_load_lo + pos_value_load_lo * poly_pos))) * pow_exp_pos
        pos_exp.set_attributes(tag="pos_exp", debug=debug_multi)

        neg_exp = (
            neg_value_load_hi +
            (neg_value_load_hi * poly_neg +
             (neg_value_load_lo + neg_value_load_lo * poly_neg))) * pow_exp_neg
        neg_exp.set_attributes(tag="neg_exp", debug=debug_multi)

        result = Addition(pos_exp,
                          neg_exp,
                          precision=self.precision,
                          tag="result",
                          debug=debug_multi)

        # ov_value
        ov_value = round(acosh(self.precision.get_max_value()),
                         self.precision.get_sollya_object(), RD)
        ov_flag = Comparison(Abs(vx),
                             Constant(ov_value, precision=self.precision),
                             specifier=Comparison.Greater)

        # main scheme
        Log.report(Log.Info, "\033[33;1m MDL scheme \033[0m")
        scheme = Statement(
            Return(Select(ov_flag, FP_PlusInfty(self.precision), result)))

        return scheme